diff mbox

Fix PR53217

Message ID 1336532666.24856.5.camel@gnopaine
State New
Headers show

Commit Message

Bill Schmidt May 9, 2012, 3:04 a.m. UTC
This fixes another statement-placement issue when reassociating
expressions with repeated factors.  Multiplies feeding into
__builtin_powi calls were not getting placed properly ahead of them in
some cases.

Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new
regressions.  I've also run SPEC cpu2006 with no build or correctness
issues.  OK for trunk?

Thanks,
Bill


gcc:

2012-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR tree-optimization/53217
	* tree-ssa-reassoc.c (bip_map): New static variable.
	(possibly_move_powi): Move feeding multiplies with __builtin_powi call.
	(attempt_builtin_powi): Save feeding multiplies on a stack.
	(reassociate_bb): Create and destroy bip_map.

gcc/testsuite:

2012-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR tree-optimization/53217
	* gfortran.dg/pr53217.f90: New test.

Comments

Bill Schmidt May 16, 2012, 2:37 a.m. UTC | #1
Ping.

Thanks,
Bill

On Tue, 2012-05-08 at 22:04 -0500, William J. Schmidt wrote:
> This fixes another statement-placement issue when reassociating
> expressions with repeated factors.  Multiplies feeding into
> __builtin_powi calls were not getting placed properly ahead of them in
> some cases.
> 
> Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new
> regressions.  I've also run SPEC cpu2006 with no build or correctness
> issues.  OK for trunk?
> 
> Thanks,
> Bill
> 
> 
> gcc:
> 
> 2012-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> 
> 	PR tree-optimization/53217
> 	* tree-ssa-reassoc.c (bip_map): New static variable.
> 	(possibly_move_powi): Move feeding multiplies with __builtin_powi call.
> 	(attempt_builtin_powi): Save feeding multiplies on a stack.
> 	(reassociate_bb): Create and destroy bip_map.
> 
> gcc/testsuite:
> 
> 2012-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> 
> 	PR tree-optimization/53217
> 	* gfortran.dg/pr53217.f90: New test.
> 
> 
> Index: gcc/testsuite/gfortran.dg/pr53217.f90
> ===================================================================
> --- gcc/testsuite/gfortran.dg/pr53217.f90	(revision 0)
> +++ gcc/testsuite/gfortran.dg/pr53217.f90	(revision 0)
> @@ -0,0 +1,28 @@
> +! { dg-do compile }
> +! { dg-options "-O1 -ffast-math" }
> +
> +! This tests only for compile-time failure, which formerly occurred
> +! when statements were emitted out of order, failing verify_ssa.
> +
> +MODULE xc_cs1
> +  INTEGER, PARAMETER :: dp=KIND(0.0D0)
> +  REAL(KIND=dp), PARAMETER :: a = 0.04918_dp, &
> +                              c = 0.2533_dp, &
> +                              d = 0.349_dp
> +CONTAINS
> +  SUBROUTINE cs1_u_2 ( rho, grho, r13, e_rho_rho, e_rho_ndrho, e_ndrho_ndrho,&
> +       npoints, error)
> +    REAL(KIND=dp), DIMENSION(*), &
> +      INTENT(INOUT)                          :: e_rho_rho, e_rho_ndrho, &
> +                                                e_ndrho_ndrho
> +    DO ip = 1, npoints
> +      IF ( rho(ip) > eps_rho ) THEN
> +         oc = 1.0_dp/(r*r*r3*r3 + c*g*g)
> +         d2rF4 = c4p*f13*f23*g**4*r3/r * (193*d*r**5*r3*r3+90*d*d*r**5*r3 &
> +                 -88*g*g*c*r**3*r3-100*d*d*c*g*g*r*r*r3*r3 &
> +                 +104*r**6)*od**3*oc**4
> +         e_rho_rho(ip) = e_rho_rho(ip) + d2F1 + d2rF2 + d2F3 + d2rF4
> +      END IF
> +    END DO
> +  END SUBROUTINE cs1_u_2
> +END MODULE xc_cs1
> Index: gcc/tree-ssa-reassoc.c
> ===================================================================
> --- gcc/tree-ssa-reassoc.c	(revision 187117)
> +++ gcc/tree-ssa-reassoc.c	(working copy)
> @@ -200,6 +200,10 @@ static long *bb_rank;
>  /* Operand->rank hashtable.  */
>  static struct pointer_map_t *operand_rank;
> 
> +/* Map from inserted __builtin_powi calls to multiply chains that
> +   feed them.  */
> +static struct pointer_map_t *bip_map;
> +
>  /* Forward decls.  */
>  static long get_rank (tree);
> 
> @@ -2249,7 +2253,7 @@ remove_visited_stmt_chain (tree var)
>  static void
>  possibly_move_powi (gimple stmt, tree op)
>  {
> -  gimple stmt2;
> +  gimple stmt2, *mpy;
>    tree fndecl;
>    gimple_stmt_iterator gsi1, gsi2;
> 
> @@ -2278,9 +2282,39 @@ possibly_move_powi (gimple stmt, tree op)
>        return;
>      }
> 
> +  /* Move the __builtin_powi.  */
>    gsi1 = gsi_for_stmt (stmt);
>    gsi2 = gsi_for_stmt (stmt2);
>    gsi_move_before (&gsi2, &gsi1);
> +
> +  /* See if there are multiplies feeding the __builtin_powi base
> +     argument that must also be moved.  */
> +  while ((mpy = (gimple *) pointer_map_contains (bip_map, stmt2)) != NULL)
> +    {
> +      /* If we've already moved this statement, we're done.  This is
> +         identified by a NULL entry for the statement in bip_map.  */
> +      gimple *next = (gimple *) pointer_map_contains (bip_map, *mpy);
> +      if (next && !*next)
> +	return;
> +
> +      stmt = stmt2;
> +      stmt2 = *mpy;
> +      gsi1 = gsi_for_stmt (stmt);
> +      gsi2 = gsi_for_stmt (stmt2);
> +      gsi_move_before (&gsi2, &gsi1);
> +
> +      /* The moved multiply may be DAG'd from multiple calls if it
> +	 was the result of a cached multiply.  Only move it once.
> +	 Rank order ensures we move it to the right place the first
> +	 time.  */
> +      if (next)
> +	*next = NULL;
> +      else
> +	{
> +	  next = (gimple *) pointer_map_insert (bip_map, *mpy);
> +	  *next = NULL;
> +	}
> +    }
>  }
> 
>  /* This function checks three consequtive operands in
> @@ -3281,6 +3315,7 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
>    while (true)
>      {
>        HOST_WIDE_INT power;
> +      gimple last_mul = NULL;
> 
>        /* First look for the largest cached product of factors from
>  	 preceding iterations.  If found, create a builtin_powi for
> @@ -3318,16 +3353,25 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
>  	    }
>  	  else
>  	    {
> +	      gimple *value;
> +
>  	      iter_result = get_reassoc_pow_ssa_name (target, type);
>  	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 
>  					    build_int_cst (integer_type_node,
>  							   power));
>  	      gimple_call_set_lhs (pow_stmt, iter_result);
>  	      gimple_set_location (pow_stmt, gimple_location (stmt));
> -	      /* Temporarily place the call; we will move it to the
> -		 correct place during rewrite_expr.  */
> +	      /* Temporarily place the call; we will move it and its
> +		 feeding multiplies to the correct place during
> +		 rewrite_expr.  */
>  	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
> 
> +	      if (!operand_equal_p (rf1->repr, rf1->factor, 0))
> +		{
> +		  value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
> +		  *value = SSA_NAME_DEF_STMT (rf1->repr);
> +		}
> +
>  	      if (dump_file && (dump_flags & TDF_DETAILS))
>  		{
>  		  unsigned elt;
> @@ -3413,6 +3457,15 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
>  		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
>  		  rf1->repr = target_ssa;
> 
> +		  /* Chain multiplies together for later movement.  */
> +		  if (last_mul)
> +		    {
> +		      gimple *value
> +			= (gimple *) pointer_map_insert (bip_map, mul_stmt);
> +		      *value = last_mul;
> +		    }
> +		  last_mul = mul_stmt;
> +
>  		  /* Don't reprocess the multiply we just introduced.  */
>  		  gimple_set_visited (mul_stmt, true);
>  		}
> @@ -3428,6 +3481,15 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
>  	  gimple_call_set_lhs (pow_stmt, iter_result);
>  	  gimple_set_location (pow_stmt, gimple_location (stmt));
>  	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
> +
> +	  /* If we inserted a chain of multiplies before the pow_stmt,
> +	     record that fact so we can move it later when we move the
> +	     pow_stmt.  */
> +	  if (last_mul)
> +	    {
> +	      gimple *value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
> +	      *value = last_mul;
> +	    }
>  	}
> 
>        /* Append the result of this iteration to the ops vector.  */
> @@ -3544,6 +3606,7 @@ reassociate_bb (basic_block bb)
>  	  if (associative_tree_code (rhs_code))
>  	    {
>  	      VEC(operand_entry_t, heap) *ops = NULL;
> +	      bip_map = pointer_map_create ();
> 
>  	      /* There may be no immediate uses left by the time we
>  		 get here because we may have eliminated them all.  */
> @@ -3608,6 +3671,7 @@ reassociate_bb (basic_block bb)
>  		}
> 
>  	      VEC_free (operand_entry_t, heap, ops);
> +	      pointer_map_destroy (bip_map);
>  	    }
>  	}
>      }
> 
>
Richard Biener May 16, 2012, 9:45 a.m. UTC | #2
On Tue, 15 May 2012, William J. Schmidt wrote:

> Ping.

I don't like it too much - but pondering a bit over it I can't find
a nicer solution.

So, ok.

Thanks,
Richard.

> Thanks,
> Bill
> 
> On Tue, 2012-05-08 at 22:04 -0500, William J. Schmidt wrote:
> > This fixes another statement-placement issue when reassociating
> > expressions with repeated factors.  Multiplies feeding into
> > __builtin_powi calls were not getting placed properly ahead of them in
> > some cases.
> > 
> > Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new
> > regressions.  I've also run SPEC cpu2006 with no build or correctness
> > issues.  OK for trunk?
> > 
> > Thanks,
> > Bill
> > 
> > 
> > gcc:
> > 
> > 2012-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > 
> > 	PR tree-optimization/53217
> > 	* tree-ssa-reassoc.c (bip_map): New static variable.
> > 	(possibly_move_powi): Move feeding multiplies with __builtin_powi call.
> > 	(attempt_builtin_powi): Save feeding multiplies on a stack.
> > 	(reassociate_bb): Create and destroy bip_map.
> > 
> > gcc/testsuite:
> > 
> > 2012-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > 
> > 	PR tree-optimization/53217
> > 	* gfortran.dg/pr53217.f90: New test.
> > 
> > 
> > Index: gcc/testsuite/gfortran.dg/pr53217.f90
> > ===================================================================
> > --- gcc/testsuite/gfortran.dg/pr53217.f90	(revision 0)
> > +++ gcc/testsuite/gfortran.dg/pr53217.f90	(revision 0)
> > @@ -0,0 +1,28 @@
> > +! { dg-do compile }
> > +! { dg-options "-O1 -ffast-math" }
> > +
> > +! This tests only for compile-time failure, which formerly occurred
> > +! when statements were emitted out of order, failing verify_ssa.
> > +
> > +MODULE xc_cs1
> > +  INTEGER, PARAMETER :: dp=KIND(0.0D0)
> > +  REAL(KIND=dp), PARAMETER :: a = 0.04918_dp, &
> > +                              c = 0.2533_dp, &
> > +                              d = 0.349_dp
> > +CONTAINS
> > +  SUBROUTINE cs1_u_2 ( rho, grho, r13, e_rho_rho, e_rho_ndrho, e_ndrho_ndrho,&
> > +       npoints, error)
> > +    REAL(KIND=dp), DIMENSION(*), &
> > +      INTENT(INOUT)                          :: e_rho_rho, e_rho_ndrho, &
> > +                                                e_ndrho_ndrho
> > +    DO ip = 1, npoints
> > +      IF ( rho(ip) > eps_rho ) THEN
> > +         oc = 1.0_dp/(r*r*r3*r3 + c*g*g)
> > +         d2rF4 = c4p*f13*f23*g**4*r3/r * (193*d*r**5*r3*r3+90*d*d*r**5*r3 &
> > +                 -88*g*g*c*r**3*r3-100*d*d*c*g*g*r*r*r3*r3 &
> > +                 +104*r**6)*od**3*oc**4
> > +         e_rho_rho(ip) = e_rho_rho(ip) + d2F1 + d2rF2 + d2F3 + d2rF4
> > +      END IF
> > +    END DO
> > +  END SUBROUTINE cs1_u_2
> > +END MODULE xc_cs1
> > Index: gcc/tree-ssa-reassoc.c
> > ===================================================================
> > --- gcc/tree-ssa-reassoc.c	(revision 187117)
> > +++ gcc/tree-ssa-reassoc.c	(working copy)
> > @@ -200,6 +200,10 @@ static long *bb_rank;
> >  /* Operand->rank hashtable.  */
> >  static struct pointer_map_t *operand_rank;
> > 
> > +/* Map from inserted __builtin_powi calls to multiply chains that
> > +   feed them.  */
> > +static struct pointer_map_t *bip_map;
> > +
> >  /* Forward decls.  */
> >  static long get_rank (tree);
> > 
> > @@ -2249,7 +2253,7 @@ remove_visited_stmt_chain (tree var)
> >  static void
> >  possibly_move_powi (gimple stmt, tree op)
> >  {
> > -  gimple stmt2;
> > +  gimple stmt2, *mpy;
> >    tree fndecl;
> >    gimple_stmt_iterator gsi1, gsi2;
> > 
> > @@ -2278,9 +2282,39 @@ possibly_move_powi (gimple stmt, tree op)
> >        return;
> >      }
> > 
> > +  /* Move the __builtin_powi.  */
> >    gsi1 = gsi_for_stmt (stmt);
> >    gsi2 = gsi_for_stmt (stmt2);
> >    gsi_move_before (&gsi2, &gsi1);
> > +
> > +  /* See if there are multiplies feeding the __builtin_powi base
> > +     argument that must also be moved.  */
> > +  while ((mpy = (gimple *) pointer_map_contains (bip_map, stmt2)) != NULL)
> > +    {
> > +      /* If we've already moved this statement, we're done.  This is
> > +         identified by a NULL entry for the statement in bip_map.  */
> > +      gimple *next = (gimple *) pointer_map_contains (bip_map, *mpy);
> > +      if (next && !*next)
> > +	return;
> > +
> > +      stmt = stmt2;
> > +      stmt2 = *mpy;
> > +      gsi1 = gsi_for_stmt (stmt);
> > +      gsi2 = gsi_for_stmt (stmt2);
> > +      gsi_move_before (&gsi2, &gsi1);
> > +
> > +      /* The moved multiply may be DAG'd from multiple calls if it
> > +	 was the result of a cached multiply.  Only move it once.
> > +	 Rank order ensures we move it to the right place the first
> > +	 time.  */
> > +      if (next)
> > +	*next = NULL;
> > +      else
> > +	{
> > +	  next = (gimple *) pointer_map_insert (bip_map, *mpy);
> > +	  *next = NULL;
> > +	}
> > +    }
> >  }
> > 
> >  /* This function checks three consequtive operands in
> > @@ -3281,6 +3315,7 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> >    while (true)
> >      {
> >        HOST_WIDE_INT power;
> > +      gimple last_mul = NULL;
> > 
> >        /* First look for the largest cached product of factors from
> >  	 preceding iterations.  If found, create a builtin_powi for
> > @@ -3318,16 +3353,25 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> >  	    }
> >  	  else
> >  	    {
> > +	      gimple *value;
> > +
> >  	      iter_result = get_reassoc_pow_ssa_name (target, type);
> >  	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 
> >  					    build_int_cst (integer_type_node,
> >  							   power));
> >  	      gimple_call_set_lhs (pow_stmt, iter_result);
> >  	      gimple_set_location (pow_stmt, gimple_location (stmt));
> > -	      /* Temporarily place the call; we will move it to the
> > -		 correct place during rewrite_expr.  */
> > +	      /* Temporarily place the call; we will move it and its
> > +		 feeding multiplies to the correct place during
> > +		 rewrite_expr.  */
> >  	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
> > 
> > +	      if (!operand_equal_p (rf1->repr, rf1->factor, 0))
> > +		{
> > +		  value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
> > +		  *value = SSA_NAME_DEF_STMT (rf1->repr);
> > +		}
> > +
> >  	      if (dump_file && (dump_flags & TDF_DETAILS))
> >  		{
> >  		  unsigned elt;
> > @@ -3413,6 +3457,15 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> >  		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
> >  		  rf1->repr = target_ssa;
> > 
> > +		  /* Chain multiplies together for later movement.  */
> > +		  if (last_mul)
> > +		    {
> > +		      gimple *value
> > +			= (gimple *) pointer_map_insert (bip_map, mul_stmt);
> > +		      *value = last_mul;
> > +		    }
> > +		  last_mul = mul_stmt;
> > +
> >  		  /* Don't reprocess the multiply we just introduced.  */
> >  		  gimple_set_visited (mul_stmt, true);
> >  		}
> > @@ -3428,6 +3481,15 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> >  	  gimple_call_set_lhs (pow_stmt, iter_result);
> >  	  gimple_set_location (pow_stmt, gimple_location (stmt));
> >  	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
> > +
> > +	  /* If we inserted a chain of multiplies before the pow_stmt,
> > +	     record that fact so we can move it later when we move the
> > +	     pow_stmt.  */
> > +	  if (last_mul)
> > +	    {
> > +	      gimple *value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
> > +	      *value = last_mul;
> > +	    }
> >  	}
> > 
> >        /* Append the result of this iteration to the ops vector.  */
> > @@ -3544,6 +3606,7 @@ reassociate_bb (basic_block bb)
> >  	  if (associative_tree_code (rhs_code))
> >  	    {
> >  	      VEC(operand_entry_t, heap) *ops = NULL;
> > +	      bip_map = pointer_map_create ();
> > 
> >  	      /* There may be no immediate uses left by the time we
> >  		 get here because we may have eliminated them all.  */
> > @@ -3608,6 +3671,7 @@ reassociate_bb (basic_block bb)
> >  		}
> > 
> >  	      VEC_free (operand_entry_t, heap, ops);
> > +	      pointer_map_destroy (bip_map);
> >  	    }
> >  	}
> >      }
> > 
> > 
> 
>
Bill Schmidt May 16, 2012, 12:03 p.m. UTC | #3
On Wed, 2012-05-16 at 11:45 +0200, Richard Guenther wrote:
> On Tue, 15 May 2012, William J. Schmidt wrote:
> 
> > Ping.
> 
> I don't like it too much - but pondering a bit over it I can't find
> a nicer solution.
> 
> So, ok.
> 
> Thanks,
> Richard.
> 
Agreed.  I'm not fond of it either, and I feel it's a bit fragile.

An alternative would be to go back to handling the exponentiation
expressions outside of the ops list (generating an explicit multiply to
hook them up with the results of normal linear/parallel expansion).  In
hindsight, placing the exponentiation results in the ops list and
letting the rank order handle things introduces some complexity as well
as saving some.  The DAG'd nature of the exponentiation expressions
isn't a perfect fit for the pure tree form of the reassociated
multiplies.

Let me know if you'd like me to pursue that instead.

Thanks,
Bill

> > Thanks,
> > Bill
> > 
> > On Tue, 2012-05-08 at 22:04 -0500, William J. Schmidt wrote:
> > > This fixes another statement-placement issue when reassociating
> > > expressions with repeated factors.  Multiplies feeding into
> > > __builtin_powi calls were not getting placed properly ahead of them in
> > > some cases.
> > > 
> > > Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new
> > > regressions.  I've also run SPEC cpu2006 with no build or correctness
> > > issues.  OK for trunk?
> > > 
> > > Thanks,
> > > Bill
> > > 
> > > 
> > > gcc:
> > > 
> > > 2012-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > > 
> > > 	PR tree-optimization/53217
> > > 	* tree-ssa-reassoc.c (bip_map): New static variable.
> > > 	(possibly_move_powi): Move feeding multiplies with __builtin_powi call.
> > > 	(attempt_builtin_powi): Save feeding multiplies on a stack.
> > > 	(reassociate_bb): Create and destroy bip_map.
> > > 
> > > gcc/testsuite:
> > > 
> > > 2012-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > > 
> > > 	PR tree-optimization/53217
> > > 	* gfortran.dg/pr53217.f90: New test.
> > > 
> > > 
> > > Index: gcc/testsuite/gfortran.dg/pr53217.f90
> > > ===================================================================
> > > --- gcc/testsuite/gfortran.dg/pr53217.f90	(revision 0)
> > > +++ gcc/testsuite/gfortran.dg/pr53217.f90	(revision 0)
> > > @@ -0,0 +1,28 @@
> > > +! { dg-do compile }
> > > +! { dg-options "-O1 -ffast-math" }
> > > +
> > > +! This tests only for compile-time failure, which formerly occurred
> > > +! when statements were emitted out of order, failing verify_ssa.
> > > +
> > > +MODULE xc_cs1
> > > +  INTEGER, PARAMETER :: dp=KIND(0.0D0)
> > > +  REAL(KIND=dp), PARAMETER :: a = 0.04918_dp, &
> > > +                              c = 0.2533_dp, &
> > > +                              d = 0.349_dp
> > > +CONTAINS
> > > +  SUBROUTINE cs1_u_2 ( rho, grho, r13, e_rho_rho, e_rho_ndrho, e_ndrho_ndrho,&
> > > +       npoints, error)
> > > +    REAL(KIND=dp), DIMENSION(*), &
> > > +      INTENT(INOUT)                          :: e_rho_rho, e_rho_ndrho, &
> > > +                                                e_ndrho_ndrho
> > > +    DO ip = 1, npoints
> > > +      IF ( rho(ip) > eps_rho ) THEN
> > > +         oc = 1.0_dp/(r*r*r3*r3 + c*g*g)
> > > +         d2rF4 = c4p*f13*f23*g**4*r3/r * (193*d*r**5*r3*r3+90*d*d*r**5*r3 &
> > > +                 -88*g*g*c*r**3*r3-100*d*d*c*g*g*r*r*r3*r3 &
> > > +                 +104*r**6)*od**3*oc**4
> > > +         e_rho_rho(ip) = e_rho_rho(ip) + d2F1 + d2rF2 + d2F3 + d2rF4
> > > +      END IF
> > > +    END DO
> > > +  END SUBROUTINE cs1_u_2
> > > +END MODULE xc_cs1
> > > Index: gcc/tree-ssa-reassoc.c
> > > ===================================================================
> > > --- gcc/tree-ssa-reassoc.c	(revision 187117)
> > > +++ gcc/tree-ssa-reassoc.c	(working copy)
> > > @@ -200,6 +200,10 @@ static long *bb_rank;
> > >  /* Operand->rank hashtable.  */
> > >  static struct pointer_map_t *operand_rank;
> > > 
> > > +/* Map from inserted __builtin_powi calls to multiply chains that
> > > +   feed them.  */
> > > +static struct pointer_map_t *bip_map;
> > > +
> > >  /* Forward decls.  */
> > >  static long get_rank (tree);
> > > 
> > > @@ -2249,7 +2253,7 @@ remove_visited_stmt_chain (tree var)
> > >  static void
> > >  possibly_move_powi (gimple stmt, tree op)
> > >  {
> > > -  gimple stmt2;
> > > +  gimple stmt2, *mpy;
> > >    tree fndecl;
> > >    gimple_stmt_iterator gsi1, gsi2;
> > > 
> > > @@ -2278,9 +2282,39 @@ possibly_move_powi (gimple stmt, tree op)
> > >        return;
> > >      }
> > > 
> > > +  /* Move the __builtin_powi.  */
> > >    gsi1 = gsi_for_stmt (stmt);
> > >    gsi2 = gsi_for_stmt (stmt2);
> > >    gsi_move_before (&gsi2, &gsi1);
> > > +
> > > +  /* See if there are multiplies feeding the __builtin_powi base
> > > +     argument that must also be moved.  */
> > > +  while ((mpy = (gimple *) pointer_map_contains (bip_map, stmt2)) != NULL)
> > > +    {
> > > +      /* If we've already moved this statement, we're done.  This is
> > > +         identified by a NULL entry for the statement in bip_map.  */
> > > +      gimple *next = (gimple *) pointer_map_contains (bip_map, *mpy);
> > > +      if (next && !*next)
> > > +	return;
> > > +
> > > +      stmt = stmt2;
> > > +      stmt2 = *mpy;
> > > +      gsi1 = gsi_for_stmt (stmt);
> > > +      gsi2 = gsi_for_stmt (stmt2);
> > > +      gsi_move_before (&gsi2, &gsi1);
> > > +
> > > +      /* The moved multiply may be DAG'd from multiple calls if it
> > > +	 was the result of a cached multiply.  Only move it once.
> > > +	 Rank order ensures we move it to the right place the first
> > > +	 time.  */
> > > +      if (next)
> > > +	*next = NULL;
> > > +      else
> > > +	{
> > > +	  next = (gimple *) pointer_map_insert (bip_map, *mpy);
> > > +	  *next = NULL;
> > > +	}
> > > +    }
> > >  }
> > > 
> > >  /* This function checks three consequtive operands in
> > > @@ -3281,6 +3315,7 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> > >    while (true)
> > >      {
> > >        HOST_WIDE_INT power;
> > > +      gimple last_mul = NULL;
> > > 
> > >        /* First look for the largest cached product of factors from
> > >  	 preceding iterations.  If found, create a builtin_powi for
> > > @@ -3318,16 +3353,25 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> > >  	    }
> > >  	  else
> > >  	    {
> > > +	      gimple *value;
> > > +
> > >  	      iter_result = get_reassoc_pow_ssa_name (target, type);
> > >  	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 
> > >  					    build_int_cst (integer_type_node,
> > >  							   power));
> > >  	      gimple_call_set_lhs (pow_stmt, iter_result);
> > >  	      gimple_set_location (pow_stmt, gimple_location (stmt));
> > > -	      /* Temporarily place the call; we will move it to the
> > > -		 correct place during rewrite_expr.  */
> > > +	      /* Temporarily place the call; we will move it and its
> > > +		 feeding multiplies to the correct place during
> > > +		 rewrite_expr.  */
> > >  	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
> > > 
> > > +	      if (!operand_equal_p (rf1->repr, rf1->factor, 0))
> > > +		{
> > > +		  value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
> > > +		  *value = SSA_NAME_DEF_STMT (rf1->repr);
> > > +		}
> > > +
> > >  	      if (dump_file && (dump_flags & TDF_DETAILS))
> > >  		{
> > >  		  unsigned elt;
> > > @@ -3413,6 +3457,15 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> > >  		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
> > >  		  rf1->repr = target_ssa;
> > > 
> > > +		  /* Chain multiplies together for later movement.  */
> > > +		  if (last_mul)
> > > +		    {
> > > +		      gimple *value
> > > +			= (gimple *) pointer_map_insert (bip_map, mul_stmt);
> > > +		      *value = last_mul;
> > > +		    }
> > > +		  last_mul = mul_stmt;
> > > +
> > >  		  /* Don't reprocess the multiply we just introduced.  */
> > >  		  gimple_set_visited (mul_stmt, true);
> > >  		}
> > > @@ -3428,6 +3481,15 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> > >  	  gimple_call_set_lhs (pow_stmt, iter_result);
> > >  	  gimple_set_location (pow_stmt, gimple_location (stmt));
> > >  	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
> > > +
> > > +	  /* If we inserted a chain of multiplies before the pow_stmt,
> > > +	     record that fact so we can move it later when we move the
> > > +	     pow_stmt.  */
> > > +	  if (last_mul)
> > > +	    {
> > > +	      gimple *value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
> > > +	      *value = last_mul;
> > > +	    }
> > >  	}
> > > 
> > >        /* Append the result of this iteration to the ops vector.  */
> > > @@ -3544,6 +3606,7 @@ reassociate_bb (basic_block bb)
> > >  	  if (associative_tree_code (rhs_code))
> > >  	    {
> > >  	      VEC(operand_entry_t, heap) *ops = NULL;
> > > +	      bip_map = pointer_map_create ();
> > > 
> > >  	      /* There may be no immediate uses left by the time we
> > >  		 get here because we may have eliminated them all.  */
> > > @@ -3608,6 +3671,7 @@ reassociate_bb (basic_block bb)
> > >  		}
> > > 
> > >  	      VEC_free (operand_entry_t, heap, ops);
> > > +	      pointer_map_destroy (bip_map);
> > >  	    }
> > >  	}
> > >      }
> > > 
> > > 
> > 
> > 
>
Richard Biener May 16, 2012, 12:05 p.m. UTC | #4
On Wed, 16 May 2012, William J. Schmidt wrote:

> On Wed, 2012-05-16 at 11:45 +0200, Richard Guenther wrote:
> > On Tue, 15 May 2012, William J. Schmidt wrote:
> > 
> > > Ping.
> > 
> > I don't like it too much - but pondering a bit over it I can't find
> > a nicer solution.
> > 
> > So, ok.
> > 
> > Thanks,
> > Richard.
> > 
> Agreed.  I'm not fond of it either, and I feel it's a bit fragile.
> 
> An alternative would be to go back to handling the exponentiation
> expressions outside of the ops list (generating an explicit multiply to
> hook them up with the results of normal linear/parallel expansion).  In
> hindsight, placing the exponentiation results in the ops list and
> letting the rank order handle things introduces some complexity as well
> as saving some.  The DAG'd nature of the exponentiation expressions
> isn't a perfect fit for the pure tree form of the reassociated
> multiplies.

True.

> Let me know if you'd like me to pursue that instead.

You can try - if the result looks better I'm all for it ;)

Thanks,
Richard.

> Thanks,
> Bill
> 
> > > Thanks,
> > > Bill
> > > 
> > > On Tue, 2012-05-08 at 22:04 -0500, William J. Schmidt wrote:
> > > > This fixes another statement-placement issue when reassociating
> > > > expressions with repeated factors.  Multiplies feeding into
> > > > __builtin_powi calls were not getting placed properly ahead of them in
> > > > some cases.
> > > > 
> > > > Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new
> > > > regressions.  I've also run SPEC cpu2006 with no build or correctness
> > > > issues.  OK for trunk?
> > > > 
> > > > Thanks,
> > > > Bill
> > > > 
> > > > 
> > > > gcc:
> > > > 
> > > > 2012-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > > > 
> > > > 	PR tree-optimization/53217
> > > > 	* tree-ssa-reassoc.c (bip_map): New static variable.
> > > > 	(possibly_move_powi): Move feeding multiplies with __builtin_powi call.
> > > > 	(attempt_builtin_powi): Save feeding multiplies on a stack.
> > > > 	(reassociate_bb): Create and destroy bip_map.
> > > > 
> > > > gcc/testsuite:
> > > > 
> > > > 2012-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > > > 
> > > > 	PR tree-optimization/53217
> > > > 	* gfortran.dg/pr53217.f90: New test.
> > > > 
> > > > 
> > > > Index: gcc/testsuite/gfortran.dg/pr53217.f90
> > > > ===================================================================
> > > > --- gcc/testsuite/gfortran.dg/pr53217.f90	(revision 0)
> > > > +++ gcc/testsuite/gfortran.dg/pr53217.f90	(revision 0)
> > > > @@ -0,0 +1,28 @@
> > > > +! { dg-do compile }
> > > > +! { dg-options "-O1 -ffast-math" }
> > > > +
> > > > +! This tests only for compile-time failure, which formerly occurred
> > > > +! when statements were emitted out of order, failing verify_ssa.
> > > > +
> > > > +MODULE xc_cs1
> > > > +  INTEGER, PARAMETER :: dp=KIND(0.0D0)
> > > > +  REAL(KIND=dp), PARAMETER :: a = 0.04918_dp, &
> > > > +                              c = 0.2533_dp, &
> > > > +                              d = 0.349_dp
> > > > +CONTAINS
> > > > +  SUBROUTINE cs1_u_2 ( rho, grho, r13, e_rho_rho, e_rho_ndrho, e_ndrho_ndrho,&
> > > > +       npoints, error)
> > > > +    REAL(KIND=dp), DIMENSION(*), &
> > > > +      INTENT(INOUT)                          :: e_rho_rho, e_rho_ndrho, &
> > > > +                                                e_ndrho_ndrho
> > > > +    DO ip = 1, npoints
> > > > +      IF ( rho(ip) > eps_rho ) THEN
> > > > +         oc = 1.0_dp/(r*r*r3*r3 + c*g*g)
> > > > +         d2rF4 = c4p*f13*f23*g**4*r3/r * (193*d*r**5*r3*r3+90*d*d*r**5*r3 &
> > > > +                 -88*g*g*c*r**3*r3-100*d*d*c*g*g*r*r*r3*r3 &
> > > > +                 +104*r**6)*od**3*oc**4
> > > > +         e_rho_rho(ip) = e_rho_rho(ip) + d2F1 + d2rF2 + d2F3 + d2rF4
> > > > +      END IF
> > > > +    END DO
> > > > +  END SUBROUTINE cs1_u_2
> > > > +END MODULE xc_cs1
> > > > Index: gcc/tree-ssa-reassoc.c
> > > > ===================================================================
> > > > --- gcc/tree-ssa-reassoc.c	(revision 187117)
> > > > +++ gcc/tree-ssa-reassoc.c	(working copy)
> > > > @@ -200,6 +200,10 @@ static long *bb_rank;
> > > >  /* Operand->rank hashtable.  */
> > > >  static struct pointer_map_t *operand_rank;
> > > > 
> > > > +/* Map from inserted __builtin_powi calls to multiply chains that
> > > > +   feed them.  */
> > > > +static struct pointer_map_t *bip_map;
> > > > +
> > > >  /* Forward decls.  */
> > > >  static long get_rank (tree);
> > > > 
> > > > @@ -2249,7 +2253,7 @@ remove_visited_stmt_chain (tree var)
> > > >  static void
> > > >  possibly_move_powi (gimple stmt, tree op)
> > > >  {
> > > > -  gimple stmt2;
> > > > +  gimple stmt2, *mpy;
> > > >    tree fndecl;
> > > >    gimple_stmt_iterator gsi1, gsi2;
> > > > 
> > > > @@ -2278,9 +2282,39 @@ possibly_move_powi (gimple stmt, tree op)
> > > >        return;
> > > >      }
> > > > 
> > > > +  /* Move the __builtin_powi.  */
> > > >    gsi1 = gsi_for_stmt (stmt);
> > > >    gsi2 = gsi_for_stmt (stmt2);
> > > >    gsi_move_before (&gsi2, &gsi1);
> > > > +
> > > > +  /* See if there are multiplies feeding the __builtin_powi base
> > > > +     argument that must also be moved.  */
> > > > +  while ((mpy = (gimple *) pointer_map_contains (bip_map, stmt2)) != NULL)
> > > > +    {
> > > > +      /* If we've already moved this statement, we're done.  This is
> > > > +         identified by a NULL entry for the statement in bip_map.  */
> > > > +      gimple *next = (gimple *) pointer_map_contains (bip_map, *mpy);
> > > > +      if (next && !*next)
> > > > +	return;
> > > > +
> > > > +      stmt = stmt2;
> > > > +      stmt2 = *mpy;
> > > > +      gsi1 = gsi_for_stmt (stmt);
> > > > +      gsi2 = gsi_for_stmt (stmt2);
> > > > +      gsi_move_before (&gsi2, &gsi1);
> > > > +
> > > > +      /* The moved multiply may be DAG'd from multiple calls if it
> > > > +	 was the result of a cached multiply.  Only move it once.
> > > > +	 Rank order ensures we move it to the right place the first
> > > > +	 time.  */
> > > > +      if (next)
> > > > +	*next = NULL;
> > > > +      else
> > > > +	{
> > > > +	  next = (gimple *) pointer_map_insert (bip_map, *mpy);
> > > > +	  *next = NULL;
> > > > +	}
> > > > +    }
> > > >  }
> > > > 
> > > >  /* This function checks three consequtive operands in
> > > > @@ -3281,6 +3315,7 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> > > >    while (true)
> > > >      {
> > > >        HOST_WIDE_INT power;
> > > > +      gimple last_mul = NULL;
> > > > 
> > > >        /* First look for the largest cached product of factors from
> > > >  	 preceding iterations.  If found, create a builtin_powi for
> > > > @@ -3318,16 +3353,25 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> > > >  	    }
> > > >  	  else
> > > >  	    {
> > > > +	      gimple *value;
> > > > +
> > > >  	      iter_result = get_reassoc_pow_ssa_name (target, type);
> > > >  	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 
> > > >  					    build_int_cst (integer_type_node,
> > > >  							   power));
> > > >  	      gimple_call_set_lhs (pow_stmt, iter_result);
> > > >  	      gimple_set_location (pow_stmt, gimple_location (stmt));
> > > > -	      /* Temporarily place the call; we will move it to the
> > > > -		 correct place during rewrite_expr.  */
> > > > +	      /* Temporarily place the call; we will move it and its
> > > > +		 feeding multiplies to the correct place during
> > > > +		 rewrite_expr.  */
> > > >  	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
> > > > 
> > > > +	      if (!operand_equal_p (rf1->repr, rf1->factor, 0))
> > > > +		{
> > > > +		  value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
> > > > +		  *value = SSA_NAME_DEF_STMT (rf1->repr);
> > > > +		}
> > > > +
> > > >  	      if (dump_file && (dump_flags & TDF_DETAILS))
> > > >  		{
> > > >  		  unsigned elt;
> > > > @@ -3413,6 +3457,15 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> > > >  		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
> > > >  		  rf1->repr = target_ssa;
> > > > 
> > > > +		  /* Chain multiplies together for later movement.  */
> > > > +		  if (last_mul)
> > > > +		    {
> > > > +		      gimple *value
> > > > +			= (gimple *) pointer_map_insert (bip_map, mul_stmt);
> > > > +		      *value = last_mul;
> > > > +		    }
> > > > +		  last_mul = mul_stmt;
> > > > +
> > > >  		  /* Don't reprocess the multiply we just introduced.  */
> > > >  		  gimple_set_visited (mul_stmt, true);
> > > >  		}
> > > > @@ -3428,6 +3481,15 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> > > >  	  gimple_call_set_lhs (pow_stmt, iter_result);
> > > >  	  gimple_set_location (pow_stmt, gimple_location (stmt));
> > > >  	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
> > > > +
> > > > +	  /* If we inserted a chain of multiplies before the pow_stmt,
> > > > +	     record that fact so we can move it later when we move the
> > > > +	     pow_stmt.  */
> > > > +	  if (last_mul)
> > > > +	    {
> > > > +	      gimple *value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
> > > > +	      *value = last_mul;
> > > > +	    }
> > > >  	}
> > > > 
> > > >        /* Append the result of this iteration to the ops vector.  */
> > > > @@ -3544,6 +3606,7 @@ reassociate_bb (basic_block bb)
> > > >  	  if (associative_tree_code (rhs_code))
> > > >  	    {
> > > >  	      VEC(operand_entry_t, heap) *ops = NULL;
> > > > +	      bip_map = pointer_map_create ();
> > > > 
> > > >  	      /* There may be no immediate uses left by the time we
> > > >  		 get here because we may have eliminated them all.  */
> > > > @@ -3608,6 +3671,7 @@ reassociate_bb (basic_block bb)
> > > >  		}
> > > > 
> > > >  	      VEC_free (operand_entry_t, heap, ops);
> > > > +	      pointer_map_destroy (bip_map);
> > > >  	    }
> > > >  	}
> > > >      }
> > > > 
> > > > 
> > > 
> > > 
> > 
> 
>
Bill Schmidt May 16, 2012, 12:10 p.m. UTC | #5
On Wed, 2012-05-16 at 14:05 +0200, Richard Guenther wrote:
> On Wed, 16 May 2012, William J. Schmidt wrote:
> 
> > On Wed, 2012-05-16 at 11:45 +0200, Richard Guenther wrote:
> > > On Tue, 15 May 2012, William J. Schmidt wrote:
> > > 
> > > > Ping.
> > > 
> > > I don't like it too much - but pondering a bit over it I can't find
> > > a nicer solution.
> > > 
> > > So, ok.
> > > 
> > > Thanks,
> > > Richard.
> > > 
> > Agreed.  I'm not fond of it either, and I feel it's a bit fragile.
> > 
> > An alternative would be to go back to handling the exponentiation
> > expressions outside of the ops list (generating an explicit multiply to
> > hook them up with the results of normal linear/parallel expansion).  In
> > hindsight, placing the exponentiation results in the ops list and
> > letting the rank order handle things introduces some complexity as well
> > as saving some.  The DAG'd nature of the exponentiation expressions
> > isn't a perfect fit for the pure tree form of the reassociated
> > multiplies.
> 
> True.
> 
> > Let me know if you'd like me to pursue that instead.
> 
> You can try - if the result looks better I'm all for it ;)
> 
OK. :)  I'll commit this for now to deal with the fallout, and work on
the alternative version in my spare time.

Thanks,
Bill

> Thanks,
> Richard.
> 
> > Thanks,
> > Bill
> > 
> > > > Thanks,
> > > > Bill
> > > > 
> > > > On Tue, 2012-05-08 at 22:04 -0500, William J. Schmidt wrote:
> > > > > This fixes another statement-placement issue when reassociating
> > > > > expressions with repeated factors.  Multiplies feeding into
> > > > > __builtin_powi calls were not getting placed properly ahead of them in
> > > > > some cases.
> > > > > 
> > > > > Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new
> > > > > regressions.  I've also run SPEC cpu2006 with no build or correctness
> > > > > issues.  OK for trunk?
> > > > > 
> > > > > Thanks,
> > > > > Bill
> > > > > 
> > > > > 
> > > > > gcc:
> > > > > 
> > > > > 2012-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > > > > 
> > > > > 	PR tree-optimization/53217
> > > > > 	* tree-ssa-reassoc.c (bip_map): New static variable.
> > > > > 	(possibly_move_powi): Move feeding multiplies with __builtin_powi call.
> > > > > 	(attempt_builtin_powi): Save feeding multiplies on a stack.
> > > > > 	(reassociate_bb): Create and destroy bip_map.
> > > > > 
> > > > > gcc/testsuite:
> > > > > 
> > > > > 2012-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> > > > > 
> > > > > 	PR tree-optimization/53217
> > > > > 	* gfortran.dg/pr53217.f90: New test.
> > > > > 
> > > > > 
> > > > > Index: gcc/testsuite/gfortran.dg/pr53217.f90
> > > > > ===================================================================
> > > > > --- gcc/testsuite/gfortran.dg/pr53217.f90	(revision 0)
> > > > > +++ gcc/testsuite/gfortran.dg/pr53217.f90	(revision 0)
> > > > > @@ -0,0 +1,28 @@
> > > > > +! { dg-do compile }
> > > > > +! { dg-options "-O1 -ffast-math" }
> > > > > +
> > > > > +! This tests only for compile-time failure, which formerly occurred
> > > > > +! when statements were emitted out of order, failing verify_ssa.
> > > > > +
> > > > > +MODULE xc_cs1
> > > > > +  INTEGER, PARAMETER :: dp=KIND(0.0D0)
> > > > > +  REAL(KIND=dp), PARAMETER :: a = 0.04918_dp, &
> > > > > +                              c = 0.2533_dp, &
> > > > > +                              d = 0.349_dp
> > > > > +CONTAINS
> > > > > +  SUBROUTINE cs1_u_2 ( rho, grho, r13, e_rho_rho, e_rho_ndrho, e_ndrho_ndrho,&
> > > > > +       npoints, error)
> > > > > +    REAL(KIND=dp), DIMENSION(*), &
> > > > > +      INTENT(INOUT)                          :: e_rho_rho, e_rho_ndrho, &
> > > > > +                                                e_ndrho_ndrho
> > > > > +    DO ip = 1, npoints
> > > > > +      IF ( rho(ip) > eps_rho ) THEN
> > > > > +         oc = 1.0_dp/(r*r*r3*r3 + c*g*g)
> > > > > +         d2rF4 = c4p*f13*f23*g**4*r3/r * (193*d*r**5*r3*r3+90*d*d*r**5*r3 &
> > > > > +                 -88*g*g*c*r**3*r3-100*d*d*c*g*g*r*r*r3*r3 &
> > > > > +                 +104*r**6)*od**3*oc**4
> > > > > +         e_rho_rho(ip) = e_rho_rho(ip) + d2F1 + d2rF2 + d2F3 + d2rF4
> > > > > +      END IF
> > > > > +    END DO
> > > > > +  END SUBROUTINE cs1_u_2
> > > > > +END MODULE xc_cs1
> > > > > Index: gcc/tree-ssa-reassoc.c
> > > > > ===================================================================
> > > > > --- gcc/tree-ssa-reassoc.c	(revision 187117)
> > > > > +++ gcc/tree-ssa-reassoc.c	(working copy)
> > > > > @@ -200,6 +200,10 @@ static long *bb_rank;
> > > > >  /* Operand->rank hashtable.  */
> > > > >  static struct pointer_map_t *operand_rank;
> > > > > 
> > > > > +/* Map from inserted __builtin_powi calls to multiply chains that
> > > > > +   feed them.  */
> > > > > +static struct pointer_map_t *bip_map;
> > > > > +
> > > > >  /* Forward decls.  */
> > > > >  static long get_rank (tree);
> > > > > 
> > > > > @@ -2249,7 +2253,7 @@ remove_visited_stmt_chain (tree var)
> > > > >  static void
> > > > >  possibly_move_powi (gimple stmt, tree op)
> > > > >  {
> > > > > -  gimple stmt2;
> > > > > +  gimple stmt2, *mpy;
> > > > >    tree fndecl;
> > > > >    gimple_stmt_iterator gsi1, gsi2;
> > > > > 
> > > > > @@ -2278,9 +2282,39 @@ possibly_move_powi (gimple stmt, tree op)
> > > > >        return;
> > > > >      }
> > > > > 
> > > > > +  /* Move the __builtin_powi.  */
> > > > >    gsi1 = gsi_for_stmt (stmt);
> > > > >    gsi2 = gsi_for_stmt (stmt2);
> > > > >    gsi_move_before (&gsi2, &gsi1);
> > > > > +
> > > > > +  /* See if there are multiplies feeding the __builtin_powi base
> > > > > +     argument that must also be moved.  */
> > > > > +  while ((mpy = (gimple *) pointer_map_contains (bip_map, stmt2)) != NULL)
> > > > > +    {
> > > > > +      /* If we've already moved this statement, we're done.  This is
> > > > > +         identified by a NULL entry for the statement in bip_map.  */
> > > > > +      gimple *next = (gimple *) pointer_map_contains (bip_map, *mpy);
> > > > > +      if (next && !*next)
> > > > > +	return;
> > > > > +
> > > > > +      stmt = stmt2;
> > > > > +      stmt2 = *mpy;
> > > > > +      gsi1 = gsi_for_stmt (stmt);
> > > > > +      gsi2 = gsi_for_stmt (stmt2);
> > > > > +      gsi_move_before (&gsi2, &gsi1);
> > > > > +
> > > > > +      /* The moved multiply may be DAG'd from multiple calls if it
> > > > > +	 was the result of a cached multiply.  Only move it once.
> > > > > +	 Rank order ensures we move it to the right place the first
> > > > > +	 time.  */
> > > > > +      if (next)
> > > > > +	*next = NULL;
> > > > > +      else
> > > > > +	{
> > > > > +	  next = (gimple *) pointer_map_insert (bip_map, *mpy);
> > > > > +	  *next = NULL;
> > > > > +	}
> > > > > +    }
> > > > >  }
> > > > > 
> > > > >  /* This function checks three consequtive operands in
> > > > > @@ -3281,6 +3315,7 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> > > > >    while (true)
> > > > >      {
> > > > >        HOST_WIDE_INT power;
> > > > > +      gimple last_mul = NULL;
> > > > > 
> > > > >        /* First look for the largest cached product of factors from
> > > > >  	 preceding iterations.  If found, create a builtin_powi for
> > > > > @@ -3318,16 +3353,25 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> > > > >  	    }
> > > > >  	  else
> > > > >  	    {
> > > > > +	      gimple *value;
> > > > > +
> > > > >  	      iter_result = get_reassoc_pow_ssa_name (target, type);
> > > > >  	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 
> > > > >  					    build_int_cst (integer_type_node,
> > > > >  							   power));
> > > > >  	      gimple_call_set_lhs (pow_stmt, iter_result);
> > > > >  	      gimple_set_location (pow_stmt, gimple_location (stmt));
> > > > > -	      /* Temporarily place the call; we will move it to the
> > > > > -		 correct place during rewrite_expr.  */
> > > > > +	      /* Temporarily place the call; we will move it and its
> > > > > +		 feeding multiplies to the correct place during
> > > > > +		 rewrite_expr.  */
> > > > >  	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
> > > > > 
> > > > > +	      if (!operand_equal_p (rf1->repr, rf1->factor, 0))
> > > > > +		{
> > > > > +		  value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
> > > > > +		  *value = SSA_NAME_DEF_STMT (rf1->repr);
> > > > > +		}
> > > > > +
> > > > >  	      if (dump_file && (dump_flags & TDF_DETAILS))
> > > > >  		{
> > > > >  		  unsigned elt;
> > > > > @@ -3413,6 +3457,15 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> > > > >  		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
> > > > >  		  rf1->repr = target_ssa;
> > > > > 
> > > > > +		  /* Chain multiplies together for later movement.  */
> > > > > +		  if (last_mul)
> > > > > +		    {
> > > > > +		      gimple *value
> > > > > +			= (gimple *) pointer_map_insert (bip_map, mul_stmt);
> > > > > +		      *value = last_mul;
> > > > > +		    }
> > > > > +		  last_mul = mul_stmt;
> > > > > +
> > > > >  		  /* Don't reprocess the multiply we just introduced.  */
> > > > >  		  gimple_set_visited (mul_stmt, true);
> > > > >  		}
> > > > > @@ -3428,6 +3481,15 @@ attempt_builtin_powi (gimple stmt, VEC(operand_ent
> > > > >  	  gimple_call_set_lhs (pow_stmt, iter_result);
> > > > >  	  gimple_set_location (pow_stmt, gimple_location (stmt));
> > > > >  	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
> > > > > +
> > > > > +	  /* If we inserted a chain of multiplies before the pow_stmt,
> > > > > +	     record that fact so we can move it later when we move the
> > > > > +	     pow_stmt.  */
> > > > > +	  if (last_mul)
> > > > > +	    {
> > > > > +	      gimple *value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
> > > > > +	      *value = last_mul;
> > > > > +	    }
> > > > >  	}
> > > > > 
> > > > >        /* Append the result of this iteration to the ops vector.  */
> > > > > @@ -3544,6 +3606,7 @@ reassociate_bb (basic_block bb)
> > > > >  	  if (associative_tree_code (rhs_code))
> > > > >  	    {
> > > > >  	      VEC(operand_entry_t, heap) *ops = NULL;
> > > > > +	      bip_map = pointer_map_create ();
> > > > > 
> > > > >  	      /* There may be no immediate uses left by the time we
> > > > >  		 get here because we may have eliminated them all.  */
> > > > > @@ -3608,6 +3671,7 @@ reassociate_bb (basic_block bb)
> > > > >  		}
> > > > > 
> > > > >  	      VEC_free (operand_entry_t, heap, ops);
> > > > > +	      pointer_map_destroy (bip_map);
> > > > >  	    }
> > > > >  	}
> > > > >      }
> > > > > 
> > > > > 
> > > > 
> > > > 
> > > 
> > 
> > 
>
diff mbox

Patch

Index: gcc/testsuite/gfortran.dg/pr53217.f90
===================================================================
--- gcc/testsuite/gfortran.dg/pr53217.f90	(revision 0)
+++ gcc/testsuite/gfortran.dg/pr53217.f90	(revision 0)
@@ -0,0 +1,28 @@ 
+! { dg-do compile }
+! { dg-options "-O1 -ffast-math" }
+
+! This tests only for compile-time failure, which formerly occurred
+! when statements were emitted out of order, failing verify_ssa.
+
+MODULE xc_cs1
+  INTEGER, PARAMETER :: dp=KIND(0.0D0)
+  REAL(KIND=dp), PARAMETER :: a = 0.04918_dp, &
+                              c = 0.2533_dp, &
+                              d = 0.349_dp
+CONTAINS
+  SUBROUTINE cs1_u_2 ( rho, grho, r13, e_rho_rho, e_rho_ndrho, e_ndrho_ndrho,&
+       npoints, error)
+    REAL(KIND=dp), DIMENSION(*), &
+      INTENT(INOUT)                          :: e_rho_rho, e_rho_ndrho, &
+                                                e_ndrho_ndrho
+    DO ip = 1, npoints
+      IF ( rho(ip) > eps_rho ) THEN
+         oc = 1.0_dp/(r*r*r3*r3 + c*g*g)
+         d2rF4 = c4p*f13*f23*g**4*r3/r * (193*d*r**5*r3*r3+90*d*d*r**5*r3 &
+                 -88*g*g*c*r**3*r3-100*d*d*c*g*g*r*r*r3*r3 &
+                 +104*r**6)*od**3*oc**4
+         e_rho_rho(ip) = e_rho_rho(ip) + d2F1 + d2rF2 + d2F3 + d2rF4
+      END IF
+    END DO
+  END SUBROUTINE cs1_u_2
+END MODULE xc_cs1
Index: gcc/tree-ssa-reassoc.c
===================================================================
--- gcc/tree-ssa-reassoc.c	(revision 187117)
+++ gcc/tree-ssa-reassoc.c	(working copy)
@@ -200,6 +200,10 @@  static long *bb_rank;
 /* Operand->rank hashtable.  */
 static struct pointer_map_t *operand_rank;
 
+/* Map from inserted __builtin_powi calls to multiply chains that
+   feed them.  */
+static struct pointer_map_t *bip_map;
+
 /* Forward decls.  */
 static long get_rank (tree);
 
@@ -2249,7 +2253,7 @@  remove_visited_stmt_chain (tree var)
 static void
 possibly_move_powi (gimple stmt, tree op)
 {
-  gimple stmt2;
+  gimple stmt2, *mpy;
   tree fndecl;
   gimple_stmt_iterator gsi1, gsi2;
 
@@ -2278,9 +2282,39 @@  possibly_move_powi (gimple stmt, tree op)
       return;
     }
 
+  /* Move the __builtin_powi.  */
   gsi1 = gsi_for_stmt (stmt);
   gsi2 = gsi_for_stmt (stmt2);
   gsi_move_before (&gsi2, &gsi1);
+
+  /* See if there are multiplies feeding the __builtin_powi base
+     argument that must also be moved.  */
+  while ((mpy = (gimple *) pointer_map_contains (bip_map, stmt2)) != NULL)
+    {
+      /* If we've already moved this statement, we're done.  This is
+         identified by a NULL entry for the statement in bip_map.  */
+      gimple *next = (gimple *) pointer_map_contains (bip_map, *mpy);
+      if (next && !*next)
+	return;
+
+      stmt = stmt2;
+      stmt2 = *mpy;
+      gsi1 = gsi_for_stmt (stmt);
+      gsi2 = gsi_for_stmt (stmt2);
+      gsi_move_before (&gsi2, &gsi1);
+
+      /* The moved multiply may be DAG'd from multiple calls if it
+	 was the result of a cached multiply.  Only move it once.
+	 Rank order ensures we move it to the right place the first
+	 time.  */
+      if (next)
+	*next = NULL;
+      else
+	{
+	  next = (gimple *) pointer_map_insert (bip_map, *mpy);
+	  *next = NULL;
+	}
+    }
 }
 
 /* This function checks three consequtive operands in
@@ -3281,6 +3315,7 @@  attempt_builtin_powi (gimple stmt, VEC(operand_ent
   while (true)
     {
       HOST_WIDE_INT power;
+      gimple last_mul = NULL;
 
       /* First look for the largest cached product of factors from
 	 preceding iterations.  If found, create a builtin_powi for
@@ -3318,16 +3353,25 @@  attempt_builtin_powi (gimple stmt, VEC(operand_ent
 	    }
 	  else
 	    {
+	      gimple *value;
+
 	      iter_result = get_reassoc_pow_ssa_name (target, type);
 	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr, 
 					    build_int_cst (integer_type_node,
 							   power));
 	      gimple_call_set_lhs (pow_stmt, iter_result);
 	      gimple_set_location (pow_stmt, gimple_location (stmt));
-	      /* Temporarily place the call; we will move it to the
-		 correct place during rewrite_expr.  */
+	      /* Temporarily place the call; we will move it and its
+		 feeding multiplies to the correct place during
+		 rewrite_expr.  */
 	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
 
+	      if (!operand_equal_p (rf1->repr, rf1->factor, 0))
+		{
+		  value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
+		  *value = SSA_NAME_DEF_STMT (rf1->repr);
+		}
+
 	      if (dump_file && (dump_flags & TDF_DETAILS))
 		{
 		  unsigned elt;
@@ -3413,6 +3457,15 @@  attempt_builtin_powi (gimple stmt, VEC(operand_ent
 		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
 		  rf1->repr = target_ssa;
 
+		  /* Chain multiplies together for later movement.  */
+		  if (last_mul)
+		    {
+		      gimple *value
+			= (gimple *) pointer_map_insert (bip_map, mul_stmt);
+		      *value = last_mul;
+		    }
+		  last_mul = mul_stmt;
+
 		  /* Don't reprocess the multiply we just introduced.  */
 		  gimple_set_visited (mul_stmt, true);
 		}
@@ -3428,6 +3481,15 @@  attempt_builtin_powi (gimple stmt, VEC(operand_ent
 	  gimple_call_set_lhs (pow_stmt, iter_result);
 	  gimple_set_location (pow_stmt, gimple_location (stmt));
 	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
+
+	  /* If we inserted a chain of multiplies before the pow_stmt,
+	     record that fact so we can move it later when we move the
+	     pow_stmt.  */
+	  if (last_mul)
+	    {
+	      gimple *value = (gimple *) pointer_map_insert (bip_map, pow_stmt);
+	      *value = last_mul;
+	    }
 	}
 
       /* Append the result of this iteration to the ops vector.  */
@@ -3544,6 +3606,7 @@  reassociate_bb (basic_block bb)
 	  if (associative_tree_code (rhs_code))
 	    {
 	      VEC(operand_entry_t, heap) *ops = NULL;
+	      bip_map = pointer_map_create ();
 
 	      /* There may be no immediate uses left by the time we
 		 get here because we may have eliminated them all.  */
@@ -3608,6 +3671,7 @@  reassociate_bb (basic_block bb)
 		}
 
 	      VEC_free (operand_entry_t, heap, ops);
+	      pointer_map_destroy (bip_map);
 	    }
 	}
     }