diff mbox

RFC: Add ADDR_EXPR lowering (PR tree-optimization/66718)

Message ID 20150704141951.GT10247@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek July 4, 2015, 2:19 p.m. UTC
On Fri, Jul 03, 2015 at 04:06:26PM +0200, Jakub Jelinek wrote:
> In the pr59984.c testcase, with Marek's patch and this patch, one loop in
> test is already vectorized (the ICE was on the other one), I'll work on
> recognizing multiples of GOMP_SIMD_LANE () as linear next, so that we
> vectorize also the loop with bar.  Without Marek's patch we weren't

And here is a patch to vectorize everything in pr59984.c.
For the purpose of elemental functions, addresses of variables in
simd magic arrays (e.g. private, linear, reduction etc.) are linear,
but they aren't linear in the whole loop, they are just linear within the
vectorization factor.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

This one really depends on Marek's patch, doesn't make sense to commit
before it goes in.

2015-07-04  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/66718
	* tree-vect-stmts.c (struct simd_call_arg_info): Add simd_lane_linear
	field.
	(vectorizable_simd_clone_call): Support using linear arguments for
	addresses of array elements indexed by GOMP_SIMD_LANE result.



	Jakub

Comments

Richard Biener July 9, 2015, 9:14 a.m. UTC | #1
On Sat, 4 Jul 2015, Jakub Jelinek wrote:

> On Fri, Jul 03, 2015 at 04:06:26PM +0200, Jakub Jelinek wrote:
> > In the pr59984.c testcase, with Marek's patch and this patch, one loop in
> > test is already vectorized (the ICE was on the other one), I'll work on
> > recognizing multiples of GOMP_SIMD_LANE () as linear next, so that we
> > vectorize also the loop with bar.  Without Marek's patch we weren't
> 
> And here is a patch to vectorize everything in pr59984.c.
> For the purpose of elemental functions, addresses of variables in
> simd magic arrays (e.g. private, linear, reduction etc.) are linear,
> but they aren't linear in the whole loop, they are just linear within the
> vectorization factor.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> This one really depends on Marek's patch, doesn't make sense to commit
> before it goes in.
> 
> 2015-07-04  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/66718
> 	* tree-vect-stmts.c (struct simd_call_arg_info): Add simd_lane_linear
> 	field.
> 	(vectorizable_simd_clone_call): Support using linear arguments for
> 	addresses of array elements indexed by GOMP_SIMD_LANE result.
> 
> --- gcc/tree-vect-stmts.c.jj	2015-07-03 14:06:28.000000000 +0200
> +++ gcc/tree-vect-stmts.c	2015-07-03 20:43:42.796573076 +0200
> @@ -2618,6 +2618,7 @@ struct simd_call_arg_info
>    enum vect_def_type dt;
>    HOST_WIDE_INT linear_step;
>    unsigned int align;
> +  bool simd_lane_linear;
>  };
>  
>  /* Function vectorizable_simd_clone_call.
> @@ -2702,6 +2703,7 @@ vectorizable_simd_clone_call (gimple stm
>        thisarginfo.linear_step = 0;
>        thisarginfo.align = 0;
>        thisarginfo.op = NULL_TREE;
> +      thisarginfo.simd_lane_linear = false;
>  
>        op = gimple_call_arg (stmt, i);
>        if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
> @@ -2724,21 +2726,24 @@ vectorizable_simd_clone_call (gimple stm
>  
>        /* For linear arguments, the analyze phase should have saved
>  	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
> -      if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
> -	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
> +      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
> +	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
>  	{
>  	  gcc_assert (vec_stmt);
>  	  thisarginfo.linear_step
> -	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
> +	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
>  	  thisarginfo.op
> -	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
> +	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
> +	  thisarginfo.simd_lane_linear
> +	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
> +	       == boolean_true_node);
>  	  /* If loop has been peeled for alignment, we need to adjust it.  */
>  	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
>  	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
> -	  if (n1 != n2)
> +	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
>  	    {
>  	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
> -	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
> +	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
>  	      tree opt = TREE_TYPE (thisarginfo.op);
>  	      bias = fold_convert (TREE_TYPE (step), bias);
>  	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
> @@ -2764,6 +2769,93 @@ vectorizable_simd_clone_call (gimple stm
>  		|| thisarginfo.dt == vect_external_def)
>  	       && POINTER_TYPE_P (TREE_TYPE (op)))
>  	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
> +      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
> +	 linear too.  */
> +      if (POINTER_TYPE_P (TREE_TYPE (op))
> +	  && !thisarginfo.linear_step
> +	  && !vec_stmt
> +	  && thisarginfo.dt != vect_constant_def
> +	  && thisarginfo.dt != vect_external_def
> +	  && loop_vinfo
> +	  && !slp_node
> +	  && TREE_CODE (op) == SSA_NAME)
> +	{
> +	  def_stmt = SSA_NAME_DEF_STMT (op);
> +	  if (is_gimple_assign (def_stmt)
> +	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
> +	      && is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
> +	    {
> +	      tree base = gimple_assign_rhs1 (def_stmt);
> +	      HOST_WIDE_INT linear_step = 0;
> +	      tree v = gimple_assign_rhs2 (def_stmt);
> +	      while (v && TREE_CODE (v) == SSA_NAME)

Hmm, this looks like it could be split out to a function.

> +		{
> +		  tree t;
> +		  def_stmt = SSA_NAME_DEF_STMT (v);
> +		  if (is_gimple_assign (def_stmt))
> +		    switch (gimple_assign_rhs_code (def_stmt))
> +		      {
> +		      case PLUS_EXPR:
> +			t = gimple_assign_rhs2 (def_stmt);
> +			if (linear_step || TREE_CODE (t) != INTEGER_CST)
> +			  {
> +			    v = NULL_TREE;
> +			    continue;
> +			  }
> +			base = fold_build2 (POINTER_PLUS_EXPR,
> +					    TREE_TYPE (base), base, t);
> +			v = gimple_assign_rhs1 (def_stmt);
> +			continue;

what about MINUS_EXPR?  I suppose you only handle stuff that's
produced by get_inner_reference here?

> +		      case MULT_EXPR:
> +			t = gimple_assign_rhs2 (def_stmt);
> +			if (linear_step
> +			    || !tree_fits_shwi_p (t)
> +			    || integer_zerop (t))
> +			  {

So no VLAs... (I understand this code is for correctness?)

As I don't know too much about all this OMP stuff the patch is ok
if you split this out to a separate function.

Thanks,
Richard.

> +			    v = NULL_TREE;
> +			    continue;
> +			  }
> +			linear_step = tree_to_shwi (t);
> +			v = gimple_assign_rhs1 (def_stmt);
> +			continue;
> +		      CASE_CONVERT:
> +			t = gimple_assign_rhs1 (def_stmt);
> +			if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
> +			    || (TYPE_PRECISION (TREE_TYPE (v))
> +				< TYPE_PRECISION (TREE_TYPE (t))))
> +			  {
> +			    v = NULL_TREE;
> +			    continue;
> +			  }
> +			if (!linear_step)
> +			  linear_step = 1;
> +			v = t;
> +			continue;
> +		      default:
> +			v = NULL_TREE;
> +			continue;
> +		      }
> +		  else if (is_gimple_call (def_stmt)
> +			   && gimple_call_internal_p (def_stmt)
> +			   && (gimple_call_internal_fn (def_stmt)
> +			       == IFN_GOMP_SIMD_LANE)
> +			   && loop->simduid
> +			   && (TREE_CODE (gimple_call_arg (def_stmt, 0))
> +			       == SSA_NAME)
> +			   && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
> +			       == loop->simduid))
> +		    {
> +		      if (!linear_step)
> +			linear_step = 1;
> +		      thisarginfo.linear_step = linear_step;
> +		      thisarginfo.op = base;
> +		      thisarginfo.simd_lane_linear = true;
> +		      v = NULL_TREE;
> +		      continue;
> +		    }
> +		}
> +	    }
> +	}
>  
>        arginfo.quick_push (thisarginfo);
>      }
> @@ -2895,13 +2987,16 @@ vectorizable_simd_clone_call (gimple stm
>  	if (bestn->simdclone->args[i].arg_type
>  	    == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
>  	  {
> -	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
> +	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
>  									+ 1);
>  	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
>  	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
>  		       ? size_type_node : TREE_TYPE (arginfo[i].op);
>  	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
>  	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
> +	    tree sll = arginfo[i].simd_lane_linear
> +		       ? boolean_true_node : boolean_false_node;
> +	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
>  	  }
>        STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
>        if (dump_enabled_p ())
> @@ -3039,6 +3134,11 @@ vectorizable_simd_clone_call (gimple stm
>  		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
>  		      gcc_assert (!new_bb);
>  		    }
> +		  if (arginfo[i].simd_lane_linear)
> +		    {
> +		      vargs.safe_push (arginfo[i].op);
> +		      break;
> +		    }
>  		  tree phi_res = copy_ssa_name (op);
>  		  gphi *new_phi = create_phi_node (phi_res, loop->header);
>  		  set_vinfo_for_stmt (new_phi,
> 
> 
> 	Jakub
> 
>
Jakub Jelinek July 9, 2015, 9:21 a.m. UTC | #2
On Thu, Jul 09, 2015 at 11:14:01AM +0200, Richard Biener wrote:
> > @@ -2764,6 +2769,93 @@ vectorizable_simd_clone_call (gimple stm
> >  		|| thisarginfo.dt == vect_external_def)
> >  	       && POINTER_TYPE_P (TREE_TYPE (op)))
> >  	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
> > +      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
> > +	 linear too.  */
> > +      if (POINTER_TYPE_P (TREE_TYPE (op))
> > +	  && !thisarginfo.linear_step
> > +	  && !vec_stmt
> > +	  && thisarginfo.dt != vect_constant_def
> > +	  && thisarginfo.dt != vect_external_def
> > +	  && loop_vinfo
> > +	  && !slp_node
> > +	  && TREE_CODE (op) == SSA_NAME)
> > +	{
> > +	  def_stmt = SSA_NAME_DEF_STMT (op);
> > +	  if (is_gimple_assign (def_stmt)
> > +	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
> > +	      && is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
> > +	    {
> > +	      tree base = gimple_assign_rhs1 (def_stmt);
> > +	      HOST_WIDE_INT linear_step = 0;
> > +	      tree v = gimple_assign_rhs2 (def_stmt);
> > +	      while (v && TREE_CODE (v) == SSA_NAME)
> 
> Hmm, this looks like it could be split out to a function.

Ok, will try that.

> > +			base = fold_build2 (POINTER_PLUS_EXPR,
> > +					    TREE_TYPE (base), base, t);
> > +			v = gimple_assign_rhs1 (def_stmt);
> > +			continue;
> 
> what about MINUS_EXPR?  I suppose you only handle stuff that's
> produced by get_inner_reference here?

This is meant to be used with the "omp simd array" vars, and with what
is produced by Marek's pass from those — i.e. something that in the
original code is a simple scalar or aggregate var. I'm only handling
+ with a constant bias; wouldn't subtraction be folded into addition
of a negative constant anyway?
This code isn't for correctness, but optimization, if it doesn't detect
something is linear, even when it is, it can simply not be vectorized or
vectorized less efficiently through some other simd clone.
> 
> > +		      case MULT_EXPR:
> > +			t = gimple_assign_rhs2 (def_stmt);
> > +			if (linear_step
> > +			    || !tree_fits_shwi_p (t)
> > +			    || integer_zerop (t))
> > +			  {
> 
> So no VLAs... (I understand this code is for correctness?)

VLAs are never added as "omp simd array"; I punt on them early
(set safelen to 1).

	Jakub
diff mbox

Patch

--- gcc/tree-vect-stmts.c.jj	2015-07-03 14:06:28.000000000 +0200
+++ gcc/tree-vect-stmts.c	2015-07-03 20:43:42.796573076 +0200
@@ -2618,6 +2618,7 @@  struct simd_call_arg_info
   enum vect_def_type dt;
   HOST_WIDE_INT linear_step;
   unsigned int align;
+  bool simd_lane_linear;
 };
 
 /* Function vectorizable_simd_clone_call.
@@ -2702,6 +2703,7 @@  vectorizable_simd_clone_call (gimple stm
       thisarginfo.linear_step = 0;
       thisarginfo.align = 0;
       thisarginfo.op = NULL_TREE;
+      thisarginfo.simd_lane_linear = false;
 
       op = gimple_call_arg (stmt, i);
       if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
@@ -2724,21 +2726,24 @@  vectorizable_simd_clone_call (gimple stm
 
       /* For linear arguments, the analyze phase should have saved
 	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
-      if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
-	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
+      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
+	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
 	{
 	  gcc_assert (vec_stmt);
 	  thisarginfo.linear_step
-	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
+	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
 	  thisarginfo.op
-	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
+	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
+	  thisarginfo.simd_lane_linear
+	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
+	       == boolean_true_node);
 	  /* If loop has been peeled for alignment, we need to adjust it.  */
 	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
 	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
-	  if (n1 != n2)
+	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
 	    {
 	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
-	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
+	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
 	      tree opt = TREE_TYPE (thisarginfo.op);
 	      bias = fold_convert (TREE_TYPE (step), bias);
 	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
@@ -2764,6 +2769,93 @@  vectorizable_simd_clone_call (gimple stm
 		|| thisarginfo.dt == vect_external_def)
 	       && POINTER_TYPE_P (TREE_TYPE (op)))
 	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
+      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
+	 linear too.  */
+      if (POINTER_TYPE_P (TREE_TYPE (op))
+	  && !thisarginfo.linear_step
+	  && !vec_stmt
+	  && thisarginfo.dt != vect_constant_def
+	  && thisarginfo.dt != vect_external_def
+	  && loop_vinfo
+	  && !slp_node
+	  && TREE_CODE (op) == SSA_NAME)
+	{
+	  def_stmt = SSA_NAME_DEF_STMT (op);
+	  if (is_gimple_assign (def_stmt)
+	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
+	      && is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
+	    {
+	      tree base = gimple_assign_rhs1 (def_stmt);
+	      HOST_WIDE_INT linear_step = 0;
+	      tree v = gimple_assign_rhs2 (def_stmt);
+	      while (v && TREE_CODE (v) == SSA_NAME)
+		{
+		  tree t;
+		  def_stmt = SSA_NAME_DEF_STMT (v);
+		  if (is_gimple_assign (def_stmt))
+		    switch (gimple_assign_rhs_code (def_stmt))
+		      {
+		      case PLUS_EXPR:
+			t = gimple_assign_rhs2 (def_stmt);
+			if (linear_step || TREE_CODE (t) != INTEGER_CST)
+			  {
+			    v = NULL_TREE;
+			    continue;
+			  }
+			base = fold_build2 (POINTER_PLUS_EXPR,
+					    TREE_TYPE (base), base, t);
+			v = gimple_assign_rhs1 (def_stmt);
+			continue;
+		      case MULT_EXPR:
+			t = gimple_assign_rhs2 (def_stmt);
+			if (linear_step
+			    || !tree_fits_shwi_p (t)
+			    || integer_zerop (t))
+			  {
+			    v = NULL_TREE;
+			    continue;
+			  }
+			linear_step = tree_to_shwi (t);
+			v = gimple_assign_rhs1 (def_stmt);
+			continue;
+		      CASE_CONVERT:
+			t = gimple_assign_rhs1 (def_stmt);
+			if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
+			    || (TYPE_PRECISION (TREE_TYPE (v))
+				< TYPE_PRECISION (TREE_TYPE (t))))
+			  {
+			    v = NULL_TREE;
+			    continue;
+			  }
+			if (!linear_step)
+			  linear_step = 1;
+			v = t;
+			continue;
+		      default:
+			v = NULL_TREE;
+			continue;
+		      }
+		  else if (is_gimple_call (def_stmt)
+			   && gimple_call_internal_p (def_stmt)
+			   && (gimple_call_internal_fn (def_stmt)
+			       == IFN_GOMP_SIMD_LANE)
+			   && loop->simduid
+			   && (TREE_CODE (gimple_call_arg (def_stmt, 0))
+			       == SSA_NAME)
+			   && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
+			       == loop->simduid))
+		    {
+		      if (!linear_step)
+			linear_step = 1;
+		      thisarginfo.linear_step = linear_step;
+		      thisarginfo.op = base;
+		      thisarginfo.simd_lane_linear = true;
+		      v = NULL_TREE;
+		      continue;
+		    }
+		}
+	    }
+	}
 
       arginfo.quick_push (thisarginfo);
     }
@@ -2895,13 +2987,16 @@  vectorizable_simd_clone_call (gimple stm
 	if (bestn->simdclone->args[i].arg_type
 	    == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
 	  {
-	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
+	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
 									+ 1);
 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
 	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
 		       ? size_type_node : TREE_TYPE (arginfo[i].op);
 	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
 	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
+	    tree sll = arginfo[i].simd_lane_linear
+		       ? boolean_true_node : boolean_false_node;
+	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
 	  }
       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
       if (dump_enabled_p ())
@@ -3039,6 +3134,11 @@  vectorizable_simd_clone_call (gimple stm
 		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
 		      gcc_assert (!new_bb);
 		    }
+		  if (arginfo[i].simd_lane_linear)
+		    {
+		      vargs.safe_push (arginfo[i].op);
+		      break;
+		    }
 		  tree phi_res = copy_ssa_name (op);
 		  gphi *new_phi = create_phi_node (phi_res, loop->header);
 		  set_vinfo_for_stmt (new_phi,