diff mbox

[3/6] Vectorize internal functions

Message ID 87611bnnoo.fsf@e105548-lin.cambridge.arm.com
State New
Headers show

Commit Message

Richard Sandiford Nov. 9, 2015, 4:27 p.m. UTC
This patch makes the vectorizer try to vectorize calls to built-in and
internal functions as calls to vector internal functions first, falling
back on the current built-in target hooks otherwise.


gcc/
	* internal-fn.h (direct_internal_fn_info): Add vectorizable flag.
	* internal-fn.c (direct_internal_fn_array): Update accordingly.
	* tree-vectorizer.h (vectorizable_function): Delete.
	* tree-vect-stmts.c: Include internal-fn.h.
	(vectorizable_internal_function): New function.
	(vectorizable_function): Inline into...
	(vectorizable_call): ...here.  Explicitly reject calls that read
	from or write to memory.  Try using an internal function before
	falling back on the old vectorizable_function behavior.

Comments

Richard Sandiford Nov. 17, 2015, 9:30 a.m. UTC | #1
Thanks for all the reviews for this series.  I think the patch below
is the only target-independent one that hasn't had any comments.

Richard

Richard Sandiford <richard.sandiford@arm.com> writes:
> This patch tries to vectorize built-in and internal functions as
> internal functions first, falling back on the current built-in
> target hooks otherwise.
>
>
> gcc/
> 	* internal-fn.h (direct_internal_fn_info): Add vectorizable flag.
> 	* internal-fn.c (direct_internal_fn_array): Update accordingly.
> 	* tree-vectorizer.h (vectorizable_function): Delete.
> 	* tree-vect-stmts.c: Include internal-fn.h.
> 	(vectorizable_internal_function): New function.
> 	(vectorizable_function): Inline into...
> 	(vectorizable_call): ...here.  Explicitly reject calls that read
> 	from or write to memory.  Try using an internal function before
> 	falling back on the old vectorizable_function behavior.
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 898c83d..a5bda2f 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -69,13 +69,13 @@ init_internal_fns ()
>  
>  /* Create static initializers for the information returned by
>     direct_internal_fn.  */
> -#define not_direct { -2, -2 }
> -#define mask_load_direct { -1, -1 }
> -#define load_lanes_direct { -1, -1 }
> -#define mask_store_direct { 3, 3 }
> -#define store_lanes_direct { 0, 0 }
> -#define unary_direct { 0, 0 }
> -#define binary_direct { 0, 0 }
> +#define not_direct { -2, -2, false }
> +#define mask_load_direct { -1, -1, false }
> +#define load_lanes_direct { -1, -1, false }
> +#define mask_store_direct { 3, 3, false }
> +#define store_lanes_direct { 0, 0, false }
> +#define unary_direct { 0, 0, true }
> +#define binary_direct { 0, 0, true }
>  
>  const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
>  #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 6cb123f..aea6abd 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -134,6 +134,14 @@ struct direct_internal_fn_info
>       function isn't directly mapped to an optab.  */
>    signed int type0 : 8;
>    signed int type1 : 8;
> +  /* True if the function is pointwise, so that it can be vectorized by
> +     converting the return type and all argument types to vectors of the
> +     same number of elements.  E.g. we can vectorize an IFN_SQRT on
> +     floats as an IFN_SQRT on vectors of N floats.
> +
> +     This only needs 1 bit, but occupies the full 16 to ensure a nice
> +     layout.  */
> +  unsigned int vectorizable : 16;
>  };
>  
>  extern const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1];
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 75389c4..1142142 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-scalar-evolution.h"
>  #include "tree-vectorizer.h"
>  #include "builtins.h"
> +#include "internal-fn.h"
>  
>  /* For lang_hooks.types.type_for_mode.  */
>  #include "langhooks.h"
> @@ -1632,27 +1633,32 @@ vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
>      add_stmt_to_eh_lp (vec_stmt, lp_nr);
>  }
>  
> -/* Checks if CALL can be vectorized in type VECTYPE.  Returns
> -   a function declaration if the target has a vectorized version
> -   of the function, or NULL_TREE if the function cannot be vectorized.  */
> +/* We want to vectorize a call to combined function CFN with function
> +   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
> +   as the types of all inputs.  Check whether this is possible using
> +   an internal function, returning its code if so or IFN_LAST if not.  */
>  
> -tree
> -vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
> +static internal_fn
> +vectorizable_internal_function (combined_fn cfn, tree fndecl,
> +				tree vectype_out, tree vectype_in)
>  {
> -  /* We only handle functions that do not read or clobber memory.  */
> -  if (gimple_vuse (call))
> -    return NULL_TREE;
> -
> -  combined_fn fn = gimple_call_combined_fn (call);
> -  if (fn != CFN_LAST)
> -    return targetm.vectorize.builtin_vectorized_function
> -      (fn, vectype_out, vectype_in);
> -
> -  if (gimple_call_builtin_p (call, BUILT_IN_MD))
> -    return targetm.vectorize.builtin_md_vectorized_function
> -      (gimple_call_fndecl (call), vectype_out, vectype_in);
> -
> -  return NULL_TREE;
> +  internal_fn ifn;
> +  if (internal_fn_p (cfn))
> +    ifn = as_internal_fn (cfn);
> +  else
> +    ifn = associated_internal_fn (fndecl);
> +  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
> +    {
> +      const direct_internal_fn_info &info = direct_internal_fn (ifn);
> +      if (info.vectorizable)
> +	{
> +	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
> +	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
> +	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1)))
> +	    return ifn;
> +	}
> +    }
> +  return IFN_LAST;
>  }
>  
>  
> @@ -2232,15 +2238,43 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>    else
>      return false;
>  
> +  /* We only handle functions that do not read or clobber memory.  */
> +  if (gimple_vuse (stmt))
> +    {
> +      if (dump_enabled_p ())
> +	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +			 "function reads from or writes to memory.\n");
> +      return false;
> +    }
> +
>    /* For now, we only vectorize functions if a target specific builtin
>       is available.  TODO -- in some cases, it might be profitable to
>       insert the calls for pieces of the vector, in order to be able
>       to vectorize other operations in the loop.  */
> -  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
> -  if (fndecl == NULL_TREE)
> +  fndecl = NULL_TREE;
> +  internal_fn ifn = IFN_LAST;
> +  combined_fn cfn = gimple_call_combined_fn (stmt);
> +  tree callee = gimple_call_fndecl (stmt);
> +
> +  /* First try using an internal function.  */
> +  if (cfn != CFN_LAST)
> +    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
> +					  vectype_in);
> +
> +  /* If that fails, try asking for a target-specific built-in function.  */
> +  if (ifn == IFN_LAST)
> +    {
> +      if (cfn != CFN_LAST)
> +	fndecl = targetm.vectorize.builtin_vectorized_function
> +	  (cfn, vectype_out, vectype_in);
> +      else
> +	fndecl = targetm.vectorize.builtin_md_vectorized_function
> +	  (callee, vectype_out, vectype_in);
> +    }
> +
> +  if (ifn == IFN_LAST && !fndecl)
>      {
> -      if (gimple_call_internal_p (stmt)
> -	  && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
> +      if (cfn == CFN_GOMP_SIMD_LANE
>  	  && !slp_node
>  	  && loop_vinfo
>  	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
> @@ -2261,8 +2295,6 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>  	}
>      }
>  
> -  gcc_assert (!gimple_vuse (stmt));
> -
>    if (slp_node || PURE_SLP_STMT (stmt_info))
>      ncopies = 1;
>    else if (modifier == NARROW)
> @@ -2324,7 +2356,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>  		      vec<tree> vec_oprndsk = vec_defs[k];
>  		      vargs[k] = vec_oprndsk[i];
>  		    }
> -		  new_stmt = gimple_build_call_vec (fndecl, vargs);
> +		  if (ifn != IFN_LAST)
> +		    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +		  else
> +		    new_stmt = gimple_build_call_vec (fndecl, vargs);
>  		  new_temp = make_ssa_name (vec_dest, new_stmt);
>  		  gimple_call_set_lhs (new_stmt, new_temp);
>  		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
> @@ -2372,7 +2407,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>  	    }
>  	  else
>  	    {
> -	      new_stmt = gimple_build_call_vec (fndecl, vargs);
> +	      if (ifn != IFN_LAST)
> +		new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +	      else
> +		new_stmt = gimple_build_call_vec (fndecl, vargs);
>  	      new_temp = make_ssa_name (vec_dest, new_stmt);
>  	      gimple_call_set_lhs (new_stmt, new_temp);
>  	    }
> @@ -2418,7 +2456,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>  		      vargs.quick_push (vec_oprndsk[i]);
>  		      vargs.quick_push (vec_oprndsk[i + 1]);
>  		    }
> -		  new_stmt = gimple_build_call_vec (fndecl, vargs);
> +		  if (ifn != IFN_LAST)
> +		    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +		  else
> +		    new_stmt = gimple_build_call_vec (fndecl, vargs);
>  		  new_temp = make_ssa_name (vec_dest, new_stmt);
>  		  gimple_call_set_lhs (new_stmt, new_temp);
>  		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
> @@ -2456,7 +2497,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>  	      vargs.quick_push (vec_oprnd1);
>  	    }
>  
> -	  new_stmt = gimple_build_call_vec (fndecl, vargs);
> +	  if (ifn != IFN_LAST)
> +	    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +	  else
> +	    new_stmt = gimple_build_call_vec (fndecl, vargs);
>  	  new_temp = make_ssa_name (vec_dest, new_stmt);
>  	  gimple_call_set_lhs (new_stmt, new_temp);
>  	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 9cde091..bb1ab39 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -958,7 +958,6 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
>  					     int *, vec<tree> *);
>  extern stmt_vec_info new_stmt_vec_info (gimple *stmt, vec_info *);
>  extern void free_stmt_vec_info (gimple *stmt);
> -extern tree vectorizable_function (gcall *, tree, tree);
>  extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
>                                      stmt_vector_for_cost *,
>  				    stmt_vector_for_cost *);
Richard Biener Nov. 17, 2015, 2:32 p.m. UTC | #2
On Tue, Nov 17, 2015 at 10:30 AM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> Thanks for all the reviews for this series.  I think the patch below
> is the only target-independent one that hasn't had any comments.

This patch is ok.

Thanks,
Richard.

> Richard
>
> Richard Sandiford <richard.sandiford@arm.com> writes:
>> This patch tries to vectorize built-in and internal functions as
>> internal functions first, falling back on the current built-in
>> target hooks otherwise.
>>
>>
>> gcc/
>>       * internal-fn.h (direct_internal_fn_info): Add vectorizable flag.
>>       * internal-fn.c (direct_internal_fn_array): Update accordingly.
>>       * tree-vectorizer.h (vectorizable_function): Delete.
>>       * tree-vect-stmts.c: Include internal-fn.h.
>>       (vectorizable_internal_function): New function.
>>       (vectorizable_function): Inline into...
>>       (vectorizable_call): ...here.  Explicitly reject calls that read
>>       from or write to memory.  Try using an internal function before
>>       falling back on the old vectorizable_function behavior.
>>
>> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
>> index 898c83d..a5bda2f 100644
>> --- a/gcc/internal-fn.c
>> +++ b/gcc/internal-fn.c
>> @@ -69,13 +69,13 @@ init_internal_fns ()
>>
>>  /* Create static initializers for the information returned by
>>     direct_internal_fn.  */
>> -#define not_direct { -2, -2 }
>> -#define mask_load_direct { -1, -1 }
>> -#define load_lanes_direct { -1, -1 }
>> -#define mask_store_direct { 3, 3 }
>> -#define store_lanes_direct { 0, 0 }
>> -#define unary_direct { 0, 0 }
>> -#define binary_direct { 0, 0 }
>> +#define not_direct { -2, -2, false }
>> +#define mask_load_direct { -1, -1, false }
>> +#define load_lanes_direct { -1, -1, false }
>> +#define mask_store_direct { 3, 3, false }
>> +#define store_lanes_direct { 0, 0, false }
>> +#define unary_direct { 0, 0, true }
>> +#define binary_direct { 0, 0, true }
>>
>>  const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
>>  #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
>> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
>> index 6cb123f..aea6abd 100644
>> --- a/gcc/internal-fn.h
>> +++ b/gcc/internal-fn.h
>> @@ -134,6 +134,14 @@ struct direct_internal_fn_info
>>       function isn't directly mapped to an optab.  */
>>    signed int type0 : 8;
>>    signed int type1 : 8;
>> +  /* True if the function is pointwise, so that it can be vectorized by
>> +     converting the return type and all argument types to vectors of the
>> +     same number of elements.  E.g. we can vectorize an IFN_SQRT on
>> +     floats as an IFN_SQRT on vectors of N floats.
>> +
>> +     This only needs 1 bit, but occupies the full 16 to ensure a nice
>> +     layout.  */
>> +  unsigned int vectorizable : 16;
>>  };
>>
>>  extern const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1];
>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>> index 75389c4..1142142 100644
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "tree-scalar-evolution.h"
>>  #include "tree-vectorizer.h"
>>  #include "builtins.h"
>> +#include "internal-fn.h"
>>
>>  /* For lang_hooks.types.type_for_mode.  */
>>  #include "langhooks.h"
>> @@ -1632,27 +1633,32 @@ vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
>>      add_stmt_to_eh_lp (vec_stmt, lp_nr);
>>  }
>>
>> -/* Checks if CALL can be vectorized in type VECTYPE.  Returns
>> -   a function declaration if the target has a vectorized version
>> -   of the function, or NULL_TREE if the function cannot be vectorized.  */
>> +/* We want to vectorize a call to combined function CFN with function
>> +   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
>> +   as the types of all inputs.  Check whether this is possible using
>> +   an internal function, returning its code if so or IFN_LAST if not.  */
>>
>> -tree
>> -vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
>> +static internal_fn
>> +vectorizable_internal_function (combined_fn cfn, tree fndecl,
>> +                             tree vectype_out, tree vectype_in)
>>  {
>> -  /* We only handle functions that do not read or clobber memory.  */
>> -  if (gimple_vuse (call))
>> -    return NULL_TREE;
>> -
>> -  combined_fn fn = gimple_call_combined_fn (call);
>> -  if (fn != CFN_LAST)
>> -    return targetm.vectorize.builtin_vectorized_function
>> -      (fn, vectype_out, vectype_in);
>> -
>> -  if (gimple_call_builtin_p (call, BUILT_IN_MD))
>> -    return targetm.vectorize.builtin_md_vectorized_function
>> -      (gimple_call_fndecl (call), vectype_out, vectype_in);
>> -
>> -  return NULL_TREE;
>> +  internal_fn ifn;
>> +  if (internal_fn_p (cfn))
>> +    ifn = as_internal_fn (cfn);
>> +  else
>> +    ifn = associated_internal_fn (fndecl);
>> +  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
>> +    {
>> +      const direct_internal_fn_info &info = direct_internal_fn (ifn);
>> +      if (info.vectorizable)
>> +     {
>> +       tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
>> +       tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
>> +       if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1)))
>> +         return ifn;
>> +     }
>> +    }
>> +  return IFN_LAST;
>>  }
>>
>>
>> @@ -2232,15 +2238,43 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>>    else
>>      return false;
>>
>> +  /* We only handle functions that do not read or clobber memory.  */
>> +  if (gimple_vuse (stmt))
>> +    {
>> +      if (dump_enabled_p ())
>> +     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> +                      "function reads from or writes to memory.\n");
>> +      return false;
>> +    }
>> +
>>    /* For now, we only vectorize functions if a target specific builtin
>>       is available.  TODO -- in some cases, it might be profitable to
>>       insert the calls for pieces of the vector, in order to be able
>>       to vectorize other operations in the loop.  */
>> -  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
>> -  if (fndecl == NULL_TREE)
>> +  fndecl = NULL_TREE;
>> +  internal_fn ifn = IFN_LAST;
>> +  combined_fn cfn = gimple_call_combined_fn (stmt);
>> +  tree callee = gimple_call_fndecl (stmt);
>> +
>> +  /* First try using an internal function.  */
>> +  if (cfn != CFN_LAST)
>> +    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
>> +                                       vectype_in);
>> +
>> +  /* If that fails, try asking for a target-specific built-in function.  */
>> +  if (ifn == IFN_LAST)
>> +    {
>> +      if (cfn != CFN_LAST)
>> +     fndecl = targetm.vectorize.builtin_vectorized_function
>> +       (cfn, vectype_out, vectype_in);
>> +      else
>> +     fndecl = targetm.vectorize.builtin_md_vectorized_function
>> +       (callee, vectype_out, vectype_in);
>> +    }
>> +
>> +  if (ifn == IFN_LAST && !fndecl)
>>      {
>> -      if (gimple_call_internal_p (stmt)
>> -       && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
>> +      if (cfn == CFN_GOMP_SIMD_LANE
>>         && !slp_node
>>         && loop_vinfo
>>         && LOOP_VINFO_LOOP (loop_vinfo)->simduid
>> @@ -2261,8 +2295,6 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>>       }
>>      }
>>
>> -  gcc_assert (!gimple_vuse (stmt));
>> -
>>    if (slp_node || PURE_SLP_STMT (stmt_info))
>>      ncopies = 1;
>>    else if (modifier == NARROW)
>> @@ -2324,7 +2356,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>>                     vec<tree> vec_oprndsk = vec_defs[k];
>>                     vargs[k] = vec_oprndsk[i];
>>                   }
>> -               new_stmt = gimple_build_call_vec (fndecl, vargs);
>> +               if (ifn != IFN_LAST)
>> +                 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
>> +               else
>> +                 new_stmt = gimple_build_call_vec (fndecl, vargs);
>>                 new_temp = make_ssa_name (vec_dest, new_stmt);
>>                 gimple_call_set_lhs (new_stmt, new_temp);
>>                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
>> @@ -2372,7 +2407,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>>           }
>>         else
>>           {
>> -           new_stmt = gimple_build_call_vec (fndecl, vargs);
>> +           if (ifn != IFN_LAST)
>> +             new_stmt = gimple_build_call_internal_vec (ifn, vargs);
>> +           else
>> +             new_stmt = gimple_build_call_vec (fndecl, vargs);
>>             new_temp = make_ssa_name (vec_dest, new_stmt);
>>             gimple_call_set_lhs (new_stmt, new_temp);
>>           }
>> @@ -2418,7 +2456,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>>                     vargs.quick_push (vec_oprndsk[i]);
>>                     vargs.quick_push (vec_oprndsk[i + 1]);
>>                   }
>> -               new_stmt = gimple_build_call_vec (fndecl, vargs);
>> +               if (ifn != IFN_LAST)
>> +                 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
>> +               else
>> +                 new_stmt = gimple_build_call_vec (fndecl, vargs);
>>                 new_temp = make_ssa_name (vec_dest, new_stmt);
>>                 gimple_call_set_lhs (new_stmt, new_temp);
>>                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
>> @@ -2456,7 +2497,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>>             vargs.quick_push (vec_oprnd1);
>>           }
>>
>> -       new_stmt = gimple_build_call_vec (fndecl, vargs);
>> +       if (ifn != IFN_LAST)
>> +         new_stmt = gimple_build_call_internal_vec (ifn, vargs);
>> +       else
>> +         new_stmt = gimple_build_call_vec (fndecl, vargs);
>>         new_temp = make_ssa_name (vec_dest, new_stmt);
>>         gimple_call_set_lhs (new_stmt, new_temp);
>>         vect_finish_stmt_generation (stmt, new_stmt, gsi);
>> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
>> index 9cde091..bb1ab39 100644
>> --- a/gcc/tree-vectorizer.h
>> +++ b/gcc/tree-vectorizer.h
>> @@ -958,7 +958,6 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
>>                                            int *, vec<tree> *);
>>  extern stmt_vec_info new_stmt_vec_info (gimple *stmt, vec_info *);
>>  extern void free_stmt_vec_info (gimple *stmt);
>> -extern tree vectorizable_function (gcall *, tree, tree);
>>  extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
>>                                      stmt_vector_for_cost *,
>>                                   stmt_vector_for_cost *);
>
diff mbox

Patch

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 898c83d..a5bda2f 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -69,13 +69,13 @@  init_internal_fns ()
 
 /* Create static initializers for the information returned by
    direct_internal_fn.  */
-#define not_direct { -2, -2 }
-#define mask_load_direct { -1, -1 }
-#define load_lanes_direct { -1, -1 }
-#define mask_store_direct { 3, 3 }
-#define store_lanes_direct { 0, 0 }
-#define unary_direct { 0, 0 }
-#define binary_direct { 0, 0 }
+#define not_direct { -2, -2, false }
+#define mask_load_direct { -1, -1, false }
+#define load_lanes_direct { -1, -1, false }
+#define mask_store_direct { 3, 3, false }
+#define store_lanes_direct { 0, 0, false }
+#define unary_direct { 0, 0, true }
+#define binary_direct { 0, 0, true }
 
 const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 6cb123f..aea6abd 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -134,6 +134,14 @@  struct direct_internal_fn_info
      function isn't directly mapped to an optab.  */
   signed int type0 : 8;
   signed int type1 : 8;
+  /* True if the function is pointwise, so that it can be vectorized by
+     converting the return type and all argument types to vectors of the
+     same number of elements.  E.g. we can vectorize an IFN_SQRT on
+     floats as an IFN_SQRT on vectors of N floats.
+
+     This only needs 1 bit, but occupies the full 16 to ensure a nice
+     layout.  */
+  unsigned int vectorizable : 16;
 };
 
 extern const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1];
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 75389c4..1142142 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -47,6 +47,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-scalar-evolution.h"
 #include "tree-vectorizer.h"
 #include "builtins.h"
+#include "internal-fn.h"
 
 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@@ -1632,27 +1633,32 @@  vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
     add_stmt_to_eh_lp (vec_stmt, lp_nr);
 }
 
-/* Checks if CALL can be vectorized in type VECTYPE.  Returns
-   a function declaration if the target has a vectorized version
-   of the function, or NULL_TREE if the function cannot be vectorized.  */
+/* We want to vectorize a call to combined function CFN with function
+   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
+   as the types of all inputs.  Check whether this is possible using
+   an internal function, returning its code if so or IFN_LAST if not.  */
 
-tree
-vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
+static internal_fn
+vectorizable_internal_function (combined_fn cfn, tree fndecl,
+				tree vectype_out, tree vectype_in)
 {
-  /* We only handle functions that do not read or clobber memory.  */
-  if (gimple_vuse (call))
-    return NULL_TREE;
-
-  combined_fn fn = gimple_call_combined_fn (call);
-  if (fn != CFN_LAST)
-    return targetm.vectorize.builtin_vectorized_function
-      (fn, vectype_out, vectype_in);
-
-  if (gimple_call_builtin_p (call, BUILT_IN_MD))
-    return targetm.vectorize.builtin_md_vectorized_function
-      (gimple_call_fndecl (call), vectype_out, vectype_in);
-
-  return NULL_TREE;
+  internal_fn ifn;
+  if (internal_fn_p (cfn))
+    ifn = as_internal_fn (cfn);
+  else
+    ifn = associated_internal_fn (fndecl);
+  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
+    {
+      const direct_internal_fn_info &info = direct_internal_fn (ifn);
+      if (info.vectorizable)
+	{
+	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
+	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
+	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1)))
+	    return ifn;
+	}
+    }
+  return IFN_LAST;
 }
 
 
@@ -2232,15 +2238,43 @@  vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   else
     return false;
 
+  /* We only handle functions that do not read or clobber memory.  */
+  if (gimple_vuse (stmt))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "function reads from or writes to memory.\n");
+      return false;
+    }
+
   /* For now, we only vectorize functions if a target specific builtin
      is available.  TODO -- in some cases, it might be profitable to
      insert the calls for pieces of the vector, in order to be able
      to vectorize other operations in the loop.  */
-  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
-  if (fndecl == NULL_TREE)
+  fndecl = NULL_TREE;
+  internal_fn ifn = IFN_LAST;
+  combined_fn cfn = gimple_call_combined_fn (stmt);
+  tree callee = gimple_call_fndecl (stmt);
+
+  /* First try using an internal function.  */
+  if (cfn != CFN_LAST)
+    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
+					  vectype_in);
+
+  /* If that fails, try asking for a target-specific built-in function.  */
+  if (ifn == IFN_LAST)
+    {
+      if (cfn != CFN_LAST)
+	fndecl = targetm.vectorize.builtin_vectorized_function
+	  (cfn, vectype_out, vectype_in);
+      else
+	fndecl = targetm.vectorize.builtin_md_vectorized_function
+	  (callee, vectype_out, vectype_in);
+    }
+
+  if (ifn == IFN_LAST && !fndecl)
     {
-      if (gimple_call_internal_p (stmt)
-	  && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
+      if (cfn == CFN_GOMP_SIMD_LANE
 	  && !slp_node
 	  && loop_vinfo
 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
@@ -2261,8 +2295,6 @@  vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 	}
     }
 
-  gcc_assert (!gimple_vuse (stmt));
-
   if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else if (modifier == NARROW)
@@ -2324,7 +2356,10 @@  vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 		      vec<tree> vec_oprndsk = vec_defs[k];
 		      vargs[k] = vec_oprndsk[i];
 		    }
-		  new_stmt = gimple_build_call_vec (fndecl, vargs);
+		  if (ifn != IFN_LAST)
+		    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+		  else
+		    new_stmt = gimple_build_call_vec (fndecl, vargs);
 		  new_temp = make_ssa_name (vec_dest, new_stmt);
 		  gimple_call_set_lhs (new_stmt, new_temp);
 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -2372,7 +2407,10 @@  vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 	    }
 	  else
 	    {
-	      new_stmt = gimple_build_call_vec (fndecl, vargs);
+	      if (ifn != IFN_LAST)
+		new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+	      else
+		new_stmt = gimple_build_call_vec (fndecl, vargs);
 	      new_temp = make_ssa_name (vec_dest, new_stmt);
 	      gimple_call_set_lhs (new_stmt, new_temp);
 	    }
@@ -2418,7 +2456,10 @@  vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 		      vargs.quick_push (vec_oprndsk[i]);
 		      vargs.quick_push (vec_oprndsk[i + 1]);
 		    }
-		  new_stmt = gimple_build_call_vec (fndecl, vargs);
+		  if (ifn != IFN_LAST)
+		    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+		  else
+		    new_stmt = gimple_build_call_vec (fndecl, vargs);
 		  new_temp = make_ssa_name (vec_dest, new_stmt);
 		  gimple_call_set_lhs (new_stmt, new_temp);
 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -2456,7 +2497,10 @@  vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 	      vargs.quick_push (vec_oprnd1);
 	    }
 
-	  new_stmt = gimple_build_call_vec (fndecl, vargs);
+	  if (ifn != IFN_LAST)
+	    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+	  else
+	    new_stmt = gimple_build_call_vec (fndecl, vargs);
 	  new_temp = make_ssa_name (vec_dest, new_stmt);
 	  gimple_call_set_lhs (new_stmt, new_temp);
 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 9cde091..bb1ab39 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -958,7 +958,6 @@  extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
 					     int *, vec<tree> *);
 extern stmt_vec_info new_stmt_vec_info (gimple *stmt, vec_info *);
 extern void free_stmt_vec_info (gimple *stmt);
-extern tree vectorizable_function (gcall *, tree, tree);
 extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
                                     stmt_vector_for_cost *,
 				    stmt_vector_for_cost *);