diff mbox series

[1/2] middle-end : Initial scaffolding and definitions for SLP patttern matches

Message ID patch-13785-tamar@arm.com
State New
Headers show
Series [1/2] middle-end : Initial scaffolding and definitions for SLP patttern matches | expand

Commit Message

Tamar Christina Nov. 14, 2020, 2:37 p.m. UTC
Hi All,

This patch adds the pre-requisites and general scaffolding for supporting doing
SLP pattern matching.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* tree-vect-loop.c (vect_dissolve_slp_only_patterns): New.
	(vect_dissolve_slp_only_groups): Call here.
	* tree-vect-slp.c (vect_free_slp_tree, vect_create_new_slp_node): Export
	from file.
	(vect_build_slp_tree_2): Set vectype for externals.
	(vect_print_slp_tree): Print SLP only patterns.
	(optimize_load_redistribution_1, optimize_load_redistribution,
	vect_match_slp_patterns_2, vect_match_slp_patterns): New.
	(vect_analyze_slp): Call matcher.
	* tree-vectorizer.c (vec_info::add_pattern_stmt): Save relevancy.
	* tree-vectorizer.h (STMT_VINFO_SAVED_RELEVANT, vect_pop_relevancy,
	vect_dissolve_pattern_relevancy, vect_save_relevancy,
	vect_push_relevancy, vect_free_slp_tree, enum _complex_operation,
	class vect_pattern): New.

--- inline copy of patch --



--

Comments

Richard Biener Nov. 16, 2020, 1:16 p.m. UTC | #1
On Sat, 14 Nov 2020, Tamar Christina wrote:

> Hi All,
> 
> This patch adds the pre-requisites and general scaffolding for supporting doing
> SLP pattern matching.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

Comments below.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* tree-vect-loop.c (vect_dissolve_slp_only_patterns): New.
> 	(vect_dissolve_slp_only_groups): Call here.
> 	* tree-vect-slp.c (vect_free_slp_tree, vect_create_new_slp_node): Export
> 	from file.
> 	(vect_build_slp_tree_2): Set vectype for externals.
> 	(vect_print_slp_tree): Print SLP only patterns.
> 	(optimize_load_redistribution_1, optimize_load_redistribution,
> 	vect_match_slp_patterns_2, vect_match_slp_patterns): New.
> 	(vect_analyze_slp): Call matcher.
> 	* tree-vectorizer.c (vec_info::add_pattern_stmt): Save relevancy.
> 	* tree-vectorizer.h (STMT_VINFO_SAVED_RELEVANT, vect_pop_relevancy,
> 	vect_dissolve_pattern_relevancy, vect_save_relevancy,
> 	vect_push_relevancy, vect_free_slp_tree, enum _complex_operation,
> 	class vect_pattern): New.
> 
> --- inline copy of patch --
> 
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 39b7319e8253c351a4f6fbdd8c154330f08f2b1b..791d9c6cb0649862a84fd3c80efc89fefedbb085 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -1979,6 +1979,61 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
>    return opt_result::success ();
>  }
>  
> +/* For every SLP only pattern created by the pattern matched rooted in ROOT
> +   restore the relevancy of the original statements over those of the pattern
> +   and destroy the pattern relationship.  This restores the SLP tree to a state
> +   where it can be used when SLP build is cancelled or re-tried.  */
> +
> +static void
> +vect_dissolve_slp_only_patterns (loop_vec_info loop_vinfo,
> +				 hash_set<slp_tree> *visited, slp_tree root)
> +{
> +  if (!root || visited->add (root))
> +    return;
> +
> +  unsigned int i;
> +  slp_tree node;
> +  stmt_vec_info related_stmt_info;
> +  stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (root);
> +
> +  if (stmt_info && STMT_VINFO_SLP_VECT_ONLY (stmt_info))
> +    {
> +      vect_pop_relevancy (stmt_info);
> +      if ((related_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info)) != NULL)
> +	{
> +	  if (dump_enabled_p ())
> +	    dump_printf_loc (MSG_NOTE, vect_location,
> +			     "dissolving relevancy of %G",
> +			     STMT_VINFO_STMT (stmt_info));
> +	  vect_dissolve_pattern_relevancy (related_stmt_info);
> +	}
> +    }
> +
> +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, node)
> +    vect_dissolve_slp_only_patterns (loop_vinfo, visited, node);
> +}
> +
> +/* Lookup any SLP Only Pattern statements created by the SLP pattern matcher in
> +   all slp_instances in LOOP_VINFO and undo the relevancy of statements such
> +   that the original SLP tree before the pattern matching is used.  */
> +
> +static void
> +vect_dissolve_slp_only_patterns (loop_vec_info loop_vinfo)
> +{
> +
> +  unsigned int i;
> +  hash_set<slp_tree> visited;
> +
> +  DUMP_VECT_SCOPE ("vect_dissolve_slp_only_patterns");
> +
> +  /* Unmark any SLP only patterns as relevant and restore the STMT_INFO of the
> +     related instruction.  */
> +  slp_instance instance;
> +  FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
> +    vect_dissolve_slp_only_patterns (loop_vinfo, &visited,
> +				     SLP_INSTANCE_TREE (instance));
> +}
> +
>  /* Look for SLP-only access groups and turn each individual access into its own
>     group.  */
>  static void
> @@ -2583,6 +2638,9 @@ again:
>    /* Ensure that "ok" is false (with an opt_problem if dumping is enabled).  */
>    gcc_assert (!ok);
>  
> +  /* Dissolve any SLP patterns created by the SLP pattern matcher.  */
> +  vect_dissolve_slp_only_patterns (loop_vinfo);
> +
>    /* Try again with SLP forced off but if we didn't do any SLP there is
>       no point in re-trying.  */
>    if (!slp)
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index 0c065e835ad13ad32d222e2590e05ef56849c411..3be565a2e566e09a9e42d6c77ba402b9499b06b6 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -105,7 +105,7 @@ _slp_tree::~_slp_tree ()
>  
>  /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
>  
> -static void
> +void
>  vect_free_slp_tree (slp_tree node)
>  {
>    int i;
> @@ -148,7 +148,7 @@ vect_free_slp_instance (slp_instance instance)
>  
>  /* Create an SLP node for SCALAR_STMTS.  */
>  
> -slp_tree
> +static slp_tree
>  vect_create_new_slp_node (slp_tree node,
>  			  vec<stmt_vec_info> scalar_stmts, unsigned nops)
>  {
> @@ -165,7 +165,7 @@ vect_create_new_slp_node (slp_tree node,
>  
>  /* Create an SLP node for SCALAR_STMTS.  */
>  
> -static slp_tree
> +slp_tree
>  vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
>  {
>    return vect_create_new_slp_node (new _slp_tree, scalar_stmts, nops);
> @@ -1646,6 +1646,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
>  	{
>  	  slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
>  	  SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
> +	  SLP_TREE_VECTYPE (invnode) = vectype;

This is wrong - the vector type of invariants is determined by
the consuming SLP stmts in vectorizable_*

>  	  oprnd_info->ops = vNULL;
>  	  children.safe_push (invnode);
>  	  continue;
> @@ -1929,6 +1930,13 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
>  	dump_printf (dump_kind, " %u", j);
>        dump_printf (dump_kind, " }\n");
>      }
> +  if (SLP_TREE_REPRESENTATIVE (node)
> +      && STMT_VINFO_SLP_VECT_ONLY (SLP_TREE_REPRESENTATIVE (node)))
> +    {
> +      dump_printf_loc (metadata, user_loc, "\tSLP Only pattern:\n");
> +      dump_printf_loc (dump_kind, user_loc, "\t %G",
> +		       STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)));
> +    }
>    if (SLP_TREE_LANE_PERMUTATION (node).exists ())
>      {
>        dump_printf_loc (metadata, user_loc, "\tlane permutation {");
> @@ -2174,6 +2182,219 @@ calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
>    return exact_div (common_multiple (nunits, group_size), group_size);
>  }
>  
> +/* Helper function of optimize_load_redistribution that performs the operation
> +   recursively.  */
> +
> +bool optimize_load_redistribution_1 (hash_set<slp_tree> *loads,
> +				     scalar_stmts_to_slp_tree_map_t *bst_map,
> +				     hash_set<slp_tree> *visited,
> +				     slp_tree parent, unsigned idx,
> +				     slp_tree root)
> +{
> +  if (visited->contains (root))
> +    return true;
> +  visited->add (root);

    if (visited->add (root))
      return true;

> +
> +  slp_tree node;
> +  unsigned i;
> +  stmt_vec_info dr_stmt = NULL;
> +
> +  /* For now, we don't know anything about externals so do not do anything.  */
> +  if (SLP_TREE_DEF_TYPE (root) == vect_external_def
> +      || SLP_TREE_DEF_TYPE (root) == vect_constant_def)
> +    return false;
> +
> +  if (gimple_assign_load_p (STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (root))))

The appropriate check is STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE 
(root)) && DR_IS_READ (STMT_VINFO_DATA_REF (...))

let's not mix gimple & SLP checks when not necessary.

> +    loads->add (root);
> +
> +  if (SLP_TREE_CODE (root) == VEC_PERM_EXPR

else if

> +      && SLP_TREE_LANE_PERMUTATION (root).exists ()
> +      && !SLP_TREE_SCALAR_STMTS (root).exists ())

why do !SLP_TREE_SCALAR_STMTS matter?

> +    {
> +

extra vertical space

> +      /* First convert this node into a load node and add it to the leaves
> +         list and flatten the permute from a lane to a load one.  If it's
> +         unneeded it will be elided later.  */
> +      auto_vec<stmt_vec_info> stmts;
> +      stmts.create (SLP_TREE_LANES (root));
> +      load_permutation_t load_perm;
> +      load_perm.create (SLP_TREE_LANES (root));
> +      lane_permutation_t lane_perm = SLP_TREE_LANE_PERMUTATION (root);
> +      for (unsigned j = 0; j < lane_perm.length (); j++)
> +        {
> +          std::pair<unsigned, unsigned> perm = lane_perm[j];
> +	  /* This isn't strictly needed, but this function is a temporary
> +	     one for specifically pattern matching, so don't want it to
> +	     optimize things the remainder of the pipeline will.  */
> +	  if (perm.first != j)
> +	    goto next;
> +          node = SLP_TREE_CHILDREN (root)[perm.first];
> +	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
> +	   {
> +	      load_perm.release ();
> +	      return false;

you're possibly prematurely exiting the DFS walk on a two-operator
permute.  Ah, guess that's what the above check was for?  I guess
it's better to pre-check all children of the VEC_PERM node
to be proper, two(?) leafs with load permutation.

> +	   }
> +
> +	  stmt_vec_info rep_stmt = SLP_TREE_REPRESENTATIVE (node);
> +	  if (!STMT_VINFO_GROUPED_ACCESS (rep_stmt))
> +	    goto next;

I think for a node with a load permutation this never triggers.

> +
> +	  if (!dr_stmt)
> +	    dr_stmt = DR_GROUP_FIRST_ELEMENT (rep_stmt);
> +
> +	  if (dr_stmt != DR_GROUP_FIRST_ELEMENT (rep_stmt))
> +	    goto next;

So all of the above could be done on the children w/o allocating
and the rest on the loop iterating over the lanes.

> +	  stmts.quick_push (SLP_TREE_SCALAR_STMTS (node)[perm.second]);
> +          load_perm.safe_push (SLP_TREE_LOAD_PERMUTATION (node)[perm.second]);
> +        }
> +
> +      if (dump_enabled_p ())
> +	dump_printf_loc (MSG_NOTE, vect_location,
> +			 "converting loads on permute node %p\n", root);
> +
> +      slp_tree *value = bst_map->get (stmts);
> +      if (value)
> +	node = *value;
> +      else
> +	{
> +	  /* One last iteration to free the nodes.  */
> +	  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, node)
> +	    {
> +	      /* If we are the only reference to the node, remove the vertex.
> +		 We don't have to modify the graph since vertices lead the
> +		 graph traversal.  */
> +	      vect_free_slp_tree (node);
> +	    }

You should do this unconditionally (also if the bst_map lookup
succeeded), but after bumping the refcount for the use in this
node.

> +
> +	  vec<stmt_vec_info> stmts_cpy = stmts.copy ();
> +	  node = vect_create_new_slp_node (stmts_cpy.copy (), 0);
> +	  bst_map->put (stmts_cpy, node);
> +	}
> +      SLP_TREE_CHILDREN (parent)[idx] = node;

hmm, what is 'parent' - shouldn't this be 'root'?  Ah, I see what
you are doing.  I think you should do what optimize_slp does,
namely replace 'root' in-place so you do not have to adjust
parents (or even care for the case of multiple parents refering to
'root').  I see that doesn't easily work when attempting to CSE so
the actual CSE needs to happen below (*)

> +      SLP_TREE_REF_COUNT (node)++;
> +      SLP_TREE_VECTYPE (node) = SLP_TREE_VECTYPE (root);
> +      SLP_TREE_LOAD_PERMUTATION (node) = load_perm;
> +      loads->add (node);

Note with the load_lane check delayed we no longer need
to vect_gather_slp_loads so early so I suggest to simply
remove the existing early call and do it after
pattern matching instead.

> +      //do this up the recursive call.
> +      //vect_free_slp_tree (root);
> +
> +      return true;
> +    }
> +
> +next:
> +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node)
> +    optimize_load_redistribution_1 (loads, bst_map, visited, root, i, node);

(*) so here after recursing (and maybe returning that 'node' was changed)
try to CSE 'node' like

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node)
    if (optimize_load_redistribution_1 (loads, bst_map, visited, root, i, 
node))
      {
        if (slp_tree *value = bst_map->get (stmts))
          {
            SLP_TREE_CHILDREN (root)[i] = value;
            SLP_TREE_REFCNT (value)++;
            slp_tree_free (root);
          }
      }

or so.  Note that the lane configuration of the result of you
transform doesn't change, we're merely propagating the lanes
from the permuted loads to the VEC_PERM node (where we "missed" them)
and then we're doing a re-lookup to do the CSE.  Then of course
there's the part turning a VEC_PERM node to a load itself.

So I wonder if we can make sure SLP_TREE_SCALAR_STMTS (the output
lane config) for the VEC_PERM node is set appropriately by
pattern matching?  Or if we should make this a two-step thing,
propagate the lane setup and CSE and then turn VEC_PERMs into
permuted loads?

It might make sense to split out this optimization from the
patch.

> +
> +  return true;
> +}
> +
> +/* Temporary workaround for loads not being CSEd during SLP build.  This
> +   function will traverse the SLP tree rooted in ROOT for INSTANCE and find
> +   VEC_PERM nodes that blend vectors from multiple nodes that all read from the
> +   same DR such that the final operation is equal to a permuted load.  Such
> +   NODES are then directly converted into LOADS themselves.  The nodes are
> +   CSEd using BST_MAP.
> +
> +   Finally the LOADS in INSTANCE are updated with the current set of loads.  */
> +
> +bool optimize_load_redistribution (slp_instance instance,
> +				   scalar_stmts_to_slp_tree_map_t *bst_map,
> +				   slp_tree root)
> +{
> +  slp_tree node;
> +  unsigned i;
> +  hash_set<slp_tree> visited;
> +  hash_set<slp_tree> loads;
> +
> +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node)
> +    optimize_load_redistribution_1 (&loads, bst_map, &visited, root, i, node);
> +
> +  SLP_INSTANCE_LOADS (instance).truncate (0);
> +  for (hash_set<slp_tree>::iterator it = loads.begin ();
> +       it != loads.end (); ++it)
> +    SLP_INSTANCE_LOADS (instance).safe_push (*it);
> +
> +  return true;
> +}
> +
> +/* Helper function of vect_match_slp_patterns.
> +
> +   Attempts to match patterns against the slp tree rooted in REF_NODE using
> +   VINFO.  Patterns are matched in post-order traversal.
> +
> +   If matching is successful the value in REF_NODE is updated and returned, if
> +   not then it is returned unchanged.  */
> +
> +static bool
> +vect_match_slp_patterns_2 (slp_tree *ref_node, vec_info *vinfo,
> +			   slp_tree_to_load_perm_map_t *perm_cache,
> +			   hash_set<slp_tree> *visited)
> +{
> +  unsigned i;
> +  slp_tree node = *ref_node;
> +  bool found_p = false, found_rec_p = false;
> +  if (!node || visited->add (node))
> +    return false;
> +
> +  slp_tree child;
> +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +    found_rec_p |= vect_match_slp_patterns_2 (&SLP_TREE_CHILDREN (node)[i],
> +					      vinfo, perm_cache, visited);
> +
> +  for (unsigned x = 0; x < num__slp_patterns; x++)
> +    {
> +      vect_pattern *pattern = slp_patterns[x] (ref_node);
> +      found_p = pattern->recognize (perm_cache, vinfo);
> +      delete pattern;
> +      found_rec_p = found_p | found_rec_p;
> +    }
> +
> +  return found_rec_p;
> +}
> +
> +/* Applies pattern matching to the given SLP tree rooted in REF_NODE using
> +   vec_info VINFO.
> +
> +   The modified tree is returned.  Patterns are tried in order and multiple
> +   patterns may match.  */
> +
> +static bool
> +vect_match_slp_patterns (slp_instance instance, vec_info *vinfo,
> +			 hash_set<slp_tree> *visited,
> +			 slp_tree_to_load_perm_map_t *perm_cache,
> +			 scalar_stmts_to_slp_tree_map_t *bst_map)
> +{
> +  DUMP_VECT_SCOPE ("vect_match_slp_patterns");
> +  slp_tree *ref_node = &SLP_INSTANCE_TREE (instance);
> +
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +		     "Analyzing SLP tree %p for patterns\n",
> +		     SLP_INSTANCE_TREE (instance));
> +
> +  bool found_p
> +    = vect_match_slp_patterns_2 (ref_node, vinfo, perm_cache, visited);
> +
> +  if (found_p)
> +    {
> +      optimize_load_redistribution (instance, bst_map, *ref_node);
> +
> +      if (dump_enabled_p ())
> +	{
> +	  dump_printf_loc (MSG_NOTE, vect_location,
> +			   "Pattern matched SLP tree\n");
> +	  vect_print_slp_graph (MSG_NOTE, vect_location, *ref_node);
> +	}
> +    }
> +
> +  return found_p;
> +}
> +
> +/* Analyze an SLP instance starting from a group of grouped stores.  Call
> +   vect_build_slp_tree to build a tree of packed stmts if possible.
> +   Return FALSE if it's impossible to SLP any stmt in the loop.  */
> +
>  static bool
>  vect_analyze_slp_instance (vec_info *vinfo,
>  			   scalar_stmts_to_slp_tree_map_t *bst_map,
> @@ -2540,6 +2761,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
>  {
>    unsigned int i;
>    stmt_vec_info first_element;
> +  slp_instance instance;
>  
>    DUMP_VECT_SCOPE ("vect_analyze_slp");
>  
> @@ -2586,6 +2808,13 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
>  				   slp_inst_kind_reduc_group, max_tree_size);
>      }
>  
> +  hash_set<slp_tree> visited_patterns;
> +  slp_tree_to_load_perm_map_t perm_cache;
> +  /* See if any patterns can be found in the SLP tree.  */
> +  FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (vinfo), i, instance)
> +    vect_match_slp_patterns (instance, vinfo, &visited_patterns, &perm_cache,
> +			     bst_map);
> +
>    /* The map keeps a reference on SLP nodes built, release that.  */
>    for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
>         it != bst_map->end (); ++it)
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index fa211b95c0e54be1d51ed949d7a06c31b7b50802..3b49ce22f7aae0465dbd0b24cbf48ae054c31d22 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -27,6 +27,7 @@ typedef class _stmt_vec_info *stmt_vec_info;
>  #include "tree-hash-traits.h"
>  #include "target.h"
>  #include "alloc-pool.h"
> +#include "internal-fn.h"
>  
>  
>  /* Used for naming of new temporaries.  */
> @@ -1118,6 +1119,11 @@ public:
>       indicates whether the stmt needs to be vectorized.  */
>    enum vect_relevant relevant;
>  
> +  /* During SLP vectorization we may need to change the relevancy of a statement
> +     but restore it during dissolving of SLP nodes.  This field contains a copy
> +     of the original relevancy analysis.  */
> +  enum vect_relevant saved_relevant;
> +
>    /* For loads if this is a gather, for stores if this is a scatter.  */
>    bool gather_scatter_p;
>  
> @@ -1240,6 +1246,7 @@ struct gather_scatter_info {
>  #define STMT_VINFO_TYPE(S)                 (S)->type
>  #define STMT_VINFO_STMT(S)                 (S)->stmt
>  #define STMT_VINFO_RELEVANT(S)             (S)->relevant
> +#define STMT_VINFO_SAVED_RELEVANT(S)       (S)->saved_relevant
>  #define STMT_VINFO_LIVE_P(S)               (S)->live
>  #define STMT_VINFO_VECTYPE(S)              (S)->vectype
>  #define STMT_VINFO_VEC_STMTS(S)            (S)->vec_stmts
> @@ -1368,6 +1375,46 @@ vect_orig_stmt (stmt_vec_info stmt_info)
>    return stmt_info;
>  }
>  
> +/* If restore the saved relevancy information STMT_INFO from the copy made
> +   during SLP pattern detection.  */
> +
> +static inline void
> +vect_pop_relevancy (stmt_vec_info stmt_info)
> +{
> +  STMT_VINFO_RELEVANT (stmt_info) = STMT_VINFO_SAVED_RELEVANT (stmt_info);
> +}

Calling the single-level "stack" a stack is a bit odd.  Since
you already have vect_save_relevancy () just call ig
vect_restore_relevancy ()?

> +
> +/* Restores the saved relevancy of STMT_INFO and marks it as not being inside a
> +   pattern.  Lastly the SLP_TYPE is set to loop_vect.  */
> +
> +static inline void
> +vect_dissolve_pattern_relevancy (stmt_vec_info stmt_info)
> +{
> +  vect_pop_relevancy (stmt_info);
> +  STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
> +  STMT_SLP_TYPE (stmt_info) = loop_vect;
> +}
> +
> +/* Save the current relevancy of STMT_INFO such that it can be restored by
> +   vect_pop_relevancy.  */
> +
> +static inline void
> +vect_save_relevancy (stmt_vec_info stmt_info)
> +{
> +  STMT_VINFO_SAVED_RELEVANT (stmt_info)
> +    = STMT_VINFO_RELEVANT (stmt_info);
> +}
> +
> +/* Save the current relevancy of STMT_INFO before changing it to REL.  */
> +
> +static inline void
> +vect_push_relevancy (stmt_vec_info stmt_info, enum vect_relevant rel)
> +{
> +  vect_save_relevancy (stmt_info);
> +  STMT_VINFO_RELEVANT (stmt_info) = rel;
> +}

See above - maybe combine save and push by doing

vect_set_slp_relevancy (...)

and

vect_restore_loop_relevancy (...)

rather than the four separate functions.

It would be nice if the vect_dissolve_slp_only_patterns work
could be done fully from the IL walk we do here:

  /* Reset SLP type to loop_vect on all stmts.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator si = gsi_start_phis (bb);
           !gsi_end_p (si); gsi_next (&si))
        {
...

where we also reset STMT_VINFO_SLP_TYPE.

> +
>  /* Return the later statement between STMT1_INFO and STMT2_INFO.  */
>  
>  static inline stmt_vec_info
> @@ -1993,6 +2040,7 @@ extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
>  extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
>  extern bool vect_update_shared_vectype (stmt_vec_info, tree);
>  extern slp_tree vect_create_new_slp_node (vec<stmt_vec_info>, unsigned);
> +extern void vect_free_slp_tree (slp_tree);
>  
>  /* In tree-vect-patterns.c.  */
>  extern void
> @@ -2009,4 +2057,108 @@ void vect_free_loop_info_assumptions (class loop *);
>  gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL);
>  bool vect_stmt_dominates_stmt_p (gimple *, gimple *);
>  
> +/* SLP Pattern matcher types, tree-vect-slp-patterns.c.  */
> +
> +/* Forward declaration of possible two operands operation that can be matched
> +   by the complex numbers pattern matchers.  */
> +enum _complex_operation : unsigned;
> +
> +/* Cache from nodes to the load permutation they represent.  */
> +typedef hash_map <slp_tree, load_permutation_t >
> +  slp_tree_to_load_perm_map_t;
> +
> +/* Vector pattern matcher base class.  All SLP pattern matchers must inherit
> +   from this type.  */
> +
> +class vect_pattern
> +{
> +  protected:
> +    /* The number of arguments that the IFN requires.  */
> +    unsigned m_num_args;
> +
> +    /* The internal function that will be used when a pattern is created.  */
> +    internal_fn m_ifn;
> +
> +    /* The current node being inspected.  */
> +    slp_tree *m_node;
> +
> +    /* The list of operands to be the children for the node produced when the
> +       internal function is created.  */
> +    vec<slp_tree> m_ops;
> +
> +    /* Default constructor where NODE is the root of the tree to inspect.  */
> +    vect_pattern (slp_tree *node)
> +    {
> +      this->m_ifn = IFN_LAST;
> +      this->m_node = node;
> +      this->m_ops.create (0);
> +    }
> +
> +  public:
> +    /* Attempt to recognize a pattern, validate and update the tree rooted in
> +       M_NODE.  */
> +    virtual bool recognize (slp_tree_to_load_perm_map_t *, vec_info *);
> +
> +    /* Only perform the pattern creation part of the matcher.  This creates and
> +       returns the new pattern statement.  */
> +    virtual gcall *build (vec_info *) = 0;
> +
> +    /* Performs a check to see if the matched IFN is supported by the current
> +       target.  */
> +    virtual bool is_optab_supported_p (tree vectype, optimization_type opt_type)
> +    {
> +      if (!vectype)
> +        return false;
> +
> +      return direct_internal_fn_supported_p (this->m_ifn, vectype, opt_type);
> +    }
> +
> +    /* Create a new instance of the pattern matcher class of the given type.  */
> +    static vect_pattern* create (slp_tree *);
> +
> +    /* Match but do not perform any additional operations on the SLP tree.  */
> +    virtual bool matches (slp_tree_to_load_perm_map_t *) = 0;
> +
> +    /* Match but use for the first operation the supplied COMPLEX_OPERATION.  No
> +       additional operations or modification of the SLP tree are performed.  */
> +    virtual bool matches (enum _complex_operation,
> +			  slp_tree_to_load_perm_map_t *, vec<slp_tree>)
> +    {
> +      return false;
> +    }
> +
> +    /* Friendly name of the operation the pattern matches.  */
> +    virtual const char* get_name () = 0;
> +
> +    /* Default destructor.  */
> +    virtual ~vect_pattern ()
> +    {
> +	this->m_ops.release ();
> +    }
> +
> +    /* Check to see if the matched tree is valid for the operation the matcher
> +       wants.  If the operation is valid then the tree is reshaped in the final
> +       format that build () requires.  */
> +    virtual bool validate_p (slp_tree_to_load_perm_map_t *)
> +    {
> +      return true;
> +    }
> +
> +    /* Return the matched internal function.  If no match was done this is set
> +       to LAST_IFN.  */
> +    virtual internal_fn get_ifn ()
> +    {
> +      return this->m_ifn;
> +    }
> +};
> +
> +/* Function pointer to create a new pattern matcher from a generic type.  */
> +typedef vect_pattern* (*vect_pattern_decl_t) (slp_tree *);
> +
> +/* List of supported pattern matchers.  */
> +extern vect_pattern_decl_t slp_patterns[];
> +
> +/* Number of supported pattern matchers.  */
> +extern size_t num__slp_patterns;
> +
>  #endif  /* GCC_TREE_VECTORIZER_H  */
> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> index d81774b242569262a51b7be02815acd6d1a6bfd0..2a6ddd685922f6b60ae1305974335fb863a2af39 100644
> --- a/gcc/tree-vectorizer.c
> +++ b/gcc/tree-vectorizer.c
> @@ -535,6 +535,8 @@ vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
>    stmt_vec_info res = new_stmt_vec_info (stmt);
>    set_vinfo_for_stmt (stmt, res, false);
>    STMT_VINFO_RELATED_STMT (res) = stmt_info;
> +  vect_save_relevancy (stmt_info);
> +  vect_push_relevancy (res, STMT_VINFO_RELEVANT (stmt_info));

Hmmm, that looks like an odd place to do this.  I suspect it's not
the "final" modification of either relevancy?

Can we get rid of this hunk somehow?

Thanks,
Richard.

>    return res;
>  }
>  
> 
> 
>
Richard Biener Nov. 16, 2020, 1:33 p.m. UTC | #2
On Mon, 16 Nov 2020, Richard Biener wrote:

> On Sat, 14 Nov 2020, Tamar Christina wrote:
> 
> > Hi All,
> > 
> > This patch adds the pre-requisites and general scaffolding for supporting doing
> > SLP pattern matching.
> > 
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > 
> > Ok for master?
> 
> Comments below.
> 
> > Thanks,
> > Tamar
> > 
> > gcc/ChangeLog:
> > 
> > 	* tree-vect-loop.c (vect_dissolve_slp_only_patterns): New.
> > 	(vect_dissolve_slp_only_groups): Call here.
> > 	* tree-vect-slp.c (vect_free_slp_tree, vect_create_new_slp_node): Export
> > 	from file.
> > 	(vect_build_slp_tree_2): Set vectype for externals.
> > 	(vect_print_slp_tree): Print SLP only patterns.
> > 	(optimize_load_redistribution_1, optimize_load_redistribution,
> > 	vect_match_slp_patterns_2, vect_match_slp_patterns): New.
> > 	(vect_analyze_slp): Call matcher.
> > 	* tree-vectorizer.c (vec_info::add_pattern_stmt): Save relevancy.
> > 	* tree-vectorizer.h (STMT_VINFO_SAVED_RELEVANT, vect_pop_relevancy,
> > 	vect_dissolve_pattern_relevancy, vect_save_relevancy,
> > 	vect_push_relevancy, vect_free_slp_tree, enum _complex_operation,
> > 	class vect_pattern): New.
> > 
> > --- inline copy of patch --
> > 
> > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> > index 39b7319e8253c351a4f6fbdd8c154330f08f2b1b..791d9c6cb0649862a84fd3c80efc89fefedbb085 100644
> > --- a/gcc/tree-vect-loop.c
> > +++ b/gcc/tree-vect-loop.c
> > @@ -1979,6 +1979,61 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
> >    return opt_result::success ();
> >  }
> >  
> > +/* For every SLP only pattern created by the pattern matched rooted in ROOT
> > +   restore the relevancy of the original statements over those of the pattern
> > +   and destroy the pattern relationship.  This restores the SLP tree to a state
> > +   where it can be used when SLP build is cancelled or re-tried.  */
> > +
> > +static void
> > +vect_dissolve_slp_only_patterns (loop_vec_info loop_vinfo,
> > +				 hash_set<slp_tree> *visited, slp_tree root)
> > +{
> > +  if (!root || visited->add (root))
> > +    return;
> > +
> > +  unsigned int i;
> > +  slp_tree node;
> > +  stmt_vec_info related_stmt_info;
> > +  stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (root);
> > +
> > +  if (stmt_info && STMT_VINFO_SLP_VECT_ONLY (stmt_info))
> > +    {
> > +      vect_pop_relevancy (stmt_info);
> > +      if ((related_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info)) != NULL)
> > +	{
> > +	  if (dump_enabled_p ())
> > +	    dump_printf_loc (MSG_NOTE, vect_location,
> > +			     "dissolving relevancy of %G",
> > +			     STMT_VINFO_STMT (stmt_info));
> > +	  vect_dissolve_pattern_relevancy (related_stmt_info);
> > +	}
> > +    }
> > +
> > +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, node)
> > +    vect_dissolve_slp_only_patterns (loop_vinfo, visited, node);
> > +}
> > +
> > +/* Lookup any SLP Only Pattern statements created by the SLP pattern matcher in
> > +   all slp_instances in LOOP_VINFO and undo the relevancy of statements such
> > +   that the original SLP tree before the pattern matching is used.  */
> > +
> > +static void
> > +vect_dissolve_slp_only_patterns (loop_vec_info loop_vinfo)
> > +{
> > +
> > +  unsigned int i;
> > +  hash_set<slp_tree> visited;
> > +
> > +  DUMP_VECT_SCOPE ("vect_dissolve_slp_only_patterns");
> > +
> > +  /* Unmark any SLP only patterns as relevant and restore the STMT_INFO of the
> > +     related instruction.  */
> > +  slp_instance instance;
> > +  FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
> > +    vect_dissolve_slp_only_patterns (loop_vinfo, &visited,
> > +				     SLP_INSTANCE_TREE (instance));
> > +}
> > +
> >  /* Look for SLP-only access groups and turn each individual access into its own
> >     group.  */
> >  static void
> > @@ -2583,6 +2638,9 @@ again:
> >    /* Ensure that "ok" is false (with an opt_problem if dumping is enabled).  */
> >    gcc_assert (!ok);
> >  
> > +  /* Dissolve any SLP patterns created by the SLP pattern matcher.  */
> > +  vect_dissolve_slp_only_patterns (loop_vinfo);
> > +
> >    /* Try again with SLP forced off but if we didn't do any SLP there is
> >       no point in re-trying.  */
> >    if (!slp)
> > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> > index 0c065e835ad13ad32d222e2590e05ef56849c411..3be565a2e566e09a9e42d6c77ba402b9499b06b6 100644
> > --- a/gcc/tree-vect-slp.c
> > +++ b/gcc/tree-vect-slp.c
> > @@ -105,7 +105,7 @@ _slp_tree::~_slp_tree ()
> >  
> >  /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
> >  
> > -static void
> > +void
> >  vect_free_slp_tree (slp_tree node)
> >  {
> >    int i;
> > @@ -148,7 +148,7 @@ vect_free_slp_instance (slp_instance instance)
> >  
> >  /* Create an SLP node for SCALAR_STMTS.  */
> >  
> > -slp_tree
> > +static slp_tree
> >  vect_create_new_slp_node (slp_tree node,
> >  			  vec<stmt_vec_info> scalar_stmts, unsigned nops)
> >  {
> > @@ -165,7 +165,7 @@ vect_create_new_slp_node (slp_tree node,
> >  
> >  /* Create an SLP node for SCALAR_STMTS.  */
> >  
> > -static slp_tree
> > +slp_tree
> >  vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
> >  {
> >    return vect_create_new_slp_node (new _slp_tree, scalar_stmts, nops);
> > @@ -1646,6 +1646,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
> >  	{
> >  	  slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
> >  	  SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
> > +	  SLP_TREE_VECTYPE (invnode) = vectype;
> 
> This is wrong - the vector type of invariants is determined by
> the consuming SLP stmts in vectorizable_*
> 
> >  	  oprnd_info->ops = vNULL;
> >  	  children.safe_push (invnode);
> >  	  continue;
> > @@ -1929,6 +1930,13 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
> >  	dump_printf (dump_kind, " %u", j);
> >        dump_printf (dump_kind, " }\n");
> >      }
> > +  if (SLP_TREE_REPRESENTATIVE (node)
> > +      && STMT_VINFO_SLP_VECT_ONLY (SLP_TREE_REPRESENTATIVE (node)))
> > +    {
> > +      dump_printf_loc (metadata, user_loc, "\tSLP Only pattern:\n");
> > +      dump_printf_loc (dump_kind, user_loc, "\t %G",
> > +		       STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)));
> > +    }
> >    if (SLP_TREE_LANE_PERMUTATION (node).exists ())
> >      {
> >        dump_printf_loc (metadata, user_loc, "\tlane permutation {");
> > @@ -2174,6 +2182,219 @@ calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
> >    return exact_div (common_multiple (nunits, group_size), group_size);
> >  }
> >  
> > +/* Helper function of optimize_load_redistribution that performs the operation
> > +   recursively.  */
> > +
> > +bool optimize_load_redistribution_1 (hash_set<slp_tree> *loads,
> > +				     scalar_stmts_to_slp_tree_map_t *bst_map,
> > +				     hash_set<slp_tree> *visited,
> > +				     slp_tree parent, unsigned idx,
> > +				     slp_tree root)
> > +{
> > +  if (visited->contains (root))
> > +    return true;
> > +  visited->add (root);
> 
>     if (visited->add (root))
>       return true;
> 
> > +
> > +  slp_tree node;
> > +  unsigned i;
> > +  stmt_vec_info dr_stmt = NULL;
> > +
> > +  /* For now, we don't know anything about externals so do not do anything.  */
> > +  if (SLP_TREE_DEF_TYPE (root) == vect_external_def
> > +      || SLP_TREE_DEF_TYPE (root) == vect_constant_def)
> > +    return false;
> > +
> > +  if (gimple_assign_load_p (STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (root))))
> 
> The appropriate check is STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE 
> (root)) && DR_IS_READ (STMT_VINFO_DATA_REF (...))
> 
> let's not mix gimple & SLP checks when not necessary.
> 
> > +    loads->add (root);
> > +
> > +  if (SLP_TREE_CODE (root) == VEC_PERM_EXPR
> 
> else if
> 
> > +      && SLP_TREE_LANE_PERMUTATION (root).exists ()
> > +      && !SLP_TREE_SCALAR_STMTS (root).exists ())
> 
> why do !SLP_TREE_SCALAR_STMTS matter?
> 
> > +    {
> > +
> 
> extra vertical space
> 
> > +      /* First convert this node into a load node and add it to the leaves
> > +         list and flatten the permute from a lane to a load one.  If it's
> > +         unneeded it will be elided later.  */
> > +      auto_vec<stmt_vec_info> stmts;
> > +      stmts.create (SLP_TREE_LANES (root));
> > +      load_permutation_t load_perm;
> > +      load_perm.create (SLP_TREE_LANES (root));
> > +      lane_permutation_t lane_perm = SLP_TREE_LANE_PERMUTATION (root);
> > +      for (unsigned j = 0; j < lane_perm.length (); j++)
> > +        {
> > +          std::pair<unsigned, unsigned> perm = lane_perm[j];
> > +	  /* This isn't strictly needed, but this function is a temporary
> > +	     one for specifically pattern matching, so don't want it to
> > +	     optimize things the remainder of the pipeline will.  */
> > +	  if (perm.first != j)
> > +	    goto next;
> > +          node = SLP_TREE_CHILDREN (root)[perm.first];
> > +	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
> > +	   {
> > +	      load_perm.release ();
> > +	      return false;
> 
> you're possibly prematurely exiting the DFS walk on a two-operator
> permute.  Ah, guess that's what the above check was for?  I guess
> it's better to pre-check all children of the VEC_PERM node
> to be proper, two(?) leafs with load permutation.
> 
> > +	   }
> > +
> > +	  stmt_vec_info rep_stmt = SLP_TREE_REPRESENTATIVE (node);
> > +	  if (!STMT_VINFO_GROUPED_ACCESS (rep_stmt))
> > +	    goto next;
> 
> I think for a node with a load permutation this never triggers.
> 
> > +
> > +	  if (!dr_stmt)
> > +	    dr_stmt = DR_GROUP_FIRST_ELEMENT (rep_stmt);
> > +
> > +	  if (dr_stmt != DR_GROUP_FIRST_ELEMENT (rep_stmt))
> > +	    goto next;
> 
> So all of the above could be done on the children w/o allocating
> and the rest on the loop iterating over the lanes.
> 
> > +	  stmts.quick_push (SLP_TREE_SCALAR_STMTS (node)[perm.second]);
> > +          load_perm.safe_push (SLP_TREE_LOAD_PERMUTATION (node)[perm.second]);
> > +        }
> > +
> > +      if (dump_enabled_p ())
> > +	dump_printf_loc (MSG_NOTE, vect_location,
> > +			 "converting loads on permute node %p\n", root);
> > +
> > +      slp_tree *value = bst_map->get (stmts);
> > +      if (value)
> > +	node = *value;
> > +      else
> > +	{
> > +	  /* One last iteration to free the nodes.  */
> > +	  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, node)
> > +	    {
> > +	      /* If we are the only reference to the node, remove the vertex.
> > +		 We don't have to modify the graph since vertices lead the
> > +		 graph traversal.  */
> > +	      vect_free_slp_tree (node);
> > +	    }
> 
> You should do this unconditionally (also if the bst_map lookup
> succeeded), but after bumping the refcount for the use in this
> node.
> 
> > +
> > +	  vec<stmt_vec_info> stmts_cpy = stmts.copy ();
> > +	  node = vect_create_new_slp_node (stmts_cpy.copy (), 0);
> > +	  bst_map->put (stmts_cpy, node);
> > +	}
> > +      SLP_TREE_CHILDREN (parent)[idx] = node;
> 
> hmm, what is 'parent' - shouldn't this be 'root'?  Ah, I see what
> you are doing.  I think you should do what optimize_slp does,
> namely replace 'root' in-place so you do not have to adjust
> parents (or even care for the case of multiple parents refering to
> 'root').  I see that doesn't easily work when attempting to CSE so
> the actual CSE needs to happen below (*)
> 
> > +      SLP_TREE_REF_COUNT (node)++;
> > +      SLP_TREE_VECTYPE (node) = SLP_TREE_VECTYPE (root);
> > +      SLP_TREE_LOAD_PERMUTATION (node) = load_perm;
> > +      loads->add (node);
> 
> Note with the load_lane check delayed we no longer need
> to vect_gather_slp_loads so early so I suggest to simply
> remove the existing early call and do it after
> pattern matching instead.

I'm testing the patch below for this.

Richard.

From 51b89070fcf8eacb188439e6c1b777fd9db4b2ae Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Mon, 16 Nov 2020 14:26:20 +0100
Subject: [PATCH] Delay SLP instance loads gathering
To: gcc-patches@gcc.gnu.org

This delays filling SLP_INSTANCE_LOADS.

2020-11-16  Richard Biener  <rguenther@suse.de>

	* tree-vectorizer.h (vect_gather_slp_loads): Declare.
	* tree-vect-loop.c (vect_analyze_loop_2): Call
	vect_gather_slp_loads.
	* tree-vect-slp.c (vect_build_slp_instance): Do not gather
	SLP loads here.
	(vect_gather_slp_loads): Remove wrapper, new function.
	(vect_slp_analyze_bb_1): Call it.
---
 gcc/tree-vect-loop.c  |  3 +++
 gcc/tree-vect-slp.c   | 26 ++++++++++++++++++--------
 gcc/tree-vectorizer.h |  1 +
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 4d5532f71d0..ecaaf0116d3 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2298,6 +2298,9 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
 
       /* Optimize the SLP graph with the vectorization factor fixed.  */
       vect_optimize_slp (loop_vinfo);
+
+      /* Gather the loads reachable from the SLP graph entries.  */
+      vect_gather_slp_loads (loop_vinfo);
     }
 
   bool saved_can_use_partial_vectors_p
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index b98d5db9f76..d2f2407ac92 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2071,13 +2071,6 @@ vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
     }
 }
 
-static void
-vect_gather_slp_loads (slp_instance inst, slp_tree node)
-{
-  hash_set<slp_tree> visited;
-  vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst), node, visited);
-}
-
 
 /* Find the last store in SLP INSTANCE.  */
 
@@ -2252,7 +2245,6 @@ vect_build_slp_instance (vec_info *vinfo,
 	  new_instance->cost_vec = vNULL;
 	  new_instance->subgraph_entries = vNULL;
 
-	  vect_gather_slp_loads (new_instance, node);
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_NOTE, vect_location,
 			     "SLP size %u vs. limit %u.\n",
@@ -3071,6 +3063,21 @@ vect_optimize_slp (vec_info *vinfo)
     }
 }
 
+/* Gather loads reachable from the individual SLP graph entries.  */
+
+void
+vect_gather_slp_loads (vec_info *vinfo)
+{
+  unsigned i;
+  slp_instance instance;
+  FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
+    {
+      hash_set<slp_tree> visited;
+      vect_gather_slp_loads (SLP_INSTANCE_LOADS (instance),
+			     SLP_INSTANCE_TREE (instance), visited);
+    }
+}
+
 
 /* For each possible SLP instance decide whether to SLP it and calculate overall
    unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
@@ -4152,6 +4159,9 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
   /* Optimize permutations.  */
   vect_optimize_slp (bb_vinfo);
 
+  /* Gather the loads reachable from the SLP graph entries.  */
+  vect_gather_slp_loads (bb_vinfo);
+
   vect_record_base_alignments (bb_vinfo);
 
   /* Analyze and verify the alignment of data references and the
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 3ccd0fd552d..0ee4ef32eb2 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1974,6 +1974,7 @@ extern opt_result vect_analyze_slp (vec_info *, unsigned);
 extern bool vect_make_slp_decision (loop_vec_info);
 extern void vect_detect_hybrid_slp (loop_vec_info);
 extern void vect_optimize_slp (vec_info *);
+extern void vect_gather_slp_loads (vec_info *);
 extern void vect_get_slp_defs (slp_tree, vec<tree> *);
 extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
 			       unsigned n = -1U);
diff mbox series

Patch

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 39b7319e8253c351a4f6fbdd8c154330f08f2b1b..791d9c6cb0649862a84fd3c80efc89fefedbb085 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1979,6 +1979,61 @@  vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
   return opt_result::success ();
 }
 
+/* For every SLP only pattern created by the pattern matched rooted in ROOT
+   restore the relevancy of the original statements over those of the pattern
+   and destroy the pattern relationship.  This restores the SLP tree to a state
+   where it can be used when SLP build is cancelled or re-tried.  */
+
+static void
+vect_dissolve_slp_only_patterns (loop_vec_info loop_vinfo,
+				 hash_set<slp_tree> *visited, slp_tree root)
+{
+  if (!root || visited->add (root))
+    return;
+
+  unsigned int i;
+  slp_tree node;
+  stmt_vec_info related_stmt_info;
+  stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (root);
+
+  if (stmt_info && STMT_VINFO_SLP_VECT_ONLY (stmt_info))
+    {
+      vect_pop_relevancy (stmt_info);
+      if ((related_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info)) != NULL)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "dissolving relevancy of %G",
+			     STMT_VINFO_STMT (stmt_info));
+	  vect_dissolve_pattern_relevancy (related_stmt_info);
+	}
+    }
+
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, node)
+    vect_dissolve_slp_only_patterns (loop_vinfo, visited, node);
+}
+
+/* Lookup any SLP Only Pattern statements created by the SLP pattern matcher in
+   all slp_instances in LOOP_VINFO and undo the relevancy of statements such
+   that the original SLP tree before the pattern matching is used.  */
+
+static void
+vect_dissolve_slp_only_patterns (loop_vec_info loop_vinfo)
+{
+
+  unsigned int i;
+  hash_set<slp_tree> visited;
+
+  DUMP_VECT_SCOPE ("vect_dissolve_slp_only_patterns");
+
+  /* Unmark any SLP only patterns as relevant and restore the STMT_INFO of the
+     related instruction.  */
+  slp_instance instance;
+  FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
+    vect_dissolve_slp_only_patterns (loop_vinfo, &visited,
+				     SLP_INSTANCE_TREE (instance));
+}
+
 /* Look for SLP-only access groups and turn each individual access into its own
    group.  */
 static void
@@ -2583,6 +2638,9 @@  again:
   /* Ensure that "ok" is false (with an opt_problem if dumping is enabled).  */
   gcc_assert (!ok);
 
+  /* Dissolve any SLP patterns created by the SLP pattern matcher.  */
+  vect_dissolve_slp_only_patterns (loop_vinfo);
+
   /* Try again with SLP forced off but if we didn't do any SLP there is
      no point in re-trying.  */
   if (!slp)
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 0c065e835ad13ad32d222e2590e05ef56849c411..3be565a2e566e09a9e42d6c77ba402b9499b06b6 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -105,7 +105,7 @@  _slp_tree::~_slp_tree ()
 
 /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
 
-static void
+void
 vect_free_slp_tree (slp_tree node)
 {
   int i;
@@ -148,7 +148,7 @@  vect_free_slp_instance (slp_instance instance)
 
 /* Create an SLP node for SCALAR_STMTS.  */
 
-slp_tree
+static slp_tree
 vect_create_new_slp_node (slp_tree node,
 			  vec<stmt_vec_info> scalar_stmts, unsigned nops)
 {
@@ -165,7 +165,7 @@  vect_create_new_slp_node (slp_tree node,
 
 /* Create an SLP node for SCALAR_STMTS.  */
 
-static slp_tree
+slp_tree
 vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
 {
   return vect_create_new_slp_node (new _slp_tree, scalar_stmts, nops);
@@ -1646,6 +1646,7 @@  vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 	{
 	  slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
 	  SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
+	  SLP_TREE_VECTYPE (invnode) = vectype;
 	  oprnd_info->ops = vNULL;
 	  children.safe_push (invnode);
 	  continue;
@@ -1929,6 +1930,13 @@  vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
 	dump_printf (dump_kind, " %u", j);
       dump_printf (dump_kind, " }\n");
     }
+  if (SLP_TREE_REPRESENTATIVE (node)
+      && STMT_VINFO_SLP_VECT_ONLY (SLP_TREE_REPRESENTATIVE (node)))
+    {
+      dump_printf_loc (metadata, user_loc, "\tSLP Only pattern:\n");
+      dump_printf_loc (dump_kind, user_loc, "\t %G",
+		       STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)));
+    }
   if (SLP_TREE_LANE_PERMUTATION (node).exists ())
     {
       dump_printf_loc (metadata, user_loc, "\tlane permutation {");
@@ -2174,6 +2182,219 @@  calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
   return exact_div (common_multiple (nunits, group_size), group_size);
 }
 
+/* Helper function of optimize_load_redistribution that performs the operation
+   recursively.  */
+
+bool optimize_load_redistribution_1 (hash_set<slp_tree> *loads,
+				     scalar_stmts_to_slp_tree_map_t *bst_map,
+				     hash_set<slp_tree> *visited,
+				     slp_tree parent, unsigned idx,
+				     slp_tree root)
+{
+  if (visited->contains (root))
+    return true;
+  visited->add (root);
+
+  slp_tree node;
+  unsigned i;
+  stmt_vec_info dr_stmt = NULL;
+
+  /* For now, we don't know anything about externals so do not do anything.  */
+  if (SLP_TREE_DEF_TYPE (root) == vect_external_def
+      || SLP_TREE_DEF_TYPE (root) == vect_constant_def)
+    return false;
+
+  if (gimple_assign_load_p (STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (root))))
+    loads->add (root);
+
+  if (SLP_TREE_CODE (root) == VEC_PERM_EXPR
+      && SLP_TREE_LANE_PERMUTATION (root).exists ()
+      && !SLP_TREE_SCALAR_STMTS (root).exists ())
+    {
+
+      /* First convert this node into a load node and add it to the leaves
+         list and flatten the permute from a lane to a load one.  If it's
+         unneeded it will be elided later.  */
+      auto_vec<stmt_vec_info> stmts;
+      stmts.create (SLP_TREE_LANES (root));
+      load_permutation_t load_perm;
+      load_perm.create (SLP_TREE_LANES (root));
+      lane_permutation_t lane_perm = SLP_TREE_LANE_PERMUTATION (root);
+      for (unsigned j = 0; j < lane_perm.length (); j++)
+        {
+          std::pair<unsigned, unsigned> perm = lane_perm[j];
+	  /* This isn't strictly needed, but this function is a temporary
+	     one for specifically pattern matching, so don't want it to
+	     optimize things the remainder of the pipeline will.  */
+	  if (perm.first != j)
+	    goto next;
+          node = SLP_TREE_CHILDREN (root)[perm.first];
+	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
+	   {
+	      load_perm.release ();
+	      return false;
+	   }
+
+	  stmt_vec_info rep_stmt = SLP_TREE_REPRESENTATIVE (node);
+	  if (!STMT_VINFO_GROUPED_ACCESS (rep_stmt))
+	    goto next;
+
+	  if (!dr_stmt)
+	    dr_stmt = DR_GROUP_FIRST_ELEMENT (rep_stmt);
+
+	  if (dr_stmt != DR_GROUP_FIRST_ELEMENT (rep_stmt))
+	    goto next;
+
+	  stmts.quick_push (SLP_TREE_SCALAR_STMTS (node)[perm.second]);
+          load_perm.safe_push (SLP_TREE_LOAD_PERMUTATION (node)[perm.second]);
+        }
+
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "converting loads on permute node %p\n", root);
+
+      slp_tree *value = bst_map->get (stmts);
+      if (value)
+	node = *value;
+      else
+	{
+	  /* One last iteration to free the nodes.  */
+	  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, node)
+	    {
+	      /* If we are the only reference to the node, remove the vertex.
+		 We don't have to modify the graph since vertices lead the
+		 graph traversal.  */
+	      vect_free_slp_tree (node);
+	    }
+
+	  vec<stmt_vec_info> stmts_cpy = stmts.copy ();
+	  node = vect_create_new_slp_node (stmts_cpy.copy (), 0);
+	  bst_map->put (stmts_cpy, node);
+	}
+      SLP_TREE_CHILDREN (parent)[idx] = node;
+      SLP_TREE_REF_COUNT (node)++;
+      SLP_TREE_VECTYPE (node) = SLP_TREE_VECTYPE (root);
+      SLP_TREE_LOAD_PERMUTATION (node) = load_perm;
+      loads->add (node);
+      //do this up the recursive call.
+      //vect_free_slp_tree (root);
+
+      return true;
+    }
+
+next:
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node)
+    optimize_load_redistribution_1 (loads, bst_map, visited, root, i, node);
+
+  return true;
+}
+
+/* Temporary workaround for loads not being CSEd during SLP build.  This
+   function will traverse the SLP tree rooted in ROOT for INSTANCE and find
+   VEC_PERM nodes that blend vectors from multiple nodes that all read from the
+   same DR such that the final operation is equal to a permuted load.  Such
+   NODES are then directly converted into LOADS themselves.  The nodes are
+   CSEd using BST_MAP.
+
+   Finally the LOADS in INSTANCE are updated with the current set of loads.  */
+
+bool optimize_load_redistribution (slp_instance instance,
+				   scalar_stmts_to_slp_tree_map_t *bst_map,
+				   slp_tree root)
+{
+  slp_tree node;
+  unsigned i;
+  hash_set<slp_tree> visited;
+  hash_set<slp_tree> loads;
+
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node)
+    optimize_load_redistribution_1 (&loads, bst_map, &visited, root, i, node);
+
+  SLP_INSTANCE_LOADS (instance).truncate (0);
+  for (hash_set<slp_tree>::iterator it = loads.begin ();
+       it != loads.end (); ++it)
+    SLP_INSTANCE_LOADS (instance).safe_push (*it);
+
+  return true;
+}
+
+/* Helper function of vect_match_slp_patterns.
+
+   Attempts to match patterns against the slp tree rooted in REF_NODE using
+   VINFO.  Patterns are matched in post-order traversal.
+
+   If matching is successful the value in REF_NODE is updated and returned, if
+   not then it is returned unchanged.  */
+
+static bool
+vect_match_slp_patterns_2 (slp_tree *ref_node, vec_info *vinfo,
+			   slp_tree_to_load_perm_map_t *perm_cache,
+			   hash_set<slp_tree> *visited)
+{
+  unsigned i;
+  slp_tree node = *ref_node;
+  bool found_p = false, found_rec_p = false;
+  if (!node || visited->add (node))
+    return false;
+
+  slp_tree child;
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+    found_rec_p |= vect_match_slp_patterns_2 (&SLP_TREE_CHILDREN (node)[i],
+					      vinfo, perm_cache, visited);
+
+  for (unsigned x = 0; x < num__slp_patterns; x++)
+    {
+      vect_pattern *pattern = slp_patterns[x] (ref_node);
+      found_p = pattern->recognize (perm_cache, vinfo);
+      delete pattern;
+      found_rec_p = found_p | found_rec_p;
+    }
+
+  return found_rec_p;
+}
+
+/* Applies pattern matching to the given SLP tree rooted in REF_NODE using
+   vec_info VINFO.
+
+   The modified tree is returned.  Patterns are tried in order and multiple
+   patterns may match.  */
+
+static bool
+vect_match_slp_patterns (slp_instance instance, vec_info *vinfo,
+			 hash_set<slp_tree> *visited,
+			 slp_tree_to_load_perm_map_t *perm_cache,
+			 scalar_stmts_to_slp_tree_map_t *bst_map)
+{
+  DUMP_VECT_SCOPE ("vect_match_slp_patterns");
+  slp_tree *ref_node = &SLP_INSTANCE_TREE (instance);
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "Analyzing SLP tree %p for patterns\n",
+		     SLP_INSTANCE_TREE (instance));
+
+  bool found_p
+    = vect_match_slp_patterns_2 (ref_node, vinfo, perm_cache, visited);
+
+  if (found_p)
+    {
+      optimize_load_redistribution (instance, bst_map, *ref_node);
+
+      if (dump_enabled_p ())
+	{
+	  dump_printf_loc (MSG_NOTE, vect_location,
+			   "Pattern matched SLP tree\n");
+	  vect_print_slp_graph (MSG_NOTE, vect_location, *ref_node);
+	}
+    }
+
+  return found_p;
+}
+
+/* Analyze an SLP instance starting from a group of grouped stores.  Call
+   vect_build_slp_tree to build a tree of packed stmts if possible.
+   Return FALSE if it's impossible to SLP any stmt in the loop.  */
+
 static bool
 vect_analyze_slp_instance (vec_info *vinfo,
 			   scalar_stmts_to_slp_tree_map_t *bst_map,
@@ -2540,6 +2761,7 @@  vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
 {
   unsigned int i;
   stmt_vec_info first_element;
+  slp_instance instance;
 
   DUMP_VECT_SCOPE ("vect_analyze_slp");
 
@@ -2586,6 +2808,13 @@  vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
 				   slp_inst_kind_reduc_group, max_tree_size);
     }
 
+  hash_set<slp_tree> visited_patterns;
+  slp_tree_to_load_perm_map_t perm_cache;
+  /* See if any patterns can be found in the SLP tree.  */
+  FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (vinfo), i, instance)
+    vect_match_slp_patterns (instance, vinfo, &visited_patterns, &perm_cache,
+			     bst_map);
+
   /* The map keeps a reference on SLP nodes built, release that.  */
   for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
        it != bst_map->end (); ++it)
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index fa211b95c0e54be1d51ed949d7a06c31b7b50802..3b49ce22f7aae0465dbd0b24cbf48ae054c31d22 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -27,6 +27,7 @@  typedef class _stmt_vec_info *stmt_vec_info;
 #include "tree-hash-traits.h"
 #include "target.h"
 #include "alloc-pool.h"
+#include "internal-fn.h"
 
 
 /* Used for naming of new temporaries.  */
@@ -1118,6 +1119,11 @@  public:
      indicates whether the stmt needs to be vectorized.  */
   enum vect_relevant relevant;
 
+  /* During SLP vectorization we may need to change the relevancy of a statement
+     but restore it during dissolving of SLP nodes.  This field contains a copy
+     of the original relevancy analysis.  */
+  enum vect_relevant saved_relevant;
+
   /* For loads if this is a gather, for stores if this is a scatter.  */
   bool gather_scatter_p;
 
@@ -1240,6 +1246,7 @@  struct gather_scatter_info {
 #define STMT_VINFO_TYPE(S)                 (S)->type
 #define STMT_VINFO_STMT(S)                 (S)->stmt
 #define STMT_VINFO_RELEVANT(S)             (S)->relevant
+#define STMT_VINFO_SAVED_RELEVANT(S)       (S)->saved_relevant
 #define STMT_VINFO_LIVE_P(S)               (S)->live
 #define STMT_VINFO_VECTYPE(S)              (S)->vectype
 #define STMT_VINFO_VEC_STMTS(S)            (S)->vec_stmts
@@ -1368,6 +1375,46 @@  vect_orig_stmt (stmt_vec_info stmt_info)
   return stmt_info;
 }
 
+/* If restore the saved relevancy information STMT_INFO from the copy made
+   during SLP pattern detection.  */
+
+static inline void
+vect_pop_relevancy (stmt_vec_info stmt_info)
+{
+  STMT_VINFO_RELEVANT (stmt_info) = STMT_VINFO_SAVED_RELEVANT (stmt_info);
+}
+
+/* Restores the saved relevancy of STMT_INFO and marks it as not being inside a
+   pattern.  Lastly the SLP_TYPE is set to loop_vect.  */
+
+static inline void
+vect_dissolve_pattern_relevancy (stmt_vec_info stmt_info)
+{
+  vect_pop_relevancy (stmt_info);
+  STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
+  STMT_SLP_TYPE (stmt_info) = loop_vect;
+}
+
+/* Save the current relevancy of STMT_INFO such that it can be restored by
+   vect_pop_relevancy.  */
+
+static inline void
+vect_save_relevancy (stmt_vec_info stmt_info)
+{
+  STMT_VINFO_SAVED_RELEVANT (stmt_info)
+    = STMT_VINFO_RELEVANT (stmt_info);
+}
+
+/* Save the current relevancy of STMT_INFO before changing it to REL.  */
+
+static inline void
+vect_push_relevancy (stmt_vec_info stmt_info, enum vect_relevant rel)
+{
+  vect_save_relevancy (stmt_info);
+  STMT_VINFO_RELEVANT (stmt_info) = rel;
+}
+
+
 /* Return the later statement between STMT1_INFO and STMT2_INFO.  */
 
 static inline stmt_vec_info
@@ -1993,6 +2040,7 @@  extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
 extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
 extern bool vect_update_shared_vectype (stmt_vec_info, tree);
 extern slp_tree vect_create_new_slp_node (vec<stmt_vec_info>, unsigned);
+extern void vect_free_slp_tree (slp_tree);
 
 /* In tree-vect-patterns.c.  */
 extern void
@@ -2009,4 +2057,108 @@  void vect_free_loop_info_assumptions (class loop *);
 gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL);
 bool vect_stmt_dominates_stmt_p (gimple *, gimple *);
 
+/* SLP Pattern matcher types, tree-vect-slp-patterns.c.  */
+
+/* Forward declaration of possible two operands operation that can be matched
+   by the complex numbers pattern matchers.  */
+enum _complex_operation : unsigned;
+
+/* Cache from nodes to the load permutation they represent.  */
+typedef hash_map <slp_tree, load_permutation_t >
+  slp_tree_to_load_perm_map_t;
+
+/* Vector pattern matcher base class.  All SLP pattern matchers must inherit
+   from this type.  */
+
+class vect_pattern
+{
+  protected:
+    /* The number of arguments that the IFN requires.  */
+    unsigned m_num_args;
+
+    /* The internal function that will be used when a pattern is created.  */
+    internal_fn m_ifn;
+
+    /* The current node being inspected.  */
+    slp_tree *m_node;
+
+    /* The list of operands to be the children for the node produced when the
+       internal function is created.  */
+    vec<slp_tree> m_ops;
+
+    /* Default constructor where NODE is the root of the tree to inspect.  */
+    vect_pattern (slp_tree *node)
+    {
+      this->m_ifn = IFN_LAST;
+      this->m_node = node;
+      this->m_ops.create (0);
+    }
+
+  public:
+    /* Attempt to recognize a pattern, validate and update the tree rooted in
+       M_NODE.  */
+    virtual bool recognize (slp_tree_to_load_perm_map_t *, vec_info *);
+
+    /* Only perform the pattern creation part of the matcher.  This creates and
+       returns the new pattern statement.  */
+    virtual gcall *build (vec_info *) = 0;
+
+    /* Performs a check to see if the matched IFN is supported by the current
+       target.  */
+    virtual bool is_optab_supported_p (tree vectype, optimization_type opt_type)
+    {
+      if (!vectype)
+        return false;
+
+      return direct_internal_fn_supported_p (this->m_ifn, vectype, opt_type);
+    }
+
+    /* Create a new instance of the pattern matcher class of the given type.  */
+    static vect_pattern* create (slp_tree *);
+
+    /* Match but do not perform any additional operations on the SLP tree.  */
+    virtual bool matches (slp_tree_to_load_perm_map_t *) = 0;
+
+    /* Match but use for the first operation the supplied COMPLEX_OPERATION.  No
+       additional operations or modification of the SLP tree are performed.  */
+    virtual bool matches (enum _complex_operation,
+			  slp_tree_to_load_perm_map_t *, vec<slp_tree>)
+    {
+      return false;
+    }
+
+    /* Friendly name of the operation the pattern matches.  */
+    virtual const char* get_name () = 0;
+
+    /* Default destructor.  */
+    virtual ~vect_pattern ()
+    {
+	this->m_ops.release ();
+    }
+
+    /* Check to see if the matched tree is valid for the operation the matcher
+       wants.  If the operation is valid then the tree is reshaped in the final
+       format that build () requires.  */
+    virtual bool validate_p (slp_tree_to_load_perm_map_t *)
+    {
+      return true;
+    }
+
+    /* Return the matched internal function.  If no match was done this is set
+       to LAST_IFN.  */
+    virtual internal_fn get_ifn ()
+    {
+      return this->m_ifn;
+    }
+};
+
+/* Function pointer to create a new pattern matcher from a generic type.  */
+typedef vect_pattern* (*vect_pattern_decl_t) (slp_tree *);
+
+/* List of supported pattern matchers.  */
+extern vect_pattern_decl_t slp_patterns[];
+
+/* Number of supported pattern matchers.  */
+extern size_t num__slp_patterns;
+
 #endif  /* GCC_TREE_VECTORIZER_H  */
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index d81774b242569262a51b7be02815acd6d1a6bfd0..2a6ddd685922f6b60ae1305974335fb863a2af39 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -535,6 +535,8 @@  vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
   stmt_vec_info res = new_stmt_vec_info (stmt);
   set_vinfo_for_stmt (stmt, res, false);
   STMT_VINFO_RELATED_STMT (res) = stmt_info;
+  vect_save_relevancy (stmt_info);
+  vect_push_relevancy (res, STMT_VINFO_RELEVANT (stmt_info));
   return res;
 }