From 147dc1eb8a835a70411ac8e11b6c9bc63695c431 Mon Sep 17 00:00:00 2001
From: Bin Cheng <binche01@e108451-lin.cambridge.arm.com>
Date: Wed, 9 Aug 2017 11:47:52 +0100
Subject: [PATCH 6/6] vect-rt-alias-dep-info-20170801.txt
---
gcc/testsuite/gcc.dg/vect/vect-rt-alias-info.c | 11 ++
gcc/tree-vect-loop-manip.c | 210 +++++++++++++++++++++++++
gcc/tree-vect-loop.c | 8 +
gcc/tree-vect-stmts.c | 32 ++++
gcc/tree-vectorizer.h | 12 ++
5 files changed, 273 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-rt-alias-info.c
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fdump-tree-vect-alias -fdump-tree-loopdone-alias" } */
+
+void foo (int *a, int *c, int *d)
+{
+ for (int i = 0; i < 1024; ++i)
+ a[i] = c[i] + d[i];
+}
+/* { dg-final { scan-tree-dump "clique . base . fixed" "vect" } } */
+/* { dg-final { scan-tree-dump-not "clique . base . fixed" "loopdone" } } */
@@ -2091,6 +2091,215 @@ vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
}
}
+/* Hash map callback function freeing rt_alias_clique. */
+
+bool
+free_rt_alias_clique (data_reference_p const &,
+ rt_alias_clique_p const &clique, void *)
+{
+ free (clique);
+ return true;
+}
+
+/* Private data for callback function traversing graph edges. */
+
+struct edge_callback_data
+{
+ vec<data_reference_p> *datarefs;
+ hash_map<data_reference_p, struct rt_alias_clique *> *clique_map;
+ bool valid_p;
+};
+
+/* Callback function for traversing graph edges. */
+
+static void
+alias_graphd_edge_callback (struct graph *, struct graph_edge *e, void *data)
+{
+ struct edge_callback_data *edata = (struct edge_callback_data *) data;
+ struct rt_alias_clique **slot1, **slot2;
+
+ if (!edata->valid_p)
+ return;
+
+ slot1 = edata->clique_map->get ((*edata->datarefs)[e->src]);
+ gcc_assert (slot1 != NULL && (*slot1)->id == e->src);
+ if ((*slot1)->clique == 0)
+ {
+ edata->valid_p = false;
+ return;
+ }
+ slot2 = edata->clique_map->get ((*edata->datarefs)[e->dest]);
+ gcc_assert (slot2 != NULL && (*slot2)->id == e->dest);
+ /* Don't check if source and dest have the same dependence_info.base.
+ The overall effect is like we discard this edge. */
+ if ((*slot1)->clique == 0)
+ edata->valid_p = false;
+}
+
+/* Return TRUE if memory reference of DREF has dependence info set. */
+
+static inline bool
+data_ref_with_dep_info_p (data_reference_p dref)
+{
+ tree ref = DR_REF (dref);
+
+ return ((TREE_CODE (ref) == MEM_REF || TREE_CODE (ref) == TARGET_MEM_REF)
+ && MR_DEPENDENCE_CLIQUE (ref) != 0);
+}
+
+/* This function analyzes data dependence given by runtime alias checks.
+ If analysis succeeds, it assigns dependence information to data refs
+ and records it in a hash map.
+
+ Runtime alias checks can be modeled as an arbitrary graph with data
+ references as vertices. In theory we can record the graph and pass
+ it to later optimizers in order to enable more transformations. In
+ practice, it's not feasible for high space/time cost in handling
+ arbitrary graph. GCC uses simple data structure in which two fields
+ (clique/base) are set for each memory reference. Two references are
+ known to not alias each other if dependence_info.clique are equal and
+ dependence_info.base are distinct. This simple data structure can
+ not accurately model graph with diameter bigger than 2. For example,
+ below graph can't be modeled.
+
+ v0----v1----v2----v3
+
+ The data structure can't model some graph with diameter equal to 2
+ if the graph contains clique with more than 2 vertices, as below:
+
+ v0----v1---v4
+ \ /
+ \ /
+ v2
+
+ This function only handles (diameter <= 2) graphs. Fortunately, we
+ can reduce arbitrary graph by simply discarding edges to sub-graph
+ which can be modeled. For example, we can discard edge <v0, v2> in
+ above graph. This approximation results in less optimizations but
+ correct code. */
+
+static void
+record_runtime_alias_for_data_refs (loop_vec_info loop_vinfo)
+{
+ vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
+ unsigned short clique = cfun->last_clique;
+ unsigned j, k;
+ rt_alias_clique_p *slot_a, *slot_b;
+ int i, num_sccs, start;
+ auto_vec<data_reference_p> datarefs;
+ hash_map<data_reference_p, rt_alias_clique_p> *clique_map;
+ struct graph *alias_graph;
+ struct edge_callback_data edata;
+ auto_vec<int> vertices_a, vertices_b;
+
+ /* Initialize hash map from data reference to dependence info. */
+ clique_map = new hash_map<data_reference_p, rt_alias_clique_p> ();
+ for (j = 0, i = 0; j < may_alias_ddrs.length (); j++)
+ {
+ data_reference_p dra = DDR_A (may_alias_ddrs[j]);
+ data_reference_p drb = DDR_B (may_alias_ddrs[j]);
+
+ if (data_ref_with_dep_info_p (dra)
+ || data_ref_with_dep_info_p (drb))
+ continue;
+
+ if ((clique_map->get (dra)) == NULL)
+ {
+ rt_alias_clique_p clique_p = XCNEW (struct rt_alias_clique);
+ clique_p->id = i++;
+ clique_map->put (dra, clique_p);
+ datarefs.safe_push (dra);
+ }
+ if ((clique_map->get (drb)) == NULL)
+ {
+ rt_alias_clique_p clique_p = XCNEW (struct rt_alias_clique);
+ clique_p->id = i++;
+ clique_map->put (drb, clique_p);
+ datarefs.safe_push (drb);
+ }
+ }
+
+ /* Create graph and add dependence edges to it. */
+ alias_graph = new_graph (datarefs.length ());
+ for (j = 0; j < may_alias_ddrs.length (); j++)
+ {
+ if (data_ref_with_dep_info_p (DDR_A (may_alias_ddrs[j]))
+ || data_ref_with_dep_info_p (DDR_B (may_alias_ddrs[j])))
+ continue;
+
+ slot_a = clique_map->get (DDR_A (may_alias_ddrs[j]));
+ slot_b = clique_map->get (DDR_B (may_alias_ddrs[j]));
+ add_edge (alias_graph, (*slot_a)->id, (*slot_b)->id);
+ add_edge (alias_graph, (*slot_b)->id, (*slot_a)->id);
+ }
+
+ /* traverse each SCC of graph in two steps and assign dependence info. */
+ num_sccs = graphds_scc (alias_graph, NULL);
+ for (i = 0; i < num_sccs; ++i)
+ {
+ clique++;
+ /* Find start vertex of SCC. */
+ for (j = 0; j < datarefs.length (); ++j)
+ if (alias_graph->vertices[j].component == i)
+ break;
+
+ gcc_assert (j < datarefs.length ());
+ start = (int) j;
+
+ /* Assign dependence info for start vertex of SCC. */
+ slot_a = clique_map->get (datarefs[start]);
+ (*slot_a)->clique = clique;
+ (*slot_a)->base = 0;
+
+ /* Get all adjacent vertices of start vertex. */
+ vertices_a.truncate (0);
+ adjacent_vertices (alias_graph, start, &vertices_a);
+ /* Assign dependence info for adjacent vertices of start vertex. */
+ for (j = 0; j < vertices_a.length (); ++j)
+ {
+ slot_a = clique_map->get (datarefs[vertices_a[j]]);
+ (*slot_a)->clique = clique;
+ (*slot_a)->base = 1;
+ }
+ /* Assign dependence info for adjacent vertices of ajacent vertices
+ of start vertex. */
+ for (j = 0; j < vertices_a.length (); ++j)
+ {
+ vertices_b.truncate (0);
+ adjacent_vertices (alias_graph, vertices_a[j], &vertices_b);
+ for (k = 0; k < vertices_b.length (); ++k)
+ {
+ slot_b = clique_map->get (datarefs[vertices_b[k]]);
+ if ((*slot_b)->clique == 0)
+ {
+ (*slot_b)->clique = clique;
+ (*slot_b)->base = 0;
+ }
+ }
+ }
+ }
+
+ edata.datarefs = &datarefs;
+ edata.clique_map = clique_map;
+ edata.valid_p = true;
+ /* Traverse graph edges and check if all vertices have been assigned
+ dependence info. */
+ for_each_edge (alias_graph, alias_graphd_edge_callback, &edata);
+ free_graph (alias_graph);
+
+ /* We only handle graph whose diameter is at most 2, otherwise the
+ dependence graph can't be represented by the clique model accurately. */
+ if (edata.valid_p)
+ {
+ cfun->last_clique = clique;
+ LOOP_VINFO_RT_ALIAS_CLIQUE_MAP (loop_vinfo) = clique_map;
+ return;
+ }
+
+ clique_map->traverse<void *, free_rt_alias_clique> (NULL);
+ delete clique_map;
+}
+
/* Function vect_create_cond_for_alias_checks.
Create a conditional expression that represents the run-time checks for
@@ -2122,6 +2331,7 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
&comp_alias_ddrs, cond_expr);
+ record_runtime_alias_for_data_refs (loop_vinfo);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"created %u versioning for alias checks.\n",
@@ -1114,6 +1114,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in)
unaligned_dr (NULL),
peeling_for_alignment (0),
ptr_mask (0),
+ rt_alias_clique_map (NULL),
slp_unrolling_factor (1),
single_scalar_iteration_cost (0),
vectorizable (false),
@@ -1223,6 +1224,13 @@ _loop_vec_info::~_loop_vec_info ()
free (bbs);
+ if (rt_alias_clique_map != NULL)
+ {
+ rt_alias_clique_map->traverse<void *, free_rt_alias_clique> (NULL);
+ delete rt_alias_clique_map;
+ rt_alias_clique_map = NULL;
+ }
+
loop->aux = NULL;
}
@@ -5601,6 +5601,30 @@ get_group_alias_ptr_type (gimple *first_stmt)
return reference_alias_ptr_type (DR_REF (first_dr));
}
+/* Given data reference DR and its vectorized form REF, this function
+ sets runtime alias/non-dependent info of DR to REF. */
+
+static void
+set_runtime_alias_dependence_info (loop_vec_info loop_vinfo,
+ data_reference_p dr, tree ref)
+{
+ rt_alias_clique_p *slot;
+ hash_map<data_reference_p, rt_alias_clique_p> *clique_map;
+
+ if ((clique_map = LOOP_VINFO_RT_ALIAS_CLIQUE_MAP (loop_vinfo)) == NULL)
+ return;
+
+ slot = clique_map->get (dr);
+ if (slot == NULL || (*slot)->clique == 0)
+ return;
+
+ MR_DEPENDENCE_CLIQUE (ref) = (*slot)->clique;
+ MR_DEPENDENCE_BASE (ref) = (*slot)->base;
+ /* Non-dependent info from runtime alias check is only valid for fixed
+ length access. It should be reset whenever the memory reference is
+ copied. */
+ MR_DEPENDENCE_FIXED_LENGTH_P (ref) = true;
+}
/* Function vectorizable_store.
@@ -6423,6 +6447,9 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
misalign);
+ if (loop_vinfo)
+ set_runtime_alias_dependence_info (loop_vinfo, dr, data_ref);
+
if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
{
tree perm_mask = perm_mask_for_reverse (vectype);
@@ -7515,6 +7542,11 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
&& TREE_CODE (dataref_ptr) == SSA_NAME)
set_ptr_info_alignment (get_ptr_info (dataref_ptr),
align, misalign);
+
+ if (loop_vinfo)
+ set_runtime_alias_dependence_info (loop_vinfo,
+ dr, data_ref);
+
break;
}
case dr_explicit_realign:
@@ -211,6 +211,15 @@ is_a_helper <_bb_vec_info *>::test (vec_info *i)
}
+typedef struct rt_alias_clique
+{
+ int id;
+ unsigned short clique, base;
+}*rt_alias_clique_p;
+
+extern bool free_rt_alias_clique (data_reference_p const &,
+ rt_alias_clique_p const &, void *);
+
/*-----------------------------------------------------------------*/
/* Info on vectorized loops. */
/*-----------------------------------------------------------------*/
@@ -285,6 +294,8 @@ typedef struct _loop_vec_info : public vec_info {
/* Cost vector for a single scalar iteration. */
auto_vec<stmt_info_for_cost> scalar_cost_vec;
+ hash_map<data_reference_p, rt_alias_clique_p> *rt_alias_clique_map;
+
/* The unrolling factor needed to SLP the loop. In case of that pure SLP is
applied to the loop, i.e., no unrolling is needed, this is 1. */
unsigned slp_unrolling_factor;
@@ -379,6 +390,7 @@ typedef struct _loop_vec_info : public vec_info {
#define LOOP_VINFO_SCALAR_LOOP(L) (L)->scalar_loop
#define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store
#define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
+#define LOOP_VINFO_RT_ALIAS_CLIQUE_MAP(L) (L)->rt_alias_clique_map
#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
#define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
--
1.9.1