===================================================================
@@ -1529,6 +1529,7 @@
tree-vect-slp.o \
tree-vectorizer.o \
tree-vect-unified.o \
+ tree-vect-unified-opts.o \
tree-vrp.o \
tree.o \
valtrack.o \
===================================================================
@@ -136,16 +136,9 @@
return scalar_type;
}
-
-/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
- tested at run-time. Return TRUE if DDR was successfully inserted.
- Return false if versioning is not supported. */
-
-static bool
-vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
+bool
+vect_mark_for_runtime_alias_test_1 (ddr_p ddr, loop *loop)
{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-
if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
return false;
@@ -189,11 +182,28 @@
return false;
}
- LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
return true;
}
+
+/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
+ tested at run-time. Return TRUE if DDR was successfully inserted.
+ Return false if versioning is not supported. */
+
+static bool
+vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ bool is_alias;
+
+ is_alias = vect_mark_for_runtime_alias_test_1 (ddr, loop);
+ if (is_alias)
+ LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
+ return is_alias;
+}
+
+
/* Function vect_analyze_data_ref_dependence.
Return TRUE if there (might) exist a dependence between a memory-reference
===================================================================
@@ -0,0 +1,391 @@
+/* lOOP Vectorization using unified representation
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Loop autovectorization using unified representation for permute
+ instructions. */
+#if 1
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "predict.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "cgraph.h"
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-cfg.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
+#include "tree-ssa-propagate.h"
+#include "dbgcnt.h"
+#include "tree-scalar-evolution.h"
+#include "tree-vect-unified.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
+#include "target.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "optabs-tree.h"
+#include "dumpfile.h"
+#include "alias.h"
+#include "tree-eh.h"
+#include "gimplify.h"
+#include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "tree-ssa-loop.h"
+#include "expr.h"
+#include "builtins.h"
+#include "params.h"
+#include "pretty-print.h"
+#include "pretty-print.h"
+
+/* Function ILV_arity_reduction.
+
+ ILV_k^N ILV_m^Nk/m
+ / | \ => / | \
+ / | \ / | \
+ T1 T2 ... Tk / | ... \
+ / | \
+ ILV_k/m^N ILV_k/m^N ILV_k/m^N
+ / | \ / | \ / | \
+ / |...\ / |...\ / |...\
+ T1 Tm+1 Tk-m+1 T2 Tm+2 Tk-m+2 Tm T2m Tk
+
+*/
+
+struct primop_tree *
+ILV_arity_reduction (struct primop_tree *root, int from_arity, int to_arity)
+{
+ struct primop_tree *new_root, *new_child, *tmp;
+ tree new_iter_count;
+ int i, j;
+
+ gcc_assert (from_arity == PT_DIVISION (root));
+
+ new_iter_count = size_binop (MULT_EXPR,
+ fold_convert (ssizetype, PT_ITER_COUNT (root)),
+ ssize_int (from_arity / to_arity));
+ new_root = create_primTree_combine (POP_ILV, NULL, to_arity,
+ new_iter_count, PT_PARENT (root));
+
+ for (i = 0; i < to_arity; i++)
+ {
+ new_child = create_primTree_combine (POP_ILV, NULL,
+ from_arity / to_arity, PT_ITER_COUNT (root), new_root);
+
+ for (j = 0; j < from_arity / to_arity; j++)
+ {
+ add_child_at_index (new_child, PT_CHILD (root, j * to_arity + i), j);
+ }
+
+ tmp = k_arity_promotion_reduction (new_child, to_arity);
+
+ if (tmp != NULL)
+ {
+ add_child_at_index (new_root, tmp, i);
+ }
+ else
+ return NULL;
+ }
+
+ return new_root;
+}
+
+/*Function EXTR_arity_reduction.
+
+ EXTR Arity Reduction
+
+ EXTR_k,i^N EXTR_m,(i/(k/m))%m^Nm/k
+ | => |
+ | |
+ T EXTR_k/m,i%(k/m)^N
+ |
+ |
+ T
+*/
+
+struct primop_tree *
+EXTR_arity_reduction (struct primop_tree *root, int from_arity, int to_arity)
+{
+ struct primop_tree *new_root, *new_child, *tmp;
+ tree new_iter_count;
+ int i, j;
+ gcc_assert (from_arity == PT_DIVISION (root));
+ new_iter_count = size_binop (MULT_EXPR,
+ fold_convert (ssizetype, PT_ITER_COUNT (root)),
+ ssize_int (to_arity / from_arity));
+ new_root = create_primTree_partition (POP_EXTR, NULL, to_arity,
+ (PT_OPERAND_SELECTOR (root) * to_arity / from_arity) % to_arity,
+ new_iter_count, PT_PARENT (root));
+
+ new_child = create_primTree_partition (POP_EXTR, NULL, from_arity/to_arity,
+ PT_OPERAND_SELECTOR (root) % (from_arity / to_arity),
+ PT_ITER_COUNT (root), new_root);
+
+ add_child_at_index (new_child, PT_CHILD (root, 0), 0);
+
+ tmp = k_arity_promotion_reduction (new_child, to_arity);
+
+ if (tmp != NULL)
+ {
+ add_child_at_index (new_root, tmp, 0);
+ }
+ else
+ return NULL;
+
+
+ return new_root;
+}
+
+/* Function k_arity_reduction.
+
+ Driver function for arity reduction of EXTR, ILV, CONCAT and SPLT. CONCAT
+ and SPLT need not be implemented due to phase ordering. */
+
+struct primop_tree *
+k_arity_reduction (struct primop_tree *root, int from_arity, int to_arity)
+{
+ if (PT_NODE_OP (root) == POP_ILV)
+ return ILV_arity_reduction (root, from_arity, to_arity);
+
+ if (PT_NODE_OP (root) == POP_EXTR)
+ return EXTR_arity_reduction (root, from_arity, to_arity);
+
+ /* For leaf nodes - like const or memory, return root. */
+ if (PT_ARITY (root) == 0)
+ return root;
+
+ return NULL;
+}
+
+/* Function ILV_arity_promotion.
+
+ ILV_k^N
+ / | \
+ / | \
+ / | \
+ T1 T2 Tk
+
+ _||_
+ \ /
+ \/
+
+ ILV_m^Nk/m
+ +-----------------+-----------+-+---------------------+
+ / / / \ \
+ / / / \ \
+ / / / \ \
+EXTR_m/k^N,0 ... EXTR_m/k^N,0 EXTR_m/k^N,1 ... EXTR_m/k^N,m/k-1 ... EXTR_m/k^N,m/k-1
+ | | | | |
+ T1 Tk T1 T1 Tk
+
+*/
+
+struct primop_tree *
+ILV_arity_promotion (struct primop_tree *root, int from_arity, int to_arity)
+{
+ struct primop_tree *new_root, *new_child, *tmp;
+ tree new_iter_count;
+ int i, j;
+
+ gcc_assert (from_arity == PT_DIVISION (root));
+ new_iter_count = size_binop (EXACT_DIV_EXPR, size_binop (MULT_EXPR,
+ fold_convert (ssizetype,
+ PT_ITER_COUNT (root)),
+ ssize_int (from_arity)),
+ ssize_int (to_arity));
+ new_root = create_primTree_combine (POP_ILV, NULL, to_arity,
+ new_iter_count, PT_PARENT (root));
+ for (i = 0; i < to_arity / from_arity; i++)
+ {
+ for (j = 0; j < from_arity; j++)
+ {
+ new_child = create_primTree_partition (POP_EXTR, NULL,
+ to_arity / from_arity, i, PT_ITER_COUNT (root),
+ new_root);
+ add_child_at_index (new_child, PT_CHILD (root, j), 0);
+ tmp = k_arity_promotion_reduction (new_child, to_arity);
+ if (tmp != NULL)
+ add_child_at_index (new_root, tmp, i * from_arity + j);
+ else
+ return NULL;
+ }
+ }
+
+ return new_root;
+}
+
+/* Function merge_EXTR_nodes.
+
+ Multiple EXTR nodes can be there in primop_tree, which need to be merged,
+ before promoting/reducing EXTR node for better optimization. If merged EXTR
+ node needs further promotion, the vectorization cannot be applied, as EXTR
+ promotion can introduce new ILV node.
+ TODO - Add stricter check for final promotion/reduction to target specific
+ arity. Currently, we do not block vectorization if EXTR arity cannot be
+ promoted.
+*/
+
+struct primop_tree *
+merge_EXTR_nodes (struct primop_tree *root, int to_arity)
+{
+ int from_arity, parts, selector;
+ struct primop_tree *iter_node, *tmp;
+ tree iter_count;
+
+ from_arity = PT_DIVISION (root);
+ parts = 1;
+ iter_node = root;
+ selector = 0;
+
+ while (PT_NODE_OP (iter_node) == POP_EXTR)
+ {
+ iter_count = PT_ITER_COUNT (iter_node);
+ parts = parts * PT_DIVISION (iter_node);
+ selector = selector * PT_DIVISION (iter_node)
+ + PT_OPERAND_SELECTOR (iter_node);
+ iter_node = PT_CHILD (iter_node, 0);
+ }
+
+ if (iter_node != PT_CHILD (root, 0))
+ {
+ tmp = create_primTree_partition (POP_EXTR, NULL, parts, selector,
+ iter_count, PT_PARENT (root));
+ add_child_at_index (tmp, iter_node, 0);
+ return tmp;
+ }
+
+ return root;
+}
+
+/* Function k_arity_promotion.
+
+ Driver function for arity promotion of EXTR, ILV, CONCAT and SPLT. CONCAT
+ and SPLIT need not be implemented due to phase ordering. Implementation of
+ ILV-promotion introduces new EXTR nodes whereas implementation of
+ EXTR-promotion introduce ILV node. To avoid the infinite loop, only
+ ILV-promotion or EXTR-promotion can be implemented. As the
+ k_arity_promotion_reduction is top-down algorithm, ILV-promotion is
+ implemented. For EXTR-promotion, merge newly created EXPR node with child
+ EXTR node if any, and hope that arity promotion/reduction is applicable
+ to new EXTR node. */
+
+struct primop_tree *
+k_arity_promotion (struct primop_tree *root, int from_arity, int to_arity)
+{
+ struct primop_tree *tmp_root;
+
+ if (PT_NODE_OP (root) == POP_ILV)
+ return ILV_arity_promotion (root, from_arity, to_arity);
+
+ if (PT_NODE_OP (root) == POP_EXTR)
+ {
+ tmp_root = merge_EXTR_nodes (root, to_arity);
+ if (tmp_root != root)
+ return k_arity_promotion_reduction (tmp_root, to_arity);
+ else
+ return root;
+ }
+/* For leaf nodes - like const or memory, return root. */
+ if (PT_ARITY (root) == 0)
+ return root;
+
+ return NULL;
+}
+
+/* Function k_arity_promotion_reduction.
+
+ Driver function for promoting/reducing arity of the tree rooted at ROOT from
+ FROM_ARITY to TO_ARITY. */
+
+struct primop_tree *
+k_arity_promotion_reduction (struct primop_tree *root, int to_arity)
+{
+ struct primop_tree *retval = root;
+ int from_arity, i;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "\n k_arity_promotion_reduction: ");
+ dump_primtree_node (MSG_NOTE, root);
+ }
+
+ if (PT_NODE_OP (root) == POP_EXTR || PT_NODE_OP (root) == POP_SPLT)
+ from_arity = PT_DIVISION (root);
+ else
+ from_arity = PT_ARITY (root);
+
+ if (PT_NODE_OP (root) >= MAX_TREE_CODES && PT_NODE_OP (root) < POP_COLLAPSE)
+ {
+ if (from_arity > to_arity)
+ {
+ /* Arity reduction. */
+ if (from_arity % to_arity == 0)
+ {
+ retval = k_arity_reduction (root, from_arity, to_arity);
+ return retval;
+ }
+ else
+ return NULL;
+ }
+ else if (from_arity < to_arity)
+ {
+ /* Arity promotion. */
+ if (to_arity % from_arity == 0)
+ {
+ retval = k_arity_promotion (root, from_arity, to_arity);
+ return retval;
+ }
+ else
+ return NULL;
+ }
+ else
+ {
+ retval = duplicate_prim_node (root);
+ //return retval;
+ }
+ }
+
+ if (retval != NULL)
+ {
+ /* The tree node is compute-node. Hence, no action to be taken for arity
+ promotion/reduction. However, the subtrees below this root may need
+ arity adjustment. Hence, invoke k_arity_promotion_reduction algorithm
+ recursively on children of root. */
+ for (i = 0; i < retval->children.length (); i++)
+ {
+ struct primop_tree *tmp;
+ tmp = k_arity_promotion_reduction (PT_CHILD (retval, i), to_arity);
+ if (tmp == NULL)
+ return NULL;
+
+ PT_CHILD (retval, i) = tmp;
+ }
+
+ PT_ARITY (retval) = i;
+ }
+
+ return retval;
+
+
+}
+
+#endif
===================================================================
@@ -1,4 +1,10 @@
-/* lOOP Vectorization using unified representation
+/* lOOP Vectorization using unified representation for permute instructions.
+ Copyright (C) 2003-2015 Free Software Foundation, Inc.
+ Contributed by Sameera Deshpande <sameera.deshpande@imgtec.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
@@ -102,6 +108,26 @@
} // anon namespace
+#define DEFTREECODE(SYM, NAME, TYPE, LEN) NAME,
+#define END_OF_BASE_TREE_CODES "@dummy",
+
+static const char *const tree_code_name[] = {
+#include "all-tree.def"
+"ILV",
+"CONCAT",
+"EXTR",
+"SPLT",
+"COLLAPSE",
+"MEMREF",
+"CONST",
+"INVAR",
+"ITER"
+};
+
+#undef DEFTREECODE
+#undef END_OF_BASE_TREE_CODES
+
+
gimple_opt_pass *
make_pass_unified_vectorize (gcc::context *ctxt)
{
@@ -108,6 +134,53 @@
return new pass_unified_vectorize (ctxt);
}
+
+vec<struct stmt_attr *> stmt_attr_vec;
+
+void
+init_stmt_attr_vec (void)
+{
+ gcc_assert (!stmt_attr_vec.exists ());
+ stmt_attr_vec.create (50);
+}
+
+void
+free_stmt_attr_vec (void)
+{
+ gcc_assert (stmt_attr_vec.exists ());
+ stmt_attr_vec.release ();
+}
+
+inline void
+set_stmt_attr (gimple *stmt, struct stmt_attr *info)
+{
+ unsigned int uid = gimple_uid (stmt);
+ if (uid == 0)
+ {
+ gcc_checking_assert (info);
+ uid = stmt_attr_vec.length () + 1;
+ gimple_set_uid (stmt, uid);
+ stmt_attr_vec.safe_push (info);
+ }
+ else
+ {
+ gcc_checking_assert (info == NULL);
+ stmt_attr_vec[uid - 1] = info;
+ }
+}
+
+inline struct stmt_attr *
+get_stmt_attr (gimple *stmt)
+{
+ unsigned int uid = gimple_uid (stmt);
+ if (uid == 0)
+ return NULL;
+
+ return stmt_attr_vec[uid - 1];
+}
+
+
+
/* Function new_iter_node.
Create new ITER_node for the loop LOOP, and return the pointer. */
@@ -1498,7 +1571,7 @@
*/
struct primop_tree *
-populate_prim_node (enum primop_code pcode, struct ITER_node *inode,
+populate_prim_node (enum primop_code pcode, tree iter_count,
struct primop_tree *parent, gimple *stmt)
{
struct primop_tree *ptree;
@@ -1506,7 +1579,7 @@
PT_NODE_OP (ptree) = (int) pcode;
PT_PARENT (ptree) = parent;
- PT_ITER_COUNT (ptree) = ITER_NODE_NITERS (inode);
+ PT_ITER_COUNT (ptree) = iter_count;
if (stmt)
{
@@ -1566,11 +1639,11 @@
struct primop_tree *
create_primTree_memref (tree base, tree step, bool is_read, int num,
- struct ITER_node *inode, struct primop_tree *parent)
+ tree iter_count, struct primop_tree *parent)
{
struct primop_tree * ptree;
- ptree = populate_prim_node (POP_MEMREF, inode, parent, NULL);
+ ptree = populate_prim_node (POP_MEMREF, iter_count, parent, NULL);
PT_MEMVAL_BASE (ptree) = unshare_expr (base);
PT_MEMVAL_MULT_IDX (ptree) = unshare_expr (step);
@@ -1579,8 +1652,8 @@
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
- " create_primTree_memref : stride - %d\n ",
- tree_to_uhwi (step) / num);
+ " create_primTree_memref %d : stride - %d\n ",
+ PT_PID (ptree), tree_to_uhwi (step) / num);
}
return ptree;
}
@@ -1591,11 +1664,11 @@
which primtree is being created. */
struct primop_tree *
create_primTree_combine (enum primop_code pcode, gimple *stmt, int parts,
- struct ITER_node *inode, struct primop_tree *parent)
+ tree iter_count, struct primop_tree *parent)
{
struct primop_tree * ptree;
- ptree = populate_prim_node (pcode, inode, parent, stmt);
+ ptree = populate_prim_node (pcode, iter_count, parent, stmt);
PT_OPERAND_SELECTOR (ptree) = -1;
PT_DIVISION (ptree) = parts;
PT_VAR_STRIDE (ptree) = NULL;
@@ -1602,7 +1675,8 @@
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
- " create_primTree_combine: parts - %d\n", parts);
+ " create_primTree_combine %d : parts - %d\n",
+ PT_PID (ptree), parts);
}
return ptree;
@@ -1615,12 +1689,12 @@
SELECTOR is the part being selected. */
struct primop_tree *
create_primTree_partition (enum primop_code pcode, gimple *stmt, int parts,
- int selector, struct ITER_node *inode,
+ int selector, tree iter_count,
struct primop_tree *parent)
{
struct primop_tree * ptree;
- ptree = populate_prim_node (pcode, inode, parent, stmt);
+ ptree = populate_prim_node (pcode, iter_count, parent, stmt);
PT_OPERAND_SELECTOR (ptree) = selector;
PT_DIVISION (ptree) = parts;
PT_VAR_STRIDE (ptree) = NULL;
@@ -1628,8 +1702,8 @@
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
- " create_primTree_partition : parts - %d selector - %d\n",
- parts, selector);
+ " create_primTree_partition %d : parts - %d selector - %d\n",
+ PT_PID (ptree), parts, selector);
}
return ptree;
@@ -1660,7 +1734,46 @@
return PT_CHILD (ptree, idx);
}
+/* Function duplicate_prim_node.
+ This function copies contents of SRC node into new node, and returns pointer
+ to it.
+*/
+
+struct primop_tree *
+duplicate_prim_node (struct primop_tree *src)
+{
+ struct primop_tree *ptree;
+ int i;
+
+ ptree = init_primop_node ();
+
+ PT_NODE_OP (ptree) = PT_NODE_OP (src);
+ PT_PARENT (ptree) = PT_PARENT (src);
+ PT_ITER_COUNT (ptree) = PT_ITER_COUNT (src);
+ PT_VEC_SIZE (ptree) = PT_VEC_SIZE (src);
+ PT_VEC_TYPE (ptree) = PT_VEC_TYPE (src);
+
+ PT_ATTR_NO (ptree) = PT_ATTR_NO (src);
+
+ for (i = 0; i < src->children.length (); i++)
+ {
+ add_child_at_index (ptree, PT_CHILD (src, i), i);
+ }
+
+ memcpy (&ptree->u, &src->u, sizeof (ptree->u));
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " duplicate_prim_node : pid - %d node_op - %s\n",
+ PT_PID (src), tree_code_name[PT_NODE_OP (ptree)]);
+ }
+
+ return ptree;
+}
+
+
/* Function vectorizable_store.
This function checks if STMT is STORE instruction and is vectorizable for
@@ -1808,10 +1921,11 @@
pnode = exists_primTree_with_memref (base, step, false, inode);
if (pnode == NULL)
{
- pnode = create_primTree_memref (base, step, false, num, inode, NULL);
- ITER_NODE_LOOP_BODY (inode).safe_push (pnode);
+ pnode = create_primTree_memref (base, step, false, num, ITER_NODE_NITERS (inode), NULL);
+ ITER_NODE_LOOP_BODY (inode).safe_insert (ITER_NODE_LOOP_BODY (inode).length (),
+ pnode);
pchild1 = create_primTree_combine (POP_ILV, stmt,
- tree_to_uhwi (step) / num, inode, pnode);
+ tree_to_uhwi (step) / num, ITER_NODE_NITERS (inode), pnode);
add_child_at_index (pnode, pchild1, 0);
}
else
@@ -1922,8 +2036,8 @@
pnode = create_primTree_partition (POP_EXTR, stmt,
tree_to_uhwi (step) / num,
- tree_to_uhwi (offset) / num, inode, parent);
- pchild1 = create_primTree_memref (base, step, true, num, inode, pnode);
+ tree_to_uhwi (offset) / num, ITER_NODE_NITERS (inode), parent);
+ pchild1 = create_primTree_memref (base, step, true, num, ITER_NODE_NITERS (inode), pnode);
add_child_at_index (pnode, pchild1, 0);
return pnode;
}
@@ -2067,7 +2181,7 @@
static bool
create_ptree (struct ITER_node *inode)
{
- auto_vec<gimple *, 64> worklist;
+ vec<gimple *> worklist;
bool is_ok;
mark_probable_root_nodes (inode);
@@ -2230,6 +2344,54 @@
return create_ptree (inode);
}
+void
+print_primtree (FILE *fp, struct primop_tree *t)
+{
+ switch (PT_NODE_OP (t))
+ {
+ case POP_ILV:
+ case POP_CONCAT:
+ dump_printf (MSG_NOTE, "%s:%d_%d\n", tree_code_name[PT_NODE_OP (t)],
+ PT_PID (t), PT_DIVISION (t));
+ break;
+
+ case POP_EXTR:
+ case POP_SPLT:
+ dump_printf (MSG_NOTE, "%s:%d_%d,%d\n", tree_code_name[PT_NODE_OP (t)],
+ PT_PID (t), PT_DIVISION (t), PT_OPERAND_SELECTOR (t));
+ break;
+
+ case POP_MEMREF:
+ dump_printf (MSG_NOTE, "%s:%d\n", tree_code_name[PT_NODE_OP (t)],
+ PT_PID (t));
+ print_generic_expr (fp, PT_MEMVAL_BASE (t), TDF_SLIM);
+ break;
+
+ case POP_CONST:
+ case POP_INV:
+ case POP_COLLAPSE:
+ break;
+
+ case POP_ITER:
+ break;
+
+ default:
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, PT_COMPUTE_STMT (t), 0);
+ break;
+
+ }
+}
+
+void
+dump_primtree_node (int dump_kind, struct primop_tree *t)
+{
+ if (dump_file)
+ print_primtree (dump_file, t);
+
+ else if (alt_dump_file)
+ print_primtree (alt_dump_file, t);
+}
+
/* Function pretty_print_gimple_vec.
Function to pretty print all the gimple statements in LIST. */
@@ -2250,25 +2412,6 @@
}
}
-#define DEFTREECODE(SYM, NAME, TYPE, LEN) NAME,
-#define END_OF_BASE_TREE_CODES "@dummy",
-
-static const char *const tree_code_name[] = {
-#include "all-tree.def"
-"ILV",
-"CONCAT",
-"EXTR",
-"SPLT",
-"COLLAPSE",
-"MEMREF",
-"CONST",
-"INVAR",
-"ITER"
-};
-
-#undef DEFTREECODE
-#undef END_OF_BASE_TREE_CODES
-
/* Function pp_primop_tree.
Function to pretty print primop_tree PTREE. */
@@ -2276,6 +2419,7 @@
void
pp_primop_tree (pretty_printer *pp, struct primop_tree * ptree)
{
+ int i;
pp_newline_and_indent (pp, 0);
pp_indent (pp);
pp_printf (pp,"node [shape=record];");
@@ -2330,12 +2474,13 @@
worklist = ptree->children.copy ();
- while (worklist.length () > 0)
+ for (i = 0; i < worklist.length (); i++)
{
- struct primop_tree *child = worklist.pop ();
+ struct primop_tree *child;
+ worklist.iterate (i, &child);
pp_newline_and_indent (pp, 0);
pp_indent (pp);
- pp_printf (pp, "%d -> %d;", PT_PID (ptree), PT_PID (child));
+ pp_printf (pp, "%d -> %d [label = %d];", PT_PID (ptree), PT_PID (child), i);
}
}
@@ -2349,13 +2494,16 @@
pretty_print_ptree_vec (pretty_printer *pp, vec<struct primop_tree *> list)
{
vec<struct primop_tree *> worklist;
+ struct primop_tree *tmp;
+ int i;
worklist = list.copy ();
- while (worklist.length () > 0)
+ for (i = 0; i < worklist.length (); i++)
{
pp_newline_and_indent (pp, 0);
- pp_primop_tree (pp, worklist.pop ());
+ worklist.iterate (i, &tmp);
+ pp_primop_tree (pp, tmp);
}
}
@@ -2449,6 +2597,8 @@
unsigned int num_vectorized_loops = 0;
unsigned int ret = 0;
unsigned int i;
+ unsigned int vector_sizes, max_vec_size;
+
vect_loops_num = number_of_loops (cfun);
/* Bail out if there are no loops. */
@@ -2474,6 +2624,9 @@
/* Vectorization should be possible. Let us find if all statements are
vectorizable, and if yes, create p-tree. */
struct ITER_node * tmp_iter_node;
+ struct primop_tree *tmp_tree;
+ bool failed;
+ vec<struct primop_tree *> worklist;
vect_location = find_loop_location (loop);
if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
@@ -2516,6 +2669,55 @@
dump_iter_node (tmp_iter_node, alt_dump_file);
}
+ /* To enable best possible instruction selection, the tree should be
+ promoted to MAX_VEC_SIZE first, and then reduced to arity supported
+ by architecture. The macro TARGET_VECTORIZATION_ARITY provides list
+ of arities supported by architecture. */
+
+ vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
+
+ max_vec_size = 1 << floor_log2 (vector_sizes);
+
+ failed =false;
+ i = 0;
+ worklist = vNULL;
+ worklist = (ITER_NODE_LOOP_BODY (tmp_iter_node)).copy ();
+ for (i = 0; i < worklist.length (); i++)
+ {
+ gcc_assert (worklist.iterate (i, &tmp_tree));
+ tmp_tree = k_arity_promotion_reduction (tmp_tree, max_vec_size);
+ if (tmp_tree == NULL)
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "%d_promotion failed.\n", max_vec_size);
+ failed = true;
+ break;
+ }
+
+ tmp_tree = k_arity_promotion_reduction (tmp_tree, 2);
+ if (tmp_tree == NULL)
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "arity_promotion_reduction failed.\n");
+ failed = true;
+ break;
+ }
+
+ ITER_NODE_LOOP_BODY (tmp_iter_node)[i] = tmp_tree;
+ }
+
+ if (failed)
+ continue;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf (MSG_NOTE, "\nk-arity promotion/reduction applied.\n");
+ if (dump_file)
+ dump_iter_node (tmp_iter_node, dump_file);
+ if (alt_dump_file)
+ dump_iter_node (tmp_iter_node, alt_dump_file);
+ }
+
gimple *loop_vectorized_call = vect_loop_vectorized_call (loop);
/* If the loop is vectorized, set uid of stmts within scalar loop to
0. This change is needed if transform phase uses this loop info. */
===================================================================
@@ -121,50 +121,6 @@
#define STMT_ATTR_DR(s) (get_stmt_attr (s))->dr
#define STMT_ATTR_VECTYPE(s) (get_stmt_attr (s))->vectype
-vec<struct stmt_attr *> stmt_attr_vec;
-
-void
-init_stmt_attr_vec (void)
-{
- gcc_assert (!stmt_attr_vec.exists ());
- stmt_attr_vec.create (50);
-}
-
-void
-free_stmt_attr_vec (void)
-{
- gcc_assert (stmt_attr_vec.exists ());
- stmt_attr_vec.release ();
-}
-
-inline void
-set_stmt_attr (gimple *stmt, struct stmt_attr *info)
-{
- unsigned int uid = gimple_uid (stmt);
- if (uid == 0)
- {
- gcc_checking_assert (info);
- uid = stmt_attr_vec.length () + 1;
- gimple_set_uid (stmt, uid);
- stmt_attr_vec.safe_push (info);
- }
- else
- {
- gcc_checking_assert (info == NULL);
- stmt_attr_vec[uid - 1] = info;
- }
-}
-
-inline struct stmt_attr *
-get_stmt_attr (gimple *stmt)
-{
- unsigned int uid = gimple_uid (stmt);
- if (uid == 0)
- return NULL;
-
- return stmt_attr_vec[uid - 1];
-}
-
/* PRIMOP_TREE : Memory accesses within a loop have definite repetative pattern
which can be captured using primitive permute operators which can be used to
determine desired permute order for the vector computations. The PRIMOP_TREE
@@ -277,14 +233,6 @@
#define PT_AUX(x) (x)->aux
//struct ITER_node *iter_node;
-
-extern unsigned int vectorize_loops_using_uniop (void);
-extern struct primop_tree * analyze_and_create_ptree (struct primop_tree *,
- gimple *, struct ITER_node *);
-extern void pretty_print_ptree_vec (pretty_printer *,
- vec<struct primop_tree*>);
-extern void pretty_print_iter_node (pretty_printer *, struct ITER_node *, int);
-
enum primop_code {
POP_ILV=MAX_TREE_CODES,
POP_CONCAT,
@@ -295,3 +243,32 @@
POP_CONST,
POP_INV,
POP_ITER};
+
+extern unsigned int vectorize_loops_using_uniop (void);
+extern struct primop_tree * analyze_and_create_ptree (struct primop_tree *,
+ gimple *, struct ITER_node *);
+extern void pretty_print_ptree_vec (pretty_printer *,
+ vec<struct primop_tree*>);
+extern void pretty_print_iter_node (pretty_printer *, struct ITER_node *, int);
+extern struct primop_tree * k_arity_promotion_reduction (struct primop_tree *,
+ int);
+extern struct primop_tree * init_primop_node (void);
+extern struct primop_tree * populate_prim_node (enum primop_code, tree,
+ struct primop_tree *, gimple *);
+extern struct primop_tree * exists_primTree_with_memref (tree, tree, bool,
+ struct ITER_node *);
+extern struct primop_tree * create_primTree_memref (tree, tree, bool, int, tree,
+ struct primop_tree *);
+extern struct primop_tree * create_primTree_combine (enum primop_code, gimple *,
+ int, tree, struct primop_tree *);
+extern struct primop_tree * create_primTree_partition (enum primop_code,
+ gimple *, int, int, tree,
+ struct primop_tree *);
+extern void add_child_at_index (struct primop_tree *, struct primop_tree *, int);
+extern struct primop_tree * get_child_at_index (struct primop_tree *, int);
+extern struct primop_tree * duplicate_prim_node (struct primop_tree *);
+extern void pp_primop_tree (pretty_printer *, struct primop_tree *);
+extern void dump_primtree_node (int, struct primop_tree *);
+
+
+