===================================================================
@@ -4103,9 +4103,6 @@ get_initial_defs_for_reduction (slp_tree
tree vop;
int group_size = stmts.length ();
unsigned int vec_num, i;
- unsigned number_of_copies = 1;
- vec<tree> voprnds;
- voprnds.create (number_of_vectors);
struct loop *loop;
auto_vec<tree, 16> permute_results;
@@ -4138,115 +4135,78 @@ get_initial_defs_for_reduction (slp_tree
if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
nunits = group_size;
- number_of_copies = nunits * number_of_vectors / group_size;
-
number_of_places_left_in_vector = nunits;
bool constant_p = true;
tree_vector_builder elts (vector_type, nunits, 1);
elts.quick_grow (nunits);
- for (j = 0; j < number_of_copies; j++)
+ for (j = 0; j < nunits * number_of_vectors; ++j)
{
- for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
- {
- tree op;
- /* Get the def before the loop. In reduction chain we have only
- one initial value. */
- if ((j != (number_of_copies - 1)
- || (reduc_chain && i != 0))
- && neutral_op)
- op = neutral_op;
- else
- op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
-
- /* Create 'vect_ = {op0,op1,...,opn}'. */
- number_of_places_left_in_vector--;
- elts[number_of_places_left_in_vector] = op;
- if (!CONSTANT_CLASS_P (op))
- constant_p = false;
+ tree op;
+ i = j % group_size;
+ stmt_vinfo = stmts[i];
+
+ /* Get the def before the loop. In reduction chain we have only
+ one initial value. Else we have as many as PHIs in the group. */
+ if (reduc_chain)
+ op = i != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
+ else if (((vec_oprnds->length () + 1) * nunits
+ - number_of_places_left_in_vector >= group_size)
+ && neutral_op)
+ op = neutral_op;
+ else
+ op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
- if (number_of_places_left_in_vector == 0)
- {
- gimple_seq ctor_seq = NULL;
- tree init;
- if (constant_p && !neutral_op
- ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
- : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
- /* Build the vector directly from ELTS. */
- init = gimple_build_vector (&ctor_seq, &elts);
- else if (neutral_op)
+ /* Create 'vect_ = {op0,op1,...,opn}'. */
+ number_of_places_left_in_vector--;
+ elts[nunits - number_of_places_left_in_vector - 1] = op;
+ if (!CONSTANT_CLASS_P (op))
+ constant_p = false;
+
+ if (number_of_places_left_in_vector == 0)
+ {
+ gimple_seq ctor_seq = NULL;
+ tree init;
+ if (constant_p && !neutral_op
+ ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
+ : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
+ /* Build the vector directly from ELTS. */
+ init = gimple_build_vector (&ctor_seq, &elts);
+ else if (neutral_op)
+ {
+ /* Build a vector of the neutral value and shift the
+ other elements into place. */
+ init = gimple_build_vector_from_val (&ctor_seq, vector_type,
+ neutral_op);
+ int k = nunits;
+ while (k > 0 && elts[k - 1] == neutral_op)
+ k -= 1;
+ while (k > 0)
{
- /* Build a vector of the neutral value and shift the
- other elements into place. */
- init = gimple_build_vector_from_val (&ctor_seq, vector_type,
- neutral_op);
- int k = nunits;
- while (k > 0 && elts[k - 1] == neutral_op)
- k -= 1;
- while (k > 0)
- {
- k -= 1;
- init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
- vector_type, init, elts[k]);
- }
- }
- else
- {
- /* First time round, duplicate ELTS to fill the
- required number of vectors, then cherry pick the
- appropriate result for each iteration. */
- if (vec_oprnds->is_empty ())
- duplicate_and_interleave (&ctor_seq, vector_type, elts,
- number_of_vectors,
- permute_results);
- init = permute_results[number_of_vectors - j - 1];
+ k -= 1;
+ init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
+ vector_type, init, elts[k]);
}
- if (ctor_seq != NULL)
- gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
- voprnds.quick_push (init);
-
- number_of_places_left_in_vector = nunits;
- elts.new_vector (vector_type, nunits, 1);
- elts.quick_grow (nunits);
- constant_p = true;
- }
- }
- }
-
- /* Since the vectors are created in the reverse order, we should invert
- them. */
- vec_num = voprnds.length ();
- for (j = vec_num; j != 0; j--)
- {
- vop = voprnds[j - 1];
- vec_oprnds->quick_push (vop);
- }
-
- voprnds.release ();
-
- /* In case that VF is greater than the unrolling factor needed for the SLP
- group of stmts, NUMBER_OF_VECTORS to be created is greater than
- NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
- to replicate the vectors. */
- tree neutral_vec = NULL;
- while (number_of_vectors > vec_oprnds->length ())
- {
- if (neutral_op)
- {
- if (!neutral_vec)
- {
- gimple_seq ctor_seq = NULL;
- neutral_vec = gimple_build_vector_from_val
- (&ctor_seq, vector_type, neutral_op);
- if (ctor_seq != NULL)
- gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
}
- vec_oprnds->quick_push (neutral_vec);
- }
- else
- {
- for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
- vec_oprnds->quick_push (vop);
- }
+ else
+ {
+ /* First time round, duplicate ELTS to fill the
+ required number of vectors, then cherry pick the
+ appropriate result for each iteration. */
+ if (vec_oprnds->is_empty ())
+ duplicate_and_interleave (&ctor_seq, vector_type, elts,
+ number_of_vectors,
+ permute_results);
+ init = permute_results[number_of_vectors - j - 1];
+ }
+ if (ctor_seq != NULL)
+ gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
+ vec_oprnds->quick_push (init);
+
+ number_of_places_left_in_vector = nunits;
+ elts.new_vector (vector_type, nunits, 1);
+ elts.quick_grow (nunits);
+ constant_p = true;
+ }
}
}
===================================================================
@@ -12,7 +12,7 @@ extern void abort (void);
int __attribute__((noinline,noclone))
foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2)
{
- int i_sum = 0;
+ int i_sum = 5;
for( int y = 0; y < 16; y++ )
{
i_sum += abs ( pix1[0] - pix2[0] );
@@ -52,7 +52,7 @@ main ()
__asm__ volatile ("");
}
- if (foo (X, Y, 16) != 32512)
+ if (foo (X, Y, 16) != 32512 + 5)
abort ();
return 0;