Comments
Patch
different loads without gaps, but we don't check this in the analysis. This
patch adds such a check.
Bootstrapped and tested on powerpc64-suse-linux.
Committed.
Ira
ChangeLog:
PR tree-optimization/47001
* tree-vect-slp.c (vect_supported_load_permutation_p): Check that
the loads in reduction are different and there are no gaps between
them.
testsuite/ChangeLog:
PR tree-optimization/47001
* gcc.dg/vect/pr47001.c: New.
===================================================================
@@ -1002,7 +1002,36 @@ vect_supported_load_permutation_p (slp_instance sl
if (!bad_permutation)
{
- /* This permutaion is valid for reduction. Since the order of the
+ /* Check that the loads in the first sequence are different and there
+ are no gaps between them. */
+ load_index = sbitmap_alloc (group_size);
+ sbitmap_zero (load_index);
+ for (k = 0; k < group_size; k++)
+ {
+ first_group_load_index = VEC_index (int, load_permutation,
k);
+ if (TEST_BIT (load_index, first_group_load_index))
+ {
+ bad_permutation = true;
+ break;
+ }
+
+ SET_BIT (load_index, first_group_load_index);
+ }
+
+ if (!bad_permutation)
+ for (k = 0; k < group_size; k++)
+ if (!TEST_BIT (load_index, k))
+ {
+ bad_permutation = true;
+ break;
+ }
+
+ sbitmap_free (load_index);
+ }
+
+ if (!bad_permutation)
+ {
+ /* This permutation is valid for reduction. Since the order of the
statements in the nodes is not important unless they are memory
accesses, we can rearrange the statements in all the nodes
according to the order of the loads. */
===================================================================
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+
+#include <stdlib.h>
+
+#define N 128
+
+int a[N];
+
+int main1 (int res0, int res1)
+{
+ int i;
+ int sum0 = 0, sum1 = 0;
+ /* Accumulate a[2*i] for i in [0, N/2) into two separate sums.  */
+ for (i = 0; i < N/2; i++) {
+ sum1 += a[2*i]; /* Same element as the next statement: the two loads are not different.  */
+ sum0 += a[2*i];
+ }
+
+ /* Check results: abort unless both sums equal the expected values passed in.  */
+ if (sum0 != res0
+ || sum1 != res1)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */