new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+#include "tree-vect.h"
+
+int a[512];
+int b[512];
+int c[512];
+/* Copy elements of P, each guarded by a flag in global A, pairwise into B and C.  */
+void __attribute__((noipa))  /* GCC attribute: disables interprocedural optimizations for foo.  */
+foo(int * __restrict p)
+{
+ for (int i = 0; i < 64; ++i)  /* Each iteration reads 4 elements of A/P, writes 2 of B and 2 of C.  */
+ {
+ int tem = 2, tem2 = 2;  /* Defaults, kept while the guards below are zero.  */
+ if (a[4*i + 1])
+ tem = p[4*i];  /* Guard offset (+1) differs from load offset (+0).  */
+ if (a[4*i])
+ tem2 = p[4*i + 2];  /* Guard offset (+0) differs from load offset (+2).  */
+ b[2*i] = tem2;
+ b[2*i+1] = tem;
+ if (a[4*i + 2])
+ tem = p[4*i + 1];  /* If the guard is zero, TEM keeps the value just stored to B.  */
+ if (a[4*i + 3])
+ tem2 = p[4*i + 3];  /* If the guard is zero, TEM2 keeps the value just stored to B.  */
+ c[2*i] = tem2;
+ c[2*i+1] = tem;
+ }
+}
+int main()  /* Returns 0 on success; calls abort () when foo produced a wrong result.  */
+{
+ check_vect ();  /* Declared in tree-vect.h (not shown); presumably a target capability check.  */
+ /* Fill A with the repeating pattern 0, 0, 1, 1.  */
+ for (int i = 0; i < 512; ++i)
+ a[i] = (i >> 1) & 1;
+ /* A is passed as P, so it is both the guard flags and the loaded data.  */
+ foo (a);
+ /* Only the results of iterations 0 and 1 are checked.  */
+ if (c[0] != 1 || c[1] != 0 || c[2] != 1 || c[3] != 0  /* C guards are 1: P[3], P[1], P[7], P[5].  */
+ || b[0] != 2 || b[1] != 2 || b[2] != 2 || b[3] != 2)  /* B guards are 0: defaults kept.  */
+ abort ();
+
+ return 0;
+}
+
@@ -1921,12 +1921,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
if (STMT_VINFO_DATA_REF (stmt_info)
&& DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
{
- if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
- gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
- || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
- || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)
- || gimple_call_internal_p (stmt, IFN_MASK_LEN_GATHER_LOAD));
- else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
else
{
@@ -1943,19 +1938,43 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
load_permutation.create (group_size);
stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
+ bool any_permute = false;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
{
int load_place;
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
load_place = vect_get_place_in_interleaving_chain
- (load_info, first_stmt_info);
+ (load_info, first_stmt_info);
else
load_place = 0;
gcc_assert (load_place != -1);
- load_permutation.safe_push (load_place);
+ any_permute |= load_place != j;
+ load_permutation.quick_push (load_place);
+ }
+
+ if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
+ {
+ gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
+ || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
+ || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)
+ || gimple_call_internal_p (stmt,
+ IFN_MASK_LEN_GATHER_LOAD));
+ load_permutation.release ();
+ /* We cannot handle permuted masked loads, see PR114375. */
+ if (any_permute
+ || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ && DR_GROUP_SIZE (first_stmt_info) != group_size)
+ || STMT_VINFO_STRIDED_P (stmt_info))
+ {
+ matches[0] = false;
+ return NULL;
+ }
+ }
+ else
+ {
+ SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
+ return node;
}
- SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
- return node;
}
}
else if (gimple_assign_single_p (stmt_info->stmt)
@@ -10080,6 +10080,14 @@ vectorizable_load (vec_info *vinfo,
"unsupported masked emulated gather.\n");
return false;
}
+ else if (memory_access_type == VMAT_ELEMENTWISE
+ || memory_access_type == VMAT_STRIDED_SLP)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported masked strided access.\n");
+ return false;
+ }
}
bool costing_p = !vec_stmt;