From patchwork Tue Sep 7 12:58:56 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: Fix PR43432, vectorize backwards stepping loads From: Michael Matz X-Patchwork-Id: 64009 Message-Id: To: gcc-patches@gcc.gnu.org Date: Tue, 7 Sep 2010 14:58:56 +0200 (CEST) Hi, this implements handling of consecutive loads whose indices run backwards. I'm handling the easy cases only, namely when no explicit realignment instructions need to be generated and when we need to emit only one load per original load (i.e. no multiple type width in the loop). This all requires that we be able to reverse the elements of a vector, hence two new helper functions for that are added. Reversed stores aren't handled but should be reasonably easy to extend. Regstrapped on x86_64-linux. I've had to add x86_64-*-* to target-supports.exp/vect_perm recognition which in turn makes vect/slp-perm-8.c and vect/slp-perm-9.c fail then. That's most probably because while x86_64 supports permutation for 4 and 8-sized elements it doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c). I'm going to investigate this but seek feedback on the patch itself already now. Ciao, Michael. Index: tree-vect-data-refs.c =================================================================== --- tree-vect-data-refs.c (revision 163773) +++ tree-vect-data-refs.c (working copy) @@ -2281,7 +2281,9 @@ vect_analyze_data_ref_access (struct dat } /* Consecutive? */ - if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))) + if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)) + || (dr_step < 0 + && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step))) { /* Mark that it is not interleaving. */ DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL; @@ -2953,6 +2955,7 @@ vect_create_data_ref_ptr (gimple stmt, s tree vptr; gimple_stmt_iterator incr_gsi; bool insert_after; + bool negative; tree indx_before_incr, indx_after_incr; gimple incr; tree step; @@ -2985,6 +2988,7 @@ vect_create_data_ref_ptr (gimple stmt, s *inv_p = true; else *inv_p = false; + negative = tree_int_cst_compare (step, size_zero_node) < 0; /* Create an expression for the first address accessed by this load in LOOP. */ @@ -3144,6 +3148,8 @@ vect_create_data_ref_ptr (gimple stmt, s LOOP is zero. In this case the step here is also zero. */ if (*inv_p) step = size_zero_node; + else if (negative) + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); standard_iv_increment_position (loop, &incr_gsi, &insert_after); Index: tree-vect-stmts.c =================================================================== --- tree-vect-stmts.c (revision 163773) +++ tree-vect-stmts.c (working copy) @@ -3134,6 +3134,13 @@ vectorizable_store (gimple stmt, gimple_ if (!STMT_VINFO_DATA_REF (stmt_info)) return false; + if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "negative step for store."); + return false; + } + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { strided_store = true; @@ -3400,6 +3407,68 @@ vectorizable_store (gimple stmt, gimple_ return true; } +/* Given a vector type VECTYPE returns a builtin DECL to be used + for vector permutation and stores a mask into *MASK that implements + reversal of the vector elements. If that is impossible to do + returns NULL (and *MASK is unchanged). */ + +static tree +perm_mask_for_reverse (tree vectype, tree *mask) +{ + tree builtin_decl; + tree mask_element_type, mask_type; + tree mask_vec = NULL; + int i; + int nunits; + if (!targetm.vectorize.builtin_vec_perm) + return NULL; + + builtin_decl = targetm.vectorize.builtin_vec_perm (vectype, + &mask_element_type); + if (!builtin_decl || !mask_element_type) + return NULL; + + mask_type = get_vectype_for_scalar_type (mask_element_type); + nunits = TYPE_VECTOR_SUBPARTS (vectype); + if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type)) + return NULL; + + for (i = 0; i < nunits; i++) + mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec); + mask_vec = build_vector (mask_type, mask_vec); + + if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec)) + return NULL; + if (mask) + *mask = mask_vec; + return builtin_decl; +} + +/* Given a vector variable X, that was generated for the scalar LHS of + STMT, generate instructions to reverse the vector elements of X, + insert them a *GSI and return the permuted vector variable. */ + +static tree +reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi) +{ + tree vectype = TREE_TYPE (x); + tree mask_vec, builtin_decl; + tree perm_dest, data_ref; + gimple perm_stmt; + + builtin_decl = perm_mask_for_reverse (vectype, &mask_vec); + + perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype); + + /* Generate the permute statement. */ + perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec); + data_ref = make_ssa_name (perm_dest, perm_stmt); + gimple_call_set_lhs (perm_stmt, data_ref); + vect_finish_stmt_generation (stmt, perm_stmt, gsi); + + return data_ref; +} + /* vectorizable_load. Check if STMT reads a non scalar data-ref (array/pointer/structure) that @@ -3442,6 +3511,7 @@ vectorizable_load (gimple stmt, gimple_s gimple first_stmt; tree scalar_type; bool inv_p; + bool negative; bool compute_in_loop = false; struct loop *at_loop; int vec_num; @@ -3504,6 +3574,14 @@ vectorizable_load (gimple stmt, gimple_s if (!STMT_VINFO_DATA_REF (stmt_info)) return false; + negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0; + if (negative && ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types with negative step."); + return false; + } + scalar_type = TREE_TYPE (DR_REF (dr)); mode = TYPE_MODE (vectype); @@ -3538,6 +3616,25 @@ vectorizable_load (gimple stmt, gimple_s return false; } + if (negative) + { + gcc_assert (!strided_load); + alignment_support_scheme = vect_supportable_dr_alignment (dr, false); + if (alignment_support_scheme != dr_aligned + && alignment_support_scheme != dr_unaligned_supported) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "negative step but alignment required."); + return false; + } + if (!perm_mask_for_reverse (vectype, NULL)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "negative step and reversing not supported."); + return false; + } + } + if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; @@ -3712,6 +3809,9 @@ vectorizable_load (gimple stmt, gimple_s else at_loop = loop; + if (negative) + offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); + prev_stmt_info = NULL; for (j = 0; j < ncopies; j++) { @@ -3885,6 +3985,12 @@ vectorizable_load (gimple stmt, gimple_s gcc_unreachable (); /* FORNOW. */ } + if (negative) + { + new_temp = reverse_vec_elements (new_temp, stmt, gsi); + new_stmt = SSA_NAME_DEF_STMT (new_temp); + } + /* Collect vector loads and later create their permutation in vect_transform_strided_load (). */ if (strided_load || slp_perm) Index: testsuite/gcc.dg/vect/pr43432.c =================================================================== --- testsuite/gcc.dg/vect/pr43432.c (revision 0) +++ testsuite/gcc.dg/vect/pr43432.c (revision 0) @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_float } */ +/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */ + + +void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, +int len){ + int i; + src1 += len-1; + for(i=0; i