From: Michael Matz
Date: Thu, 9 Sep 2010 16:30:28 +0200 (CEST)
To: gcc-patches@gcc.gnu.org
Cc: Ira Rosen
Subject: Re: Fix PR43432, vectorize backwards stepping loads

Hi,

On Wed, 8 Sep 2010, Ira Rosen wrote:

> > Regstrapped on x86_64-linux.  I've had to add x86_64-*-* to the
> > target-supports.exp vect_perm recognition, which in turn makes
> > vect/slp-perm-8.c and vect/slp-perm-9.c fail.  That's most probably
> > because while x86_64 supports permutation of 4- and 8-byte elements,
> > it doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c).
> > I'm going to investigate this, but I'm already seeking feedback on
> > the patch itself now.
>
> Looks good to me.

Okay, so here's the complete patch.  Functionality is unchanged, but I
extended the testsuite with vect_perm_byte and vect_perm_short
predicates (using them in slp-perm-8.c and slp-perm-9.c), so that I can
enable vect_perm on x86_64.

With this one I get no regressions on x86_64-linux (all default
languages).  Okay for trunk?


Ciao,
Michael.

Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c	(revision 163997)
+++ tree-vect-data-refs.c	(working copy)
@@ -2282,7 +2282,9 @@ vect_analyze_data_ref_access (struct dat
     }
 
   /* Consecutive?  */
-  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
+      || (dr_step < 0
+          && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
     {
       /* Mark that it is not interleaving.  */
       DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
@@ -2954,6 +2956,7 @@ vect_create_data_ref_ptr (gimple stmt, s
   tree vptr;
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
+  bool negative;
   tree indx_before_incr, indx_after_incr;
   gimple incr;
   tree step;
@@ -2986,6 +2989,7 @@ vect_create_data_ref_ptr (gimple stmt, s
     *inv_p = true;
   else
     *inv_p = false;
+  negative = tree_int_cst_compare (step, size_zero_node) < 0;
 
   /* Create an expression for the first address accessed by this load
      in LOOP.  */
@@ -3145,6 +3149,8 @@ vect_create_data_ref_ptr (gimple stmt, s
      LOOP is zero.  In this case the step here is also zero.  */
   if (*inv_p)
     step = size_zero_node;
+  else if (negative)
+    step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
 
   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 163999)
+++ tree-vect-stmts.c	(working copy)
@@ -3134,6 +3134,13 @@ vectorizable_store (gimple stmt, gimple_
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "negative step for store.");
+      return false;
+    }
+
   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     {
       strided_store = true;
@@ -3412,6 +3419,68 @@ vectorizable_store (gimple stmt, gimple_
   return true;
 }
 
+/* Given a vector type VECTYPE, return a builtin DECL to be used
+   for vector permutation, and store into *MASK a mask that implements
+   reversal of the vector elements.  If that is impossible to do,
+   return NULL (and leave *MASK unchanged).  */
+
+static tree
+perm_mask_for_reverse (tree vectype, tree *mask)
+{
+  tree builtin_decl;
+  tree mask_element_type, mask_type;
+  tree mask_vec = NULL;
+  int i;
+  int nunits;
+
+  if (!targetm.vectorize.builtin_vec_perm)
+    return NULL;
+
+  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
+                                                     &mask_element_type);
+  if (!builtin_decl || !mask_element_type)
+    return NULL;
+
+  mask_type = get_vectype_for_scalar_type (mask_element_type);
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
+    return NULL;
+
+  for (i = 0; i < nunits; i++)
+    mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
+  mask_vec = build_vector (mask_type, mask_vec);
+
+  if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
+    return NULL;
+  if (mask)
+    *mask = mask_vec;
+  return builtin_decl;
+}
+
+/* Given a vector variable X that was generated for the scalar LHS of
+   STMT, generate statements to reverse the vector elements of X,
+   insert them at *GSI, and return the permuted vector variable.  */
+
+static tree
+reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
+{
+  tree vectype = TREE_TYPE (x);
+  tree mask_vec, builtin_decl;
+  tree perm_dest, data_ref;
+  gimple perm_stmt;
+
+  builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
+
+  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
+
+  /* Generate the permute statement.  */
+  perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
+  data_ref = make_ssa_name (perm_dest, perm_stmt);
+  gimple_call_set_lhs (perm_stmt, data_ref);
+  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+
+  return data_ref;
+}
+
 /* vectorizable_load.
 
    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
@@ -3454,6 +3523,7 @@ vectorizable_load (gimple stmt, gimple_s
   gimple first_stmt;
   tree scalar_type;
   bool inv_p;
+  bool negative;
   bool compute_in_loop = false;
   struct loop *at_loop;
   int vec_num;
@@ -3516,6 +3586,14 @@ vectorizable_load (gimple stmt, gimple_s
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+  if (negative && ncopies > 1)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "multiple types with negative step.");
+      return false;
+    }
+
   scalar_type = TREE_TYPE (DR_REF (dr));
   mode = TYPE_MODE (vectype);
@@ -3550,6 +3628,25 @@ vectorizable_load (gimple stmt, gimple_s
       return false;
     }
 
+  if (negative)
+    {
+      gcc_assert (!strided_load);
+      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+      if (alignment_support_scheme != dr_aligned
+          && alignment_support_scheme != dr_unaligned_supported)
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "negative step but alignment required.");
+          return false;
+        }
+      if (!perm_mask_for_reverse (vectype, NULL))
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "negative step and reversing not supported.");
+          return false;
+        }
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
@@ -3724,6 +3821,9 @@ vectorizable_load (gimple stmt, gimple_s
   else
     at_loop = loop;
 
+  if (negative)
+    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
+
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
@@ -3912,6 +4012,12 @@ vectorizable_load (gimple stmt, gimple_s
               gcc_unreachable (); /* FORNOW.  */
             }
 
+          if (negative)
+            {
+              new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+              new_stmt = SSA_NAME_DEF_STMT (new_temp);
+            }
+
           /* Collect vector loads and later create their permutation in
              vect_transform_strided_load ().  */
           if (strided_load || slp_perm)
Index: testsuite/gcc.dg/vect/vect-114.c
===================================================================
--- testsuite/gcc.dg/vect/vect-114.c	(revision 163997)
+++ testsuite/gcc.dg/vect/vect-114.c	(working copy)
@@ -34,6 +34,7 @@ int main (void)
   return main1 ();
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_perm } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/slp-perm-8.c
===================================================================
--- testsuite/gcc.dg/vect/slp-perm-8.c	(revision 163997)
+++ testsuite/gcc.dg/vect/slp-perm-8.c	(working copy)
@@ -53,7 +53,7 @@ int main (int argc, const char* argv[])
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm_byte } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm_byte } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/pr43432.c
===================================================================
--- testsuite/gcc.dg/vect/pr43432.c	(revision 0)
+++ testsuite/gcc.dg/vect/pr43432.c	(revision 0)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
+
+
+void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1,
+int len){
+    int i;
+    src1 += len-1;
+    for(i=0; i<len; i++)
+        dst[i] = src0[i] * src1[-i];
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_perm } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
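P.S.: For readers who don't want to trace through the vectorizer, the
scheme for a negative-step load is simple: do a forward vector load
starting VF-1 elements below the current scalar address (that's the
offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) above), then
reverse the element order with a permute (reverse_vec_elements).  Below
is a minimal hand-written sketch of the same idea for the pr43432.c
test loop, using SSE intrinsics instead of the target's
builtin_vec_perm hook.  The function name and the len % 4 == 0
assumption are mine, for illustration only; this is not the code the
vectorizer actually emits:

/* Hand-vectorized form of dst[i] = src0[i] * src1[-i], with src1
   pointing at the last element.  Illustrative sketch only; assumes
   len % 4 == 0.  */
#include <xmmintrin.h>

void
vector_fmul_reverse_sse (float *dst, const float *src0,
                         const float *src1, int len)
{
  int i;
  src1 += len - 1;
  for (i = 0; i < len; i += 4)
    {
      __m128 a = _mm_loadu_ps (src0 + i);
      /* Forward load of src1[-i-3] .. src1[-i], i.e. at offset
         -(VF - 1) = -3 from the current scalar address.  */
      __m128 b = _mm_loadu_ps (src1 - i - 3);
      /* Reverse the element order (lane 3 -> lane 0 and so on); this
         plays the role of the mask built by perm_mask_for_reverse.  */
      b = _mm_shuffle_ps (b, b, _MM_SHUFFLE (0, 1, 2, 3));
      _mm_storeu_ps (dst + i, _mm_mul_ps (a, b));
    }
}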