From patchwork Tue Sep 7 12:58:56 2010
X-Patchwork-Submitter: Michael Matz
X-Patchwork-Id: 64009
Date: Tue, 7 Sep 2010 14:58:56 +0200 (CEST)
From: Michael Matz
To: gcc-patches@gcc.gnu.org
Subject: Fix PR43432, vectorize backwards stepping loads

Hi,

this implements handling of consecutive loads whose indices run
backwards.  I'm handling only the easy cases, namely when no explicit
realignment instructions need to be generated and when we need to emit
only one load per original load (i.e. no multiple-type widths in the
loop).  All of this requires that we be able to reverse the elements of
a vector, hence two new helper functions for that are added.  Reversed
stores aren't handled yet, but the patch should be reasonably easy to
extend to them.

Regstrapped on x86_64-linux.  I had to add x86_64-*-* to the vect_perm
recognition in target-supports.exp, which in turn makes
vect/slp-perm-8.c and vect/slp-perm-9.c fail.  That's most probably
because x86_64 supports permutation for 4- and 8-byte elements but not
for char (slp-perm-8.c) or short (slp-perm-9.c).  I'm going to
investigate this, but am already seeking feedback on the patch itself.

Ciao,
Michael.
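To make the intent concrete, here is a minimal sketch (not part of the
patch; the names are made up for illustration) of the access pattern
this enables.  The load from b[] steps backwards by one element per
iteration, so the vectorizer can now load whole vectors at decreasing
addresses and reverse their elements afterwards instead of giving up:

void
reverse_copy (float *a, const float *b, int len)
{
  int i;
  b += len - 1;
  /* DR_STEP for the b[-i] load is -sizeof(float), i.e. negative.  */
  for (i = 0; i < len; i++)
    a[i] = b[-i];
}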
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c	(revision 163773)
+++ tree-vect-data-refs.c	(working copy)
@@ -2281,7 +2281,9 @@ vect_analyze_data_ref_access (struct dat
     }
 
   /* Consecutive?  */
-  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
+      || (dr_step < 0
+          && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
     {
       /* Mark that it is not interleaving.  */
       DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
@@ -2953,6 +2955,7 @@ vect_create_data_ref_ptr (gimple stmt, s
   tree vptr;
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
+  bool negative;
   tree indx_before_incr, indx_after_incr;
   gimple incr;
   tree step;
@@ -2985,6 +2988,7 @@ vect_create_data_ref_ptr (gimple stmt, s
     *inv_p = true;
   else
     *inv_p = false;
+  negative = tree_int_cst_compare (step, size_zero_node) < 0;
 
   /* Create an expression for the first address accessed by this load
      in LOOP.  */
@@ -3144,6 +3148,8 @@ vect_create_data_ref_ptr (gimple stmt, s
      LOOP is zero.  In this case the step here is also zero.  */
   if (*inv_p)
     step = size_zero_node;
+  else if (negative)
+    step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
 
   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
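For illustration (this is not part of the patch): on x86 the
builtin_vec_perm call that reverse_vec_elements below ends up emitting
for a V4SF vector amounts to a single shuffle with the mask {3, 2, 1, 0},
roughly what this hand-written sketch does with SSE intrinsics:

#include <xmmintrin.h>

/* Reverse the four floats in X.  _MM_SHUFFLE lists the source indices
   for the result lanes from highest to lowest, so (0, 1, 2, 3) selects
   element 3 into lane 0, element 2 into lane 1, and so on.  */
static __m128
reverse_v4sf (__m128 x)
{
  return _mm_shuffle_ps (x, x, _MM_SHUFFLE (0, 1, 2, 3));
}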
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 163773)
+++ tree-vect-stmts.c	(working copy)
@@ -3134,6 +3134,13 @@ vectorizable_store (gimple stmt, gimple_
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "negative step for store.");
+      return false;
+    }
+
   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     {
       strided_store = true;
@@ -3400,6 +3407,68 @@ vectorizable_store (gimple stmt, gimple_
   return true;
 }
 
+/* Given a vector type VECTYPE, return a builtin DECL to be used
+   for vector permutation and store a mask into *MASK that implements
+   reversal of the vector elements.  If that is impossible to do,
+   return NULL (and leave *MASK unchanged).  */
+
+static tree
+perm_mask_for_reverse (tree vectype, tree *mask)
+{
+  tree builtin_decl;
+  tree mask_element_type, mask_type;
+  tree mask_vec = NULL;
+  int i;
+  int nunits;
+
+  if (!targetm.vectorize.builtin_vec_perm)
+    return NULL;
+
+  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
+                                                     &mask_element_type);
+  if (!builtin_decl || !mask_element_type)
+    return NULL;
+
+  mask_type = get_vectype_for_scalar_type (mask_element_type);
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
+    return NULL;
+
+  for (i = 0; i < nunits; i++)
+    mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i),
+                          mask_vec);
+  mask_vec = build_vector (mask_type, mask_vec);
+
+  if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
+    return NULL;
+
+  if (mask)
+    *mask = mask_vec;
+  return builtin_decl;
+}
+
+/* Given a vector variable X that was generated for the scalar LHS of
+   STMT, generate instructions to reverse the vector elements of X,
+   insert them at *GSI and return the permuted vector variable.  */
+
+static tree
+reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
+{
+  tree vectype = TREE_TYPE (x);
+  tree mask_vec, builtin_decl;
+  tree perm_dest, data_ref;
+  gimple perm_stmt;
+
+  builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
+
+  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
+
+  /* Generate the permute statement.  */
+  perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
+  data_ref = make_ssa_name (perm_dest, perm_stmt);
+  gimple_call_set_lhs (perm_stmt, data_ref);
+  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+
+  return data_ref;
+}
+
 /* vectorizable_load.
 
    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
@@ -3442,6 +3511,7 @@ vectorizable_load (gimple stmt, gimple_s
   gimple first_stmt;
   tree scalar_type;
   bool inv_p;
+  bool negative;
   bool compute_in_loop = false;
   struct loop *at_loop;
   int vec_num;
@@ -3504,6 +3574,14 @@ vectorizable_load (gimple stmt, gimple_s
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+  if (negative && ncopies > 1)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "multiple types with negative step.");
+      return false;
+    }
+
   scalar_type = TREE_TYPE (DR_REF (dr));
 
   mode = TYPE_MODE (vectype);
@@ -3538,6 +3616,25 @@ vectorizable_load (gimple stmt, gimple_s
       return false;
     }
 
+  if (negative)
+    {
+      gcc_assert (!strided_load);
+      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+      if (alignment_support_scheme != dr_aligned
+          && alignment_support_scheme != dr_unaligned_supported)
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "negative step but alignment required.");
+          return false;
+        }
+      if (!perm_mask_for_reverse (vectype, NULL))
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "negative step and reversing not supported.");
+          return false;
+        }
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
@@ -3712,6 +3809,9 @@ vectorizable_load (gimple stmt, gimple_s
   else
     at_loop = loop;
 
+  if (negative)
+    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
+
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
@@ -3885,6 +3985,12 @@ vectorizable_load (gimple stmt, gimple_s
          gcc_unreachable (); /* FORNOW.  */
        }
 
+      if (negative)
+        {
+          new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+          new_stmt = SSA_NAME_DEF_STMT (new_temp);
+        }
+
       /* Collect vector loads and later create their permutation in
          vect_transform_strided_load ().  */
       if (strided_load || slp_perm)
Index: testsuite/gcc.dg/vect/pr43432.c
===================================================================
--- testsuite/gcc.dg/vect/pr43432.c	(revision 0)
+++ testsuite/gcc.dg/vect/pr43432.c	(revision 0)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
+
+
+void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1,
+int len){
+    int i;
+    src1 += len-1;
+    for(i=0; i<len; i++)
+        dst[i] = src0[i] * src1[-i];
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_perm } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
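For completeness, here is a hypothetical standalone harness (not part of
the patch or the testsuite; written just to make the expected semantics
explicit) that checks what the vectorized loop must compute, namely
dst[i] = src0[i] * src1[len-1-i]:

#include <stdio.h>

void vector_fmul_reverse_c (float *dst, const float *src0,
                            const float *src1, int len);

int
main (void)
{
  float dst[8], src0[8], src1[8];
  int i, ok = 1;

  for (i = 0; i < 8; i++)
    {
      src0[i] = i + 1.0f;
      src1[i] = (i + 1) * 10.0f;
    }

  vector_fmul_reverse_c (dst, src0, src1, 8);

  /* The reversed load means dst[i] pairs src0[i] with src1 read
     backwards from the end.  */
  for (i = 0; i < 8; i++)
    if (dst[i] != src0[i] * src1[7 - i])
      ok = 0;

  printf (ok ? "ok\n" : "FAIL\n");
  return !ok;
}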