From patchwork Tue Apr 12 13:44:27 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Sandiford X-Patchwork-Id: 90803 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 46C5CB6F0C for ; Tue, 12 Apr 2011 23:44:41 +1000 (EST) Received: (qmail 24270 invoked by alias); 12 Apr 2011 13:44:39 -0000 Received: (qmail 24250 invoked by uid 22791); 12 Apr 2011 13:44:37 -0000 X-SWARE-Spam-Status: No, hits=-2.4 required=5.0 tests=AWL, BAYES_00, RCVD_IN_DNSWL_LOW, TW_TM X-Spam-Check-By: sourceware.org Received: from mail-ww0-f51.google.com (HELO mail-ww0-f51.google.com) (74.125.82.51) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Tue, 12 Apr 2011 13:44:31 +0000 Received: by wwf26 with SMTP id 26so7668966wwf.8 for ; Tue, 12 Apr 2011 06:44:30 -0700 (PDT) Received: by 10.227.36.203 with SMTP id u11mr6906294wbd.162.1302615870453; Tue, 12 Apr 2011 06:44:30 -0700 (PDT) Received: from richards-thinkpad (gbibp9ph1--blueice2n1.emea.ibm.com [195.212.29.75]) by mx.google.com with ESMTPS id ed10sm747000wbb.32.2011.04.12.06.44.28 (version=TLSv1/SSLv3 cipher=OTHER); Tue, 12 Apr 2011 06:44:29 -0700 (PDT) From: Richard Sandiford To: gcc-patches@gcc.gnu.org Mail-Followup-To: gcc-patches@gcc.gnu.org, patches@linaro.org, richard.sandiford@linaro.org Cc: patches@linaro.org Subject: [4/9] Move power-of-two checks for interleaving References: Date: Tue, 12 Apr 2011 14:44:27 +0100 In-Reply-To: (Richard Sandiford's message of "Tue, 12 Apr 2011 14:20:54 +0100") Message-ID: User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/23.1 (gnu/linux) MIME-Version: 1.0 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list 
gcc-patches@gcc.gnu.org NEON has vld3 and vst3 instructions, which support an interleaving of three vectors. This patch therefore removes the blanket power-of-two requirement for interleaving and enforces it on a per-operation basis instead. The patch also replaces: /* Check that the operation is supported. */ if (!vect_strided_store_supported (vectype)) return false; with: gcc_assert (vect_strided_store_supported (vectype, length)); because it was vectorizable_store's responsibility to check this upfront. Likewise for loads. Tested on x86_64-linux-gnu and arm-linux-gnueabi. OK to install? Richard gcc/ * tree-vectorizer.h (vect_strided_store_supported): Add an unsigned HOST_WIDE_INT argument. (vect_strided_load_supported): Likewise. (vect_permute_store_chain): Return void. (vect_transform_strided_load): Likewise. (vect_permute_load_chain): Delete. * tree-vect-data-refs.c (vect_analyze_group_access): Don't check for strided powers of two here. (vect_strided_store_supported): Take a count argument. Check that the count is a power of two. (vect_strided_load_supported): Likewise. (vect_permute_store_chain): Return void. Update after above changes. Assert that the access is supported. (vect_permute_load_chain): Likewise. Make static. (vect_transform_strided_load): Return void. * tree-vect-stmts.c (vectorizable_store): Update calls after above interface changes. (vectorizable_load): Likewise. (vect_analyze_stmt): Don't check for strided powers of two here. 
Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h 2011-04-12 11:55:07.000000000 +0100 +++ gcc/tree-vectorizer.h 2011-04-12 11:55:09.000000000 +0100 @@ -828,16 +828,14 @@ extern tree vect_create_data_ref_ptr (gi gimple *, bool, bool *); extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree); extern tree vect_create_destination_var (tree, tree); -extern bool vect_strided_store_supported (tree); -extern bool vect_strided_load_supported (tree); -extern bool vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, +extern bool vect_strided_store_supported (tree, unsigned HOST_WIDE_INT); +extern bool vect_strided_load_supported (tree, unsigned HOST_WIDE_INT); +extern void vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, gimple_stmt_iterator *, VEC(tree,heap) **); extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *, enum dr_alignment_support, tree, struct loop **); -extern bool vect_permute_load_chain (VEC(tree,heap) *,unsigned int, gimple, - gimple_stmt_iterator *, VEC(tree,heap) **); -extern bool vect_transform_strided_load (gimple, VEC(tree,heap) *, int, +extern void vect_transform_strided_load (gimple, VEC(tree,heap) *, int, gimple_stmt_iterator *); extern int vect_get_place_in_interleaving_chain (gimple, gimple); extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); Index: gcc/tree-vect-data-refs.c =================================================================== --- gcc/tree-vect-data-refs.c 2011-04-12 11:55:07.000000000 +0100 +++ gcc/tree-vect-data-refs.c 2011-04-12 11:55:09.000000000 +0100 @@ -2196,19 +2196,6 @@ vect_analyze_group_access (struct data_r return false; } - /* FORNOW: we handle only interleaving that is a power of 2. - We don't fail here if it may be still possible to vectorize the - group using SLP. 
If not, the size of the group will be checked in - vect_analyze_operations, and the vectorization will fail. */ - if (exact_log2 (stride) == -1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "interleaving is not a power of 2"); - - if (slp_impossible) - return false; - } - if (stride == 0) stride = count; @@ -3349,13 +3336,22 @@ vect_create_destination_var (tree scalar and FALSE otherwise. */ bool -vect_strided_store_supported (tree vectype) +vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count) { optab interleave_high_optab, interleave_low_optab; enum machine_mode mode; mode = TYPE_MODE (vectype); + /* vect_permute_store_chain requires the group size to be a power of two. */ + if (exact_log2 (count) == -1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "the size of the group of strided accesses" + " is not a power of 2"); + return false; + } + /* Check that the operation is supported. */ interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR, vectype, optab_default); @@ -3441,7 +3437,7 @@ vect_strided_store_supported (tree vecty I3: 4 12 20 28 5 13 21 30 I4: 6 14 22 30 7 15 23 31. */ -bool +void vect_permute_store_chain (VEC(tree,heap) *dr_chain, unsigned int length, gimple stmt, @@ -3455,9 +3451,7 @@ vect_permute_store_chain (VEC(tree,heap) unsigned int j; enum tree_code high_code, low_code; - /* Check that the operation is supported. */ - if (!vect_strided_store_supported (vectype)) - return false; + gcc_assert (vect_strided_store_supported (vectype, length)); *result_chain = VEC_copy (tree, heap, dr_chain); @@ -3510,7 +3504,6 @@ vect_permute_store_chain (VEC(tree,heap) } dr_chain = VEC_copy (tree, heap, *result_chain); } - return true; } /* Function vect_setup_realignment @@ -3787,13 +3780,22 @@ vect_setup_realignment (gimple stmt, gim and FALSE otherwise. 
*/ bool -vect_strided_load_supported (tree vectype) +vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count) { optab perm_even_optab, perm_odd_optab; enum machine_mode mode; mode = TYPE_MODE (vectype); + /* vect_permute_load_chain requires the group size to be a power of two. */ + if (exact_log2 (count) == -1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "the size of the group of strided accesses" + " is not a power of 2"); + return false; + } + perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype, optab_default); if (!perm_even_optab) @@ -3905,7 +3907,7 @@ vect_strided_load_supported (tree vectyp 3rd vec (E2): 2 6 10 14 18 22 26 30 4th vec (E4): 3 7 11 15 19 23 27 31. */ -bool +static void vect_permute_load_chain (VEC(tree,heap) *dr_chain, unsigned int length, gimple stmt, @@ -3918,9 +3920,7 @@ vect_permute_load_chain (VEC(tree,heap) int i; unsigned int j; - /* Check that the operation is supported. */ - if (!vect_strided_load_supported (vectype)) - return false; + gcc_assert (vect_strided_load_supported (vectype, length)); *result_chain = VEC_copy (tree, heap, dr_chain); for (i = 0; i < exact_log2 (length); i++) @@ -3963,7 +3963,6 @@ vect_permute_load_chain (VEC(tree,heap) } dr_chain = VEC_copy (tree, heap, *result_chain); } - return true; } @@ -3974,7 +3973,7 @@ vect_permute_load_chain (VEC(tree,heap) the scalar statements. */ -bool +void vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size, gimple_stmt_iterator *gsi) { @@ -3990,8 +3989,7 @@ vect_transform_strided_load (gimple stmt vectors, that are ready for vector computation. */ result_chain = VEC_alloc (tree, heap, size); /* Permute. */ - if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain)) - return false; + vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain); /* Put a permuted data-ref in the VECTORIZED_STMT field. 
Since we scan the chain starting from it's first node, their order @@ -4055,7 +4053,6 @@ vect_transform_strided_load (gimple stmt } VEC_free (tree, heap, result_chain); - return true; } /* Function vect_force_dr_alignment_p. Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c 2011-04-12 11:55:09.000000000 +0100 +++ gcc/tree-vect-stmts.c 2011-04-12 11:55:09.000000000 +0100 @@ -3412,9 +3412,12 @@ vectorizable_store (gimple stmt, gimple_ { strided_store = true; first_stmt = DR_GROUP_FIRST_DR (stmt_info); - if (!vect_strided_store_supported (vectype) - && !PURE_SLP_STMT (stmt_info) && !slp) - return false; + if (!slp && !PURE_SLP_STMT (stmt_info)) + { + group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); + if (!vect_strided_store_supported (vectype, group_size)) + return false; + } if (first_stmt == stmt) { @@ -3617,9 +3620,8 @@ vectorizable_store (gimple stmt, gimple_ { result_chain = VEC_alloc (tree, heap, group_size); /* Permute. */ - if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi, - &result_chain)) - return false; + vect_permute_store_chain (dr_chain, group_size, stmt, gsi, + &result_chain); } next_stmt = first_stmt; @@ -3912,10 +3914,13 @@ vectorizable_load (gimple stmt, gimple_s /* FORNOW */ gcc_assert (! nested_in_vect_loop); - /* Check if interleaving is supported. 
*/ - if (!vect_strided_load_supported (vectype) - && !PURE_SLP_STMT (stmt_info) && !slp) - return false; + first_stmt = DR_GROUP_FIRST_DR (stmt_info); + if (!slp && !PURE_SLP_STMT (stmt_info)) + { + group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); + if (!vect_strided_load_supported (vectype, group_size)) + return false; + } } if (negative) @@ -4344,10 +4349,7 @@ vectorizable_load (gimple stmt, gimple_s { if (strided_load) { - if (!vect_transform_strided_load (stmt, dr_chain, - group_size, gsi)) - return false; - + vect_transform_strided_load (stmt, dr_chain, group_size, gsi); *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); } else @@ -4766,27 +4768,6 @@ vect_analyze_stmt (gimple stmt, bool *ne return false; } - if (!PURE_SLP_STMT (stmt_info)) - { - /* Groups of strided accesses whose size is not a power of 2 are not - vectorizable yet using loop-vectorization. Therefore, if this stmt - feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and - loop-based vectorized), the loop cannot be vectorized. */ - if (STMT_VINFO_STRIDED_ACCESS (stmt_info) - && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt ( - DR_GROUP_FIRST_DR (stmt_info)))) == -1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - { - fprintf (vect_dump, "not vectorized: the size of group " - "of strided accesses is not a power of 2"); - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } - - return false; - } - } - return true; }