From patchwork Thu Oct 17 11:47:42 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Biener X-Patchwork-Id: 1178483 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-511210-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=suse.de Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="AtDl92m9"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 46v6qG67yMz9sPF for ; Thu, 17 Oct 2019 22:47:54 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type; q=dns; s= default; b=CPY7HWiLvEDDoluhE0Pdv8KCfk7xK/WtAqmrCPff5LLR/RJ4OOXXC yVwrmHTPUN37veDoOIMdGFkmAliSD9KRYxGZp9GuX4le6gr+r/JNvPr30AwjT7es 33fk06TEJ9/ON8XUnxMmxJIl2Oq4Q6CHqk9FIGuKo4QeJQQIeOKMZ0= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type; s= default; bh=zSaHVpk0nq9YHUXvyCZbm6y73SI=; b=AtDl92m9h3in9Lztld5N JqrO3jPZMM+i8FL1MisHMxfsQit8Fftrmy3QhoFX0JMFId4kZSfQZ90GF8fklLOV +/nVufA6ww5pz84aI7/3vVJQgxkah+fRhjT22od/X/rjvi8sTrXVRXdSc3NJlS3P ugP+EGIk/mLTA6qKmIak5hU= Received: (qmail 31089 invoked by alias); 17 Oct 2019 11:47:47 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: 
bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 31081 invoked by uid 89); 17 Oct 2019 11:47:47 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-18.3 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, SPF_PASS autolearn=ham version=3.3.1 spammy=Drive, UD:tree-vect-patterns.c, sk:vect_re, stmt_info X-HELO: mx1.suse.de Received: from mx2.suse.de (HELO mx1.suse.de) (195.135.220.15) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Thu, 17 Oct 2019 11:47:45 +0000 Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id C3C5CB226 for ; Thu, 17 Oct 2019 11:47:42 +0000 (UTC) Date: Thu, 17 Oct 2019 13:47:42 +0200 (CEST) From: Richard Biener To: gcc-patches@gcc.gnu.org Subject: [PATCH] Move the rest of the validity checks from vect_is_simple_reduction Message-ID: User-Agent: Alpine 2.21 (LSU 202 2017-01-01) MIME-Version: 1.0 to vectorizable_reduction, that is. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. * tree-vect-loop.c (needs_fold_left_reduction_p): Export. (vect_is_simple_reduction): Move all validity checks ... (vectorizable_reduction): ... here. Compute whether we need a fold-left reduction here. * tree-vect-patterns.c (vect_reassociating_reduction_p): Merge both overloads, check needs_fold_left_reduction_p directly. * tree-vectorizer.h (needs_fold_left_reduction_p): Declare. diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 0530d6643b4..791c17ab0ea 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2536,7 +2536,7 @@ report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg) on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer overflow must wrap. 
*/ -static bool +bool needs_fold_left_reduction_p (tree type, tree_code code) { /* CHECKME: check for !flag_finite_math_only too? */ @@ -2888,13 +2888,6 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, op1 = gimple_assign_rhs2 (def_stmt); op2 = gimple_assign_rhs3 (def_stmt); } - else if (!commutative_tree_code (code) || !associative_tree_code (code)) - { - if (dump_enabled_p ()) - report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, - "reduction: not commutative/associative: "); - return NULL; - } else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) { op1 = gimple_assign_rhs1 (def_stmt); @@ -2917,18 +2910,6 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, return NULL; } - /* Check whether it's ok to change the order of the computation. - Generally, when vectorizing a reduction we change the order of the - computation. This may change the behavior of the program in some - cases, so we need to check that this is ok. One exception is when - vectorizing an outer-loop: the inner-loop is executed sequentially, - and therefore vectorizing reductions in the inner-loop during - outer-loop vectorization is safe. */ - tree type = TREE_TYPE (gimple_assign_lhs (def_stmt)); - if (STMT_VINFO_REDUC_TYPE (phi_info) == TREE_CODE_REDUCTION - && needs_fold_left_reduction_p (type, code)) - STMT_VINFO_REDUC_TYPE (phi_info) = FOLD_LEFT_REDUCTION; - /* Reduction is safe. 
We're dealing with one of the following: 1) integer arithmetic and no trapv 2) floating point arithmetic, and special flags permit this optimization @@ -5633,7 +5614,6 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node, loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); enum tree_code code; - internal_fn reduc_fn; int op_type; enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type; stmt_vec_info cond_stmt_vinfo = NULL; @@ -5872,19 +5852,6 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node, operation in the reduction meta. */ STMT_VINFO_REDUC_IDX (reduc_info) = reduc_index; - /* When vectorizing a reduction chain w/o SLP the reduction PHI is not - directy used in stmt. */ - if (reduc_index == -1) - { - if (STMT_VINFO_REDUC_TYPE (phi_info) == FOLD_LEFT_REDUCTION) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "in-order reduction chain without SLP.\n"); - return false; - } - } - if (!(reduc_index == -1 || dts[reduc_index] == vect_reduction_def || dts[reduc_index] == vect_nested_cycle @@ -6047,17 +6014,6 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node, double_reduc = true; } - vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); - if ((double_reduc || reduction_type != TREE_CODE_REDUCTION) - && ncopies > 1) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "multiple types in double reduction or condition " - "reduction.\n"); - return false; - } - /* 4.2. Check support for the epilog operation. If STMT represents a reduction pattern, then the type of the @@ -6093,38 +6049,75 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node, (and also the same tree-code) when generating the epilog code and when generating the code inside the loop. 
*/ - enum tree_code orig_code; - if (orig_stmt_info - && (reduction_type == TREE_CODE_REDUCTION - || reduction_type == FOLD_LEFT_REDUCTION)) + vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); + enum tree_code orig_code = ERROR_MARK; + if (reduction_type == CONST_COND_REDUCTION + || reduction_type == INTEGER_INDUC_COND_REDUCTION) { - /* This is a reduction pattern: get the vectype from the type of the - reduction variable, and get the tree-code from orig_stmt. */ - orig_code = gimple_assign_rhs_code (orig_stmt_info->stmt); - gcc_assert (vectype_out); + /* For simple condition reductions, replace with the actual expression + we want to base our reduction around. */ + orig_code = STMT_VINFO_VEC_COND_REDUC_CODE (reduc_info); + gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR); } - else + else if (reduction_type == COND_REDUCTION) + orig_code = COND_EXPR; + else if (reduction_type == TREE_CODE_REDUCTION + || reduction_type == FOLD_LEFT_REDUCTION) { - /* Regular reduction: use the same vectype and tree-code as used for - the vector code inside the loop can be used for the epilog code. */ - orig_code = code; - - if (code == MINUS_EXPR) + if (orig_stmt_info) + orig_code = gimple_assign_rhs_code (orig_stmt_info->stmt); + else + orig_code = code; + gcc_assert (vectype_out); + if (orig_code == MINUS_EXPR) orig_code = PLUS_EXPR; + } + STMT_VINFO_REDUC_CODE (reduc_info) = orig_code; - /* For simple condition reductions, replace with the actual expression - we want to base our reduction around. */ - if (reduction_type == CONST_COND_REDUCTION - || reduction_type == INTEGER_INDUC_COND_REDUCTION) + if (reduction_type == TREE_CODE_REDUCTION) + { + /* Check whether it's ok to change the order of the computation. + Generally, when vectorizing a reduction we change the order of the + computation. This may change the behavior of the program in some + cases, so we need to check that this is ok. 
One exception is when + vectorizing an outer-loop: the inner-loop is executed sequentially, + and therefore vectorizing reductions in the inner-loop during + outer-loop vectorization is safe. */ + if (needs_fold_left_reduction_p (scalar_type, orig_code)) + { + STMT_VINFO_REDUC_TYPE (reduc_info) + = reduction_type = FOLD_LEFT_REDUCTION; + /* When vectorizing a reduction chain w/o SLP the reduction PHI is not + directly used in stmt. */ + if (reduc_index == -1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "in-order reduction chain without SLP.\n"); + return false; + } + } + else if (!commutative_tree_code (orig_code) + || !associative_tree_code (orig_code)) { - orig_code = STMT_VINFO_VEC_COND_REDUC_CODE (reduc_info); - gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR); + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "reduction: not commutative/associative"); + return false; } } - STMT_VINFO_REDUC_CODE (reduc_info) = orig_code; - reduc_fn = IFN_LAST; + if ((double_reduc || reduction_type != TREE_CODE_REDUCTION) + && ncopies > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "multiple types in double reduction or condition " + "reduction or fold-left reduction.\n"); + return false; + } + internal_fn reduc_fn = IFN_LAST; if (reduction_type == TREE_CODE_REDUCTION || reduction_type == FOLD_LEFT_REDUCTION || reduction_type == INTEGER_INDUC_COND_REDUCTION diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index f9c74087893..9adc48fba56 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -833,19 +833,8 @@ vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt, /* Return true if STMT_VINFO describes a reduction for which reassociation is allowed. 
If STMT_INFO is part of a group, assume that it's part of a reduction chain and optimistically assume that all statements - except the last allow reassociation. */ - -static bool -vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo) -{ - if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) - return (STMT_VINFO_REDUC_TYPE (STMT_VINFO_REDUC_DEF (stmt_vinfo)) - != FOLD_LEFT_REDUCTION); - else - return REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL; -} - -/* As above, but also require it to have code CODE and to be a reduction + except the last allow reassociation. + Also require it to have code CODE and to be a reduction in the outermost loop. When returning true, store the operands in *OP0_OUT and *OP1_OUT. */ @@ -867,7 +856,13 @@ vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code, if (loop && nested_in_vect_loop_p (loop, stmt_info)) return false; - if (!vect_reassociating_reduction_p (stmt_info)) + if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def) + { + if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)), + code)) + return false; + } + else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL) return false; *op0_out = gimple_assign_rhs1 (assign); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 291304fe95e..559d78d4491 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1632,6 +1632,7 @@ extern widest_int vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo); /* Used in gimple-loop-interchange.c and tree-parloops.c. */ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, enum tree_code); +extern bool needs_fold_left_reduction_p (tree, tree_code); /* Drive for loop analysis stage. */ extern opt_loop_vec_info vect_analyze_loop (class loop *, loop_vec_info,