From patchwork Wed Oct 9 13:18:57 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Biener X-Patchwork-Id: 1173775 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-510542-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=suse.de Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="vjPlEzyc"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 46pFDJ1Vjlz9sCJ for ; Thu, 10 Oct 2019 00:19:09 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type; q=dns; s= default; b=twoEB7hBlvHlT50ef6lcyD5Juiu8dPMfh3Gwt0GtqoJJ9rgW2QgkU 36JPgx1SDXDXw57FrocdyTK4cIcllbZErnkgdYfG6vNl1NFn7PEUvgtdaqTUMC3E Ehe2ABvtO7PoHPgmqQVs5EWvp3CvdGXGeN2U7EHIEIPrq8Dx2xBCHo= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type; s= default; bh=WcxGqzcy9Vt6NDPo8jP62Thc2DM=; b=vjPlEzyctZpPCB+4nPtJ Z0EKwD0sUH6t/REqSYYL9uuQ/xswype17ep6XfmXudAPVN5fvSwedAe1HpEFEk5W PhNmdfbYfbsn4JPDB05ZmmsslxVBXl3ubY3rPUwCx8CYM64SFdYjMtJAPOAns5uw /9H5FW0l2Zk6lhY8pbbzUdo= Received: (qmail 113440 invoked by alias); 9 Oct 2019 13:19:02 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: 
List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 113055 invoked by uid 89); 9 Oct 2019 13:19:02 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-18.3 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, SPF_PASS autolearn=ham version=3.3.1 spammy= X-HELO: mx1.suse.de Received: from mx2.suse.de (HELO mx1.suse.de) (195.135.220.15) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 09 Oct 2019 13:19:00 +0000 Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id E98F6B14A for ; Wed, 9 Oct 2019 13:18:57 +0000 (UTC) Date: Wed, 9 Oct 2019 15:18:57 +0200 (CEST) From: Richard Biener To: gcc-patches@gcc.gnu.org Subject: [PATCH] Relax nested cycle vectorization further Message-ID: User-Agent: Alpine 2.21 (LSU 202 2017-01-01) MIME-Version: 1.0 This simplifies and refactors vect_is_simple_reduction to make sure to not reject nested cycle vectorization just because there are calls in the innermost loop. This lets us vectorize the new testcase using outer loop vectorization. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2019-10-09 Richard Biener * tree-vect-loop.c (vect_is_simple_reduction): Simplify and allow stmts other than GIMPLE_ASSIGN in nested cycles. * gcc.dg/vect/vect-outer-call-1.c: New testcase. 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-call-1.c b/gcc/testsuite/gcc.dg/vect/vect-outer-call-1.c new file mode 100644 index 00000000000..f26d4220532 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-call-1.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_float } */ +/* { dg-additional-options "-fno-math-errno" } */ + +void +foo (float * __restrict x, float *y, int n, int m) +{ + if (m > 0) + for (int i = 0; i < n; ++i) + { + float tem = x[i], tem1; + for (int j = 0; j < m; ++j) + { + tem += y[j]; + tem1 = tem; + tem = __builtin_sqrtf (tem); + } + x[i] = tem - tem1; + } +} + +/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { vect_call_sqrtf } } } } */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index a9ea0caf218..14352102f54 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2756,10 +2756,8 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, enum tree_code orig_code, code; tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; tree type; - tree name; imm_use_iterator imm_iter; use_operand_p use_p; - bool phi_def; *double_reduc = false; STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION; @@ -2791,44 +2789,24 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, phi_use_stmt = use_stmt; } - edge latch_e = loop_latch_edge (loop); - tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); - if (TREE_CODE (loop_arg) != SSA_NAME) + tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); + if (TREE_CODE (latch_def) != SSA_NAME) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "reduction: not ssa_name: %T\n", loop_arg); + "reduction: not ssa_name: %T\n", latch_def); return NULL; } - stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg); + stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def); if (!def_stmt_info || !flow_bb_inside_loop_p (loop, gimple_bb 
(def_stmt_info->stmt))) return NULL; - if (gassign *def_stmt = dyn_cast (def_stmt_info->stmt)) - { - name = gimple_assign_lhs (def_stmt); - phi_def = false; - } - else if (gphi *def_stmt = dyn_cast (def_stmt_info->stmt)) - { - name = PHI_RESULT (def_stmt); - phi_def = true; - } - else - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "reduction: unhandled reduction operation: %G", - def_stmt_info->stmt); - return NULL; - } - unsigned nlatch_def_loop_uses = 0; auto_vec lcphis; bool inner_loop_of_double_reduc = false; - FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def) { gimple *use_stmt = USE_STMT (use_p); if (is_gimple_debug (use_stmt)) @@ -2846,11 +2824,21 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, } } + /* If we are vectorizing an inner reduction we are executing that + in the original order only in case we are not dealing with a + double reduction. */ + if (nested_in_vect_loop && !inner_loop_of_double_reduc) + { + if (dump_enabled_p ()) + report_vect_op (MSG_NOTE, def_stmt_info->stmt, + "detected nested cycle: "); + return def_stmt_info; + } + /* If this isn't a nested cycle or if the nested cycle reduction value is used ouside of the inner loop we cannot handle uses of the reduction value. */ - if ((!nested_in_vect_loop || inner_loop_of_double_reduc) - && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)) + if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -2860,9 +2848,8 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, /* If DEF_STMT is a phi node itself, we expect it to have a single argument defined in the inner loop. 
*/ - if (phi_def) + if (gphi *def_stmt = dyn_cast (def_stmt_info->stmt)) { - gphi *def_stmt = as_a (def_stmt_info->stmt); op1 = PHI_ARG_DEF (def_stmt, 0); if (gimple_phi_num_args (def_stmt) != 1 @@ -2895,35 +2882,16 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, return NULL; } - /* If we are vectorizing an inner reduction we are executing that - in the original order only in case we are not dealing with a - double reduction. */ - bool check_reduction = true; - if (flow_loop_nested_p (vect_loop, loop)) - { - gphi *lcphi; - unsigned i; - check_reduction = false; - FOR_EACH_VEC_ELT (lcphis, i, lcphi) - FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi)) - { - gimple *use_stmt = USE_STMT (use_p); - if (is_gimple_debug (use_stmt)) - continue; - if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt))) - check_reduction = true; - } - } - - gassign *def_stmt = as_a (def_stmt_info->stmt); - code = orig_code = gimple_assign_rhs_code (def_stmt); - - if (nested_in_vect_loop && !check_reduction) + gassign *def_stmt = dyn_cast (def_stmt_info->stmt); + if (!def_stmt) { if (dump_enabled_p ()) - report_vect_op (MSG_NOTE, def_stmt, "detected nested cycle: "); - return def_stmt_info; + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "reduction: unhandled reduction operation: %G", + def_stmt_info->stmt); + return NULL; } + code = orig_code = gimple_assign_rhs_code (def_stmt); /* We can handle "res -= x[i]", which is non-associative by simply rewriting this into "res += -x[i]". Avoid changing @@ -3018,8 +2986,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, vectorizing an outer-loop: the inner-loop is executed sequentially, and therefore vectorizing reductions in the inner-loop during outer-loop vectorization is safe. 
*/ - if (check_reduction - && STMT_VINFO_REDUC_TYPE (phi_info) == TREE_CODE_REDUCTION + if (STMT_VINFO_REDUC_TYPE (phi_info) == TREE_CODE_REDUCTION && needs_fold_left_reduction_p (type, code)) STMT_VINFO_REDUC_TYPE (phi_info) = FOLD_LEFT_REDUCTION; @@ -3066,9 +3033,9 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, return def_stmt_info; } - /* Look for the expression computing loop_arg from loop PHI result. */ + /* Look for the expression computing latch_def from loop PHI result. */ auto_vec > path; - if (check_reduction_path (vect_location, loop, phi, loop_arg, code, + if (check_reduction_path (vect_location, loop, phi, latch_def, code, path)) { /* Try building an SLP reduction chain for which the additional