From patchwork Mon Dec 3 15:39:31 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Biener X-Patchwork-Id: 1006997 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-491512-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=suse.de Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="WI4g62d1"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 437q1Y1VNcz9sBh for ; Tue, 4 Dec 2018 02:39:44 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type; q=dns; s= default; b=oyfhxvi0rhpdgTFhC7ieSENv9TUvDDHZ63W8GCaivvxkgi71GYTwc k0iMBiZsLp9HraVi82lrd3WU6nuesO6u21esCg/BnNNeIQfK+fqUhDef52JRgD0i 2y9J9zfO5xWIRdwkumUk7fvGUWyuMwMSyh/shWw34H3WOuI6SIcEIk= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type; s= default; bh=iFBaiEhLiIX2AdquWZ7Fhz1pSfc=; b=WI4g62d1Pwaq2v6oxRCy fE7A7ss/5Uwl3GqNpx//HTQcaEZy7/Gws8HxKYxmuqTMZh1oLaYE8KF3T0L4FAnF VFLRn/Gu8WHDDFInne/P2Leumnz3LeIr4PxwMD7x04TRpmd/WQfgQg7QNH6RIDg6 vVEMp7EUPUYxyNrQrNoAcQk= Received: (qmail 77068 invoked by alias); 3 Dec 2018 15:39:37 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 77057 invoked by uid 89); 3 Dec 2018 15:39:37 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-10.6 required=5.0 tests=BAYES_00, GIT_PATCH_2, GIT_PATCH_3, KAM_ASCII_DIVIDERS, KAM_NUMSUBJECT, SPF_PASS autolearn=ham version=3.3.2 spammy=neutral X-HELO: mx1.suse.de Received: from mx2.suse.de (HELO mx1.suse.de) (195.135.220.15) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Mon, 03 Dec 2018 15:39:34 +0000 Received: from relay1.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 2E7E3AEB4 for ; Mon, 3 Dec 2018 15:39:31 +0000 (UTC) Date: Mon, 3 Dec 2018 16:39:31 +0100 (CET) From: Richard Biener To: gcc-patches@gcc.gnu.org Subject: [PATCH] Fix PR88315 Message-ID: User-Agent: Alpine 2.20 (LSU 67 2015-01-07) MIME-Version: 1.0 While working on improving x264 vectorization I noticed (via enabling epilogue vectorization) that for SAD and DOT_PROD SLP reductions with non-zero initial value we create wrong-code. The following fixes this, removing the ugly backwards-creation of the initial value. Bootstrap & regtest running on x86_64-unknown-linux-gnu. This seems to be broken since forever. Richard. 2018-12-03 Richard Biener PR tree-optimization/88315 * tree-vect-loop.c (get_initial_defs_for_reduction): Simplify and fix initialization vector for SAD and DOT_PROD SLP reductions. * gcc.dg/vect/slp-reduc-sad.c: Adjust to provide non-trivial initial value. Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c (revision 266744) +++ gcc/tree-vect-loop.c (working copy) @@ -4103,9 +4103,6 @@ get_initial_defs_for_reduction (slp_tree tree vop; int group_size = stmts.length (); unsigned int vec_num, i; - unsigned number_of_copies = 1; - vec voprnds; - voprnds.create (number_of_vectors); struct loop *loop; auto_vec permute_results; @@ -4138,115 +4135,78 @@ get_initial_defs_for_reduction (slp_tree if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits)) nunits = group_size; - number_of_copies = nunits * number_of_vectors / group_size; - number_of_places_left_in_vector = nunits; bool constant_p = true; tree_vector_builder elts (vector_type, nunits, 1); elts.quick_grow (nunits); - for (j = 0; j < number_of_copies; j++) + for (j = 0; j < nunits * number_of_vectors; ++j) { - for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--) - { - tree op; - /* Get the def before the loop. In reduction chain we have only - one initial value. */ - if ((j != (number_of_copies - 1) - || (reduc_chain && i != 0)) - && neutral_op) - op = neutral_op; - else - op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe); - - /* Create 'vect_ = {op0,op1,...,opn}'. */ - number_of_places_left_in_vector--; - elts[number_of_places_left_in_vector] = op; - if (!CONSTANT_CLASS_P (op)) - constant_p = false; + tree op; + i = j % group_size; + stmt_vinfo = stmts[i]; + + /* Get the def before the loop. In reduction chain we have only + one initial value. Else we have as many as PHIs in the group. */ + if (reduc_chain) + op = i != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe); + else if (((vec_oprnds->length () + 1) * nunits + - number_of_places_left_in_vector >= group_size) + && neutral_op) + op = neutral_op; + else + op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe); - if (number_of_places_left_in_vector == 0) - { - gimple_seq ctor_seq = NULL; - tree init; - if (constant_p && !neutral_op - ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits) - : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)) - /* Build the vector directly from ELTS. */ - init = gimple_build_vector (&ctor_seq, &elts); - else if (neutral_op) + /* Create 'vect_ = {op0,op1,...,opn}'. */ + number_of_places_left_in_vector--; + elts[nunits - number_of_places_left_in_vector - 1] = op; + if (!CONSTANT_CLASS_P (op)) + constant_p = false; + + if (number_of_places_left_in_vector == 0) + { + gimple_seq ctor_seq = NULL; + tree init; + if (constant_p && !neutral_op + ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits) + : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)) + /* Build the vector directly from ELTS. */ + init = gimple_build_vector (&ctor_seq, &elts); + else if (neutral_op) + { + /* Build a vector of the neutral value and shift the + other elements into place. */ + init = gimple_build_vector_from_val (&ctor_seq, vector_type, + neutral_op); + int k = nunits; + while (k > 0 && elts[k - 1] == neutral_op) + k -= 1; + while (k > 0) { - /* Build a vector of the neutral value and shift the - other elements into place. */ - init = gimple_build_vector_from_val (&ctor_seq, vector_type, - neutral_op); - int k = nunits; - while (k > 0 && elts[k - 1] == neutral_op) - k -= 1; - while (k > 0) - { - k -= 1; - init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT, - vector_type, init, elts[k]); - } - } - else - { - /* First time round, duplicate ELTS to fill the - required number of vectors, then cherry pick the - appropriate result for each iteration. */ - if (vec_oprnds->is_empty ()) - duplicate_and_interleave (&ctor_seq, vector_type, elts, - number_of_vectors, - permute_results); - init = permute_results[number_of_vectors - j - 1]; + k -= 1; + init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT, + vector_type, init, elts[k]); } - if (ctor_seq != NULL) - gsi_insert_seq_on_edge_immediate (pe, ctor_seq); - voprnds.quick_push (init); - - number_of_places_left_in_vector = nunits; - elts.new_vector (vector_type, nunits, 1); - elts.quick_grow (nunits); - constant_p = true; - } - } - } - - /* Since the vectors are created in the reverse order, we should invert - them. */ - vec_num = voprnds.length (); - for (j = vec_num; j != 0; j--) - { - vop = voprnds[j - 1]; - vec_oprnds->quick_push (vop); - } - - voprnds.release (); - - /* In case that VF is greater than the unrolling factor needed for the SLP - group of stmts, NUMBER_OF_VECTORS to be created is greater than - NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have - to replicate the vectors. */ - tree neutral_vec = NULL; - while (number_of_vectors > vec_oprnds->length ()) - { - if (neutral_op) - { - if (!neutral_vec) - { - gimple_seq ctor_seq = NULL; - neutral_vec = gimple_build_vector_from_val - (&ctor_seq, vector_type, neutral_op); - if (ctor_seq != NULL) - gsi_insert_seq_on_edge_immediate (pe, ctor_seq); } - vec_oprnds->quick_push (neutral_vec); - } - else - { - for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++) - vec_oprnds->quick_push (vop); - } + else + { + /* First time round, duplicate ELTS to fill the + required number of vectors, then cherry pick the + appropriate result for each iteration. */ + if (vec_oprnds->is_empty ()) + duplicate_and_interleave (&ctor_seq, vector_type, elts, + number_of_vectors, + permute_results); + init = permute_results[number_of_vectors - j - 1]; + } + if (ctor_seq != NULL) + gsi_insert_seq_on_edge_immediate (pe, ctor_seq); + vec_oprnds->quick_push (init); + + number_of_places_left_in_vector = nunits; + elts.new_vector (vector_type, nunits, 1); + elts.quick_grow (nunits); + constant_p = true; + } } } Index: gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c (revision 266744) +++ gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c (working copy) @@ -12,7 +12,7 @@ extern void abort (void); int __attribute__((noinline,noclone)) foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2) { - int i_sum = 0; + int i_sum = 5; for( int y = 0; y < 16; y++ ) { i_sum += abs ( pix1[0] - pix2[0] ); @@ -52,7 +52,7 @@ main () __asm__ volatile (""); } - if (foo (X, Y, 16) != 32512) + if (foo (X, Y, 16) != 32512 + 5) abort (); return 0;