From patchwork Sat Mar 30 14:47:14 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Marc Glisse X-Patchwork-Id: 232523 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client CN "localhost", Issuer "www.qmailtoaster.com" (not verified)) by ozlabs.org (Postfix) with ESMTPS id 9E5682C00DF for ; Sun, 31 Mar 2013 01:47:39 +1100 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:cc:subject:in-reply-to:message-id:references :mime-version:content-type; q=dns; s=default; b=m8BPNTj5fP/O7kTp yDuL2Bsm51nPr4qOeSgQiqmTPsUD/JDHIDQomk6zPrFXJjxZGgbx8ZB4qZCzsXXF 6FW9DuPbc5ZA2AgUeULZugCfkLR8pg1DHUIKqKeZ3Sgtq3Q3tKD82+O2WXyUzgYB mOkwGQq1wMC2ceEFJnnEw4fJt1M= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:cc:subject:in-reply-to:message-id:references :mime-version:content-type; s=default; bh=Unr2GA9M1xibr6lliBzrHs JDyiw=; b=V+KRyXrXzJKA1h79WXk0VmkvOT2HgNX2Vdmw2At744Ii7gmpfQw8bx lmipXrX/a4PADE2pcLvzzO1a8EJmxwWt6xSGRlfmRtoMM6kOYIgcEgq2LBudOGGd qp8FucKcuCfM/GFcFHWsDsyhyP57bSVKbJaNx/oGR7QKbr9Snhgkw= Received: (qmail 26979 invoked by alias); 30 Mar 2013 14:47:30 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 26964 invoked by uid 89); 30 Mar 2013 14:47:21 -0000 X-Spam-SWARE-Status: No, score=-8.5 required=5.0 tests=AWL, BAYES_00, KHOP_RCVD_UNTRUST, KHOP_THREADED, RCVD_IN_DNSWL_HI, RCVD_IN_HOSTKARMA_W, RP_MATCHES_RCVD, TW_AV, TW_VP autolearn=ham 
version=3.3.1 Received: from mail2-relais-roc.national.inria.fr (HELO mail2-relais-roc.national.inria.fr) (192.134.164.83) by sourceware.org (qpsmtpd/0.84/v0.84-167-ge50287c) with ESMTP; Sat, 30 Mar 2013 14:47:17 +0000 Received: from stedding.saclay.inria.fr ([193.55.250.194]) by mail2-relais-roc.national.inria.fr with ESMTP/TLS/DHE-RSA-AES128-SHA; 30 Mar 2013 15:47:15 +0100 Received: from glisse (helo=localhost) by stedding.saclay.inria.fr with local-esmtp (Exim 4.80) (envelope-from ) id 1ULx3q-0002m9-Mv; Sat, 30 Mar 2013 15:47:14 +0100 Date: Sat, 30 Mar 2013 15:47:14 +0100 (CET) From: Marc Glisse To: Eric Botcazou cc: gcc-patches@gcc.gnu.org, ubizjak@gmail.com Subject: Re: [rtl, i386] vec_merge simplification In-Reply-To: <8694562.qGXxdu6rGm@polaris> Message-ID: References: <8694562.qGXxdu6rGm@polaris> User-Agent: Alpine 2.02 (DEB 1266 2009-07-14) MIME-Version: 1.0 X-Virus-Found: No On Wed, 27 Mar 2013, Eric Botcazou wrote: > OK, modulo a few nits: Thanks, here is a version taking into account all your comments, and which still passes bootstrap+testsuite on x86_64-linux-gnu. I am not completely sure if there is a point checking !side_effects_p (op1) after rtx_equal_p (op0, op1), but I am still doing it as it seems safe. Uros, are you ok with the testsuite part? 2013-03-30 Marc Glisse gcc/ * simplify-rtx.c (simplify_binary_operation_1) <VEC_SELECT>: Handle VEC_MERGE. (simplify_ternary_operation) <VEC_MERGE>: Use unsigned HOST_WIDE_INT for masks. Test for side effects. Handle nested VEC_MERGE. Handle equal arguments. gcc/testsuite/ * gcc.target/i386/merge-1.c: New testcase. * gcc.target/i386/avx2-vpblendd128-1.c: Make it non-trivial. 
Index: simplify-rtx.c =================================================================== --- simplify-rtx.c (revision 197265) +++ simplify-rtx.c (working copy) @@ -3553,20 +3553,45 @@ simplify_binary_operation_1 (enum rtx_co offset -= vec_size; vec = XEXP (vec, 1); } vec = avoid_constant_pool_reference (vec); } if (GET_MODE (vec) == mode) return vec; } + /* If we select elements in a vec_merge that all come from the same + operand, select from that operand directly. */ + if (GET_CODE (op0) == VEC_MERGE) + { + rtx trueop02 = avoid_constant_pool_reference (XEXP (op0, 2)); + if (CONST_INT_P (trueop02)) + { + unsigned HOST_WIDE_INT sel = UINTVAL (trueop02); + bool all_operand0 = true; + bool all_operand1 = true; + for (int i = 0; i < XVECLEN (trueop1, 0); i++) + { + rtx j = XVECEXP (trueop1, 0, i); + if (sel & (1 << UINTVAL (j))) + all_operand1 = false; + else + all_operand0 = false; + } + if (all_operand0 && !side_effects_p (XEXP (op0, 1))) + return simplify_gen_binary (VEC_SELECT, mode, XEXP (op0, 0), op1); + if (all_operand1 && !side_effects_p (XEXP (op0, 0))) + return simplify_gen_binary (VEC_SELECT, mode, XEXP (op0, 1), op1); + } + } + return 0; case VEC_CONCAT: { enum machine_mode op0_mode = (GET_MODE (trueop0) != VOIDmode ? GET_MODE (trueop0) : GET_MODE_INNER (mode)); enum machine_mode op1_mode = (GET_MODE (trueop1) != VOIDmode ? GET_MODE (trueop1) : GET_MODE_INNER (mode)); @@ -5217,21 +5242,21 @@ simplify_const_relational_operation (enu OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became a constant. Return 0 if no simplifications is possible. */ rtx simplify_ternary_operation (enum rtx_code code, enum machine_mode mode, enum machine_mode op0_mode, rtx op0, rtx op1, rtx op2) { unsigned int width = GET_MODE_PRECISION (mode); bool any_change = false; - rtx tem; + rtx tem, trueop2; /* VOIDmode means "infinite" precision. */ if (width == 0) width = HOST_BITS_PER_WIDE_INT; switch (code) { case FMA: /* Simplify negations around the multiplication. 
*/ /* -a * -b + c => a * b + c. */ @@ -5363,47 +5388,85 @@ simplify_ternary_operation (enum rtx_cod else if (temp) return gen_rtx_IF_THEN_ELSE (mode, temp, op1, op2); } } break; case VEC_MERGE: gcc_assert (GET_MODE (op0) == mode); gcc_assert (GET_MODE (op1) == mode); gcc_assert (VECTOR_MODE_P (mode)); - op2 = avoid_constant_pool_reference (op2); - if (CONST_INT_P (op2)) + trueop2 = avoid_constant_pool_reference (op2); + if (CONST_INT_P (trueop2)) { - int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode)); + int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode)); unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size); - int mask = (1 << n_elts) - 1; + unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n_elts) + - 1; + unsigned HOST_WIDE_INT sel = UINTVAL (trueop2); - if (!(INTVAL (op2) & mask)) + if (!(sel & mask) && !side_effects_p (op0)) return op1; - if ((INTVAL (op2) & mask) == mask) + if ((sel & mask) == mask && !side_effects_p (op1)) return op0; - op0 = avoid_constant_pool_reference (op0); - op1 = avoid_constant_pool_reference (op1); - if (GET_CODE (op0) == CONST_VECTOR - && GET_CODE (op1) == CONST_VECTOR) + rtx trueop0 = avoid_constant_pool_reference (op0); + rtx trueop1 = avoid_constant_pool_reference (op1); + if (GET_CODE (trueop0) == CONST_VECTOR + && GET_CODE (trueop1) == CONST_VECTOR) { rtvec v = rtvec_alloc (n_elts); unsigned int i; for (i = 0; i < n_elts; i++) - RTVEC_ELT (v, i) = (INTVAL (op2) & (1 << i) - ? CONST_VECTOR_ELT (op0, i) - : CONST_VECTOR_ELT (op1, i)); + RTVEC_ELT (v, i) = ((sel & ((unsigned HOST_WIDE_INT) 1 << i)) + ? CONST_VECTOR_ELT (trueop0, i) + : CONST_VECTOR_ELT (trueop1, i)); return gen_rtx_CONST_VECTOR (mode, v); } + + /* Replace (vec_merge (vec_merge a b m) c n) with (vec_merge b c n) + if no element from a appears in the result. 
*/ + if (GET_CODE (op0) == VEC_MERGE) + { + tem = avoid_constant_pool_reference (XEXP (op0, 2)); + if (CONST_INT_P (tem)) + { + unsigned HOST_WIDE_INT sel0 = UINTVAL (tem); + if (!(sel & sel0 & mask) && !side_effects_p (XEXP (op0, 0))) + return simplify_gen_ternary (code, mode, mode, + XEXP (op0, 1), op1, op2); + if (!(sel & ~sel0 & mask) && !side_effects_p (XEXP (op0, 1))) + return simplify_gen_ternary (code, mode, mode, + XEXP (op0, 0), op1, op2); + } + } + if (GET_CODE (op1) == VEC_MERGE) + { + tem = avoid_constant_pool_reference (XEXP (op1, 2)); + if (CONST_INT_P (tem)) + { + unsigned HOST_WIDE_INT sel1 = UINTVAL (tem); + if (!(~sel & sel1 & mask) && !side_effects_p (XEXP (op1, 0))) + return simplify_gen_ternary (code, mode, mode, + op0, XEXP (op1, 1), op2); + if (!(~sel & ~sel1 & mask) && !side_effects_p (XEXP (op1, 1))) + return simplify_gen_ternary (code, mode, mode, + op0, XEXP (op1, 0), op2); + } + } } + + if (rtx_equal_p (op0, op1) + && !side_effects_p (op2) && !side_effects_p (op1)) + return op0; + break; default: gcc_unreachable (); } return 0; } /* Evaluate a SUBREG of a CONST_INT or CONST_DOUBLE or CONST_FIXED Index: testsuite/gcc.target/i386/merge-1.c =================================================================== --- testsuite/gcc.target/i386/merge-1.c (revision 0) +++ testsuite/gcc.target/i386/merge-1.c (revision 0) @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -msse2" } */ + +#include + +void +f (double *r, __m128d x, __m128d y, __m128d z) +{ + __m128d t=_mm_move_sd(x,y); + __m128d u=_mm_move_sd(t,z); + *r = u[0]; +} + +__m128d +g(__m128d x, __m128d y, __m128d z) +{ + __m128d t=_mm_move_sd(x,y); + __m128d u=_mm_move_sd(t,z); + return u; +} + +/* { dg-final { scan-assembler-times "movsd" 1 } } */ Property changes on: testsuite/gcc.target/i386/merge-1.c ___________________________________________________________________ Added: svn:eol-style + native Added: svn:keywords + Author Date Id Revision URL Index: 
testsuite/gcc.target/i386/avx2-vpblendd128-1.c =================================================================== --- testsuite/gcc.target/i386/avx2-vpblendd128-1.c (revision 197265) +++ testsuite/gcc.target/i386/avx2-vpblendd128-1.c (working copy) @@ -1,13 +1,14 @@ /* { dg-do compile } */ /* { dg-options "-mavx2 -O2" } */ /* { dg-final { scan-assembler "vpblendd\[ \\t\]+\[^\n\]*" } } */ #include __m128i x; +__m128i y; void extern avx2_test (void) { - x = _mm_blend_epi32 (x, x, 13); + x = _mm_blend_epi32 (x, y, 13); }