From patchwork Sun Dec 2 17:39:34 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Marc Glisse X-Patchwork-Id: 203246 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 00A492C0085 for ; Mon, 3 Dec 2012 04:39:51 +1100 (EST) Comment: DKIM? See http://www.dkim.org DKIM-Signature: v=1; a=rsa-sha1; c=relaxed/relaxed; d=gcc.gnu.org; s=default; x=1355074792; h=Comment: DomainKey-Signature:Received:Received:Received:Received:Received: Date:From:To:cc:Subject:In-Reply-To:Message-ID:References: User-Agent:MIME-Version:Content-Type:Mailing-List:Precedence: List-Id:List-Unsubscribe:List-Archive:List-Post:List-Help:Sender: Delivered-To; bh=hogR0/BGx8J9YK3xWbNCAvBRK94=; b=AbwHpKwEfCqWPms Y0X2Z+CJEO6w8z16rQfIpiZKwaBJnL5fREzKcQLYG9WJu/5weqhCXljQ5PydNGoW VpfawWK+qFxONTZDbISIieXPn42uStio415nVeOsFkOHOpFo6ZU7hys6Xr3HoWfK 8PeCa1o+530k4da3pkMv0Oz+63vc= Comment: DomainKeys? See http://antispam.yahoo.com/domainkeys DomainKey-Signature: a=rsa-sha1; q=dns; c=nofws; s=default; d=gcc.gnu.org; h=Received:Received:X-SWARE-Spam-Status:X-Spam-Check-By:Received:Received:Received:Date:From:To:cc:Subject:In-Reply-To:Message-ID:References:User-Agent:MIME-Version:Content-Type:Mailing-List:Precedence:List-Id:List-Unsubscribe:List-Archive:List-Post:List-Help:Sender:Delivered-To; b=IRoP/+hqKZl4WIadMijNID/p1bo0e6FAIeiK4XRVnPbS7tAIaZwTwjyqoTvxMN RFn49+muHBMu25TYV4ygSpAVkzCyQ1OFBgpm5Zl3VRv8cieKjzWVrgXfReSR+W5t 3nOpza5dOZRRRa0qzgKTxVZAepNcyyPgZkx70DyDh+I0g=; Received: (qmail 2805 invoked by alias); 2 Dec 2012 17:39:43 -0000 Received: (qmail 2792 invoked by uid 22791); 2 Dec 2012 17:39:41 -0000 X-SWARE-Spam-Status: No, hits=-8.0 required=5.0 tests=AWL, BAYES_00, KHOP_RCVD_UNTRUST, KHOP_THREADED, RCVD_IN_DNSWL_HI, RCVD_IN_HOSTKARMA_W, RP_MATCHES_RCVD X-Spam-Check-By: sourceware.org Received: from mail1-relais-roc.national.inria.fr (HELO mail1-relais-roc.national.inria.fr) (192.134.164.82) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Sun, 02 Dec 2012 17:39:36 +0000 Received: from stedding.saclay.inria.fr ([193.55.250.194]) by mail1-relais-roc.national.inria.fr with ESMTP/TLS/DHE-RSA-AES128-SHA; 02 Dec 2012 18:39:34 +0100 Received: from glisse (helo=localhost) by stedding.saclay.inria.fr with local-esmtp (Exim 4.80) (envelope-from ) id 1TfDVu-0000CB-KH; Sun, 02 Dec 2012 18:39:34 +0100 Date: Sun, 2 Dec 2012 18:39:34 +0100 (CET) From: Marc Glisse To: gcc-patches@gcc.gnu.org cc: ubizjak@gmail.com Subject: Re: Simplify a VEC_SELECT from one half of a VEC_CONCAT In-Reply-To: Message-ID: References: User-Agent: Alpine 2.02 (DEB 1266 2009-07-14) MIME-Version: 1.0 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org While I was there, I decided to improve another bit of VEC_SELECT simplification (this supersedes the patch in the father of this message). Adding Uros in Cc: because I am touching the x86 backend. Sorry to add yet another insn variant, but I don't see how we can avoid it here. The x86 and RTL parts are independent, but the testcases need both. bootstrap+testsuite on x86_64-linux. (for next stage1 I assume) 2012-12-02 Marc Glisse PR target/43147 PR target/44551 gcc/ * simplify-rtx.c (simplify_binary_operation_1) : Improve VEC_SELECT and VEC_CONCAT subcases. * config/i386/sse.md (*sse_shufps__single): New. gcc/testsuite/ * gcc.target/i386/pr43147.c: New testcase. * gcc.target/i386/pr44551.c: New testcase. Index: simplify-rtx.c =================================================================== --- simplify-rtx.c (revision 194051) +++ simplify-rtx.c (working copy) @@ -3317,98 +3317,20 @@ simplify_binary_operation_1 (enum rtx_co gcc_assert (VECTOR_MODE_P (GET_MODE (trueop0))); gcc_assert (mode == GET_MODE_INNER (GET_MODE (trueop0))); gcc_assert (GET_CODE (trueop1) == PARALLEL); gcc_assert (XVECLEN (trueop1, 0) == 1); gcc_assert (CONST_INT_P (XVECEXP (trueop1, 0, 0))); if (GET_CODE (trueop0) == CONST_VECTOR) return CONST_VECTOR_ELT (trueop0, INTVAL (XVECEXP (trueop1, 0, 0))); - /* Extract a scalar element from a nested VEC_SELECT expression - (with optional nested VEC_CONCAT expression). Some targets - (i386) extract scalar element from a vector using chain of - nested VEC_SELECT expressions. When input operand is a memory - operand, this operation can be simplified to a simple scalar - load from an offseted memory address. */ - if (GET_CODE (trueop0) == VEC_SELECT) - { - rtx op0 = XEXP (trueop0, 0); - rtx op1 = XEXP (trueop0, 1); - - enum machine_mode opmode = GET_MODE (op0); - int elt_size = GET_MODE_SIZE (GET_MODE_INNER (opmode)); - int n_elts = GET_MODE_SIZE (opmode) / elt_size; - - int i = INTVAL (XVECEXP (trueop1, 0, 0)); - int elem; - - rtvec vec; - rtx tmp_op, tmp; - - gcc_assert (GET_CODE (op1) == PARALLEL); - gcc_assert (i < n_elts); - - /* Select element, pointed by nested selector. */ - elem = INTVAL (XVECEXP (op1, 0, i)); - - /* Handle the case when nested VEC_SELECT wraps VEC_CONCAT. */ - if (GET_CODE (op0) == VEC_CONCAT) - { - rtx op00 = XEXP (op0, 0); - rtx op01 = XEXP (op0, 1); - - enum machine_mode mode00, mode01; - int n_elts00, n_elts01; - - mode00 = GET_MODE (op00); - mode01 = GET_MODE (op01); - - /* Find out number of elements of each operand. */ - if (VECTOR_MODE_P (mode00)) - { - elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode00)); - n_elts00 = GET_MODE_SIZE (mode00) / elt_size; - } - else - n_elts00 = 1; - - if (VECTOR_MODE_P (mode01)) - { - elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode01)); - n_elts01 = GET_MODE_SIZE (mode01) / elt_size; - } - else - n_elts01 = 1; - - gcc_assert (n_elts == n_elts00 + n_elts01); - - /* Select correct operand of VEC_CONCAT - and adjust selector. */ - if (elem < n_elts01) - tmp_op = op00; - else - { - tmp_op = op01; - elem -= n_elts00; - } - } - else - tmp_op = op0; - - vec = rtvec_alloc (1); - RTVEC_ELT (vec, 0) = GEN_INT (elem); - - tmp = gen_rtx_fmt_ee (code, mode, - tmp_op, gen_rtx_PARALLEL (VOIDmode, vec)); - return tmp; - } if (GET_CODE (trueop0) == VEC_DUPLICATE && GET_MODE (XEXP (trueop0, 0)) == mode) return XEXP (trueop0, 0); } else { gcc_assert (VECTOR_MODE_P (GET_MODE (trueop0))); gcc_assert (GET_MODE_INNER (mode) == GET_MODE_INNER (GET_MODE (trueop0))); gcc_assert (GET_CODE (trueop1) == PARALLEL); @@ -3482,44 +3404,96 @@ simplify_binary_operation_1 (enum rtx_co rtx subop0, subop1; gcc_assert (i0 < 2 && i1 < 2); subop0 = XEXP (trueop0, i0); subop1 = XEXP (trueop0, i1); return simplify_gen_binary (VEC_CONCAT, mode, subop0, subop1); } } - if (XVECLEN (trueop1, 0) == 1 - && CONST_INT_P (XVECEXP (trueop1, 0, 0)) - && GET_CODE (trueop0) == VEC_CONCAT) + /* Look through nested VEC_SELECTs. */ + if (GET_CODE (trueop0) == VEC_SELECT) { - rtx vec = trueop0; - int offset = INTVAL (XVECEXP (trueop1, 0, 0)) * GET_MODE_SIZE (mode); + int len = XVECLEN (trueop1, 0); + rtvec vec = rtvec_alloc (len); + for (int i = 0; i < len; i++) + { + int j = INTVAL (XVECEXP (trueop1, 0, i)); + RTVEC_ELT (vec, i) = XVECEXP (XEXP (trueop0, 1), 0, j); + } + rtx new_op0 = XEXP (trueop0, 0); + rtx new_op1 = gen_rtx_PARALLEL (VOIDmode, vec); + return simplify_gen_binary (VEC_SELECT, mode, new_op0, new_op1); + } - /* Try to find the element in the VEC_CONCAT. */ - while (GET_MODE (vec) != mode - && GET_CODE (vec) == VEC_CONCAT) - { - HOST_WIDE_INT vec_size = GET_MODE_SIZE (GET_MODE (XEXP (vec, 0))); - if (offset < vec_size) - vec = XEXP (vec, 0); + /* Detect if all the elements come from the same subpart of a concat. */ + if (GET_CODE (trueop0) == VEC_CONCAT) + { + rtx new_op0 = NULL_RTX; + rtx new_op1 = NULL_RTX; + int first = 0; + int second = 0; + unsigned nelts_first_half = 1; + enum machine_mode mode_first_half = GET_MODE (XEXP (trueop0, 0)); + if (VECTOR_MODE_P (mode_first_half)) + { + int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode_first_half)); + nelts_first_half = (GET_MODE_SIZE (mode_first_half) / elt_size); + } + + for (int i = 0; i < XVECLEN (trueop1, 0); i++) + { + rtx j = XVECEXP (trueop1, 0, i); + if (!CONST_INT_P (j)) + { + first++; + second++; + break; + } + if (INTVAL (j) < nelts_first_half) + first++; else + second++; + } + + if (second == 0) + { + new_op0 = XEXP (trueop0, 0); + new_op1 = trueop1; + } + else if (first == 0 + || rtx_equal_p (XEXP (trueop0, 0), XEXP (trueop0, 1))) + { + int len = XVECLEN (trueop1, 0); + rtvec vec = rtvec_alloc (len); + for (int i = 0; i < len; i++) { - offset -= vec_size; - vec = XEXP (vec, 1); + /* All vectors have a power-of-2 size, so both halves of a + VEC_CONCAT must have the same size and using '%' instead + of '-' is safe. */ + int j = INTVAL (XVECEXP (trueop1, 0, i)) % nelts_first_half; + RTVEC_ELT (vec, i) = GEN_INT (j); } - vec = avoid_constant_pool_reference (vec); + new_op0 = XEXP (trueop0, 1); + new_op1 = gen_rtx_PARALLEL (VOIDmode, vec); } - if (GET_MODE (vec) == mode) - return vec; + if (new_op0) + { + if (VECTOR_MODE_P (GET_MODE (new_op0))) + return simplify_gen_binary (VEC_SELECT, mode, new_op0, new_op1); + if (VECTOR_MODE_P (mode)) + return simplify_gen_unary (VEC_DUPLICATE, mode, new_op0, + GET_MODE (new_op0)); + return new_op0; + } } return 0; case VEC_CONCAT: { enum machine_mode op0_mode = (GET_MODE (trueop0) != VOIDmode ? GET_MODE (trueop0) : GET_MODE_INNER (mode)); enum machine_mode op1_mode = (GET_MODE (trueop1) != VOIDmode ? GET_MODE (trueop1) Index: testsuite/gcc.target/i386/pr44551.c =================================================================== --- testsuite/gcc.target/i386/pr44551.c (revision 0) +++ testsuite/gcc.target/i386/pr44551.c (revision 0) @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mavx" } */ + +#include + +__m128i +foo (__m256i x, __m128i y) +{ + __m256i r = _mm256_insertf128_si256(x, y, 1); + __m128i a = _mm256_extractf128_si256(r, 1); + return a; +} + +/* { dg-final { scan-assembler-not "insert" } } */ +/* { dg-final { scan-assembler-not "extract" } } */ Property changes on: testsuite/gcc.target/i386/pr44551.c ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision URL Added: svn:eol-style + native Index: testsuite/gcc.target/i386/pr43147.c =================================================================== --- testsuite/gcc.target/i386/pr43147.c (revision 0) +++ testsuite/gcc.target/i386/pr43147.c (revision 0) @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O -msse" } */ + +#include + +__m128 f(__m128 m) +{ + m = _mm_shuffle_ps(m, m, 0xC9); + m = _mm_shuffle_ps(m, m, 0x2D); + return m; +} + +/* { dg-final { scan-assembler-times "shufps" 1 } } */ Property changes on: testsuite/gcc.target/i386/pr43147.c ___________________________________________________________________ Added: svn:eol-style + native Added: svn:keywords + Author Date Id Revision URL Index: config/i386/sse.md =================================================================== --- config/i386/sse.md (revision 194051) +++ config/i386/sse.md (working copy) @@ -3900,20 +3900,53 @@ default: gcc_unreachable (); } } [(set_attr "isa" "noavx,avx") (set_attr "type" "sseshuf") (set_attr "length_immediate" "1") (set_attr "prefix" "orig,vex") (set_attr "mode" "V4SF")]) +(define_insn "*sse_shufps__single" + [(set (match_operand:VI4F_128 0 "register_operand" "=x,x") + (vec_select:VI4F_128 + (match_operand:VI4F_128 1 "register_operand" "0,x") + (parallel [(match_operand 2 "const_0_to_3_operand") + (match_operand 3 "const_0_to_3_operand") + (match_operand 4 "const_0_to_3_operand") + (match_operand 5 "const_0_to_3_operand")])))] + "TARGET_SSE" +{ + int mask = 0; + mask |= INTVAL (operands[2]) << 0; + mask |= INTVAL (operands[3]) << 2; + mask |= INTVAL (operands[4]) << 4; + mask |= INTVAL (operands[5]) << 6; + operands[2] = GEN_INT (mask); + + switch (which_alternative) + { + case 0: + return "shufps\t{%2, %0, %0|%0, %0, %2}"; + case 1: + return "vshufps\t{%2, %1, %1, %0|%0, %1, %1, %2}"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseshuf1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V4SF")]) + (define_insn "sse_storehps" [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o") (parallel [(const_int 2) (const_int 3)])))] "TARGET_SSE" "@ %vmovhps\t{%1, %0|%0, %1} %vmovhlps\t{%1, %d0|%d0, %1} %vmovlps\t{%H1, %d0|%d0, %H1}"