From patchwork Fri Jun 15 20:57:22 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 165226 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id E4D6AB70A8 for ; Sat, 16 Jun 2012 06:58:05 +1000 (EST) Comment: DKIM? See http://www.dkim.org DKIM-Signature: v=1; a=rsa-sha1; c=relaxed/relaxed; d=gcc.gnu.org; s=default; x=1340398686; h=Comment: DomainKey-Signature:Received:Received:Received:Received:Received: Received:From:To:Cc:Subject:Date:Message-Id:In-Reply-To: References:Mailing-List:Precedence:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:Sender:Delivered-To; bh=y0mj7RV VyGXpR+5xC9dS7cHhEho=; b=iLQLO6+tMw+8Dum5xNy3+zoxfw+zAs+TFrXjydi wABT0mxyBr6CbYi7DaURokutkmH2DX8Y/kMISWDW38e0W4y3rXNvvnMFqgZ44Q5k M5Fkv+9ImrH2DqrB8naehGQjmIarjU0WuEmiTOIVOTPFoTG+JuJb6BxFQXLzBTwZ FLbU= Comment: DomainKeys? See http://antispam.yahoo.com/domainkeys DomainKey-Signature: a=rsa-sha1; q=dns; c=nofws; s=default; d=gcc.gnu.org; h=Received:Received:X-SWARE-Spam-Status:X-Spam-Check-By:Received:Received:Received:Received:From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References:X-IsSubscribed:Mailing-List:Precedence:List-Id:List-Unsubscribe:List-Archive:List-Post:List-Help:Sender:Delivered-To; b=YBNNrISNncMq4UBkxIMjdPTZXGjIb4T2LotEX+nRZIrJ7yhwDZ3n1pNNy1x7J3 Mf9NByIumcyXjMV9OVS+bW9zkynUkb7hDbBSSVRIv9HDfItdS9IrTum3bBinCiFr gwmGV/+1+QK5mewZYvKJAPfh+mAD/vEJV+I5WTnmm+2q8=; Received: (qmail 13817 invoked by alias); 15 Jun 2012 20:57:48 -0000 Received: (qmail 13792 invoked by uid 22791); 15 Jun 2012 20:57:45 -0000 X-SWARE-Spam-Status: No, hits=-4.7 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, KHOP_RCVD_TRUST, KHOP_THREADED, RCVD_IN_DNSWL_LOW, RCVD_IN_HOSTKARMA_YE X-Spam-Check-By: sourceware.org Received: from mail-pb0-f47.google.com (HELO mail-pb0-f47.google.com) (209.85.160.47) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Fri, 15 Jun 2012 20:57:31 +0000 Received: by pbbrq2 with SMTP id rq2so5728278pbb.20 for ; Fri, 15 Jun 2012 13:57:31 -0700 (PDT) Received: by 10.68.227.69 with SMTP id ry5mr2321046pbc.16.1339793851102; Fri, 15 Jun 2012 13:57:31 -0700 (PDT) Received: from anchor.twiddle.home ([173.160.232.49]) by mx.google.com with ESMTPS id nh8sm14366236pbc.60.2012.06.15.13.57.30 (version=TLSv1/SSLv3 cipher=OTHER); Fri, 15 Jun 2012 13:57:30 -0700 (PDT) From: Richard Henderson To: gcc-patches@gcc.gnu.org Cc: rguenther@suse.de, ubizjak@gmail.com, hjl.tools@gmail.com Subject: [PATCH 1/3] Add rtx costs for sse integer ops Date: Fri, 15 Jun 2012 13:57:22 -0700 Message-Id: <1339793844-27442-2-git-send-email-rth@redhat.com> In-Reply-To: <1339793844-27442-1-git-send-email-rth@redhat.com> References: <1339793844-27442-1-git-send-email-rth@redhat.com> X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org --- gcc/config/i386/i386.c | 50 ++++++++++++++++++++++++++++++++++++++--------- 1 files changed, 40 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e2f5740..578a756 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -31990,13 +31990,16 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total, break; case 0: case -1: - /* Start with (MEM (SYMBOL_REF)), since that's where - it'll probably end up. Add a penalty for size. */ - *total = (COSTS_N_INSNS (1) - + (flag_pic != 0 && !TARGET_64BIT) - + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); break; } + /* FALLTHRU */ + + case CONST_VECTOR: + /* Start with (MEM (SYMBOL_REF)), since that's where + it'll probably end up. Add a penalty for size. */ + *total = (COSTS_N_INSNS (1) + + (flag_pic != 0 && !TARGET_64BIT) + + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); return true; case ZERO_EXTEND: @@ -32016,8 +32019,9 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total, return false; case ASHIFT: - if (CONST_INT_P (XEXP (x, 1)) - && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT)) + if (SCALAR_INT_MODE_P (mode) + && GET_MODE_SIZE (mode) < UNITS_PER_WORD + && CONST_INT_P (XEXP (x, 1))) { HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); if (value == 1) @@ -32038,7 +32042,15 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total, case ASHIFTRT: case LSHIFTRT: case ROTATERT: - if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + /* ??? Should be SSE vector operation cost. */ + /* At least for published AMD latencies, this really is the same + as the latency for a simple fpu operation like fabs. */ + *total = cost->fabs; + return false; + } + if (GET_MODE_SIZE (mode) < UNITS_PER_WORD) { if (CONST_INT_P (XEXP (x, 1))) { @@ -32107,6 +32119,16 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total, *total = cost->fmul; return false; } + else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + /* Without sse4.1, we don't have PMULLD; it's emulated with 7 + insns, including two PMULUDQ. */ + if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX)) + *total = cost->fmul * 2 + cost->fabs * 5; + else + *total = cost->fmul; + return false; + } else { rtx op0 = XEXP (x, 0); @@ -32171,7 +32193,7 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total, case PLUS: if (GET_MODE_CLASS (mode) == MODE_INT - && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) + && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) { if (GET_CODE (XEXP (x, 0)) == PLUS && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT @@ -32271,6 +32293,14 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total, /* FALLTHRU */ case NOT: + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + /* ??? Should be SSE vector operation cost. */ + /* At least for published AMD latencies, this really is the same + as the latency for a simple fpu operation like fabs. */ + *total = cost->fabs; + return false; + } if (!TARGET_64BIT && mode == DImode) *total = cost->add * 2; else @@ -32331,7 +32361,7 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total, /* ??? Assume all of these vector manipulation patterns are recognizable. In which case they all pretty much have the same cost. */ - *total = COSTS_N_INSNS (1); + *total = cost->fabs; return true; default: