From patchwork Wed Jun 27 16:07:06 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Igor Zamyatin X-Patchwork-Id: 167692 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id C36301008DD for ; Thu, 28 Jun 2012 02:07:31 +1000 (EST) Comment: DKIM? See http://www.dkim.org DKIM-Signature: v=1; a=rsa-sha1; c=relaxed/relaxed; d=gcc.gnu.org; s=default; x=1341418052; h=Comment: DomainKey-Signature:Received:Received:Received:Received: MIME-Version:Received:Received:In-Reply-To:References:Date: Message-ID:Subject:From:To:Cc:Content-Type: Content-Transfer-Encoding:Mailing-List:Precedence:List-Id: List-Unsubscribe:List-Archive:List-Post:List-Help:Sender: Delivered-To; bh=9KAZrS/sGqkhRdWjHU9trFlWHBI=; b=Ck38YHPTdCLAT8P Pt3fqmxk0/pEstaFHFmp+J3P8e49xZHXEhuCrOQtLVOXorHIqMcliZvsARsUTbCn k4kWsfehOxiF/eF2n4A2701BJcBXR10eWNhxxhRZJVjk4I+AgCcWqldtTGAtBic7 1ZI4exd8F6bFtC7eyPto5FxL9TXc= Comment: DomainKeys? 
See http://antispam.yahoo.com/domainkeys DomainKey-Signature: a=rsa-sha1; q=dns; c=nofws; s=default; d=gcc.gnu.org; h=Received:Received:X-SWARE-Spam-Status:X-Spam-Check-By:Received:Received:MIME-Version:Received:Received:In-Reply-To:References:Date:Message-ID:Subject:From:To:Cc:Content-Type:Content-Transfer-Encoding:Mailing-List:Precedence:List-Id:List-Unsubscribe:List-Archive:List-Post:List-Help:Sender:Delivered-To; b=QubGu5zY5C9Qg6q1mu8gkGOJciKx4L09FNb7T7aFj4bWn3Fm4KWE9BhWDpeYDE esrW6yhv0p8iGFY4D5NvJfJG6StJnyG/S0KTZkZ2fr11Pua+mrto+42HPVlCg8MA 3365I+WwjHGM5MKDMagRnCXi+KbloIt4e45XaTZcSL+J8=; Received: (qmail 28308 invoked by alias); 27 Jun 2012 16:07:25 -0000 Received: (qmail 28293 invoked by uid 22791); 27 Jun 2012 16:07:23 -0000 X-SWARE-Spam-Status: No, hits=-5.6 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, KHOP_RCVD_TRUST, KHOP_THREADED, RCVD_IN_DNSWL_LOW, RCVD_IN_HOSTKARMA_YE X-Spam-Check-By: sourceware.org Received: from mail-pb0-f47.google.com (HELO mail-pb0-f47.google.com) (209.85.160.47) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Wed, 27 Jun 2012 16:07:09 +0000 Received: by pbbrq2 with SMTP id rq2so1850051pbb.20 for ; Wed, 27 Jun 2012 09:07:08 -0700 (PDT) MIME-Version: 1.0 Received: by 10.68.134.201 with SMTP id pm9mr67222252pbb.49.1340813226618; Wed, 27 Jun 2012 09:07:06 -0700 (PDT) Received: by 10.68.65.52 with HTTP; Wed, 27 Jun 2012 09:07:06 -0700 (PDT) In-Reply-To: <0EFAB2BDD0F67E4FB6CCC8B9F87D756915E4EF6C@IRSMSX101.ger.corp.intel.com> References: <1339793844-27442-1-git-send-email-rth@redhat.com> <1339793844-27442-2-git-send-email-rth@redhat.com> <0EFAB2BDD0F67E4FB6CCC8B9F87D756915E4EF6C@IRSMSX101.ger.corp.intel.com> Date: Wed, 27 Jun 2012 20:07:06 +0400 Message-ID: Subject: Re: [PATCH 1/3] Add rtx costs for sse integer ops From: Igor Zamyatin To: gcc-patches@gcc.gnu.org Cc: rth@redhat.com, rguenther@suse.de, ubizjak@gmail.com, hjl.tools@gmail.com, vbyakovl23@gmail.com Mailing-List: contact 
gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org May I ask about the purpose of the following part of the change? Doesn't it also affect non-SSE cases? @@ -32038,7 +32042,15 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total, case ASHIFTRT: case LSHIFTRT: case ROTATERT: - if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + /* ??? Should be SSE vector operation cost. */ + /* At least for published AMD latencies, this really is the same + as the latency for a simple fpu operation like fabs. */ + *total = cost->fabs; + return false; + } + if (GET_MODE_SIZE (mode) < UNITS_PER_WORD) { if (CONST_INT_P (XEXP (x, 1))) { It also seems that the condition guarding the code that is now under if (GET_MODE_SIZE (mode) < UNITS_PER_WORD) has been reversed: the old test selected DImode on 32-bit targets, i.e. modes wider than a word, whereas the new test selects modes narrower than a word. Why is this needed? 
Thanks, Igor -----Original Message----- From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-owner@gcc.gnu.org] On Behalf Of Richard Henderson Sent: Saturday, June 16, 2012 12:57 AM To: gcc-patches@gcc.gnu.org Cc: rguenther@suse.de; ubizjak@gmail.com; hjl.tools@gmail.com Subject: [PATCH 1/3] Add rtx costs for sse integer ops ---  gcc/config/i386/i386.c |   50 ++++++++++++++++++++++++++++++++++++++---------  1 files changed, 40 insertions(+), 10 deletions(-)     case ZERO_EXTEND: @@ -32016,8 +32019,9 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,       return false;     case ASHIFT: -      if (CONST_INT_P (XEXP (x, 1)) -         && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT)) +      if (SCALAR_INT_MODE_P (mode) +         && GET_MODE_SIZE (mode) < UNITS_PER_WORD +         && CONST_INT_P (XEXP (x, 1)))        {          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));          if (value == 1) @@ -32038,7 +32042,15 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,     case ASHIFTRT:     case LSHIFTRT:     case ROTATERT: -      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) +      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) +       { +         /* ??? Should be SSE vector operation cost.  */ +         /* At least for published AMD latencies, this really is the same +            as the latency for a simple fpu operation like fabs.  */ +         *total = cost->fabs; +         return false; +       } +      if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)        {          if (CONST_INT_P (XEXP (x, 1)))            { @@ -32107,6 +32119,16 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,          *total = cost->fmul;          return false;        } +      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) +       { +         /* Without sse4.1, we don't have PMULLD; it's emulated with 7 +            insns, including two PMULUDQ.  
*/ +         if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX)) +           *total = cost->fmul * 2 + cost->fabs * 5; +         else +           *total = cost->fmul; +         return false; +       }       else        {          rtx op0 = XEXP (x, 0); @@ -32171,7 +32193,7 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,     case PLUS:       if (GET_MODE_CLASS (mode) == MODE_INT -              && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) +         && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)        {          if (GET_CODE (XEXP (x, 0)) == PLUS              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT @@ -32271,6 +32293,14 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,       /* FALLTHRU */     case NOT: +      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) +       { +         /* ??? Should be SSE vector operation cost.  */ +         /* At least for published AMD latencies, this really is the same +            as the latency for a simple fpu operation like fabs.  */ +         *total = cost->fabs; +         return false; +       }       if (!TARGET_64BIT && mode == DImode)        *total = cost->add * 2;       else @@ -32331,7 +32361,7 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,       /* ??? Assume all of these vector manipulation patterns are         recognizable.  In which case they all pretty much have the         same cost.  */ -     *total = COSTS_N_INSNS (1); +     *total = cost->fabs;      return true;     default: -- 1.7.7.6 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e2f5740..578a756 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -31990,13 +31990,16 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,            break;          case 0:          case -1: -           /* Start with (MEM (SYMBOL_REF)), since that's where -              it'll probably end up.  Add a penalty for size.  
*/ -           *total = (COSTS_N_INSNS (1) -                     + (flag_pic != 0 && !TARGET_64BIT) -                     + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));            break;          } +      /* FALLTHRU */ + +    case CONST_VECTOR: +      /* Start with (MEM (SYMBOL_REF)), since that's where +        it'll probably end up.  Add a penalty for size.  */ +      *total = (COSTS_N_INSNS (1) +               + (flag_pic != 0 && !TARGET_64BIT) +               + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));       return true;