From patchwork Fri Aug 20 20:59:23 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: "H.J. Lu" X-Patchwork-Id: 62318 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 822B5B70DF for ; Sat, 21 Aug 2010 06:59:37 +1000 (EST) Received: (qmail 18259 invoked by alias); 20 Aug 2010 20:59:33 -0000 Received: (qmail 18239 invoked by uid 22791); 20 Aug 2010 20:59:31 -0000 X-SWARE-Spam-Status: No, hits=-1.8 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE X-Spam-Check-By: sourceware.org Received: from mail-vw0-f47.google.com (HELO mail-vw0-f47.google.com) (209.85.212.47) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Fri, 20 Aug 2010 20:59:25 +0000 Received: by vws13 with SMTP id 13so3589833vws.20 for ; Fri, 20 Aug 2010 13:59:24 -0700 (PDT) MIME-Version: 1.0 Received: by 10.220.163.10 with SMTP id y10mr1192181vcx.203.1282337963986; Fri, 20 Aug 2010 13:59:23 -0700 (PDT) Received: by 10.220.164.142 with HTTP; Fri, 20 Aug 2010 13:59:23 -0700 (PDT) In-Reply-To: <4C6ECD83.7030504@redhat.com> References: <20100819163330.GX702@tyan-ft48-01.lab.bos.redhat.com> <4C6E4F08.6070801@gnu.org> <20100820135046.GC702@tyan-ft48-01.lab.bos.redhat.com> <20100820172757.GF702@tyan-ft48-01.lab.bos.redhat.com> <4C6EC2C1.5030603@gnu.org> <20100820183359.GH702@tyan-ft48-01.lab.bos.redhat.com> <4C6ECD83.7030504@redhat.com> Date: Fri, 20 Aug 2010 13:59:23 -0700 Message-ID: Subject: Re: [PATCH] Optimize nested SIGN_EXTENDs/ZERO_EXTENDs (PR target/45336) From: "H.J. 
Lu" To: Richard Henderson Cc: Jakub Jelinek , Uros Bizjak , Paolo Bonzini , Bernd Schmidt , gcc-patches@gcc.gnu.org X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org On Fri, Aug 20, 2010 at 11:46 AM, Richard Henderson wrote: > On 08/20/2010 11:44 AM, H.J. Lu wrote: >> 2010-08-20  H.J. Lu   >> >>       PR target/45336 >>       * config/i386/emmintrin.h (_mm_extract_epi16): Cast to unsigned >>       short first. >> >>       * config/i386/smmintrin.h (_mm_extract_epi8): Cast to unsigned >>       char first. >> >> gcc/testsuite/ >> >> 2010-08-20  H.J. Lu   >> >>       PR target/45336 >>       * gcc.target/i386/pr45336-1.c: New. >>       * gcc.target/i386/pr45336-2.c: Likewise. >>       * gcc.target/i386/pr45336-3.c: Likewise. >>       * gcc.target/i386/pr45336-4.c: Likewise. > > Ok. > > > r~ > This is the patch I checked in. I changed long to long long in pr45336-2.c and pr45336-4.c. I also limited pr45336-2.c to LP64. 
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 9467fe0..596d28f 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -1309,7 +1309,7 @@ _mm_cmpgt_epi32 (__m128i __A, __m128i __B) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi16 (__m128i const __A, int const __N) { - return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); + return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1319,7 +1319,7 @@ _mm_insert_epi16 (__m128i const __A, int const __D, int const __N) } #else #define _mm_extract_epi16(A, N) \ - ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) + ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) #define _mm_insert_epi16(A, D, N) \ ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \ (int)(D), (int)(N))) diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index 170fae5..357b527 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -439,7 +439,7 @@ _mm_insert_epi64 (__m128i __D, long long __S, const int __N) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi8 (__m128i __X, const int __N) { - return __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N); + return (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -457,7 +457,7 @@ _mm_extract_epi64 (__m128i __X, const int __N) #endif #else #define _mm_extract_epi8(X, N) \ - ((int) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))) + ((int) (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))) #define _mm_extract_epi32(X, N) \ ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N))) 
--- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-1.c 2010-08-20 11:03:02.636918319 -0700 @@ -0,0 +1,16 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler-not "movsbl" } } */ +/* { dg-final { scan-assembler-not "movswl" } } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ +/* { dg-final { scan-assembler-not "movzwl" } } */ +/* { dg-final { scan-assembler-not "cwtl" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include <smmintrin.h> +unsigned int foo8(__m128i x) { return _mm_extract_epi8(x, 4); } +unsigned int foo16(__m128i x) { return _mm_extract_epi16(x, 3); } +unsigned int foo32(__m128i x) { return _mm_extract_epi32(x, 2); } --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-2.c 2010-08-20 12:47:08.973718228 -0700 @@ -0,0 +1,21 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler-not "movsbl" } } */ +/* { dg-final { scan-assembler-not "movswl" } } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ +/* { dg-final { scan-assembler-not "movzwl" } } */ +/* { dg-final { scan-assembler-not "cwtl" } } */ +/* { dg-final { scan-assembler-not "cltq" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include <smmintrin.h> +unsigned long long int foo8(__m128i x) { return _mm_extract_epi8(x, 4); } +unsigned long long int foo16(__m128i x) { return _mm_extract_epi16(x, 3); } +unsigned long long int foo32(__m128i x) +{ + return (unsigned int) _mm_extract_epi32(x, 2); +} --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-3.c 2010-08-20 
11:12:03.249670891 -0700 @@ -0,0 +1,13 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler "movsbl" } } */ +/* { dg-final { scan-assembler "(movswl|cwtl)" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include <smmintrin.h> +int foo8(__m128i x) { return (char) _mm_extract_epi8(x, 4); } +int foo16(__m128i x) { return (short) _mm_extract_epi16(x, 3); } +int foo32(__m128i x) { return _mm_extract_epi32(x, 2); } --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-4.c 2010-08-20 12:46:52.730790353 -0700 @@ -0,0 +1,15 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler "movsbq" } } */ +/* { dg-final { scan-assembler "movswq" } } */ +/* { dg-final { scan-assembler "(cltq|movslq)" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include <smmintrin.h> +long long int foo8(__m128i x) { return (char) _mm_extract_epi8(x, 4); } +long long int foo16(__m128i x) { return (short) _mm_extract_epi16(x, 3); } +long long int foo32(__m128i x) { return (int) _mm_extract_epi32(x, 2); }