From patchwork Fri Aug 20 18:44:08 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: "H.J. Lu" X-Patchwork-Id: 62303 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id E4AB6B6F10 for ; Sat, 21 Aug 2010 04:44:18 +1000 (EST) Received: (qmail 23602 invoked by alias); 20 Aug 2010 18:44:16 -0000 Received: (qmail 23588 invoked by uid 22791); 20 Aug 2010 18:44:15 -0000 X-SWARE-Spam-Status: No, hits=-1.8 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE X-Spam-Check-By: sourceware.org Received: from mail-vw0-f47.google.com (HELO mail-vw0-f47.google.com) (209.85.212.47) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Fri, 20 Aug 2010 18:44:10 +0000 Received: by vws13 with SMTP id 13so3438891vws.20 for ; Fri, 20 Aug 2010 11:44:08 -0700 (PDT) MIME-Version: 1.0 Received: by 10.220.125.88 with SMTP id x24mr1084108vcr.205.1282329848541; Fri, 20 Aug 2010 11:44:08 -0700 (PDT) Received: by 10.220.164.142 with HTTP; Fri, 20 Aug 2010 11:44:08 -0700 (PDT) In-Reply-To: <20100820183359.GH702@tyan-ft48-01.lab.bos.redhat.com> References: <20100819163330.GX702@tyan-ft48-01.lab.bos.redhat.com> <4C6E4F08.6070801@gnu.org> <20100820135046.GC702@tyan-ft48-01.lab.bos.redhat.com> <20100820172757.GF702@tyan-ft48-01.lab.bos.redhat.com> <4C6EC2C1.5030603@gnu.org> <20100820183359.GH702@tyan-ft48-01.lab.bos.redhat.com> Date: Fri, 20 Aug 2010 11:44:08 -0700 Message-ID: Subject: Re: [PATCH] Optimize nested SIGN_EXTENDs/ZERO_EXTENDs (PR target/45336) From: "H.J. 
Lu" To: Jakub Jelinek , Uros Bizjak Cc: Paolo Bonzini , Bernd Schmidt , gcc-patches@gcc.gnu.org X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org On Fri, Aug 20, 2010 at 11:33 AM, Jakub Jelinek wrote: > On Fri, Aug 20, 2010 at 08:00:33PM +0200, Paolo Bonzini wrote: >> On 08/20/2010 07:27 PM, Jakub Jelinek wrote: >> >Not sure what exactly is >> >pextrb ..., %ecx >> >insn doing to the upper 32 bits of %rcx, if it clears them >> >> Probably yes like every other 32-bit writeback on x86_64. > > The manuals confirm that. > Following seems to work just fine in the quick testing I've done so far: > > 2010-08-20  Jakub Jelinek   > >        * config/i386/sse.md (*sse4_1_pextrb): Add SWI48 mode iterator >        to cover zero extension into 64-bit register. >        (*sse2_pextrw): Likewise. >        (*sse4_1_pextrd_zext): New insn. > Here is the rest of the patch. I talked to icc people. They say the return value should be zero-extended to reflect what the hardware does. 
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 9467fe0..596d28f 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -1309,7 +1309,7 @@ _mm_cmpgt_epi32 (__m128i __A, __m128i __B) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi16 (__m128i const __A, int const __N) { - return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); + return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1319,7 +1319,7 @@ _mm_insert_epi16 (__m128i const __A, int const __D, int const __N) } #else #define _mm_extract_epi16(A, N) \ - ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) + ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) #define _mm_insert_epi16(A, D, N) \ ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \ (int)(D), (int)(N))) diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index 170fae5..357b527 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -439,7 +439,7 @@ _mm_insert_epi64 (__m128i __D, long long __S, const int __N) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi8 (__m128i __X, const int __N) { - return __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N); + return (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -457,7 +457,7 @@ _mm_extract_epi64 (__m128i __X, const int __N) #endif #else #define _mm_extract_epi8(X, N) \ - ((int) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))) + ((int) (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))) #define _mm_extract_epi32(X, N) \ ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N))) 
--- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-1.c 2010-08-20 11:03:02.636918319 -0700 @@ -0,0 +1,16 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler-not "movsbl" } } */ +/* { dg-final { scan-assembler-not "movswl" } } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ +/* { dg-final { scan-assembler-not "movzwl" } } */ +/* { dg-final { scan-assembler-not "cwtl" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include +unsigned int foo8(__m128i x) { return _mm_extract_epi8(x, 4); } +unsigned int foo16(__m128i x) { return _mm_extract_epi16(x, 3); } +unsigned int foo32(__m128i x) { return _mm_extract_epi32(x, 2); } --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-2.c 2010-08-20 11:04:55.588671125 -0700 @@ -0,0 +1,20 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler-not "movsbl" } } */ +/* { dg-final { scan-assembler-not "movswl" } } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ +/* { dg-final { scan-assembler-not "movzwl" } } */ +/* { dg-final { scan-assembler-not "cwtl" } } */ +/* { dg-final { scan-assembler-not "cltq" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include +unsigned long int foo8(__m128i x) { return _mm_extract_epi8(x, 4); } +unsigned long int foo16(__m128i x) { return _mm_extract_epi16(x, 3); } +unsigned long int foo32(__m128i x) +{ + return (unsigned int) _mm_extract_epi32(x, 2); +} --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-3.c 2010-08-20 11:12:03.249670891 -0700 @@ -0,0 +1,13 @@ +/* PR target/45336 */ +/* { dg-do 
compile } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler "movsbl" } } */ +/* { dg-final { scan-assembler "(movswl|cwtl)" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include +int foo8(__m128i x) { return (char) _mm_extract_epi8(x, 4); } +int foo16(__m128i x) { return (short) _mm_extract_epi16(x, 3); } +int foo32(__m128i x) { return _mm_extract_epi32(x, 2); } --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-4.c 2010-08-20 11:24:11.918808644 -0700 @@ -0,0 +1,15 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler "movsbq" } } */ +/* { dg-final { scan-assembler "movswq" } } */ +/* { dg-final { scan-assembler "(cltq|movslq)" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include +long int foo8(__m128i x) { return (char) _mm_extract_epi8(x, 4); } +long int foo16(__m128i x) { return (short) _mm_extract_epi16(x, 3); } +long int foo32(__m128i x) { return (int) _mm_extract_epi32(x, 2); }