From patchwork Fri Aug 20 18:44:08 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: Optimize nested SIGN_EXTENDs/ZERO_EXTENDs (PR target/45336) Date: Fri, 20 Aug 2010 08:44:08 -0000 From: "H.J. Lu" X-Patchwork-Id: 62303 Message-Id: To: Jakub Jelinek , Uros Bizjak Cc: Paolo Bonzini , Bernd Schmidt , gcc-patches@gcc.gnu.org On Fri, Aug 20, 2010 at 11:33 AM, Jakub Jelinek wrote: > On Fri, Aug 20, 2010 at 08:00:33PM +0200, Paolo Bonzini wrote: >> On 08/20/2010 07:27 PM, Jakub Jelinek wrote: >> >Not sure what exactly is >> >pextrb ..., %ecx >> >insn doing to the upper 32 bits of %rcx, if it clears them >> >> Probably yes like every other 32-bit writeback on x86_64. > > The manuals confirm that. > Following seems to work just fine in the quick testing I've done so far: > > 2010-08-20  Jakub Jelinek   > >        * config/i386/sse.md (*sse4_1_pextrb): Add SWI48 mode iterator >        to cover zero extension into 64-bit register. >        (*sse2_pextrw): Likewise. >        (*sse4_1_pextrd_zext): New insn. > Here is the rest of the patch. I talked to icc people. They say the return value should be zero-extended to reflex what hardware does. diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 9467fe0..596d28f 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -1309,7 +1309,7 @@ _mm_cmpgt_epi32 (__m128i __A, __m128i __B) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi16 (__m128i const __A, int const __N) { - return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); + return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1319,7 +1319,7 @@ _mm_insert_epi16 (__m128i const __A, int const __D, int const __N) } #else #define _mm_extract_epi16(A, N) \ - ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) + ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) #define _mm_insert_epi16(A, D, N) \ ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \ (int)(D), (int)(N))) diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index 170fae5..357b527 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -439,7 +439,7 @@ _mm_insert_epi64 (__m128i __D, long long __S, const int __N) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi8 (__m128i __X, const int __N) { - return __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N); + return (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -457,7 +457,7 @@ _mm_extract_epi64 (__m128i __X, const int __N) #endif #else #define _mm_extract_epi8(X, N) \ - ((int) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))) + ((int) (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))) #define _mm_extract_epi32(X, N) \ ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N))) --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-1.c 2010-08-20 11:03:02.636918319 -0700 @@ -0,0 +1,16 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler-not "movsbl" } } */ +/* { dg-final { scan-assembler-not "movswl" } } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ +/* { dg-final { scan-assembler-not "movzwl" } } */ +/* { dg-final { scan-assembler-not "cwtl" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include +unsigned int foo8(__m128i x) { return _mm_extract_epi8(x, 4); } +unsigned int foo16(__m128i x) { return _mm_extract_epi16(x, 3); } +unsigned int foo32(__m128i x) { return _mm_extract_epi32(x, 2); } --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-2.c 2010-08-20 11:04:55.588671125 -0700 @@ -0,0 +1,20 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler-not "movsbl" } } */ +/* { dg-final { scan-assembler-not "movswl" } } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ +/* { dg-final { scan-assembler-not "movzwl" } } */ +/* { dg-final { scan-assembler-not "cwtl" } } */ +/* { dg-final { scan-assembler-not "cltq" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include +unsigned long int foo8(__m128i x) { return _mm_extract_epi8(x, 4); } +unsigned long int foo16(__m128i x) { return _mm_extract_epi16(x, 3); } +unsigned long int foo32(__m128i x) +{ + return (unsigned int) _mm_extract_epi32(x, 2); +} --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-3.c 2010-08-20 11:12:03.249670891 -0700 @@ -0,0 +1,13 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler "movsbl" } } */ +/* { dg-final { scan-assembler "(movswl|cwtl)" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include +int foo8(__m128i x) { return (char) _mm_extract_epi8(x, 4); } +int foo16(__m128i x) { return (short) _mm_extract_epi16(x, 3); } +int foo32(__m128i x) { return _mm_extract_epi32(x, 2); } --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-4.c 2010-08-20 11:24:11.918808644 -0700 @@ -0,0 +1,15 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler "movsbq" } } */ +/* { dg-final { scan-assembler "movswq" } } */ +/* { dg-final { scan-assembler "(cltq|movslq)" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include +long int foo8(__m128i x) { return (char) _mm_extract_epi8(x, 4); } +long int foo16(__m128i x) { return (short) _mm_extract_epi16(x, 3); } +long int foo32(__m128i x) { return (int) _mm_extract_epi32(x, 2); }