From patchwork Fri Aug 20 20:59:23 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: Optimize nested SIGN_EXTENDs/ZERO_EXTENDs (PR target/45336) Date: Fri, 20 Aug 2010 10:59:23 -0000 From: H.J. Lu X-Patchwork-Id: 62318 Message-Id: To: Richard Henderson Cc: Jakub Jelinek , Uros Bizjak , Paolo Bonzini , Bernd Schmidt , gcc-patches@gcc.gnu.org On Fri, Aug 20, 2010 at 11:46 AM, Richard Henderson wrote: > On 08/20/2010 11:44 AM, H.J. Lu wrote: >> 2010-08-20  H.J. Lu   >> >>       PR target/45336 >>       * config/i386/emmintrin.h (_mm_extract_epi16): Cast to unsigned >>       short first. >> >>       * config/i386/smmintrin.h (_mm_extract_epi8): Cast to unsigned >>       char first. >> >> gcc/testsuite/ >> >> 2010-08-20  H.J. Lu   >> >>       PR target/45336 >>       * gcc.target/i386/pr45336-1.c: New. >>       * gcc.target/i386/pr45336-2.c: Likewise. >>       * gcc.target/i386/pr45336-3.c: Likewise. >>       * gcc.target/i386/pr45336-4.c: Likewise. > > Ok. > > > r~ > This is the patch I checked in. I changed long to long long in pr45336-2.c and pr45336-4.c. I also limited pr45336-2.c to LP64. diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 9467fe0..596d28f 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -1309,7 +1309,7 @@ _mm_cmpgt_epi32 (__m128i __A, __m128i __B) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi16 (__m128i const __A, int const __N) { - return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); + return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1319,7 +1319,7 @@ _mm_insert_epi16 (__m128i const __A, int const __D, int const __N) } #else #define _mm_extract_epi16(A, N) \ - ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) + ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) #define _mm_insert_epi16(A, D, N) \ ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \ (int)(D), (int)(N))) diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index 170fae5..357b527 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -439,7 +439,7 @@ _mm_insert_epi64 (__m128i __D, long long __S, const int __N) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi8 (__m128i __X, const int __N) { - return __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N); + return (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -457,7 +457,7 @@ _mm_extract_epi64 (__m128i __X, const int __N) #endif #else #define _mm_extract_epi8(X, N) \ - ((int) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))) + ((int) (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))) #define _mm_extract_epi32(X, N) \ ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N))) --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-1.c 2010-08-20 11:03:02.636918319 -0700 @@ -0,0 +1,16 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler-not "movsbl" } } */ +/* { dg-final { scan-assembler-not "movswl" } } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ +/* { dg-final { scan-assembler-not "movzwl" } } */ +/* { dg-final { scan-assembler-not "cwtl" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include +unsigned int foo8(__m128i x) { return _mm_extract_epi8(x, 4); } +unsigned int foo16(__m128i x) { return _mm_extract_epi16(x, 3); } +unsigned int foo32(__m128i x) { return _mm_extract_epi32(x, 2); } --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-2.c 2010-08-20 12:47:08.973718228 -0700 @@ -0,0 +1,21 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler-not "movsbl" } } */ +/* { dg-final { scan-assembler-not "movswl" } } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ +/* { dg-final { scan-assembler-not "movzwl" } } */ +/* { dg-final { scan-assembler-not "cwtl" } } */ +/* { dg-final { scan-assembler-not "cltq" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include +unsigned long long int foo8(__m128i x) { return _mm_extract_epi8(x, 4); } +unsigned long long int foo16(__m128i x) { return _mm_extract_epi16(x, 3); } +unsigned long long int foo32(__m128i x) +{ + return (unsigned int) _mm_extract_epi32(x, 2); +} --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-3.c 2010-08-20 11:12:03.249670891 -0700 @@ -0,0 +1,13 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler "movsbl" } } */ +/* { dg-final { scan-assembler "(movswl|cwtl)" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include +int foo8(__m128i x) { return (char) _mm_extract_epi8(x, 4); } +int foo16(__m128i x) { return (short) _mm_extract_epi16(x, 3); } +int foo32(__m128i x) { return _mm_extract_epi32(x, 2); } --- /dev/null 2010-08-11 15:57:03.635230126 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/pr45336-4.c 2010-08-20 12:46:52.730790353 -0700 @@ -0,0 +1,15 @@ +/* PR target/45336 */ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -msse4 -mtune=generic" } */ +/* { dg-final { scan-assembler "movsbq" } } */ +/* { dg-final { scan-assembler "movswq" } } */ +/* { dg-final { scan-assembler "(cltq|movslq)" } } */ +/* { dg-final { scan-assembler "pextrb" } } */ +/* { dg-final { scan-assembler "pextrw" } } */ +/* { dg-final { scan-assembler "pextrd" } } */ + +#include +long long int foo8(__m128i x) { return (char) _mm_extract_epi8(x, 4); } +long long int foo16(__m128i x) { return (short) _mm_extract_epi16(x, 3); } +long long int foo32(__m128i x) { return (int) _mm_extract_epi32(x, 2); }