Patchwork Optimize nested SIGN_EXTENDs/ZERO_EXTENDs (PR target/45336)

login
register
mail settings
Submitter H.J. Lu
Date Aug. 20, 2010, 6:44 p.m.
Message ID <AANLkTinivFF++jauZCLE1m1BpTer0URMxE8UGL-h_moe@mail.gmail.com>
Download mbox | patch
Permalink /patch/62303/
State New
Headers show

Comments

H.J. Lu - Aug. 20, 2010, 6:44 p.m.
On Fri, Aug 20, 2010 at 11:33 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Fri, Aug 20, 2010 at 08:00:33PM +0200, Paolo Bonzini wrote:
>> On 08/20/2010 07:27 PM, Jakub Jelinek wrote:
>> >Not sure what exactly is
>> >pextrb ..., %ecx
>> >insn doing to the upper 32 bits of %rcx, if it clears them
>>
>> Probably yes like every other 32-bit writeback on x86_64.
>
> The manuals confirm that.
> Following seems to work just fine in the quick testing I've done so far:
>
> 2010-08-20  Jakub Jelinek  <jakub@redhat.com>
>
>        * config/i386/sse.md (*sse4_1_pextrb): Add SWI48 mode iterator
>        to cover zero extension into 64-bit register.
>        (*sse2_pextrw): Likewise.
>        (*sse4_1_pextrd_zext): New insn.
>


Here is the rest of the patch.  I talked to icc people. They say the return
value should be zero-extended to reflex what hardware does.
Richard Henderson - Aug. 20, 2010, 6:46 p.m.
On 08/20/2010 11:44 AM, H.J. Lu wrote:
> 2010-08-20  H.J. Lu  <hongjiu.lu@intel.com>
> 
> 	PR target/45336
> 	* config/i386/emmintrin.h (_mm_extract_epi16): Cast to unsigned
> 	short first.
> 
> 	* config/i386/smmintrin.h (_mm_extract_epi8): Cast to unsigned
> 	char first.
> 
> gcc/testsuite/
> 
> 2010-08-20  H.J. Lu  <hongjiu.lu@intel.com>
> 
> 	PR target/45336
> 	* gcc.target/i386/pr45336-1.c: New.
> 	* gcc.target/i386/pr45336-2.c: Likewise.
> 	* gcc.target/i386/pr45336-3.c: Likewise.
> 	* gcc.target/i386/pr45336-4.c: Likewise.

Ok.


r~

Patch

diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index 9467fe0..596d28f 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -1309,7 +1309,7 @@  _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_extract_epi16 (__m128i const __A, int const __N)
 {
-  return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
+  return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1319,7 +1319,7 @@  _mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
 }
 #else
 #define _mm_extract_epi16(A, N) \
-  ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
+  ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
 #define _mm_insert_epi16(A, D, N)				\
   ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A),	\
 					  (int)(D), (int)(N)))
diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h
index 170fae5..357b527 100644
--- a/gcc/config/i386/smmintrin.h
+++ b/gcc/config/i386/smmintrin.h
@@ -439,7 +439,7 @@  _mm_insert_epi64 (__m128i __D, long long __S, const int __N)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_extract_epi8 (__m128i __X, const int __N)
 {
-   return __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
+   return (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -457,7 +457,7 @@  _mm_extract_epi64 (__m128i __X, const int __N)
 #endif
 #else
 #define _mm_extract_epi8(X, N) \
-  ((int) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N)))
+  ((int) (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N)))
 #define _mm_extract_epi32(X, N) \
   ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N)))
 
--- /dev/null	2010-08-11 15:57:03.635230126 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/pr45336-1.c	2010-08-20 11:03:02.636918319 -0700
@@ -0,0 +1,16 @@ 
+/* PR target/45336 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4 -mtune=generic" } */
+/* { dg-final { scan-assembler-not "movsbl" } } */
+/* { dg-final { scan-assembler-not "movswl" } } */
+/* { dg-final { scan-assembler-not "movzbl" } } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+/* { dg-final { scan-assembler-not "cwtl" } } */
+/* { dg-final { scan-assembler "pextrb" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+/* { dg-final { scan-assembler "pextrd" } } */
+
+#include <smmintrin.h>
+unsigned int foo8(__m128i x) { return _mm_extract_epi8(x, 4); }
+unsigned int foo16(__m128i x) { return _mm_extract_epi16(x, 3); }
+unsigned int foo32(__m128i x) { return _mm_extract_epi32(x, 2); }
--- /dev/null	2010-08-11 15:57:03.635230126 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/pr45336-2.c	2010-08-20 11:04:55.588671125 -0700
@@ -0,0 +1,20 @@ 
+/* PR target/45336 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4 -mtune=generic" } */
+/* { dg-final { scan-assembler-not "movsbl" } } */
+/* { dg-final { scan-assembler-not "movswl" } } */
+/* { dg-final { scan-assembler-not "movzbl" } } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+/* { dg-final { scan-assembler-not "cwtl" } } */
+/* { dg-final { scan-assembler-not "cltq" } } */
+/* { dg-final { scan-assembler "pextrb" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+/* { dg-final { scan-assembler "pextrd" } } */
+
+#include <smmintrin.h>
+unsigned long int foo8(__m128i x) { return _mm_extract_epi8(x, 4); }
+unsigned long int foo16(__m128i x) { return _mm_extract_epi16(x, 3); }
+unsigned long int foo32(__m128i x)
+{ 
+  return (unsigned int) _mm_extract_epi32(x, 2);
+}
--- /dev/null	2010-08-11 15:57:03.635230126 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/pr45336-3.c	2010-08-20 11:12:03.249670891 -0700
@@ -0,0 +1,13 @@ 
+/* PR target/45336 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4 -mtune=generic" } */
+/* { dg-final { scan-assembler "movsbl" } } */
+/* { dg-final { scan-assembler "(movswl|cwtl)" } } */
+/* { dg-final { scan-assembler "pextrb" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+/* { dg-final { scan-assembler "pextrd" } } */
+
+#include <smmintrin.h>
+int foo8(__m128i x) { return (char) _mm_extract_epi8(x, 4); }
+int foo16(__m128i x) { return (short) _mm_extract_epi16(x, 3); }
+int foo32(__m128i x) { return _mm_extract_epi32(x, 2); }
--- /dev/null	2010-08-11 15:57:03.635230126 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/pr45336-4.c	2010-08-20 11:24:11.918808644 -0700
@@ -0,0 +1,15 @@ 
+/* PR target/45336 */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -msse4 -mtune=generic" } */
+/* { dg-final { scan-assembler "movsbq" } } */
+/* { dg-final { scan-assembler "movswq" } } */
+/* { dg-final { scan-assembler "(cltq|movslq)" } } */
+/* { dg-final { scan-assembler "pextrb" } } */
+/* { dg-final { scan-assembler "pextrw" } } */
+/* { dg-final { scan-assembler "pextrd" } } */
+
+#include <smmintrin.h>
+long int foo8(__m128i x) { return (char) _mm_extract_epi8(x, 4); }
+long int foo16(__m128i x) { return (short) _mm_extract_epi16(x, 3); }
+long int foo32(__m128i x) { return (int) _mm_extract_epi32(x, 2); }