diff mbox

[i386] Fix emitting of prefetch instructions

Message ID CAFULd4Z+k9f1YMLZqNpWvb7XTMkk9XSKAn3FfwKAEK4pppbHBw@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak March 3, 2014, 10:27 p.m. UTC
On Mon, Mar 3, 2014 at 1:10 PM, Rainer Orth <ro@cebitec.uni-bielefeld.de> wrote:

> The new gcc.target/i386/prefetchwt1-1.c test currently FAILs on Solaris 9/x86:
>
> FAIL: gcc.target/i386/prefetchwt1-1.c (test for excess errors)
> Excess errors:
> /var/gcc/regression/trunk/9-gcc-gas/build/gcc/include/xmmintrin.h:1195:1: error:
>  inlining failed in call to always_inline '_mm_prefetch': target specific option
>  mismatch
> /vol/gcc/src/hg/trunk/local/gcc/testsuite/gcc.target/i386/prefetchwt1-1.c:12:5:
> error: called from here
>
> gcc.target/i386/prefetchwt1-1.c: output file does not exist
> UNRESOLVED: gcc.target/i386/prefetchwt1-1.c scan-assembler [ \\t]+prefetchwt1[ \
> \t]+
>
> This can be fixed by compiling with -msse2.

Actually, we should take the prefetch intrinsics out of the various GCC
target pragmas. The patterns that emit these instructions are designed to
always emit the best available prefetch instruction for the selected
ISA.

The patch also changes the compiler to emit prefetchwt1 only for
_MM_HINT_ET1, while for _MM_HINT_ET0 it still emits prefetchw. In
addition, the patch corrects the wrong _MM_HINT_ET0 value (5 -> 7).

The patch was bootstrapped and tested on x86_64-pc-linux-gnu {,-m32} and
committed to mainline SVN.

2014-03-03  Uros Bizjak  <ubizjak@gmail.com>

    * config/i386/xmmintrin.h (enum _mm_hint) <_MM_HINT_ET0>: Correct
    hint value.
    (_mm_prefetch): Move out of GCC target("sse") pragma.
    * config/i386/prfchwintrin.h (_m_prefetchw): Move out of
    GCC target("prfchw") pragma.
    * config/i386/i386.md (prefetch): Emit prefetchwt1 only
    for locality <= 2.
    * config/i386/i386.c (ix86_option_override_internal): Enable
    -mprfchw with -mprefetchwt1.

Uros.

Comments

Uros Bizjak March 3, 2014, 11:31 p.m. UTC | #1
On Mon, Mar 3, 2014 at 11:27 PM, Uros Bizjak <ubizjak@gmail.com> wrote:

>> The new gcc.target/i386/prefetchwt1-1.c test currently FAILs on Solaris 9/x86:
>>
>> FAIL: gcc.target/i386/prefetchwt1-1.c (test for excess errors)
>> Excess errors:
>> /var/gcc/regression/trunk/9-gcc-gas/build/gcc/include/xmmintrin.h:1195:1: error:
>>  inlining failed in call to always_inline '_mm_prefetch': target specific option
>>  mismatch
>> /vol/gcc/src/hg/trunk/local/gcc/testsuite/gcc.target/i386/prefetchwt1-1.c:12:5:
>> error: called from here
>>
>> gcc.target/i386/prefetchwt1-1.c: output file does not exist
>> UNRESOLVED: gcc.target/i386/prefetchwt1-1.c scan-assembler [ \\t]+prefetchwt1[ \
>> \t]+
>>
>> This can be fixed by compiling with -msse2.
>
> Actually, we should take prefetch instructions out of various GCC
> target pragmas. Patterns that emit these instructions are designed to
> (depending on selected ISA) always emit  the most optimal prefetch
> instruction.
>
> The patch also changes the compiler to emit prefetchwt1 only for
> _MM_HINT_T1, while for _MM_HINT_T0, it still emits prefetchw. In
> addition, the patch corrects wrong MM_HINT_T0 value.
>
> Patch was bootstrapped and tested on x86_64-pc-linux-gnu {,-m32}  and
> committed to mainline SVN.
>
> 2014-03-03  Uros Bizjak  <ubizjak@gmail.com>
>
>     * config/i386/xmmintrin.h (enum _mm_hint) <_MM_HINT_ET0>: Correct
>     hint value.
>     (_mm_prefetch): Move out of GCC target("sse") pragma.
>     * config/i386/prfchwintrin.h (_m_prefetchw): Move out of
>     GCC target("prfchw") pragma.
>     * config/i386/i386.md (prefetch): Emit prefetchwt1 only
>     for locality <= 2.
>     * config/i386/i386.c (ix86_option_override_internal): Enable
>     -mprfchw with -mprefetchwt1.

BTW: There are a couple of new testsuite failures:

FAIL: gcc.target/i386/avx512pf-vscatterpf0dpd-1.c (test for excess errors)
UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0dpd-1.c
scan-assembler-times vscatterpf0dpd[ \\\\t]+[^\\n]*%ymm[0-9] 2
UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0dpd-1.c
scan-assembler-times vscatterpf0dpd[ \\\\t]+[^\\n]*{%k[1-7] 1
FAIL: gcc.target/i386/avx512pf-vscatterpf0dps-1.c (test for excess errors)
UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0dps-1.c
scan-assembler-times vscatterpf0dps[ \\\\t]+[^\\n]*%zmm[0-9] 2
UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0dps-1.c
scan-assembler-times vscatterpf0dps[ \\\\t]+[^\\n]*{%k[1-7] 1
FAIL: gcc.target/i386/avx512pf-vscatterpf0qpd-1.c (test for excess errors)
UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0qpd-1.c
scan-assembler-times vscatterpf0qpd[ \\\\t]+[^\\n]*%zmm[0-9] 2
UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0qpd-1.c
scan-assembler-times vscatterpf0qpd[ \\\\t]+[^\\n]*{%k[1-7] 1
FAIL: gcc.target/i386/avx512pf-vscatterpf0qps-1.c (test for excess errors)
UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0qps-1.c
scan-assembler-times vscatterpf0qps[ \\\\t]+[^\\n]*%zmm[0-9] 2
UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0qps-1.c
scan-assembler-times vscatterpf0qps[ \\\\t]+[^\\n]*{%k[1-7] 1

They are all:

FAIL: gcc.target/i386/avx512pf-vscatterpf0dpd-1.c (test for excess errors)
Excess errors:
/ssd/uros/gcc-build/gcc/include/avx512pfintrin.h:108:3: error: the
last argument must be hint 0 or 1

They are due to the _MM_HINT_ET0 fix, and probably show that the pattern
was not updated when the hint constants were adjusted to 2 and 3.

Kirill, can you please look at this inconsistency?

Uros.
diff mbox

Patch

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 208281)
+++ config/i386/i386.c	(working copy)
@@ -3874,8 +3874,9 @@  ix86_option_override_internal (bool main_args_p,
       || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
     x86_prefetch_sse = true;
 
-  /* Enable prefetch{,w} instructions for -m3dnow.  */
-  if (TARGET_3DNOW_P (opts->x_ix86_isa_flags))
+  /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1.  */
+  if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
+      || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
     opts->x_ix86_isa_flags
       |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
 
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 208281)
+++ config/i386/i386.md	(working copy)
@@ -17867,7 +17867,7 @@ 
      supported by SSE counterpart or the SSE prefetch is not available
      (K6 machines).  Otherwise use SSE prefetch as it allows specifying
      of locality.  */
-  if (TARGET_PREFETCHWT1 && write)
+  if (TARGET_PREFETCHWT1 && write && locality <= 2)
     operands[2] = const2_rtx;
   else if (TARGET_PRFCHW && (write || !TARGET_PREFETCH_SSE))
     operands[2] = GEN_INT (3);
Index: config/i386/prfchwintrin.h
===================================================================
--- config/i386/prfchwintrin.h	(revision 208281)
+++ config/i386/prfchwintrin.h	(working copy)
@@ -25,16 +25,9 @@ 
 # error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
 #endif
 
-
 #ifndef _PRFCHWINTRIN_H_INCLUDED
 #define _PRFCHWINTRIN_H_INCLUDED
 
-#ifndef __PRFCHW__
-#pragma GCC push_options
-#pragma GCC target("prfchw")
-#define __DISABLE_PRFCHW__
-#endif /* __PRFCHW__ */
-
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _m_prefetchw (void *__P)
 {
@@ -41,9 +34,4 @@  _m_prefetchw (void *__P)
   __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
 }
 
-#ifdef __DISABLE_PRFCHW__
-#undef __DISABLE_PRFCHW__
-#pragma GCC pop_options
-#endif /* __DISABLE_PRFCHW__ */
-
 #endif /* _PRFCHWINTRIN_H_INCLUDED */
Index: config/i386/xmmintrin.h
===================================================================
--- config/i386/xmmintrin.h	(revision 208281)
+++ config/i386/xmmintrin.h	(working copy)
@@ -33,6 +33,31 @@ 
 /* Get _mm_malloc () and _mm_free ().  */
 #include <mm_malloc.h>
 
+/* Constants for use with _mm_prefetch.  */
+enum _mm_hint
+{
+  /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit.  */
+  _MM_HINT_ET0 = 7,
+  _MM_HINT_ET1 = 6,
+  _MM_HINT_T0 = 3,
+  _MM_HINT_T1 = 2,
+  _MM_HINT_T2 = 1,
+  _MM_HINT_NTA = 0
+};
+
+/* Loads one cache line from address P to a location "closer" to the
+   processor.  The selector I specifies the type of prefetch operation.  */
+#ifdef __OPTIMIZE__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_prefetch (const void *__P, enum _mm_hint __I)
+{
+  __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3);
+}
+#else
+#define _mm_prefetch(P, I) \
+  __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3))
+#endif
+
 #ifndef __SSE__
 #pragma GCC push_options
 #pragma GCC target("sse")
@@ -50,18 +75,6 @@  typedef float __v4sf __attribute__ ((__vector_size
 #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
  (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
 
-/* Constants for use with _mm_prefetch.  */
-enum _mm_hint
-{
-  /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit.  */
-  _MM_HINT_ET0 = 5,
-  _MM_HINT_ET1 = 6,
-  _MM_HINT_T0 = 3,
-  _MM_HINT_T1 = 2,
-  _MM_HINT_T2 = 1,
-  _MM_HINT_NTA = 0
-};
-
 /* Bits in the MXCSR.  */
 #define _MM_EXCEPT_MASK       0x003f
 #define _MM_EXCEPT_INVALID    0x0001
@@ -1188,19 +1201,6 @@  _m_psadbw (__m64 __A, __m64 __B)
   return _mm_sad_pu8 (__A, __B);
 }
 
-/* Loads one cache line from address P to a location "closer" to the
-   processor.  The selector I specifies the type of prefetch operation.  */
-#ifdef __OPTIMIZE__
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_prefetch (const void *__P, enum _mm_hint __I)
-{
-  __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3);
-}
-#else
-#define _mm_prefetch(P, I) \
-  __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3))
-#endif
-
 /* Stores the data in A to the address P without polluting the caches.  */
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_stream_pi (__m64 *__P, __m64 __A)