Message ID | CAFULd4Z+k9f1YMLZqNpWvb7XTMkk9XSKAn3FfwKAEK4pppbHBw@mail.gmail.com |
---|---|
State | New |
Headers | show |
On Mon, Mar 3, 2014 at 11:27 PM, Uros Bizjak <ubizjak@gmail.com> wrote: >> The new gcc.target/i386/prefetchwt1-1.c test currently FAILs on Solaris 9/x86: >> >> FAIL: gcc.target/i386/prefetchwt1-1.c (test for excess errors) >> Excess errors: >> /var/gcc/regression/trunk/9-gcc-gas/build/gcc/include/xmmintrin.h:1195:1: error: >> inlining failed in call to always_inline '_mm_prefetch': target specific option >> mismatch >> /vol/gcc/src/hg/trunk/local/gcc/testsuite/gcc.target/i386/prefetchwt1-1.c:12:5: >> error: called from here >> >> gcc.target/i386/prefetchwt1-1.c: output file does not exist >> UNRESOLVED: gcc.target/i386/prefetchwt1-1.c scan-assembler [ \\t]+prefetchwt1[ \ >> \t]+ >> >> This can be fixed by compiling with -msse2. > > Actually, we should take prefetch instructions out of various GCC > target pragmas. Patterns that emit these instructions are designed to > (depending on selected ISA) always emit the most optimal prefetch > instruction. > > The patch also changes the compiler to emit prefetchwt1 only for > _MM_HINT_T1, while for _MM_HINT_T0, it still emits prefetchw. In > addition, the patch corrects the wrong _MM_HINT_ET0 value. > > Patch was bootstrapped and tested on x86_64-pc-linux-gnu {,-m32} and > committed to mainline SVN. > > 2014-03-03 Uros Bizjak <ubizjak@gmail.com> > > * config/i386/xmmintrin.h (enum _mm_hint) <_MM_HINT_ET0>: Correct > hint value. > (_mm_prefetch): Move out of GCC target("sse") pragma. > * config/i386/prfchwintrin.h (_m_prefetchw): Move out of > GCC target("prfchw") pragma. > * config/i386/i386.md (prefetch): Emit prefetchwt1 only > for locality <= 2. > * config/i386/i386.c (ix86_option_override_internal): Enable > -mprfchw with -mprefetchwt1. 
BTW: There are a couple of new testsuite failures: FAIL: gcc.target/i386/avx512pf-vscatterpf0dpd-1.c (test for excess errors) UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0dpd-1.c scan-assembler-times vscatterpf0dpd[ \\\\t]+[^\\n]*%ymm[0-9] 2 UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0dpd-1.c scan-assembler-times vscatterpf0dpd[ \\\\t]+[^\\n]*{%k[1-7] 1 FAIL: gcc.target/i386/avx512pf-vscatterpf0dps-1.c (test for excess errors) UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0dps-1.c scan-assembler-times vscatterpf0dps[ \\\\t]+[^\\n]*%zmm[0-9] 2 UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0dps-1.c scan-assembler-times vscatterpf0dps[ \\\\t]+[^\\n]*{%k[1-7] 1 FAIL: gcc.target/i386/avx512pf-vscatterpf0qpd-1.c (test for excess errors) UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0qpd-1.c scan-assembler-times vscatterpf0qpd[ \\\\t]+[^\\n]*%zmm[0-9] 2 UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0qpd-1.c scan-assembler-times vscatterpf0qpd[ \\\\t]+[^\\n]*{%k[1-7] 1 FAIL: gcc.target/i386/avx512pf-vscatterpf0qps-1.c (test for excess errors) UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0qps-1.c scan-assembler-times vscatterpf0qps[ \\\\t]+[^\\n]*%zmm[0-9] 2 UNRESOLVED: gcc.target/i386/avx512pf-vscatterpf0qps-1.c scan-assembler-times vscatterpf0qps[ \\\\t]+[^\\n]*{%k[1-7] 1 They are all: FAIL: gcc.target/i386/avx512pf-vscatterpf0dpd-1.c (test for excess errors) Excess errors: /ssd/uros/gcc-build/gcc/include/avx512pfintrin.h:108:3: error: the last argument must be hint 0 or 1 They are due to _MM_HINT_ET0 fix, and probably show that the pattern was not updated when hint constants were adjusted to 2 and 3. Kirill, can you please look at this inconsistency? Uros.
Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 208281) +++ config/i386/i386.c (working copy) @@ -3874,8 +3874,9 @@ ix86_option_override_internal (bool main_args_p, || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags))) x86_prefetch_sse = true; - /* Enable prefetch{,w} instructions for -m3dnow. */ - if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)) + /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */ + if (TARGET_3DNOW_P (opts->x_ix86_isa_flags) + || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit; Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 208281) +++ config/i386/i386.md (working copy) @@ -17867,7 +17867,7 @@ supported by SSE counterpart or the SSE prefetch is not available (K6 machines). Otherwise use SSE prefetch as it allows specifying of locality. */ - if (TARGET_PREFETCHWT1 && write) + if (TARGET_PREFETCHWT1 && write && locality <= 2) operands[2] = const2_rtx; else if (TARGET_PRFCHW && (write || !TARGET_PREFETCH_SSE)) operands[2] = GEN_INT (3); Index: config/i386/prfchwintrin.h =================================================================== --- config/i386/prfchwintrin.h (revision 208281) +++ config/i386/prfchwintrin.h (working copy) @@ -25,16 +25,9 @@ # error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead." 
#endif - #ifndef _PRFCHWINTRIN_H_INCLUDED #define _PRFCHWINTRIN_H_INCLUDED -#ifndef __PRFCHW__ -#pragma GCC push_options -#pragma GCC target("prfchw") -#define __DISABLE_PRFCHW__ -#endif /* __PRFCHW__ */ - extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_prefetchw (void *__P) { @@ -41,9 +34,4 @@ _m_prefetchw (void *__P) __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */); } -#ifdef __DISABLE_PRFCHW__ -#undef __DISABLE_PRFCHW__ -#pragma GCC pop_options -#endif /* __DISABLE_PRFCHW__ */ - #endif /* _PRFCHWINTRIN_H_INCLUDED */ Index: config/i386/xmmintrin.h =================================================================== --- config/i386/xmmintrin.h (revision 208281) +++ config/i386/xmmintrin.h (working copy) @@ -33,6 +33,31 @@ /* Get _mm_malloc () and _mm_free (). */ #include <mm_malloc.h> +/* Constants for use with _mm_prefetch. */ +enum _mm_hint +{ + /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit. */ + _MM_HINT_ET0 = 7, + _MM_HINT_ET1 = 6, + _MM_HINT_T0 = 3, + _MM_HINT_T1 = 2, + _MM_HINT_T2 = 1, + _MM_HINT_NTA = 0 +}; + +/* Loads one cache line from address P to a location "closer" to the + processor. The selector I specifies the type of prefetch operation. */ +#ifdef __OPTIMIZE__ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_prefetch (const void *__P, enum _mm_hint __I) +{ + __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3); +} +#else +#define _mm_prefetch(P, I) \ + __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3)) +#endif + #ifndef __SSE__ #pragma GCC push_options #pragma GCC target("sse") @@ -50,18 +75,6 @@ typedef float __v4sf __attribute__ ((__vector_size #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) -/* Constants for use with _mm_prefetch. */ -enum _mm_hint -{ - /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit. 
*/ - _MM_HINT_ET0 = 5, - _MM_HINT_ET1 = 6, - _MM_HINT_T0 = 3, - _MM_HINT_T1 = 2, - _MM_HINT_T2 = 1, - _MM_HINT_NTA = 0 -}; - /* Bits in the MXCSR. */ #define _MM_EXCEPT_MASK 0x003f #define _MM_EXCEPT_INVALID 0x0001 @@ -1188,19 +1201,6 @@ _m_psadbw (__m64 __A, __m64 __B) return _mm_sad_pu8 (__A, __B); } -/* Loads one cache line from address P to a location "closer" to the - processor. The selector I specifies the type of prefetch operation. */ -#ifdef __OPTIMIZE__ -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_prefetch (const void *__P, enum _mm_hint __I) -{ - __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3); -} -#else -#define _mm_prefetch(P, I) \ - __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3)) -#endif - /* Stores the data in A to the address P without polluting the caches. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_pi (__m64 *__P, __m64 __A)