Message ID | 1397480031-5847-1-git-send-email-sihai.ysh@alibaba-inc.com |
---|---|
State | New |
Headers | show |
On Mon, Apr 14, 2014 at 5:53 AM, <ling.ma.program@gmail.com> wrote: > From: Sihai Yao <sihai.ysh@alibaba-inc.com> > > This patch sets bit_AVX2_Usable of __cpu_features.feature by checking > COMMON_CPUID_INDEX_7 for Haswell. Architecture related assembler file > can use this bit to determine calling path. > > --- > This version removed the unrelated cpu module branch code and FEATURE_INDEX_7, > which is unusefull for AVX > > ChangeLog | 8 ++++++++ > sysdeps/x86_64/multiarch/ifunc-defines.sym | 1 + > sysdeps/x86_64/multiarch/init-arch.c | 3 +++ > sysdeps/x86_64/multiarch/init-arch.h | 8 ++++++++ > 4 files changed, 20 insertions(+) > > diff --git a/ChangeLog b/ChangeLog > index fb0177d..ba8980c 100644 > --- a/ChangeLog > +++ b/ChangeLog > @@ -1,3 +1,11 @@ > +2014-04-04 Sihai Yao <sihai.ysh@alibaba-inc.com> Need a blank line here. > + * sysdeps/x86_64/multiarch/ifunc-defines.sym: Add COMMON_CPU_INDEX_7 and > + FEATURE_INDEX_7. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FEATURE_INDEX_7 has been removed. > + * sysdeps/x86_64/multiarch/init-arch.c: Add AVX2 detection from cpu > + features word of COMMON_CPUID_INDEX_7. > + * sysdeps/x86_64/multiarch/init-arch.h: Add bit_AVX2_Usable and > + index_AVX2_Usable for future assembly code to determing calling path. > + It looks good to me, except for ChangeLog. > 2014-04-10 Torvald Riegel <triegel@redhat.com> > > * benchtests/pthread_once-inputs: New file. 
> diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym > index eb1538a..a410d88 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-defines.sym > +++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym > @@ -17,4 +17,5 @@ FEATURE_OFFSET offsetof (struct cpu_features, feature) > FEATURE_SIZE sizeof (unsigned int) > > COMMON_CPUID_INDEX_1 > +COMMON_CPUID_INDEX_7 > FEATURE_INDEX_1 > diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c > index db74d97..2a6dcb7 100644 > --- a/sysdeps/x86_64/multiarch/init-arch.c > +++ b/sysdeps/x86_64/multiarch/init-arch.c > @@ -167,6 +167,9 @@ __init_cpu_features (void) > /* Determine if AVX is usable. */ > if (CPUID_AVX) > __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable; > + /* Determine if AVX2 is usable. */ > + if (CPUID_AVX2) > + __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable; > /* Determine if FMA is usable. */ > if (CPUID_FMA) > __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable; > diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h > index 793707a..813b6de 100644 > --- a/sysdeps/x86_64/multiarch/init-arch.h > +++ b/sysdeps/x86_64/multiarch/init-arch.h > @@ -24,6 +24,7 @@ > #define bit_FMA_Usable (1 << 7) > #define bit_FMA4_Usable (1 << 8) > #define bit_Slow_SSE4_2 (1 << 9) > +#define bit_AVX2_Usable (1 << 10) > > /* CPUID Feature flags. */ > > @@ -40,6 +41,7 @@ > > /* COMMON_CPUID_INDEX_7. */ > #define bit_RTM (1 << 11) > +#define bit_AVX2 (1 << 5) > > /* XCR0 Feature flags. 
*/ > #define bit_XMM_state (1 << 1) > @@ -54,6 +56,7 @@ > # define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET > # define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET > # define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET > +# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET > > # define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE > # define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE > @@ -64,6 +67,7 @@ > # define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE > # define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE > # define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE > +# define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE > > #else /* __ASSEMBLER__ */ > > @@ -145,6 +149,8 @@ extern const struct cpu_features *__get_cpu_features (void) > HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4) > # define CPUID_RTM \ > HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM) > +# define CPUID_AVX2 \ > + HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2) > > /* HAS_* evaluates to true if we may use the feature at runtime. 
*/ > # define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2) > @@ -153,6 +159,7 @@ extern const struct cpu_features *__get_cpu_features (void) > # define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1) > # define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2) > # define HAS_RTM HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM) > +# define HAS_AVX2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_AVX2) > > # define index_Fast_Rep_String FEATURE_INDEX_1 > # define index_Fast_Copy_Backward FEATURE_INDEX_1 > @@ -163,6 +170,7 @@ extern const struct cpu_features *__get_cpu_features (void) > # define index_FMA_Usable FEATURE_INDEX_1 > # define index_FMA4_Usable FEATURE_INDEX_1 > # define index_Slow_SSE4_2 FEATURE_INDEX_1 > +# define index_AVX2_Usable FEATURE_INDEX_1 > > # define HAS_ARCH_FEATURE(name) \ > ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) > -- > 1.8.1.4 >
diff --git a/ChangeLog b/ChangeLog index fb0177d..ba8980c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2014-04-04 Sihai Yao <sihai.ysh@alibaba-inc.com> + * sysdeps/x86_64/multiarch/ifunc-defines.sym: Add COMMON_CPU_INDEX_7 and + FEATURE_INDEX_7. + * sysdeps/x86_64/multiarch/init-arch.c: Add AVX2 detection from cpu + features word of COMMON_CPUID_INDEX_7. + * sysdeps/x86_64/multiarch/init-arch.h: Add bit_AVX2_Usable and + index_AVX2_Usable for future assembly code to determing calling path. + 2014-04-10 Torvald Riegel <triegel@redhat.com> * benchtests/pthread_once-inputs: New file. diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym index eb1538a..a410d88 100644 --- a/sysdeps/x86_64/multiarch/ifunc-defines.sym +++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym @@ -17,4 +17,5 @@ FEATURE_OFFSET offsetof (struct cpu_features, feature) FEATURE_SIZE sizeof (unsigned int) COMMON_CPUID_INDEX_1 +COMMON_CPUID_INDEX_7 FEATURE_INDEX_1 diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c index db74d97..2a6dcb7 100644 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -167,6 +167,9 @@ __init_cpu_features (void) /* Determine if AVX is usable. */ if (CPUID_AVX) __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable; + /* Determine if AVX2 is usable. */ + if (CPUID_AVX2) + __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable; /* Determine if FMA is usable. */ if (CPUID_FMA) __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable; diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index 793707a..813b6de 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -24,6 +24,7 @@ #define bit_FMA_Usable (1 << 7) #define bit_FMA4_Usable (1 << 8) #define bit_Slow_SSE4_2 (1 << 9) +#define bit_AVX2_Usable (1 << 10) /* CPUID Feature flags. 
*/ @@ -40,6 +41,7 @@ /* COMMON_CPUID_INDEX_7. */ #define bit_RTM (1 << 11) +#define bit_AVX2 (1 << 5) /* XCR0 Feature flags. */ #define bit_XMM_state (1 << 1) @@ -54,6 +56,7 @@ # define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET # define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET # define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET # define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE # define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE @@ -64,6 +67,7 @@ # define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE # define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE # define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE #else /* __ASSEMBLER__ */ @@ -145,6 +149,8 @@ extern const struct cpu_features *__get_cpu_features (void) HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4) # define CPUID_RTM \ HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM) +# define CPUID_AVX2 \ + HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2) /* HAS_* evaluates to true if we may use the feature at runtime. 
*/ # define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2) @@ -153,6 +159,7 @@ extern const struct cpu_features *__get_cpu_features (void) # define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1) # define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2) # define HAS_RTM HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM) +# define HAS_AVX2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_AVX2) # define index_Fast_Rep_String FEATURE_INDEX_1 # define index_Fast_Copy_Backward FEATURE_INDEX_1 @@ -163,6 +170,7 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_FMA_Usable FEATURE_INDEX_1 # define index_FMA4_Usable FEATURE_INDEX_1 # define index_Slow_SSE4_2 FEATURE_INDEX_1 +# define index_AVX2_Usable FEATURE_INDEX_1 # define HAS_ARCH_FEATURE(name) \ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
From: Sihai Yao <sihai.ysh@alibaba-inc.com> This patch sets bit_AVX2_Usable in __cpu_features.feature by checking COMMON_CPUID_INDEX_7 for Haswell. Architecture-specific assembler files can use this bit to determine the calling path. --- This version removes the unrelated cpu module branch code and FEATURE_INDEX_7, which are not needed for AVX. ChangeLog | 8 ++++++++ sysdeps/x86_64/multiarch/ifunc-defines.sym | 1 + sysdeps/x86_64/multiarch/init-arch.c | 3 +++ sysdeps/x86_64/multiarch/init-arch.h | 8 ++++++++ 4 files changed, 20 insertions(+)