Message ID | 20180329124309.GA12667@intel.com |
---|---|
State | New |
Headers | show |
Series | i386: Enable AVX/AVX512 features only if supported by OSXSAVE | expand |
On Thu, Mar 29, 2018 at 2:43 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: > Enable AVX and AVX512 features only if their states are supported by > OSXSAVE. > > OK for trunk and release branches? > > > H.J. > --- > PR target/85100 > * config/i386/cpuinfo.c (XCR_XFEATURE_ENABLED_MASK): New. > (XSTATE_FP): Likewise. > (XSTATE_SSE): Likewise. > (XSTATE_YMM): Likewise. > (XSTATE_OPMASK): Likewise. > (XSTATE_ZMM): Likewise. > (XSTATE_HI_ZMM): Likewise. > (XCR_AVX_ENABLED_MASK): Likewise. > (XCR_AVX512F_ENABLED_MASK): Likewise. > (get_available_features): Enable AVX and AVX512 features only > if their states are supported by OSXSAVE OK for trunk and release branches after a couple of days without problems in trunk. Thanks, Uros. > --- > libgcc/config/i386/cpuinfo.c | 134 +++++++++++++++++++++++++++++-------------- > 1 file changed, 90 insertions(+), 44 deletions(-) > > diff --git a/libgcc/config/i386/cpuinfo.c b/libgcc/config/i386/cpuinfo.c > index 4eb3f5cd944..1dac110a79a 100644 > --- a/libgcc/config/i386/cpuinfo.c > +++ b/libgcc/config/i386/cpuinfo.c > @@ -240,6 +240,40 @@ get_available_features (unsigned int ecx, unsigned int edx, > unsigned int features = 0; > unsigned int features2 = 0; > > + /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */ > +#define XCR_XFEATURE_ENABLED_MASK 0x0 > +#define XSTATE_FP 0x1 > +#define XSTATE_SSE 0x2 > +#define XSTATE_YMM 0x4 > +#define XSTATE_OPMASK 0x20 > +#define XSTATE_ZMM 0x40 > +#define XSTATE_HI_ZMM 0x80 > + > +#define XCR_AVX_ENABLED_MASK \ > + (XSTATE_SSE | XSTATE_YMM) > +#define XCR_AVX512F_ENABLED_MASK \ > + (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM) > + > + /* Check if AVX and AVX512 are usable. */ > + int avx_usable = 0; > + int avx512_usable = 0; > + if ((ecx & bit_OSXSAVE)) > + { > + /* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and > + ZMM16-ZMM31 states are supported by OSXSAVE. 
*/ > + unsigned int xcrlow; > + unsigned int xcrhigh; > + asm (".byte 0x0f, 0x01, 0xd0" > + : "=a" (xcrlow), "=d" (xcrhigh) > + : "c" (XCR_XFEATURE_ENABLED_MASK)); > + if ((xcrlow & XCR_AVX_ENABLED_MASK) == XCR_AVX_ENABLED_MASK) > + { > + avx_usable = 1; > + avx512_usable = ((xcrlow & XCR_AVX512F_ENABLED_MASK) > + == XCR_AVX512F_ENABLED_MASK); > + } > + } > + > #define set_feature(f) \ > if (f < 32) features |= (1U << f); else features2 |= (1U << (f - 32)) > > @@ -265,10 +299,13 @@ get_available_features (unsigned int ecx, unsigned int edx, > set_feature (FEATURE_SSE4_1); > if (ecx & bit_SSE4_2) > set_feature (FEATURE_SSE4_2); > - if (ecx & bit_AVX) > - set_feature (FEATURE_AVX); > - if (ecx & bit_FMA) > - set_feature (FEATURE_FMA); > + if (avx_usable) > + { > + if (ecx & bit_AVX) > + set_feature (FEATURE_AVX); > + if (ecx & bit_FMA) > + set_feature (FEATURE_FMA); > + } > > /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */ > if (max_cpuid_level >= 7) > @@ -276,44 +313,50 @@ get_available_features (unsigned int ecx, unsigned int edx, > __cpuid_count (7, 0, eax, ebx, ecx, edx); > if (ebx & bit_BMI) > set_feature (FEATURE_BMI); > - if (ebx & bit_AVX2) > - set_feature (FEATURE_AVX2); > + if (avx_usable) > + { > + if (ebx & bit_AVX2) > + set_feature (FEATURE_AVX2); > + } > if (ebx & bit_BMI2) > set_feature (FEATURE_BMI2); > - if (ebx & bit_AVX512F) > - set_feature (FEATURE_AVX512F); > - if (ebx & bit_AVX512VL) > - set_feature (FEATURE_AVX512VL); > - if (ebx & bit_AVX512BW) > - set_feature (FEATURE_AVX512BW); > - if (ebx & bit_AVX512DQ) > - set_feature (FEATURE_AVX512DQ); > - if (ebx & bit_AVX512CD) > - set_feature (FEATURE_AVX512CD); > - if (ebx & bit_AVX512PF) > - set_feature (FEATURE_AVX512PF); > - if (ebx & bit_AVX512ER) > - set_feature (FEATURE_AVX512ER); > - if (ebx & bit_AVX512IFMA) > - set_feature (FEATURE_AVX512IFMA); > - if (ecx & bit_AVX512VBMI) > - set_feature (FEATURE_AVX512VBMI); > - if (ecx & bit_AVX512VBMI2) > - set_feature (FEATURE_AVX512VBMI2); 
> - if (ecx & bit_GFNI) > - set_feature (FEATURE_GFNI); > - if (ecx & bit_VPCLMULQDQ) > - set_feature (FEATURE_VPCLMULQDQ); > - if (ecx & bit_AVX512VNNI) > - set_feature (FEATURE_AVX512VNNI); > - if (ecx & bit_AVX512BITALG) > - set_feature (FEATURE_AVX512BITALG); > - if (ecx & bit_AVX512VPOPCNTDQ) > - set_feature (FEATURE_AVX512VPOPCNTDQ); > - if (edx & bit_AVX5124VNNIW) > - set_feature (FEATURE_AVX5124VNNIW); > - if (edx & bit_AVX5124FMAPS) > - set_feature (FEATURE_AVX5124FMAPS); > + if (avx512_usable) > + { > + if (ebx & bit_AVX512F) > + set_feature (FEATURE_AVX512F); > + if (ebx & bit_AVX512VL) > + set_feature (FEATURE_AVX512VL); > + if (ebx & bit_AVX512BW) > + set_feature (FEATURE_AVX512BW); > + if (ebx & bit_AVX512DQ) > + set_feature (FEATURE_AVX512DQ); > + if (ebx & bit_AVX512CD) > + set_feature (FEATURE_AVX512CD); > + if (ebx & bit_AVX512PF) > + set_feature (FEATURE_AVX512PF); > + if (ebx & bit_AVX512ER) > + set_feature (FEATURE_AVX512ER); > + if (ebx & bit_AVX512IFMA) > + set_feature (FEATURE_AVX512IFMA); > + if (ecx & bit_AVX512VBMI) > + set_feature (FEATURE_AVX512VBMI); > + if (ecx & bit_AVX512VBMI2) > + set_feature (FEATURE_AVX512VBMI2); > + if (ecx & bit_GFNI) > + set_feature (FEATURE_GFNI); > + if (ecx & bit_VPCLMULQDQ) > + set_feature (FEATURE_VPCLMULQDQ); > + if (ecx & bit_AVX512VNNI) > + set_feature (FEATURE_AVX512VNNI); > + if (ecx & bit_AVX512BITALG) > + set_feature (FEATURE_AVX512BITALG); > + if (ecx & bit_AVX512VPOPCNTDQ) > + set_feature (FEATURE_AVX512VPOPCNTDQ); > + if (edx & bit_AVX5124VNNIW) > + set_feature (FEATURE_AVX5124VNNIW); > + if (edx & bit_AVX5124FMAPS) > + set_feature (FEATURE_AVX5124FMAPS); > + } > } > > /* Check cpuid level of extended features. 
*/ > @@ -325,10 +368,13 @@ get_available_features (unsigned int ecx, unsigned int edx, > > if (ecx & bit_SSE4a) > set_feature (FEATURE_SSE4_A); > - if (ecx & bit_FMA4) > - set_feature (FEATURE_FMA4); > - if (ecx & bit_XOP) > - set_feature (FEATURE_XOP); > + if (avx_usable) > + { > + if (ecx & bit_FMA4) > + set_feature (FEATURE_FMA4); > + if (ecx & bit_XOP) > + set_feature (FEATURE_XOP); > + } > } > > __cpu_model.__cpu_features[0] = features; > -- > 2.14.3 >
This check will always disable AVX-512 on macOS, because the XNU kernel enables AVX-512 support only on demand, so the AVX-512 state bits are not yet set in XCR0 when this check runs: https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176 (I'm not against this change; I mention it just for information). 2018-03-29 16:05 GMT+03:00 Uros Bizjak <ubizjak@gmail.com>: > On Thu, Mar 29, 2018 at 2:43 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: >> Enable AVX and AVX512 features only if their states are supported by >> OSXSAVE. -- Ilya
On Fri, Mar 30, 2018 at 10:19 AM, Ilya Verbin <iverbin@gmail.com> wrote: > This check will always disable AVX-512 on macOS, because they > implemented on-demand support: > https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176 > Isn't xsaveopt designed for this?
2018-03-30 20:56 GMT+03:00 H.J. Lu <hjl.tools@gmail.com>: > On Fri, Mar 30, 2018 at 10:19 AM, Ilya Verbin <iverbin@gmail.com> wrote: >> This check will always disable AVX-512 on macOS, because they >> implemented on-demand support: >> https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176 >> > > Isn't xsaveopt designed for this? Maybe the goal was to reduce the size of the XSAVE state area allocated by default for each thread. > -- > H.J. -- Ilya
diff --git a/libgcc/config/i386/cpuinfo.c b/libgcc/config/i386/cpuinfo.c index 4eb3f5cd944..1dac110a79a 100644 --- a/libgcc/config/i386/cpuinfo.c +++ b/libgcc/config/i386/cpuinfo.c @@ -240,6 +240,40 @@ get_available_features (unsigned int ecx, unsigned int edx, unsigned int features = 0; unsigned int features2 = 0; + /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */ +#define XCR_XFEATURE_ENABLED_MASK 0x0 +#define XSTATE_FP 0x1 +#define XSTATE_SSE 0x2 +#define XSTATE_YMM 0x4 +#define XSTATE_OPMASK 0x20 +#define XSTATE_ZMM 0x40 +#define XSTATE_HI_ZMM 0x80 + +#define XCR_AVX_ENABLED_MASK \ + (XSTATE_SSE | XSTATE_YMM) +#define XCR_AVX512F_ENABLED_MASK \ + (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM) + + /* Check if AVX and AVX512 are usable. */ + int avx_usable = 0; + int avx512_usable = 0; + if ((ecx & bit_OSXSAVE)) + { + /* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and + ZMM16-ZMM31 states are supported by OSXSAVE. */ + unsigned int xcrlow; + unsigned int xcrhigh; + asm (".byte 0x0f, 0x01, 0xd0" + : "=a" (xcrlow), "=d" (xcrhigh) + : "c" (XCR_XFEATURE_ENABLED_MASK)); + if ((xcrlow & XCR_AVX_ENABLED_MASK) == XCR_AVX_ENABLED_MASK) + { + avx_usable = 1; + avx512_usable = ((xcrlow & XCR_AVX512F_ENABLED_MASK) + == XCR_AVX512F_ENABLED_MASK); + } + } + #define set_feature(f) \ if (f < 32) features |= (1U << f); else features2 |= (1U << (f - 32)) @@ -265,10 +299,13 @@ get_available_features (unsigned int ecx, unsigned int edx, set_feature (FEATURE_SSE4_1); if (ecx & bit_SSE4_2) set_feature (FEATURE_SSE4_2); - if (ecx & bit_AVX) - set_feature (FEATURE_AVX); - if (ecx & bit_FMA) - set_feature (FEATURE_FMA); + if (avx_usable) + { + if (ecx & bit_AVX) + set_feature (FEATURE_AVX); + if (ecx & bit_FMA) + set_feature (FEATURE_FMA); + } /* Get Advanced Features at level 7 (eax = 7, ecx = 0). 
*/ if (max_cpuid_level >= 7) @@ -276,44 +313,50 @@ get_available_features (unsigned int ecx, unsigned int edx, __cpuid_count (7, 0, eax, ebx, ecx, edx); if (ebx & bit_BMI) set_feature (FEATURE_BMI); - if (ebx & bit_AVX2) - set_feature (FEATURE_AVX2); + if (avx_usable) + { + if (ebx & bit_AVX2) + set_feature (FEATURE_AVX2); + } if (ebx & bit_BMI2) set_feature (FEATURE_BMI2); - if (ebx & bit_AVX512F) - set_feature (FEATURE_AVX512F); - if (ebx & bit_AVX512VL) - set_feature (FEATURE_AVX512VL); - if (ebx & bit_AVX512BW) - set_feature (FEATURE_AVX512BW); - if (ebx & bit_AVX512DQ) - set_feature (FEATURE_AVX512DQ); - if (ebx & bit_AVX512CD) - set_feature (FEATURE_AVX512CD); - if (ebx & bit_AVX512PF) - set_feature (FEATURE_AVX512PF); - if (ebx & bit_AVX512ER) - set_feature (FEATURE_AVX512ER); - if (ebx & bit_AVX512IFMA) - set_feature (FEATURE_AVX512IFMA); - if (ecx & bit_AVX512VBMI) - set_feature (FEATURE_AVX512VBMI); - if (ecx & bit_AVX512VBMI2) - set_feature (FEATURE_AVX512VBMI2); - if (ecx & bit_GFNI) - set_feature (FEATURE_GFNI); - if (ecx & bit_VPCLMULQDQ) - set_feature (FEATURE_VPCLMULQDQ); - if (ecx & bit_AVX512VNNI) - set_feature (FEATURE_AVX512VNNI); - if (ecx & bit_AVX512BITALG) - set_feature (FEATURE_AVX512BITALG); - if (ecx & bit_AVX512VPOPCNTDQ) - set_feature (FEATURE_AVX512VPOPCNTDQ); - if (edx & bit_AVX5124VNNIW) - set_feature (FEATURE_AVX5124VNNIW); - if (edx & bit_AVX5124FMAPS) - set_feature (FEATURE_AVX5124FMAPS); + if (avx512_usable) + { + if (ebx & bit_AVX512F) + set_feature (FEATURE_AVX512F); + if (ebx & bit_AVX512VL) + set_feature (FEATURE_AVX512VL); + if (ebx & bit_AVX512BW) + set_feature (FEATURE_AVX512BW); + if (ebx & bit_AVX512DQ) + set_feature (FEATURE_AVX512DQ); + if (ebx & bit_AVX512CD) + set_feature (FEATURE_AVX512CD); + if (ebx & bit_AVX512PF) + set_feature (FEATURE_AVX512PF); + if (ebx & bit_AVX512ER) + set_feature (FEATURE_AVX512ER); + if (ebx & bit_AVX512IFMA) + set_feature (FEATURE_AVX512IFMA); + if (ecx & bit_AVX512VBMI) + set_feature 
(FEATURE_AVX512VBMI); + if (ecx & bit_AVX512VBMI2) + set_feature (FEATURE_AVX512VBMI2); + if (ecx & bit_GFNI) + set_feature (FEATURE_GFNI); + if (ecx & bit_VPCLMULQDQ) + set_feature (FEATURE_VPCLMULQDQ); + if (ecx & bit_AVX512VNNI) + set_feature (FEATURE_AVX512VNNI); + if (ecx & bit_AVX512BITALG) + set_feature (FEATURE_AVX512BITALG); + if (ecx & bit_AVX512VPOPCNTDQ) + set_feature (FEATURE_AVX512VPOPCNTDQ); + if (edx & bit_AVX5124VNNIW) + set_feature (FEATURE_AVX5124VNNIW); + if (edx & bit_AVX5124FMAPS) + set_feature (FEATURE_AVX5124FMAPS); + } } /* Check cpuid level of extended features. */ @@ -325,10 +368,13 @@ get_available_features (unsigned int ecx, unsigned int edx, if (ecx & bit_SSE4a) set_feature (FEATURE_SSE4_A); - if (ecx & bit_FMA4) - set_feature (FEATURE_FMA4); - if (ecx & bit_XOP) - set_feature (FEATURE_XOP); + if (avx_usable) + { + if (ecx & bit_FMA4) + set_feature (FEATURE_FMA4); + if (ecx & bit_XOP) + set_feature (FEATURE_XOP); + } } __cpu_model.__cpu_features[0] = features;