Message ID | 20190209132352.1828-2-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | V2: Emulate MMX intrinsics with SSE | expand |
On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: > In 64-bit mode, SSE2 can be used to emulate MMX instructions without > 3DNOW. We can use SSE2 to support 64-bit vectors. > > PR target/89021 > * config/i386/i386.h (TARGET_MMX_WITH_SSE): New. > * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector > modes for TARGET_MMX_WITH_SSE. > (SSE_REG_MODE_P): Likewise. > --- > gcc/config/i386/i386.h | 20 ++++++++++++++++++-- > 1 file changed, 18 insertions(+), 2 deletions(-) > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 83b025e0cf5..c1df3ec3326 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -585,6 +585,11 @@ extern unsigned char > ix86_arch_features[X86_ARCH_LAST]; > > #define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387) > > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions. > + FIXME: All 3DNOW patterns needs to be updated with SSE emulation. */ > +#define TARGET_MMX_WITH_SSE \ > + (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW) > + > extern unsigned char x86_prefetch_sse; > #define TARGET_PREFETCH_SSE x86_prefetch_sse > > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc, > const char **argv); > || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \ > || (MODE) == TFmode || (MODE) == V1TImode) > > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we > + want to include 8-byte vector modes, like V2SFmode, but not DImode > + nor SImode. */ This is strange, since we already allow all MMX modes in SSE registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we return: return ((TARGET_AVX && VALID_AVX256_REG_OR_OI_MODE (mode)) || VALID_SSE_REG_MODE (mode) || VALID_SSE2_REG_MODE (mode) || VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode)); I'd expect that changed VALID_SSE2_REG_MODE affects only ix86_vector_mode_supported_p when MMX is disabled and perhaps ix86_set_reg_reg_cost cost function. Are there any concrete issues when allowing all MMX (including 3DNOW?) modes in VALID_SSE2_REG_MODE? Uros. > #define VALID_SSE2_REG_MODE(MODE) \ > ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ > - || (MODE) == V2DImode || (MODE) == DFmode) > + || (MODE) == V2DImode || (MODE) == DFmode \ > + || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode \ > + || (MODE) == V4HImode \ > + || (MODE) == V2SImode \ > + || (MODE) == V2SFmode))) > > #define VALID_SSE_REG_MODE(MODE) \ > ((MODE) == V1TImode || (MODE) == TImode \ > @@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc, > const char **argv); > || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \ > || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode \ > || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \ > - || (MODE) == V16SFmode) > + || (MODE) == V16SFmode \ > + || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode \ > + || (MODE) == V4HImode \ > + || (MODE) == V2SImode \ > + || (MODE) == V2SFmode))) > > #define X87_FLOAT_MODE_P(MODE) \ > (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == > XFmode)) > -- > 2.20.1 > >
On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: > > In 64-bit mode, SSE2 can be used to emulate MMX instructions without > > 3DNOW. We can use SSE2 to support 64-bit vectors. > > > > PR target/89021 > > * config/i386/i386.h (TARGET_MMX_WITH_SSE): New. > > * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector > > modes for TARGET_MMX_WITH_SSE. > > (SSE_REG_MODE_P): Likewise. > > --- > > gcc/config/i386/i386.h | 20 ++++++++++++++++++-- > > 1 file changed, 18 insertions(+), 2 deletions(-) > > > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > > index 83b025e0cf5..c1df3ec3326 100644 > > --- a/gcc/config/i386/i386.h > > +++ b/gcc/config/i386/i386.h > > @@ -585,6 +585,11 @@ extern unsigned char > > ix86_arch_features[X86_ARCH_LAST]; > > > > #define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387) > > > > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions. > > + FIXME: All 3DNOW patterns needs to be updated with SSE emulation. */ > > +#define TARGET_MMX_WITH_SSE \ > > + (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW) > > + > > extern unsigned char x86_prefetch_sse; > > #define TARGET_PREFETCH_SSE x86_prefetch_sse > > > > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc, > > const char **argv); > > || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \ > > || (MODE) == TFmode || (MODE) == V1TImode) > > > > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we > > + want to include 8-byte vector modes, like V2SFmode, but not DImode > > + nor SImode. */ > > This is strange, since we already allow all MMX modes in SSE > registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we > return: > > return ((TARGET_AVX > && VALID_AVX256_REG_OR_OI_MODE (mode)) > || VALID_SSE_REG_MODE (mode) > || VALID_SSE2_REG_MODE (mode) > || VALID_MMX_REG_MODE (mode) > || VALID_MMX_REG_MODE_3DNOW (mode)); > > I'd expect that changed VALID_SSE2_REG_MODE affects only > ix86_vector_mode_supported_p when MMX is disabled and perhaps > ix86_set_reg_reg_cost cost function. > > Are there any concrete issues when allowing all MMX (including 3DNOW?) > modes in VALID_SSE2_REG_MODE? The problem is with DImode and SImode. All other vector modes, including V2SF is OK. With DImode and SImode, I got following regressions: FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias cannot be created; target is discardable" FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n FAIL: go test misc/cgo/testcarchive gcc.dg/pr39323-3.c is due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261 and /* Decide whether a variable of mode MODE should be 128 bit aligned. */ #define ALIGN_MODE_128(MODE) \ ((MODE) == XFmode || SSE_REG_MODE_P (MODE)) SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different places. i386 backend may not be prepared to deal them in SSE_REG_MODE_P nor VALID_SSE2_REG_MODE. > Uros. > > > #define VALID_SSE2_REG_MODE(MODE) \ > > ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ > > - || (MODE) == V2DImode || (MODE) == DFmode) > > + || (MODE) == V2DImode || (MODE) == DFmode \ > > + || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode \ > > + || (MODE) == V4HImode \ > > + || (MODE) == V2SImode \ > > + || (MODE) == V2SFmode))) > > > > #define VALID_SSE_REG_MODE(MODE) \ > > ((MODE) == V1TImode || (MODE) == TImode \ > > @@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc, > > const char **argv); > > || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \ > > || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode \ > > || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \ > > - || (MODE) == V16SFmode) > > + || (MODE) == V16SFmode \ > > + || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode \ > > + || (MODE) == V4HImode \ > > + || (MODE) == V2SImode \ > > + || (MODE) == V2SFmode))) > > > > #define X87_FLOAT_MODE_P(MODE) \ > > (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == > > XFmode)) > > -- > > 2.20.1 > > > >
On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote: >> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without >> > 3DNOW. We can use SSE2 to support 64-bit vectors. >> > >> > PR target/89021 >> > * config/i386/i386.h (TARGET_MMX_WITH_SSE): New. >> > * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector >> > modes for TARGET_MMX_WITH_SSE. >> > (SSE_REG_MODE_P): Likewise. >> > --- >> > gcc/config/i386/i386.h | 20 ++++++++++++++++++-- >> > 1 file changed, 18 insertions(+), 2 deletions(-) >> > >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h >> > index 83b025e0cf5..c1df3ec3326 100644 >> > --- a/gcc/config/i386/i386.h >> > +++ b/gcc/config/i386/i386.h >> > @@ -585,6 +585,11 @@ extern unsigned char >> > ix86_arch_features[X86_ARCH_LAST]; >> > >> > #define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387) >> > >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions. >> > + FIXME: All 3DNOW patterns needs to be updated with SSE emulation. >> > */ >> > +#define TARGET_MMX_WITH_SSE \ >> > + (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW) >> > + >> > extern unsigned char x86_prefetch_sse; >> > #define TARGET_PREFETCH_SSE x86_prefetch_sse >> > >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int >> > argc, >> > const char **argv); >> > || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \ >> > || (MODE) == TFmode || (MODE) == V1TImode) >> > >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we >> > + want to include 8-byte vector modes, like V2SFmode, but not DImode >> > + nor SImode. */ >> >> This is strange, since we already allow all MMX modes in SSE >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we >> return: >> >> return ((TARGET_AVX >> && VALID_AVX256_REG_OR_OI_MODE (mode)) >> || VALID_SSE_REG_MODE (mode) >> || VALID_SSE2_REG_MODE (mode) >> || VALID_MMX_REG_MODE (mode) >> || VALID_MMX_REG_MODE_3DNOW (mode)); >> >> I'd expect that changed VALID_SSE2_REG_MODE affects only >> ix86_vector_mode_supported_p when MMX is disabled and perhaps >> ix86_set_reg_reg_cost cost function. >> >> Are there any concrete issues when allowing all MMX (including 3DNOW?) >> modes in VALID_SSE2_REG_MODE? > > The problem is with DImode and SImode. All other vector modes, including > V2SF is OK. With DImode and SImode, I got following regressions: > > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias > cannot be created; target is discardable" > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n > FAIL: go test misc/cgo/testcarchive > > gcc.dg/pr39323-3.c is due to > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261 > > and > > /* Decide whether a variable of mode MODE should be 128 bit aligned. */ > #define ALIGN_MODE_128(MODE) \ > ((MODE) == XFmode || SSE_REG_MODE_P (MODE)) Hm, this is a bit worrying, we don't want to introduce ABI incompatibilites w.r.t. alignment. We still need to be ABI compatible for MMX values and emit unaligned loads/stores when necessary. > SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different > places. i386 backend may not be prepared to deal them in SSE_REG_MODE_P > nor VALID_SSE2_REG_MODE. I think we have to review the usage of these two changed defines to prevent any ABI issues or other hidden issues. Uros.
On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: > > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote: > >> > >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: > >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without > >> > 3DNOW. We can use SSE2 to support 64-bit vectors. > >> > > >> > PR target/89021 > >> > * config/i386/i386.h (TARGET_MMX_WITH_SSE): New. > >> > * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector > >> > modes for TARGET_MMX_WITH_SSE. > >> > (SSE_REG_MODE_P): Likewise. > >> > --- > >> > gcc/config/i386/i386.h | 20 ++++++++++++++++++-- > >> > 1 file changed, 18 insertions(+), 2 deletions(-) > >> > > >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > >> > index 83b025e0cf5..c1df3ec3326 100644 > >> > --- a/gcc/config/i386/i386.h > >> > +++ b/gcc/config/i386/i386.h > >> > @@ -585,6 +585,11 @@ extern unsigned char > >> > ix86_arch_features[X86_ARCH_LAST]; > >> > > >> > #define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387) > >> > > >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions. > >> > + FIXME: All 3DNOW patterns needs to be updated with SSE emulation. > >> > */ > >> > +#define TARGET_MMX_WITH_SSE \ > >> > + (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW) > >> > + > >> > extern unsigned char x86_prefetch_sse; > >> > #define TARGET_PREFETCH_SSE x86_prefetch_sse > >> > > >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int > >> > argc, > >> > const char **argv); > >> > || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \ > >> > || (MODE) == TFmode || (MODE) == V1TImode) > >> > > >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we > >> > + want to include 8-byte vector modes, like V2SFmode, but not DImode > >> > + nor SImode. */ > >> > >> This is strange, since we already allow all MMX modes in SSE > >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we > >> return: > >> > >> return ((TARGET_AVX > >> && VALID_AVX256_REG_OR_OI_MODE (mode)) > >> || VALID_SSE_REG_MODE (mode) > >> || VALID_SSE2_REG_MODE (mode) > >> || VALID_MMX_REG_MODE (mode) > >> || VALID_MMX_REG_MODE_3DNOW (mode)); > >> > >> I'd expect that changed VALID_SSE2_REG_MODE affects only > >> ix86_vector_mode_supported_p when MMX is disabled and perhaps > >> ix86_set_reg_reg_cost cost function. > >> > >> Are there any concrete issues when allowing all MMX (including 3DNOW?) > >> modes in VALID_SSE2_REG_MODE? > > > > The problem is with DImode and SImode. All other vector modes, including > > V2SF is OK. With DImode and SImode, I got following regressions: > > > > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias > > cannot be created; target is discardable" > > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n > > FAIL: go test misc/cgo/testcarchive > > > > gcc.dg/pr39323-3.c is due to > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261 > > > > and > > > > /* Decide whether a variable of mode MODE should be 128 bit aligned. */ > > #define ALIGN_MODE_128(MODE) \ > > ((MODE) == XFmode || SSE_REG_MODE_P (MODE)) > > Hm, this is a bit worrying, we don't want to introduce ABI > incompatibilites w.r.t. alignment. We still need to be ABI compatible > for MMX values and emit unaligned loads/stores when necessary. We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE. And I don't think we should put DI and SI in them. > > SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different > > places. i386 backend may not be prepared to deal them in SSE_REG_MODE_P > > nor VALID_SSE2_REG_MODE. > > I think we have to review the usage of these two changed defines to > prevent any ABI issues or other hidden issues. > Absolutely.
On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: > On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak <ubizjak@gmail.com> wrote: >> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: >> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote: >> >> >> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: >> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without >> >> > 3DNOW. We can use SSE2 to support 64-bit vectors. >> >> > >> >> > PR target/89021 >> >> > * config/i386/i386.h (TARGET_MMX_WITH_SSE): New. >> >> > * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit >> >> > vector >> >> > modes for TARGET_MMX_WITH_SSE. >> >> > (SSE_REG_MODE_P): Likewise. >> >> > --- >> >> > gcc/config/i386/i386.h | 20 ++++++++++++++++++-- >> >> > 1 file changed, 18 insertions(+), 2 deletions(-) >> >> > >> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h >> >> > index 83b025e0cf5..c1df3ec3326 100644 >> >> > --- a/gcc/config/i386/i386.h >> >> > +++ b/gcc/config/i386/i386.h >> >> > @@ -585,6 +585,11 @@ extern unsigned char >> >> > ix86_arch_features[X86_ARCH_LAST]; >> >> > >> >> > #define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387) >> >> > >> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions. >> >> > + FIXME: All 3DNOW patterns needs to be updated with SSE >> >> > emulation. >> >> > */ >> >> > +#define TARGET_MMX_WITH_SSE \ >> >> > + (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW) >> >> > + >> >> > extern unsigned char x86_prefetch_sse; >> >> > #define TARGET_PREFETCH_SSE x86_prefetch_sse >> >> > >> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int >> >> > argc, >> >> > const char **argv); >> >> > || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode >> >> > \ >> >> > || (MODE) == TFmode || (MODE) == V1TImode) >> >> > >> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since >> >> > we >> >> > + want to include 8-byte vector modes, like V2SFmode, but not >> >> > DImode >> >> > + nor SImode. */ >> >> >> >> This is strange, since we already allow all MMX modes in SSE >> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we >> >> return: >> >> >> >> return ((TARGET_AVX >> >> && VALID_AVX256_REG_OR_OI_MODE (mode)) >> >> || VALID_SSE_REG_MODE (mode) >> >> || VALID_SSE2_REG_MODE (mode) >> >> || VALID_MMX_REG_MODE (mode) >> >> || VALID_MMX_REG_MODE_3DNOW (mode)); >> >> >> >> I'd expect that changed VALID_SSE2_REG_MODE affects only >> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps >> >> ix86_set_reg_reg_cost cost function. >> >> >> >> Are there any concrete issues when allowing all MMX (including 3DNOW?) >> >> modes in VALID_SSE2_REG_MODE? >> > >> > The problem is with DImode and SImode. All other vector modes, >> > including >> > V2SF is OK. With DImode and SImode, I got following regressions: >> > >> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias >> > cannot be created; target is discardable" >> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ >> > \t]*\n >> > FAIL: go test misc/cgo/testcarchive >> > >> > gcc.dg/pr39323-3.c is due to >> > >> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261 >> > >> > and >> > >> > /* Decide whether a variable of mode MODE should be 128 bit aligned. >> > */ >> > #define ALIGN_MODE_128(MODE) \ >> > ((MODE) == XFmode || SSE_REG_MODE_P (MODE)) >> >> Hm, this is a bit worrying, we don't want to introduce ABI >> incompatibilites w.r.t. alignment. We still need to be ABI compatible >> for MMX values and emit unaligned loads/stores when necessary. > > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE. > And I don't think we should put DI and SI in them. Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE && VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to a big-hammer approach of changing wide-used defines like SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example, ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P, while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is changed. Uros. > >> > SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different >> > places. i386 backend may not be prepared to deal them in >> > SSE_REG_MODE_P >> > nor VALID_SSE2_REG_MODE. >> >> I think we have to review the usage of these two changed defines to >> prevent any ABI issues or other hidden issues. >> > > Absolutely. > > -- > H.J. >
On Sat, Feb 9, 2019 at 10:27 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: > > On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak <ubizjak@gmail.com> wrote: > >> > >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: > >> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote: > >> >> > >> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: > >> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without > >> >> > 3DNOW. We can use SSE2 to support 64-bit vectors. > >> >> > > >> >> > PR target/89021 > >> >> > * config/i386/i386.h (TARGET_MMX_WITH_SSE): New. > >> >> > * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit > >> >> > vector > >> >> > modes for TARGET_MMX_WITH_SSE. > >> >> > (SSE_REG_MODE_P): Likewise. > >> >> > --- > >> >> > gcc/config/i386/i386.h | 20 ++++++++++++++++++-- > >> >> > 1 file changed, 18 insertions(+), 2 deletions(-) > >> >> > > >> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > >> >> > index 83b025e0cf5..c1df3ec3326 100644 > >> >> > --- a/gcc/config/i386/i386.h > >> >> > +++ b/gcc/config/i386/i386.h > >> >> > @@ -585,6 +585,11 @@ extern unsigned char > >> >> > ix86_arch_features[X86_ARCH_LAST]; > >> >> > > >> >> > #define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387) > >> >> > > >> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions. > >> >> > + FIXME: All 3DNOW patterns needs to be updated with SSE > >> >> > emulation. > >> >> > */ > >> >> > +#define TARGET_MMX_WITH_SSE \ > >> >> > + (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW) > >> >> > + > >> >> > extern unsigned char x86_prefetch_sse; > >> >> > #define TARGET_PREFETCH_SSE x86_prefetch_sse > >> >> > > >> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int > >> >> > argc, > >> >> > const char **argv); > >> >> > || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode > >> >> > \ > >> >> > || (MODE) == TFmode || (MODE) == V1TImode) > >> >> > > >> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since > >> >> > we > >> >> > + want to include 8-byte vector modes, like V2SFmode, but not > >> >> > DImode > >> >> > + nor SImode. */ > >> >> > >> >> This is strange, since we already allow all MMX modes in SSE > >> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we > >> >> return: > >> >> > >> >> return ((TARGET_AVX > >> >> && VALID_AVX256_REG_OR_OI_MODE (mode)) > >> >> || VALID_SSE_REG_MODE (mode) > >> >> || VALID_SSE2_REG_MODE (mode) > >> >> || VALID_MMX_REG_MODE (mode) > >> >> || VALID_MMX_REG_MODE_3DNOW (mode)); > >> >> > >> >> I'd expect that changed VALID_SSE2_REG_MODE affects only > >> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps > >> >> ix86_set_reg_reg_cost cost function. > >> >> > >> >> Are there any concrete issues when allowing all MMX (including 3DNOW?) > >> >> modes in VALID_SSE2_REG_MODE? > >> > > >> > The problem is with DImode and SImode. All other vector modes, > >> > including > >> > V2SF is OK. With DImode and SImode, I got following regressions: > >> > > >> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias > >> > cannot be created; target is discardable" > >> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ > >> > \t]*\n > >> > FAIL: go test misc/cgo/testcarchive > >> > > >> > gcc.dg/pr39323-3.c is due to > >> > > >> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261 > >> > > >> > and > >> > > >> > /* Decide whether a variable of mode MODE should be 128 bit aligned. > >> > */ > >> > #define ALIGN_MODE_128(MODE) \ > >> > ((MODE) == XFmode || SSE_REG_MODE_P (MODE)) > >> > >> Hm, this is a bit worrying, we don't want to introduce ABI > >> incompatibilites w.r.t. alignment. We still need to be ABI compatible > >> for MMX values and emit unaligned loads/stores when necessary. > > > > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE. > > And I don't think we should put DI and SI in them. > > Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they > are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE && > VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF. We only want 8-byte vector modes here. > a big-hammer approach of changing wide-used defines like > SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example, > ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P, > while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is > changed. I will give it a try.
On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: >> >> Hm, this is a bit worrying, we don't want to introduce ABI >> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible >> >> for MMX values and emit unaligned loads/stores when necessary. >> > >> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE. >> > And I don't think we should put DI and SI in them. >> >> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they >> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE && >> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to > > Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF. > We only want 8-byte vector modes here. Well, I'm not forcing VALID_MMX_REG_MODE here, it is just an example; the important part is in the addition of (TARGET_MMX_WITH_SSE && some_modes). Surely, we don't want to align SImode to 128 bits in ALIGN_MODE_128. Uros. >> a big-hammer approach of changing wide-used defines like >> SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example, >> ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P, >> while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is >> changed. > > I will give it a try. > > -- > H.J. >
On Sat, Feb 9, 2019 at 10:41 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote: > >> >> Hm, this is a bit worrying, we don't want to introduce ABI > >> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible > >> >> for MMX values and emit unaligned loads/stores when necessary. > >> > > >> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE. > >> > And I don't think we should put DI and SI in them. > >> > >> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they > >> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE && > >> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to > > > > Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF. > > We only want 8-byte vector modes here. > > Well, I'm not forcing VALID_MMX_REG_MODE here, it is just an example; > the important part is in the addition of (TARGET_MMX_WITH_SSE && > some_modes). Surely, we don't want to align SImode to 128 bits in > ALIGN_MODE_128. > I am testing this.
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 83b025e0cf5..c1df3ec3326 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -585,6 +585,11 @@ extern unsigned char ix86_arch_features[X86_ARCH_LAST]; #define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387) +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions. + FIXME: All 3DNOW patterns needs to be updated with SSE emulation. */ +#define TARGET_MMX_WITH_SSE \ + (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW) + extern unsigned char x86_prefetch_sse; #define TARGET_PREFETCH_SSE x86_prefetch_sse @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \ || (MODE) == TFmode || (MODE) == V1TImode) +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we + want to include 8-byte vector modes, like V2SFmode, but not DImode + nor SImode. */ #define VALID_SSE2_REG_MODE(MODE) \ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ - || (MODE) == V2DImode || (MODE) == DFmode) + || (MODE) == V2DImode || (MODE) == DFmode \ + || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode \ + || (MODE) == V4HImode \ + || (MODE) == V2SImode \ + || (MODE) == V2SFmode))) #define VALID_SSE_REG_MODE(MODE) \ ((MODE) == V1TImode || (MODE) == TImode \ @@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \ || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode \ || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \ - || (MODE) == V16SFmode) + || (MODE) == V16SFmode \ + || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode \ + || (MODE) == V4HImode \ + || (MODE) == V2SImode \ + || (MODE) == V2SFmode))) #define X87_FLOAT_MODE_P(MODE) \ (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode))