diff mbox series

[01/43] i386: Allow 64-bit vector modes in SSE registers

Message ID 20190209132352.1828-2-hjl.tools@gmail.com
State New
Headers show
Series V2: Emulate MMX intrinsics with SSE | expand

Commit Message

H.J. Lu Feb. 9, 2019, 1:23 p.m. UTC
In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support 64-bit vectors.

	PR target/89021
	* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
	* config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
	modes for TARGET_MMX_WITH_SSE.
	(SSE_REG_MODE_P): Likewise.
---
 gcc/config/i386/i386.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

Comments

Uros Bizjak Feb. 9, 2019, 2:09 p.m. UTC | #1
On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> In 64-bit mode, SSE2 can be used to emulate MMX instructions without
> 3DNOW.  We can use SSE2 to support 64-bit vectors.
>
> 	PR target/89021
> 	* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
> 	* config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
> 	modes for TARGET_MMX_WITH_SSE.
> 	(SSE_REG_MODE_P): Likewise.
> ---
>  gcc/config/i386/i386.h | 20 ++++++++++++++++++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 83b025e0cf5..c1df3ec3326 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -585,6 +585,11 @@ extern unsigned char
> ix86_arch_features[X86_ARCH_LAST];
>
>  #define TARGET_FISTTP		(TARGET_SSE3 && TARGET_80387)
>
> +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
> +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
> +#define TARGET_MMX_WITH_SSE \
> +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
> +
>  extern unsigned char x86_prefetch_sse;
>  #define TARGET_PREFETCH_SSE	x86_prefetch_sse
>
> @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc,
> const char **argv);
>     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\
>     || (MODE) == TFmode || (MODE) == V1TImode)
>
> +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
> +   want to include 8-byte vector modes, like V2SFmode, but not DImode
> +   nor SImode.  */

This is strange, since we already allow all MMX modes in SSE
registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
return:

return ((TARGET_AVX
             && VALID_AVX256_REG_OR_OI_MODE (mode))
            || VALID_SSE_REG_MODE (mode)
            || VALID_SSE2_REG_MODE (mode)
            || VALID_MMX_REG_MODE (mode)
            || VALID_MMX_REG_MODE_3DNOW (mode));

I'd expect that changed VALID_SSE2_REG_MODE affects only
ix86_vector_mode_supported_p when MMX is disabled and perhaps
ix86_set_reg_reg_cost cost function.

Are there any concrete issues when allowing all MMX (including 3DNOW?)
modes in VALID_SSE2_REG_MODE?

Uros.

>  #define VALID_SSE2_REG_MODE(MODE)					\
>    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
> -   || (MODE) == V2DImode || (MODE) == DFmode)
> +   || (MODE) == V2DImode || (MODE) == DFmode				\
> +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode	\
> +			       || (MODE) == V4HImode			\
> +			       || (MODE) == V2SImode			\
> +			       || (MODE) == V2SFmode)))
>
>  #define VALID_SSE_REG_MODE(MODE)					\
>    ((MODE) == V1TImode || (MODE) == TImode				\
> @@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc,
> const char **argv);
>     || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode	\
>     || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode	\
>     || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode	\
> -   || (MODE) == V16SFmode)
> +   || (MODE) == V16SFmode						\
> +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode	\
> +			       || (MODE) == V4HImode			\
> +			       || (MODE) == V2SImode			\
> +			       || (MODE) == V2SFmode)))
>
>  #define X87_FLOAT_MODE_P(MODE)	\
>    (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) ==
> XFmode))
> --
> 2.20.1
>
>
H.J. Lu Feb. 9, 2019, 2:31 p.m. UTC | #2
On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
> >
> >       PR target/89021
> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
> >       modes for TARGET_MMX_WITH_SSE.
> >       (SSE_REG_MODE_P): Likewise.
> > ---
> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--
> >  1 file changed, 18 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index 83b025e0cf5..c1df3ec3326 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -585,6 +585,11 @@ extern unsigned char
> > ix86_arch_features[X86_ARCH_LAST];
> >
> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)
> >
> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
> > +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
> > +#define TARGET_MMX_WITH_SSE \
> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
> > +
> >  extern unsigned char x86_prefetch_sse;
> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
> >
> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc,
> > const char **argv);
> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \
> >     || (MODE) == TFmode || (MODE) == V1TImode)
> >
> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
> > +   want to include 8-byte vector modes, like V2SFmode, but not DImode
> > +   nor SImode.  */
>
> This is strange, since we already allow all MMX modes in SSE
> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
> return:
>
> return ((TARGET_AVX
>              && VALID_AVX256_REG_OR_OI_MODE (mode))
>             || VALID_SSE_REG_MODE (mode)
>             || VALID_SSE2_REG_MODE (mode)
>             || VALID_MMX_REG_MODE (mode)
>             || VALID_MMX_REG_MODE_3DNOW (mode));
>
> I'd expect that changed VALID_SSE2_REG_MODE affects only
> ix86_vector_mode_supported_p when MMX is disabled and perhaps
> ix86_set_reg_reg_cost cost function.
>
> Are there any concrete issues when allowing all MMX (including 3DNOW?)
> modes in VALID_SSE2_REG_MODE?

The problem is with DImode and SImode.  All other vector modes,  including
V2SF is OK.  With DImode and SImode, I got following regressions:

FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
cannot be created; target is discardable"
FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n
FAIL: go test misc/cgo/testcarchive

 gcc.dg/pr39323-3.c  is due to

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261

and

/* Decide whether a variable of mode MODE should be 128 bit aligned.  */
#define ALIGN_MODE_128(MODE) \
 ((MODE) == XFmode || SSE_REG_MODE_P (MODE))

SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
places.   i386 backend may not be prepared to deal them in SSE_REG_MODE_P
nor VALID_SSE2_REG_MODE.

> Uros.
>
> >  #define VALID_SSE2_REG_MODE(MODE)                                    \
> >    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode   \
> > -   || (MODE) == V2DImode || (MODE) == DFmode)
> > +   || (MODE) == V2DImode || (MODE) == DFmode                         \
> > +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode      \
> > +                            || (MODE) == V4HImode                    \
> > +                            || (MODE) == V2SImode                    \
> > +                            || (MODE) == V2SFmode)))
> >
> >  #define VALID_SSE_REG_MODE(MODE)                                     \
> >    ((MODE) == V1TImode || (MODE) == TImode                            \
> > @@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc,
> > const char **argv);
> >     || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \
> >     || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode        \
> >     || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode       \
> > -   || (MODE) == V16SFmode)
> > +   || (MODE) == V16SFmode                                            \
> > +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode      \
> > +                            || (MODE) == V4HImode                    \
> > +                            || (MODE) == V2SImode                    \
> > +                            || (MODE) == V2SFmode)))
> >
> >  #define X87_FLOAT_MODE_P(MODE)       \
> >    (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) ==
> > XFmode))
> > --
> > 2.20.1
> >
> >
Uros Bizjak Feb. 9, 2019, 3:03 p.m. UTC | #3
On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>>
>> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
>> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
>> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
>> >
>> >       PR target/89021
>> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
>> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
>> >       modes for TARGET_MMX_WITH_SSE.
>> >       (SSE_REG_MODE_P): Likewise.
>> > ---
>> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--
>> >  1 file changed, 18 insertions(+), 2 deletions(-)
>> >
>> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
>> > index 83b025e0cf5..c1df3ec3326 100644
>> > --- a/gcc/config/i386/i386.h
>> > +++ b/gcc/config/i386/i386.h
>> > @@ -585,6 +585,11 @@ extern unsigned char
>> > ix86_arch_features[X86_ARCH_LAST];
>> >
>> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)
>> >
>> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
>> > +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.
>> > */
>> > +#define TARGET_MMX_WITH_SSE \
>> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
>> > +
>> >  extern unsigned char x86_prefetch_sse;
>> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
>> >
>> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int
>> > argc,
>> > const char **argv);
>> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \
>> >     || (MODE) == TFmode || (MODE) == V1TImode)
>> >
>> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
>> > +   want to include 8-byte vector modes, like V2SFmode, but not DImode
>> > +   nor SImode.  */
>>
>> This is strange, since we already allow all MMX modes in SSE
>> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
>> return:
>>
>> return ((TARGET_AVX
>>              && VALID_AVX256_REG_OR_OI_MODE (mode))
>>             || VALID_SSE_REG_MODE (mode)
>>             || VALID_SSE2_REG_MODE (mode)
>>             || VALID_MMX_REG_MODE (mode)
>>             || VALID_MMX_REG_MODE_3DNOW (mode));
>>
>> I'd expect that changed VALID_SSE2_REG_MODE affects only
>> ix86_vector_mode_supported_p when MMX is disabled and perhaps
>> ix86_set_reg_reg_cost cost function.
>>
>> Are there any concrete issues when allowing all MMX (including 3DNOW?)
>> modes in VALID_SSE2_REG_MODE?
>
> The problem is with DImode and SImode.  All other vector modes,  including
> V2SF is OK.  With DImode and SImode, I got following regressions:
>
> FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
> cannot be created; target is discardable"
> FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n
> FAIL: go test misc/cgo/testcarchive
>
>  gcc.dg/pr39323-3.c  is due to
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261
>
> and
>
> /* Decide whether a variable of mode MODE should be 128 bit aligned.  */
> #define ALIGN_MODE_128(MODE) \
>  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))

Hm, this is a bit worrying, we don't want to introduce ABI
incompatibilites w.r.t. alignment. We still need to be ABI compatible
for MMX values and emit unaligned loads/stores when necessary.

> SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
> places.   i386 backend may not be prepared to deal them in SSE_REG_MODE_P
> nor VALID_SSE2_REG_MODE.

I think we have to review the usage of these two changed defines to
prevent any ABI issues or other hidden issues.

Uros.
H.J. Lu Feb. 9, 2019, 3:07 p.m. UTC | #4
On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >>
> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
> >> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
> >> >
> >> >       PR target/89021
> >> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
> >> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
> >> >       modes for TARGET_MMX_WITH_SSE.
> >> >       (SSE_REG_MODE_P): Likewise.
> >> > ---
> >> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--
> >> >  1 file changed, 18 insertions(+), 2 deletions(-)
> >> >
> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> >> > index 83b025e0cf5..c1df3ec3326 100644
> >> > --- a/gcc/config/i386/i386.h
> >> > +++ b/gcc/config/i386/i386.h
> >> > @@ -585,6 +585,11 @@ extern unsigned char
> >> > ix86_arch_features[X86_ARCH_LAST];
> >> >
> >> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)
> >> >
> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
> >> > +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.
> >> > */
> >> > +#define TARGET_MMX_WITH_SSE \
> >> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
> >> > +
> >> >  extern unsigned char x86_prefetch_sse;
> >> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
> >> >
> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int
> >> > argc,
> >> > const char **argv);
> >> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \
> >> >     || (MODE) == TFmode || (MODE) == V1TImode)
> >> >
> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
> >> > +   want to include 8-byte vector modes, like V2SFmode, but not DImode
> >> > +   nor SImode.  */
> >>
> >> This is strange, since we already allow all MMX modes in SSE
> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
> >> return:
> >>
> >> return ((TARGET_AVX
> >>              && VALID_AVX256_REG_OR_OI_MODE (mode))
> >>             || VALID_SSE_REG_MODE (mode)
> >>             || VALID_SSE2_REG_MODE (mode)
> >>             || VALID_MMX_REG_MODE (mode)
> >>             || VALID_MMX_REG_MODE_3DNOW (mode));
> >>
> >> I'd expect that changed VALID_SSE2_REG_MODE affects only
> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps
> >> ix86_set_reg_reg_cost cost function.
> >>
> >> Are there any concrete issues when allowing all MMX (including 3DNOW?)
> >> modes in VALID_SSE2_REG_MODE?
> >
> > The problem is with DImode and SImode.  All other vector modes,  including
> > V2SF is OK.  With DImode and SImode, I got following regressions:
> >
> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
> > cannot be created; target is discardable"
> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n
> > FAIL: go test misc/cgo/testcarchive
> >
> >  gcc.dg/pr39323-3.c  is due to
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261
> >
> > and
> >
> > /* Decide whether a variable of mode MODE should be 128 bit aligned.  */
> > #define ALIGN_MODE_128(MODE) \
> >  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))
>
> Hm, this is a bit worrying, we don't want to introduce ABI
> incompatibilites w.r.t. alignment. We still need to be ABI compatible
> for MMX values and emit unaligned loads/stores when necessary.

We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
And I don't think we should put DI and SI in them.

> > SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
> > places.   i386 backend may not be prepared to deal them in SSE_REG_MODE_P
> > nor VALID_SSE2_REG_MODE.
>
> I think we have to review the usage of these two changed defines to
> prevent any ABI issues or other hidden issues.
>

Absolutely.
Uros Bizjak Feb. 9, 2019, 6:27 p.m. UTC | #5
On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>>
>> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
>> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>> >>
>> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
>> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
>> >> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
>> >> >
>> >> >       PR target/89021
>> >> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
>> >> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit
>> >> > vector
>> >> >       modes for TARGET_MMX_WITH_SSE.
>> >> >       (SSE_REG_MODE_P): Likewise.
>> >> > ---
>> >> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--
>> >> >  1 file changed, 18 insertions(+), 2 deletions(-)
>> >> >
>> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
>> >> > index 83b025e0cf5..c1df3ec3326 100644
>> >> > --- a/gcc/config/i386/i386.h
>> >> > +++ b/gcc/config/i386/i386.h
>> >> > @@ -585,6 +585,11 @@ extern unsigned char
>> >> > ix86_arch_features[X86_ARCH_LAST];
>> >> >
>> >> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)
>> >> >
>> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
>> >> > +   FIXME: All 3DNOW patterns needs to be updated with SSE
>> >> > emulation.
>> >> > */
>> >> > +#define TARGET_MMX_WITH_SSE \
>> >> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
>> >> > +
>> >> >  extern unsigned char x86_prefetch_sse;
>> >> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
>> >> >
>> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int
>> >> > argc,
>> >> > const char **argv);
>> >> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode
>> >> > \
>> >> >     || (MODE) == TFmode || (MODE) == V1TImode)
>> >> >
>> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since
>> >> > we
>> >> > +   want to include 8-byte vector modes, like V2SFmode, but not
>> >> > DImode
>> >> > +   nor SImode.  */
>> >>
>> >> This is strange, since we already allow all MMX modes in SSE
>> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
>> >> return:
>> >>
>> >> return ((TARGET_AVX
>> >>              && VALID_AVX256_REG_OR_OI_MODE (mode))
>> >>             || VALID_SSE_REG_MODE (mode)
>> >>             || VALID_SSE2_REG_MODE (mode)
>> >>             || VALID_MMX_REG_MODE (mode)
>> >>             || VALID_MMX_REG_MODE_3DNOW (mode));
>> >>
>> >> I'd expect that changed VALID_SSE2_REG_MODE affects only
>> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps
>> >> ix86_set_reg_reg_cost cost function.
>> >>
>> >> Are there any concrete issues when allowing all MMX (including 3DNOW?)
>> >> modes in VALID_SSE2_REG_MODE?
>> >
>> > The problem is with DImode and SImode.  All other vector modes,
>> > including
>> > V2SF is OK.  With DImode and SImode, I got following regressions:
>> >
>> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
>> > cannot be created; target is discardable"
>> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[
>> > \t]*\n
>> > FAIL: go test misc/cgo/testcarchive
>> >
>> >  gcc.dg/pr39323-3.c  is due to
>> >
>> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261
>> >
>> > and
>> >
>> > /* Decide whether a variable of mode MODE should be 128 bit aligned.
>> > */
>> > #define ALIGN_MODE_128(MODE) \
>> >  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))
>>
>> Hm, this is a bit worrying, we don't want to introduce ABI
>> incompatibilites w.r.t. alignment. We still need to be ABI compatible
>> for MMX values and emit unaligned loads/stores when necessary.
>
> We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
> And I don't think we should put DI and SI in them.

Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to
a big-hammer approach of changing wide-used defines like
SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example,
ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P,
while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is
changed.

Uros.


>
>> > SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
>> > places.   i386 backend may not be prepared to deal them in
>> > SSE_REG_MODE_P
>> > nor VALID_SSE2_REG_MODE.
>>
>> I think we have to review the usage of these two changed defines to
>> prevent any ABI issues or other hidden issues.
>>
>
> Absolutely.
>
> --
> H.J.
>
H.J. Lu Feb. 9, 2019, 6:32 p.m. UTC | #6
On Sat, Feb 9, 2019 at 10:27 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> > On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >>
> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> >> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >> >>
> >> >> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> >> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
> >> >> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
> >> >> >
> >> >> >       PR target/89021
> >> >> >       * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
> >> >> >       * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit
> >> >> > vector
> >> >> >       modes for TARGET_MMX_WITH_SSE.
> >> >> >       (SSE_REG_MODE_P): Likewise.
> >> >> > ---
> >> >> >  gcc/config/i386/i386.h | 20 ++++++++++++++++++--
> >> >> >  1 file changed, 18 insertions(+), 2 deletions(-)
> >> >> >
> >> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> >> >> > index 83b025e0cf5..c1df3ec3326 100644
> >> >> > --- a/gcc/config/i386/i386.h
> >> >> > +++ b/gcc/config/i386/i386.h
> >> >> > @@ -585,6 +585,11 @@ extern unsigned char
> >> >> > ix86_arch_features[X86_ARCH_LAST];
> >> >> >
> >> >> >  #define TARGET_FISTTP                (TARGET_SSE3 && TARGET_80387)
> >> >> >
> >> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
> >> >> > +   FIXME: All 3DNOW patterns needs to be updated with SSE
> >> >> > emulation.
> >> >> > */
> >> >> > +#define TARGET_MMX_WITH_SSE \
> >> >> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
> >> >> > +
> >> >> >  extern unsigned char x86_prefetch_sse;
> >> >> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
> >> >> >
> >> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int
> >> >> > argc,
> >> >> > const char **argv);
> >> >> >     || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode
> >> >> > \
> >> >> >     || (MODE) == TFmode || (MODE) == V1TImode)
> >> >> >
> >> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since
> >> >> > we
> >> >> > +   want to include 8-byte vector modes, like V2SFmode, but not
> >> >> > DImode
> >> >> > +   nor SImode.  */
> >> >>
> >> >> This is strange, since we already allow all MMX modes in SSE
> >> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
> >> >> return:
> >> >>
> >> >> return ((TARGET_AVX
> >> >>              && VALID_AVX256_REG_OR_OI_MODE (mode))
> >> >>             || VALID_SSE_REG_MODE (mode)
> >> >>             || VALID_SSE2_REG_MODE (mode)
> >> >>             || VALID_MMX_REG_MODE (mode)
> >> >>             || VALID_MMX_REG_MODE_3DNOW (mode));
> >> >>
> >> >> I'd expect that changed VALID_SSE2_REG_MODE affects only
> >> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps
> >> >> ix86_set_reg_reg_cost cost function.
> >> >>
> >> >> Are there any concrete issues when allowing all MMX (including 3DNOW?)
> >> >> modes in VALID_SSE2_REG_MODE?
> >> >
> >> > The problem is with DImode and SImode.  All other vector modes,
> >> > including
> >> > V2SF is OK.  With DImode and SImode, I got following regressions:
> >> >
> >> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
> >> > cannot be created; target is discardable"
> >> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[
> >> > \t]*\n
> >> > FAIL: go test misc/cgo/testcarchive
> >> >
> >> >  gcc.dg/pr39323-3.c  is due to
> >> >
> >> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261
> >> >
> >> > and
> >> >
> >> > /* Decide whether a variable of mode MODE should be 128 bit aligned.
> >> > */
> >> > #define ALIGN_MODE_128(MODE) \
> >> >  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))
> >>
> >> Hm, this is a bit worrying, we don't want to introduce ABI
> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible
> >> for MMX values and emit unaligned loads/stores when necessary.
> >
> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
> > And I don't think we should put DI and SI in them.
>
> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to

Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF.
We only want 8-byte vector modes here.

> a big-hammer approach of changing wide-used defines like
> SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example,
> ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P,
> while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is
> changed.

I will give it a try.
Uros Bizjak Feb. 9, 2019, 6:41 p.m. UTC | #7
On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
>> >> Hm, this is a bit worrying, we don't want to introduce ABI
>> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible
>> >> for MMX values and emit unaligned loads/stores when necessary.
>> >
>> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
>> > And I don't think we should put DI and SI in them.
>>
>> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
>> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
>> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to
>
> Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF.
> We only want 8-byte vector modes here.

Well, I'm not forcing VALID_MMX_REG_MODE here, it is just an example;
the important part is in the addition of (TARGET_MMX_WITH_SSE &&
some_modes). Surely, we don't want to align SImode to 128 bits in
ALIGN_MODE_128.

Uros.

>> a big-hammer approach of changing wide-used defines like
>> SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example,
>> ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P,
>> while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is
>> changed.
>
> I will give it a try.
>
> --
> H.J.
>
H.J. Lu Feb. 9, 2019, 6:52 p.m. UTC | #8
On Sat, Feb 9, 2019 at 10:41 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On 2/9/19, H.J. Lu <hjl.tools@gmail.com> wrote:
> >> >> Hm, this is a bit worrying, we don't want to introduce ABI
> >> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible
> >> >> for MMX values and emit unaligned loads/stores when necessary.
> >> >
> >> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
> >> > And I don't think we should put DI and SI in them.
> >>
> >> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
> >> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
> >> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to
> >
> > Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF.
> > We only want 8-byte vector modes here.
>
> Well, I'm not forcing VALID_MMX_REG_MODE here, it is just an example;
> the important part is in the addition of (TARGET_MMX_WITH_SSE &&
> some_modes). Surely, we don't want to align SImode to 128 bits in
> ALIGN_MODE_128.
>

I am testing this.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5..c1df3ec3326 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -585,6 +585,11 @@  extern unsigned char ix86_arch_features[X86_ARCH_LAST];
 
 #define TARGET_FISTTP		(TARGET_SSE3 && TARGET_80387)
 
+/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
+   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
+#define TARGET_MMX_WITH_SSE \
+  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
+
 extern unsigned char x86_prefetch_sse;
 #define TARGET_PREFETCH_SSE	x86_prefetch_sse
 
@@ -1143,9 +1148,16 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\
    || (MODE) == TFmode || (MODE) == V1TImode)
 
+/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
+   want to include 8-byte vector modes, like V2SFmode, but not DImode
+   nor SImode.  */
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
-   || (MODE) == V2DImode || (MODE) == DFmode)
+   || (MODE) == V2DImode || (MODE) == DFmode				\
+   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode	\
+			       || (MODE) == V4HImode			\
+			       || (MODE) == V2SImode			\
+			       || (MODE) == V2SFmode)))
 
 #define VALID_SSE_REG_MODE(MODE)					\
   ((MODE) == V1TImode || (MODE) == TImode				\
@@ -1188,7 +1200,11 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
    || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode	\
    || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode	\
    || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode	\
-   || (MODE) == V16SFmode)
+   || (MODE) == V16SFmode						\
+   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode	\
+			       || (MODE) == V4HImode			\
+			       || (MODE) == V2SImode			\
+			       || (MODE) == V2SFmode)))
 
 #define X87_FLOAT_MODE_P(MODE)	\
   (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode))