Message ID | 20160811165449.GA22638@intel.com |
---|---|
State | New |
Headers | show |
On Thu, Aug 11, 2016 at 6:54 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: > Support TImode CONST_WIDE_INT store generated from piecewise store. > Need to verify performance impact before enabling TImode CONST_INT > store for __int128. > > Tested on x86-64. OK for trunk? OK. Thanks, Uros. > H.J. > --- > gcc/ > > * config/i386/i386.c (timode_scalar_to_vector_candidate_p): Allow > TImode CONST_WIDE_INT store. > (timode_scalar_chain::convert_insn): Handle CONST_WIDE_INT store. > > gcc/testsuite/ > > * gcc.target/i386/pieces-strcpy-1.c: New test. > * gcc.target/i386/pieces-strcpy-2.c: Likewise. > --- > gcc/config/i386/i386.c | 23 ++++++++++++++++++++--- > gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c | 15 +++++++++++++++ > gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 15 +++++++++++++++ > 3 files changed, 50 insertions(+), 3 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 93eaab1..d086ede 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -2862,9 +2862,12 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn) > > if (MEM_P (dst)) > { > - /* Check for store. Only support store from register or standard > - SSE constants. Memory must be aligned or unaligned store is > - optimal. */ > + /* Check for store. Memory must be aligned or unaligned store > + is optimal. Only support store from register, standard SSE > + constant or CONST_WIDE_INT generated from piecewise store. > + > + ??? Verify performance impact before enabling CONST_INT for > + __int128 store. */ > if (misaligned_operand (dst, TImode) > && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL) > return false; > @@ -2875,6 +2878,7 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn) > return false; > > case REG: > + case CONST_WIDE_INT: > return true; > > case CONST_INT: > @@ -3868,6 +3872,19 @@ timode_scalar_chain::convert_insn (rtx_insn *insn) > PUT_MODE (src, V1TImode); > break; > > + case CONST_WIDE_INT: > + if (NONDEBUG_INSN_P (insn)) > + { > + /* Since there are no instructions to store 128-bit constant, > + temporary register usage is required. */ > + rtx tmp = gen_reg_rtx (V1TImode); > + src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src)); > + src = validize_mem (force_const_mem (V1TImode, src)); > + emit_conversion_insns (gen_rtx_SET (dst, tmp), insn); > + dst = tmp; > + } > + break; > + > case CONST_INT: > switch (standard_sse_constant_p (src, TImode)) > { > diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c > new file mode 100644 > index 0000000..64b7329 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */ > + > +extern char *strcpy (char *, const char *); > + > +void > +foo (char *s) > +{ > + strcpy (s, > + "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" > + "1234567"); > +} > + > +/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */ > +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c > new file mode 100644 > index 0000000..7421255 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ > + > +extern char *strcpy (char *, const char *); > + > +void > +foo (char *s) > +{ > + strcpy (s, > + "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" > + "1234567"); > +} > + > +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */ > +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */ > -- > 2.7.4 >
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 93eaab1..d086ede 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2862,9 +2862,12 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn) if (MEM_P (dst)) { - /* Check for store. Only support store from register or standard - SSE constants. Memory must be aligned or unaligned store is - optimal. */ + /* Check for store. Memory must be aligned or unaligned store + is optimal. Only support store from register, standard SSE + constant or CONST_WIDE_INT generated from piecewise store. + + ??? Verify performance impact before enabling CONST_INT for + __int128 store. */ if (misaligned_operand (dst, TImode) && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL) return false; @@ -2875,6 +2878,7 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn) return false; case REG: + case CONST_WIDE_INT: return true; case CONST_INT: @@ -3868,6 +3872,19 @@ timode_scalar_chain::convert_insn (rtx_insn *insn) PUT_MODE (src, V1TImode); break; + case CONST_WIDE_INT: + if (NONDEBUG_INSN_P (insn)) + { + /* Since there are no instructions to store 128-bit constant, + temporary register usage is required. */ + rtx tmp = gen_reg_rtx (V1TImode); + src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src)); + src = validize_mem (force_const_mem (V1TImode, src)); + emit_conversion_insns (gen_rtx_SET (dst, tmp), insn); + dst = tmp; + } + break; + case CONST_INT: switch (standard_sse_constant_p (src, TImode)) { diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c new file mode 100644 index 0000000..64b7329 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */ + +extern char *strcpy (char *, const char *); + +void +foo (char *s) +{ + strcpy (s, + "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" + "1234567"); +} + +/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */ +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c new file mode 100644 index 0000000..7421255 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ + +extern char *strcpy (char *, const char *); + +void +foo (char *s) +{ + strcpy (s, + "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" + "1234567"); +} + +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */ +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */