diff mbox

Support TImode CONST_WIDE_INT store in 64-bit STV

Message ID 20160811165449.GA22638@intel.com
State New
Headers show

Commit Message

H.J. Lu Aug. 11, 2016, 4:54 p.m. UTC
Support TImode CONST_WIDE_INT stores generated from piecewise stores.
The performance impact needs to be verified before enabling TImode
CONST_INT stores for __int128.

Tested on x86-64.  OK for trunk?

H.J.
---
gcc/

	* config/i386/i386.c (timode_scalar_to_vector_candidate_p): Allow
	TImode CONST_WIDE_INT store.
	(timode_scalar_chain::convert_insn): Handle CONST_WIDE_INT store.

gcc/testsuite/

	* gcc.target/i386/pieces-strcpy-1.c: New test.
	* gcc.target/i386/pieces-strcpy-2.c: Likewise.
---
 gcc/config/i386/i386.c                          | 23 ++++++++++++++++++++---
 gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c | 15 +++++++++++++++
 gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 15 +++++++++++++++
 3 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c

Comments

Uros Bizjak Aug. 11, 2016, 5:32 p.m. UTC | #1
On Thu, Aug 11, 2016 at 6:54 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> Support TImode CONST_WIDE_INT store generated from piecewise store.
> Need to verify performance impact before enabling TImode CONST_INT
> store for __int128.
>
> Tested on x86-64.  OK for trunk?

OK.

Thanks,
Uros.

> H.J.
> ---
> gcc/
>
>         * config/i386/i386.c (timode_scalar_to_vector_candidate_p): Allow
>         TImode CONST_WIDE_INT store.
>         (timode_scalar_chain::convert_insn): Handle CONST_WIDE_INT store.
>
> gcc/testsuite/
>
>         * gcc.target/i386/pieces-strcpy-1.c: New test.
>         * gcc.target/i386/pieces-strcpy-2.c: Likewise.
> ---
>  gcc/config/i386/i386.c                          | 23 ++++++++++++++++++++---
>  gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c | 15 +++++++++++++++
>  gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 15 +++++++++++++++
>  3 files changed, 50 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 93eaab1..d086ede 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -2862,9 +2862,12 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
>
>    if (MEM_P (dst))
>      {
> -      /* Check for store.  Only support store from register or standard
> -        SSE constants.  Memory must be aligned or unaligned store is
> -        optimal.  */
> +      /* Check for store.  Memory must be aligned or unaligned store
> +        is optimal.  Only support store from register, standard SSE
> +        constant or CONST_WIDE_INT generated from piecewise store.
> +
> +        ??? Verify performance impact before enabling CONST_INT for
> +        __int128 store.  */
>        if (misaligned_operand (dst, TImode)
>           && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
>         return false;
> @@ -2875,6 +2878,7 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
>           return false;
>
>         case REG:
> +       case CONST_WIDE_INT:
>           return true;
>
>         case CONST_INT:
> @@ -3868,6 +3872,19 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
>        PUT_MODE (src, V1TImode);
>        break;
>
> +    case CONST_WIDE_INT:
> +      if (NONDEBUG_INSN_P (insn))
> +       {
> +         /* Since there are no instructions to store 128-bit constant,
> +            temporary register usage is required.  */
> +         rtx tmp = gen_reg_rtx (V1TImode);
> +         src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
> +         src = validize_mem (force_const_mem (V1TImode, src));
> +         emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
> +         dst = tmp;
> +       }
> +      break;
> +
>      case CONST_INT:
>        switch (standard_sse_constant_p (src, TImode))
>         {
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c
> new file mode 100644
> index 0000000..64b7329
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *strcpy (char *, const char *);
> +
> +void
> +foo (char *s)
> +{
> +  strcpy (s,
> +         "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
> +         "1234567");
> +}
> +
> +/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
> new file mode 100644
> index 0000000..7421255
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +
> +extern char *strcpy (char *, const char *);
> +
> +void
> +foo (char *s)
> +{
> +  strcpy (s,
> +         "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
> +         "1234567");
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> --
> 2.7.4
>
diff mbox

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 93eaab1..d086ede 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2862,9 +2862,12 @@  timode_scalar_to_vector_candidate_p (rtx_insn *insn)
 
   if (MEM_P (dst))
     {
-      /* Check for store.  Only support store from register or standard
-	 SSE constants.  Memory must be aligned or unaligned store is
-	 optimal.  */
+      /* Check for store.  Memory must be aligned or unaligned store
+	 is optimal.  Only support store from register, standard SSE
+	 constant or CONST_WIDE_INT generated from piecewise store.
+
+	 ??? Verify performance impact before enabling CONST_INT for
+	 __int128 store.  */
       if (misaligned_operand (dst, TImode)
 	  && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
 	return false;
@@ -2875,6 +2878,7 @@  timode_scalar_to_vector_candidate_p (rtx_insn *insn)
 	  return false;
 
 	case REG:
+	case CONST_WIDE_INT:
 	  return true;
 
 	case CONST_INT:
@@ -3868,6 +3872,19 @@  timode_scalar_chain::convert_insn (rtx_insn *insn)
       PUT_MODE (src, V1TImode);
       break;
 
+    case CONST_WIDE_INT:
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  /* Since there are no instructions to store 128-bit constant,
+	     temporary register usage is required.  */
+	  rtx tmp = gen_reg_rtx (V1TImode);
+	  src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
+	  src = validize_mem (force_const_mem (V1TImode, src));
+	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
+	  dst = tmp;
+	}
+      break;
+
     case CONST_INT:
       switch (standard_sse_constant_p (src, TImode))
 	{
diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c
new file mode 100644
index 0000000..64b7329
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+
+extern char *strcpy (char *, const char *);
+
+void
+foo (char *s)
+{
+  strcpy (s,
+	  "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
+	  "1234567");
+}
+
+/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
new file mode 100644
index 0000000..7421255
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+
+extern char *strcpy (char *, const char *);
+
+void
+foo (char *s)
+{
+  strcpy (s,
+	  "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
+	  "1234567");
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */