diff mbox

[nvptx,3/3] Add v2di support

Message ID 2af1b956-f5f6-1d30-9816-3eafe41bdb1f@mentor.com
State New
Headers show

Commit Message

Tom de Vries June 6, 2017, 1:12 p.m. UTC
Hi,

this patch adds v2di support to the nvptx target.  This allows us to 
generate 128-bit loads and stores.

Tested in nvptx mainkernel mode and x86_64 accelerator mode.

OK for trunk?

Thanks,
- Tom

Comments

Tom de Vries June 6, 2017, 1:39 p.m. UTC | #1
[ was: Re: [nvptx, PATCH, 3/3] Add v2di support ]

On 06/06/2017 03:12 PM, Tom de Vries wrote:
> diff --git a/libgomp/testsuite/libgomp.oacc-c/vec.c b/libgomp/testsuite/libgomp.oacc-c/vec.c
> new file mode 100644
> index 0000000..79c1c17
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c/vec.c
> @@ -0,0 +1,48 @@
> +/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
> +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
> +/* { dg-additional-options "-std=c99 -ftree-slp-vectorize -foffload=-ftree-slp-vectorize -foffload=-fdump-tree-slp1 -foffload=-save-temps -save-temps" } */
> +
> +#include <stdio.h>
> +#include <sys/time.h>
> +
> +long long int p[32 *1000] __attribute__((aligned(16)));
> +long long int p2[32 *1000] __attribute__((aligned(16)));
> +
> +int
> +main (void)
> +{
> +#pragma acc parallel num_gangs(1) num_workers(1) vector_length(32)
> +  {
> +    if (((unsigned long int)p & (0xfULL)) != 0)
> +      __builtin_abort ();
> +    if (((unsigned long int)p2 & (0xfULL)) != 0)
> +      __builtin_abort ();
> +
> +    for (unsigned int k = 0; k < 10000; k += 1)
> +      {
> +#pragma acc loop vector
> +	for (unsigned long long int j = 0; j < 32; j += 1)
> +	  {
> +	    unsigned long long a, b;
> +	    unsigned long long *p3, *p4;
> +	    p3 = (unsigned long long *)((unsigned long long int)p & (~0xfULL));
> +	    p4 = (unsigned long long *)((unsigned long long int)p2 & (~0xfULL));
> +
> +	    for (unsigned int i = 0; i < 1000; i += 2)
> +	      {
> +		a = p3[j * 1000 + i];
> +		b = p3[j * 1000 + i + 1];
> +		
> +		p4[j * 1000 + i] = a;
> +		p4[j * 1000 + i + 1] = b;
> +	      }
> +	  }
> +      }
> +  }
> +
> +  return 0;
> +}
> +
> +/* Todo: make a scan-tree-dump variant that scans vec.o instead.  */
> +/* { dg-final { file copy -force [glob vec.o.*] [regsub \.o\. [glob vec.o.*] \.c\.] } } */
> +/* { dg-final { scan-tree-dump "vector\\(2\\) long long unsigned int" "slp1" } } */

Hi,

we have scan-tree-dump that scans in test.c.* files.  But when we run 
lto1 for the offloaded region, we produce test.o.* files instead.  In 
the test-case above, I work around that by using 'dg-final { file copy 
}'. What is a good way to get rid of this workaround ?

Add scan-o-tree-dump ?

Or make the "slp1" field smarter, and allow f.i. "o.slp1" ?

Thanks,
- Tom
Richard Biener June 6, 2017, 2:12 p.m. UTC | #2
On Tue, Jun 6, 2017 at 3:39 PM, Tom de Vries <Tom_deVries@mentor.com> wrote:
> [ was: Re: [nvptx, PATCH, 3/3] Add v2di support ]
>
> On 06/06/2017 03:12 PM, Tom de Vries wrote:
>>
>> diff --git a/libgomp/testsuite/libgomp.oacc-c/vec.c
>> b/libgomp/testsuite/libgomp.oacc-c/vec.c
>> new file mode 100644
>> index 0000000..79c1c17
>> --- /dev/null
>> +++ b/libgomp/testsuite/libgomp.oacc-c/vec.c
>> @@ -0,0 +1,48 @@
>> +/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
>> +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
>> +/* { dg-additional-options "-std=c99 -ftree-slp-vectorize
>> -foffload=-ftree-slp-vectorize -foffload=-fdump-tree-slp1
>> -foffload=-save-temps -save-temps" } */
>> +
>> +#include <stdio.h>
>> +#include <sys/time.h>
>> +
>> +long long int p[32 *1000] __attribute__((aligned(16)));
>> +long long int p2[32 *1000] __attribute__((aligned(16)));
>> +
>> +int
>> +main (void)
>> +{
>> +#pragma acc parallel num_gangs(1) num_workers(1) vector_length(32)
>> +  {
>> +    if (((unsigned long int)p & (0xfULL)) != 0)
>> +      __builtin_abort ();
>> +    if (((unsigned long int)p2 & (0xfULL)) != 0)
>> +      __builtin_abort ();
>> +
>> +    for (unsigned int k = 0; k < 10000; k += 1)
>> +      {
>> +#pragma acc loop vector
>> +       for (unsigned long long int j = 0; j < 32; j += 1)
>> +         {
>> +           unsigned long long a, b;
>> +           unsigned long long *p3, *p4;
>> +           p3 = (unsigned long long *)((unsigned long long int)p &
>> (~0xfULL));
>> +           p4 = (unsigned long long *)((unsigned long long int)p2 &
>> (~0xfULL));
>> +
>> +           for (unsigned int i = 0; i < 1000; i += 2)
>> +             {
>> +               a = p3[j * 1000 + i];
>> +               b = p3[j * 1000 + i + 1];
>> +
>> +               p4[j * 1000 + i] = a;
>> +               p4[j * 1000 + i + 1] = b;
>> +             }
>> +         }
>> +      }
>> +  }
>> +
>> +  return 0;
>> +}
>> +
>> +/* Todo: make a scan-tree-dump variant that scans vec.o instead.  */
>> +/* { dg-final { file copy -force [glob vec.o.*] [regsub \.o\. [glob
>> vec.o.*] \.c\.] } } */
>> +/* { dg-final { scan-tree-dump "vector\\(2\\) long long unsigned int"
>> "slp1" } } */
>
>
> Hi,
>
> we have scan-tree-dump that scans in test.c.* files.  But when we run lto1
> for the offloaded region, we produce test.o.* files instead.  In the
> test-case above, I work around that by using 'dg-final { file copy }'. What
> is a good way to get rid of this workaround ?
>
> Add scan-o-tree-dump ?
>
> Or make the "slp1" field smarter, and allow f.i. "o.slp1" ?

There is the same issue with regular LTO tests using scan-tree-dump which
end up scanning the "fat" compilation dumpfile.  Maybe add
scan-ltrans-tree-dump and scan-wpa-ipa-dump that look at appropriate
files plus passing appropriate flags to generate dumpfiles in known locations
(I think part of them end up in /tmp).

Richard.

> Thanks,
> - Tom
Jeff Law June 23, 2017, 5:50 p.m. UTC | #3
On 06/06/2017 07:12 AM, Tom de Vries wrote:
> Hi,
> 
> this patch adds v2di support to the nvptx target.  This allows us to
> generate 128-bit loads and stores.
> 
> Tested in nvptx mainkernel mode and x86_64 accelerator mode.
> 
> OK for trunk?
> 
> Thanks,
> - Tom
> 
> 0003-Add-v2di-support.patch
> 
> 
> Add v2di support
> 
> 2017-06-06  Tom de Vries  <tom@codesourcery.com>
> 
> 	* config/nvptx/nvptx-modes.def: Add V2DImode.
> 	* config/nvptx/nvptx-protos.h (nvptx_data_alignment): Declare.
> 	* config/nvptx/nvptx.c (nvptx_ptx_type_from_mode): Handle V2DImode.
> 	(nvptx_output_mov_insn): Handle lack of mov.b128.
> 	(nvptx_print_operand): Handle 'H' and 'L' codes.
> 	(nvptx_vector_mode_supported): Allow V2DImode.
> 	(nvptx_preferred_simd_mode): New function.
> 	(nvptx_data_alignment): New function.
> 	(TARGET_VECTORIZE_PREFERRED_SIMD_MODE): Redefine to
> 	nvptx_preferred_simd_mode.
> 	* config/nvptx/nvptx.h (STACK_BOUNDARY, BIGGEST_ALIGNMENT): Change from
> 	64 to 128 bits.
> 	(DATA_ALIGNMENT): Define.  Set to nvptx_data_alignment.
> 
> 	* config/nvptx/nvptx.md (VECIM): Add V2DI.
> 
> 	* gcc.target/nvptx/decl-init.c: Update alignment.
> 	* gcc.target/nvptx/slp-2-run.c: New test.
> 	* gcc.target/nvptx/slp-2.c: New test.
> 	* gcc.target/nvptx/v2di.c: New test.
> 
> 	* testsuite/libgomp.oacc-c/vec.c: New test.
>
OK.  I'm going to take your word that bumping STACK_BOUNDARY is the
right thing to do  rather than dynamic realignment.  Presumably mixing
code from different PTX compilers isn't something we're really worrying
about yet anyway...

jeff
diff mbox

Patch

Add v2di support

2017-06-06  Tom de Vries  <tom@codesourcery.com>

	* config/nvptx/nvptx-modes.def: Add V2DImode.
	* config/nvptx/nvptx-protos.h (nvptx_data_alignment): Declare.
	* config/nvptx/nvptx.c (nvptx_ptx_type_from_mode): Handle V2DImode.
	(nvptx_output_mov_insn): Handle lack of mov.b128.
	(nvptx_print_operand): Handle 'H' and 'L' codes.
	(nvptx_vector_mode_supported): Allow V2DImode.
	(nvptx_preferred_simd_mode): New function.
	(nvptx_data_alignment): New function.
	(TARGET_VECTORIZE_PREFERRED_SIMD_MODE): Redefine to
	nvptx_preferred_simd_mode.
	* config/nvptx/nvptx.h (STACK_BOUNDARY, BIGGEST_ALIGNMENT): Change from
	64 to 128 bits.
	(DATA_ALIGNMENT): Define.  Set to nvptx_data_alignment.

	* config/nvptx/nvptx.md (VECIM): Add V2DI.

	* gcc.target/nvptx/decl-init.c: Update alignment.
	* gcc.target/nvptx/slp-2-run.c: New test.
	* gcc.target/nvptx/slp-2.c: New test.
	* gcc.target/nvptx/v2di.c: New test.

	* testsuite/libgomp.oacc-c/vec.c: New test.

---
 gcc/config/nvptx/nvptx-modes.def           |  2 +
 gcc/config/nvptx/nvptx-protos.h            |  1 +
 gcc/config/nvptx/nvptx.c                   | 68 +++++++++++++++++++++++++++++-
 gcc/config/nvptx/nvptx.h                   |  6 ++-
 gcc/config/nvptx/nvptx.md                  |  2 +-
 gcc/testsuite/gcc.target/nvptx/decl-init.c |  2 +-
 gcc/testsuite/gcc.target/nvptx/slp-2-run.c | 23 ++++++++++
 gcc/testsuite/gcc.target/nvptx/slp-2.c     | 25 +++++++++++
 gcc/testsuite/gcc.target/nvptx/v2di.c      | 12 ++++++
 libgomp/testsuite/libgomp.oacc-c/vec.c     | 48 +++++++++++++++++++++
 10 files changed, 183 insertions(+), 6 deletions(-)

diff --git a/gcc/config/nvptx/nvptx-modes.def b/gcc/config/nvptx/nvptx-modes.def
index d49429c..ff61b36 100644
--- a/gcc/config/nvptx/nvptx-modes.def
+++ b/gcc/config/nvptx/nvptx-modes.def
@@ -1 +1,3 @@ 
 VECTOR_MODE (INT, SI, 2);  /* V2SI */
+
+VECTOR_MODE (INT, DI, 2);  /* V2DI */
diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h
index 16b316f..c3e3b84 100644
--- a/gcc/config/nvptx/nvptx-protos.h
+++ b/gcc/config/nvptx/nvptx-protos.h
@@ -41,6 +41,7 @@  extern void nvptx_function_end (FILE *);
 extern void nvptx_output_skip (FILE *, unsigned HOST_WIDE_INT);
 extern void nvptx_output_ascii (FILE *, const char *, unsigned HOST_WIDE_INT);
 extern void nvptx_register_pragmas (void);
+extern unsigned int nvptx_data_alignment (const_tree, unsigned int);
 
 #ifdef RTX_CODE
 extern void nvptx_expand_oacc_fork (unsigned);
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index d513ddb..1c84b1b 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -236,6 +236,8 @@  nvptx_ptx_type_from_mode (machine_mode mode, bool promote)
 
     case V2SImode:
       return ".v2.u32";
+    case V2DImode:
+      return ".v2.u64";
 
     default:
       gcc_unreachable ();
@@ -2181,7 +2183,20 @@  nvptx_output_mov_insn (rtx dst, rtx src)
 	    ? "%.\tmov%t0\t%0, %1;" : "%.\tmov.b%T0\t%0, %1;");
 
   if (GET_MODE_SIZE (dst_inner) == GET_MODE_SIZE (src_inner))
-    return "%.\tmov.b%T0\t%0, %1;";
+    {
+      if (GET_MODE_BITSIZE (dst_mode) == 128
+	  && GET_MODE_BITSIZE (GET_MODE (src)) == 128)
+	{
+	  /* mov.b128 is not supported.  */
+	  if (dst_inner == V2DImode && src_inner == TImode)
+	    return "%.\tmov.u64\t%0.x, %L1;\n\t%.\tmov.u64\t%0.y, %H1;";
+	  else if (dst_inner == TImode && src_inner == V2DImode)
+	    return "%.\tmov.u64\t%L0, %1.x;\n\t%.\tmov.u64\t%H0, %1.y;";
+
+	  gcc_unreachable ();
+	}
+      return "%.\tmov.b%T0\t%0, %1;";
+    }
 
   return "%.\tcvt%t0%t1\t%0, %1;";
 }
@@ -2419,6 +2434,20 @@  nvptx_print_operand (FILE *file, rtx x, int code)
       fprintf (file, "%s", nvptx_ptx_type_from_mode (mode, code == 't'));
       break;
 
+    case 'H':
+    case 'L':
+      {
+	rtx inner_x = SUBREG_REG (x);
+	machine_mode inner_mode = GET_MODE (inner_x);
+	machine_mode split = maybe_split_mode (inner_mode);
+
+	output_reg (file, REGNO (inner_x), split,
+		    (code == 'H'
+		     ? GET_MODE_SIZE (inner_mode) / 2
+		     : 0));
+      }
+      break;
+
     case 'S':
       {
 	nvptx_shuffle_kind kind = (nvptx_shuffle_kind) UINTVAL (x);
@@ -5363,7 +5392,38 @@  nvptx_goacc_reduction (gcall *call)
 static bool
 nvptx_vector_mode_supported (machine_mode mode)
 {
-  return mode == V2SImode;
+  return (mode == V2SImode
+	  || mode == V2DImode);
+}
+
+/* Return the preferred mode for vectorizing scalar MODE.  */
+
+static machine_mode
+nvptx_preferred_simd_mode (machine_mode mode)
+{
+  switch (mode)
+    {
+    case DImode:
+      return V2DImode;
+    case SImode:
+      return V2SImode;
+
+    default:
+      return default_preferred_simd_mode (mode);
+    }
+}
+
+unsigned int
+nvptx_data_alignment (const_tree type, unsigned int basic_align)
+{
+  if (TREE_CODE (type) == INTEGER_TYPE)
+    {
+      unsigned HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
+      if (size == GET_MODE_SIZE (TImode))
+	return GET_MODE_BITSIZE (maybe_split_mode (TImode));
+    }
+
+  return basic_align;
 }
 
 #undef TARGET_OPTION_OVERRIDE
@@ -5483,6 +5543,10 @@  nvptx_vector_mode_supported (machine_mode mode)
 #undef TARGET_VECTOR_MODE_SUPPORTED_P
 #define TARGET_VECTOR_MODE_SUPPORTED_P nvptx_vector_mode_supported
 
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
+    nvptx_preferred_simd_mode
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-nvptx.h"
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index 0a000a7..4224acf 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -52,13 +52,15 @@ 
 
 /* Alignments in bits.  */
 #define PARM_BOUNDARY 32
-#define STACK_BOUNDARY 64
+#define STACK_BOUNDARY 128
 #define FUNCTION_BOUNDARY 32
-#define BIGGEST_ALIGNMENT 64
+#define BIGGEST_ALIGNMENT 128
 #define STRICT_ALIGNMENT 1
 
 #define MAX_STACK_ALIGNMENT (1024 * 8)
 
+#define DATA_ALIGNMENT nvptx_data_alignment
+
 /* Copied from elf.h and other places.  We'd otherwise use
    BIGGEST_ALIGNMENT and fail a number of testcases.  */
 #define MAX_OFILE_ALIGNMENT (32768 * 8)
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index ba0567c..dff7cc0 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -184,7 +184,7 @@ 
 (define_mode_iterator SDCM [SC DC])
 (define_mode_iterator BITS [SI SF])
 (define_mode_iterator BITD [DI DF])
-(define_mode_iterator VECIM [V2SI])
+(define_mode_iterator VECIM [V2SI V2DI])
 
 ;; This mode iterator allows :P to be used for patterns that operate on
 ;; pointer-sized quantities.  Exactly one of the two alternatives will match.
diff --git a/gcc/testsuite/gcc.target/nvptx/decl-init.c b/gcc/testsuite/gcc.target/nvptx/decl-init.c
index e9af907..23008fb 100644
--- a/gcc/testsuite/gcc.target/nvptx/decl-init.c
+++ b/gcc/testsuite/gcc.target/nvptx/decl-init.c
@@ -37,7 +37,7 @@  struct five five2[2] = {{12, 13}, {14, 15}};
 /* { dg-final { scan-assembler ".align 1 .u8 five2\\\[10\\\] = { 12, 13, 0, 0, 0, 14, 15, 0, 0, 0 };" } } */
 
 int __attribute__((vector_size(16))) vi = {16, 17, 18, 19};
-/* { dg-final { scan-assembler ".align 8 .u32 vi\\\[4\\\] = { 16, 17, 18, 19 };" } } */
+/* { dg-final { scan-assembler ".align 16 .u32 vi\\\[4\\\] = { 16, 17, 18, 19 };" } } */
 
 typedef int __attribute ((mode(TI))) ti_t;
 
diff --git a/gcc/testsuite/gcc.target/nvptx/slp-2-run.c b/gcc/testsuite/gcc.target/nvptx/slp-2-run.c
new file mode 100644
index 0000000..f9841a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/slp-2-run.c
@@ -0,0 +1,23 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-slp-vectorize" } */
+
+#include "slp-2.c"
+
+int
+main(void)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i += 1)
+    {
+      p[i] = i;
+      p2[i] = 0;
+    }
+
+  foo ();
+
+  for (i = 0; i < 1000; i += 1)
+    if (p2[i] != i)
+      return 1;
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/nvptx/slp-2.c b/gcc/testsuite/gcc.target/nvptx/slp-2.c
new file mode 100644
index 0000000..6639491
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/slp-2.c
@@ -0,0 +1,25 @@ 
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-slp-vectorize -save-temps" } */
+
+long long int p[1000] __attribute__((aligned(16)));
+long long int p2[1000] __attribute__((aligned(16)));
+
+void __attribute__((noinline, noclone))
+foo ()
+{
+  long long int a, b;
+
+  unsigned int i;
+  for (i = 0; i < 1000; i += 2)
+    {
+      a = p[i];
+      b = p[i+1];
+
+      p2[i] = a;
+      p2[i+1] = b;
+    }
+}
+
+/* { dg-final { scan-assembler "ld.v2.u64" } } */
+/* { dg-final { scan-assembler "st.v2.u64" } } */
+
diff --git a/gcc/testsuite/gcc.target/nvptx/v2di.c b/gcc/testsuite/gcc.target/nvptx/v2di.c
new file mode 100644
index 0000000..f63ad35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/v2di.c
@@ -0,0 +1,12 @@ 
+/* { dg-do assemble } */
+/* { dg-options "-O2 -save-temps" } */
+
+typedef long long int __v2di __attribute__((__vector_size__(16)));
+
+#define TYPE __v2di
+#include "vec.inc"
+
+/* { dg-final { scan-assembler ".reg\\.v2\\.u64" } } */
+/* { dg-final { scan-assembler "ld\\.v2\\.u64" } } */
+/* { dg-final { scan-assembler "st\\.v2\\.u64" } } */
+/* { dg-final { scan-assembler "mov\\.v2\\.u64.*\\{ 1, 2 \\}" } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c/vec.c b/libgomp/testsuite/libgomp.oacc-c/vec.c
new file mode 100644
index 0000000..79c1c17
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/vec.c
@@ -0,0 +1,48 @@ 
+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+/* { dg-additional-options "-std=c99 -ftree-slp-vectorize -foffload=-ftree-slp-vectorize -foffload=-fdump-tree-slp1 -foffload=-save-temps -save-temps" } */
+
+#include <stdio.h>
+#include <sys/time.h>
+
+long long int p[32 *1000] __attribute__((aligned(16)));
+long long int p2[32 *1000] __attribute__((aligned(16)));
+
+int
+main (void)
+{
+#pragma acc parallel num_gangs(1) num_workers(1) vector_length(32)
+  {
+    if (((unsigned long int)p & (0xfULL)) != 0)
+      __builtin_abort ();
+    if (((unsigned long int)p2 & (0xfULL)) != 0)
+      __builtin_abort ();
+
+    for (unsigned int k = 0; k < 10000; k += 1)
+      {
+#pragma acc loop vector
+	for (unsigned long long int j = 0; j < 32; j += 1)
+	  {
+	    unsigned long long a, b;
+	    unsigned long long *p3, *p4;
+	    p3 = (unsigned long long *)((unsigned long long int)p & (~0xfULL));
+	    p4 = (unsigned long long *)((unsigned long long int)p2 & (~0xfULL));
+
+	    for (unsigned int i = 0; i < 1000; i += 2)
+	      {
+		a = p3[j * 1000 + i];
+		b = p3[j * 1000 + i + 1];
+		
+		p4[j * 1000 + i] = a;
+		p4[j * 1000 + i + 1] = b;
+	      }
+	  }
+      }
+  }
+
+  return 0;
+}
+
+/* Todo: make a scan-tree-dump variant that scans vec.o instead.  */
+/* { dg-final { file copy -force [glob vec.o.*] [regsub \.o\. [glob vec.o.*] \.c\.] } } */
+/* { dg-final { scan-tree-dump "vector\\(2\\) long long unsigned int" "slp1" } } */