diff mbox

[PTX] parameters and return values

Message ID 566F1AD6.9040604@acm.org
State New
Headers show

Commit Message

Nathan Sidwell Dec. 14, 2015, 7:39 p.m. UTC
This patch further cleans up the parameter passing and return machinery.  Now 
both the PTX prototype emission and the regular gcc hooks use the same 
underlying functions (or the former uses the gcc hooks directly).  There were a 
few inconsistencies with promotion of QI & HI mode registers -- for instance, 
PROMOTE_MODE promoted them, but the parameter passing and return didn't always 
appear to do that.  This changes things to consistently always promote, which 
apart from being simpler, is more in keeping with C expectations.  PARM_BOUNDARY 
was set at 1 byte, and nvptx_function_arg_boundary did some rather funky 
calculations, again resolved by setting PARM_BOUNDARY to 4 bytes and removing 
the special boundary handling.

The parameter and return code was nearly unconditionally just using the mode 
to determine any promotion behavior -- except for some cases of checking for an 
aggregate type.  This checks the type more rigorously, to prevent more 
complex types (such as vectors) that happen to get a simple mode from being 
passed as the integer type the mode corresponds to.

Finally, figured out the C++ named return value case.  For some types returned 
by additional parameter, GCC may also return a pointer to that object in the 
regular return register.  Whether it does so is optimization-dependent.  This 
causes problems for PTX because it'll mean the PTX prototype would be 
optimization-dependent, which is clearly wrong.  AFAICT, because of the 
optimization-dependence, no actual code can make use of the returned pointer 
itself -- even in TUs containing both the caller and callee.  So of the two 
alternatives that occurred to me, I chose the one that doesn't mention the 
returned pointer type, and inhibits the copy of the retval register to the 
(non-existent) param region.  (The other alternative was to always declare such 
a return parameter on functions that could be optimized to return one, and have 
it contain garbage in the unoptimized case).

Added a bunch of C and C++ testcases.

nathan
diff mbox

Patch

2015-12-14  Nathan Sidwell  <nathan@acm.org>

	gcc/
	* config/nvptx/nvptx.h (PARM_BOUNDARY): Set to 32.
	* config/nvptx/nvptx.c (PASS_IN_REG_P, RETURN_IN_REG_P): Delete.
	(pass_in_memory, promote_arg, promote_return): New.
	(nvptx_function_arg_boundary): Delete.
	(nvptx_function_value): Use promote_return.
	(nvptx_pass_by_reference): Use pass_in_memory.
	(nvptx_return_in_memory): Use pass_in_memory.
	(nvptx_promote_function_mode): Use promote_arg.
	(write_arg): Adjust arg splitting logic.
	(write_return): Check and clear ret_reg_mode, if needed.
	(write_fn_proto, nvptx_declare_function_name): Adjust write_return
	calls.
	(TARGET_FUNCTION_ARG_BOUNDARY,
	TARGET_FUNCTION_ARG_ROUND_BOUNDARY): Don't override.

	gcc/testsuite/
	* g++.dg/abi/nvptx-nrv1.C: New.
	* g++.dg/abi/nvptx-ptrmem1.C: New.
	* gcc.target/nvptx/abi-complex-arg.c: New.
	* gcc.target/nvptx/abi-complex-ret.c: New.
	* gcc.target/nvptx/abi-enum-arg.c: New.
	* gcc.target/nvptx/abi-enum-ret.c: New.
	* gcc.target/nvptx/abi-knr-arg.c: New.
	* gcc.target/nvptx/abi-knr-ret.c: New.
	* gcc.target/nvptx/abi-scalar-arg.c: New.
	* gcc.target/nvptx/abi-scalar-ret.c: New.
	* gcc.target/nvptx/abi-struct-arg.c: New.
	* gcc.target/nvptx/abi-struct-ret.c: New.
	* gcc.target/nvptx/abi-vararg-1.c: New.
	* gcc.target/nvptx/abi-vararg-2.c: New.
	* gcc.target/nvptx/abi-vect-arg.c: New.
	* gcc.target/nvptx/abi-vect-ret.c: New.

Index: gcc/config/nvptx/nvptx.h
===================================================================
--- gcc/config/nvptx/nvptx.h	(revision 231624)
+++ gcc/config/nvptx/nvptx.h	(working copy)
@@ -46,7 +46,8 @@ 
 /* Chosen such that we won't have to deal with multi-word subregs.  */
 #define UNITS_PER_WORD 8
 
-#define PARM_BOUNDARY 8
+/* Alignments in bits.  */
+#define PARM_BOUNDARY 32
 #define STACK_BOUNDARY 64
 #define FUNCTION_BOUNDARY 32
 #define BIGGEST_ALIGNMENT 64
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 231624)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -365,18 +365,6 @@  nvptx_emit_joining (unsigned mask, bool
     }
 }
 
-#define PASS_IN_REG_P(MODE, TYPE)				\
-  ((GET_MODE_CLASS (MODE) == MODE_INT				\
-    || GET_MODE_CLASS (MODE) == MODE_FLOAT			\
-    || ((GET_MODE_CLASS (MODE) == MODE_COMPLEX_INT		\
-	 || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)	\
-	&& !AGGREGATE_TYPE_P (TYPE)))				\
-   && (MODE) != TImode)
-
-#define RETURN_IN_REG_P(MODE)			\
-  ((GET_MODE_CLASS (MODE) == MODE_INT		\
-    || GET_MODE_CLASS (MODE) == MODE_FLOAT)	\
-   && GET_MODE_SIZE (MODE) <= 8)
 
 /* Perform a mode promotion for a function argument with MODE.  Return
    the promoted mode.  */
@@ -389,6 +377,61 @@  arg_promotion (machine_mode mode)
   return mode;
 }
 
+/* Determine whether MODE and TYPE (possibly NULL) should be passed or
+   returned in memory.  Integer and floating types supported by the
+   machine are passed in registers, everything else is passed in
+   memory.  Complex types are split.  */
+
+static bool
+pass_in_memory (machine_mode mode, const_tree type, bool for_return)
+{
+  if (type)
+    {
+      if (AGGREGATE_TYPE_P (type))
+	return true;
+      if (TREE_CODE (type) == VECTOR_TYPE)
+	return true;
+    }
+
+  if (!for_return && COMPLEX_MODE_P (mode))
+    /* Complex types are passed as two underlying args.  */
+    mode = GET_MODE_INNER (mode);
+
+  if (GET_MODE_CLASS (mode) != MODE_INT
+      && GET_MODE_CLASS (mode) != MODE_FLOAT)
+    return true;
+
+  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+    return true;
+
+  return false;
+}
+
+/* A non-memory argument of mode MODE is being passed, determine the mode it
+   should be promoted to.  This is also used for determining return
+   type promotion.  */
+
+static machine_mode
+promote_arg (machine_mode mode, bool prototyped)
+{
+  if (!prototyped && mode == SFmode)
+    /* K&R float promotion for unprototyped functions.  */
+    mode = DFmode;
+  else if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode))
+    mode = SImode;
+
+  return mode;
+}
+
+/* A non-memory return type of MODE is being returned.  Determine the
+   mode it should be promoted to.  */
+
+static machine_mode
+promote_return (machine_mode mode)
+{
+  return promote_arg (mode, true);
+}
+
 /* Implement TARGET_FUNCTION_ARG.  */
 
 static rtx
@@ -450,40 +493,6 @@  nvptx_strict_argument_naming (cumulative
   return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
 }
 
-/* Implement TARGET_FUNCTION_ARG_BOUNDARY.  */
-
-static unsigned int
-nvptx_function_arg_boundary (machine_mode mode, const_tree type)
-{
-  unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode);
-
-  if (boundary > UNITS_PER_WORD * BITS_PER_UNIT)
-    boundary = UNITS_PER_WORD * BITS_PER_UNIT;
-  else if (mode == BLKmode)
-    {
-      HOST_WIDE_INT size = int_size_in_bytes (type);
-
-      if (size > UNITS_PER_WORD)
-	boundary = UNITS_PER_WORD;
-      else
-	{
-	  /* Keep rounding up until only 1 bit set.  */
-	  unsigned lsb = (unsigned) size;
-
-	  boundary = 0;
-	  do
-	    {
-	      boundary += lsb;
-	      lsb = boundary & -boundary;
-	    }
-	  while (boundary != lsb);
-	}
-      boundary *= BITS_PER_UNIT;
-    }
-
-  return boundary;
-}
-
 /* Implement TARGET_LIBCALL_VALUE.  */
 
 static rtx
@@ -501,13 +510,11 @@  nvptx_libcall_value (machine_mode mode,
    where function FUNC returns or receives a value of data type TYPE.  */
 
 static rtx
-nvptx_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED,
+nvptx_function_value (const_tree type, const_tree ARG_UNUSED (func),
 		      bool outgoing)
 {
-  int unsignedp = TYPE_UNSIGNED (type);
-  machine_mode orig_mode = TYPE_MODE (type);
-  machine_mode mode = promote_function_mode (type, orig_mode,
-					     &unsignedp, NULL_TREE, 1);
+  machine_mode mode = promote_return (TYPE_MODE (type));
+
   if (outgoing)
     return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
 
@@ -529,7 +536,7 @@  static bool
 nvptx_pass_by_reference (cumulative_args_t ARG_UNUSED (cum), machine_mode mode,
 			 const_tree type, bool ARG_UNUSED (named))
 {
-  return !PASS_IN_REG_P (mode, type);
+  return pass_in_memory (mode, type, false);
 }
 
 /* Implement TARGET_RETURN_IN_MEMORY.  */
@@ -537,35 +544,17 @@  nvptx_pass_by_reference (cumulative_args
 static bool
 nvptx_return_in_memory (const_tree type, const_tree)
 {
-  machine_mode mode = TYPE_MODE (type);
-  if (!RETURN_IN_REG_P (mode))
-    return true;
-  return false;
+  return pass_in_memory (TYPE_MODE (type), type, true);
 }
 
 /* Implement TARGET_PROMOTE_FUNCTION_MODE.  */
 
 static machine_mode
 nvptx_promote_function_mode (const_tree type, machine_mode mode,
-			     int *punsignedp,
+			     int *ARG_UNUSED (punsignedp),
 			     const_tree funtype, int for_return)
 {
-  if (type == NULL_TREE)
-    return mode;
-  if (for_return)
-    return promote_mode (type, mode, punsignedp);
-  /* For K&R-style functions, try to match the language promotion rules to
-     minimize type mismatches at assembly time.  */
-  if (TYPE_ARG_TYPES (funtype) == NULL_TREE
-      && type != NULL_TREE
-      && !AGGREGATE_TYPE_P (type))
-    {
-      if (mode == SFmode)
-	mode = DFmode;
-      mode = arg_promotion (mode);
-    }
-
-  return mode;
+  return promote_arg (mode, for_return || !type || TYPE_ARG_TYPES (funtype));
 }
 
 /* Implement TARGET_STATIC_CHAIN.  */
@@ -576,7 +565,6 @@  nvptx_static_chain (const_tree fndecl, b
   if (!DECL_STATIC_CHAIN (fndecl))
     return NULL;
 
-
   return gen_rtx_REG (Pmode, (incoming_p ? STATIC_CHAIN_REGNUM
 			      : OUTGOING_STATIC_CHAIN_REGNUM));
 }
@@ -620,8 +608,9 @@  write_one_arg (std::stringstream &s, int
 }
 
 /* Process function parameter TYPE to emit one or more PTX
-   arguments.  PROTOTYPED is true, if this is a prototyped function,
-   rather than an old-style C declaration.
+   arguments. S, FOR_REG and ARGNO as for write_one_arg.  PROTOTYPED
+   is true, if this is a prototyped function, rather than an old-style
+   C declaration.  Returns the next argument number to use.
 
    The promotion behaviour here must match the regular GCC function
    parameter marshalling machinery.  */
@@ -635,50 +624,71 @@  write_arg (std::stringstream &s, int for
   if (mode == VOIDmode)
     return argno;
 
-  if (!PASS_IN_REG_P (mode, type))
+  if (pass_in_memory (mode, type, false))
     mode = Pmode;
+  else
+    {
+      bool split = TREE_CODE (type) == COMPLEX_TYPE;
 
-  machine_mode split = maybe_split_mode (mode);
-  if (split != VOIDmode)
-    mode = split;
+      if (split)
+	{
+	  /* Complex types are sent as two separate args.  */
+	  type = TREE_TYPE (type);
+	  mode  = TYPE_MODE (type);
+	  prototyped = true;
+	}
 
-  if (!prototyped && !AGGREGATE_TYPE_P (type))
-    {
-      if (mode == SFmode)
-	mode = DFmode;
-      mode = arg_promotion (mode);
+      mode = promote_arg (mode, prototyped);
+      if (split)
+	argno = write_one_arg (s, for_reg, argno, mode);
     }
-  else if (for_reg >= 0)
-    mode = arg_promotion (mode);
 
-  if (split != VOIDmode)
-    argno = write_one_arg (s, for_reg, argno, mode);
   return write_one_arg (s, for_reg, argno, mode);
 }
 
+/* Process a function return TYPE to emit a PTX return as a prototype
+   or function prologue declaration.  DECL_RESULT is the decl result
+   of the function and needed for determining named result
+   behaviour. Returns true if return is via an additional pointer
+   parameter.  The promotion behaviour here must match the regular GCC
+   function return marshalling.  */
+
 static bool
-write_return (std::stringstream &s, bool for_proto, tree type,
-	      machine_mode ret_mode)
+write_return (std::stringstream &s, bool for_proto, tree type)
 {
   machine_mode mode = TYPE_MODE (type);
-  bool return_in_mem = mode != VOIDmode && !RETURN_IN_REG_P (mode);
 
-  mode = arg_promotion (mode);
-  if (for_proto)
+  if (mode == VOIDmode)
+    return false;
+
+  bool return_in_mem = pass_in_memory (mode, type, true);
+
+  if (return_in_mem)
     {
-      if (!return_in_mem && mode != VOIDmode)
-	s << "(.param" << nvptx_ptx_type_from_mode (mode, false)
-	  << " %out_retval) ";
+      if (for_proto)
+	return return_in_mem;
+      
+      /* Named return values can cause us to return a pointer as well
+	 as expect an argument for the return location.  This is
+	 optimization-level specific, so no caller can make use of
+	 this data, but more importantly for us, we must ensure it
+	 doesn't change the PTX prototype.  */
+      mode = (machine_mode) cfun->machine->ret_reg_mode;
+      if (mode == VOIDmode)
+	return return_in_mem;
+
+      /* Clear ret_reg_mode to inhibit copy of retval to non-existent
+	 retval parameter.  */
+      cfun->machine->ret_reg_mode = VOIDmode;
     }
   else
-    {
-      /* Prologue.  C++11 ABI causes us to return a reference to the
-	 passed in pointer for return_in_mem.  */
-      ret_mode = arg_promotion (ret_mode);
-      if (ret_mode != VOIDmode)
-	s << "\t.reg" << nvptx_ptx_type_from_mode (ret_mode, false)
-	  << " %retval;\n";
-    }
+    mode = promote_return (mode);
+
+  const char *ptx_type  = nvptx_ptx_type_from_mode (mode, false);
+  if (for_proto)
+    s << "(.param" << ptx_type << " %out_retval) ";
+  else
+    s << "\t.reg" << ptx_type << " %retval;\n";
 
   return return_in_mem;
 }
@@ -751,7 +761,7 @@  write_fn_proto (std::stringstream &s, bo
   tree result_type = TREE_TYPE (fntype);
 
   /* Declare the result.  */
-  bool return_in_mem = write_return (s, true, result_type, VOIDmode);
+  bool return_in_mem = write_return (s, true, result_type);
 
   s << name;
 
@@ -943,8 +953,7 @@  nvptx_declare_function_name (FILE *file,
   write_fn_proto (s, true, name, decl);
   s << "{\n";
 
-  bool return_in_mem = write_return (s, false, result_type,
-				     (machine_mode)cfun->machine->ret_reg_mode);
+  bool return_in_mem = write_return (s, false, result_type);
   if (return_in_mem)
     argno = write_arg (s, 0, argno, ptr_type_node, true);
   
@@ -1203,6 +1212,7 @@  nvptx_expand_call (rtx retval, rtx addre
   if (tmp_retval != retval)
     emit_move_insn (retval, tmp_retval);
 }
+
 /* Emit a comparison COMPARE, and return the new test to be used in the
    jump.  */
 
@@ -4840,10 +4850,6 @@  nvptx_goacc_reduction (gcall *call)
 #define TARGET_FUNCTION_INCOMING_ARG nvptx_function_incoming_arg
 #undef TARGET_FUNCTION_ARG_ADVANCE
 #define TARGET_FUNCTION_ARG_ADVANCE nvptx_function_arg_advance
-#undef TARGET_FUNCTION_ARG_BOUNDARY
-#define TARGET_FUNCTION_ARG_BOUNDARY nvptx_function_arg_boundary
-#undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY
-#define TARGET_FUNCTION_ARG_ROUND_BOUNDARY nvptx_function_arg_boundary
 #undef TARGET_PASS_BY_REFERENCE
 #define TARGET_PASS_BY_REFERENCE nvptx_pass_by_reference
 #undef TARGET_FUNCTION_VALUE_REGNO_P
Index: gcc/testsuite/g++.dg/abi/nvptx-nrv1.C
===================================================================
--- gcc/testsuite/g++.dg/abi/nvptx-nrv1.C	(revision 0)
+++ gcc/testsuite/g++.dg/abi/nvptx-nrv1.C	(working copy)
@@ -0,0 +1,71 @@ 
+// { dg-do compile { target nvptx-*-* } }
+// { dg-additional-options "-m64" }
+
+// Check NRV optimization doesn't change the PTX prototypes.
+
+struct A
+{
+  int d;
+
+// { dg-final { scan-assembler-times ".weak .func _ZN1AC1Ev \\(.param.u64 %in_ar0\\)(?:;|\[\r\n\]+\{)" 2 } }
+  A () { d = 123; }
+  A (const A & o) { d = o.d; }
+  void frob ();
+};
+
+
+namespace unopt 
+{
+  // { dg-final { scan-assembler ".extern .func _ZN5unopt3fooEv \\(.param.u64 %in_ar0\\);" } }
+  A __attribute__ ((__optimize__ ("O0"))) foo ();
+
+  // { dg-final { scan-assembler-times ".visible .func _ZN5unopt3barEv \\(.param.u64 %in_ar0\\)(?:;|\[\r\n\]+\{)" 2 } }
+  A __attribute__ ((__optimize__ ("O0"), noinline)) bar()
+  {
+    A l;
+    return l;
+  }
+
+  // { dg-final { scan-assembler-times ".visible .func _ZN5unopt3bazEv \\(.param.u64 %in_ar0\\)(?:;|\[\r\n\]+\{)" 2 } }
+  A __attribute__ ((__optimize__ ("O0"), noinline)) baz ()
+  {
+    return foo ();
+  }
+
+  void __attribute__ ((__optimize__ ("O0"), noinline)) quux ()
+  {
+    bar ().frob ();
+    baz ().frob ();
+    foo ().frob ();
+  }
+  
+}
+
+namespace opt
+{
+  // { dg-final { scan-assembler ".extern .func _ZN3opt3fooEv \\(.param.u64 %in_ar0\\);" } }
+  A __attribute__ ((__optimize__ ("O2"), noinline)) foo ();
+
+  // { dg-final { scan-assembler-times ".visible .func _ZN3opt3barEv \\(.param.u64 %in_ar0\\)(?:;|\[\r\n\]+\{)" 2 } }
+  A __attribute__ ((__optimize__ ("O2"), noinline)) bar()
+  {
+    A l;
+    return l;
+  }
+
+  // { dg-final { scan-assembler-times ".visible .func _ZN3opt3bazEv \\(.param.u64 %in_ar0\\)(?:;|\[\r\n\]+\{)" 2 } }
+  A __attribute__ ((__optimize__ ("O2"))) baz ()
+  {
+    return foo ();
+  }
+
+  void __attribute__ ((__optimize__ ("O2"), noinline)) quux ()
+  {
+    bar ().frob ();
+    baz ().frob ();
+    foo ().frob ();
+  }
+}
+
+// Make sure we're not trying to return a return value anywhere.
+// { dg-final { scan-assembler-not "st.param.u64\[\t \]*\\\[%out_retval\\\], %\[_a-z0-9\]*;" } }
Index: gcc/testsuite/g++.dg/abi/nvptx-ptrmem1.C
===================================================================
--- gcc/testsuite/g++.dg/abi/nvptx-ptrmem1.C	(revision 0)
+++ gcc/testsuite/g++.dg/abi/nvptx-ptrmem1.C	(working copy)
@@ -0,0 +1,52 @@ 
+// { dg-do compile { target nvptx-*-* } }
+// { dg-additional-options "-m64" }
+
+// ptr to member data is like sizeof.
+// ptr to member fn is like a struct.
+
+struct A{};
+
+typedef int A::*pmd;
+typedef void (A::*pmf) ();
+
+// { dg-final { scan-assembler ".extern .func \\(.param.u64 %out_retval\\) _Z8dcl_rpmdv;" } }
+pmd dcl_rpmd ();
+
+// { dg-final { scan-assembler ".extern .func _Z8dcl_rpmfv \\(.param.u64 %in_ar0\\);" } }
+pmf dcl_rpmf ();
+
+// { dg-final { scan-assembler ".extern .func _Z8dcl_apmdM1Ai \\(.param.u64 %in_ar0\\);" } }
+void dcl_apmd (pmd);
+
+// { dg-final { scan-assembler ".extern .func _Z8dcl_apmfM1AFvvE \\(.param.u64 %in_ar0\\);" } }
+void dcl_apmf (pmf);
+
+void test_1 ()
+{
+  dcl_rpmd ();
+  dcl_rpmf ();
+  dcl_apmd (0);
+  dcl_apmf (0);
+}
+
+// { dg-final { scan-assembler-times ".visible .func \\(.param.u64 %out_retval\\) _Z8dfn_rpmdv(?:;|\[\r\n\]+\{)" 2 } }
+pmd dfn_rpmd ()
+{
+  return 0;
+}
+
+// { dg-final { scan-assembler-times ".visible .func _Z8dfn_rpmfv \\(.param.u64 %in_ar0\\)(?:;|\[\r\n\]+\{)" 2 } }
+pmf dfn_rpmf ()
+{
+  return 0;
+}
+
+// { dg-final { scan-assembler-times ".visible .func _Z8dfn_apmdM1Ai \\(.param.u64 %in_ar0\\)(?:;|\[\r\n\]+\{)" 2 } }
+void dfn_apmd (pmd)
+{
+}
+
+// { dg-final { scan-assembler-times ".visible .func _Z8dfn_apmfM1AFvvE \\(.param.u64 %in_ar0\\)(?:;|\[\r\n\]+\{)" 2 } }
+void dfn_apmf (pmf)
+{
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-complex-arg.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-complex-arg.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-complex-arg.c	(working copy)
@@ -0,0 +1,64 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64" } */
+
+/* Complex arg types.  All these should be in 2 registers.  */
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_acc \\(.param.u32 %\[_a-z0-9\]*, .param.u32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_acc (_Complex char);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_acs \\(.param.u32 %\[_a-z0-9\]*, .param.u32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_acs (_Complex short);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_aci \\(.param.u32 %\[_a-z0-9\]*, .param.u32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_aci (_Complex int);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_acll \\(.param.u64 %\[_a-z0-9\]*, .param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_acll (_Complex long);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_acf \\(.param.f32 %\[_a-z0-9\]*, .param.f32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_acf (_Complex float);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_acd \\(.param.f64 %\[_a-z0-9\]*, .param.f64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_acd (_Complex double);
+
+#define M(T,r,i) ({_Complex T x; __real__ (x) = (r), __imag__(x) == (i); x; })
+
+void test_1 (void)
+{
+  dcl_acc (M (char, 1, 2));
+  dcl_acs (M (short, 3, 4));
+  dcl_aci (M (int, 5, 6));
+  dcl_acll (M (long long, 7, 8));
+  dcl_acf (M (float, 9, 10));
+  dcl_acd (M (double, 11, 12));
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_acc \\(.param.u32 %\[_a-z0-9\]*, .param.u32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_acc (_Complex char c)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_acs \\(.param.u32 %\[_a-z0-9\]*, .param.u32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_acs (_Complex short s)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_aci \\(.param.u32 %\[_a-z0-9\]*, .param.u32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_aci (_Complex int i)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_acll \\(.param.u64 %\[_a-z0-9\]*, .param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_acll (_Complex long long ll)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_acf \\(.param.f32 %\[_a-z0-9\]*, .param.f32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_acf (_Complex float f)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_acd \\(.param.f64 %\[_a-z0-9\]*, .param.f64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_acd (_Complex double d)
+{
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-complex-ret.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-complex-ret.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-complex-ret.c	(working copy)
@@ -0,0 +1,70 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64" } */
+
+/* Complex return.  Returned via pointer.  */
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rcc \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+_Complex char dcl_rcc (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rcs \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+_Complex short dcl_rcs (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rci \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+_Complex int dcl_rci (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rcll \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+_Complex long long dcl_rcll (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rcf \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+_Complex float dcl_rcf (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rcd \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+_Complex double dcl_rcd (void);
+
+void test_1 (void)
+{
+  dcl_rcc ();
+  dcl_rcs ();
+  dcl_rci ();
+  dcl_rcll ();
+  dcl_rcf ();
+  dcl_rcd ();
+}
+
+#define M(T,r,i) ({_Complex T x; __real__ (x) = (r), __imag__(x) == (i); x; })
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rcc \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+_Complex char dfn_rcc (void)
+{
+  return M (char,1,2);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rcs \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+_Complex short dfn_rcs (void)
+{
+  return M (short,3,4);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rci \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+_Complex int dfn_rci (void)
+{
+  return M (int,5,6);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rcll \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+_Complex long long dfn_rcll (void)
+{
+  return M (long long,7,8);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rcf \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+_Complex float dfn_rcf (void)
+{
+  return M (float,9,10);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rcd \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+_Complex double dfn_rcd (void)
+{
+  return M (double,11,12);
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-enum-arg.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-enum-arg.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-enum-arg.c	(working copy)
@@ -0,0 +1,49 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64 -fshort-enums" } */
+
+/* Enum arg types.  Passed as the underlying integer.  */
+
+typedef enum { a = 0x1, b } Echar;
+typedef enum { c = 0x100, d } Eshort;
+typedef enum { e = 0x10000, f } Eint;
+typedef enum { g = 0x100000000LL, h } Elonglong;
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_ac \\(.param.u32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_ac (Echar);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_as \\(.param.u32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_as (Eshort);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_ai \\(.param.u32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_ai (Eint);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_all \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_all (Elonglong);
+
+void test_1 (void)
+{
+  dcl_ac (1);
+  dcl_as (2);
+  dcl_ai (3);
+  dcl_all (4);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_ac \\(.param.u32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_ac (Echar c)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_as \\(.param.u32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_as (Eshort s)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_ai \\(.param.u32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_ai (Eint i)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_all \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_all (Elonglong ll)
+{
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-enum-ret.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-enum-ret.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-enum-ret.c	(working copy)
@@ -0,0 +1,53 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64 -fshort-enums" } */
+
+/* Enum return types.  Passed as the underlying integer.  */
+
+typedef enum { a = 0x1, b } Echar;
+typedef enum { c = 0x100, d } Eshort;
+typedef enum { e = 0x10000, f } Eint;
+typedef enum { g = 0x100000000LL, h } Elonglong;
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u32 %\[_a-z\]*\\) dcl_rc;" 1 } } */
+Echar dcl_rc (void);
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u32 %\[_a-z\]*\\) dcl_rs;" 1 } } */
+Eshort dcl_rs (void);
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u32 %\[_a-z\]*\\) dcl_ri;" 1 } } */
+Eint dcl_ri (void);
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u64 %\[_a-z\]*\\) dcl_rll;" 1 } } */
+Elonglong dcl_rll (void);
+
+void test_1 (void)
+{
+  dcl_rc ();
+  dcl_rs ();
+  dcl_ri ();
+  dcl_rll ();
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u32 %\[_a-z0-9\]*\\) dfn_rc(?:;|\[\r\n\]+\{)" 2 } } */
+Echar dfn_rc (void)
+{
+  return 1;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u32 %\[_a-z0-0\]*\\) dfn_rs(?:;|\[\r\n\]+\{)" 2 } } */
+Eshort dfn_rs (void)
+{
+  return 2;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u32 %\[_a-z0-9\]*\\) dfn_ri(?:;|\[\r\n\]+\{)" 2 } } */
+Eint dfn_ri (void)
+{
+  return 3;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u64 %\[_a-z0-9\]*\\) dfn_rll(?:;|\[\r\n\]+\{)" 2 } } */
+Elonglong dfn_rll (void)
+{
+  return 4;
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-knr-arg.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-knr-arg.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-knr-arg.c	(working copy)
@@ -0,0 +1,95 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64" } */
+
+/* K+R args.  as for ANSI except float promotes to double.  */
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_av;" 1 } } */
+void dcl_av ();
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_ac \\(.param.u32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_ac ();
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_as \\(.param.u32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_as ();
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_ai \\(.param.u32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_ai ();
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_all \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_all ();
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_af \\(.param.f64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_af ();
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_ad \\(.param.f64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_ad ();
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_ap \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_ap ();
+
+void test_1 (void)
+{
+  dcl_av ();
+  dcl_ac ((char)1);
+  dcl_as ((short)2);
+  dcl_ai ((int)3);
+  dcl_all ((long long)4);
+  dcl_af ((float)5);
+  dcl_ad ((double)6);
+  dcl_ap ((void *)0);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_av(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_av ()
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_ac \\(.param.u32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_ac (c)
+  char c;
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_as \\(.param.u32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_as (s)
+  short s;
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_ai \\(.param.u32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_ai (i)
+  int i;
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_all \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_all (ll)
+  long long ll;
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_af \\(.param.f64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_af (f)
+  float f;
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_ad \\(.param.f64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_ad (d)
+  double d;
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_ap \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_ap (p)
+  void *p;
+{
+}
+
+/*  But complex float is passed as two floats.  (K&R doesn't have
+    complex, so why obey k&r for the components of such an object?)  */
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_acf \\(.param.f32 %\[_a-z0-9\]*, .param.f32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_acf (_Complex float f)
+{
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-knr-ret.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-knr-ret.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-knr-ret.c	(working copy)
@@ -0,0 +1,88 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64" } */
+
+/* K&R returns.  Same as for ANSI.  */
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rv;" 1 } } */
+void dcl_rv ();
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u32 %\[_a-z\]*\\) dcl_rc;" 1 } } */
+char dcl_rc ();
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u32 %\[_a-z\]*\\) dcl_rs;" 1 } } */
+short dcl_rs ();
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u32 %\[_a-z\]*\\) dcl_ri;" 1 } } */
+int dcl_ri ();
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u64 %\[_a-z\]*\\) dcl_rll;" 1 } } */
+long long dcl_rll ();
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.f32 %\[_a-z\]*\\) dcl_rf;" 1 } } */
+float dcl_rf ();
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.f64 %\[_a-z\]*\\) dcl_rd;" 1 } } */
+double dcl_rd ();
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u64 %\[_a-z\]*\\) dcl_rp;" 1 } } */
+void *dcl_rp ();
+
+void test_1 ()
+{
+  dcl_rv ();
+  dcl_rc ();
+  dcl_rs ();
+  dcl_ri ();
+  dcl_rll ();
+  dcl_rf ();
+  dcl_rd ();
+  dcl_rp ();
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rv(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_rv ()
+{
+  return;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u32 %\[_a-z0-9\]*\\) dfn_rc(?:;|\[\r\n\]+\{)" 2 } } */
+char dfn_rc ()
+{
+  return 1;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u32 %\[_a-z0-9\]*\\) dfn_rs(?:;|\[\r\n\]+\{)" 2 } } */
+short dfn_rs ()
+{
+  return 2;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u32 %\[_a-z0-9\]*\\) dfn_ri(?:;|\[\r\n\]+\{)" 2 } } */
+int dfn_ri ()
+{
+  return 3;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u64 %\[_a-z0-9\]*\\) dfn_rll(?:;|\[\r\n\]+\{)" 2 } } */
+long long dfn_rll ()
+{
+  return 4;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.f32 %\[_a-z0-9\]*\\) dfn_rf(?:;|\[\r\n\]+\{)" 2 } } */
+float dfn_rf ()
+{
+  return 5;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.f64 %\[_a-z0-9\]*\\) dfn_rd(?:;|\[\r\n\]+\{)" 2 } } */
+double dfn_rd ()
+{
+  return 6;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u64 %\[_a-z0-9\]*\\) dfn_rp(?:;|\[\r\n\]+\{)" 2 } } */
+void *dfn_rp ()
+{
+  return 0;
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-scalar-arg.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-scalar-arg.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-scalar-arg.c	(working copy)
@@ -0,0 +1,91 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64" } */
+
+/* Scalar argument types.  In registers when <= 64 bit.  */
+
+typedef int __attribute__((mode(TI))) ti;
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_av;" 1 } } */
+void dcl_av (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_ac \\(.param.u32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_ac (char);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_as \\(.param.u32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_as (short);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_ai \\(.param.u32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_ai (int);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_all \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_all (long long);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_ati \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_ati (ti);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_af \\(.param.f32 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_af (float);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_ad \\(.param.f64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_ad (double);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_ap \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_ap (void *);
+
+void test_1 (void)
+{
+  dcl_av ();
+  dcl_ac (1);
+  dcl_as (2);
+  dcl_ai (3);
+  dcl_all (4);
+  dcl_ati (5);
+  dcl_af (6);
+  dcl_ad (7);
+  dcl_ap (0);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_av(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_av (void)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_ac \\(.param.u32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_ac (char c)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_as \\(.param.u32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_as (short s)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_ai \\(.param.u32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_ai (int i)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_all \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_all (long long ll)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_ati \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_ati (ti t)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_af \\(.param.f32 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_af (float f)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_ad \\(.param.f64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_ad (double d)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_ap \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_ap (void *p)
+{
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-scalar-ret.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-scalar-ret.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-scalar-ret.c	(working copy)
@@ -0,0 +1,100 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64" } */
+
+/* Scalar return types.  In registers when <= 64 bit.  */
+
+typedef int __attribute__((mode(TI))) ti;
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rv;" 1 } } */
+void dcl_rv (void);
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u32 %\[_a-z\]*\\) dcl_rc;" 1 } } */
+char dcl_rc (void);
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u32 %\[_a-z\]*\\) dcl_rs;" 1 } } */
+short dcl_rs (void);
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u32 %\[_a-z\]*\\) dcl_ri;" 1 } } */
+int dcl_ri (void);
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u64 %\[_a-z\]*\\) dcl_rll;" 1 } } */
+long long dcl_rll (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rti \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+ti dcl_rti (void);
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.f32 %\[_a-z\]*\\) dcl_rf;" 1 } } */
+float dcl_rf (void);
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.f64 %\[_a-z\]*\\) dcl_rd;" 1 } } */
+double dcl_rd (void);
+
+/* { dg-final { scan-assembler-times ".extern .func \\(.param.u64 %\[_a-z\]*\\) dcl_rp;" 1 } } */
+void *dcl_rp (void);
+
+void test_1 (void)
+{
+  dcl_rv ();
+  dcl_rc ();
+  dcl_rs ();
+  dcl_ri ();
+  dcl_rll ();
+  dcl_rti ();
+  dcl_rf ();
+  dcl_rd ();
+  dcl_rp ();
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rv(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_rv (void)
+{
+  return;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u32 %\[_a-z0-9\]*\\) dfn_rc(?:;|\[\r\n\]+\{)" 2 } } */
+char dfn_rc (void)
+{
+  return 1;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u32 %\[_a-z0-9\]*\\) dfn_rs(?:;|\[\r\n\]+\{)" 2 } } */
+short dfn_rs (void)
+{
+  return 2;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u32 %\[_a-z0-9\]*\\) dfn_ri(?:;|\[\r\n\]+\{)" 2 } } */
+int dfn_ri (void)
+{
+  return 3;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u64 %\[_a-z0-9\]*\\) dfn_rll(?:;|\[\r\n\]+\{)" 2 } } */
+long long dfn_rll (void)
+{
+  return 4;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rti \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+ti dfn_rti (void)
+{
+  return 5;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.f32 %\[_a-z0-9\]*\\) dfn_rf(?:;|\[\r\n\]+\{)" 2 } } */
+float dfn_rf (void)
+{
+  return 6;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.f64 %\[_a-z0-9\]*\\) dfn_rd(?:;|\[\r\n\]+\{)" 2 } } */
+double dfn_rd (void)
+{
+  return 7;
+}
+
+/* { dg-final { scan-assembler-times ".visible .func \\(.param.u64 %\[_a-z0-9\]*\\) dfn_rp(?:;|\[\r\n\]+\{)" 2 } } */
+void *dfn_rp (void)
+{
+  return 0;
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-struct-arg.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-struct-arg.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-struct-arg.c	(working copy)
@@ -0,0 +1,62 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64" } */
+
+/* Struct arg.  Passed via pointer.  */
+
+typedef struct {char a;} one;
+typedef struct {short a;} two;
+typedef struct {int a;} four;
+typedef struct {long long a;} eight;
+typedef struct {int a, b[12];} big;
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_aone \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_aone (one);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_atwo \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_atwo (two);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_afour \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_afour (four);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_aeight \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_aeight (eight);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_abig \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_abig (big);
+
+#define M(T, v) ({T t; t.a = v; t;})
+
+void test_1 (void)
+{
+  dcl_aone (M (one, 1));
+  dcl_atwo (M (two, 2));
+  dcl_afour (M (four, 3));
+  dcl_aeight (M (eight, 4));
+  dcl_abig (M (big, 5));
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_aone \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_aone (one one)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_atwo \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_atwo (two two)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_afour \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_afour (four four)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_aeight \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_aeight (eight eight)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_abig \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_abig (big big)
+{
+}
+
Index: gcc/testsuite/gcc.target/nvptx/abi-struct-ret.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-struct-ret.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-struct-ret.c	(working copy)
@@ -0,0 +1,66 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64" } */
+
+/* Struct return.  Returned via pointer.  */
+
+typedef struct {char a;} one;
+typedef struct {short a;} two;
+typedef struct {int a;} four;
+typedef struct {long long a;} eight;
+typedef struct {int a, b[12];} big;
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rone \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+one dcl_rone (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rtwo \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+two dcl_rtwo (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rfour \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+four dcl_rfour (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_reight \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+eight dcl_reight (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rbig \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+big dcl_rbig (void);
+
+void test_1 (void)
+{
+  dcl_rone ();
+  dcl_rtwo ();
+  dcl_rfour ();
+  dcl_reight ();
+  dcl_rbig ();
+}
+
+#define M(T, v) ({T t; t.a = v; t;})
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rone \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+one dfn_rone (void)
+{
+  return M (one, 1);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rtwo \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+two dfn_rtwo (void)
+{
+  return M (two, 2);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rfour \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+four dfn_rfour (void)
+{
+  return M (four, 3);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_reight \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+eight dfn_reight (void)
+{
+  return M (eight, 4);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_rbig \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+big dfn_rbig (void)
+{
+  return M (big, 5);
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-vararg-1.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-vararg-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-vararg-1.c	(working copy)
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64" } */
+
+/* varargs, passed as pointer to array of args.   */
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_av \\(.param.u32 %\[_a-z0-9\]*, .param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_av (int, ...);
+
+void test_1 (void)
+{
+  dcl_av (1, 1);
+  dcl_av (2, 1, 2);
+  dcl_av (2, 1, 2, 3);
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_av \\(.param.u32 %\[_a-z0-9\]*, .param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_av (int a, ...)
+{
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-vararg-2.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-vararg-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-vararg-2.c	(working copy)
@@ -0,0 +1,208 @@ 
+/* { dg-do run } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -fshort-enums" } */
+
+/* va args, promoted as regular knr args.  */
+
+void __attribute__ ((noinline)) chars (int i, ...)
+{
+  __builtin_va_list args;
+  __builtin_va_start (args, i);
+  /* Abort unless the variadic values, fetched as int, are 1 and 2.  */
+  if (__builtin_va_arg (args, int) != 1) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != 2) __builtin_abort ();
+  __builtin_va_end (args);
+}
+
+void __attribute__ ((noinline)) shorts (int i, ...)
+{
+  __builtin_va_list args;
+  __builtin_va_start (args, i);
+  /* Abort unless the variadic values, fetched as int, are 1, 2 and 3.  */
+  if (__builtin_va_arg (args, int) != 1) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != 2) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != 3) __builtin_abort ();
+  __builtin_va_end (args);
+}
+
+void __attribute__ ((noinline)) ints (int i, ...)
+{
+  __builtin_va_list args;
+  __builtin_va_start (args, i);
+  /* Abort unless the three variadic ints are 1, 2 and 3, in that order.  */
+  if (__builtin_va_arg (args, int) != 1) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != 2) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != 3) __builtin_abort ();
+  __builtin_va_end (args);
+}
+
+void __attribute__ ((noinline)) longlongs (int i, ...)
+{
+  __builtin_va_list args;
+  __builtin_va_start (args, i);
+  /* Abort unless an int 1 is followed by the long longs 2 and 3.  */
+  if (__builtin_va_arg (args, int) != 1) __builtin_abort ();
+  if (__builtin_va_arg (args, long long) != 2) __builtin_abort ();
+  if (__builtin_va_arg (args, long long) != 3) __builtin_abort ();
+  __builtin_va_end (args);
+}
+
+typedef int __attribute__ ((mode(TI))) ti;
+
+void __attribute__ ((noinline)) tis (int i, ...)
+{
+  __builtin_va_list args;
+  __builtin_va_start (args, i);
+  /* Abort unless an int 1 is followed by the TImode integers 2 and 3.  */
+  if (__builtin_va_arg (args, int) != 1) __builtin_abort ();
+  if (__builtin_va_arg (args, ti) != 2) __builtin_abort ();
+  if (__builtin_va_arg (args, ti) != 3) __builtin_abort ();
+  __builtin_va_end (args);
+}
+
+void __attribute__ ((noinline)) floats (int i, ...)
+{
+  __builtin_va_list args;
+  __builtin_va_start (args, i);
+  /* Abort unless an int 1 is followed by 2 and 3, fetched as double.  */
+  if (__builtin_va_arg (args, int) != 1) __builtin_abort ();
+  if (__builtin_va_arg (args, double) != 2) __builtin_abort ();
+  if (__builtin_va_arg (args, double) != 3) __builtin_abort ();
+  __builtin_va_end (args);
+}
+
+void __attribute__ ((noinline)) doubles (int i, ...)
+{
+  __builtin_va_list args;
+  __builtin_va_start (args, i);
+  /* Abort unless an int 1 is followed by the doubles 2 and 3.  */
+  if (__builtin_va_arg (args, int) != 1) __builtin_abort ();
+  if (__builtin_va_arg (args, double) != 2) __builtin_abort ();
+  if (__builtin_va_arg (args, double) != 3) __builtin_abort ();
+  __builtin_va_end (args);
+}
+
+typedef enum {ec1, ec2, ecmax = 0xf} echar;
+typedef enum {es1, es2, esmax = 0xfff} eshort;
+typedef enum {ei1, ei2, eimax = 0xfffff} eint;
+
+void __attribute__ ((noinline)) echars (int i, ...)
+{
+  __builtin_va_list args;
+  __builtin_va_start (args, i);
+  /* Abort unless the values, fetched as int, are ec1, ec2 and ecmax.  */
+  if (__builtin_va_arg (args, int) != ec1) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != ec2) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != ecmax) __builtin_abort ();
+  __builtin_va_end (args);
+}
+
+void __attribute__ ((noinline)) eshorts (int i, ...)
+{
+  __builtin_va_list args;
+  __builtin_va_start (args, i);
+  /* Abort unless the values, fetched as int, are es1, es2 and esmax.  */
+  if (__builtin_va_arg (args, int) != es1) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != es2) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != esmax) __builtin_abort ();
+  __builtin_va_end (args);
+}
+
+void __attribute__ ((noinline)) eints (int i, ...)
+{
+  __builtin_va_list args;
+  __builtin_va_start (args, i);
+  /* Abort unless the values, fetched as int, are ei1, ei2 and eimax.  */
+  if (__builtin_va_arg (args, int) != ei1) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != ei2) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != eimax) __builtin_abort ();
+  __builtin_va_end (args);
+}
+
+typedef struct {char a;} one;
+typedef struct {short a;} two;
+typedef struct {int a;} four;
+typedef struct {long long a;} eight;
+typedef struct {int a, b[12];} big;
+
+void __attribute__ ((noinline)) structs (int i, ...)
+{
+  __builtin_va_list args;
+  __builtin_va_start (args, i);
+  /* Ints 1,3,5,7,9 alternate with structs whose .a is 2,4,6,8,10.  */
+  if (__builtin_va_arg (args, int) != 1) __builtin_abort ();
+  if (__builtin_va_arg (args, one).a != 2) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != 3) __builtin_abort ();
+  if (__builtin_va_arg (args, two).a != 4) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != 5) __builtin_abort ();
+  if (__builtin_va_arg (args, four).a != 6) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != 7) __builtin_abort ();
+  if (__builtin_va_arg (args, eight).a != 8) __builtin_abort ();
+  if (__builtin_va_arg (args, int) != 9) __builtin_abort ();
+  if (__builtin_va_arg (args, big).a != 10) __builtin_abort ();
+  __builtin_va_end (args);
+}
+
+int main ()
+{
+  char vc1, vc2, vc3, vc4, vc5;
+  short vs1, vs2, vs3;
+  int vi1, vi2, vi3;
+  long long vll1, vll2;
+  ti vti1, vti2;
+  float vf1, vf2;
+  double vd1, vd2;
+
+  one vone;
+  two vtwo;
+  four vfour;
+  eight veight;
+  big vbig;
+  echar vec1, vec2,  vec3;
+  eshort ves1, ves2,  ves3;
+  eint vei1, vei2,  vei3;
+  /* Pass narrow-typed variadic arguments to each checker in turn.  */
+  vc1 = 1, vc2 = 2;
+  chars (1, vc1, vc2);
+
+  vc1 = 1, vs1 = 2, vs2 = 3;
+  shorts (1, vc1, vs1, vs2);
+
+  vc1 = 1, vi1 = 2, vi2 = 3;
+  ints (1, vc1, vi1, vi2);
+
+  vc1 = 1,  vll1 = 2, vll2 = 3;
+  longlongs (1, vc1, vll1, vll2);
+
+  vc1 = 1, vti1 = 2, vti2 = 3;
+  tis (1, vc1,  vti1, vti2);
+
+  vc1 = 1,  vf1 = 2, vf2 = 3;
+  floats (1, vc1, vf1, vf2);
+  doubles (1, vc1, vf1, vf2); /* Floats are promoted, so this should work. */
+
+  vc1 = 1, vd1 = 2, vd2 = 3;
+  floats (1, vc1, vd1, vd2); /* floats () reads doubles, so this should work. */
+  doubles (1, vc1, vd1, vd2);
+  /* Each enum checker runs with enum-typed and then integer-typed args.  */
+  vec1 = ec1, vec2 = ec2, vec3 = ecmax;
+  echars (1, vec1, vec2, vec3);
+  vc1 = ec1, vc2 = ec2, vc3 = ecmax;
+  echars (1, vc1, vc2, vc3);
+
+  ves1 = es1, ves2 = es2, ves3 = esmax;
+  eshorts (1, ves1, ves2, ves3);
+  vs1 = es1, vs2 = es2, vs3 = esmax;
+  eshorts (1, vs1, vs2, vs3);
+
+  vei1 = ei1, vei2 = ei2, vei3 = eimax;
+  eints (1, vei1, vei2, vei3);
+  vi1 = ei1, vi2 = ei2, vi3 = eimax;
+  eints (1, vi1, vi2, vi3);
+  /* Interleave char arguments with by-value structs of growing size.  */
+  vc1 = 1, vone.a = 2, vc2 = 3, vtwo.a = 4,
+    vc3 = 5, vfour.a = 6, vc4 = 7, veight.a = 8,
+    vc5 = 9, vbig.a = 10;
+  structs (1, vc1,  vone, vc2, vtwo, vc3, vfour, vc4, veight, vc5, vbig);
+
+  return 0;
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-vect-arg.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-vect-arg.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-vect-arg.c	(working copy)
@@ -0,0 +1,225 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64" } */
+
+/* Vector arg.  Pass via pointer.  */
+
+typedef char __attribute__ ((vector_size (1))) vc1;
+typedef char __attribute__ ((vector_size (2))) vc2;
+typedef char __attribute__ ((vector_size (4))) vc4;
+typedef char __attribute__ ((vector_size (8))) vc8;
+
+typedef short __attribute__ ((vector_size (2))) vs1;
+typedef short __attribute__ ((vector_size (4))) vs2;
+typedef short __attribute__ ((vector_size (8))) vs4;
+typedef short __attribute__ ((vector_size (16))) vs8;
+
+typedef int __attribute__ ((vector_size (4))) vi1;
+typedef int __attribute__ ((vector_size (8))) vi2;
+typedef int __attribute__ ((vector_size (16))) vi4;
+typedef int __attribute__ ((vector_size (32))) vi8;
+
+typedef long long __attribute__ ((vector_size (8))) vll1;
+typedef long long __attribute__ ((vector_size (16))) vll2;
+typedef long long __attribute__ ((vector_size (32))) vll4;
+typedef long long __attribute__ ((vector_size (64))) vll8;
+
+typedef float __attribute__ ((vector_size (4))) vf1;
+typedef float __attribute__ ((vector_size (8))) vf2;
+typedef float __attribute__ ((vector_size (16))) vf4;
+typedef float __attribute__ ((vector_size (32))) vf8;
+
+typedef double __attribute__ ((vector_size (8))) vd1;
+typedef double __attribute__ ((vector_size (16))) vd2;
+typedef double __attribute__ ((vector_size (32))) vd4;
+typedef double __attribute__ ((vector_size (64))) vd8;
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_avc1 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avc1 (vc1);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avc2 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avc2 (vc2);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avc4 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avc4 (vc4);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avc8 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avc8 (vc8);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_avs1 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avs1 (vs1);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avs2 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avs2 (vs2);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avs4 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avs4 (vs4);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avs8 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avs8 (vs8);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_avi1 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avi1 (vi1);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avi2 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avi2 (vi2);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avi4 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avi4 (vi4);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avi8 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avi8 (vi8);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_avll1 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avll1 (vll1);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avll2 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avll2 (vll2);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avll4 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avll4 (vll4);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avll8 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avll8 (vll8);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_avf1 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avf1 (vf1);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avf2 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avf2 (vf2);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avf4 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avf4 (vf4);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avf8 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avf8 (vf8);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_avd1 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avd1 (vd1);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avd2 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avd2 (vd2);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avd4 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avd4 (vd4);
+/* { dg-final { scan-assembler-times ".extern .func dcl_avd8 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+void dcl_avd8 (vd8);
+
+#define M(T, V) ({T t;t[0]= V;t;})
+
+void  test_1 (void)
+{
+  dcl_avc1 (M (vc1, 1));
+  dcl_avc2 (M (vc2, 2));
+  dcl_avc4 (M (vc4, 3));
+  dcl_avc8 (M (vc8, 4));
+
+  dcl_avs1 (M (vs1, 5));
+  dcl_avs2 (M (vs2, 6));
+  dcl_avs4 (M (vs4, 7));
+  dcl_avs8 (M (vs8, 8));
+
+  dcl_avi1 (M (vi1, 9));
+  dcl_avi2 (M (vi2, 10));
+  dcl_avi4 (M (vi4, 11));
+  dcl_avi8 (M (vi8, 12));
+
+  dcl_avll1 (M (vll1, 13));
+  dcl_avll2 (M (vll2, 14));
+  dcl_avll4 (M (vll4, 15));
+  dcl_avll8 (M (vll8, 16));
+
+  dcl_avf1 (M (vf1, 17));
+  dcl_avf2 (M (vf2, 18));
+  dcl_avf4 (M (vf4, 19));
+  dcl_avf8 (M (vf8, 20));
+
+  dcl_avd1 (M (vd1, 21));
+  dcl_avd2 (M (vd2, 22));
+  dcl_avd4 (M (vd4, 23));
+  dcl_avd8 (M (vd8, 24));
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_avc1 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avc1(vc1 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avc2 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avc2(vc2 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avc4 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avc4(vc4 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avc8 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avc8(vc8 a)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_avs1 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avs1(vs1 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avs2 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avs2(vs2 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avs4 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avs4(vs4 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avs8 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avs8(vs8 a)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_avi1 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avi1(vi1 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avi2 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avi2(vi2 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avi4 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avi4(vi4 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avi8 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avi8(vi8 a)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_avll1 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avll1(vll1 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avll2 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avll2(vll2 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avll4 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avll4(vll4 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avll8 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avll8(vll8 a)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_avf1 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avf1(vf1 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avf2 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avf2(vf2 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avf4 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avf4(vf4 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avf8 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avf8(vf8 a)
+{
+}
+
+/* { dg-final { scan-assembler-times ".visible .func dfn_avd1 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avd1(vd1 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avd2 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avd2(vd2 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avd4 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avd4(vd4 a)
+{
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_avd8 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+void dfn_avd8(vd8 a)
+{
+}
Index: gcc/testsuite/gcc.target/nvptx/abi-vect-ret.c
===================================================================
--- gcc/testsuite/gcc.target/nvptx/abi-vect-ret.c	(revision 0)
+++ gcc/testsuite/gcc.target/nvptx/abi-vect-ret.c	(working copy)
@@ -0,0 +1,249 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -Wno-long-long -m64" } */
+
+/* Vector return.  Return via pointer.  */
+/* Naming: v<elt><N>, where <elt> is c/s/i/ll/f/d for char/short/int/long long/float/double and vector_size gives the total size in bytes; N is the element count assuming 1/2/4/8/4/8-byte elements.  */
+typedef char __attribute__ ((vector_size (1))) vc1;
+typedef char __attribute__ ((vector_size (2))) vc2;
+typedef char __attribute__ ((vector_size (4))) vc4;
+typedef char __attribute__ ((vector_size (8))) vc8;
+
+typedef short __attribute__ ((vector_size (2))) vs1;
+typedef short __attribute__ ((vector_size (4))) vs2;
+typedef short __attribute__ ((vector_size (8))) vs4;
+typedef short __attribute__ ((vector_size (16))) vs8;
+
+typedef int __attribute__ ((vector_size (4))) vi1;
+typedef int __attribute__ ((vector_size (8))) vi2;
+typedef int __attribute__ ((vector_size (16))) vi4;
+typedef int __attribute__ ((vector_size (32))) vi8;
+
+typedef long long __attribute__ ((vector_size (8))) vll1;
+typedef long long __attribute__ ((vector_size (16))) vll2;
+typedef long long __attribute__ ((vector_size (32))) vll4;
+typedef long long __attribute__ ((vector_size (64))) vll8;
+
+typedef float __attribute__ ((vector_size (4))) vf1;
+typedef float __attribute__ ((vector_size (8))) vf2;
+typedef float __attribute__ ((vector_size (16))) vf4;
+typedef float __attribute__ ((vector_size (32))) vf8;
+
+typedef double __attribute__ ((vector_size (8))) vd1;
+typedef double __attribute__ ((vector_size (16))) vd2;
+typedef double __attribute__ ((vector_size (32))) vd4;
+typedef double __attribute__ ((vector_size (64))) vd8;
+/* Extern declarations, one per vector type: each scan expects exactly one .extern prototype whose only parameter is a .param.u64.  */
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvc1 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vc1 dcl_rvc1 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvc2 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vc2 dcl_rvc2 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvc4 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vc4 dcl_rvc4 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvc8 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vc8 dcl_rvc8 (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvs1 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vs1 dcl_rvs1 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvs2 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vs2 dcl_rvs2 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvs4 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vs4 dcl_rvs4 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvs8 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vs8 dcl_rvs8 (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvi1 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vi1 dcl_rvi1 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvi2 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vi2 dcl_rvi2 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvi4 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vi4 dcl_rvi4 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvi8 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vi8 dcl_rvi8 (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvll1 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vll1 dcl_rvll1 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvll2 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vll2 dcl_rvll2 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvll4 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vll4 dcl_rvll4 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvll8 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vll8 dcl_rvll8 (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvf1 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vf1 dcl_rvf1 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvf2 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vf2 dcl_rvf2 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvf4 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vf4 dcl_rvf4 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvf8 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vf8 dcl_rvf8 (void);
+
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvd1 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vd1 dcl_rvd1 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvd2 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vd2 dcl_rvd2 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvd4 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vd4 dcl_rvd4 (void);
+/* { dg-final { scan-assembler-times ".extern .func dcl_rvd8 \\(.param.u64 %\[_a-z0-9\]*\\);" 1 } } */
+vd8 dcl_rvd8 (void);
+/* Call every dcl_rv* function once and discard the result; presumably the reference is what causes each .extern prototype to be emitted -- confirm.  */
+void  test_1 (void)
+{
+  dcl_rvc1 ();
+  dcl_rvc2 ();
+  dcl_rvc4 ();
+  dcl_rvc8 ();
+  
+  dcl_rvs1 ();
+  dcl_rvs2 ();
+  dcl_rvs4 ();
+  dcl_rvs8 ();
+  
+  dcl_rvi1 ();
+  dcl_rvi2 ();
+  dcl_rvi4 ();
+  dcl_rvi8 ();
+
+  dcl_rvll1 ();
+  dcl_rvll2 ();
+  dcl_rvll4 ();
+  dcl_rvll8 ();
+  
+  dcl_rvf1 ();
+  dcl_rvf2 ();
+  dcl_rvf4 ();
+  dcl_rvf8 ();
+
+  dcl_rvd1 ();
+  dcl_rvd2 ();
+  dcl_rvd4 ();
+  dcl_rvd8 ();
+}
+/* M (T, V): statement expression (GNU extension) yielding a value of vector type T with element 0 set to V; the other elements are not assigned.  */
+#define M(T, V) ({T t;t[0]= V;t;})
+/* char-vector returns: each scan expects the prototype, with a single .param.u64 parameter, to match twice.  */
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvc1 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vc1 dfn_rvc1 (void)
+{
+  return M (vc1, 1);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvc2 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vc2 dfn_rvc2 (void)
+{
+  return M (vc2, 2);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvc4 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vc4 dfn_rvc4 (void)
+{
+  return M (vc4, 3);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvc8 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vc8 dfn_rvc8 (void)
+{
+  return M (vc8, 4);
+}
+/* short-vector returns: each scan expects the prototype, with a single .param.u64 parameter, to match twice.  */
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvs1 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vs1 dfn_rvs1 (void)
+{
+  return M (vs1, 5);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvs2 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vs2 dfn_rvs2 (void)
+{
+  return M (vs2, 6);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvs4 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vs4 dfn_rvs4 (void)
+{
+  return M (vs4, 7);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvs8 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vs8 dfn_rvs8 (void)
+{
+  return M (vs8, 8);
+}
+/* int-vector returns: each scan expects the prototype, with a single .param.u64 parameter, to match twice.  */
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvi1 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vi1 dfn_rvi1 (void)
+{
+  return M (vi1, 9);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvi2 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vi2 dfn_rvi2 (void)
+{
+  return M (vi2, 10);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvi4 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vi4 dfn_rvi4 (void)
+{
+  return M (vi4, 11);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvi8 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vi8 dfn_rvi8 (void)
+{
+  return M (vi8, 12);
+}
+/* long long-vector returns: each scan expects the prototype, with a single .param.u64 parameter, to match twice; element 0 values are 13..16, continuing the file's 1..24 sequence.  */
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvll1 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vll1 dfn_rvll1 (void)
+{
+  return M (vll1, 13);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvll2 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vll2 dfn_rvll2 (void)
+{
+  return M (vll2, 14);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvll4 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vll4 dfn_rvll4 (void)
+{
+  return M (vll4, 15);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvll8 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vll8 dfn_rvll8 (void)
+{
+  return M (vll8, 16);
+}
+/* float-vector returns: each scan expects the prototype, with a single .param.u64 parameter, to match twice.  */
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvf1 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vf1 dfn_rvf1 (void)
+{
+  return M (vf1, 17);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvf2 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vf2 dfn_rvf2 (void)
+{
+  return M (vf2, 18);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvf4 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vf4 dfn_rvf4 (void)
+{
+  return M (vf4, 19);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvf8 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vf8 dfn_rvf8 (void)
+{
+  return M (vf8, 20);
+}
+/* double-vector returns: each scan expects the prototype, with a single .param.u64 parameter, to match twice.  */
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvd1 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vd1 dfn_rvd1 (void)
+{
+  return M (vd1, 21);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvd2 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vd2 dfn_rvd2 (void)
+{
+  return M (vd2, 22);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvd4 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vd4 dfn_rvd4 (void)
+{
+  return M (vd4, 23);
+}
+/* { dg-final { scan-assembler-times ".visible .func dfn_rvd8 \\(.param.u64 %\[_a-z0-9\]*\\)(?:;|\[\r\n\]+\{)" 2 } } */
+vd8 dfn_rvd8 (void)
+{
+  return M (vd8, 24);
+}