diff mbox

[gomp4] merge ptx changes

Message ID 568BD1B6.8030503@acm.org
State New
Headers show

Commit Message

Nathan Sidwell Jan. 5, 2016, 2:22 p.m. UTC
This patch merges my most recent sequence of ptx backend changes to the gomp4
branch.

nathan
diff mbox

Patch

2016-01-05  Nathan Sidwell  <nathan@acm.org>

	Merge from mainline:
	2015-12-30  Nathan Sidwell  <nathan@acm.org>
	* config/nvptx/nvptx.c (nvptx_assemble_undefined_decl): Check
	it's not a constant pool object.

	2015-12-28  Nathan Sidwell  <nathan@acm.org>
	* config/nvptx/nvptx.c (nvptx_output_call_insn): Expect hard regs.
	* config/nvptx/nvptx.md (nvptx_reg_or_mem_operand): Rename to ...
	(nvptx_nonimmediate_operand): ... here.  Update all uses.
	(call_insn_operand): Use REG_P.
	(call_operation): Allow hard regs.

	2015-12-23  Nathan Sidwell  <nathan@acm.org>
	* config/nvptx/nvptx-protos.h
	(nvptx_maybe_convert_symbolic_operand): Delete prototype.
	* config/nvptx/nvptx.c (nvptx_maybe_convert_symbolic_operand): Delete.
	(nvptx_output_mov_insn): Record fnsym here.
	(nvptx_wpropagate): Don't create UNSPEC_TO_GENERIC unspec.
	* config/nvptx/nvptx.md (UNSPEC_TO_GENERIC): Delete.
	(symbolic_operand): Delete predicate.
	(nvptx_nonimmediate_operand): Delete predicate.
	(mov<mode>): Hard regs are perfectly ok here.
	(convaddr_<mode>): Delete.

	2015-12-18  Nathan Sidwell  <nathan@acm.org>
	* config/nvptx/nvptx.c (nvptx_maybe_convert_symbolic_operand):
	Remove UNSPEC_TO_GENERIC generation.
	(nvptx_output_mov_insn): Generate cvta for symbolic src.
	* config/nvptx/nvptx.md (nvptx_register_operand): Allow hard reg.
	(nvptx_reg_or_mem_operand): Likewise.
	(nvptx_nonmemory_operand): Likewise.
	(nvptx_general_operand): Delete.
	(*mov<mode>_insn): Use nonimmediate_operand, permit hardregs.
	(oacc_fork, oacc_join): Use general_operand.

	2015-12-18  Nathan Sidwell  <nathan@acm.org>
	* config/nvptx/nvptx.c (nvptx_option_override): Emit sorry for
	stabs debug.
	(nvptx_assemble_undefined_decl): Use nvptx_assemble_decl_end.

	2015-12-18  Nathan Sidwell  <nathan@acm.org>
	* config/nvptx/nvptx.c (worker_bcast_name, worker_red_name): Delete.
	(nvptx_option_override): Adjust worker symbol creation.
	(nvptx_gen_wcast): Wrap worker address in UNSPEC_TO_GENERIC.
	(write_worker_buffer): New.
	(nvptx_file_end): Call write_worker_buffer.
	* config/nvptx/nvptx.md (UNSPEC_SHARED_DATA): Delete.
	(worker_load<mode>, worker_store<mode>): Delete.

	2015-12-17  Nathan Sidwell  <nathan@acm.org>
	* config/nvptx/nvptx.h (NVPTX_RETURN_REGNUM, FRAME_POINTER_REGNUM,
	ARG_POINTER_REGNUM, STATIC_CHAIN_REGNUM): Renumber.
	(REGISTER_NAMES): Update and rename.
	(FIXED_REGISTERS, CALL_USED_REGISTERS): Update.
	(enum reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS): Reformat.

	2015-12-16  Nathan Sidwell  <nathan@acm.org>
	* config/nvptx/nvptx.h (OUTGOING_STATIC_CHAIN_REGNUM): Remove.
	(REGISTER_NAMES): Adjust.
	* config/nvptx/nvptx.c (nvptx_pass_by_reference): Avoid long line.
	(nvptx_static_chain): Delete.
	(write_arg_mode): Don't emit initializer if argno < 0.
	(write_arg_type): Fix whitespace.
	(init_frame): Initialize reg to zero if frame is zero-sized.
	(nvptx_declare_function_name):  Use write_arg_type to emit chain
	decl.
	(nvptx_output_call_insn): Adjust static chain emission.
	(nvptx_goacc_reduction): Make static.
	(TARGET_STATIC_CHAIN): Don't override.

	2015-12-16  Nathan Sidwell  <nathan@acm.org>
	* config/nvptx/nvptx-protos.h (nvptx_hard_regno_mode_ok): Delete.
	* config/nvptx/nvptx.h (struct machine_function):
	Reimplement. Adjust all users.
	* config/nvptx/nvptx.c (nvptx_declare_function_name): Move stack
	and frame array generation earlier.
	(nvptx_call_args): Reimplement.
	(nvptx_expand_call): Adjust.
	(nvptx_hard_regno_mode_ok): Delete.
	(nvptx_reorg): Revert scan of hard regs.

Index: config/nvptx/nvptx.h
===================================================================
--- config/nvptx/nvptx.h	(revision 232059)
+++ config/nvptx/nvptx.h	(working copy)
@@ -1,5 +1,5 @@ 
 /* Target Definitions for NVPTX.
-   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   Copyright (C) 2014-2016 Free Software Foundation, Inc.
    Contributed by Bernd Schmidt <bernds@codesourcery.com>
 
    This file is part of GCC.
@@ -29,8 +29,6 @@ 
 
 #define STARTFILE_SPEC "%{mmainkernel:crt0.o}"
 
-#define ASM_SPEC "%{misa=*:-m %*}"
-
 #define TARGET_CPU_CPP_BUILTINS()		\
   do						\
     {						\
@@ -84,21 +82,17 @@ 
 #define PTRDIFF_TYPE (TARGET_ABI64 ? "long int" : "int")
 
 #define POINTER_SIZE (TARGET_ABI64 ? 64 : 32)
-
 #define Pmode (TARGET_ABI64 ? DImode : SImode)
 
 #define TARGET_SM35 (ptx_isa_option >= PTX_ISA_SM35)
 
 /* Registers.  Since ptx is a virtual target, we just define a few
-   hard registers for special purposes and leave pseudos unallocated.  */
-
-#define FIRST_PSEUDO_REGISTER 16
-/* We have to have some available hard registers, to keep gcc setup
+   hard registers for special purposes and leave pseudos unallocated.
+   We have to have some available hard registers, to keep gcc setup
    happy.  */
-#define FIXED_REGISTERS					\
-  { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 }
-#define CALL_USED_REGISTERS				\
-  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+#define FIRST_PSEUDO_REGISTER 16
+#define FIXED_REGISTERS	    { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+#define CALL_USED_REGISTERS { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
 
 #define HARD_REGNO_NREGS(REG, MODE)		\
   ((void)(REG), (void)(MODE), 1)
@@ -108,32 +102,13 @@ 
      ((void)(REG), (void)(MODE), true)
 
 /* Register Classes.  */
-
-enum reg_class
-  {
-    NO_REGS,
-    ALL_REGS,
-    LIM_REG_CLASSES
-  };
-
+enum reg_class             {  NO_REGS,    ALL_REGS,	LIM_REG_CLASSES };
+#define REG_CLASS_NAMES    { "NO_REGS",  "ALL_REGS" }
+#define REG_CLASS_CONTENTS { { 0x0000 }, { 0xFFFF } }
 #define N_REG_CLASSES (int) LIM_REG_CLASSES
 
-#define REG_CLASS_NAMES {	  \
-    "NO_REGS",			  \
-    "ALL_REGS" }
-
-#define REG_CLASS_CONTENTS	\
-{				\
-  /* NO_REGS.  */		\
-  { 0x0000 },			\
-  /* ALL_REGS.  */		\
-  { 0xFFFF },			\
-}
-
 #define GENERAL_REGS ALL_REGS
-
 #define REGNO_REG_CLASS(R) ((void)(R), ALL_REGS)
-
 #define BASE_REG_CLASS ALL_REGS
 #define INDEX_REG_CLASS NO_REGS
 
@@ -159,18 +134,16 @@  enum reg_class
 #define FRAME_GROWS_DOWNWARD 0
 #define STACK_GROWS_DOWNWARD 1
 
+#define NVPTX_RETURN_REGNUM 0
 #define STACK_POINTER_REGNUM 1
-#define NVPTX_RETURN_REGNUM 4
-#define FRAME_POINTER_REGNUM 15
-#define ARG_POINTER_REGNUM 14
-
-#define STATIC_CHAIN_REGNUM 12
-#define OUTGOING_STATIC_CHAIN_REGNUM 10
+#define FRAME_POINTER_REGNUM 2
+#define ARG_POINTER_REGNUM 3
+#define STATIC_CHAIN_REGNUM 4
 
 #define REGISTER_NAMES							\
   {									\
-    "%hr0", "%outargs", "%hfp", "%hr3", "%retval", "%hr5", "%hr6", "%hr7",	\
-    "%hr8", "%hr9", "%chain_out", "%hr11", "%chain_in", "%hr13", "%argp", "%frame" \
+    "%value", "%stack", "%frame", "%args", "%chain", "%hr5", "%hr6", "%hr7", \
+    "%hr8", "%hr9", "%hr10", "%hr11", "%hr12", "%hr13", "%hr14", "%hr15" \
   }
 
 #define FIRST_PARM_OFFSET(FNDECL) ((void)(FNDECL), 0)
@@ -228,14 +201,15 @@  struct nvptx_args {
 #if defined HOST_WIDE_INT
 struct GTY(()) machine_function
 {
-  rtx_expr_list *call_args;
-  rtx start_call;
-  tree funtype;
-  bool has_call_with_varargs;
-  bool has_call_with_sc;
-  HOST_WIDE_INT outgoing_stdarg_size;
-  int ret_reg_mode; /* machine_mode not defined yet. */
-  rtx axis_predicate[2];
+  rtx_expr_list *call_args;  /* Arg list for the current call.  */
+  bool doing_call; /* Within a CALL_ARGS ... CALL_ARGS_END sequence.  */
+  bool is_varadic;  /* This call is varadic  */
+  bool has_varadic;  /* Current function has a varadic call.  */
+  bool has_chain; /* Current function has outgoing static chain.  */
+  int num_args;	/* Number of args of current call.  */
+  int return_mode; /* Return mode of current fn.
+		      (machine_mode not defined yet.) */
+  rtx axis_predicate[2]; /* Neutering predicates.  */
 };
 #endif
 
Index: config/nvptx/nvptx-protos.h
===================================================================
--- config/nvptx/nvptx-protos.h	(revision 232059)
+++ config/nvptx/nvptx-protos.h	(working copy)
@@ -1,5 +1,5 @@ 
 /* Prototypes for exported functions defined in nvptx.c.
-   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   Copyright (C) 2014-2016 Free Software Foundation, Inc.
    Contributed by Bernd Schmidt <bernds@codesourcery.com>
 
    This file is part of GCC.
@@ -41,7 +41,5 @@  extern const char *nvptx_ptx_type_from_m
 extern const char *nvptx_output_mov_insn (rtx, rtx);
 extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
 extern const char *nvptx_output_return (void);
-extern bool nvptx_hard_regno_mode_ok (int, machine_mode);
-extern rtx nvptx_maybe_convert_symbolic_operand (rtx);
 #endif
 #endif
Index: config/nvptx/nvptx.md
===================================================================
--- config/nvptx/nvptx.md	(revision 232059)
+++ config/nvptx/nvptx.md	(working copy)
@@ -1,5 +1,5 @@ 
 ;; Machine description for NVPTX.
-;; Copyright (C) 2014-2015 Free Software Foundation, Inc.
+;; Copyright (C) 2014-2016 Free Software Foundation, Inc.
 ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
 ;;
 ;; This file is part of GCC.
@@ -20,7 +20,6 @@ 
 
 (define_c_enum "unspec" [
    UNSPEC_ARG_REG
-   UNSPEC_TO_GENERIC
 
    UNSPEC_COPYSIGN
    UNSPEC_LOG2
@@ -39,8 +38,6 @@ 
 
    UNSPEC_DIM_SIZE
 
-   UNSPEC_SHARED_DATA
-
    UNSPEC_BIT_CONV
 
    UNSPEC_SHUFFLE
@@ -52,7 +49,6 @@ 
    UNSPECV_CAS
    UNSPECV_XCHG
    UNSPECV_BARSYNC
-   UNSPECV_MEMBAR
    UNSPECV_DIM_POS
 
    UNSPECV_FORK
@@ -64,56 +60,27 @@ 
 (define_attr "subregs_ok" "false,true"
   (const_string "false"))
 
+;; The nvptx operand predicates, in general, don't permit subregs and
+;; only literal constants, which differ from the generic ones, which
+;; permit subregs and symbolic constants (as appropriate)
 (define_predicate "nvptx_register_operand"
   (match_code "reg")
 {
-  if (REG_P (op))
-    return !HARD_REGISTER_P (op);
   return register_operand (op, mode);
 })
 
-(define_predicate "nvptx_reg_or_mem_operand"
+(define_predicate "nvptx_nonimmediate_operand"
   (match_code "mem,reg")
 {
-  if (REG_P (op))
-    return !HARD_REGISTER_P (op);
-  return memory_operand (op, mode) || register_operand (op, mode);
+  return (REG_P (op) ? register_operand (op, mode)
+          : memory_operand (op, mode));
 })
 
-;; Allow symbolic constants.
-(define_predicate "symbolic_operand"
-  (match_code "symbol_ref,const"))
-
-;; Registers or constants for normal instructions.  Does not allow symbolic
-;; constants.
 (define_predicate "nvptx_nonmemory_operand"
   (match_code "reg,const_int,const_double")
 {
-  if (REG_P (op))
-    return !HARD_REGISTER_P (op);
-  return nonmemory_operand (op, mode);
-})
-
-;; A source operand for a move instruction.  This is the only predicate we use
-;; that accepts symbolic constants.
-(define_predicate "nvptx_general_operand"
-  (match_code "reg,subreg,mem,const,symbol_ref,label_ref,const_int,const_double")
-{
-  if (REG_P (op))
-    return !HARD_REGISTER_P (op);
-  return general_operand (op, mode);
-})
-
-;; A destination operand for a move instruction.  This is the only destination
-;; predicate that accepts the return register since it requires special handling.
-(define_predicate "nvptx_nonimmediate_operand"
-  (match_code "reg,subreg,mem")
-{
-  if (REG_P (op))
-    return (op != frame_pointer_rtx
-	    && op != arg_pointer_rtx
-	    && op != stack_pointer_rtx);
-  return nonimmediate_operand (op, mode);
+  return (REG_P (op) ? register_operand (op, mode)
+          : immediate_operand (op, mode));
 })
 
 (define_predicate "const0_operand"
@@ -137,7 +104,7 @@ 
 (define_predicate "call_insn_operand"
   (match_code "symbol_ref,reg")
 {
-  return GET_CODE (op) != SYMBOL_REF || SYMBOL_REF_FUNCTION_P (op);
+  return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
 })
 
 ;; Return true if OP is a call with parallel USEs of the argument
@@ -151,11 +118,7 @@ 
     {
       rtx elt = XVECEXP (op, 0, i);
 
-      if (GET_CODE (elt) != USE
-          || GET_CODE (XEXP (elt, 0)) != REG
-          || XEXP (elt, 0) == frame_pointer_rtx
-          || XEXP (elt, 0) == arg_pointer_rtx
-          || XEXP (elt, 0) == stack_pointer_rtx)
+      if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
         return false;
     }
   return true;
@@ -213,10 +176,9 @@ 
    %.\\tsetp.eq.u32\\t%0, 1, 1;")
 
 (define_insn "*mov<mode>_insn"
-  [(set (match_operand:QHSDIM 0 "nvptx_nonimmediate_operand" "=R,R,m")
+  [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
 	(match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
-  "!MEM_P (operands[0])
-   || (REG_P (operands[1]) && REGNO (operands[1]) > LAST_VIRTUAL_REGISTER)"
+  "!MEM_P (operands[0]) || REG_P (operands[1])"
 {
   if (which_alternative == 1)
     return "%.\\tld%A1%u1\\t%0, %1;";
@@ -228,7 +190,7 @@ 
   [(set_attr "subregs_ok" "true")])
 
 (define_insn "*mov<mode>_insn"
-  [(set (match_operand:SDFM 0 "nvptx_nonimmediate_operand" "=R,R,m")
+  [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
 	(match_operand:SDFM 1 "general_operand" "RF,m,R"))]
   "!MEM_P (operands[0]) || REG_P (operands[1])"
 {
@@ -256,17 +218,11 @@ 
   "%.\\tmov%t0\\t%0, %%ar%1;")
 
 (define_expand "mov<mode>"
-  [(set (match_operand:QHSDISDFM 0 "nvptx_nonimmediate_operand" "")
+  [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
 	(match_operand:QHSDISDFM 1 "general_operand" ""))]
   ""
 {
-  operands[1] = nvptx_maybe_convert_symbolic_operand (operands[1]);
-
-  /* Hard registers are often actually symbolic operands on this target.
-     Don't allow them when storing to memory.  */
-  if (MEM_P (operands[0])
-      && (!REG_P (operands[1])
-	  || REGNO (operands[1]) <= LAST_VIRTUAL_REGISTER))
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
     {
       rtx tmp = gen_reg_rtx (<MODE>mode);
       emit_move_insn (tmp, operands[1]);
@@ -277,7 +233,7 @@ 
 
 (define_insn "zero_extendqihi2"
   [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
-	(zero_extend:HI (match_operand:QI 1 "nvptx_reg_or_mem_operand" "R,m")))]
+	(zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
   ""
   "@
    %.\\tcvt.u16.u%T1\\t%0, %1;
@@ -286,7 +242,7 @@ 
 
 (define_insn "zero_extend<mode>si2"
   [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
-	(zero_extend:SI (match_operand:QHIM 1 "nvptx_reg_or_mem_operand" "R,m")))]
+	(zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
   ""
   "@
    %.\\tcvt.u32.u%T1\\t%0, %1;
@@ -295,7 +251,7 @@ 
 
 (define_insn "zero_extend<mode>di2"
   [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
-	(zero_extend:DI (match_operand:QHSIM 1 "nvptx_reg_or_mem_operand" "R,m")))]
+	(zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
   ""
   "@
    %.\\tcvt.u64.u%T1\\t%0, %1;
@@ -304,7 +260,7 @@ 
 
 (define_insn "extend<mode>si2"
   [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
-	(sign_extend:SI (match_operand:QHIM 1 "nvptx_reg_or_mem_operand" "R,m")))]
+	(sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
   ""
   "@
    %.\\tcvt.s32.s%T1\\t%0, %1;
@@ -313,7 +269,7 @@ 
 
 (define_insn "extend<mode>di2"
   [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
-	(sign_extend:DI (match_operand:QHSIM 1 "nvptx_reg_or_mem_operand" "R,m")))]
+	(sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
   ""
   "@
    %.\\tcvt.s64.s%T1\\t%0, %1;
@@ -321,7 +277,7 @@ 
   [(set_attr "subregs_ok" "true")])
 
 (define_insn "trunchiqi2"
-  [(set (match_operand:QI 0 "nvptx_reg_or_mem_operand" "=R,m")
+  [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
 	(truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
   ""
   "@
@@ -330,7 +286,7 @@ 
   [(set_attr "subregs_ok" "true")])
 
 (define_insn "truncsi<mode>2"
-  [(set (match_operand:QHIM 0 "nvptx_reg_or_mem_operand" "=R,m")
+  [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
 	(truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
   ""
   "@
@@ -339,7 +295,7 @@ 
   [(set_attr "subregs_ok" "true")])
 
 (define_insn "truncdi<mode>2"
-  [(set (match_operand:QHSIM 0 "nvptx_reg_or_mem_operand" "=R,m")
+  [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
 	(truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
   ""
   "@
@@ -347,14 +303,6 @@ 
    %.\\tst%A0.u%T0\\t%0, %1;"
   [(set_attr "subregs_ok" "true")])
 
-;; Pointer address space conversion
-(define_insn "convaddr_<mode>"
-  [(set (match_operand:P 0 "nvptx_register_operand" "=R")
-	(unspec:P [(match_operand:P 1 "symbolic_operand" "s")]
-                  UNSPEC_TO_GENERIC))]
-  ""
-  "%.\\tcvta%D1%t0\\t%0, %1;")
-
 ;; Integer arithmetic
 
 (define_insn "add<mode>3"
@@ -1140,7 +1088,7 @@ 
 
 (define_expand "oacc_fork"
   [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
-        (match_operand:SI 1 "nvptx_general_operand" ""))
+        (match_operand:SI 1 "general_operand" ""))
    (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
 		        UNSPECV_FORKED)]
   ""
@@ -1153,7 +1101,7 @@ 
 
 (define_expand "oacc_join"
   [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
-        (match_operand:SI 1 "nvptx_general_operand" ""))
+        (match_operand:SI 1 "general_operand" ""))
    (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
 		        UNSPECV_JOIN)]
   ""
@@ -1194,20 +1142,6 @@ 
   ""
   "%.\\tmov.b64\\t%0, {%1,%2};")
 
-(define_insn "worker_load<mode>"
-  [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
-        (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "m")]
-			 UNSPEC_SHARED_DATA))]
-  ""
-  "%.\\tld.shared%u0\\t%0, %1;")
-
-(define_insn "worker_store<mode>"
-  [(set (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "=m")]
-			 UNSPEC_SHARED_DATA)
-	(match_operand:SDISDFM 0 "nvptx_register_operand" "R"))]
-  ""
-  "%.\\tst.shared%u1\\t%1, %0;")
-
 ;; Atomic insns.
 
 (define_expand "atomic_compare_and_swap<mode>"
@@ -1281,6 +1215,7 @@ 
 (define_code_iterator any_logic [and ior xor])
 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
 
+;; Currently disabled until we add better subtarget support - requires sm_32.
 (define_insn "atomic_fetch_<logic><mode>"
   [(set (match_operand:SDIM 1 "memory_operand" "+m")
 	(unspec_volatile:SDIM
@@ -1290,10 +1225,9 @@ 
 	  UNSPECV_LOCK))
    (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
 	(match_dup 1))]
-  "<MODE>mode == SImode || TARGET_SM35"
+  "0"
   "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;")
 
-;; ??? Mark as not predicable later?
 (define_insn "nvptx_barsync"
   [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
 		    UNSPECV_BARSYNC)]
Index: config/nvptx/nvptx.c
===================================================================
--- config/nvptx/nvptx.c	(revision 232059)
+++ config/nvptx/nvptx.c	(working copy)
@@ -1,5 +1,5 @@ 
 /* Target code for NVPTX.
-   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   Copyright (C) 2014-2016 Free Software Foundation, Inc.
    Contributed by Bernd Schmidt <bernds@codesourcery.com>
 
    This file is part of GCC.
@@ -128,14 +128,12 @@  static GTY((cache)) hash_table<tree_hash
    shared across TUs (taking the largest size).  */
 static unsigned worker_bcast_size;
 static unsigned worker_bcast_align;
-#define worker_bcast_name "__worker_bcast"
 static GTY(()) rtx worker_bcast_sym;
 
 /* Buffer needed for worker reductions.  This has to be distinct from
    the worker broadcast array, as both may be live concurrently.  */
 static unsigned worker_red_size;
 static unsigned worker_red_align;
-#define worker_red_name "__worker_red"
 static GTY(()) rtx worker_red_sym;
 
 /* Global lock variable, needed for 128bit worker & gang reductions.  */
@@ -147,7 +145,7 @@  static struct machine_function *
 nvptx_init_machine_status (void)
 {
   struct machine_function *p = ggc_cleared_alloc<machine_function> ();
-  p->ret_reg_mode = VOIDmode;
+  p->return_mode = VOIDmode;
   return p;
 }
 
@@ -161,6 +159,13 @@  nvptx_option_override (void)
   flag_toplevel_reorder = 1;
   /* Assumes that it will see only hard registers.  */
   flag_var_tracking = 0;
+
+  if (write_symbols == DBX_DEBUG)
+    /* The stabs testcases want to know stabs isn't supported.  */
+    sorry ("stabs debug format not supported");
+
+  /* Actually we don't have any debug format, but don't be
+     unnecessarily noisy.  */
   write_symbols = NO_DEBUG;
   debug_info_level = DINFO_LEVEL_NONE;
 
@@ -172,11 +177,11 @@  nvptx_option_override (void)
   declared_libfuncs_htab
     = hash_table<declared_libfunc_hasher>::create_ggc (17);
 
-  worker_bcast_sym = gen_rtx_SYMBOL_REF (Pmode, worker_bcast_name);
+  worker_bcast_sym = gen_rtx_SYMBOL_REF (Pmode, "__worker_bcast");
   SET_SYMBOL_DATA_AREA (worker_bcast_sym, DATA_AREA_SHARED);
   worker_bcast_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
 
-  worker_red_sym = gen_rtx_SYMBOL_REF (Pmode, worker_red_name);
+  worker_red_sym = gen_rtx_SYMBOL_REF (Pmode, "__worker_red");
   SET_SYMBOL_DATA_AREA (worker_red_sym, DATA_AREA_SHARED);
   worker_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
 }
@@ -487,7 +492,7 @@  nvptx_strict_argument_naming (cumulative
 static rtx
 nvptx_libcall_value (machine_mode mode, const_rtx)
 {
-  if (cfun->machine->start_call == NULL_RTX)
+  if (!cfun->machine->doing_call)
     /* Pretend to return in a hard reg for early uses before pseudos can be
        generated.  */
     return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
@@ -506,7 +511,7 @@  nvptx_function_value (const_tree type, c
 
   if (outgoing)
     {
-      cfun->machine->ret_reg_mode = mode;
+      cfun->machine->return_mode = mode;
       return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
     }
 
@@ -525,8 +530,9 @@  nvptx_function_value_regno_p (const unsi
    reference in memory.  */
 
 static bool
-nvptx_pass_by_reference (cumulative_args_t ARG_UNUSED (cum), machine_mode mode,
-			 const_tree type, bool ARG_UNUSED (named))
+nvptx_pass_by_reference (cumulative_args_t ARG_UNUSED (cum),
+			 machine_mode mode, const_tree type,
+			 bool ARG_UNUSED (named))
 {
   return pass_in_memory (mode, type, false);
 }
@@ -549,18 +555,6 @@  nvptx_promote_function_mode (const_tree
   return promote_arg (mode, for_return || !type || TYPE_ARG_TYPES (funtype));
 }
 
-/* Implement TARGET_STATIC_CHAIN.  */
-
-static rtx
-nvptx_static_chain (const_tree fndecl, bool incoming_p)
-{
-  if (!DECL_STATIC_CHAIN (fndecl))
-    return NULL;
-
-  return gen_rtx_REG (Pmode, (incoming_p ? STATIC_CHAIN_REGNUM
-			      : OUTGOING_STATIC_CHAIN_REGNUM));
-}
-
 /* Helper for write_arg.  Emit a single PTX argument of MODE, either
    in a prototype, or as copy in a function prologue.  ARGNO is the
    index of this argument in the PTX function.  FOR_REG is negative,
@@ -588,12 +582,15 @@  write_arg_mode (std::stringstream &s, in
       else
 	s << "%ar" << argno;
       s << ";\n";
-      s << "\tld.param" << ptx_type << " ";
-      if (for_reg)
-	s << reg_names[for_reg];
-      else
-	s << "%ar" << argno;
-      s << ", [%in_ar" << argno << "];\n";
+      if (argno >= 0)
+	{
+	  s << "\tld.param" << ptx_type << " ";
+	  if (for_reg)
+	    s << reg_names[for_reg];
+	  else
+	    s << "%ar" << argno;
+	  s << ", [%in_ar" << argno << "];\n";
+	}
     }
   return argno + 1;
 }
@@ -625,7 +622,7 @@  write_arg_type (std::stringstream &s, in
 	{
 	  /* Complex types are sent as two separate args.  */
 	  type = TREE_TYPE (type);
-	  mode  = TYPE_MODE (type);
+	  mode = TYPE_MODE (type);
 	  prototyped = true;
 	}
 
@@ -678,14 +675,14 @@  write_return_type (std::stringstream &s,
 	 optimization-level specific, so no caller can make use of
 	 this data, but more importantly for us, we must ensure it
 	 doesn't change the PTX prototype.  */
-      mode = (machine_mode) cfun->machine->ret_reg_mode;
+      mode = (machine_mode) cfun->machine->return_mode;
 
       if (mode == VOIDmode)
 	return return_in_mem;
 
-      /* Clear ret_reg_mode to inhibit copy of retval to non-existent
+      /* Clear return_mode to inhibit copy of retval to non-existent
 	 retval parameter.  */
-      cfun->machine->ret_reg_mode = VOIDmode;
+      cfun->machine->return_mode = VOIDmode;
     }
   else
     mode = promote_return (mode);
@@ -917,16 +914,20 @@  nvptx_maybe_record_fnsym (rtx sym)
 }
 
 /* Emit a local array to hold some part of a conventional stack frame
-   and initialize REGNO to point to it.  */
+   and initialize REGNO to point to it.  If the size is zero, it'll
+   never be valid to dereference, so we can simply initialize to
+   zero.  */
 
 static void
 init_frame (FILE  *file, int regno, unsigned align, unsigned size)
 {
-  fprintf (file, "\t.reg.u%d %s;\n"
-	   "\t.local.align %d .b8 %s_ar[%u];\n"
-	   "\tcvta.local.u%d %s, %s_ar;\n",
-	   POINTER_SIZE, reg_names[regno],
-	   align, reg_names[regno], size ? size : 1,
+  if (size)
+    fprintf (file, "\t.local .align %d .b8 %s_ar[%u];\n",
+	     align, reg_names[regno], size);
+  fprintf (file, "\t.reg.u%d %s;\n",
+	   POINTER_SIZE, reg_names[regno]);
+  fprintf (file, (size ? "\tcvta.local.u%d %s, %s_ar;\n"
+		  :  "\tmov.u%d %s, 0;\n"),
 	   POINTER_SIZE, reg_names[regno], reg_names[regno]);
 }
 
@@ -981,17 +982,26 @@  nvptx_declare_function_name (FILE *file,
     }
 
   if (stdarg_p (fntype))
-    argno = write_arg_type (s, ARG_POINTER_REGNUM, argno, ptr_type_node, true);
-
-  if (DECL_STATIC_CHAIN (decl))
-    argno = write_arg_type (s, STATIC_CHAIN_REGNUM, argno, ptr_type_node,
+    argno = write_arg_type (s, ARG_POINTER_REGNUM, argno, ptr_type_node,
 			    true);
 
+  if (DECL_STATIC_CHAIN (decl) || cfun->machine->has_chain)
+    write_arg_type (s, STATIC_CHAIN_REGNUM,
+		    DECL_STATIC_CHAIN (decl) ? argno : -1, ptr_type_node,
+		    true);
+
   fprintf (file, "%s", s.str().c_str());
 
-  if (regno_reg_rtx[OUTGOING_STATIC_CHAIN_REGNUM] != const0_rtx)
-    fprintf (file, "\t.reg.u%d %s;\n", GET_MODE_BITSIZE (Pmode),
-	     reg_names[OUTGOING_STATIC_CHAIN_REGNUM]);
+  /* Declare a local var for outgoing varargs.  */
+  if (cfun->machine->has_varadic)
+    init_frame (file, STACK_POINTER_REGNUM,
+		UNITS_PER_WORD, crtl->outgoing_args_size);
+
+  /* Declare a local variable for the frame.  */
+  HOST_WIDE_INT sz = get_frame_size ();
+  if (sz || cfun->machine->has_chain)
+    init_frame (file, FRAME_POINTER_REGNUM,
+		crtl->stack_alignment_needed / BITS_PER_UNIT, sz);
 
   /* Declare the pseudos we have as ptx registers.  */
   int maxregs = max_reg_num ();
@@ -1010,17 +1020,6 @@  nvptx_declare_function_name (FILE *file,
 	}
     }
 
-  /* Declare a local var for outgoing varargs.  */
-  if (cfun->machine->has_call_with_varargs)
-    init_frame (file, STACK_POINTER_REGNUM,
-		UNITS_PER_WORD, crtl->outgoing_args_size);
-
-  /* Declare a local variable for the frame.  */
-  HOST_WIDE_INT sz = get_frame_size ();
-  if (sz || cfun->machine->has_call_with_sc)
-    init_frame (file, FRAME_POINTER_REGNUM,
-		crtl->stack_alignment_needed / BITS_PER_UNIT, sz);
-
   /* Emit axis predicates. */
   if (cfun->machine->axis_predicate[0])
     nvptx_init_axis_predicate (file,
@@ -1036,7 +1035,7 @@  nvptx_declare_function_name (FILE *file,
 const char *
 nvptx_output_return (void)
 {
-  machine_mode mode = (machine_mode)cfun->machine->ret_reg_mode;
+  machine_mode mode = (machine_mode)cfun->machine->return_mode;
 
   if (mode != VOIDmode)
     fprintf (asm_out_file, "\tst.param%s\t[%s_out], %s;\n",
@@ -1076,20 +1075,28 @@  nvptx_get_drap_rtx (void)
    argument to the next call.  */
 
 static void
-nvptx_call_args (rtx arg, tree funtype)
+nvptx_call_args (rtx arg, tree fntype)
 {
-  if (cfun->machine->start_call == NULL_RTX)
+  if (!cfun->machine->doing_call)
     {
-      cfun->machine->call_args = NULL;
-      cfun->machine->funtype = funtype;
-      cfun->machine->start_call = const0_rtx;
+      cfun->machine->doing_call = true;
+      cfun->machine->is_varadic = false;
+      cfun->machine->num_args = 0;
+
+      if (fntype && stdarg_p (fntype))
+	{
+	  cfun->machine->is_varadic = true;
+	  cfun->machine->has_varadic = true;
+	  cfun->machine->num_args++;
+	}
     }
-  if (arg == pc_rtx)
-    return;
 
-  rtx_expr_list *args_so_far = cfun->machine->call_args;
-  if (REG_P (arg))
-    cfun->machine->call_args = alloc_EXPR_LIST (VOIDmode, arg, args_so_far);
+  if (REG_P (arg) && arg != pc_rtx)
+    {
+      cfun->machine->num_args++;
+      cfun->machine->call_args = alloc_EXPR_LIST (VOIDmode, arg,
+						  cfun->machine->call_args);
+    }
 }
 
 /* Implement the corresponding END_CALL_ARGS hook.  Clear and free the
@@ -1098,7 +1105,7 @@  nvptx_call_args (rtx arg, tree funtype)
 static void
 nvptx_end_call_args (void)
 {
-  cfun->machine->start_call = NULL_RTX;
+  cfun->machine->doing_call = false;
   free_EXPR_LIST_list (&cfun->machine->call_args);
 }
 
@@ -1111,16 +1118,10 @@  nvptx_end_call_args (void)
 void
 nvptx_expand_call (rtx retval, rtx address)
 {
-  int nargs = 0;
   rtx callee = XEXP (address, 0);
-  rtx pat, t;
-  rtvec vec;
   rtx varargs = NULL_RTX;
   unsigned parallel = 0;
 
-  for (t = cfun->machine->call_args; t; t = XEXP (t, 1))
-    nargs++;
-
   if (!call_insn_operand (callee, Pmode))
     {
       callee = force_reg (Pmode, callee);
@@ -1133,7 +1134,7 @@  nvptx_expand_call (rtx retval, rtx addre
       if (decl != NULL_TREE)
 	{
 	  if (DECL_STATIC_CHAIN (decl))
-	    cfun->machine->has_call_with_sc = true;
+	    cfun->machine->has_chain = true;
 
 	  tree attr = get_oacc_fn_attrib (decl);
 	  if (attr)
@@ -1154,35 +1155,31 @@  nvptx_expand_call (rtx retval, rtx addre
 	}
     }
 
-  if (cfun->machine->funtype
-      && stdarg_p (cfun->machine->funtype))
+  unsigned nargs = cfun->machine->num_args;
+  if (cfun->machine->is_varadic)
     {
       varargs = gen_reg_rtx (Pmode);
       emit_move_insn (varargs, stack_pointer_rtx);
-      cfun->machine->has_call_with_varargs = true;
     }
-  vec = rtvec_alloc (nargs + 1 + (varargs ? 1 : 0));
-  pat = gen_rtx_PARALLEL (VOIDmode, vec);
 
+  rtvec vec = rtvec_alloc (nargs + 1);
+  rtx pat = gen_rtx_PARALLEL (VOIDmode, vec);
   int vec_pos = 0;
-  
+
+  rtx call = gen_rtx_CALL (VOIDmode, address, const0_rtx);
   rtx tmp_retval = retval;
-  t = gen_rtx_CALL (VOIDmode, address, const0_rtx);
-  if (retval != NULL_RTX)
+  if (retval)
     {
       if (!nvptx_register_operand (retval, GET_MODE (retval)))
 	tmp_retval = gen_reg_rtx (GET_MODE (retval));
-      t = gen_rtx_SET (tmp_retval, t);
+      call = gen_rtx_SET (tmp_retval, call);
     }
-  XVECEXP (pat, 0, vec_pos++) = t;
+  XVECEXP (pat, 0, vec_pos++) = call;
 
   /* Construct the call insn, including a USE for each argument pseudo
      register.  These will be used when printing the insn.  */
   for (rtx arg = cfun->machine->call_args; arg; arg = XEXP (arg, 1))
-    {
-      rtx this_arg = XEXP (arg, 0);
-      XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, this_arg);
-    }
+    XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, XEXP (arg, 0));
 
   if (varargs)
     XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, varargs);
@@ -1390,7 +1387,6 @@  nvptx_gen_wcast (rtx reg, propagate_mask
 	  }
 	
 	addr = gen_rtx_MEM (mode, addr);
-	addr = gen_rtx_UNSPEC (mode, gen_rtvec (1, addr), UNSPEC_SHARED_DATA);
 	if (pm == PM_read)
 	  res = gen_rtx_SET (addr, reg);
 	else if (pm == PM_write)
@@ -1417,39 +1413,6 @@  nvptx_gen_wcast (rtx reg, propagate_mask
     }
   return res;
 }
-
-/* When loading an operand ORIG_OP, verify whether an address space
-   conversion to generic is required, and if so, perform it.  Check
-   for SYMBOL_REFs and record them if needed.  Return either the
-   original operand, or the converted one.  */
-
-rtx
-nvptx_maybe_convert_symbolic_operand (rtx op)
-{
-  if (GET_MODE (op) != Pmode)
-    return op;
-
-  rtx sym = op;
-  if (GET_CODE (sym) == CONST)
-    sym = XEXP (sym, 0);
-  if (GET_CODE (sym) == PLUS)
-    sym = XEXP (sym, 0);
-
-  if (GET_CODE (sym) != SYMBOL_REF)
-    return op;
-
-  nvptx_maybe_record_fnsym (sym);
-
-  nvptx_data_area area = SYMBOL_DATA_AREA (sym);
-  if (area == DATA_AREA_GENERIC)
-    return op;
-
-  rtx dest = gen_reg_rtx (Pmode);
-  emit_insn (gen_rtx_SET (dest,
-			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op),
-					  UNSPEC_TO_GENERIC)));
-  return dest;
-}
 
 /* Returns true if X is a valid address for use in a memory reference.  */
 
@@ -1477,18 +1440,6 @@  nvptx_legitimate_address_p (machine_mode
       return false;
     }
 }
-
-/* Implement HARD_REGNO_MODE_OK.  We barely use hard regs, but we want
-   to ensure that the return register's mode isn't changed.  */
-
-bool
-nvptx_hard_regno_mode_ok (int regno, machine_mode mode)
-{
-  if (regno != NVPTX_RETURN_REGNUM
-      || cfun == NULL || cfun->machine->ret_reg_mode == VOIDmode)
-    return true;
-  return mode == cfun->machine->ret_reg_mode;
-}
 
 /* Machinery to output constant initializers.  When beginning an
    initializer, we decide on a fragment size (which is visible in ptx
@@ -1767,6 +1718,11 @@  nvptx_globalize_label (FILE *, const cha
 static void
 nvptx_assemble_undefined_decl (FILE *file, const char *name, const_tree decl)
 {
+  /* The middle end can place constant pool decls into the varpool as
+     undefined.  Until that is fixed, catch the problem here.  */
+  if (DECL_IN_CONSTANT_POOL (decl))
+    return;
+
   write_var_marker (file, false, TREE_PUBLIC (decl), name);
 
   fprintf (file, "\t.extern ");
@@ -1774,7 +1730,7 @@  nvptx_assemble_undefined_decl (FILE *fil
   nvptx_assemble_decl_begin (file, name, section_for_decl (decl),
 			     TREE_TYPE (decl), size ? tree_to_shwi (size) : 0,
 			     DECL_ALIGN (decl));
-  fprintf (file, ";\n");
+  nvptx_assemble_decl_end ();
 }
 
 /* Output a pattern for a move instruction.  */
@@ -1788,6 +1744,16 @@  nvptx_output_mov_insn (rtx dst, rtx src)
   machine_mode src_inner = (GET_CODE (src) == SUBREG
 			    ? GET_MODE (XEXP (src, 0)) : dst_mode);
 
+  rtx sym = src;
+  if (GET_CODE (sym) == CONST)
+    sym = XEXP (XEXP (sym, 0), 0);
+  if (SYMBOL_REF_P (sym))
+    {
+      if (SYMBOL_DATA_AREA (sym) != DATA_AREA_GENERIC)
+	return "%.\tcvta%D1%t0\t%0, %1;";
+      nvptx_maybe_record_fnsym (sym);
+    }
+
   if (src_inner == dst_inner)
     return "%.\tmov%t0\t%0, %1;";
 
@@ -1847,14 +1813,14 @@  nvptx_output_call_insn (rtx_insn *insn,
     {
       rtx t = XEXP (XVECEXP (pat, 0, argno), 0);
       machine_mode mode = GET_MODE (t);
+      const char *ptx_type = nvptx_ptx_type_from_mode (mode, false);
 
       /* Mode splitting has already been done.  */
-      fprintf (asm_out_file, "\t\t.param%s %%out_arg%d%s;\n",
-	       nvptx_ptx_type_from_mode (mode, false), argno,
-	       mode == QImode || mode == HImode ? "[1]" : "");
-      fprintf (asm_out_file, "\t\tst.param%s [%%out_arg%d], %%r%d;\n",
-	       nvptx_ptx_type_from_mode (mode, false), argno,
-	       REGNO (t));
+      fprintf (asm_out_file, "\t\t.param%s %%out_arg%d;\n"
+	       "\t\tst.param%s [%%out_arg%d], ",
+	       ptx_type, argno, ptx_type, argno);
+      output_reg (asm_out_file, REGNO (t), VOIDmode);
+      fprintf (asm_out_file, ";\n");
     }
 
   fprintf (asm_out_file, "\t\tcall ");
@@ -1878,8 +1844,7 @@  nvptx_output_call_insn (rtx_insn *insn,
     }
   if (decl && DECL_STATIC_CHAIN (decl))
     {
-      fprintf (asm_out_file, ", %s%s", open,
-	       reg_names [OUTGOING_STATIC_CHAIN_REGNUM]);
+      fprintf (asm_out_file, ", %s%s", open, reg_names [STATIC_CHAIN_REGNUM]);
       open = "";
     }
   if (!open[0])
@@ -3379,7 +3344,7 @@  nvptx_wpropagate (bool pre_p, basic_bloc
       /* Stuff was emitted, initialize the base pointer now.  */
       rtx init = gen_rtx_SET (data.base, worker_bcast_sym);
       emit_insn_after (init, insn);
-      
+
       if (worker_bcast_size < data.offset)
 	worker_bcast_size = data.offset;
     }
@@ -3773,7 +3738,7 @@  nvptx_reorg (void)
   
   /* Mark unused regs as unused.  */
   int max_regs = max_reg_num ();
-  for (int i = 0; i < max_regs; i++)
+  for (int i = LAST_VIRTUAL_REGISTER + 1; i < max_regs; i++)
     if (REG_N_SETS (i) == 0 && REG_N_REFS (i) == 0)
       regno_reg_rtx[i] = const0_rtx;
 
@@ -3946,6 +3911,18 @@  nvptx_file_start (void)
   fputs ("// END PREAMBLE\n", asm_out_file);
 }
 
+/* Emit a declaration for a worker-level buffer in .shared memory.  SYM
+   names it; ALIGN and SIZE are its alignment and size in .u8 elements.  */
+
+static void
+write_worker_buffer (FILE *file, rtx sym, unsigned align, unsigned size)
+{
+  const char *name = XSTR (sym, 0);
+
+  write_var_marker (file, true, false, name);
+  fprintf (file, ".shared .align %u .u8 %s[%u];\n", align, name, size);
+}
+
 /* Write out the function declarations we've collected and declare storage
    for the broadcast buffer.  */
 
@@ -3959,30 +3936,12 @@  nvptx_file_end (void)
   fputs (func_decls.str().c_str(), asm_out_file);
 
   if (worker_bcast_size)
-    {
-      /* Define the broadcast buffer.  */
-
-      worker_bcast_size = (worker_bcast_size + worker_bcast_align - 1)
-	& ~(worker_bcast_align - 1);
-      
-      write_var_marker (asm_out_file, true, false, worker_bcast_name);
-      fprintf (asm_out_file, ".shared .align %d .u8 %s[%d];\n",
-	       worker_bcast_align,
-	       worker_bcast_name, worker_bcast_size);
-    }
+    write_worker_buffer (asm_out_file, worker_bcast_sym,
+			 worker_bcast_align, worker_bcast_size);
 
   if (worker_red_size)
-    {
-      /* Define the reduction buffer.  */
-
-      worker_red_size = ((worker_red_size + worker_red_align - 1)
-			 & ~(worker_red_align - 1));
-
-      write_var_marker (asm_out_file, true, false, worker_red_name);
-      fprintf (asm_out_file, ".shared .align %d .u8 %s[%d];\n",
-	       worker_red_align,
-	       worker_red_name, worker_red_size);
-    }
+    write_worker_buffer (asm_out_file, worker_red_sym,
+			 worker_red_align, worker_red_size);
 }
 
 /* Expander for the shuffle builtins.  */
@@ -4796,7 +4755,7 @@  nvptx_goacc_reduction_teardown (gcall *c
 
 /* NVPTX reduction expander.  */
 
-void
+static void
 nvptx_goacc_reduction (gcall *call)
 {
   unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
@@ -4862,9 +4821,6 @@  nvptx_goacc_reduction (gcall *call)
 #define TARGET_OMIT_STRUCT_RETURN_REG true
 #undef TARGET_STRICT_ARGUMENT_NAMING
 #define TARGET_STRICT_ARGUMENT_NAMING nvptx_strict_argument_naming
-#undef TARGET_STATIC_CHAIN
-#define TARGET_STATIC_CHAIN nvptx_static_chain
-
 #undef TARGET_CALL_ARGS
 #define TARGET_CALL_ARGS nvptx_call_args
 #undef TARGET_END_CALL_ARGS