diff mbox

[gomp-nvptx] nvptx backend: implement alloca with -msoft-stack

Message ID 1450101048-19618-1-git-send-email-amonakov@ispras.ru
State New
Headers show

Commit Message

Alexander Monakov Dec. 14, 2015, 1:50 p.m. UTC
This patch implements variable stack allocation for alloca/VLA on NVPTX if
-msoft-stack is enabled.  In addition to moving the stack pointer, we need to
copy the updated pointer into __nvptx_stacks[tid.y].

	* config/nvptx/nvptx.c (nvptx_declare_function_name): Emit %outargs
	using .local %outargs_ar only if not TARGET_SOFT_STACK.  Emit %outargs
	under TARGET_SOFT_STACK by offsetting from %frame.
	(nvptx_get_drap_rtx): Return %argp as the DRAP if needed.
	* config/nvptx/nvptx.md (nvptx_register_operand): Allow %outargs under
	TARGET_SOFT_STACK.
	(nvptx_nonimmediate_operand): Ditto.
	(allocate_stack): Implement for TARGET_SOFT_STACK.  Remove unused code.
	(allocate_stack_<mode>): Remove unused pattern.
	(set_softstack_insn): New pattern.
	(restore_stack_block): Handle for TARGET_SOFT_STACK.
---

I have committed this patch to the gomp-nvptx branch.  Bernd, Nathan, I would
appreciate it if you could comment on the 'define_predicate' changes in nvptx.md.
There are three predicates that start like this:

  if (REG_P (op))
    return !HARD_REGISTER_P (op);
  if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))
    return false;
  if (GET_CODE (op) == SUBREG)
    return false;

For stack adjustments I need to allow operations on the stack pointer.  For
now I've implemented that as a fairly straightforward shortcut, but I guess it
doesn't look very nice.  What is the reason to reject "hard registers" there,
in the first place?  In any case, I'd like your input if you see a better way
to handle it.

Also, note that there's either a bug or a cleanup opportunity: the third "if"
statement is clearly more general than the second (it rejects every SUBREG),
which makes the second check redundant.

No regressions on check-c testsuite (with 'alloca' effective-target enabled).

Thanks.
Alexander

Comments

Nathan Sidwell Dec. 14, 2015, 2:27 p.m. UTC | #1
On 12/14/15 08:50, Alexander Monakov wrote:
> I have committed this patch to the gomp-nvptx branch.  Bernd, Nathan, I would
> appreciate if you could comment on 'define_predicate' changes in nvptx.md.
> There are three predicates that start like this:
>
>    if (REG_P (op))
>      return !HARD_REGISTER_P (op);
>    if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))
>      return false;
>    if (GET_CODE (op) == SUBREG)
>      return false;
>
> For stack adjustments I need to allow operations on the stack pointer.  For
> now I've implemented that as a fairly straightforward shortcut, but I guess it
> doesn't look very nice.  What is the reason to reject "hard registers" there,
> in the first place?  In any case, I'd like your input if you see a better way
> to handle it.

Just a quick note that moving on to the MD file is on my to-do list this week.

> Also, note that there's either a bug or a cleanup opportunity: the third "if"
> statement is clearly more general than the second.

Correct, I think there are a bunch of such cleanups.


nathan
diff mbox

Patch

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index b12a7a8..599e460 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -883,7 +883,7 @@  nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
   HOST_WIDE_INT sz = crtl->outgoing_args_size;
   if (sz == 0)
     sz = 1;
-  if (cfun->machine->has_call_with_varargs)
+  if (!TARGET_SOFT_STACK && cfun->machine->has_call_with_varargs)
     {
       fprintf (file, "\t.reg.u%d %%outargs;\n"
 	       "\t.local.align 8 .b8 %%outargs_ar["
@@ -897,7 +897,8 @@  nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
   sz = get_frame_size ();
   if (sz == 0 && cfun->machine->has_call_with_sc)
     sz = 1;
-  if (sz > 0)
+  bool need_sp = cfun->calls_alloca || cfun->machine->has_call_with_varargs;
+  if (sz > 0 || TARGET_SOFT_STACK && need_sp)
     {
       int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
 
@@ -923,10 +924,15 @@  nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
 	  if (alignment > keep_align)
 	    fprintf (file, "\tand.b%d %%frame, %%frame, %d;\n",
 		     bits, -alignment);
+	  fprintf (file, "\t.reg.u%d %%outargs;\n", bits);
+	  sz = crtl->outgoing_args_size;
+	  gcc_assert (sz % keep_align == 0);
+	  fprintf (file, "\tsub.u%d %%outargs, %%frame, "
+	           HOST_WIDE_INT_PRINT_DEC ";\n", bits, sz);
 	  /* crtl->is_leaf is not initialized because RA is not run.  */
 	  if (!leaf_function_p ())
 	    {
-	      fprintf (file, "\tst.shared.u%d [%%fstmp2], %%frame;\n", bits);
+	      fprintf (file, "\tst.shared.u%d [%%fstmp2], %%outargs;\n", bits);
 	      cfun->machine->using_softstack = true;
 	    }
 	  need_softstack_decl = true;
@@ -996,6 +1002,8 @@  nvptx_function_ok_for_sibcall (tree, tree)
 static rtx
 nvptx_get_drap_rtx (void)
 {
+  if (TARGET_SOFT_STACK && stack_realign_drap)
+    return arg_pointer_rtx;
   return NULL_RTX;
 }
 
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index ae1909d..130c809 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -69,6 +69,8 @@  (define_attr "divergent" "false,true"
 (define_predicate "nvptx_register_operand"
   (match_code "reg,subreg")
 {
+  if (TARGET_SOFT_STACK && op == stack_pointer_rtx)
+    return true;
   if (REG_P (op))
     return !HARD_REGISTER_P (op);
   if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))
@@ -123,6 +125,8 @@  (define_predicate "nvptx_general_operand"
 (define_predicate "nvptx_nonimmediate_operand"
   (match_code "reg,subreg,mem")
 {
+  if (TARGET_SOFT_STACK && op == stack_pointer_rtx)
+    return true;
   if (REG_P (op))
     return (op != frame_pointer_rtx
 	    && op != arg_pointer_rtx
@@ -1061,31 +1065,41 @@  (define_expand "allocate_stack"
    (match_operand 1 "nvptx_register_operand")]
   ""
 {
+  if (TARGET_SOFT_STACK)
+    {
+      emit_move_insn (stack_pointer_rtx,
+		      gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
+      emit_insn (gen_set_softstack_insn (stack_pointer_rtx));
+      emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+      DONE;
+    }
   /* The ptx documentation specifies an alloca intrinsic (for 32 bit
      only)  but notes it is not implemented.  The assembler emits a
      confused error message.  Issue a blunt one now instead.  */
   sorry ("target cannot support alloca.");
   emit_insn (gen_nop ());
   DONE;
-  if (TARGET_ABI64)
-    emit_insn (gen_allocate_stack_di (operands[0], operands[1]));
-  else
-    emit_insn (gen_allocate_stack_si (operands[0], operands[1]));
-  DONE;
 })
 
-(define_insn "allocate_stack_<mode>"
-  [(set (match_operand:P 0 "nvptx_register_operand" "=R")
-        (unspec:P [(match_operand:P 1 "nvptx_register_operand" "R")]
-                   UNSPEC_ALLOCA))]
-  ""
-  "%.\\tcall (%0), %%alloca, (%1);")
+(define_insn "set_softstack_insn"
+  [(unspec [(match_operand 0 "nvptx_register_operand" "R")] UNSPEC_ALLOCA)]
+  "TARGET_SOFT_STACK"
+{
+  return (cfun->machine->using_softstack
+	  ? "%.\\tst.shared%t0\\t[%%fstmp2], %0;"
+	  : "");
+})
 
 (define_expand "restore_stack_block"
   [(match_operand 0 "register_operand" "")
    (match_operand 1 "register_operand" "")]
   ""
 {
+  if (TARGET_SOFT_STACK)
+    {
+      emit_move_insn (operands[0], operands[1]);
+      emit_insn (gen_set_softstack_insn (operands[0]));
+    }
   DONE;
 })