@@ -883,7 +883,8 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
HOST_WIDE_INT sz = crtl->outgoing_args_size;
if (sz == 0)
sz = 1;
- if (cfun->machine->has_call_with_varargs)
+ /* Skip the .local %outargs array when TARGET_SOFT_STACK is set.  */
+ if (!TARGET_SOFT_STACK && cfun->machine->has_call_with_varargs)
{
fprintf (file, "\t.reg.u%d %%outargs;\n"
"\t.local.align 8 .b8 %%outargs_ar["
@@ -897,7 +897,10 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
sz = get_frame_size ();
if (sz == 0 && cfun->machine->has_call_with_sc)
sz = 1;
- if (sz > 0)
+ /* Under TARGET_SOFT_STACK, also enter the block below when the function
+    calls alloca or makes a varargs call, even if sz is zero.  */
+ bool need_sp = cfun->calls_alloca || cfun->machine->has_call_with_varargs;
+ if (sz > 0 || (TARGET_SOFT_STACK && need_sp))
{
int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
@@ -923,10 +924,18 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
if (alignment > keep_align)
fprintf (file, "\tand.b%d %%frame, %%frame, %d;\n",
bits, -alignment);
+ /* Declare %outargs and set it to %frame minus
+    crtl->outgoing_args_size.  */
+ fprintf (file, "\t.reg.u%d %%outargs;\n", bits);
+ sz = crtl->outgoing_args_size;
+ gcc_assert (sz % keep_align == 0);
+ fprintf (file, "\tsub.u%d %%outargs, %%frame, "
+ HOST_WIDE_INT_PRINT_DEC ";\n", bits, sz);
/* crtl->is_leaf is not initialized because RA is not run. */
if (!leaf_function_p ())
{
- fprintf (file, "\tst.shared.u%d [%%fstmp2], %%frame;\n", bits);
+ /* Store %outargs, rather than %frame, through %fstmp2.  */
+ fprintf (file, "\tst.shared.u%d [%%fstmp2], %%outargs;\n", bits);
cfun->machine->using_softstack = true;
}
need_softstack_decl = true;
@@ -996,6 +1002,10 @@ nvptx_function_ok_for_sibcall (tree, tree)
static rtx
nvptx_get_drap_rtx (void)
{
+ /* Return the argument pointer when TARGET_SOFT_STACK is set and
+    stack_realign_drap is nonzero; otherwise return NULL_RTX.  */
+ if (TARGET_SOFT_STACK && stack_realign_drap)
+ return arg_pointer_rtx;
return NULL_RTX;
}
@@ -69,6 +69,9 @@ (define_attr "divergent" "false,true"
(define_predicate "nvptx_register_operand"
(match_code "reg,subreg")
{
+ /* Under TARGET_SOFT_STACK, accept the stack pointer register.  */
+ if (TARGET_SOFT_STACK && op == stack_pointer_rtx)
+ return true;
if (REG_P (op))
return !HARD_REGISTER_P (op);
if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))
@@ -123,6 +125,9 @@ (define_predicate "nvptx_general_operand"
(define_predicate "nvptx_nonimmediate_operand"
(match_code "reg,subreg,mem")
{
+ /* Under TARGET_SOFT_STACK, accept the stack pointer register.  */
+ if (TARGET_SOFT_STACK && op == stack_pointer_rtx)
+ return true;
if (REG_P (op))
return (op != frame_pointer_rtx
&& op != arg_pointer_rtx
@@ -1061,31 +1065,48 @@ (define_expand "allocate_stack"
(match_operand 1 "nvptx_register_operand")]
""
{
+ /* With the soft stack, lower the stack pointer by operands[1], emit
+    set_softstack_insn for the new value, and copy
+    virtual_stack_dynamic_rtx into operands[0].  */
+ if (TARGET_SOFT_STACK)
+ {
+ emit_move_insn (stack_pointer_rtx,
+ gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
+ emit_insn (gen_set_softstack_insn (stack_pointer_rtx));
+ emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+ }
/* The ptx documentation specifies an alloca intrinsic (for 32 bit
only) but notes it is not implemented. The assembler emits a
confused error message. Issue a blunt one now instead. */
sorry ("target cannot support alloca.");
emit_insn (gen_nop ());
DONE;
- if (TARGET_ABI64)
- emit_insn (gen_allocate_stack_di (operands[0], operands[1]));
- else
- emit_insn (gen_allocate_stack_si (operands[0], operands[1]));
- DONE;
})
-(define_insn "allocate_stack_<mode>"
- [(set (match_operand:P 0 "nvptx_register_operand" "=R")
- (unspec:P [(match_operand:P 1 "nvptx_register_operand" "R")]
- UNSPEC_ALLOCA))]
- ""
- "%.\\tcall (%0), %%alloca, (%1);")
+;; Emit a st.shared of operand 0 through %fstmp2.  The output template is
+;; empty unless cfun->machine->using_softstack is set.
+(define_insn "set_softstack_insn"
+ [(unspec [(match_operand 0 "nvptx_register_operand" "R")] UNSPEC_ALLOCA)]
+ "TARGET_SOFT_STACK"
+{
+ return (cfun->machine->using_softstack
+ ? "%.\\tst.shared%t0\\t[%%fstmp2], %0;"
+ : "");
+})
(define_expand "restore_stack_block"
[(match_operand 0 "register_operand" "")
(match_operand 1 "register_operand" "")]
""
{
+ /* With the soft stack, copy operands[1] into operands[0] and emit
+    set_softstack_insn for operands[0].  */
+ if (TARGET_SOFT_STACK)
+ {
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn (gen_set_softstack_insn (operands[0]));
+ }
DONE;
})