Message ID | CAMe9rOp5sTEUtSw2c_-+GHBBKUS9u2Tr_RHJSD7XbjosVqkidQ@mail.gmail.com |
---|---|
State | New |
Headers | show |
Series | V3: [PATCH] Update preferred_stack_boundary only when expanding function call | expand |
"H.J. Lu" <hjl.tools@gmail.com> writes: > diff --git a/gcc/calls.c b/gcc/calls.c > index c8a42680041..6ab138e7bb0 100644 > --- a/gcc/calls.c > +++ b/gcc/calls.c > @@ -3226,6 +3226,19 @@ can_implement_as_sibling_call_p (tree exp, > return true; > } > > +/* Update stack alignment when the parameter is passed in the stack > + since the outgoing parameter requires extra alignment on the calling > + function side. */ > + > +static void > +update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate) > +{ > + if (crtl->stack_alignment_needed < locate->boundary) > + crtl->stack_alignment_needed = locate->boundary; > + if (crtl->preferred_stack_boundary < locate->boundary) > + crtl->preferred_stack_boundary = locate->boundary; > +} > + > /* Generate all the code for a CALL_EXPR exp > and return an rtx for its value. > Store the value in TARGET (specified as an rtx) if convenient. > @@ -3703,6 +3716,12 @@ expand_call (tree exp, rtx target, int ignore) > /* Ensure current function's preferred stack boundary is at least > what we need. Stack alignment may also increase preferred stack > boundary. */ > + for (i = 0; i < num_actuals; i++) > + if (reg_parm_stack_space > 0 > + || args[i].reg == 0 > + || args[i].partial != 0 > + || args[i].pass_on_stack) > + update_stack_alignment_for_call (&args[i].locate); > if (crtl->preferred_stack_boundary < preferred_stack_boundary) > crtl->preferred_stack_boundary = preferred_stack_boundary; > else > @@ -4961,6 +4980,12 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, > targetm.calls.function_arg_advance (args_so_far, mode, (tree) 0, true); > } > > + for (int i = 0; i < nargs; i++) > + if (reg_parm_stack_space > 0 > + || argvec[i].reg == 0 > + || argvec[i].partial != 0) > + update_stack_alignment_for_call (&argvec[i].locate); > + It's safe to test argvec[i].pass_on_stack here too, since the vector is initialised to zeros. So I think we should move the "if"s into the new function: static void update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate) { if (reg_parm_stack_space > 0 || locate->reg == 0 || locate->partial != 0 || locate->pass_on_stack) { if (crtl->stack_alignment_needed < locate->boundary) crtl->stack_alignment_needed = locate->boundary; if (crtl->preferred_stack_boundary < locate->boundary) crtl->preferred_stack_boundary = locate->boundary; } } OK with that change, thanks. Richard
On Fri, Jun 14, 2019 at 8:31 AM Richard Sandiford <richard.sandiford@arm.com> wrote: > > "H.J. Lu" <hjl.tools@gmail.com> writes: > > diff --git a/gcc/calls.c b/gcc/calls.c > > index c8a42680041..6ab138e7bb0 100644 > > --- a/gcc/calls.c > > +++ b/gcc/calls.c > > @@ -3226,6 +3226,19 @@ can_implement_as_sibling_call_p (tree exp, > > return true; > > } > > > > +/* Update stack alignment when the parameter is passed in the stack > > + since the outgoing parameter requires extra alignment on the calling > > + function side. */ > > + > > +static void > > +update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate) > > +{ > > + if (crtl->stack_alignment_needed < locate->boundary) > > + crtl->stack_alignment_needed = locate->boundary; > > + if (crtl->preferred_stack_boundary < locate->boundary) > > + crtl->preferred_stack_boundary = locate->boundary; > > +} > > + > > /* Generate all the code for a CALL_EXPR exp > > and return an rtx for its value. > > Store the value in TARGET (specified as an rtx) if convenient. > > @@ -3703,6 +3716,12 @@ expand_call (tree exp, rtx target, int ignore) > > /* Ensure current function's preferred stack boundary is at least > > what we need. Stack alignment may also increase preferred stack > > boundary. */ > > + for (i = 0; i < num_actuals; i++) > > + if (reg_parm_stack_space > 0 > > + || args[i].reg == 0 > > + || args[i].partial != 0 > > + || args[i].pass_on_stack) > > + update_stack_alignment_for_call (&args[i].locate); > > if (crtl->preferred_stack_boundary < preferred_stack_boundary) > > crtl->preferred_stack_boundary = preferred_stack_boundary; > > else > > @@ -4961,6 +4980,12 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, > > targetm.calls.function_arg_advance (args_so_far, mode, (tree) 0, true); > > } > > > > + for (int i = 0; i < nargs; i++) > > + if (reg_parm_stack_space > 0 > > + || argvec[i].reg == 0 > > + || argvec[i].partial != 0) > > + update_stack_alignment_for_call (&argvec[i].locate); > > + > > It's safe to test argvec[i].pass_on_stack here too, since the vector There is no pass_on_stack in argvec: struct arg { rtx value; machine_mode mode; rtx reg; int partial; struct locate_and_pad_arg_data locate; rtx save_area; }; struct arg *argvec; > is initialised to zeros. So I think we should move the "if"s into the > new function: > > static void > update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate) > { > if (reg_parm_stack_space > 0 > || locate->reg == 0 > || locate->partial != 0 > || locate->pass_on_stack) Since we have struct locate_and_pad_arg_data { /* Size of this argument on the stack, rounded up for any padding it gets. If REG_PARM_STACK_SPACE is defined, then register parms are counted here, otherwise they aren't. */ struct args_size size; /* Offset of this argument from beginning of stack-args. */ struct args_size offset; /* Offset to the start of the stack slot. Different from OFFSET if this arg pads downward. */ struct args_size slot_offset; /* The amount that the stack pointer needs to be adjusted to force alignment for the next argument. */ struct args_size alignment_pad; /* Which way we should pad this arg. */ pad_direction where_pad; /* slot_offset is at least this aligned. */ unsigned int boundary; }; we can't check reg, partial nor pass_on_stack here. > { > if (crtl->stack_alignment_needed < locate->boundary) > crtl->stack_alignment_needed = locate->boundary; > if (crtl->preferred_stack_boundary < locate->boundary) > crtl->preferred_stack_boundary = locate->boundary; > } > } > > OK with that change, thanks. > Is my original patch OK? Thanks.
"H.J. Lu" <hjl.tools@gmail.com> writes: > On Fri, Jun 14, 2019 at 8:31 AM Richard Sandiford > <richard.sandiford@arm.com> wrote: >> >> "H.J. Lu" <hjl.tools@gmail.com> writes: >> > diff --git a/gcc/calls.c b/gcc/calls.c >> > index c8a42680041..6ab138e7bb0 100644 >> > --- a/gcc/calls.c >> > +++ b/gcc/calls.c >> > @@ -3226,6 +3226,19 @@ can_implement_as_sibling_call_p (tree exp, >> > return true; >> > } >> > >> > +/* Update stack alignment when the parameter is passed in the stack >> > + since the outgoing parameter requires extra alignment on the calling >> > + function side. */ >> > + >> > +static void >> > +update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate) >> > +{ >> > + if (crtl->stack_alignment_needed < locate->boundary) >> > + crtl->stack_alignment_needed = locate->boundary; >> > + if (crtl->preferred_stack_boundary < locate->boundary) >> > + crtl->preferred_stack_boundary = locate->boundary; >> > +} >> > + >> > /* Generate all the code for a CALL_EXPR exp >> > and return an rtx for its value. >> > Store the value in TARGET (specified as an rtx) if convenient. >> > @@ -3703,6 +3716,12 @@ expand_call (tree exp, rtx target, int ignore) >> > /* Ensure current function's preferred stack boundary is at least >> > what we need. Stack alignment may also increase preferred stack >> > boundary. */ >> > + for (i = 0; i < num_actuals; i++) >> > + if (reg_parm_stack_space > 0 >> > + || args[i].reg == 0 >> > + || args[i].partial != 0 >> > + || args[i].pass_on_stack) >> > + update_stack_alignment_for_call (&args[i].locate); >> > if (crtl->preferred_stack_boundary < preferred_stack_boundary) >> > crtl->preferred_stack_boundary = preferred_stack_boundary; >> > else >> > @@ -4961,6 +4980,12 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, >> > targetm.calls.function_arg_advance (args_so_far, mode, (tree) 0, true); >> > } >> > >> > + for (int i = 0; i < nargs; i++) >> > + if (reg_parm_stack_space > 0 >> > + || argvec[i].reg == 0 >> > + || argvec[i].partial != 0) >> > + update_stack_alignment_for_call (&argvec[i].locate); >> > + >> >> It's safe to test argvec[i].pass_on_stack here too, since the vector > > There is no pass_on_stack in argvec: > > struct arg > { > rtx value; > machine_mode mode; > rtx reg; > int partial; > struct locate_and_pad_arg_data locate; > rtx save_area; > }; > struct arg *argvec; > >> is initialised to zeros. So I think we should move the "if"s into the >> new function: >> >> static void >> update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate) >> { >> if (reg_parm_stack_space > 0 >> || locate->reg == 0 >> || locate->partial != 0 >> || locate->pass_on_stack) > > Since we have > > struct locate_and_pad_arg_data > { > /* Size of this argument on the stack, rounded up for any padding it > gets. If REG_PARM_STACK_SPACE is defined, then register parms are > counted here, otherwise they aren't. */ > struct args_size size; > /* Offset of this argument from beginning of stack-args. */ > struct args_size offset; > /* Offset to the start of the stack slot. Different from OFFSET > if this arg pads downward. */ > struct args_size slot_offset; > /* The amount that the stack pointer needs to be adjusted to > force alignment for the next argument. */ > struct args_size alignment_pad; > /* Which way we should pad this arg. */ > pad_direction where_pad; > /* slot_offset is at least this aligned. */ > unsigned int boundary; > }; > > we can't check reg, partial nor pass_on_stack here. Sorry, missed that they were different structures. >> { >> if (crtl->stack_alignment_needed < locate->boundary) >> crtl->stack_alignment_needed = locate->boundary; >> if (crtl->preferred_stack_boundary < locate->boundary) >> crtl->preferred_stack_boundary = locate->boundary; >> } >> } >> >> OK with that change, thanks. >> > > Is my original patch OK? Yes, thanks. Richard
From 053632d7724ad3299008fc83c64c5d401c1b2ebe Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.tools@gmail.com> Date: Wed, 5 Jun 2019 12:55:19 -0700 Subject: [PATCH] Update preferred_stack_boundary only when expanding function call locate_and_pad_parm is called when expanding function call from initialize_argument_information and when generating function body from assign_parm_find_entry_rtl: /* Remember if the outgoing parameter requires extra alignment on the calling function side. */ if (crtl->stack_alignment_needed < boundary) crtl->stack_alignment_needed = boundary; if (crtl->preferred_stack_boundary < boundary) crtl->preferred_stack_boundary = boundary; stack_alignment_needed and preferred_stack_boundary should be updated only when expanding function call, not when generating function body. Add update_stack_alignment_for_call to update stack alignment when outgoing parameter is passed in the stack. gcc/ PR rtl-optimization/90765 * calls.c (update_stack_alignment_for_call): New function. (expand_call): Call update_stack_alignment_for_call when outgoing parameter is passed in the stack. (emit_library_call_value_1): Likewise. * function.c (locate_and_pad_parm): Don't update stack_alignment_needed and preferred_stack_boundary. gcc/testsuite/ PR rtl-optimization/90765 * gcc.target/i386/pr90765-1.c: New test. * gcc.target/i386/pr90765-2.c: Likewise. --- gcc/calls.c | 25 +++++++++++++++++++++++ gcc/function.c | 7 ------- gcc/testsuite/gcc.target/i386/pr90765-1.c | 11 ++++++++++ gcc/testsuite/gcc.target/i386/pr90765-2.c | 18 ++++++++++++++++ 4 files changed, 54 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr90765-2.c diff --git a/gcc/calls.c b/gcc/calls.c index c8a42680041..6ab138e7bb0 100644 --- a/gcc/calls.c +++ b/gcc/calls.c @@ -3226,6 +3226,19 @@ can_implement_as_sibling_call_p (tree exp, return true; } +/* Update stack alignment when the parameter is passed in the stack + since the outgoing parameter requires extra alignment on the calling + function side. */ + +static void +update_stack_alignment_for_call (struct locate_and_pad_arg_data *locate) +{ + if (crtl->stack_alignment_needed < locate->boundary) + crtl->stack_alignment_needed = locate->boundary; + if (crtl->preferred_stack_boundary < locate->boundary) + crtl->preferred_stack_boundary = locate->boundary; +} + /* Generate all the code for a CALL_EXPR exp and return an rtx for its value. Store the value in TARGET (specified as an rtx) if convenient. @@ -3703,6 +3716,12 @@ expand_call (tree exp, rtx target, int ignore) /* Ensure current function's preferred stack boundary is at least what we need. Stack alignment may also increase preferred stack boundary. */ + for (i = 0; i < num_actuals; i++) + if (reg_parm_stack_space > 0 + || args[i].reg == 0 + || args[i].partial != 0 + || args[i].pass_on_stack) + update_stack_alignment_for_call (&args[i].locate); if (crtl->preferred_stack_boundary < preferred_stack_boundary) crtl->preferred_stack_boundary = preferred_stack_boundary; else @@ -4961,6 +4980,12 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, targetm.calls.function_arg_advance (args_so_far, mode, (tree) 0, true); } + for (int i = 0; i < nargs; i++) + if (reg_parm_stack_space > 0 + || argvec[i].reg == 0 + || argvec[i].partial != 0) + update_stack_alignment_for_call (&argvec[i].locate); + /* If this machine requires an external definition for library functions, write one out. */ assemble_external_libcall (fun); diff --git a/gcc/function.c b/gcc/function.c index e30ee259bec..45b65dc0fd2 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -4021,13 +4021,6 @@ locate_and_pad_parm (machine_mode passed_mode, tree type, int in_regs, } } - /* Remember if the outgoing parameter requires extra alignment on the - calling function side. */ - if (crtl->stack_alignment_needed < boundary) - crtl->stack_alignment_needed = boundary; - if (crtl->preferred_stack_boundary < boundary) - crtl->preferred_stack_boundary = boundary; - if (ARGS_GROW_DOWNWARD) { locate->slot_offset.constant = -initial_offset_ptr->constant; diff --git a/gcc/testsuite/gcc.target/i386/pr90765-1.c b/gcc/testsuite/gcc.target/i386/pr90765-1.c new file mode 100644 index 00000000000..178c3ff8054 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90765-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-final { scan-assembler-not "and\[lq\]?\[\\t \]*\\$-64,\[\\t \]*%\[re\]?sp" } } */ + +typedef int __v16si __attribute__ ((__vector_size__ (64))); + +void +foo (__v16si x, int i0, int i1, int i2, int i3, int i4, int i5, __v16si *p) +{ + *p = x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr90765-2.c b/gcc/testsuite/gcc.target/i386/pr90765-2.c new file mode 100644 index 00000000000..45cf1f03747 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90765-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-final { scan-assembler "and\[lq\]?\[\\t \]*\\$-64,\[\\t \]*%\[re\]?sp" } } */ +/* { dg-skip-if "" { x86_64-*-mingw* } } */ + +typedef int __v16si __attribute__ ((__vector_size__ (64))); + +extern void foo (__v16si, __v16si, __v16si, __v16si, __v16si, __v16si, + __v16si, __v16si, __v16si, int, int, int, int, int, + int, __v16si *); + +extern __v16si x, y; + +void +bar (void) +{ + foo (x, x, x, x, x, x, x, x, x, 0, 1, 2, 3, 4, 5, &y); +} -- 2.20.1