diff mbox

PR target/81313: Use DRAP only if there are outgoing arguments on stack

Message ID CAFULd4aNBeA6xF1idgpMJwheLgMxQ6Vo=_wT+jrfYY03QD38tw@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak July 9, 2017, 6:57 p.m. UTC
On Sun, Jul 9, 2017 at 8:29 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sun, Jul 9, 2017 at 11:19 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Fri, Jul 7, 2017 at 12:14 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Thu, Jul 6, 2017 at 12:08 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>> Since DRAP is needed only if there are outgoing arguments on stack, we
>>>> should track outgoing arguments on stack and avoid setting need_drap to
>>>> true when there are no outgoing arguments on stack.
>>>>
>>>> Tested on i686 and x86-64 with SSE2, AVX and AVX2.  There is no
>>>> regression.  OK for trunk?
>>>>
>>>> H.J.
>>>> ---
>>>> gcc/
>>>>
>>>>         PR target/81313
>>>>         * config/i386/i386.c (ix86_function_arg_advance): Set
>>>>         outgoing_args_on_stack to true if there are outgoing arguments
>>>>         on stack.
>>>>         (ix86_function_arg): Likewise.
>>>>         (ix86_get_drap_rtx): Use DRAP only if there are outgoing
>>>>         arguments on stack and ACCUMULATE_OUTGOING_ARGS is false.
>>>>         * config/i386/i386.h (machine_function): Add
>>>>         outgoing_args_on_stack.
>>>>
>>>> @@ -10473,6 +10479,10 @@ ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
>>>>    else
>>>>      arg = function_arg_32 (cum, mode, omode, type, bytes, words);
>>>>
>>>> +  /* Track if there are outgoing arguments on stack.  */
>>>> +  if (arg == NULL_RTX)
>>>> +    cfun->machine->outgoing_args_on_stack = true;
>>>
>>> This should be
>>>
>>> +  /* Track if there are outgoing arguments on stack.  */
>>> +  if (arg == NULL_RTX && cum->caller)
>>> +    cfun->machine->outgoing_args_on_stack = true;
>>>
>>> to check outgoing arguments for caller here.
>>>
>>>>    return arg;
>>>>  }
>>>>
>>>>
>>>
>>> I am testing updated patch with a new testcase.
>>
>> Updated patch LGTM.
>>
>> OK for mainline.
>
> Done.  My patch will cause
>
> FAIL: gcc.dg/stack-layout-dynamic-1.c
>
> since on x86, now stack realignment is done with
>
> .cfi_startproc
> pushl %ebp
> .cfi_def_cfa_offset 8
> .cfi_offset 5, -8
> movl %esp, %ebp
> .cfi_def_cfa_register 5
> andl $-65536, %esp
>
> instead of
>
> .cfi_startproc
> pushl %edi
> .cfi_def_cfa_offset 8
> .cfi_offset 7, -8
> leal 8(%esp), %edi
> .cfi_def_cfa 7, 0
> andl $-65536, %esp
> pushl -4(%edi)
> pushl %ebp
> .cfi_escape 0x10,0x5,0x2,0x75,0
> movl %esp, %ebp
>
> PR target/81313
> * gcc.dg/stack-layout-dynamic-1.c (dg-options): Add -mregparm=3
> for ia32.
> Don't expect cfi_escape and expect cfi_def_cfa_register on x86.
>
> OK for trunk?

Better use something like:

--cut here--
--cut here--

so the testcase will suppress new optimization and test what it was
intended to test also on x86_64.

Uros.

Comments

H.J. Lu July 9, 2017, 7:06 p.m. UTC | #1
On Sun, Jul 9, 2017 at 11:57 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Sun, Jul 9, 2017 at 8:29 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Sun, Jul 9, 2017 at 11:19 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Fri, Jul 7, 2017 at 12:14 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Thu, Jul 6, 2017 at 12:08 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>> Since DRAP is needed only if there are outgoing arguments on stack, we
>>>>> should track outgoing arguments on stack and avoid setting need_drap to
>>>>> true when there are no outgoing arguments on stack.
>>>>>
>>>>> Tested on i686 and x86-64 with SSE2, AVX and AVX2.  There is no
>>>>> regression.  OK for trunk?
>>>>>
>>>>> H.J.
>>>>> ---
>>>>> gcc/
>>>>>
>>>>>         PR target/81313
>>>>>         * config/i386/i386.c (ix86_function_arg_advance): Set
>>>>>         outgoing_args_on_stack to true if there are outgoing arguments
>>>>>         on stack.
>>>>>         (ix86_function_arg): Likewise.
>>>>>         (ix86_get_drap_rtx): Use DRAP only if there are outgoing
>>>>>         arguments on stack and ACCUMULATE_OUTGOING_ARGS is false.
>>>>>         * config/i386/i386.h (machine_function): Add
>>>>>         outgoing_args_on_stack.
>>>>>
>>>>> @@ -10473,6 +10479,10 @@ ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
>>>>>    else
>>>>>      arg = function_arg_32 (cum, mode, omode, type, bytes, words);
>>>>>
>>>>> +  /* Track if there are outgoing arguments on stack.  */
>>>>> +  if (arg == NULL_RTX)
>>>>> +    cfun->machine->outgoing_args_on_stack = true;
>>>>
>>>> This should be
>>>>
>>>> +  /* Track if there are outgoing arguments on stack.  */
>>>> +  if (arg == NULL_RTX && cum->caller)
>>>> +    cfun->machine->outgoing_args_on_stack = true;
>>>>
>>>> to check outgoing arguments for caller here.
>>>>
>>>>>    return arg;
>>>>>  }
>>>>>
>>>>>
>>>>
>>>> I am testing updated patch with a new testcase.
>>>
>>> Updated patch LGTM.
>>>
>>> OK for mainline.
>>
>> Done.  My patch will cause
>>
>> FAIL: gcc.dg/stack-layout-dynamic-1.c
>>
>> since on x86, now stack realignment is done with
>>
>> .cfi_startproc
>> pushl %ebp
>> .cfi_def_cfa_offset 8
>> .cfi_offset 5, -8
>> movl %esp, %ebp
>> .cfi_def_cfa_register 5
>> andl $-65536, %esp
>>
>> instead of
>>
>> .cfi_startproc
>> pushl %edi
>> .cfi_def_cfa_offset 8
>> .cfi_offset 7, -8
>> leal 8(%esp), %edi
>> .cfi_def_cfa 7, 0
>> andl $-65536, %esp
>> pushl -4(%edi)
>> pushl %ebp
>> .cfi_escape 0x10,0x5,0x2,0x75,0
>> movl %esp, %ebp
>>
>> PR target/81313
>> * gcc.dg/stack-layout-dynamic-1.c (dg-options): Add -mregparm=3
>> for ia32.
>> Don't expect cfi_escape and expect cfi_def_cfa_register on x86.
>>
>> OK for trunk?
>
> Better use something like:
>
> --cut here--
> Index: stack-layout-dynamic-1.c
> ===================================================================
> --- stack-layout-dynamic-1.c    (revision 250084)
> +++ stack-layout-dynamic-1.c    (working copy)
> @@ -4,12 +4,12 @@
>  /* { dg-options "-O0 -fomit-frame-pointer" } */
>  /* { dg-require-effective-target ptr32plus } */
>
> -extern void bar (void *, void *, void *);
> +extern void bar (void *,void *, void *, void *, void *, void *, void *);
>  void foo (void)
>  {
> -  int i;
> +  int i, j, k, l, m;
>    __attribute__ ((aligned(65536))) char runtime_aligned_1[512];
>    __attribute__ ((aligned(32768))) char runtime_aligned_2[1024];
> -  bar (&i, &runtime_aligned_1, &runtime_aligned_2);
> +  bar (&i, &j, &k, &l, &m, &runtime_aligned_1, &runtime_aligned_2);
>  }
>  /* { dg-final { scan-assembler-not "cfi_def_cfa_register" } } */
> --cut here--
>
> so the testcase will suppress new optimization and test what it was
> intended to test also on x86_64.

I will give it a try.

BTW,  gcc.target/i386/pr59501-3a.c will fail the same way as
gcc.target/i386/pr59501-4a.c now since -mno-accumulate-outgoing-arg
works the same as  -maccumulate-outgoing-args.  A patch is posted at:

https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00400.html

to fix it.
Uros Bizjak July 9, 2017, 7:12 p.m. UTC | #2
On Sun, Jul 9, 2017 at 9:06 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>> PR target/81313
>>> * gcc.dg/stack-layout-dynamic-1.c (dg-options): Add -mregparm=3
>>> for ia32.
>>> Don't expect cfi_escape and expect cfi_def_cfa_register on x86.
>>>
>>> OK for trunk?
>>
>> Better use something like:
>>
>> --cut here--
>> Index: stack-layout-dynamic-1.c
>> ===================================================================
>> --- stack-layout-dynamic-1.c    (revision 250084)
>> +++ stack-layout-dynamic-1.c    (working copy)
>> @@ -4,12 +4,12 @@
>>  /* { dg-options "-O0 -fomit-frame-pointer" } */
>>  /* { dg-require-effective-target ptr32plus } */
>>
>> -extern void bar (void *, void *, void *);
>> +extern void bar (void *,void *, void *, void *, void *, void *, void *);
>>  void foo (void)
>>  {
>> -  int i;
>> +  int i, j, k, l, m;
>>    __attribute__ ((aligned(65536))) char runtime_aligned_1[512];
>>    __attribute__ ((aligned(32768))) char runtime_aligned_2[1024];
>> -  bar (&i, &runtime_aligned_1, &runtime_aligned_2);
>> +  bar (&i, &j, &k, &l, &m, &runtime_aligned_1, &runtime_aligned_2);
>>  }
>>  /* { dg-final { scan-assembler-not "cfi_def_cfa_register" } } */
>> --cut here--
>>
>> so the testcase will suppress new optimization and test what it was
>> intended to test also on x86_64.
>
> I will give it a try.
>
> BTW,  gcc.target/i386/pr59501-3a.c will fail the same way as
> gcc.target/i386/pr59501-4a.c now since -mno-accumulate-outgoing-arg
> works the same as  -maccumulate-outgoing-args.  A patch is posted at:
>
> https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00400.html
>
> to fix it.

The referred patch enhances Jakubs PR 59501 patch, I'll wait for his
comments on your patch.

Uros.
H.J. Lu July 9, 2017, 8:53 p.m. UTC | #3
On Sun, Jul 9, 2017 at 11:57 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Sun, Jul 9, 2017 at 8:29 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Sun, Jul 9, 2017 at 11:19 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Fri, Jul 7, 2017 at 12:14 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Thu, Jul 6, 2017 at 12:08 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>> Since DRAP is needed only if there are outgoing arguments on stack, we
>>>>> should track outgoing arguments on stack and avoid setting need_drap to
>>>>> true when there are no outgoing arguments on stack.
>>>>>
>>>>> Tested on i686 and x86-64 with SSE2, AVX and AVX2.  There is no
>>>>> regression.  OK for trunk?
>>>>>
>>>>> H.J.
>>>>> ---
>>>>> gcc/
>>>>>
>>>>>         PR target/81313
>>>>>         * config/i386/i386.c (ix86_function_arg_advance): Set
>>>>>         outgoing_args_on_stack to true if there are outgoing arguments
>>>>>         on stack.
>>>>>         (ix86_function_arg): Likewise.
>>>>>         (ix86_get_drap_rtx): Use DRAP only if there are outgoing
>>>>>         arguments on stack and ACCUMULATE_OUTGOING_ARGS is false.
>>>>>         * config/i386/i386.h (machine_function): Add
>>>>>         outgoing_args_on_stack.
>>>>>
>>>>> @@ -10473,6 +10479,10 @@ ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
>>>>>    else
>>>>>      arg = function_arg_32 (cum, mode, omode, type, bytes, words);
>>>>>
>>>>> +  /* Track if there are outgoing arguments on stack.  */
>>>>> +  if (arg == NULL_RTX)
>>>>> +    cfun->machine->outgoing_args_on_stack = true;
>>>>
>>>> This should be
>>>>
>>>> +  /* Track if there are outgoing arguments on stack.  */
>>>> +  if (arg == NULL_RTX && cum->caller)
>>>> +    cfun->machine->outgoing_args_on_stack = true;
>>>>
>>>> to check outgoing arguments for caller here.
>>>>
>>>>>    return arg;
>>>>>  }
>>>>>
>>>>>
>>>>
>>>> I am testing updated patch with a new testcase.
>>>
>>> Updated patch LGTM.
>>>
>>> OK for mainline.
>>
>> Done.  My patch will cause
>>
>> FAIL: gcc.dg/stack-layout-dynamic-1.c
>>
>> since on x86, now stack realignment is done with
>>
>> .cfi_startproc
>> pushl %ebp
>> .cfi_def_cfa_offset 8
>> .cfi_offset 5, -8
>> movl %esp, %ebp
>> .cfi_def_cfa_register 5
>> andl $-65536, %esp
>>
>> instead of
>>
>> .cfi_startproc
>> pushl %edi
>> .cfi_def_cfa_offset 8
>> .cfi_offset 7, -8
>> leal 8(%esp), %edi
>> .cfi_def_cfa 7, 0
>> andl $-65536, %esp
>> pushl -4(%edi)
>> pushl %ebp
>> .cfi_escape 0x10,0x5,0x2,0x75,0
>> movl %esp, %ebp
>>
>> PR target/81313
>> * gcc.dg/stack-layout-dynamic-1.c (dg-options): Add -mregparm=3
>> for ia32.
>> Don't expect cfi_escape and expect cfi_def_cfa_register on x86.
>>
>> OK for trunk?
>
> Better use something like:
>
> --cut here--
> Index: stack-layout-dynamic-1.c
> ===================================================================
> --- stack-layout-dynamic-1.c    (revision 250084)
> +++ stack-layout-dynamic-1.c    (working copy)
> @@ -4,12 +4,12 @@
>  /* { dg-options "-O0 -fomit-frame-pointer" } */
>  /* { dg-require-effective-target ptr32plus } */
>
> -extern void bar (void *, void *, void *);
> +extern void bar (void *,void *, void *, void *, void *, void *, void *);
>  void foo (void)
>  {
> -  int i;
> +  int i, j, k, l, m;
>    __attribute__ ((aligned(65536))) char runtime_aligned_1[512];
>    __attribute__ ((aligned(32768))) char runtime_aligned_2[1024];
> -  bar (&i, &runtime_aligned_1, &runtime_aligned_2);
> +  bar (&i, &j, &k, &l, &m, &runtime_aligned_1, &runtime_aligned_2);
>  }
>  /* { dg-final { scan-assembler-not "cfi_def_cfa_register" } } */
> --cut here--
>
> so the testcase will suppress new optimization and test what it was
> intended to test also on x86_64.
>

Yes, it works.  Can you check it in?

Thanks.
diff mbox

Patch

Index: stack-layout-dynamic-1.c
===================================================================
--- stack-layout-dynamic-1.c    (revision 250084)
+++ stack-layout-dynamic-1.c    (working copy)
@@ -4,12 +4,12 @@ 
 /* { dg-options "-O0 -fomit-frame-pointer" } */
 /* { dg-require-effective-target ptr32plus } */

-extern void bar (void *, void *, void *);
+extern void bar (void *,void *, void *, void *, void *, void *, void *);
 void foo (void)
 {
-  int i;
+  int i, j, k, l, m;
   __attribute__ ((aligned(65536))) char runtime_aligned_1[512];
   __attribute__ ((aligned(32768))) char runtime_aligned_2[1024];
-  bar (&i, &runtime_aligned_1, &runtime_aligned_2);
+  bar (&i, &j, &k, &l, &m, &runtime_aligned_1, &runtime_aligned_2);
 }
 /* { dg-final { scan-assembler-not "cfi_def_cfa_register" } } */