[PATCHv5] Fix not 8-byte aligned ldrd/strd on ARMv5 (PR 89544)
diff mbox series

Message ID AM6PR10MB2566627D2A92173775D78936E4AC0@AM6PR10MB2566.EURPRD10.PROD.OUTLOOK.COM
State New
Headers show
Series
  • [PATCHv5] Fix not 8-byte aligned ldrd/strd on ARMv5 (PR 89544)
Related show

Commit Message

Bernd Edlinger Aug. 15, 2019, 7:47 p.m. UTC
On 8/15/19 6:29 PM, Richard Biener wrote:
>>>
>>> Please split it into the parts for the PR and parts making the
>>> asserts not trigger.
>>>
>>
>> Yes, will do.
>>

Okay, here is the rest of the PR 89544 fix,
actually just an optimization, making the larger stack alignment
known to the middle-end, and the test cases.


Boot-strapped and reg-tested on x86_64-pc-linux-gnu and arm-linux-gnueabihf.
Is it OK for trunk?


Thanks
Bernd.

Comments

Jeff Law Aug. 19, 2019, 10:58 p.m. UTC | #1
On 8/15/19 1:47 PM, Bernd Edlinger wrote:
> On 8/15/19 6:29 PM, Richard Biener wrote:
>>>> Please split it into the parts for the PR and parts making the
>>>> asserts not trigger.
>>>>
>>> Yes, will do.
>>>
> Okay, here is the rest of the PR 89544 fix,
> actually just an optimization, making the larger stack alignment
> known to the middle-end, and the test cases.
> 
> 
> Boot-strapped and reg-tested on x86_64-pc-linux-gnu and arm-linux-gnueabihf.
> Is it OK for trunk?
> 
> 
> Thanks
> Bernd.
> 
> 
> patch-arm-align-abi.diff
> 
> 2019-08-15  Bernd Edlinger  <bernd.edlinger@hotmail.de>
> 
> 	PR middle-end/89544
> 	* function.c (assign_parm_find_stack_rtl): Use larger alignment
> 	when possible.
> 
> testsuite:
> 2019-08-15  Bernd Edlinger  <bernd.edlinger@hotmail.de>
> 
> 	PR middle-end/89544
> 	* gcc.target/arm/unaligned-argument-1.c: New test.
> 	* gcc.target/arm/unaligned-argument-2.c: New test.
OK.

Given the sensitivity of this code, let's give the tester a chance to
run with this patch applied before we add the next one for sanitizing
the middle end interface.

jeff
John David Anglin Aug. 20, 2019, 2:29 p.m. UTC | #2
On 2019-08-15 3:47 p.m., Bernd Edlinger wrote:
> 2019-08-15  Bernd Edlinger  <bernd.edlinger@hotmail.de>
>
> 	PR middle-end/89544
> 	* function.c (assign_parm_find_stack_rtl): Use larger alignment
> 	when possible.
This patch breaks build on hppa-unknown-linux-gnu:
https://buildd.debian.org/status/fetch.php?pkg=gcc-snapshot&arch=hppa&ver=1%3A20190820-1&stamp=1566307455&raw=0

hppa-linux-gnu-g++-9 -std=gnu++98 -fno-PIE -c   -g -DIN_GCC     -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual -Wno-format -Wmissing-format-attribute -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H -I. -I. -I../../src/gcc -I../../src/gcc/. -I../../src/gcc/../include -I../../src/gcc/../libcpp/include  -I../../src/gcc/../libdecnumber -I../../src/gcc/../libdecnumber/dpd -I../libdecnumber -I../../src/gcc/../libbacktrace   -o function.o -MT function.o -MMD -MP -MF ./.deps/function.TPo ../../src/gcc/function.c
hppa-linux-gnu-g++-9 -std=gnu++98 -fno-PIE -c   -g -DIN_GCC     -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual -Wno-format -Wmissing-format-attribute -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H -I. -I. -I../../src/gcc -I../../src/gcc/. -I../../src/gcc/../include -I../../src/gcc/../libcpp/include  -I../../src/gcc/../libdecnumber -I../../src/gcc/../libdecnumber/dpd -I../libdecnumber -I../../src/gcc/../libbacktrace   -o function-tests.o -MT function-tests.o -MMD -MP -MF ./.deps/function-tests.TPo ../../src/gcc/function-tests.c
hppa-linux-gnu-g++-9 -std=gnu++98 -fno-PIE -c   -g -DIN_GCC     -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual -Wno-format -Wmissing-format-attribute -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H -I. -I. -I../../src/gcc -I../../src/gcc/. -I../../src/gcc/../include -I../../src/gcc/../libcpp/include  -I../../src/gcc/../libdecnumber -I../../src/gcc/../libdecnumber/dpd -I../libdecnumber -I../../src/gcc/../libbacktrace   -o fwprop.o -MT fwprop.o -MMD -MP -MF ./.deps/fwprop.TPo ../../src/gcc/fwprop.c
../../src/gcc/function.c: In function 'void assign_parm_find_stack_rtl(tree, assign_parm_data_one*)':
../../src/gcc/function.c:2690:28: error: no match for 'operator==' (operand types are 'poly_int<1, long long int>' and 'int')
 2690 |    && STACK_POINTER_OFFSET == 0)
      |                            ^~ ~
      |                               |
      |                               int
In file included from ../../src/gcc/coretypes.h:415,
                 from ../../src/gcc/function.c:36:
../../src/gcc/wide-int.h:3287:19: note: candidate: 'template<class T1, class T2> typename wi::binary_traits<T1, T2>::predicate_result operator==(const T1&, const T2&)'
 3287 | BINARY_PREDICATE (operator ==, eq_p)
      |                   ^~~~~~~~
../../src/gcc/wide-int.h:3264:3: note: in definition of macro 'BINARY_PREDICATE'
 3264 |   OP (const T1 &x, const T2 &y) \
      |   ^~
../../src/gcc/wide-int.h:3287:19: note:   template argument deduction/substitution failed:
 3287 | BINARY_PREDICATE (operator ==, eq_p)
      |                   ^~~~~~~~
../../src/gcc/wide-int.h:3264:3: note: in definition of macro 'BINARY_PREDICATE'
 3264 |   OP (const T1 &x, const T2 &y) \
      |   ^~
../../src/gcc/wide-int.h: In substitution of 'template<class T1, class T2> typename wi::binary_traits<T1, T2>::predicate_result operator==(const T1&, const T2&) [with T1 = poly_int<1, long long int>; T2 = int]':
../../src/gcc/function.c:2690:31:   required from here
../../src/gcc/wide-int.h:3287:19: error: incomplete type 'wi::int_traits<poly_int<1, long long int> >' used in nested name specifier
 3287 | BINARY_PREDICATE (operator ==, eq_p)
      |                   ^~~~~~~~
../../src/gcc/wide-int.h:3264:3: note: in definition of macro 'BINARY_PREDICATE'
 3264 |   OP (const T1 &x, const T2 &y) \
      |   ^~
make[5]: *** [Makefile:1118: function.o] Error 1
make[5]: *** Waiting for unfinished jobs....

We have the following define for STACK_POINTER_OFFSET:

#define STACK_POINTER_OFFSET \
  (TARGET_64BIT ? -(crtl->outgoing_args_size + 48) : poly_int64 (-32))
 
Dave
Bernd Edlinger Aug. 20, 2019, 3:43 p.m. UTC | #3
Ah, yes that was unexpected...
Sorry for the breakage.

So this needs to be known_eq (STACK_POINTER_OFFSET, 0)
instead of STACK_POINTER_OFFSET == 0 obviously.

Should be fixed by this patch, which I am going to commit
as "obvious" in a moment unless someone objects.


Thanks
Bernd.


On 8/20/19 4:39 PM, John David Anglin wrote:
> On 2019-08-15 3:47 p.m., Bernd Edlinger wrote:
>> 2019-08-15  Bernd Edlinger  <bernd.edlinger@hotmail.de>
>>
>> 	PR middle-end/89544
>> 	* function.c (assign_parm_find_stack_rtl): Use larger alignment
>> 	when possible.
> This patch breaks build on hppa-unknown-linux-gnu:
> https://buildd.debian.org/status/fetch.php?pkg=gcc-snapshot&arch=hppa&ver=1%3A20190820-1&stamp=1566307455&raw=0
> 
> hppa-linux-gnu-g++-9 -std=gnu++98 -fno-PIE -c   -g -DIN_GCC     -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual -Wno-format -Wmissing-format-attribute -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H -I. -I. -I../../src/gcc -I../../src/gcc/. -I../../src/gcc/../include -I../../src/gcc/../libcpp/include  -I../../src/gcc/../libdecnumber -I../../src/gcc/../libdecnumber/dpd -I../libdecnumber -I../../src/gcc/../libbacktrace   -o function.o -MT function.o -MMD -MP -MF ./.deps/function.TPo ../../src/gcc/function.c
> hppa-linux-gnu-g++-9 -std=gnu++98 -fno-PIE -c   -g -DIN_GCC     -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual -Wno-format -Wmissing-format-attribute -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H -I. -I. -I../../src/gcc -I../../src/gcc/. -I../../src/gcc/../include -I../../src/gcc/../libcpp/include  -I../../src/gcc/../libdecnumber -I../../src/gcc/../libdecnumber/dpd -I../libdecnumber -I../../src/gcc/../libbacktrace   -o function-tests.o -MT function-tests.o -MMD -MP -MF ./.deps/function-tests.TPo ../../src/gcc/function-tests.c
> hppa-linux-gnu-g++-9 -std=gnu++98 -fno-PIE -c   -g -DIN_GCC     -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual -Wno-format -Wmissing-format-attribute -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H -I. -I. -I../../src/gcc -I../../src/gcc/. -I../../src/gcc/../include -I../../src/gcc/../libcpp/include  -I../../src/gcc/../libdecnumber -I../../src/gcc/../libdecnumber/dpd -I../libdecnumber -I../../src/gcc/../libbacktrace   -o fwprop.o -MT fwprop.o -MMD -MP -MF ./.deps/fwprop.TPo ../../src/gcc/fwprop.c
> ../../src/gcc/function.c: In function 'void assign_parm_find_stack_rtl(tree, assign_parm_data_one*)':
> ../../src/gcc/function.c:2690:28: error: no match for 'operator==' (operand types are 'poly_int<1, long long int>' and 'int')
>  2690 |    && STACK_POINTER_OFFSET == 0)
>       |                            ^~ ~
>       |                               |
>       |                               int
> In file included from ../../src/gcc/coretypes.h:415,
>                  from ../../src/gcc/function.c:36:
> ../../src/gcc/wide-int.h:3287:19: note: candidate: 'template<class T1, class T2> typename wi::binary_traits<T1, T2>::predicate_result operator==(const T1&, const T2&)'
>  3287 | BINARY_PREDICATE (operator ==, eq_p)
>       |                   ^~~~~~~~
> ../../src/gcc/wide-int.h:3264:3: note: in definition of macro 'BINARY_PREDICATE'
>  3264 |   OP (const T1 &x, const T2 &y) \
>       |   ^~
> ../../src/gcc/wide-int.h:3287:19: note:   template argument deduction/substitution failed:
>  3287 | BINARY_PREDICATE (operator ==, eq_p)
>       |                   ^~~~~~~~
> ../../src/gcc/wide-int.h:3264:3: note: in definition of macro 'BINARY_PREDICATE'
>  3264 |   OP (const T1 &x, const T2 &y) \
>       |   ^~
> ../../src/gcc/wide-int.h: In substitution of 'template<class T1, class T2> typename wi::binary_traits<T1, T2>::predicate_result operator==(const T1&, const T2&) [with T1 = poly_int<1, long long int>; T2 = int]':
> ../../src/gcc/function.c:2690:31:   required from here
> ../../src/gcc/wide-int.h:3287:19: error: incomplete type 'wi::int_traits<poly_int<1, long long int> >' used in nested name specifier
>  3287 | BINARY_PREDICATE (operator ==, eq_p)
>       |                   ^~~~~~~~
> ../../src/gcc/wide-int.h:3264:3: note: in definition of macro 'BINARY_PREDICATE'
>  3264 |   OP (const T1 &x, const T2 &y) \
>       |   ^~
> make[5]: *** [Makefile:1118: function.o] Error 1
> make[5]: *** Waiting for unfinished jobs....
> 
> We have the following define for STACK_POINTER_OFFSET:
> 
> #define STACK_POINTER_OFFSET \
>   (TARGET_64BIT ? -(crtl->outgoing_args_size + 48) : poly_int64 (-32))
>  
> Dave
>
Richard Earnshaw (lists) Sept. 4, 2019, 12:53 p.m. UTC | #4
On 15/08/2019 20:47, Bernd Edlinger wrote:
> On 8/15/19 6:29 PM, Richard Biener wrote:
>>>>
>>>> Please split it into the parts for the PR and parts making the
>>>> asserts not trigger.
>>>>
>>>
>>> Yes, will do.
>>>
> 
> Okay, here is the rest of the PR 89544 fix,
> actually just an optimization, making the larger stack alignment
> known to the middle-end, and the test cases.
> 
> 
> Boot-strapped and reg-tested on x86_64-pc-linux-gnu and arm-linux-gnueabihf.
> Is it OK for trunk?
> 
> 
> Thanks
> Bernd.
> 

Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c
===================================================================
--- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c	(Revision 0)
+++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c	(Arbeitskopie)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arm_ok } */
+/* { dg-require-effective-target arm_ldrd_strd_ok } */
+/* { dg-options "-marm -mno-unaligned-access -O3" } */
+
+struct s {
+  int a, b;
+} __attribute__((aligned(8)));
+
+struct s f0;
+
+void f(int a, int b, int c, int d, int e, struct s f)
+{
+  f0 = f;
+}
+
+/* { dg-final { scan-assembler-times "ldrd" 0 } } */
+/* { dg-final { scan-assembler-times "strd" 0 } } */
+/* { dg-final { scan-assembler-times "stm" 1 } } */

I don't think this test is right.  While we can't use an LDRD to load 
the argument off the stack, there's nothing wrong with using an STRD to 
then store the value to f0 (as that is 8-byte aligned).  So the second 
and third scan-assembler tests are meaningless.

R.

(sorry, just noticed this).
Bernd Edlinger Sept. 4, 2019, 1:28 p.m. UTC | #5
On 9/4/19 2:53 PM, Richard Earnshaw (lists) wrote:
> On 15/08/2019 20:47, Bernd Edlinger wrote:
>> On 8/15/19 6:29 PM, Richard Biener wrote:
>>>>>
>>>>> Please split it into the parts for the PR and parts making the
>>>>> asserts not trigger.
>>>>>
>>>>
>>>> Yes, will do.
>>>>
>>
>> Okay, here is the rest of the PR 89544 fix,
>> actually just an optimization, making the larger stack alignment
>> known to the middle-end, and the test cases.
>>
>>
>> Boot-strapped and reg-tested on x86_64-pc-linux-gnu and arm-linux-gnueabihf.
>> Is it OK for trunk?
>>
>>
>> Thanks
>> Bernd.
>>
> 
> Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c
> ===================================================================
> --- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Revision 0)
> +++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Arbeitskopie)
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_arm_ok } */
> +/* { dg-require-effective-target arm_ldrd_strd_ok } */
> +/* { dg-options "-marm -mno-unaligned-access -O3" } */
> +
> +struct s {
> +  int a, b;
> +} __attribute__((aligned(8)));
> +
> +struct s f0;
> +
> +void f(int a, int b, int c, int d, int e, struct s f)
> +{
> +  f0 = f;
> +}
> +
> +/* { dg-final { scan-assembler-times "ldrd" 0 } } */
> +/* { dg-final { scan-assembler-times "strd" 0 } } */
> +/* { dg-final { scan-assembler-times "stm" 1 } } */
> 
> I don't think this test is right.  While we can't use an LDRD to load the argument off the stack, there's nothing wrong with using an STRD to then store the value to f0 (as that is 8-byte aligned).  So the second and third scan-assembler tests are meaningless.
> 

Ah, that is very similar to the unaligned-memcpy-2/3.c,
see https://gcc.gnu.org/ml/gcc-patches/2019-09/msg00157.html

initially that is a movdi,
then in subreg1 it is split in two movsi
which is then re-assembled as ldm


Not sure if that is intended in that way.


Bernd.
Richard Earnshaw (lists) Sept. 4, 2019, 2:14 p.m. UTC | #6
On 04/09/2019 14:28, Bernd Edlinger wrote:
> On 9/4/19 2:53 PM, Richard Earnshaw (lists) wrote:
>> On 15/08/2019 20:47, Bernd Edlinger wrote:
>>> On 8/15/19 6:29 PM, Richard Biener wrote:
>>>>>>
>>>>>> Please split it into the parts for the PR and parts making the
>>>>>> asserts not trigger.
>>>>>>
>>>>>
>>>>> Yes, will do.
>>>>>
>>>
>>> Okay, here is the rest of the PR 89544 fix,
>>> actually just an optimization, making the larger stack alignment
>>> known to the middle-end, and the test cases.
>>>
>>>
>>> Boot-strapped and reg-tested on x86_64-pc-linux-gnu and arm-linux-gnueabihf.
>>> Is it OK for trunk?
>>>
>>>
>>> Thanks
>>> Bernd.
>>>
>>
>> Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c
>> ===================================================================
>> --- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Revision 0)
>> +++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Arbeitskopie)
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target arm_arm_ok } */
>> +/* { dg-require-effective-target arm_ldrd_strd_ok } */
>> +/* { dg-options "-marm -mno-unaligned-access -O3" } */
>> +
>> +struct s {
>> +  int a, b;
>> +} __attribute__((aligned(8)));
>> +
>> +struct s f0;
>> +
>> +void f(int a, int b, int c, int d, int e, struct s f)
>> +{
>> +  f0 = f;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times "ldrd" 0 } } */
>> +/* { dg-final { scan-assembler-times "strd" 0 } } */
>> +/* { dg-final { scan-assembler-times "stm" 1 } } */
>>
>> I don't think this test is right.  While we can't use an LDRD to load the argument off the stack, there's nothing wrong with using an STRD to then store the value to f0 (as that is 8-byte aligned).  So the second and third scan-assembler tests are meaningless.
>>
> 
> Ah, that is very similar to the unaligned-memcpy-2/3.c,
> see https://gcc.gnu.org/ml/gcc-patches/2019-09/msg00157.html
> 
> initially that is a movdi,
> then in subreg1 it is split in two movsi
> which is then re-assembled as ldm
> 
> 
> Not sure if that is intended in that way.
> 
> 

Yeah, these are causing me some problems too, but that's because with 
some changes I'm working on I now see the compiler using r4 and r5, 
which leads to prologue and epilogue stores that distort the results.

Tests like this are generally fragile - I hate 'em!!!!

R.
> Bernd.
>
Bernd Edlinger Sept. 4, 2019, 3 p.m. UTC | #7
On 9/4/19 4:14 PM, Richard Earnshaw (lists) wrote:
> On 04/09/2019 14:28, Bernd Edlinger wrote:
>> On 9/4/19 2:53 PM, Richard Earnshaw (lists) wrote:
>>> Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c
>>> ===================================================================
>>> --- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Revision 0)
>>> +++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Arbeitskopie)
>>> @@ -0,0 +1,19 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target arm_arm_ok } */
>>> +/* { dg-require-effective-target arm_ldrd_strd_ok } */
>>> +/* { dg-options "-marm -mno-unaligned-access -O3" } */
>>> +
>>> +struct s {
>>> +  int a, b;
>>> +} __attribute__((aligned(8)));
>>> +
>>> +struct s f0;
>>> +
>>> +void f(int a, int b, int c, int d, int e, struct s f)
>>> +{
>>> +  f0 = f;
>>> +}
>>> +
>>> +/* { dg-final { scan-assembler-times "ldrd" 0 } } */
>>> +/* { dg-final { scan-assembler-times "strd" 0 } } */
>>> +/* { dg-final { scan-assembler-times "stm" 1 } } */
>>>
>>> I don't think this test is right.  While we can't use an LDRD to load the argument off the stack, there's nothing wrong with using an STRD to then store the value to f0 (as that is 8-byte aligned).  So the second and third scan-assembler tests are meaningless.
>>>
>>
>> Ah, that is very similar to the unaligned-memcpy-2/3.c,
>> see https://gcc.gnu.org/ml/gcc-patches/2019-09/msg00157.html
>>
>> initially that is a movdi,
>> then in subreg1 it is split in two movsi
>> which is then re-assembled as ldm
>>
>>
>> Not sure if that is intended in that way.
>>
>>
> 
> Yeah, these are causing me some problems too, but that's because with some changes I'm working on I now see the compiler using r4 and r5, which leads to prologue and epilogue stores that distort the results.
> 
> Tests like this are generally fragile - I hate 'em!!!!
> 

Yeah, that changed since r275063 introduced the unaligned-load/storedi

r275063 | edlinger | 2019-08-30 12:38:37 +0200 (Fr, 30. Aug 2019) | 10 Zeilen
Geänderte Pfade:
   M /trunk/gcc/ChangeLog
   M /trunk/gcc/config/arm/arm.c
   M /trunk/gcc/config/arm/arm.md
   M /trunk/gcc/config/arm/neon.md

2019-08-30  Bernd Edlinger  <bernd.edlinger@hotmail.de>

        * config/arm/arm.md (unaligned_loaddi,
        unaligned_storedi): New unspec insn patterns.
        * config/arm/neon.md (unaligned_storev8qi): Likewise.
        * config/arm/arm.c (gen_cpymem_ldrd_strd): Use unaligned_loaddi
        and unaligned_storedi for 4-byte aligned memory.
        (arm_block_set_aligned_vect): Use unaligned_storev8qi for
        4-byte aligned memory.

Since other than the movdi they are not split up but stay as ldrd/strd.
But for some unknown reason ira assigns r4-5 to those although also
r1-2 would be available. :-(


Bernd.
Richard Earnshaw (lists) Sept. 4, 2019, 3:48 p.m. UTC | #8
On 04/09/2019 16:00, Bernd Edlinger wrote:
> On 9/4/19 4:14 PM, Richard Earnshaw (lists) wrote:
>> On 04/09/2019 14:28, Bernd Edlinger wrote:
>>> On 9/4/19 2:53 PM, Richard Earnshaw (lists) wrote:
>>>> Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c
>>>> ===================================================================
>>>> --- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Revision 0)
>>>> +++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Arbeitskopie)
>>>> @@ -0,0 +1,19 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-require-effective-target arm_arm_ok } */
>>>> +/* { dg-require-effective-target arm_ldrd_strd_ok } */
>>>> +/* { dg-options "-marm -mno-unaligned-access -O3" } */
>>>> +
>>>> +struct s {
>>>> +  int a, b;
>>>> +} __attribute__((aligned(8)));
>>>> +
>>>> +struct s f0;
>>>> +
>>>> +void f(int a, int b, int c, int d, int e, struct s f)
>>>> +{
>>>> +  f0 = f;
>>>> +}
>>>> +
>>>> +/* { dg-final { scan-assembler-times "ldrd" 0 } } */
>>>> +/* { dg-final { scan-assembler-times "strd" 0 } } */
>>>> +/* { dg-final { scan-assembler-times "stm" 1 } } */
>>>>
>>>> I don't think this test is right.  While we can't use an LDRD to load the argument off the stack, there's nothing wrong with using an STRD to then store the value to f0 (as that is 8-byte aligned).  So the second and third scan-assembler tests are meaningless.
>>>>
>>>
>>> Ah, that is very similar to the unaligned-memcpy-2/3.c,
>>> see https://gcc.gnu.org/ml/gcc-patches/2019-09/msg00157.html
>>>
>>> initially that is a movdi,
>>> then in subreg1 it is split in two movsi
>>> which is then re-assembled as ldm
>>>
>>>
>>> Not sure if that is intended in that way.
>>>
>>>
>>
>> Yeah, these are causing me some problems too, but that's because with some changes I'm working on I now see the compiler using r4 and r5, which leads to prologue and epilogue stores that distort the results.
>>
>> Tests like this are generally fragile - I hate 'em!!!!
>>
> 
> Yeah, that changed since r275063 introduced the unaligned-load/storedi
> 
> r275063 | edlinger | 2019-08-30 12:38:37 +0200 (Fr, 30. Aug 2019) | 10 Zeilen
> Geänderte Pfade:
>     M /trunk/gcc/ChangeLog
>     M /trunk/gcc/config/arm/arm.c
>     M /trunk/gcc/config/arm/arm.md
>     M /trunk/gcc/config/arm/neon.md
> 
> 2019-08-30  Bernd Edlinger  <bernd.edlinger@hotmail.de>
> 
>          * config/arm/arm.md (unaligned_loaddi,
>          unaligned_storedi): New unspec insn patterns.
>          * config/arm/neon.md (unaligned_storev8qi): Likewise.
>          * config/arm/arm.c (gen_cpymem_ldrd_strd): Use unaligned_loaddi
>          and unaligned_storedi for 4-byte aligned memory.
>          (arm_block_set_aligned_vect): Use unaligned_storev8qi for
>          4-byte aligned memory.
> 
> Since other than the movdi they are not split up but stay as ldrd/strd.
> But for some unknown reason ira assigns r4-5 to those although also
> r1-2 would be available. :-(
> 

r1-r2 can't be used in Arm state as the register has to start on an even 
boundary.  But ira has already used r3 for the address of the store (it 
could have picked r1) and now r4-r5 is the next even-numbered pair.  So 
we end up with needing to save some call-clobbered regs.

R.
> 
> Bernd.
>
Richard Earnshaw (lists) Sept. 5, 2019, 9:21 a.m. UTC | #9
On 04/09/2019 16:48, Richard Earnshaw (lists) wrote:
> On 04/09/2019 16:00, Bernd Edlinger wrote:
>> On 9/4/19 4:14 PM, Richard Earnshaw (lists) wrote:
>>> On 04/09/2019 14:28, Bernd Edlinger wrote:
>>>> On 9/4/19 2:53 PM, Richard Earnshaw (lists) wrote:
>>>>> Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c
>>>>> ===================================================================
>>>>> --- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    
>>>>> (Revision 0)
>>>>> +++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    
>>>>> (Arbeitskopie)
>>>>> @@ -0,0 +1,19 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-require-effective-target arm_arm_ok } */
>>>>> +/* { dg-require-effective-target arm_ldrd_strd_ok } */
>>>>> +/* { dg-options "-marm -mno-unaligned-access -O3" } */
>>>>> +
>>>>> +struct s {
>>>>> +  int a, b;
>>>>> +} __attribute__((aligned(8)));
>>>>> +
>>>>> +struct s f0;
>>>>> +
>>>>> +void f(int a, int b, int c, int d, int e, struct s f)
>>>>> +{
>>>>> +  f0 = f;
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-assembler-times "ldrd" 0 } } */
>>>>> +/* { dg-final { scan-assembler-times "strd" 0 } } */
>>>>> +/* { dg-final { scan-assembler-times "stm" 1 } } */
>>>>>
>>>>> I don't think this test is right.  While we can't use an LDRD to 
>>>>> load the argument off the stack, there's nothing wrong with using 
>>>>> an STRD to then store the value to f0 (as that is 8-byte aligned).  
>>>>> So the second and third scan-assembler tests are meaningless.
>>>>>
>>>>
>>>> Ah, that is very similar to the unaligned-memcpy-2/3.c,
>>>> see https://gcc.gnu.org/ml/gcc-patches/2019-09/msg00157.html
>>>>
>>>> initially that is a movdi,
>>>> then in subreg1 it is split in two movsi
>>>> which is then re-assembled as ldm
>>>>
>>>>
>>>> Not sure if that is intended in that way.
>>>>
>>>>
>>>
>>> Yeah, these are causing me some problems too, but that's because with 
>>> some changes I'm working on I now see the compiler using r4 and r5, 
>>> which leads to prologue and epilogue stores that distort the results.
>>>
>>> Tests like this are generally fragile - I hate 'em!!!!
>>>
>>
>> Yeah, that changed since r275063 introduced the unaligned-load/storedi
>>
>> r275063 | edlinger | 2019-08-30 12:38:37 +0200 (Fr, 30. Aug 2019) | 10 
>> Zeilen
>> Geänderte Pfade:
>>     M /trunk/gcc/ChangeLog
>>     M /trunk/gcc/config/arm/arm.c
>>     M /trunk/gcc/config/arm/arm.md
>>     M /trunk/gcc/config/arm/neon.md
>>
>> 2019-08-30  Bernd Edlinger  <bernd.edlinger@hotmail.de>
>>
>>          * config/arm/arm.md (unaligned_loaddi,
>>          unaligned_storedi): New unspec insn patterns.
>>          * config/arm/neon.md (unaligned_storev8qi): Likewise.
>>          * config/arm/arm.c (gen_cpymem_ldrd_strd): Use unaligned_loaddi
>>          and unaligned_storedi for 4-byte aligned memory.
>>          (arm_block_set_aligned_vect): Use unaligned_storev8qi for
>>          4-byte aligned memory.
>>
>> Since other than the movdi they are not split up but stay as ldrd/strd.
>> But for some unknown reason ira assigns r4-5 to those although also
>> r1-2 would be available. :-(
>>
> 
> r1-r2 can't be used in Arm state as the register has to start on an even 
> boundary.  But ira has already used r3 for the address of the store (it 
> could have picked r1) and now r4-r5 is the next even-numbered pair.  So 
> we end up with needing to save some call-clobbered regs.
> 
> R.
>>
>> Bernd.
>>
> 

One possible trick to stabilize the test is to insert an asm that 
clobbers r4 and r5 and forces the prologue/epilogue code to always save 
and restore them.  Then we can account for those prologue/epilogue 
consistently (at least, modulo the arm_prefer_ldrd_strd condition).

R.
Bernd Edlinger Sept. 5, 2019, 9:35 a.m. UTC | #10
On 9/5/19 11:21 AM, Richard Earnshaw (lists) wrote:
> On 04/09/2019 16:48, Richard Earnshaw (lists) wrote:
>> On 04/09/2019 16:00, Bernd Edlinger wrote:
>>> On 9/4/19 4:14 PM, Richard Earnshaw (lists) wrote:
>>>> On 04/09/2019 14:28, Bernd Edlinger wrote:
>>>>> On 9/4/19 2:53 PM, Richard Earnshaw (lists) wrote:
>>>>>> Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c
>>>>>> ===================================================================
>>>>>> --- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Revision 0)
>>>>>> +++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Arbeitskopie)
>>>>>> @@ -0,0 +1,19 @@
>>>>>> +/* { dg-do compile } */
>>>>>> +/* { dg-require-effective-target arm_arm_ok } */
>>>>>> +/* { dg-require-effective-target arm_ldrd_strd_ok } */
>>>>>> +/* { dg-options "-marm -mno-unaligned-access -O3" } */
>>>>>> +
>>>>>> +struct s {
>>>>>> +  int a, b;
>>>>>> +} __attribute__((aligned(8)));
>>>>>> +
>>>>>> +struct s f0;
>>>>>> +
>>>>>> +void f(int a, int b, int c, int d, int e, struct s f)
>>>>>> +{
>>>>>> +  f0 = f;
>>>>>> +}
>>>>>> +
>>>>>> +/* { dg-final { scan-assembler-times "ldrd" 0 } } */
>>>>>> +/* { dg-final { scan-assembler-times "strd" 0 } } */
>>>>>> +/* { dg-final { scan-assembler-times "stm" 1 } } */
>>>>>>
>>>>>> I don't think this test is right.  While we can't use an LDRD to load the argument off the stack, there's nothing wrong with using an STRD to then store the value to f0 (as that is 8-byte aligned).  So the second and third scan-assembler tests are meaningless.
>>>>>>
>>>>>
>>>>> Ah, that is very similar to the unaligned-memcpy-2/3.c,
>>>>> see https://gcc.gnu.org/ml/gcc-patches/2019-09/msg00157.html
>>>>>
>>>>> initially that is a movdi,
>>>>> then in subreg1 it is split in two movsi
>>>>> which is then re-assembled as ldm
>>>>>
>>>>>
>>>>> Not sure if that is intended in that way.
>>>>>
>>>>>
>>>>
>>>> Yeah, these are causing me some problems too, but that's because with some changes I'm working on I now see the compiler using r4 and r5, which leads to prologue and epilogue stores that distort the results.
>>>>
>>>> Tests like this are generally fragile - I hate 'em!!!!
>>>>
>>>
>>> Yeah, that changed since r275063 introduced the unaligned-load/storedi
>>>
>>> r275063 | edlinger | 2019-08-30 12:38:37 +0200 (Fr, 30. Aug 2019) | 10 Zeilen
>>> Geänderte Pfade:
>>>     M /trunk/gcc/ChangeLog
>>>     M /trunk/gcc/config/arm/arm.c
>>>     M /trunk/gcc/config/arm/arm.md
>>>     M /trunk/gcc/config/arm/neon.md
>>>
>>> 2019-08-30  Bernd Edlinger  <bernd.edlinger@hotmail.de>
>>>
>>>          * config/arm/arm.md (unaligned_loaddi,
>>>          unaligned_storedi): New unspec insn patterns.
>>>          * config/arm/neon.md (unaligned_storev8qi): Likewise.
>>>          * config/arm/arm.c (gen_cpymem_ldrd_strd): Use unaligned_loaddi
>>>          and unaligned_storedi for 4-byte aligned memory.
>>>          (arm_block_set_aligned_vect): Use unaligned_storev8qi for
>>>          4-byte aligned memory.
>>>
>>> Since other than the movdi they are not split up but stay as ldrd/strd.
>>> But for some unknown reason ira assigns r4-5 to those although also
>>> r1-2 would be available. :-(
>>>
>>
>> r1-r2 can't be used in Arm state as the register has to start on an even boundary.  But ira has already used r3 for the address of the store (it could have picked r1) and now r4-r5 is the next even-numbered pair.  So we end up with needing to save some call-clobbered regs.
>>
>> R.
>>>
>>> Bernd.
>>>
>>
> 
> One possible trick to stabilize the test is to insert an asm that clobbers r4 and r5 and forces the prologue/epilogue code to always save and restore them.  Then we can account for those prologue/epilogue consistently (at least, modulo the arm_prefer_ldrd_strd condition).
> 

Yes, or add -fno-omit-frame-pointer.

BTW: have you seen this negative lookahead in my patch
[PATCH] [ARM] Adjust test expectations of unaligned-memcpy-2/3.c (PR 91614)
https://gcc.gnu.org/ml/gcc-patches/2019-09/msg00157.html

/* { dg-final { scan-assembler-times "ldrd\(?!\[^\\n\]*sp\)" 0 } } */

it makes the test work for all possible combinations with
RUNTESTFLAGS="--target_board=unix\{-mcpu=cortex-a15,-mcpu=cortex-a57,-mcpu=cortex-a9,-mcpu=cortex-a8,-mcpu=cortex-a7\}\{-fno-omit-frame-pointer,\}"


Cool isn't it?

Bernd.
Bernd Edlinger Sept. 6, 2019, 10:15 a.m. UTC | #11
On 9/4/19 2:53 PM, Richard Earnshaw (lists) wrote:
> Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c
> ===================================================================
> --- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Revision 0)
> +++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Arbeitskopie)
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_arm_ok } */
> +/* { dg-require-effective-target arm_ldrd_strd_ok } */
> +/* { dg-options "-marm -mno-unaligned-access -O3" } */
> +
> +struct s {
> +  int a, b;
> +} __attribute__((aligned(8)));
> +
> +struct s f0;
> +
> +void f(int a, int b, int c, int d, int e, struct s f)
> +{
> +  f0 = f;
> +}
> +
> +/* { dg-final { scan-assembler-times "ldrd" 0 } } */
> +/* { dg-final { scan-assembler-times "strd" 0 } } */
> +/* { dg-final { scan-assembler-times "stm" 1 } } */
> 
> I don't think this test is right.  While we can't use an LDRD to load the argument off the stack, there's nothing wrong with using an STRD to then store the value to f0 (as that is 8-byte aligned).  So the second and third scan-assembler tests are meaningless.
> 
> R.
> 
> (sorry, just noticed this).

So, agreed, that is really likely to change.
I would just remove those, as attached.

Is that OK for trunk?


Thanks
Bernd.
Richard Earnshaw (lists) Sept. 6, 2019, 10:18 a.m. UTC | #12
On 06/09/2019 11:15, Bernd Edlinger wrote:
> On 9/4/19 2:53 PM, Richard Earnshaw (lists) wrote:
>> Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c
>> ===================================================================
>> --- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Revision 0)
>> +++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c    (Arbeitskopie)
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target arm_arm_ok } */
>> +/* { dg-require-effective-target arm_ldrd_strd_ok } */
>> +/* { dg-options "-marm -mno-unaligned-access -O3" } */
>> +
>> +struct s {
>> +  int a, b;
>> +} __attribute__((aligned(8)));
>> +
>> +struct s f0;
>> +
>> +void f(int a, int b, int c, int d, int e, struct s f)
>> +{
>> +  f0 = f;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times "ldrd" 0 } } */
>> +/* { dg-final { scan-assembler-times "strd" 0 } } */
>> +/* { dg-final { scan-assembler-times "stm" 1 } } */
>>
>> I don't think this test is right.  While we can't use an LDRD to load the argument off the stack, there's nothing wrong with using an STRD to then store the value to f0 (as that is 8-byte aligned).  So the second and third scan-assembler tests are meaningless.
>>
>> R.
>>
>> (sorry, just noticed this).
> 
> So, agreed, that is really likely to change.
> I would just remove those, as attached.
> 
> Is that OK for trunk?
> 
> 
> Thanks
> Bernd.
> 

OK.

R.

Patch
diff mbox series

2019-08-15  Bernd Edlinger  <bernd.edlinger@hotmail.de>

	PR middle-end/89544
	* function.c (assign_parm_find_stack_rtl): Use larger alignment
	when possible.

testsuite:
2019-08-15  Bernd Edlinger  <bernd.edlinger@hotmail.de>

	PR middle-end/89544
	* gcc.target/arm/unaligned-argument-1.c: New test.
	* gcc.target/arm/unaligned-argument-2.c: New test.

Index: gcc/function.c
===================================================================
--- gcc/function.c	(Revision 274531)
+++ gcc/function.c	(Arbeitskopie)
@@ -2697,8 +2697,23 @@  assign_parm_find_stack_rtl (tree parm, struct assi
      intentionally forcing upward padding.  Otherwise we have to come
      up with a guess at the alignment based on OFFSET_RTX.  */
   poly_int64 offset;
-  if (data->locate.where_pad != PAD_DOWNWARD || data->entry_parm)
+  if (data->locate.where_pad == PAD_NONE || data->entry_parm)
     align = boundary;
+  else if (data->locate.where_pad == PAD_UPWARD)
+    {
+      align = boundary;
+      /* If the argument offset is actually more aligned than the nominal
+	 stack slot boundary, take advantage of that excess alignment.
+	 Don't make any assumptions if STACK_POINTER_OFFSET is in use.  */
+      if (poly_int_rtx_p (offset_rtx, &offset)
+	  && STACK_POINTER_OFFSET == 0)
+	{
+	  unsigned int offset_align = known_alignment (offset) * BITS_PER_UNIT;
+	  if (offset_align == 0 || offset_align > STACK_BOUNDARY)
+	    offset_align = STACK_BOUNDARY;
+	  align = MAX (align, offset_align);
+	}
+    }
   else if (poly_int_rtx_p (offset_rtx, &offset))
     {
       align = least_bit_hwi (boundary);
Index: gcc/testsuite/gcc.target/arm/unaligned-argument-1.c
===================================================================
--- gcc/testsuite/gcc.target/arm/unaligned-argument-1.c	(Revision 0)
+++ gcc/testsuite/gcc.target/arm/unaligned-argument-1.c	(Arbeitskopie)
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arm_ok } */
+/* { dg-require-effective-target arm_ldrd_strd_ok } */
+/* { dg-options "-marm -mno-unaligned-access -O3" } */
+
+struct s {
+  int a, b;
+} __attribute__((aligned(8)));
+
+struct s f0;
+
+void f(int a, int b, int c, int d, struct s f)
+{
+  f0 = f;
+}
+
+/* { dg-final { scan-assembler-times "ldrd" 1 } } */
+/* { dg-final { scan-assembler-times "strd" 1 } } */
+/* { dg-final { scan-assembler-times "stm" 0 } } */
Index: gcc/testsuite/gcc.target/arm/unaligned-argument-2.c
===================================================================
--- gcc/testsuite/gcc.target/arm/unaligned-argument-2.c	(Revision 0)
+++ gcc/testsuite/gcc.target/arm/unaligned-argument-2.c	(Arbeitskopie)
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arm_ok } */
+/* { dg-require-effective-target arm_ldrd_strd_ok } */
+/* { dg-options "-marm -mno-unaligned-access -O3" } */
+
+struct s {
+  int a, b;
+} __attribute__((aligned(8)));
+
+struct s f0;
+
+void f(int a, int b, int c, int d, int e, struct s f)
+{
+  f0 = f;
+}
+
+/* { dg-final { scan-assembler-times "ldrd" 0 } } */
+/* { dg-final { scan-assembler-times "strd" 0 } } */
+/* { dg-final { scan-assembler-times "stm" 1 } } */