diff mbox series

powerpc64[le]: Fix CFI for assembly templated syscalls [BZ #28532]

Message ID 20211103142222.23948-1-msc@linux.ibm.com
State New
Headers show
Series powerpc64[le]: Fix CFI for assembly templated syscalls [BZ #28532] | expand

Commit Message

Matheus Castanho Nov. 3, 2021, 2:22 p.m. UTC
Syscalls based on the assembly templates are missing CFI for r31, which gets
clobbered when scv is used, and info for LR is inaccurate, placed in the wrong
LOC and not using the proper offset. These are fixed by this commit.

After this change:

$ readelf -wF libc.so.6 | grep 0004b9d4.. -A 7 && objdump --disassemble=kill libc.so.6
00004a48 0000000000000020 00004a4c FDE cie=00000000 pc=000000000004b9d4..000000000004ba3c
   LOC           CFA      r31   ra
000000000004b9d4 r1+0     u     u
000000000004b9e4 r1+48    u     u
000000000004b9e8 r1+48    c-16  u
000000000004b9fc r1+48    c-16  c-32
000000000004ba08 r1+48    c-16
000000000004ba18 r1+48    u
000000000004ba1c r1+0     u

libc.so.6:     file format elf64-powerpcle

Disassembly of section .text:

000000000004b9d4 <kill>:
   4b9d4:       1f 00 4c 3c     addis   r2,r12,31
   4b9d8:       2c c3 42 38     addi    r2,r2,-15572
   4b9dc:       25 00 00 38     li      r0,37
   4b9e0:       d1 ff 21 f8     stdu    r1,-48(r1)
   4b9e4:       20 00 e1 fb     std     r31,32(r1)
   4b9e8:       98 8f ed eb     ld      r31,-28776(r13)
   4b9ec:       10 00 ff 77     andis.  r31,r31,16
   4b9f0:       1c 00 82 41     beq     4ba0c <kill+0x38>
   4b9f4:       a6 02 28 7d     mflr    r9
   4b9f8:       10 00 21 f9     std     r9,16(r1)
   4b9fc:       01 00 00 44     scv     0
   4ba00:       10 00 21 e9     ld      r9,16(r1)
   4ba04:       a6 03 28 7d     mtlr    r9
   4ba08:       08 00 00 48     b       4ba10 <kill+0x3c>
   4ba0c:       02 00 00 44     sc
   4ba10:       00 00 bf 2e     cmpdi   cr5,r31,0
   4ba14:       20 00 e1 eb     ld      r31,32(r1)
   4ba18:       30 00 21 38     addi    r1,r1,48
   4ba1c:       18 00 96 41     beq     cr5,4ba34 <kill+0x60>
   4ba20:       01 f0 20 39     li      r9,-4095
   4ba24:       40 48 23 7c     cmpld   r3,r9
   4ba28:       20 00 e0 4d     bltlr+
   4ba2c:       d0 00 63 7c     neg     r3,r3
   4ba30:       08 00 00 48     b       4ba38 <kill+0x64>
   4ba34:       20 00 e3 4c     bnslr+
   4ba38:       c8 32 fe 4b     b       2ed00 <__syscall_error>
        ...
   4ba44:       40 20 0c 00     .long 0xc2040
   4ba48:       68 00 00 00     .long 0x68
   4ba4c:       06 00 5f 5f     rlwnm   r31,r26,r0,0,3
   4ba50:       6b 69 6c 6c     xoris   r12,r3,26987
---
 sysdeps/powerpc/powerpc64/sysdep.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

Comments

Paul E Murphy Nov. 3, 2021, 3:46 p.m. UTC | #1
On 11/3/21 9:22 AM, Matheus Castanho via Libc-alpha wrote:
> Syscalls based on the assembly templates are missing CFI for r31, which gets
> clobbered when scv is used, and info for LR is inaccurate, placed in the wrong
> LOC and not using the proper offset. These are fixed by this commit.
> 
> After this change:
> 
> $ readelf -wF libc.so.6 | grep 0004b9d4.. -A 7 && objdump --disassemble=kill libc.so.6
> 00004a48 0000000000000020 00004a4c FDE cie=00000000 pc=000000000004b9d4..000000000004ba3c
>     LOC           CFA      r31   ra
> 000000000004b9d4 r1+0     u     u
> 000000000004b9e4 r1+48    u     u
> 000000000004b9e8 r1+48    c-16  u
> 000000000004b9fc r1+48    c-16  c-32
> 000000000004ba08 r1+48    c-16
> 000000000004ba18 r1+48    u
> 000000000004ba1c r1+0     u
> 
> libc.so.6:     file format elf64-powerpcle
> 
> Disassembly of section .text:
> 
> 000000000004b9d4 <kill>:
>     4b9d4:       1f 00 4c 3c     addis   r2,r12,31
>     4b9d8:       2c c3 42 38     addi    r2,r2,-15572
>     4b9dc:       25 00 00 38     li      r0,37
>     4b9e0:       d1 ff 21 f8     stdu    r1,-48(r1)
>     4b9e4:       20 00 e1 fb     std     r31,32(r1)
>     4b9e8:       98 8f ed eb     ld      r31,-28776(r13)
>     4b9ec:       10 00 ff 77     andis.  r31,r31,16
>     4b9f0:       1c 00 82 41     beq     4ba0c <kill+0x38>
>     4b9f4:       a6 02 28 7d     mflr    r9
>     4b9f8:       10 00 21 f9     std     r9,16(r1)
>     4b9fc:       01 00 00 44     scv     0
>     4ba00:       10 00 21 e9     ld      r9,16(r1)
>     4ba04:       a6 03 28 7d     mtlr    r9
>     4ba08:       08 00 00 48     b       4ba10 <kill+0x3c>
>     4ba0c:       02 00 00 44     sc
>     4ba10:       00 00 bf 2e     cmpdi   cr5,r31,0
>     4ba14:       20 00 e1 eb     ld      r31,32(r1)
>     4ba18:       30 00 21 38     addi    r1,r1,48
>     4ba1c:       18 00 96 41     beq     cr5,4ba34 <kill+0x60>
>     4ba20:       01 f0 20 39     li      r9,-4095
>     4ba24:       40 48 23 7c     cmpld   r3,r9
>     4ba28:       20 00 e0 4d     bltlr+
>     4ba2c:       d0 00 63 7c     neg     r3,r3
>     4ba30:       08 00 00 48     b       4ba38 <kill+0x64>
>     4ba34:       20 00 e3 4c     bnslr+
>     4ba38:       c8 32 fe 4b     b       2ed00 <__syscall_error>
>          ...
>     4ba44:       40 20 0c 00     .long 0xc2040
>     4ba48:       68 00 00 00     .long 0x68
>     4ba4c:       06 00 5f 5f     rlwnm   r31,r26,r0,0,3
>     4ba50:       6b 69 6c 6c     xoris   r12,r3,26987
> ---
>   sysdeps/powerpc/powerpc64/sysdep.h | 6 ++++--
>   1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h
> index 589f7c8d18..beb477d57b 100644
> --- a/sysdeps/powerpc/powerpc64/sysdep.h
> +++ b/sysdeps/powerpc/powerpc64/sysdep.h
> @@ -275,12 +275,14 @@ LT_LABELSUFFIX(name,_name_end): ; \
>   /* Allocate frame and save register */
>   #define NVOLREG_SAVE \
>       stdu r1,-SCV_FRAME_SIZE(r1); \
> +    cfi_adjust_cfa_offset(SCV_FRAME_SIZE); \
>       std r31,SCV_FRAME_NVOLREG_SAVE(r1); \
> -    cfi_adjust_cfa_offset(SCV_FRAME_SIZE);
> +    cfi_offset(r31,-(SCV_FRAME_SIZE-SCV_FRAME_NVOLREG_SAVE));
>OK. This (and similarly the case below) seem a little obfuscated for 
computing stack save slots. I wonder if a distinct macro like 
SCV_FRAME_R31_SAVE_SLOT (and similar SCV_FRAME_CALLER_LR_SAVE_SLOT 
below) would make this code more approachable for those less familiar 
with ppc64 abis.

>   /* Restore register and destroy frame */
>   #define NVOLREG_RESTORE	\
>       ld r31,SCV_FRAME_NVOLREG_SAVE(r1); \
> +    cfi_restore(r31); \
>       addi r1,r1,SCV_FRAME_SIZE; \
>       cfi_adjust_cfa_offset(-SCV_FRAME_SIZE);
> 
> @@ -332,7 +334,7 @@ LT_LABELSUFFIX(name,_name_end): ; \
>   #define DO_CALL_SCV \
>       mflr r9; \
>       std r9,FRAME_LR_SAVE(r1); \
> -    cfi_offset(lr,FRAME_LR_SAVE); \
> +    cfi_offset(lr,-(SCV_FRAME_SIZE-FRAME_LR_SAVE)) >       .machine "push"; \
>       .machine "power9"; \
>       scv 0; \
>
Matheus Castanho Nov. 3, 2021, 4:41 p.m. UTC | #2
Paul E Murphy <murphyp@linux.ibm.com> writes:

> On 11/3/21 9:22 AM, Matheus Castanho via Libc-alpha wrote:
>> Syscalls based on the assembly templates are missing CFI for r31, which gets
>> clobbered when scv is used, and info for LR is inaccurate, placed in the wrong
>> LOC and not using the proper offset. These are fixed by this commit.
>> After this change:
>> $ readelf -wF libc.so.6 | grep 0004b9d4.. -A 7 && objdump --disassemble=kill
>> libc.so.6
>> 00004a48 0000000000000020 00004a4c FDE cie=00000000 pc=000000000004b9d4..000000000004ba3c
>>     LOC           CFA      r31   ra
>> 000000000004b9d4 r1+0     u     u
>> 000000000004b9e4 r1+48    u     u
>> 000000000004b9e8 r1+48    c-16  u
>> 000000000004b9fc r1+48    c-16  c-32
>> 000000000004ba08 r1+48    c-16
>> 000000000004ba18 r1+48    u
>> 000000000004ba1c r1+0     u
>> libc.so.6:     file format elf64-powerpcle
>> Disassembly of section .text:
>> 000000000004b9d4 <kill>:
>>     4b9d4:       1f 00 4c 3c     addis   r2,r12,31
>>     4b9d8:       2c c3 42 38     addi    r2,r2,-15572
>>     4b9dc:       25 00 00 38     li      r0,37
>>     4b9e0:       d1 ff 21 f8     stdu    r1,-48(r1)
>>     4b9e4:       20 00 e1 fb     std     r31,32(r1)
>>     4b9e8:       98 8f ed eb     ld      r31,-28776(r13)
>>     4b9ec:       10 00 ff 77     andis.  r31,r31,16
>>     4b9f0:       1c 00 82 41     beq     4ba0c <kill+0x38>
>>     4b9f4:       a6 02 28 7d     mflr    r9
>>     4b9f8:       10 00 21 f9     std     r9,16(r1)
>>     4b9fc:       01 00 00 44     scv     0
>>     4ba00:       10 00 21 e9     ld      r9,16(r1)
>>     4ba04:       a6 03 28 7d     mtlr    r9
>>     4ba08:       08 00 00 48     b       4ba10 <kill+0x3c>
>>     4ba0c:       02 00 00 44     sc
>>     4ba10:       00 00 bf 2e     cmpdi   cr5,r31,0
>>     4ba14:       20 00 e1 eb     ld      r31,32(r1)
>>     4ba18:       30 00 21 38     addi    r1,r1,48
>>     4ba1c:       18 00 96 41     beq     cr5,4ba34 <kill+0x60>
>>     4ba20:       01 f0 20 39     li      r9,-4095
>>     4ba24:       40 48 23 7c     cmpld   r3,r9
>>     4ba28:       20 00 e0 4d     bltlr+
>>     4ba2c:       d0 00 63 7c     neg     r3,r3
>>     4ba30:       08 00 00 48     b       4ba38 <kill+0x64>
>>     4ba34:       20 00 e3 4c     bnslr+
>>     4ba38:       c8 32 fe 4b     b       2ed00 <__syscall_error>
>>          ...
>>     4ba44:       40 20 0c 00     .long 0xc2040
>>     4ba48:       68 00 00 00     .long 0x68
>>     4ba4c:       06 00 5f 5f     rlwnm   r31,r26,r0,0,3
>>     4ba50:       6b 69 6c 6c     xoris   r12,r3,26987
>> ---
>>   sysdeps/powerpc/powerpc64/sysdep.h | 6 ++++--
>>   1 file changed, 4 insertions(+), 2 deletions(-)
>> diff --git a/sysdeps/powerpc/powerpc64/sysdep.h
>> b/sysdeps/powerpc/powerpc64/sysdep.h
>> index 589f7c8d18..beb477d57b 100644
>> --- a/sysdeps/powerpc/powerpc64/sysdep.h
>> +++ b/sysdeps/powerpc/powerpc64/sysdep.h
>> @@ -275,12 +275,14 @@ LT_LABELSUFFIX(name,_name_end): ; \
>>   /* Allocate frame and save register */
>>   #define NVOLREG_SAVE \
>>       stdu r1,-SCV_FRAME_SIZE(r1); \
>> +    cfi_adjust_cfa_offset(SCV_FRAME_SIZE); \
>>       std r31,SCV_FRAME_NVOLREG_SAVE(r1); \
>> -    cfi_adjust_cfa_offset(SCV_FRAME_SIZE);
>> +    cfi_offset(r31,-(SCV_FRAME_SIZE-SCV_FRAME_NVOLREG_SAVE));
>> OK. This (and similarly the case below) seem a little obfuscated for
> computing stack save slots. I wonder if a distinct macro like
> SCV_FRAME_R31_SAVE_SLOT (and similar SCV_FRAME_CALLER_LR_SAVE_SLOT
> below) would make this code more approachable for those less familiar with ppc64
> abis.
>

But those kind of already exist: FRAME_LR_SAVE and
SCV_FRAME_NVOLREG_SAVE, but these two values are relative to the
top-most frame, so we need some math to get the values relative to the
CFA.

Wouldn't adding 2 more macros with similar meanings (but with different
references) make things even more complicated?

>>   /* Restore register and destroy frame */
>>   #define NVOLREG_RESTORE	\
>>       ld r31,SCV_FRAME_NVOLREG_SAVE(r1); \
>> +    cfi_restore(r31); \
>>       addi r1,r1,SCV_FRAME_SIZE; \
>>       cfi_adjust_cfa_offset(-SCV_FRAME_SIZE);
>> @@ -332,7 +334,7 @@ LT_LABELSUFFIX(name,_name_end): ; \
>>   #define DO_CALL_SCV \
>>       mflr r9; \
>>       std r9,FRAME_LR_SAVE(r1); \
>> -    cfi_offset(lr,FRAME_LR_SAVE); \
>> +    cfi_offset(lr,-(SCV_FRAME_SIZE-FRAME_LR_SAVE)) >       .machine "push"; \
>>       .machine "power9"; \
>>       scv 0; \
>>


--
Matheus Castanho
Paul E Murphy Nov. 4, 2021, 10:36 p.m. UTC | #3
On 11/3/21 11:41 AM, Matheus Castanho wrote:
> 
> Paul E Murphy <murphyp@linux.ibm.com> writes:
> 
>> On 11/3/21 9:22 AM, Matheus Castanho via Libc-alpha wrote:
>>> Syscalls based on the assembly templates are missing CFI for r31, which gets
>>> clobbered when scv is used, and info for LR is inaccurate, placed in the wrong
>>> LOC and not using the proper offset. These are fixed by this commit.
>>> After this change:
>>> $ readelf -wF libc.so.6 | grep 0004b9d4.. -A 7 && objdump --disassemble=kill
>>> libc.so.6
>>> 00004a48 0000000000000020 00004a4c FDE cie=00000000 pc=000000000004b9d4..000000000004ba3c
>>>      LOC           CFA      r31   ra
>>> 000000000004b9d4 r1+0     u     u
>>> 000000000004b9e4 r1+48    u     u
>>> 000000000004b9e8 r1+48    c-16  u
>>> 000000000004b9fc r1+48    c-16  c-32
>>> 000000000004ba08 r1+48    c-16
>>> 000000000004ba18 r1+48    u
>>> 000000000004ba1c r1+0     u
>>> libc.so.6:     file format elf64-powerpcle
>>> Disassembly of section .text:
>>> 000000000004b9d4 <kill>:
>>>      4b9d4:       1f 00 4c 3c     addis   r2,r12,31
>>>      4b9d8:       2c c3 42 38     addi    r2,r2,-15572
>>>      4b9dc:       25 00 00 38     li      r0,37
>>>      4b9e0:       d1 ff 21 f8     stdu    r1,-48(r1)
>>>      4b9e4:       20 00 e1 fb     std     r31,32(r1)
>>>      4b9e8:       98 8f ed eb     ld      r31,-28776(r13)
>>>      4b9ec:       10 00 ff 77     andis.  r31,r31,16
>>>      4b9f0:       1c 00 82 41     beq     4ba0c <kill+0x38>
>>>      4b9f4:       a6 02 28 7d     mflr    r9
>>>      4b9f8:       10 00 21 f9     std     r9,16(r1)
>>>      4b9fc:       01 00 00 44     scv     0
>>>      4ba00:       10 00 21 e9     ld      r9,16(r1)
>>>      4ba04:       a6 03 28 7d     mtlr    r9
>>>      4ba08:       08 00 00 48     b       4ba10 <kill+0x3c>
>>>      4ba0c:       02 00 00 44     sc
>>>      4ba10:       00 00 bf 2e     cmpdi   cr5,r31,0
>>>      4ba14:       20 00 e1 eb     ld      r31,32(r1)
>>>      4ba18:       30 00 21 38     addi    r1,r1,48
>>>      4ba1c:       18 00 96 41     beq     cr5,4ba34 <kill+0x60>
>>>      4ba20:       01 f0 20 39     li      r9,-4095
>>>      4ba24:       40 48 23 7c     cmpld   r3,r9
>>>      4ba28:       20 00 e0 4d     bltlr+
>>>      4ba2c:       d0 00 63 7c     neg     r3,r3
>>>      4ba30:       08 00 00 48     b       4ba38 <kill+0x64>
>>>      4ba34:       20 00 e3 4c     bnslr+
>>>      4ba38:       c8 32 fe 4b     b       2ed00 <__syscall_error>
>>>           ...
>>>      4ba44:       40 20 0c 00     .long 0xc2040
>>>      4ba48:       68 00 00 00     .long 0x68
>>>      4ba4c:       06 00 5f 5f     rlwnm   r31,r26,r0,0,3
>>>      4ba50:       6b 69 6c 6c     xoris   r12,r3,26987
>>> ---
>>>    sysdeps/powerpc/powerpc64/sysdep.h | 6 ++++--
>>>    1 file changed, 4 insertions(+), 2 deletions(-)
>>> diff --git a/sysdeps/powerpc/powerpc64/sysdep.h
>>> b/sysdeps/powerpc/powerpc64/sysdep.h
>>> index 589f7c8d18..beb477d57b 100644
>>> --- a/sysdeps/powerpc/powerpc64/sysdep.h
>>> +++ b/sysdeps/powerpc/powerpc64/sysdep.h
>>> @@ -275,12 +275,14 @@ LT_LABELSUFFIX(name,_name_end): ; \
>>>    /* Allocate frame and save register */
>>>    #define NVOLREG_SAVE \
>>>        stdu r1,-SCV_FRAME_SIZE(r1); \
>>> +    cfi_adjust_cfa_offset(SCV_FRAME_SIZE); \
>>>        std r31,SCV_FRAME_NVOLREG_SAVE(r1); \
>>> -    cfi_adjust_cfa_offset(SCV_FRAME_SIZE);
>>> +    cfi_offset(r31,-(SCV_FRAME_SIZE-SCV_FRAME_NVOLREG_SAVE));
>>> OK. This (and similarly the case below) seem a little obfuscated for
>> computing stack save slots. I wonder if a distinct macro like
>> SCV_FRAME_R31_SAVE_SLOT (and similar SCV_FRAME_CALLER_LR_SAVE_SLOT
>> below) would make this code more approachable for those less familiar with ppc64
>> abis.
>>
> 
> But those kind of already exist: FRAME_LR_SAVE and
> SCV_FRAME_NVOLREG_SAVE, but these two values are relative to the
> top-most frame, so we need some math to get the values relative to the
> CFA.
> 
> Wouldn't adding 2 more macros with similar meanings (but with different
> references) make things even more complicated?
After refreshing my understanding of CFI. I agree with you.

> 
>>>    /* Restore register and destroy frame */
>>>    #define NVOLREG_RESTORE	\
>>>        ld r31,SCV_FRAME_NVOLREG_SAVE(r1); \
>>> +    cfi_restore(r31); \
>>>        addi r1,r1,SCV_FRAME_SIZE; \
>>>        cfi_adjust_cfa_offset(-SCV_FRAME_SIZE);
>>> @@ -332,7 +334,7 @@ LT_LABELSUFFIX(name,_name_end): ; \
>>>    #define DO_CALL_SCV \
>>>        mflr r9; \
>>>        std r9,FRAME_LR_SAVE(r1); \
>>> -    cfi_offset(lr,FRAME_LR_SAVE); \
>>> +    cfi_offset(lr,-(SCV_FRAME_SIZE-FRAME_LR_SAVE)) >       .machine "push"; \

I suspect this change is highlighting a bug in the asm.  We should be 
saving lr to the caller frame's lr slot, not the callee's.

>>>        .machine "power9"; \
>>>        scv 0; \
>>>
> 
> 
> --
> Matheus Castanho
>
diff mbox series

Patch

diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h
index 589f7c8d18..beb477d57b 100644
--- a/sysdeps/powerpc/powerpc64/sysdep.h
+++ b/sysdeps/powerpc/powerpc64/sysdep.h
@@ -275,12 +275,14 @@  LT_LABELSUFFIX(name,_name_end): ; \
 /* Allocate frame and save register */
 #define NVOLREG_SAVE \
     stdu r1,-SCV_FRAME_SIZE(r1); \
+    cfi_adjust_cfa_offset(SCV_FRAME_SIZE); \
     std r31,SCV_FRAME_NVOLREG_SAVE(r1); \
-    cfi_adjust_cfa_offset(SCV_FRAME_SIZE);
+    cfi_offset(r31,-(SCV_FRAME_SIZE-SCV_FRAME_NVOLREG_SAVE));
 
 /* Restore register and destroy frame */
 #define NVOLREG_RESTORE	\
     ld r31,SCV_FRAME_NVOLREG_SAVE(r1); \
+    cfi_restore(r31); \
     addi r1,r1,SCV_FRAME_SIZE; \
     cfi_adjust_cfa_offset(-SCV_FRAME_SIZE);
 
@@ -332,7 +334,7 @@  LT_LABELSUFFIX(name,_name_end): ; \
 #define DO_CALL_SCV \
     mflr r9; \
     std r9,FRAME_LR_SAVE(r1); \
-    cfi_offset(lr,FRAME_LR_SAVE); \
+    cfi_offset(lr,-(SCV_FRAME_SIZE-FRAME_LR_SAVE)); \
     .machine "push"; \
     .machine "power9"; \
     scv 0; \