Message ID | 10fee36792ba4cc90d0476e4dd23671e3bbf0bed.1702051831.git.szabolcs.nagy@arm.com |
---|---|
State | New |
Headers | show |
Series | aarch64: Add SME support | expand |
On 08/12/23 13:32, Szabolcs Nagy wrote: > The runtime support routines for the call ABI of the Scalable Matrix > Extension (SME) are mostly in libgcc. Since libc.so cannot depend on > libgcc_s.so have an implementation of __arm_za_disable in libc for > libc internal use in longjmp and similar APIs. > > __libc_arm_za_disable follows the same PCS rules as __arm_za_disable, > but it's a hidden symbol so it does not need variant PCS marking. > > Using __libc_fatal instead of abort because it can print a message and > works in ld.so too. But for now we don't need SME routines in ld.so. > > To check the SME HWCAP in asm, we need the _dl_hwcap2 member offset in > _rtld_global_ro in the shared libc.so, while in libc.a the _dl_hwcap2 > object is accessed. LGTM, thanks. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> > --- > sysdeps/aarch64/Makefile | 10 ++- > sysdeps/aarch64/__arm_za_disable.S | 112 ++++++++++++++++++++++++ > sysdeps/aarch64/rtld-global-offsets.sym | 10 +++ > 3 files changed, 129 insertions(+), 3 deletions(-) > create mode 100644 sysdeps/aarch64/__arm_za_disable.S > create mode 100644 sysdeps/aarch64/rtld-global-offsets.sym > > diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile > index 6a9559e5f5..9d8844d9c8 100644 > --- a/sysdeps/aarch64/Makefile > +++ b/sysdeps/aarch64/Makefile > @@ -48,7 +48,9 @@ endif > endif > > ifeq ($(subdir),csu) > -gen-as-const-headers += tlsdesc.sym > +gen-as-const-headers += \ > + tlsdesc.sym \ > + rtld-global-offsets.sym > endif > > ifeq ($(subdir),gmon) > @@ -62,8 +64,10 @@ endif > > ifeq ($(subdir),misc) > sysdep_headers += sys/ifunc.h > -sysdep_routines += __mtag_tag_zero_region \ > - __mtag_tag_region > +sysdep_routines += \ > + __mtag_tag_zero_region \ > + __mtag_tag_region \ > + __arm_za_disable > endif > > ifeq ($(subdir),malloc) Ok (although usually the Makefile reflow makes more sense to be an unrelated patch). 
> diff --git a/sysdeps/aarch64/__arm_za_disable.S b/sysdeps/aarch64/__arm_za_disable.S > new file mode 100644 > index 0000000000..f9e2d942f2 > --- /dev/null > +++ b/sysdeps/aarch64/__arm_za_disable.S > @@ -0,0 +1,112 @@ > +/* Libc internal support routine for SME. > + Copyright (C) 2023 Free Software Foundation, Inc. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library. If not, see > + <http://www.gnu.org/licenses/>. */ > + > +#include <sysdep.h> > +#include <rtld-global-offsets.h> > + > +#define HWCAP2_SME_BIT 23 > + > +/* Disable ZA. Call ABI: > + - Private ZA, streaming-compatible. > + - x0-x13, x19-x29, sp and fp regs are call preserved. > + - On return tpidr2_el0 = 0, ZA = 0. > + - Takes no argument. > + - Does not return a value. > + - Can abort on failure (then registers are not preserved). */ > + > +ENTRY (__libc_arm_za_disable) > + > + /* Check if SME is available. */ > +#ifdef SHARED > + /* In libc.so. */ > + adrp x14, :got:_rtld_global_ro > + ldr x14, [x14, :got_lo12:_rtld_global_ro] > + ldr x14, [x14, GLRO_DL_HWCAP2_OFFSET] > +#else > + /* In libc.a, may be PIC. */ > + adrp x14, _dl_hwcap2 > + ldr x14, [x14, :lo12:_dl_hwcap2] > +#endif > + tbz x14, HWCAP2_SME_BIT, L(end) > + > + .inst 0xd53bd0ae /* mrs x14, tpidr2_el0 */ > + cbz x14, L(end) > + > + /* check reserved bytes. 
*/ Maybe add that the action chosen is to abort if non-zero bytes are found. > + ldrh w15, [x14, 10] > + ldr w16, [x14, 12] > + orr w15, w15, w16 > + cbnz w15, L(fail) > + > + ldr x16, [x14] > + cbz x16, L(end) > + ldrh w17, [x14, 8] > + cbz w17, L(end) > + > + /* x14: tpidr2, x15: 0, > + x16: za_save_buffer, x17: num_za_save_slices. */ > + > +L(save_loop): > + .inst 0xe1206200 /* str za[w15, 0], [x16] */ > + .inst 0xe1206201 /* str za[w15, 1], [x16, 1, mul vl] */ > + .inst 0xe1206202 /* str za[w15, 2], [x16, 2, mul vl] */ > + .inst 0xe1206203 /* str za[w15, 3], [x16, 3, mul vl] */ > + .inst 0xe1206204 /* str za[w15, 4], [x16, 4, mul vl] */ > + .inst 0xe1206205 /* str za[w15, 5], [x16, 5, mul vl] */ > + .inst 0xe1206206 /* str za[w15, 6], [x16, 6, mul vl] */ > + .inst 0xe1206207 /* str za[w15, 7], [x16, 7, mul vl] */ > + .inst 0xe1206208 /* str za[w15, 8], [x16, 8, mul vl] */ > + .inst 0xe1206209 /* str za[w15, 9], [x16, 9, mul vl] */ > + .inst 0xe120620a /* str za[w15, 10], [x16, 10, mul vl] */ > + .inst 0xe120620b /* str za[w15, 11], [x16, 11, mul vl] */ > + .inst 0xe120620c /* str za[w15, 12], [x16, 12, mul vl] */ > + .inst 0xe120620d /* str za[w15, 13], [x16, 13, mul vl] */ > + .inst 0xe120620e /* str za[w15, 14], [x16, 14, mul vl] */ > + .inst 0xe120620f /* str za[w15, 15], [x16, 15, mul vl] */ > + add w15, w15, 16 > + .inst 0x04305a10 /* addsvl x16, x16, 16 */ > + cmp w17, w15 > + bhi L(save_loop) > + .inst 0xd51bd0bf /* msr tpidr2_el0, xzr */ > + .inst 0xd503447f /* smstop za */ > +L(end): > + ret > +L(fail): > +#if HAVE_AARCH64_PAC_RET > + PACIASP > + cfi_window_save > +#endif > + stp x29, x30, [sp, -32]! 
> + cfi_adjust_cfa_offset (32) > + cfi_rel_offset (x29, 0) > + cfi_rel_offset (x30, 8) > + mov x29, sp > + .inst 0x04e0e3f0 /* cntd x16 */ > + str x16, [sp, 16] > + cfi_rel_offset (46, 16) > + .inst 0xd503467f /* smstop */ > + adrp x0, L(msg) > + add x0, x0, :lo12:L(msg) > + bl HIDDEN_JUMPTARGET (__libc_fatal) > +END (__libc_arm_za_disable) > + > + .section .rodata > + .align 3 > +L(msg): > + .string "FATAL: __libc_arm_za_disable failed.\n" Ok. > diff --git a/sysdeps/aarch64/rtld-global-offsets.sym b/sysdeps/aarch64/rtld-global-offsets.sym > new file mode 100644 > index 0000000000..23cdaf7d9e > --- /dev/null > +++ b/sysdeps/aarch64/rtld-global-offsets.sym > @@ -0,0 +1,10 @@ > +#define SHARED 1 > + > +#include <ldsodefs.h> > + > +#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name) > + > +-- Offsets of _rtld_global_ro in libc.so > + > +GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap) It seems to be unused. > +GLRO_DL_HWCAP2_OFFSET GLRO_offsetof (dl_hwcap2) OK.
The 12/28/2023 10:41, Adhemerval Zanella Netto wrote: > On 08/12/23 13:32, Szabolcs Nagy wrote: > > The runtime support routines for the call ABI of the Scalable Matrix > > Extension (SME) are mostly in libgcc. Since libc.so cannot depend on > > libgcc_s.so have an implementation of __arm_za_disable in libc for > > libc internal use in longjmp and similar APIs. > > > > __libc_arm_za_disable follows the same PCS rules as __arm_za_disable, > > but it's a hidden symbol so it does not need variant PCS marking. > > > > Using __libc_fatal instead of abort because it can print a message and > > works in ld.so too. But for now we don't need SME routines in ld.so. > > > > To check the SME HWCAP in asm, we need the _dl_hwcap2 member offset in > > _rtld_global_ro in the shared libc.so, while in libc.a the _dl_hwcap2 > > object is accessed. > > LGTM, thanks. > > Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> thanks. > > -sysdep_routines += __mtag_tag_zero_region \ > > - __mtag_tag_region > > +sysdep_routines += \ > > + __mtag_tag_zero_region \ > > + __mtag_tag_region \ > > + __arm_za_disable > > endif > > > > ifeq ($(subdir),malloc) > > Ok (although usually the Makefile reflow makes more sense to be an > unrelated patch). I kept this as it is a minor refactor. > > + /* check reserved bytes. */ > > Maybe add that the action chosen is to abort if non-zero bytes > are found. updated the comment.
diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile index 6a9559e5f5..9d8844d9c8 100644 --- a/sysdeps/aarch64/Makefile +++ b/sysdeps/aarch64/Makefile @@ -48,7 +48,9 @@ endif endif ifeq ($(subdir),csu) -gen-as-const-headers += tlsdesc.sym +gen-as-const-headers += \ + tlsdesc.sym \ + rtld-global-offsets.sym endif ifeq ($(subdir),gmon) @@ -62,8 +64,10 @@ endif ifeq ($(subdir),misc) sysdep_headers += sys/ifunc.h -sysdep_routines += __mtag_tag_zero_region \ - __mtag_tag_region +sysdep_routines += \ + __mtag_tag_zero_region \ + __mtag_tag_region \ + __arm_za_disable endif ifeq ($(subdir),malloc) diff --git a/sysdeps/aarch64/__arm_za_disable.S b/sysdeps/aarch64/__arm_za_disable.S new file mode 100644 index 0000000000..f9e2d942f2 --- /dev/null +++ b/sysdeps/aarch64/__arm_za_disable.S @@ -0,0 +1,112 @@ +/* Libc internal support routine for SME. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <rtld-global-offsets.h> + +#define HWCAP2_SME_BIT 23 + +/* Disable ZA. Call ABI: + - Private ZA, streaming-compatible. + - x0-x13, x19-x29, sp and fp regs are call preserved. + - On return tpidr2_el0 = 0, ZA = 0. + - Takes no argument. + - Does not return a value. + - Can abort on failure (then registers are not preserved). 
*/ + +ENTRY (__libc_arm_za_disable) + + /* Check if SME is available. */ +#ifdef SHARED + /* In libc.so. */ + adrp x14, :got:_rtld_global_ro + ldr x14, [x14, :got_lo12:_rtld_global_ro] + ldr x14, [x14, GLRO_DL_HWCAP2_OFFSET] +#else + /* In libc.a, may be PIC. */ + adrp x14, _dl_hwcap2 + ldr x14, [x14, :lo12:_dl_hwcap2] +#endif + tbz x14, HWCAP2_SME_BIT, L(end) + + .inst 0xd53bd0ae /* mrs x14, tpidr2_el0 */ + cbz x14, L(end) + + /* check reserved bytes. */ + ldrh w15, [x14, 10] + ldr w16, [x14, 12] + orr w15, w15, w16 + cbnz w15, L(fail) + + ldr x16, [x14] + cbz x16, L(end) + ldrh w17, [x14, 8] + cbz w17, L(end) + + /* x14: tpidr2, x15: 0, + x16: za_save_buffer, x17: num_za_save_slices. */ + +L(save_loop): + .inst 0xe1206200 /* str za[w15, 0], [x16] */ + .inst 0xe1206201 /* str za[w15, 1], [x16, 1, mul vl] */ + .inst 0xe1206202 /* str za[w15, 2], [x16, 2, mul vl] */ + .inst 0xe1206203 /* str za[w15, 3], [x16, 3, mul vl] */ + .inst 0xe1206204 /* str za[w15, 4], [x16, 4, mul vl] */ + .inst 0xe1206205 /* str za[w15, 5], [x16, 5, mul vl] */ + .inst 0xe1206206 /* str za[w15, 6], [x16, 6, mul vl] */ + .inst 0xe1206207 /* str za[w15, 7], [x16, 7, mul vl] */ + .inst 0xe1206208 /* str za[w15, 8], [x16, 8, mul vl] */ + .inst 0xe1206209 /* str za[w15, 9], [x16, 9, mul vl] */ + .inst 0xe120620a /* str za[w15, 10], [x16, 10, mul vl] */ + .inst 0xe120620b /* str za[w15, 11], [x16, 11, mul vl] */ + .inst 0xe120620c /* str za[w15, 12], [x16, 12, mul vl] */ + .inst 0xe120620d /* str za[w15, 13], [x16, 13, mul vl] */ + .inst 0xe120620e /* str za[w15, 14], [x16, 14, mul vl] */ + .inst 0xe120620f /* str za[w15, 15], [x16, 15, mul vl] */ + add w15, w15, 16 + .inst 0x04305a10 /* addsvl x16, x16, 16 */ + cmp w17, w15 + bhi L(save_loop) + .inst 0xd51bd0bf /* msr tpidr2_el0, xzr */ + .inst 0xd503447f /* smstop za */ +L(end): + ret +L(fail): +#if HAVE_AARCH64_PAC_RET + PACIASP + cfi_window_save +#endif + stp x29, x30, [sp, -32]! 
+ cfi_adjust_cfa_offset (32) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) + mov x29, sp + .inst 0x04e0e3f0 /* cntd x16 */ + str x16, [sp, 16] + cfi_rel_offset (46, 16) + .inst 0xd503467f /* smstop */ + adrp x0, L(msg) + add x0, x0, :lo12:L(msg) + bl HIDDEN_JUMPTARGET (__libc_fatal) +END (__libc_arm_za_disable) + + .section .rodata + .align 3 +L(msg): + .string "FATAL: __libc_arm_za_disable failed.\n" diff --git a/sysdeps/aarch64/rtld-global-offsets.sym b/sysdeps/aarch64/rtld-global-offsets.sym new file mode 100644 index 0000000000..23cdaf7d9e --- /dev/null +++ b/sysdeps/aarch64/rtld-global-offsets.sym @@ -0,0 +1,10 @@ +#define SHARED 1 + +#include <ldsodefs.h> + +#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name) + +-- Offsets of _rtld_global_ro in libc.so + +GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap) +GLRO_DL_HWCAP2_OFFSET GLRO_offsetof (dl_hwcap2)