Message ID | 20220307150628.68146-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v3] x86: Disable SSE on unwind-c.c and unwind-dw2.c | expand |
On Mon, Mar 07, 2022 at 07:06:28AM -0800, H.J. Lu wrote: > Since eh_return doesn't work with stack realignment, disable SSE on > unwind-c.c and unwind-dw2.c to avoid stack realignment with the 4-byte > incoming stack to avoid SSE usage which is caused by > > commit 609e8c492d62d92465460eae3d43dfc4b2c68288 > Author: H.J. Lu <hjl.tools@gmail.com> > Date: Sat Feb 26 14:17:23 2022 -0800 > > x86: Always return pseudo register in ix86_gen_scratch_sse_rtx > > when pseudo vector registers are used to expand memset. > PR target/104781 > * config.host (tmake_file): Add i386/32/t-eh-return-no-sse for > 32-bit x86 Cygwin, MinGW and Solaris. > * config/i386/32/t-eh-return-no-sse: New file. For this, isn't the right fix instead something like: --- gcc/config/i386/i386.h.jj 2022-02-25 12:06:45.535493490 +0100 +++ gcc/config/i386/i386.h 2022-03-08 11:20:43.207043370 +0100 @@ -2848,6 +2848,10 @@ extern enum attr_cpu ix86_schedule; #define NUM_X86_64_MS_CLOBBERED_REGS 12 #endif +/* __builtin_eh_return can't handle stack realignment, so disable SSE in + libgcc functions that call it. */ +#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse"))) + /* Local variables: version-control: t As mentioned in PR104838, this likely isn't specific to just Solaris and cygwin/mingw. Fedora uses -msse2 -mfpmath=sse -mstackrealign in its C{,XX}FLAGS among other things for i686. Jakub
On Tue, Mar 08, 2022 at 11:23:51AM +0100, Jakub Jelinek via Gcc-patches wrote: > On Mon, Mar 07, 2022 at 07:06:28AM -0800, H.J. Lu wrote: > > Since eh_return doesn't work with stack realignment, disable SSE on > > unwind-c.c and unwind-dw2.c to avoid stack realignment with the 4-byte > > incoming stack to avoid SSE usage which is caused by > > > > commit 609e8c492d62d92465460eae3d43dfc4b2c68288 > > Author: H.J. Lu <hjl.tools@gmail.com> > > Date: Sat Feb 26 14:17:23 2022 -0800 > > > > x86: Always return pseudo register in ix86_gen_scratch_sse_rtx > > > > when pseudo vector registers are used to expand memset. > > > PR target/104781 > > * config.host (tmake_file): Add i386/32/t-eh-return-no-sse for > > 32-bit x86 Cygwin, MinGW and Solaris. > > * config/i386/32/t-eh-return-no-sse: New file. > > For this, isn't the right fix instead something like: > > --- gcc/config/i386/i386.h.jj 2022-02-25 12:06:45.535493490 +0100 > +++ gcc/config/i386/i386.h 2022-03-08 11:20:43.207043370 +0100 > @@ -2848,6 +2848,10 @@ extern enum attr_cpu ix86_schedule; > #define NUM_X86_64_MS_CLOBBERED_REGS 12 > #endif > > +/* __builtin_eh_return can't handle stack realignment, so disable SSE in > + libgcc functions that call it. */ > +#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse"))) > + > /* > Local variables: > version-control: t > > > As mentioned in PR104838, this likely isn't specific to just Solaris and > cygwin/mingw. Fedora uses -msse2 -mfpmath=sse -mstackrealign in its C{,XX}FLAGS > among other things for i686. Now verified that with your full patch applied gcc on Fedora/i686 doesn't build (gets those sorry messages when compiling unwind-dw2), while if I replace those 2 libgcc hunks from your patch with the above one it seems to get past the previous hanging point of gnat1 processes. Jakub
On Tue, Mar 08, 2022 at 12:15:15PM +0100, Jakub Jelinek via Gcc-patches wrote: > > --- gcc/config/i386/i386.h.jj 2022-02-25 12:06:45.535493490 +0100 > > +++ gcc/config/i386/i386.h 2022-03-08 11:20:43.207043370 +0100 > > @@ -2848,6 +2848,10 @@ extern enum attr_cpu ix86_schedule; > > #define NUM_X86_64_MS_CLOBBERED_REGS 12 > > #endif > > > > +/* __builtin_eh_return can't handle stack realignment, so disable SSE in > > + libgcc functions that call it. */ > > +#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse"))) > > + > > /* > > Local variables: > > version-control: t > > > > > > As mentioned in PR104838, this likely isn't specific to just Solaris and > > cygwin/mingw. Fedora uses -msse2 -mfpmath=sse -mstackrealign in its C{,XX}FLAGS > > among other things for i686. > > Now verified that with your full patch applied gcc on Fedora/i686 doesn't > build (gets those sorry messages when compiling unwind-dw2), while if I > replace those 2 libgcc hunks from your patch with the above one it seems to > get past the previous hanging point of gnat1 processes. Though, perhaps it should be #ifndef __x86_64__ #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse"))) #endif or something similar, on x86-64 one at least normally doesn't use lower stack realignment unless avx or later. Maybe we want to use no-avx for the x86-64 case though. Disabling sse/sse2 might be a problem especially on mingw where we need to restore SSE registers in the EH return, no? Even better would be to make __builtin_eh_return work even with DRAP, but I admit I haven't understood what exactly is the problem that prevents it from working. Jakub
> Disabling sse/sse2 might be a problem especially on mingw where we need to > restore SSE registers in the EH return, no? Not in 32-bit mode I think, all XMM registers are call used.
On Tue, Mar 8, 2022 at 4:29 AM Jakub Jelinek <jakub@redhat.com> wrote: > > On Tue, Mar 08, 2022 at 12:15:15PM +0100, Jakub Jelinek via Gcc-patches wrote: > > > --- gcc/config/i386/i386.h.jj 2022-02-25 12:06:45.535493490 +0100 > > > +++ gcc/config/i386/i386.h 2022-03-08 11:20:43.207043370 +0100 > > > @@ -2848,6 +2848,10 @@ extern enum attr_cpu ix86_schedule; > > > #define NUM_X86_64_MS_CLOBBERED_REGS 12 > > > #endif > > > > > > +/* __builtin_eh_return can't handle stack realignment, so disable SSE in > > > + libgcc functions that call it. */ > > > +#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse"))) > > > + > > > /* > > > Local variables: > > > version-control: t > > > > > > > > > As mentioned in PR104838, this likely isn't specific to just Solaris and > > > cygwin/mingw. Fedora uses -msse2 -mfpmath=sse -mstackrealign in its C{,XX}FLAGS > > > among other things for i686. > > > > Now verified that with your full patch applied gcc on Fedora/i686 doesn't > > build (gets those sorry messages when compiling unwind-dw2), while if I > > replace those 2 libgcc hunks from your patch with the above one it seems to > > get past the previous hanging point of gnat1 processes. > > Though, perhaps it should be > #ifndef __x86_64__ > #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse"))) > #endif > or something similar, on x86-64 one at least normally doesn't use lower > stack realignment unless avx or later. Maybe we want to use > no-avx for the x86-64 case though. I have verified that AVX and AVX512 have no issues on x86-64. In 32-bit, -mstackrealign triggers the problem. > Disabling sse/sse2 might be a problem especially on mingw where we need to > restore SSE registers in the EH return, no? No, it isn't needed. > Even better would be to make __builtin_eh_return work even with DRAP, > but I admit I haven't understood what exactly is the problem that prevents > it from working. The EH return is a very special case. Disable SSE in 32-bit is the simplest way to make the EH return to work.
On Tue, Mar 08, 2022 at 07:37:17AM -0800, H.J. Lu wrote: > > Though, perhaps it should be > > #ifndef __x86_64__ > > #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse"))) > > #endif > > or something similar, on x86-64 one at least normally doesn't use lower > > stack realignment unless avx or later. Maybe we want to use > > no-avx for the x86-64 case though. > > I have verified that AVX and AVX512 have no issues on x86-64. In 32-bit, > -mstackrealign triggers the problem. I bet it would be a problem if we started vectorizing something in there using avx/avx2/avx512*. But given the sorry, I think we'd find that out immediately. > > Disabling sse/sse2 might be a problem especially on mingw where we need to > > restore SSE registers in the EH return, no? > > No, it isn't needed. I meant for 64-bit where I think the Windows ABI preserves some XMM regs (low 128-bits of them). So my earlier patch to just define LIBGCC2_UNWIND_ATTRIBUTE unconditionally would be wrong for it. > > Even better would be to make __builtin_eh_return work even with DRAP, > > but I admit I haven't understood what exactly is the problem that prevents > > it from working. > > The EH return is a very special case. Disable SSE in 32-bit is the simplest > way to make the EH return to work. Ok. So, what do you think about replacing the libgcc/ part of your patch with that /* __builtin_eh_return can't handle stack realignment, so disable SSE in 32-bit libgcc functions that call it. */ #ifndef __x86_64__ #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse"))) #endif ? I'm bootstrapping/regtesting such a patch right now (because I needed some quick fix for the gnat1 hangs). Jakub
On Tue, Mar 8, 2022 at 7:46 AM Jakub Jelinek <jakub@redhat.com> wrote: > > On Tue, Mar 08, 2022 at 07:37:17AM -0800, H.J. Lu wrote: > > > Though, perhaps it should be > > > #ifndef __x86_64__ > > > #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse"))) > > > #endif > > > or something similar, on x86-64 one at least normally doesn't use lower > > > stack realignment unless avx or later. Maybe we want to use > > > no-avx for the x86-64 case though. > > > > I have verified that AVX and AVX512 have no issues on x86-64. In 32-bit, > > -mstackrealign triggers the problem. > > I bet it would be a problem if we started vectorizing something in there > using avx/avx2/avx512*. But given the sorry, I think we'd find that out YMM and ZMM can be used to expand memset with -march=native. It works fine on Linux. No stack realignment is needed. > immediately. True. > > > Disabling sse/sse2 might be a problem especially on mingw where we need to > > > restore SSE registers in the EH return, no? > > > > No, it isn't needed. > > I meant for 64-bit where I think the Windows ABI preserves some XMM regs Does it need to realign the stack? > (low 128-bits of them). So my earlier patch to just define > LIBGCC2_UNWIND_ATTRIBUTE unconditionally would be wrong for it. > > > > Even better would be to make __builtin_eh_return work even with DRAP, > > > but I admit I haven't understood what exactly is the problem that prevents > > > it from working. > > > > The EH return is a very special case. Disable SSE in 32-bit is the simplest > > way to make the EH return to work. > > Ok. So, what do you think about replacing the libgcc/ part of your patch > with that > /* __builtin_eh_return can't handle stack realignment, so disable SSE in > 32-bit libgcc functions that call it. */ > #ifndef __x86_64__ > #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse"))) > #endif > ? Yes, it should work. Thanks. > I'm bootstrapping/regtesting such a patch right now (because I needed some > quick fix for the gnat1 hangs). > > Jakub >
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index efa947f9795..4121f986221 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -9444,12 +9444,15 @@ ix86_expand_epilogue (int style) rtx sa = EH_RETURN_STACKADJ_RTX; rtx_insn *insn; - /* %ecx can't be used for both DRAP register and eh_return. */ - if (crtl->drap_reg) - gcc_assert (REGNO (crtl->drap_reg) != CX_REG); + /* Stack realignment doesn't work with eh_return. */ + if (crtl->stack_realign_needed) + sorry ("Stack realignment not supported with " + "%<__builtin_eh_return%>"); /* regparm nested functions don't work with eh_return. */ - gcc_assert (!ix86_static_chain_on_stack); + if (ix86_static_chain_on_stack) + sorry ("regparm nested function not supported with " + "%<__builtin_eh_return%>"); if (frame_pointer_needed) { diff --git a/gcc/testsuite/gcc.target/i386/eh_return-1.c b/gcc/testsuite/gcc.target/i386/eh_return-1.c index b21fd75fc93..43f94f01a97 100644 --- a/gcc/testsuite/gcc.target/i386/eh_return-1.c +++ b/gcc/testsuite/gcc.target/i386/eh_return-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=haswell -mno-avx512f -mtune-ctrl=avx256_move_by_pieces" } */ +/* { dg-options "-O2 -mincoming-stack-boundary=4 -march=haswell -mno-avx512f -mtune-ctrl=avx256_move_by_pieces" } */ struct _Unwind_Context { diff --git a/gcc/testsuite/gcc.target/i386/eh_return-2.c b/gcc/testsuite/gcc.target/i386/eh_return-2.c index f23f4492dac..cb762f92cc2 100644 --- a/gcc/testsuite/gcc.target/i386/eh_return-2.c +++ b/gcc/testsuite/gcc.target/i386/eh_return-2.c @@ -1,6 +1,6 @@ /* PR target/101772 */ /* { dg-do compile } */ -/* { dg-additional-options "-O0 -march=x86-64 -mstackrealign" } */ +/* { dg-additional-options "-O0 -mincoming-stack-boundary=4 -march=x86-64 -mstackrealign" } */ struct _Unwind_Context _Unwind_Resume_or_Rethrow_this_context; diff --git a/libgcc/config.host b/libgcc/config.host index 094fd3ad254..1588ab6cf20 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -1583,3 +1583,16 @@ case ${host} in tmake_file="$tmake_file t-gthr-noweak" ;; esac + +case ${host} in +i[34567]86-*-* | x86_64-*-*) + if test "${host_address}" = 32; then + case ${host} in + *-*-cygwin* | *-*-mingw* | *-*-solaris2*) + # Disable SSE on unwind-c.c and unwind-dw2.c to avoid stack + # realignment with the 4-byte aligned incoming stack. + tmake_file="${tmake_file} i386/${host_address}/t-eh-return-no-sse" + ;; + esac + fi +esac diff --git a/libgcc/config/i386/32/t-eh-return-no-sse b/libgcc/config/i386/32/t-eh-return-no-sse new file mode 100644 index 00000000000..5a8c3135911 --- /dev/null +++ b/libgcc/config/i386/32/t-eh-return-no-sse @@ -0,0 +1,5 @@ +# Since eh_return doesn't work with stack realignment, disable SSE on +# unwind-c.c and unwind-dw2.c to avoid stack realignment with the 4-byte +# incoming stack. + +unwind-c.o unwind-c_s.o unwind-dw2.o unwind-dw2_s.o : HOST_LIBGCC2_CFLAGS += -mno-sse