diff mbox series

[v3] x86: Disable SSE on unwind-c.c and unwind-dw2.c

Message ID 20220307150628.68146-1-hjl.tools@gmail.com
State New
Headers show
Series [v3] x86: Disable SSE on unwind-c.c and unwind-dw2.c | expand

Commit Message

H.J. Lu March 7, 2022, 3:06 p.m. UTC
Since eh_return doesn't work with stack realignment, disable SSE on
unwind-c.c and unwind-dw2.c to avoid stack realignment with the 4-byte
incoming stack to avoid SSE usage which is caused by

commit 609e8c492d62d92465460eae3d43dfc4b2c68288
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Sat Feb 26 14:17:23 2022 -0800

    x86: Always return pseudo register in ix86_gen_scratch_sse_rtx

when pseudo vector registers are used to expand memset.

gcc/

	PR target/104781
	* config/i386/i386.cc (ix86_expand_epilogue): Sorry if there is
	stack realignment with eh_return or regparm nested function.

gcc/testsuite/

	PR target/104781
	* gcc.target/i386/eh_return-1.c: Add -mincoming-stack-boundary=4.
	* gcc.target/i386/eh_return-2.c: Likewise.

libgcc/

	PR target/104781
	* config.host (tmake_file): Add i386/32/t-eh-return-no-sse for
	32-bit x86 Cygwin, MinGW and Solaris.
	* config/i386/32/t-eh-return-no-sse: New file.
---
 gcc/config/i386/i386.cc                     | 11 +++++++----
 gcc/testsuite/gcc.target/i386/eh_return-1.c |  2 +-
 gcc/testsuite/gcc.target/i386/eh_return-2.c |  2 +-
 libgcc/config.host                          | 13 +++++++++++++
 libgcc/config/i386/32/t-eh-return-no-sse    |  5 +++++
 5 files changed, 27 insertions(+), 6 deletions(-)
 create mode 100644 libgcc/config/i386/32/t-eh-return-no-sse

Comments

Jakub Jelinek March 8, 2022, 10:23 a.m. UTC | #1
On Mon, Mar 07, 2022 at 07:06:28AM -0800, H.J. Lu wrote:
> Since eh_return doesn't work with stack realignment, disable SSE on
> unwind-c.c and unwind-dw2.c to avoid stack realignment with the 4-byte
> incoming stack to avoid SSE usage which is caused by
> 
> commit 609e8c492d62d92465460eae3d43dfc4b2c68288
> Author: H.J. Lu <hjl.tools@gmail.com>
> Date:   Sat Feb 26 14:17:23 2022 -0800
> 
>     x86: Always return pseudo register in ix86_gen_scratch_sse_rtx
> 
> when pseudo vector registers are used to expand memset.

> 	PR target/104781
> 	* config.host (tmake_file): Add i386/32/t-eh-return-no-sse for
> 	32-bit x86 Cygwin, MinGW and Solaris.
> 	* config/i386/32/t-eh-return-no-sse: New file.

For this, isn't the right fix instead something like:

--- gcc/config/i386/i386.h.jj	2022-02-25 12:06:45.535493490 +0100
+++ gcc/config/i386/i386.h	2022-03-08 11:20:43.207043370 +0100
@@ -2848,6 +2848,10 @@ extern enum attr_cpu ix86_schedule;
 #define NUM_X86_64_MS_CLOBBERED_REGS 12
 #endif
 
+/* __builtin_eh_return can't handle stack realignment, so disable SSE in
+   libgcc functions that call it.  */
+#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
+
 /*
 Local variables:
 version-control: t


As mentioned in PR104838, this likely isn't specific to just Solaris and
cygwin/mingw.  Fedora uses -msse2 -mfpmath=sse -mstackrealign in its C{,XX}FLAGS
among other things for i686.

	Jakub
Jakub Jelinek March 8, 2022, 11:15 a.m. UTC | #2
On Tue, Mar 08, 2022 at 11:23:51AM +0100, Jakub Jelinek via Gcc-patches wrote:
> On Mon, Mar 07, 2022 at 07:06:28AM -0800, H.J. Lu wrote:
> > Since eh_return doesn't work with stack realignment, disable SSE on
> > unwind-c.c and unwind-dw2.c to avoid stack realignment with the 4-byte
> > incoming stack to avoid SSE usage which is caused by
> > 
> > commit 609e8c492d62d92465460eae3d43dfc4b2c68288
> > Author: H.J. Lu <hjl.tools@gmail.com>
> > Date:   Sat Feb 26 14:17:23 2022 -0800
> > 
> >     x86: Always return pseudo register in ix86_gen_scratch_sse_rtx
> > 
> > when pseudo vector registers are used to expand memset.
> 
> > 	PR target/104781
> > 	* config.host (tmake_file): Add i386/32/t-eh-return-no-sse for
> > 	32-bit x86 Cygwin, MinGW and Solaris.
> > 	* config/i386/32/t-eh-return-no-sse: New file.
> 
> For this, isn't the right fix instead something like:
> 
> --- gcc/config/i386/i386.h.jj	2022-02-25 12:06:45.535493490 +0100
> +++ gcc/config/i386/i386.h	2022-03-08 11:20:43.207043370 +0100
> @@ -2848,6 +2848,10 @@ extern enum attr_cpu ix86_schedule;
>  #define NUM_X86_64_MS_CLOBBERED_REGS 12
>  #endif
>  
> +/* __builtin_eh_return can't handle stack realignment, so disable SSE in
> +   libgcc functions that call it.  */
> +#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> +
>  /*
>  Local variables:
>  version-control: t
> 
> 
> As mentioned in PR104838, this likely isn't specific to just Solaris and
> cygwin/mingw.  Fedora uses -msse2 -mfpmath=sse -mstackrealign in its C{,XX}FLAGS
> among other things for i686.

Now verified that with your full patch applied gcc on Fedora/i686 doesn't
build (gets those sorry messages when compiling unwind-dw2), while if I
replace those 2 libgcc hunks from your patch with the above one it seems to
get past the previous hanging point of gnat1 processes.

	Jakub
Jakub Jelinek March 8, 2022, 12:29 p.m. UTC | #3
On Tue, Mar 08, 2022 at 12:15:15PM +0100, Jakub Jelinek via Gcc-patches wrote:
> > --- gcc/config/i386/i386.h.jj	2022-02-25 12:06:45.535493490 +0100
> > +++ gcc/config/i386/i386.h	2022-03-08 11:20:43.207043370 +0100
> > @@ -2848,6 +2848,10 @@ extern enum attr_cpu ix86_schedule;
> >  #define NUM_X86_64_MS_CLOBBERED_REGS 12
> >  #endif
> >  
> > +/* __builtin_eh_return can't handle stack realignment, so disable SSE in
> > +   libgcc functions that call it.  */
> > +#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> > +
> >  /*
> >  Local variables:
> >  version-control: t
> > 
> > 
> > As mentioned in PR104838, this likely isn't specific to just Solaris and
> > cygwin/mingw.  Fedora uses -msse2 -mfpmath=sse -mstackrealign in its C{,XX}FLAGS
> > among other things for i686.
> 
> Now verified that with your full patch applied gcc on Fedora/i686 doesn't
> build (gets those sorry messages when compiling unwind-dw2), while if I
> replace those 2 libgcc hunks from your patch with the above one it seems to
> get past the previous hanging point of gnat1 processes.

Though, perhaps it should be
#ifndef __x86_64__
#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
#endif
or something similar, on x86-64 one at least normally doesn't use lower
stack realignment unless avx or later.  Maybe we want to use
no-avx for the x86-64 case though.
Disabling sse/sse2 might be a problem especially on mingw where we need to
restore SSE registers in the EH return, no?

Even better would be to make __builtin_eh_return work even with DRAP,
but I admit I haven't understood what exactly is the problem that prevents
it from working.

	Jakub
Eric Botcazou March 8, 2022, 12:49 p.m. UTC | #4
> Disabling sse/sse2 might be a problem especially on mingw where we need to
> restore SSE registers in the EH return, no?

Not in 32-bit mode I think, all XMM registers are call used.
H.J. Lu March 8, 2022, 3:37 p.m. UTC | #5
On Tue, Mar 8, 2022 at 4:29 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Tue, Mar 08, 2022 at 12:15:15PM +0100, Jakub Jelinek via Gcc-patches wrote:
> > > --- gcc/config/i386/i386.h.jj       2022-02-25 12:06:45.535493490 +0100
> > > +++ gcc/config/i386/i386.h  2022-03-08 11:20:43.207043370 +0100
> > > @@ -2848,6 +2848,10 @@ extern enum attr_cpu ix86_schedule;
> > >  #define NUM_X86_64_MS_CLOBBERED_REGS 12
> > >  #endif
> > >
> > > +/* __builtin_eh_return can't handle stack realignment, so disable SSE in
> > > +   libgcc functions that call it.  */
> > > +#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> > > +
> > >  /*
> > >  Local variables:
> > >  version-control: t
> > >
> > >
> > > As mentioned in PR104838, this likely isn't specific to just Solaris and
> > > cygwin/mingw.  Fedora uses -msse2 -mfpmath=sse -mstackrealign in its C{,XX}FLAGS
> > > among other things for i686.
> >
> > Now verified that with your full patch applied gcc on Fedora/i686 doesn't
> > build (gets those sorry messages when compiling unwind-dw2), while if I
> > replace those 2 libgcc hunks from your patch with the above one it seems to
> > get past the previous hanging point of gnat1 processes.
>
> Though, perhaps it should be
> #ifndef __x86_64__
> #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> #endif
> or something similar, on x86-64 one at least normally doesn't use lower
> stack realignment unless avx or later.  Maybe we want to use
> no-avx for the x86-64 case though.

I have verified that AVX and AVX512 have no issues on x86-64.  In 32-bit,
-mstackrealign triggers the problem.

> Disabling sse/sse2 might be a problem especially on mingw where we need to
> restore SSE registers in the EH return, no?

No, it isn't needed.

> Even better would be to make __builtin_eh_return work even with DRAP,
> but I admit I haven't understood what exactly is the problem that prevents
> it from working.

The EH return is a very special case.  Disable SSE in 32-bit is the simplest
way to make the EH return to work.
Jakub Jelinek March 8, 2022, 3:46 p.m. UTC | #6
On Tue, Mar 08, 2022 at 07:37:17AM -0800, H.J. Lu wrote:
> > Though, perhaps it should be
> > #ifndef __x86_64__
> > #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> > #endif
> > or something similar, on x86-64 one at least normally doesn't use lower
> > stack realignment unless avx or later.  Maybe we want to use
> > no-avx for the x86-64 case though.
> 
> I have verified that AVX and AVX512 have no issues on x86-64.  In 32-bit,
> -mstackrealign triggers the problem.

I bet it would be a problem if we started vectorizing something in there
using avx/avx2/avx512*.  But given the sorry, I think we'd find that out
immediately.

> > Disabling sse/sse2 might be a problem especially on mingw where we need to
> > restore SSE registers in the EH return, no?
> 
> No, it isn't needed.

I meant for 64-bit where I think the Windows ABI preserves some XMM regs
(low 128-bits of them).  So my earlier patch to just define
LIBGCC2_UNWIND_ATTRIBUTE unconditionally would be wrong for it.

> > Even better would be to make __builtin_eh_return work even with DRAP,
> > but I admit I haven't understood what exactly is the problem that prevents
> > it from working.
> 
> The EH return is a very special case.  Disable SSE in 32-bit is the simplest
> way to make the EH return to work.

Ok.  So, what do you think about replacing the libgcc/ part of your patch
with that
/* __builtin_eh_return can't handle stack realignment, so disable SSE in
   32-bit libgcc functions that call it.  */
#ifndef __x86_64__
#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
#endif
?
I'm bootstrapping/regtesting such a patch right now (because I needed some
quick fix for the gnat1 hangs).

	Jakub
H.J. Lu March 8, 2022, 4:09 p.m. UTC | #7
On Tue, Mar 8, 2022 at 7:46 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Tue, Mar 08, 2022 at 07:37:17AM -0800, H.J. Lu wrote:
> > > Though, perhaps it should be
> > > #ifndef __x86_64__
> > > #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> > > #endif
> > > or something similar, on x86-64 one at least normally doesn't use lower
> > > stack realignment unless avx or later.  Maybe we want to use
> > > no-avx for the x86-64 case though.
> >
> > I have verified that AVX and AVX512 have no issues on x86-64.  In 32-bit,
> > -mstackrealign triggers the problem.
>
> I bet it would be a problem if we started vectorizing something in there
> using avx/avx2/avx512*.  But given the sorry, I think we'd find that out

YMM and ZMM can be used to expand memset with -march=native.
It works fine on Linux.  No stack realignment is needed.

> immediately.

True.

> > > Disabling sse/sse2 might be a problem especially on mingw where we need to
> > > restore SSE registers in the EH return, no?
> >
> > No, it isn't needed.
>
> I meant for 64-bit where I think the Windows ABI preserves some XMM regs

Does it need to realign the stack?

> (low 128-bits of them).  So my earlier patch to just define
> LIBGCC2_UNWIND_ATTRIBUTE unconditionally would be wrong for it.
>
> > > Even better would be to make __builtin_eh_return work even with DRAP,
> > > but I admit I haven't understood what exactly is the problem that prevents
> > > it from working.
> >
> > The EH return is a very special case.  Disable SSE in 32-bit is the simplest
> > way to make the EH return to work.
>
> Ok.  So, what do you think about replacing the libgcc/ part of your patch
> with that
> /* __builtin_eh_return can't handle stack realignment, so disable SSE in
>    32-bit libgcc functions that call it.  */
> #ifndef __x86_64__
> #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> #endif
> ?

Yes, it should work.

Thanks.

> I'm bootstrapping/regtesting such a patch right now (because I needed some
> quick fix for the gnat1 hangs).
>
>         Jakub
>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index efa947f9795..4121f986221 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -9444,12 +9444,15 @@  ix86_expand_epilogue (int style)
 	  rtx sa = EH_RETURN_STACKADJ_RTX;
 	  rtx_insn *insn;
 
-	  /* %ecx can't be used for both DRAP register and eh_return.  */
-	  if (crtl->drap_reg)
-	    gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
+	  /* Stack realignment doesn't work with eh_return.  */
+	  if (crtl->stack_realign_needed)
+	    sorry ("Stack realignment not supported with "
+		   "%<__builtin_eh_return%>");
 
 	  /* regparm nested functions don't work with eh_return.  */
-	  gcc_assert (!ix86_static_chain_on_stack);
+	  if (ix86_static_chain_on_stack)
+	    sorry ("regparm nested function not supported with "
+		   "%<__builtin_eh_return%>");
 
 	  if (frame_pointer_needed)
 	    {
diff --git a/gcc/testsuite/gcc.target/i386/eh_return-1.c b/gcc/testsuite/gcc.target/i386/eh_return-1.c
index b21fd75fc93..43f94f01a97 100644
--- a/gcc/testsuite/gcc.target/i386/eh_return-1.c
+++ b/gcc/testsuite/gcc.target/i386/eh_return-1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=haswell -mno-avx512f -mtune-ctrl=avx256_move_by_pieces" } */
+/* { dg-options "-O2 -mincoming-stack-boundary=4 -march=haswell -mno-avx512f -mtune-ctrl=avx256_move_by_pieces" } */
 
 struct _Unwind_Context
 {
diff --git a/gcc/testsuite/gcc.target/i386/eh_return-2.c b/gcc/testsuite/gcc.target/i386/eh_return-2.c
index f23f4492dac..cb762f92cc2 100644
--- a/gcc/testsuite/gcc.target/i386/eh_return-2.c
+++ b/gcc/testsuite/gcc.target/i386/eh_return-2.c
@@ -1,6 +1,6 @@ 
 /* PR target/101772  */
 /* { dg-do compile } */
-/* { dg-additional-options "-O0 -march=x86-64 -mstackrealign" } */
+/* { dg-additional-options "-O0 -mincoming-stack-boundary=4 -march=x86-64 -mstackrealign" } */
 
 struct _Unwind_Context _Unwind_Resume_or_Rethrow_this_context;
 
diff --git a/libgcc/config.host b/libgcc/config.host
index 094fd3ad254..1588ab6cf20 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1583,3 +1583,16 @@  case ${host} in
   tmake_file="$tmake_file t-gthr-noweak"
   ;;
 esac
+
+case ${host} in
+i[34567]86-*-* | x86_64-*-*)
+  if test "${host_address}" = 32; then
+    case ${host} in
+    *-*-cygwin* | *-*-mingw* | *-*-solaris2*)
+      # Disable SSE on unwind-c.c and unwind-dw2.c to avoid stack
+      # realignment with the 4-byte aligned incoming stack.
+      tmake_file="${tmake_file} i386/${host_address}/t-eh-return-no-sse"
+      ;;
+    esac
+  fi
+esac
diff --git a/libgcc/config/i386/32/t-eh-return-no-sse b/libgcc/config/i386/32/t-eh-return-no-sse
new file mode 100644
index 00000000000..5a8c3135911
--- /dev/null
+++ b/libgcc/config/i386/32/t-eh-return-no-sse
@@ -0,0 +1,5 @@ 
+# Since eh_return doesn't work with stack realignment, disable SSE on
+# unwind-c.c and unwind-dw2.c to avoid stack realignment with the 4-byte
+# incoming stack.
+
+unwind-c.o unwind-c_s.o unwind-dw2.o unwind-dw2_s.o : HOST_LIBGCC2_CFLAGS += -mno-sse