diff mbox series

target/104581 - compile-time regression in mode-switching

Message ID 20220217134640.20BA913BF6@imap2.suse-dmz.suse.de
State New
Headers show
Series target/104581 - compile-time regression in mode-switching | expand

Commit Message

Richard Biener Feb. 17, 2022, 1:46 p.m. UTC
The x86 backend piggy-backs on mode-switching for insertion of
vzeroupper.  A recent improvement there was implemented in a way
to walk possibly the whole basic-block for all DF reg def definitions
in its mode_needed hook which is called for each instruction in
a basic-block during mode-switching local analysis.

The following mostly reverts this improvement.  It needs to be
re-done in a way more consistent with a local dataflow which
probably means making targets aware of the state of the local
dataflow analysis.

This improves compile-time of some 538.imagick_r TU from
362s to 16s with -Ofast -mavx2 -fprofile-generate.

Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?

Thanks,
Richard.

2022-02-17  Richard Biener  <rguenther@suse.de>

	PR target/104581
	* config/i386/i386.cc (ix86_avx_u128_mode_source): Remove.
	(ix86_avx_u128_mode_needed): Return AVX_U128_DIRTY instead
	of calling ix86_avx_u128_mode_source which would eventually
	have returned AVX_U128_ANY in some very special case.

	* gcc.target/i386/pr101456-1.c: XFAIL.
---
 gcc/config/i386/i386.cc                    | 78 +---------------------
 gcc/testsuite/gcc.target/i386/pr101456-1.c |  3 +-
 2 files changed, 5 insertions(+), 76 deletions(-)

Comments

Hongtao Liu Feb. 18, 2022, 2:40 a.m. UTC | #1
On Thu, Feb 17, 2022 at 9:47 PM Richard Biener via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> The x86 backend piggy-backs on mode-switching for insertion of
> vzeroupper.  A recent improvement there was implemented in a way
> to walk possibly the whole basic-block for all DF reg def definitions
> in its mode_needed hook which is called for each instruction in
> a basic-block during mode-switching local analysis.
>
> The following mostly reverts this improvement.  It needs to be
> re-done in a way more consistent with a local dataflow which
> probably means making targets aware of the state of the local
> dataflow analysis.
>
> This improves compile-time of some 538.imagick_r TU from
> 362s to 16s with -Ofast -mavx2 -fprofile-generate.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?
LGTM, have talked to H.J, he also agrees.
>
> Thanks,
> Richard.
>
> 2022-02-17  Richard Biener  <rguenther@suse.de>
>
>         PR target/104581
>         * config/i386/i386.cc (ix86_avx_u128_mode_source): Remove.
>         (ix86_avx_u128_mode_needed): Return AVX_U128_DIRTY instead
>         of calling ix86_avx_u128_mode_source which would eventually
>         have returned AVX_U128_ANY in some very special case.
>
>         * gcc.target/i386/pr101456-1.c: XFAIL.
> ---
>  gcc/config/i386/i386.cc                    | 78 +---------------------
>  gcc/testsuite/gcc.target/i386/pr101456-1.c |  3 +-
>  2 files changed, 5 insertions(+), 76 deletions(-)
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index cf246e74e57..e4b42fbba6f 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -14377,80 +14377,12 @@ ix86_check_avx_upper_register (const_rtx exp)
>
>  static void
>  ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
> - {
> -   if (ix86_check_avx_upper_register (dest))
> +{
> +  if (ix86_check_avx_upper_register (dest))
>      {
>        bool *used = (bool *) data;
>        *used = true;
>      }
> - }
> -
> -/* For YMM/ZMM store or YMM/ZMM extract.  Return mode for the source
> -   operand of SRC DEFs in the same basic block before INSN.  */
> -
> -static int
> -ix86_avx_u128_mode_source (rtx_insn *insn, const_rtx src)
> -{
> -  basic_block bb = BLOCK_FOR_INSN (insn);
> -  rtx_insn *end = BB_END (bb);
> -
> -  /* Return AVX_U128_DIRTY if there is no DEF in the same basic
> -     block.  */
> -  int status = AVX_U128_DIRTY;
> -
> -  for (df_ref def = DF_REG_DEF_CHAIN (REGNO (src));
> -       def; def = DF_REF_NEXT_REG (def))
> -    if (DF_REF_BB (def) == bb)
> -      {
> -       /* Ignore DEF from different basic blocks.  */
> -       rtx_insn *def_insn = DF_REF_INSN (def);
> -
> -       /* Check if DEF_INSN is before INSN.  */
> -       rtx_insn *next;
> -       for (next = NEXT_INSN (def_insn);
> -            next != nullptr && next != end && next != insn;
> -            next = NEXT_INSN (next))
> -         ;
> -
> -       /* Skip if DEF_INSN isn't before INSN.  */
> -       if (next != insn)
> -         continue;
> -
> -       /* Return AVX_U128_DIRTY if the source operand of DEF_INSN
> -          isn't constant zero.  */
> -
> -       if (CALL_P (def_insn))
> -         {
> -           bool avx_upper_reg_found = false;
> -           note_stores (def_insn,
> -                        ix86_check_avx_upper_stores,
> -                        &avx_upper_reg_found);
> -
> -           /* Return AVX_U128_DIRTY if call returns AVX.  */
> -           if (avx_upper_reg_found)
> -             return AVX_U128_DIRTY;
> -
> -           continue;
> -         }
> -
> -       rtx set = single_set (def_insn);
> -       if (!set)
> -         return AVX_U128_DIRTY;
> -
> -       rtx dest = SET_DEST (set);
> -
> -       /* Skip if DEF_INSN is not an AVX load.  Return AVX_U128_DIRTY
> -          if the source operand isn't constant zero.  */
> -       if (ix86_check_avx_upper_register (dest)
> -           && standard_sse_constant_p (SET_SRC (set),
> -                                       GET_MODE (dest)) != 1)
> -         return AVX_U128_DIRTY;
> -
> -       /* We get here only if all AVX loads are from constant zero.  */
> -       status = AVX_U128_ANY;
> -      }
> -
> -  return status;
>  }
>
>  /* Return needed mode for entity in optimize_mode_switching pass.  */
> @@ -14520,11 +14452,7 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
>         {
>           FOR_EACH_SUBRTX (iter, array, src, NONCONST)
>             if (ix86_check_avx_upper_register (*iter))
> -             {
> -               int status = ix86_avx_u128_mode_source (insn, *iter);
> -               if (status == AVX_U128_DIRTY)
> -                 return status;
> -             }
> +             return AVX_U128_DIRTY;
>         }
>
>        /* This isn't YMM/ZMM load/store.  */
> diff --git a/gcc/testsuite/gcc.target/i386/pr101456-1.c b/gcc/testsuite/gcc.target/i386/pr101456-1.c
> index 803fc6e0207..7fb3a3f055c 100644
> --- a/gcc/testsuite/gcc.target/i386/pr101456-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr101456-1.c
> @@ -30,4 +30,5 @@ foo3 (void)
>    bar ();
>  }
>
> -/* { dg-final { scan-assembler-not "vzeroupper" } } */
> +/* See PR104581 for the XFAIL reason.  */
> +/* { dg-final { scan-assembler-not "vzeroupper" { xfail *-*-* } } } */
> --
> 2.34.1
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index cf246e74e57..e4b42fbba6f 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -14377,80 +14377,12 @@  ix86_check_avx_upper_register (const_rtx exp)
 
 static void
 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
- {
-   if (ix86_check_avx_upper_register (dest))
+{
+  if (ix86_check_avx_upper_register (dest))
     {
       bool *used = (bool *) data;
       *used = true;
     }
- }
-
-/* For YMM/ZMM store or YMM/ZMM extract.  Return mode for the source
-   operand of SRC DEFs in the same basic block before INSN.  */
-
-static int
-ix86_avx_u128_mode_source (rtx_insn *insn, const_rtx src)
-{
-  basic_block bb = BLOCK_FOR_INSN (insn);
-  rtx_insn *end = BB_END (bb);
-
-  /* Return AVX_U128_DIRTY if there is no DEF in the same basic
-     block.  */
-  int status = AVX_U128_DIRTY;
-
-  for (df_ref def = DF_REG_DEF_CHAIN (REGNO (src));
-       def; def = DF_REF_NEXT_REG (def))
-    if (DF_REF_BB (def) == bb)
-      {
-	/* Ignore DEF from different basic blocks.  */
-	rtx_insn *def_insn = DF_REF_INSN (def);
-
-	/* Check if DEF_INSN is before INSN.  */
-	rtx_insn *next;
-	for (next = NEXT_INSN (def_insn);
-	     next != nullptr && next != end && next != insn;
-	     next = NEXT_INSN (next))
-	  ;
-
-	/* Skip if DEF_INSN isn't before INSN.  */
-	if (next != insn)
-	  continue;
-
-	/* Return AVX_U128_DIRTY if the source operand of DEF_INSN
-	   isn't constant zero.  */
-
-	if (CALL_P (def_insn))
-	  {
-	    bool avx_upper_reg_found = false;
-	    note_stores (def_insn,
-			 ix86_check_avx_upper_stores,
-			 &avx_upper_reg_found);
-
-	    /* Return AVX_U128_DIRTY if call returns AVX.  */
-	    if (avx_upper_reg_found)
-	      return AVX_U128_DIRTY;
-
-	    continue;
-	  }
-
-	rtx set = single_set (def_insn);
-	if (!set)
-	  return AVX_U128_DIRTY;
-
-	rtx dest = SET_DEST (set);
-
-	/* Skip if DEF_INSN is not an AVX load.  Return AVX_U128_DIRTY
-	   if the source operand isn't constant zero.  */
-	if (ix86_check_avx_upper_register (dest)
-	    && standard_sse_constant_p (SET_SRC (set),
-					GET_MODE (dest)) != 1)
-	  return AVX_U128_DIRTY;
-
-	/* We get here only if all AVX loads are from constant zero.  */
-	status = AVX_U128_ANY;
-      }
-
-  return status;
 }
 
 /* Return needed mode for entity in optimize_mode_switching pass.  */
@@ -14520,11 +14452,7 @@  ix86_avx_u128_mode_needed (rtx_insn *insn)
 	{
 	  FOR_EACH_SUBRTX (iter, array, src, NONCONST)
 	    if (ix86_check_avx_upper_register (*iter))
-	      {
-		int status = ix86_avx_u128_mode_source (insn, *iter);
-		if (status == AVX_U128_DIRTY)
-		  return status;
-	      }
+	      return AVX_U128_DIRTY;
 	}
 
       /* This isn't YMM/ZMM load/store.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr101456-1.c b/gcc/testsuite/gcc.target/i386/pr101456-1.c
index 803fc6e0207..7fb3a3f055c 100644
--- a/gcc/testsuite/gcc.target/i386/pr101456-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr101456-1.c
@@ -30,4 +30,5 @@  foo3 (void)
   bar ();
 }
 
-/* { dg-final { scan-assembler-not "vzeroupper" } } */
+/* See PR104581 for the XFAIL reason.  */
+/* { dg-final { scan-assembler-not "vzeroupper" { xfail *-*-* } } } */