diff mbox series

[v2] fwprop: Allow (subreg (mem)) simplifications

Message ID 20210119181704.61991-1-iii@linux.ibm.com
State New
Headers show
Series [v2] fwprop: Allow (subreg (mem)) simplifications | expand

Commit Message

Ilya Leoshkevich Jan. 19, 2021, 6:17 p.m. UTC
v1: https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563800.html

v1 -> v2: Allow (mem) -> (subreg) propagation only for single uses.

Boostrapped and regtested on x86_64-redhat-linux, ppc64le-redhat-linux
and s390x-redhat-linux.  Ok for master?



Suppose we have:

    (set (reg/v:TF 63) (mem/c:TF (reg/v:DI 62)))
    (set (reg:FPRX2 66) (subreg:FPRX2 (reg/v:TF 63) 0))

It is clearly profitable to propagate the first insn into the second
one and get:

    (set (reg:FPRX2 66) (mem/c:FPRX2 (reg/v:DI 62)))

fwprop actually manages to perform this, but doesn't think the result is
worth it, which results in unnecessary store/load sequences on s390.
Improve the situation by classifying SUBREG -> MEM changes as
profitable.

gcc/ChangeLog:

2021-01-15  Ilya Leoshkevich  <iii@linux.ibm.com>

	* fwprop.c (fwprop_propagation::classify_result): Allow
	(subreg (mem)) simplifications.
---
 gcc/fwprop.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

Comments

Richard Sandiford Jan. 21, 2021, 12:29 p.m. UTC | #1
Ilya Leoshkevich via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563800.html
>
> v1 -> v2: Allow (mem) -> (subreg) propagation only for single uses.
>
> Boostrapped and regtested on x86_64-redhat-linux, ppc64le-redhat-linux
> and s390x-redhat-linux.  Ok for master?
>
>
>
> Suppose we have:
>
>     (set (reg/v:TF 63) (mem/c:TF (reg/v:DI 62)))
>     (set (reg:FPRX2 66) (subreg:FPRX2 (reg/v:TF 63) 0))
>
> It is clearly profitable to propagate the first insn into the second
> one and get:
>
>     (set (reg:FPRX2 66) (mem/c:FPRX2 (reg/v:DI 62)))
>
> fwprop actually manages to perform this, but doesn't think the result is
> worth it, which results in unnecessary store/load sequences on s390.
> Improve the situation by classifying SUBREG -> MEM changes as
> profitable.
>
> gcc/ChangeLog:
>
> 2021-01-15  Ilya Leoshkevich  <iii@linux.ibm.com>
>
> 	* fwprop.c (fwprop_propagation::classify_result): Allow
> 	(subreg (mem)) simplifications.
> ---
>  gcc/fwprop.c | 22 +++++++++++++++++-----
>  1 file changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/fwprop.c b/gcc/fwprop.c
> index eff8f7cc141..02d3d507cbc 100644
> --- a/gcc/fwprop.c
> +++ b/gcc/fwprop.c
> @@ -176,7 +176,7 @@ namespace
>      static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
>      static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
>  
> -    fwprop_propagation (rtx_insn *, rtx, rtx);
> +    fwprop_propagation (rtx_insn *, insn_info *, rtx, rtx);
>  
>      bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
>      bool folded_to_constants_p () const;
> @@ -185,13 +185,18 @@ namespace
>      bool check_mem (int, rtx) final override;
>      void note_simplification (int, uint16_t, rtx, rtx) final override;
>      uint16_t classify_result (rtx, rtx);
> +
> +  private:
> +    const bool single_use_p;
>    };
>  }
>  
>  /* Prepare to replace FROM with TO in INSN.  */
>  
> -fwprop_propagation::fwprop_propagation (rtx_insn *insn, rtx from, rtx to)
> -  : insn_propagation (insn, from, to)
> +fwprop_propagation::fwprop_propagation (rtx_insn *insn, insn_info *def_insn,
> +					rtx from, rtx to)
> +    : insn_propagation (insn, from, to),
> +      single_use_p (def_insn->num_uses () == 1)

I think we should check whether the def and use are in the same ebb
as well.  For that, I guess we should change the first insn to be an
“insn_info *” too (and perhaps rename it to use_insn, now that there
are two insns in play).

>  {
>    should_check_mems = true;
>    should_note_simplifications = true;
> @@ -262,6 +267,13 @@ fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
>        && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
>      return PROFITABLE;
>  
> +  /* Allow (subreg (mem)) -> (mem) simplifications.  Do not allow propagation
> +     of (mem)s into multiple uses, since those are not profitable, as well as
> +     creating new (mem/v)s, since DCE will not remove the old ones.  */
> +  if (single_use_p && SUBREG_P (old_rtx) && MEM_P (new_rtx)
> +      && !MEM_VOLATILE_P (new_rtx))
> +    return PROFITABLE;

Nit: one check per line if they don't fit on a single line.

I think we should check !paradoxical_subreg_p (old_rtx) too.  I'm not
sure we'd allow the propagation in that case, but it seems like something
we should check for performance reasons too.

Given what you said in the other message about combine, I agree this
is a reasonable workaround.  I don't know whether it's suitable for
stage 4 or whether it would need to wait for stage 1.

Thanks,
Richard
diff mbox series

Patch

diff --git a/gcc/fwprop.c b/gcc/fwprop.c
index eff8f7cc141..02d3d507cbc 100644
--- a/gcc/fwprop.c
+++ b/gcc/fwprop.c
@@ -176,7 +176,7 @@  namespace
     static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
     static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
 
-    fwprop_propagation (rtx_insn *, rtx, rtx);
+    fwprop_propagation (rtx_insn *, insn_info *, rtx, rtx);
 
     bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
     bool folded_to_constants_p () const;
@@ -185,13 +185,18 @@  namespace
     bool check_mem (int, rtx) final override;
     void note_simplification (int, uint16_t, rtx, rtx) final override;
     uint16_t classify_result (rtx, rtx);
+
+  private:
+    const bool single_use_p;
   };
 }
 
 /* Prepare to replace FROM with TO in INSN.  */
 
-fwprop_propagation::fwprop_propagation (rtx_insn *insn, rtx from, rtx to)
-  : insn_propagation (insn, from, to)
+fwprop_propagation::fwprop_propagation (rtx_insn *insn, insn_info *def_insn,
+					rtx from, rtx to)
+    : insn_propagation (insn, from, to),
+      single_use_p (def_insn->num_uses () == 1)
 {
   should_check_mems = true;
   should_note_simplifications = true;
@@ -262,6 +267,13 @@  fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
       && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
     return PROFITABLE;
 
+  /* Allow (subreg (mem)) -> (mem) simplifications.  Do not allow propagation
+     of (mem)s into multiple uses, since those are not profitable, as well as
+     creating new (mem/v)s, since DCE will not remove the old ones.  */
+  if (single_use_p && SUBREG_P (old_rtx) && MEM_P (new_rtx)
+      && !MEM_VOLATILE_P (new_rtx))
+    return PROFITABLE;
+
   return 0;
 }
 
@@ -363,7 +375,7 @@  try_fwprop_subst_note (insn_info *use_insn, insn_info *def_insn,
   rtx_insn *use_rtl = use_insn->rtl ();
 
   insn_change_watermark watermark;
-  fwprop_propagation prop (use_rtl, dest, src);
+  fwprop_propagation prop (use_rtl, def_insn, dest, src);
   if (!prop.apply_to_rvalue (&XEXP (note, 0)))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
@@ -426,7 +438,7 @@  try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
   rtx_insn *use_rtl = use_insn->rtl ();
 
   insn_change_watermark watermark;
-  fwprop_propagation prop (use_rtl, dest, src);
+  fwprop_propagation prop (use_rtl, def_insn, dest, src);
   if (!prop.apply_to_pattern (loc))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))