diff mbox series

[cris,4/4] cris: Add new pass eliminating compares after delay-slot-filling

Message ID 202007130838.06D8cA8m030555@ignucius.se.axis.com
State New
Headers show
Series various cc0-related (and not) regression fixes | expand

Commit Message

Hans-Peter Nilsson July 13, 2020, 8:38 a.m. UTC
(All patches are committed.)

Delayed-branch-slot-filling a.k.a. reorg or dbr, often causes
opportunities for more compare-elimination than were visible for
the cmpelim pass.  With cc0, these were caught by the
elimination pass run in "final", thus the missed opportunities
is a regression.  A simple reorg-aware pass run just after reorg
handles most of them, if not all.  I chose to keep the "mach2"
pass identifier string I copy-pasted from the SPARC port instead
of inventing one like "postdbr_cmpelim".  Note the gap in numbers
in the test-case file names.

gcc:
	PR target/93372
	* config/cris/cris-passes.def: New file.
	* config/cris/t-cris (PASSES_EXTRA): Add cris-passes.def.
	* config/cris/cris.c: Add infrastructure bits and pass execute
	function cris_postdbr_cmpelim.
	* config/cris/cris-protos.h (make_pass_cris_postdbr_cmpelim): Declare.

gcc/testsuite:
	* gcc.target/cris/pr93372-44.c, gcc.target/cris/pr93372-46.c: New.
---
 gcc/config/cris/cris-passes.def            |  20 +++
 gcc/config/cris/cris-protos.h              |   2 +
 gcc/config/cris/cris.c                     | 202 +++++++++++++++++++++++++++++
 gcc/config/cris/t-cris                     |   2 +
 gcc/testsuite/gcc.target/cris/pr93372-44.c |  13 ++
 gcc/testsuite/gcc.target/cris/pr93372-46.c |  16 +++
 6 files changed, 255 insertions(+)
 create mode 100644 gcc/config/cris/cris-passes.def
 create mode 100644 gcc/testsuite/gcc.target/cris/pr93372-44.c
 create mode 100644 gcc/testsuite/gcc.target/cris/pr93372-46.c
diff mbox series

Patch

diff --git a/gcc/config/cris/cris-passes.def b/gcc/config/cris/cris-passes.def
new file mode 100644
index 00000000000..db3c74d4978
--- /dev/null
+++ b/gcc/config/cris/cris-passes.def
@@ -0,0 +1,20 @@ 
+/* Description of target passes for Visium.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+INSERT_PASS_AFTER (pass_delay_slots, 1, pass_cris_postdbr_cmpelim);
diff --git a/gcc/config/cris/cris-protos.h b/gcc/config/cris/cris-protos.h
index 2db1ea1b8bc..053bba90df4 100644
--- a/gcc/config/cris/cris-protos.h
+++ b/gcc/config/cris/cris-protos.h
@@ -60,3 +60,5 @@  extern int cris_fatal (char *);
 extern int cris_initial_elimination_offset (int, int);
 
 extern void cris_init_expanders (void);
+
+extern rtl_opt_pass *make_pass_cris_postdbr_cmpelim (gcc::context *);
diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c
index b26b9f2e883..59cbceef7a4 100644
--- a/gcc/config/cris/cris.c
+++ b/gcc/config/cris/cris.c
@@ -51,6 +51,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "output.h"
 #include "tm-constrs.h"
 #include "builtins.h"
+#include "cfgrtl.h"
+#include "tree-pass.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -129,6 +131,8 @@  static void cris_asm_output_mi_thunk
 static void cris_file_start (void);
 static void cris_init_libfuncs (void);
 
+static unsigned int cris_postdbr_cmpelim (void);
+
 static reg_class_t cris_preferred_reload_class (rtx, reg_class_t);
 
 static int cris_register_move_cost (machine_mode, reg_class_t, reg_class_t);
@@ -298,6 +302,204 @@  int cris_cpu_version = CRIS_DEFAULT_CPU_VERSION;
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 
+namespace {
+
+const pass_data pass_data_cris_postdbr_cmpelim =
+{
+  RTL_PASS, /* type */
+  "mach2", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_MACH_DEP, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_cris_postdbr_cmpelim : public rtl_opt_pass
+{
+public:
+  pass_cris_postdbr_cmpelim (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_cris_postdbr_cmpelim, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual unsigned int execute (function *)
+    {
+      return cris_postdbr_cmpelim ();
+    }
+
+  /* No use running this if reorg and cmpelim aren't both run.  */
+  virtual bool gate (function *)
+    {
+      return
+	optimize > 0
+	&& flag_delayed_branch
+	&& flag_compare_elim_after_reload;
+    }
+};
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_cris_postdbr_cmpelim (gcc::context *ctxt)
+{
+  return new pass_cris_postdbr_cmpelim (ctxt);
+}
+
+/* "Cheap version" of cmpelim, making use of the opportunities opened up
+   by reorg.
+
+   Go through the insns of a function and look at each actual compare
+   insn; considering only those that compare a register to 0.  If the
+   previous CC-affecting insn sets the compared register or if a move
+   reads from it, try to change that into a CC-setting move and try to
+   have it recognized.  Bail at labels or non-matching insns that
+   clobber the compared register.  If successful, delete the compare.
+
+   Also, reorg isn't up to date regarding data-flow handling, so we
+   can't go beyond classic RTL scanning.  */
+
+static unsigned int
+cris_postdbr_cmpelim ()
+{
+  rtx_insn *insn;
+  rtx_insn *next;
+  rtx_insn *prev_cc_setter = 0;
+  rtx_insn *prev_cc_outer = 0;
+  rtx dccr = gen_rtx_REG (CCmode, CRIS_CC0_REGNUM);
+
+  /* Now look for compares in the insn stream.  */
+  for (insn = get_insns (); insn; insn = next)
+    {
+      rtx_insn *outer_insn = insn;
+      rtx pat = PATTERN (insn);
+
+      next = NEXT_INSN (outer_insn);
+
+      /* Forget previous state when we see a label; we can't track or
+	 merge its state.  */
+      if (LABEL_P (insn))
+	{
+	  prev_cc_setter = 0;
+	  continue;
+	}
+
+      if (!NONDEBUG_INSN_P (insn))
+	continue;
+
+      /* Consider filled delay slots; there might be a comparison there.
+	 It's only the second insn in a sequence that is interesting.  */
+      if (GET_CODE (pat) == SEQUENCE)
+	insn = as_a <rtx_insn *> XVECEXP (pat, 0, 1);
+      /* The "else" eliminates temptations to consider an insn in a
+	 delay slot for elimination; it can only be a prev_cc_setter.  */
+      else if (prev_cc_setter != 0 && GET_CODE (pat) == SET)
+	{
+	  rtx dest = SET_DEST (pat);
+	  rtx src = SET_SRC (pat);
+	  rtx prev_set;
+
+	  if (REG_P (dest)
+	      && REGNO (dest) == CRIS_CC0_REGNUM
+	      && GET_CODE (src) == COMPARE
+	      && REG_P (XEXP (src, 0))
+	      && XEXP (src, 1) == const0_rtx
+	      && (prev_set = single_set (prev_cc_setter)) != 0)
+	    {
+	      /* We have a candidate, and a prev_cc_setter to inspect.  */
+	      rtx reg = XEXP (src, 0);
+	      rtx prev_dest = SET_DEST (prev_set);
+	      rtx prev_src = SET_SRC (prev_set);
+	      bool src_same = rtx_equal_p (prev_src, reg);
+
+	      /* If the prev_cc_setter isn't a simple SET, or if the
+		 compared register is modified in prev_cc_setter without
+		 being the destination, or if it's modified between
+		 prev_cc_setter (equal to or contained in prev_cc_outer)
+		 and this insn, then we can't use the flags result.  And
+		 of course, the SET_DEST of prev_cc_setter (the main
+		 interest, not dccr) has to be the same register and
+		 mode we're interested in - or the SET_SRC.  We've
+		 already checked that the compared register isn't
+		 changed in-between.  */
+	      if (REG_P (prev_dest)
+		  && ! reg_set_p (reg, prev_src)
+		  && ! reg_set_between_p (reg, prev_cc_outer, outer_insn)
+		  && (src_same || rtx_equal_p (prev_dest, reg)))
+		{
+		  machine_mode ccmode = GET_MODE (src);
+		  rtx modeadjusted_dccr
+		    = (ccmode == CCmode ? dccr
+		       : gen_rtx_REG (CCmode, CRIS_CC0_REGNUM));
+		  rtx compare
+		    /* We don't need to copy_rtx pat: we're going to
+		       delete that insn. */
+		    = (src_same ? pat
+		       : gen_rtx_SET (modeadjusted_dccr,
+				      gen_rtx_COMPARE (ccmode,
+						       copy_rtx (prev_src),
+						       const0_rtx)));
+
+		  /* Replace tentatively, the prev_set combo that is
+		     ((set d s) (clobber dccr)) with
+		     ((cmp s 0) (set d s)) where (cmp s 0) is the
+		     compare we're looking at, and validate it or fail
+		     the whole thing.  First replace the ((set d s) ...)
+		     with ((cmp s 0) ...)).  */
+		  validate_change (prev_cc_setter,
+				   &XVECEXP (PATTERN (prev_cc_setter),
+					     0, 0), compare, true);
+
+		  /* Then the clobber with the (set d s).  */
+		  validate_change (prev_cc_setter,
+				   &XVECEXP (PATTERN (prev_cc_setter),
+					     0, 1), prev_set, true);
+
+		  if (apply_change_group ())
+		    {
+		      delete_insn (insn);
+
+		      /* We eliminated the compare.  Then we must go to
+			 the next insn: we can't consider the eliminated
+			 insn for the next prev_cc_setter.
+
+			 FIXME: if later insns still match, we could do
+			 the delete_insn part only, for them.  But, it
+			 seems rare that reorg would manage to move a
+			 second CC-clobber to another delay-slot,
+			 leaving two identical compares (and presumably
+			 users).  */
+		      prev_cc_setter = 0;
+		      continue;
+		    }
+		}
+	      }
+	}
+
+      if (reg_set_p (dccr, insn))
+	{
+	  rtx pat = PATTERN (insn);
+
+	  prev_cc_setter = 0;
+
+	  /* Make sure we can use it later on, otherwise forget it.
+	     Don't look too close, we're going to pass a lot of these.
+	     Just make sure the structure is that we can work with. */
+	  if (GET_CODE (pat) == PARALLEL
+	      && XVECLEN (pat, 0) == 2
+	      && GET_CODE (XVECEXP (pat, 0, 1)) == CLOBBER)
+	    {
+	      prev_cc_setter = insn;
+	      prev_cc_outer = outer_insn;
+	    }
+	}
+    }
+
+  return 0;
+}
+
 /* Helper for cris_load_multiple_op and cris_ret_movem_op.  */
 
 bool
diff --git a/gcc/config/cris/t-cris b/gcc/config/cris/t-cris
index 97d6efa534b..eb4411ede56 100644
--- a/gcc/config/cris/t-cris
+++ b/gcc/config/cris/t-cris
@@ -24,3 +24,5 @@ 
 # The makefile macros etc. are included in the order found in the
 # section "Target Fragment" in the gcc info-files (or the paper copy) of
 # "Using and Porting GCC"
+
+PASSES_EXTRA += $(srcdir)/config/cris/cris-passes.def
diff --git a/gcc/testsuite/gcc.target/cris/pr93372-44.c b/gcc/testsuite/gcc.target/cris/pr93372-44.c
new file mode 100644
index 00000000000..5bd35d12087
--- /dev/null
+++ b/gcc/testsuite/gcc.target/cris/pr93372-44.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not {\tcmp|\ttest} } } */
+
+extern void foo(void);
+unsigned int x (unsigned int a, unsigned int b, unsigned int *c)
+{
+  unsigned int y = a & 15;
+  unsigned int z = y + b;
+  if (y == 0)
+    *c = z;
+  return z;
+}
diff --git a/gcc/testsuite/gcc.target/cris/pr93372-46.c b/gcc/testsuite/gcc.target/cris/pr93372-46.c
new file mode 100644
index 00000000000..5d7c0301318
--- /dev/null
+++ b/gcc/testsuite/gcc.target/cris/pr93372-46.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=v10" } */
+/* { dg-final { scan-assembler-not {\tnop} } } */
+/* { dg-final { scan-assembler-times {\tcmp|\ttest|\tmove.d \$r10,\$r} 1 } } */
+
+/* We either have a move from "a" to some other register or a compare. */
+
+extern void foo(void);
+unsigned int x (unsigned int a, unsigned int b, unsigned int *c, unsigned int *d)
+{
+  unsigned int z = __builtin_clz(b);
+  if (a != 0)
+    *c = a;
+  *d = a;
+  return z;
+}