dbr: Filter-out TARGET_FLAGS_REGNUM from end_of_function_needs.
diff mbox series

Message ID 202001251414.00PEE0CP020535@ignucius.se.axis.com
State New
Headers show
Series
  • dbr: Filter-out TARGET_FLAGS_REGNUM from end_of_function_needs.
Related show

Commit Message

Hans-Peter Nilsson Jan. 25, 2020, 2:14 p.m. UTC
Compared to the cc0 version, I noticed a regression in
delay-slot-filling for CRIS for several functions in libgcc with
a similar layout, one being lshrdi3, where with cc0 all
delay-slots were filled, as exposed by the test-case.  I ended
up including the thankfully-small lshrdi3 as-is, for simplicity
of testing, after failing to cook up an artificial test-case.

There's one slot that fails to be filled for the decc0rated CRIS
port.  A gdb session shows it is because of the automatic
inclusion of TARGET_FLAGS_REGNUM in "registers needed at the end
of the function" because there are insns in the epilogue that
clobber the condition-code register.  I'm not trying to tell a
clobber from a set, as parallels with set instead of clobber
seems likely to happen too, for targets with TARGET_FLAGS_REGNUM
set.

Other targets with delay-slots and one dedicated often-clobbered
condition-code-register should consider defining
TARGET_FLAGS_REGNUM.  I noticed it improved delay-slot-filling
also in other situations than this.  (Author of introduction of
TARGET_FLAGS_REGNUM use in dbr is CC:ed.)

Tested cris-elf.

Ok to commit or perhaps wait to gcc11?

(The test-case goes in either way, as it passes with cc0-CRIS.)

gcc:
* resource.c (init_resource_info): Filter-out TARGET_FLAGS_REGNUM
from end_of_function_needs.

gcc/testsuite:
* gcc.target/cris/pr93372-1.c: New.

---
 gcc/resource.c                            |  6 +++
 gcc/testsuite/gcc.target/cris/pr93372-1.c | 62 +++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/cris/pr93372-1.c

Comments

Jeff Law Jan. 25, 2020, 4:17 p.m. UTC | #1
On Sat, 2020-01-25 at 15:14 +0100, Hans-Peter Nilsson wrote:
> Compared to the cc0 version, I noticed a regression in
> delay-slot-filling for CRIS for several functions in libgcc with
> a similar layout, one being lshrdi3, where with cc0 all
> delay-slots were filled, as exposed by the test-case.  I ended
> up including the thankfully-small lshrdi3 as-is, for simplicity
> of testing, after failing to cook up an artificial test-case.
> 
> There's one slot that fails to be filled for the decc0rated CRIS
> port.  A gdb session shows it is because of the automatic
> inclusion of TARGET_FLAGS_REGNUM in "registers needed at the end
> of the function" because there are insns in the epilogue that
> clobber the condition-code register.  I'm not trying to tell a
> clobber from a set, as parallels with set instead of clobber
> seems likely to happen too, for targets with TARGET_FLAGS_REGNUM
> set.
> 
> Other targets with delay-slots and one dedicated often-clobbered
> condition-code-register should consider defining
> TARGET_FLAGS_REGNUM.  I noticed it improved delay-slot-filling
> also in other situations than this.  (Author of introduction of
> TARGET_FLAGS_REGNUM use in dbr is CC:ed.)
> 
> Tested cris-elf.
> 
> Ok to commit or perhaps wait to gcc11?
> 
> (The test-case goes in either way, as it passes with cc0-CRIS.)
> 
> gcc:
> * resource.c (init_resource_info): Filter-out TARGET_FLAGS_REGNUM
> from end_of_function_needs.
> 
> gcc/testsuite:
> * gcc.target/cris/pr93372-1.c: New.
Looks reasonable to me.  I'd probably wait for gcc-11 though out of an
abundance of caution.

jeff
>

Patch
diff mbox series

diff --git a/gcc/resource.c b/gcc/resource.c
index d26217c..62a69c0 100644
--- a/gcc/resource.c
+++ b/gcc/resource.c
@@ -21,6 +21,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
+#include "target.h"
 #include "rtl.h"
 #include "df.h"
 #include "memmodel.h"
@@ -1214,6 +1215,11 @@  init_resource_info (rtx_insn *epilogue_insn)
       if (return_insn_p (epilogue_insn))
 	break;
     }
+  
+  /* Filter-out the flags register from those additionally required
+     registers. */
+  if (targetm.flags_regnum != INVALID_REGNUM)
+    CLEAR_HARD_REG_BIT (end_of_function_needs.regs, targetm.flags_regnum);
 
   /* Allocate and initialize the tables used by mark_target_live_regs.  */
   target_hash_table = XCNEWVEC (struct target_info *, TARGET_HASH_PRIME);
diff --git a/gcc/testsuite/gcc.target/cris/pr93372-1.c b/gcc/testsuite/gcc.target/cris/pr93372-1.c
new file mode 100644
index 0000000..b625eda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/cris/pr93372-1.c
@@ -0,0 +1,62 @@ 
+/* Check that all more-or-less trivially fillable delayed-branch-slots
+   are filled. */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "\tnop" } } */
+
+void *f(void **p)
+{
+  /* Supposedly the memory read finds its way into the "ret"
+     delay-slot. */
+  return *p;
+}
+
+int g(int *x, int *y, char *v, int n)
+{
+  int z = *x;
+  int w = *v + 31;
+
+  /* Two branch and two return slots, all filled. */
+  if (z != 23 && z != n+1)
+    return *x+*y+24+w;
+  return *y+24+w;
+}
+
+/* No problem with the two examples above, but with a more involved
+   example, the epilogue contents matter (the condition-code register
+   clobber was mistaken for a register that needed to be alive). */
+
+struct DWstruct {int low, high;};
+typedef unsigned long long DItype;
+typedef unsigned int USItype;
+
+typedef union
+{
+  struct DWstruct s;
+  DItype ll;
+} DWunion;
+
+unsigned long long
+xlshrdi3 (DItype u, unsigned int b)
+{
+  if (b == 0)
+    return u;
+
+  const DWunion uu = {.ll = u};
+  const int bm = (4 * 8) - b;
+  DWunion w;
+
+  if (bm <= 0)
+    {
+      w.s.high = 0;
+      w.s.low = (USItype) uu.s.high >> -bm;
+    }
+  else
+    {
+      const USItype carries = (USItype) uu.s.high << bm;
+      w.s.high = (USItype) uu.s.high >> b;
+      w.s.low = ((USItype) uu.s.low >> b) | carries;
+    }
+
+  return w.ll;
+}