Patchwork [SH] PR 51244 - catch more unnecessary T bit stores

login
register
mail settings
Submitter Oleg Endo
Date Nov. 2, 2012, 10:14 a.m.
Message ID <1351851288.8110.35.camel@yam-132-YW-E178-FTW>
Download mbox | patch
Permalink /patch/196527/
State New
Headers show

Comments

Oleg Endo - Nov. 2, 2012, 10:14 a.m.
Hello,

With this patch a few more cases are handled where the T bit is stored
and then re-tested before conditional branches.
Tested on rev 193061 with
make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

and no new failures.
OK?

Cheers,
Oleg


gcc/ChangeLog:

	PR target/51244
	* config/sh/sh.md (*cbranch_t): Allow splitting after reload.
	Allow going beyond current basic block before reload when 
	looking for the reg set insn.
	* config/sh/sh.c (sh_find_set_of_reg): Don't stop at labels.

testsuite/ChangeLog:

	PR target/51244
	* gcc.target/sh/pr51244-18.c: New.
	* gcc.target/sh/pr51244-19.c: New.
Kaz Kojima - Nov. 2, 2012, 10:58 p.m.
Oleg Endo <oleg.endo@t-online.de> wrote:
> With this patch a few more cases are handled where the T bit is stored
> and then re-tested again before conditional branches.
> Tested on rev 193061 with
> make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> 
> and no new failures.
> OK?

OK.

Regards,
	kaz

Patch

Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 192482)
+++ gcc/config/sh/sh.md	(working copy)
@@ -8168,13 +8168,15 @@ 
 {
   return output_branch (sh_eval_treg_value (operands[1]), insn, operands);
 }
-  "&& can_create_pseudo_p ()"
+  "&& 1"
   [(set (pc) (if_then_else (eq (reg:SI T_REG) (match_dup 2))
 			   (label_ref (match_dup 0))
 			   (pc)))]
 {
   /* Try to find missed test and branch combine opportunities which result
      in redundant T bit tests before conditional branches.
+     This is done not only after combine (and before reload) but in every
+     split pass, because some of these opportunities also arise after combine.
      FIXME: Probably this would not be needed if CCmode was used
      together with TARGET_FIXED_CONDITION_CODE_REGS.  */
 
@@ -8213,8 +8215,11 @@ 
 
   while (true)
     {
+      /* It's not safe to go beyond the current basic block after reload.  */
       set_of_reg s1 = sh_find_set_of_reg (tested_reg, s0.insn,
-					  prev_nonnote_insn_bb);
+					  reload_completed
+					  ? prev_nonnote_insn_bb
+					  : prev_nonnote_insn);
       if (s1.set_src == NULL_RTX)
 	break;
 
Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 192482)
+++ gcc/config/sh/sh.c	(working copy)
@@ -13488,7 +13488,7 @@ 
   for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
        result.insn = stepfunc (result.insn))
     {
-      if (LABEL_P (result.insn) || BARRIER_P (result.insn))
+      if (BARRIER_P (result.insn))
 	return result;
       if (!NONJUMP_INSN_P (result.insn))
 	continue;
Index: gcc/testsuite/gcc.target/sh/pr51244-19.c
===================================================================
--- gcc/testsuite/gcc.target/sh/pr51244-19.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr51244-19.c	(revision 0)
@@ -0,0 +1,75 @@ 
+/* Check that no unnecessary T bit stores are done before conditional
+   branches.
+   This case was extracted from the CSiBE set and contained the following
+   sequence:
+	mov.l	@(8,r4),r2
+	mov.l	@(4,r4),r3
+	cmp/gt	r2,r3
+	movt	r2
+.L3:
+	tst	r2,r2
+	bt/s	.L12
+	mov	#-1,r0
+
+	.....
+
+	mov.l	@r4,r2
+	tst	r2,r2
+	bra	.L3
+	movt	r2
+
+   In this reduced code the movt insns were only present in the
+   unwanted sequences.  Thus, if we see any movt insns, something is not
+   working as expected.  This test requires -O2 because the T bit stores
+   in question will be eliminated in additional insn split passes after
+   reload.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */
+/* { dg-final { scan-assembler-not "movt" } } */
+
+struct request
+{
+ unsigned long nr_sectors;
+};
+
+struct request_list
+{
+ int count;
+};
+
+struct request_queue
+{
+ struct request_list rq;
+ volatile int nr_sectors;
+ int max_queue_sectors;
+ int can_throttle;
+ unsigned long bounce_pfn;
+};
+
+typedef struct request_queue request_queue_t;
+
+static inline int
+blk_oversized_queue (request_queue_t* q)
+{
+  if (q->can_throttle)
+    return q->nr_sectors > q->max_queue_sectors;
+  return q->rq.count == 0;
+}
+
+struct request*
+get_request (request_queue_t* q, int rw)
+{
+  struct request* rq = ((void*)0);
+  struct request_list *rl = &q->rq;
+
+  if (blk_oversized_queue (q))
+    {
+      if ((rw == 1) || (rw == 0))
+	return ((void*)0);
+      if (blk_oversized_queue (q))
+	return ((void*)0);
+    }
+
+  return (void*)-100;
+}
Index: gcc/testsuite/gcc.target/sh/pr51244-18.c
===================================================================
--- gcc/testsuite/gcc.target/sh/pr51244-18.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr51244-18.c	(revision 0)
@@ -0,0 +1,102 @@ 
+/* Check that no unnecessary T bit stores are done before conditional
+   branches.
+   This case was extracted from the CSiBE set and contained the following
+   sequence:
+	cmp/hi	r1,r0
+	movt	r1
+	tst	r1,r1
+	bt	.L12
+	mov.l	@r10,r1
+   In this reduced code the movt and tst insns were only present in the
+   unwanted sequence.  Thus, if we see any tst or movt insns, something is
+   not working as expected.  This test requires -O2 because the T bit stores
+   in question will be eliminated in additional insn split passes after
+   reload.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */
+/* { dg-final { scan-assembler-not "movt|tst" } } */
+
+typedef char Char;
+typedef unsigned char Bool;
+typedef unsigned char UChar;
+typedef int Int32;
+typedef unsigned int UInt32;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+static inline Bool
+mainGtU (UInt32 i1, UInt32 i2, UChar* block, UInt16* quadrant, UInt32 nblock,
+	 Int32* budget)
+{
+  Int32 k;
+  UChar c1, c2;
+  UInt16 s1, s2;
+  k = nblock + 8;
+  do
+    {
+      c1 = block[i1];
+      c2 = block[i2];
+      if (c1 != c2)
+	return (c1 > c2);
+      s1 = quadrant[i1];
+      s2 = quadrant[i2];
+      if (s1 != s2)
+	return (s1 > s2);
+
+      i1++; i2++;
+      k -= 8;
+   } while (k >= 0);
+
+  return 0;
+}
+
+static inline void
+mainSimpleSort (UInt32* ptr, UChar* block, UInt16* quadrant, Int32 nblock,
+		Int32 lo, Int32 hi, Int32 d, Int32* budget)
+{
+  Int32 i, j, h, bigN, hp;
+  UInt32 v;
+  bigN = hi - lo + 1;
+  hp = 0;
+  h = 1;
+  j = lo + h;
+  v = ptr[j];
+
+  while (mainGtU (ptr[j-h]+d, v+d, block, quadrant, nblock, budget))
+    {
+      ptr[j] = ptr[j-h];
+      j = j - h;
+    }
+}
+
+static inline void
+mainQSort3 (UInt32* ptr, UChar* block, UInt16* quadrant, Int32 nblock,
+	    Int32 loSt, Int32 hiSt, Int32 dSt, Int32* budget)
+{
+  Int32 unLo, unHi, ltLo, gtHi;
+  Int32 sp, lo, hi, d;
+
+  Int32 stackLo[100];
+  Int32 stackHi[100];
+  Int32 stackD [100];
+
+  sp = 0;
+  stackLo[sp] = loSt;
+  stackHi[sp] = hiSt;
+  stackD [sp] = dSt;
+  lo = stackLo[sp];
+  hi = stackHi[sp];
+  d = stackD [sp];
+  mainSimpleSort (ptr, block, quadrant, nblock, lo, hi, d, budget);
+}
+
+void
+mainSort (UInt32* ptr, UChar* block, UInt16* quadrant, UInt32* ftab,
+	  Int32 nblock, Int32 verb, Int32* budget)
+{
+  Int32 sb = 0;
+  Int32 lo = ftab[sb] & (~((1 << 21)));
+  Int32 hi = (ftab[sb+1] & (~((1 << 21)))) - 1;
+  mainQSort3 (ptr, block, quadrant, nblock, lo, hi, 2, budget);
+}