Patchwork [SH2A] : Add movml instruction

login
register
mail settings
Submitter Kaz Kojima
Date Aug. 22, 2010, 2:37 a.m.
Message ID <20100822.113755.25336379.kkojima@rr.iij4u.or.jp>
Download mbox | patch
Permalink /patch/62358/
State New
Headers show

Comments

Kaz Kojima - Aug. 22, 2010, 2:37 a.m.
> If so, it would be better to use normal rtls which describe
> the semantic of those movml insns accurately, like as the peepholes
> in your previous patch.

Another minor problem is that your patch gives wrong dwarf2
information with -g.  Even if it might not be a big problem
for the interrupt handlers, the correct debug information is
better.  The attached patch will give correct dwarf2 codes
with -g, though it isn't tested except for gcc.dg/attr-isr.c.
Does it work for you?

Regards,
	kaz
--
	* config/sh/sh.c (push_regs): Emit movml for interrupt handler
	when possible.
	(sh_expand_epilogue): Likewise.
	* config/sh/sh.md (movml_push_banked): New insn.
	(movml_pop_banked): Likewise.
Naveen H. S - Aug. 24, 2010, 5:07 a.m.
Hi Kaz-san,

>> The attached patch will give correct dwarf2 codes with -g, though it
>> isn't tested except for gcc.dg/attr-isr.c.

Thanks for the modified patch.

>> Does it work for you?

Yes, the modified patch works as expected for interrupt handlers.
The regression was performed for sh2a and related targets. There were
no extra regressions. 
The testcase "g++.dg/parse/stack1.C" in C++ testsuite PASS with the 
patch which FAIL with unpatched/fresh toolchain.

Thanks & Regards,
Naveen
Kaz Kojima - Aug. 24, 2010, 9:22 p.m.
"Naveen H. S" <Naveen.S@kpitcummins.com> wrote:
> Yes, the modified patch works as expected for interrupt handlers.
> The regression was performed for sh2a and related targets. There were
> no extra regressions. 
> The testcase "g++.dg/parse/stack1.C" in C++ testsuite PASS with the 
> patch which FAIL with unpatched/fresh toolchain.

Thanks for testing.  I'll apply it together with the testsuite
patch when it bootstraps on sh-linux.

Regards,
	kaz

Patch

diff -up ORIG/trunk/gcc/config/sh/sh.c trunk/gcc/config/sh/sh.c
--- ORIG/trunk/gcc/config/sh/sh.c	2010-07-17 10:31:31.000000000 +0900
+++ trunk/gcc/config/sh/sh.c	2010-08-22 10:58:56.000000000 +0900
@@ -6407,9 +6407,50 @@  push_regs (HARD_REG_SET *mask, int inter
 
   /* Push banked registers last to improve delay slot opportunities.  */
   if (interrupt_handler)
-    for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
-      if (TEST_HARD_REG_BIT (*mask, i))
-	push (i);
+    {
+      bool use_movml = false;
+
+      if (TARGET_SH2A)
+	{
+	  unsigned int count = 0;
+
+	  for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+	    if (TEST_HARD_REG_BIT (*mask, i))
+	      count++;
+	    else
+	      break;
+
+	  /* Use movml when all banked registers are pushed.  */
+	  if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
+	    use_movml = true;
+	}
+
+      if (use_movml)
+	{
+	  rtx x, mem, reg, set;
+	  rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
+
+	  /* We must avoid scheduling multiple store insn with another
+	     insns.  */
+	  emit_insn (gen_blockage ());
+	  x = gen_movml_push_banked (sp_reg);
+	  x = frame_insn (x);
+	  for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+	    {
+	      mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
+	      reg = gen_rtx_REG (SImode, i);
+	      add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
+	    }
+
+	  set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
+	  add_reg_note (x, REG_CFA_ADJUST_CFA, set);
+	  emit_insn (gen_blockage ());
+	}
+      else
+	for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+	  if (TEST_HARD_REG_BIT (*mask, i))
+	    push (i);
+    }
 
   /* Don't push PR register for an ISR with RESBANK attribute assigned.  */
   if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
@@ -7347,9 +7388,37 @@  sh_expand_epilogue (bool sibcall_p)
 	 delay slot. RTE switches banks before the ds instruction.  */
       if (current_function_interrupt)
 	{
-	  for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
-	    if (TEST_HARD_REG_BIT (live_regs_mask, i))
-	      pop (i);
+	  bool use_movml = false;
+
+	  if (TARGET_SH2A)
+	    {
+	      unsigned int count = 0;
+
+	      for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+		if (TEST_HARD_REG_BIT (live_regs_mask, i))
+		  count++;
+		else
+		  break;
+
+	      /* Use movml when all banked register are poped.  */
+	      if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
+		use_movml = true;
+	    }
+
+	  if (use_movml)
+	    {
+	      rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
+
+	      /* We must avoid scheduling multiple load insn with another
+		 insns.  */
+	      emit_insn (gen_blockage ());
+	      emit_insn (gen_movml_pop_banked (sp_reg));
+	      emit_insn (gen_blockage ());
+	    }
+	  else
+	    for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
+	      if (TEST_HARD_REG_BIT (live_regs_mask, i))
+		pop (i);
 
 	  last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
 	}
diff -up ORIG/trunk/gcc/config/sh/sh.md trunk/gcc/config/sh/sh.md
--- ORIG/trunk/gcc/config/sh/sh.md	2010-07-13 23:39:56.000000000 +0900
+++ trunk/gcc/config/sh/sh.md	2010-08-22 09:59:38.000000000 +0900
@@ -9216,6 +9216,39 @@  mov.l\\t1f,r0\\n\\
   ""
   [(set_attr "length" "0")])
 
+;; Define movml instructions for SH2A target.  Currently they are
+;; used to push and pop all banked registers only.
+
+(define_insn "movml_push_banked"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	  (plus (match_dup 0) (const_int -32)))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 28))) (reg:SI R7_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 24))) (reg:SI R6_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 20))) (reg:SI R5_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 16))) (reg:SI R4_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 12))) (reg:SI R3_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 8))) (reg:SI R2_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 4))) (reg:SI R1_REG))
+   (set (mem:SI (plus:SI (match_dup 0) (const_int 0))) (reg:SI R0_REG))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15"
+  "movml.l\tr7,@-r15"
+  [(set_attr "in_delay_slot" "no")])
+
+(define_insn "movml_pop_banked"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	  (plus (match_dup 0) (const_int 32)))
+   (set (reg:SI R0_REG) (mem:SI (plus:SI (match_dup 0) (const_int -32))))
+   (set (reg:SI R1_REG) (mem:SI (plus:SI (match_dup 0) (const_int -28))))
+   (set (reg:SI R2_REG) (mem:SI (plus:SI (match_dup 0) (const_int -24))))
+   (set (reg:SI R3_REG) (mem:SI (plus:SI (match_dup 0) (const_int -20))))
+   (set (reg:SI R4_REG) (mem:SI (plus:SI (match_dup 0) (const_int -16))))
+   (set (reg:SI R5_REG) (mem:SI (plus:SI (match_dup 0) (const_int -12))))
+   (set (reg:SI R6_REG) (mem:SI (plus:SI (match_dup 0) (const_int -8))))
+   (set (reg:SI R7_REG) (mem:SI (plus:SI (match_dup 0) (const_int -4))))]
+  "TARGET_SH2A && REGNO (operands[0]) == 15"
+  "movml.l\t@r15+,r7"
+  [(set_attr "in_delay_slot" "no")])
+
 ;; ------------------------------------------------------------------------
 ;; Scc instructions
 ;; ------------------------------------------------------------------------