Patchwork [rl78] Various optimizations

login
register
mail settings
Submitter DJ Delorie
Date Sept. 12, 2013, 12:16 a.m.
Message ID <201309120016.r8C0G7B7029017@greed.delorie.com>
Download mbox | patch
Permalink /patch/274394/
State New
Headers show

Comments

DJ Delorie - Sept. 12, 2013, 12:16 a.m.
This is an amalgam of a series of optimizations Nick and I did for
RL78.  I've collected the ones that were new patterns and
devirtualization related, since those go together.  Committed.

	* config/rl78/predicates.md (rl78_cmp_operator_signed): New.
	(rl78_stack_based_mem): New.
	* config/rl78/constraints.md (Iv08): New.
	(Iv16): New.
	(Iv24): New.
	(Is09): New.
	(Is17): New.
	(Is25): New.
	(ISsi): New.
	(IShi): New.
	(ISqi): New.
	* config/rl78/rl78-expand.md (movqi): Reject more SUBREG operands.
	(movhi): Likewise.
	(movsi): Change from expand to insn-and-split.
	(ashrsi3): Clobber AX.
	(lshrsi3): New.
	(ashlsi3): New.
	(cbranchsi4): New.
	* config/rl78/rl78.md (CC_REG): Fix.
	(addsi3): Allow memory and immediate operands.
	(addsi3_internal): Split into...
	(addsi3_internal_virt): ...new, and ...
	(addsi3_internal_real): ...new.
	(subsi3): New.
	(subsi3_internal_virt): New.
	(subsi3_internal_real): New.
	(mulsi3): Add memory operand.
	(mulsi3_rl78): Likewise.
	(mulsi3_g13): Likewise.
	* config/rl78/rl78-real.md (cbranchqi4_real_signed): New.
	(cbranchqi4_real): Add more constraint options.
	(cbranchhi4_real): Expand pattern.
	(cbranchhi4_real_signed): New.
	(cbranchhi4_real_inverted): New.
	(cbranchsi4_real_lt): New.
	(cbranchsi4_real_ge): New.
	(cbranchsi4_real_signed): New.
	(cbranchsi4_real): New.
	(peephole2): New.
	* config/rl78/rl78-virt.md (ashrsi3_virt): Add custom cases for constant shifts.
	(lshrsi3_virt): Likewise.
	(ashlsi3_virt): Likewise.
	(cbranchqi4_virt_signed): New.
	(cbranchhi4_virt_signed): New.
	(cbranchsi4_virt): New.
	* config/rl78/rl78.c: Whitespace fixes throughout.
	(move_elim_pass): New.
	(pass_data_rl78_move_elim): New.
	(pass_rl78_move_elim): New.
	(make_pass_rl78_move_elim): New.
	(rl78_devirt_info): Run devirt earlier.
	(rl78_move_elim_info): New.
	(rl78_asm_file_start): Register it.
	(rl78_split_movsi): New.
	(rl78_as_legitimate_address): Allow virtual base registers when
	appropriate.
	(rl78_addr_space_convert): Remove spurious debug stuff.
	(rl78_print_operand_1): Add z,s,S,r,E modifiers.
	(rl78_print_operand): More cases for not printing '#'.
	(rl78_expand_compare): Remove most of the logic.
	(content_memory): New.
	(clear_content_memory): New.
	(get_content_index): New.
	(get_content_name): New.
	(display_content_memory): New.
	(update_content): New.
	(record_content): New.
	(already_contains): New.
	(insn_ok_now): Re-recog insns with virtual registers.
	(add_postponed_content_update): New.
	(process_postponed_content_update): New.
	(gen_and_emit_move): New.
	(transcode_memory_rtx): Record new location content.  Use
	gen_and_emit_move.
	(force_into_acc): New.
	(move_to_acc): Use gen_and_emit_move.
	(move_from_acc): Likewise.
	(move_acc_to_reg): Likewise.
	(move_to_x): Likewise.
	(move_to_hl): Likewise.
	(move_to_de): Likewise.
	(rl78_alloc_physical_registers_op1): Record location content.
	(has_constraint): New.
	(rl78_alloc_physical_registers_op2): Record location content.
	Optimize use of HL.
	(rl78_alloc_physical_registers_ro1): Likewise.
	(rl78_alloc_physical_registers_cmp): Likewise.
	(rl78_alloc_physical_registers_umul): Likewise.
	(rl78_alloc_address_registers_macax): New.
	(rl78_alloc_physical_registers): Initialize and set location
	content memory as needed.
	(rl78_reorg): Make sure split2 is called.
	(rl78_rtx_costs): New.

Patch

Index: gcc/config/rl78/predicates.md
===================================================================
--- gcc/config/rl78/predicates.md	(revision 202510)
+++ gcc/config/rl78/predicates.md	(working copy)
@@ -43,18 +43,29 @@ 
 
 (define_predicate "uword_operand"
   (ior (match_code "const")
        (and (match_code "const_int")
 	    (match_test "IN_RANGE (INTVAL (op), 0, 65536)"))))
 
+(define_predicate "rl78_cmp_operator_signed"
+  (match_code "gt,ge,lt,le"))
 (define_predicate "rl78_cmp_operator_real"
   (match_code "eq,ne,gtu,ltu,geu,leu"))
 (define_predicate "rl78_cmp_operator"
   (match_code "eq,ne,gtu,ltu,geu,leu,gt,lt,ge,le"))
 
 (define_predicate "rl78_ax_operand"
   (and (match_code "reg")
        (match_test "REGNO (op) == AX_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER")))
 
 (define_predicate "rl78_addw_operand"
   (and (match_code "reg")
        (match_test "REGNO (op) == AX_REG || REGNO (op) == SP_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER")))
+
+(define_predicate "rl78_stack_based_mem"
+  (and (match_code "mem")
+       (ior (and (match_code "reg" "0")
+		 (match_test "REGNO (XEXP (op, 0)) == SP_REG"))
+	    (and (match_code "plus" "0")
+		 (and (match_code "reg" "00")
+		      (match_test "REGNO (XEXP (XEXP (op, 0), 0)) == SP_REG")
+		      (match_code "const_int" "01"))))))
Index: gcc/config/rl78/constraints.md
===================================================================
--- gcc/config/rl78/constraints.md	(revision 202510)
+++ gcc/config/rl78/constraints.md	(working copy)
@@ -40,12 +40,13 @@ 
 ; a x b c d e h l w - 8-bit regs
 ; A B D T S - 16-bit regs
 ; R = all regular registers (A-L)
 ; Y - any valid memory
 ; Wxx - various memory addressing modes
 ; Qxx - conditionals
+; U = usual memory references mov-able to/from AX
 ; v = virtual registers
 ; Zxx = specific virtual registers
 
 (define_constraint "Int8"
   "Integer constant in the range 0 @dots{} 255."
   (and (match_code "const_int")
@@ -53,12 +54,62 @@ 
 
 (define_constraint "Int3"
   "Integer constant in the range 1 @dots{} 7."
   (and (match_code "const_int")
        (match_test "IN_RANGE (ival, 1, 7)")))
 
+(define_constraint "Iv08"
+  "@internal
+   Integer constant equal to 8."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 8, 8)")))
+(define_constraint "Iv16"
+  "@internal
+   Integer constant equal to 16."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 16, 16)")))
+(define_constraint "Iv24"
+  "@internal
+   Integer constant equal to 24."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 24, 24)")))
+
+(define_constraint "Is09"
+  "@internal
+   Integer constant in the range 9 @dots{} 15 (for shifts)."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 9, 15)")))
+(define_constraint "Is17"
+  "@internal
+   Integer constant in the range 17 @dots{} 23 (for shifts)."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 17, 23)")))
+(define_constraint "Is25"
+  "@internal
+   Integer constant in the range 25 @dots{} 31 (for shifts)."
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (ival, 25, 31)")))
+
+(define_constraint "ISsi"
+  "@internal
+   Integer constant with bit 31 set."
+  (and (match_code "const_int")
+       (match_test "(ival & 0x80000000) != 0")))
+
+(define_constraint "IShi"
+  "@internal
+   Integer constant with bit 15 set."
+  (and (match_code "const_int")
+       (match_test "(ival & 0x8000) != 0")))
+
+(define_constraint "ISqi"
+  "@internal
+   Integer constant with bit 7 set."
+  (and (match_code "const_int")
+       (match_test "(ival & 0x80) != 0")))
+
 (define_constraint "J"
   "Integer constant in the range -255 @dots{} 0"
   (and (match_code "const_int")
        (match_test "IN_RANGE (ival, -255, 0)")))
 
 (define_constraint "K"
Index: gcc/config/rl78/rl78-protos.h
===================================================================
--- gcc/config/rl78/rl78-protos.h	(revision 202510)
+++ gcc/config/rl78/rl78-protos.h	(working copy)
@@ -18,12 +18,13 @@ 
    along with GCC; see the file COPYING3.  If not see
    <http://www.gnu.org/licenses/>.  */
 
 void		rl78_emit_eh_epilogue (rtx);
 void		rl78_expand_compare (rtx *);
 void		rl78_expand_movsi (rtx *);
+void		rl78_split_movsi (rtx *);
 int		rl78_force_nonfar_2 (rtx *, rtx (*gen)(rtx,rtx));
 int		rl78_force_nonfar_3 (rtx *, rtx (*gen)(rtx,rtx,rtx));
 void		rl78_expand_eh_epilogue (rtx);
 void		rl78_expand_epilogue (void);
 void		rl78_expand_prologue (void);
 int		rl78_far_p (rtx x);
Index: gcc/config/rl78/rl78-expand.md
===================================================================
--- gcc/config/rl78/rl78-expand.md	(revision 202510)
+++ gcc/config/rl78/rl78-expand.md	(working copy)
@@ -32,15 +32,30 @@ 
 
     /* FIXME: Not sure how GCC can generate (SUBREG (SYMBOL_REF)),
        but it does.  Since this makes no sense, reject it here.  */
     if (GET_CODE (operand1) == SUBREG
         && GET_CODE (XEXP (operand1, 0)) == SYMBOL_REF)
       FAIL;
+    /* Similarly for (SUBREG (CONST (PLUS (SYMBOL_REF)))).
+       cf. g++.dg/abi/packed.C.  */
+    if (GET_CODE (operand1) == SUBREG
+	&& GET_CODE (XEXP (operand1, 0)) == CONST
+        && GET_CODE (XEXP (XEXP (operand1, 0), 0)) == PLUS
+        && GET_CODE (XEXP (XEXP (XEXP (operand1, 0), 0), 0)) == SYMBOL_REF)
+      FAIL;
+
+    /* Similarly for (SUBREG (CONST (PLUS (SYMBOL_REF)))).
+       cf. g++.dg/abi/packed.C.  */
+    if (GET_CODE (operand1) == SUBREG
+	&& GET_CODE (XEXP (operand1, 0)) == CONST
+        && GET_CODE (XEXP (XEXP (operand1, 0), 0)) == PLUS
+        && GET_CODE (XEXP (XEXP (XEXP (operand1, 0), 0), 0)) == SYMBOL_REF)
+      FAIL;
 
     if (CONST_INT_P (operand1) && ! IN_RANGE (INTVAL (operand1), (-1 << 8) + 1, (1 << 8) - 1))
-      gcc_unreachable();
+      FAIL;
   }
 )
 
 (define_expand "movhi"
   [(set (match_operand:HI 0 "nonimmediate_operand")
 	(match_operand:HI 1 "general_operand"))]
@@ -53,23 +68,33 @@ 
 
     /* FIXME: Not sure how GCC can generate (SUBREG (SYMBOL_REF)),
        but it does.  Since this makes no sense, reject it here.  */
     if (GET_CODE (operand1) == SUBREG
         && GET_CODE (XEXP (operand1, 0)) == SYMBOL_REF)
       FAIL;
+    /* Similarly for (SUBREG (CONST (PLUS (SYMBOL_REF)))).  */
+    if (GET_CODE (operand1) == SUBREG
+	&& GET_CODE (XEXP (operand1, 0)) == CONST
+        && GET_CODE (XEXP (XEXP (operand1, 0), 0)) == PLUS
+        && GET_CODE (XEXP (XEXP (XEXP (operand1, 0), 0), 0)) == SYMBOL_REF)
+      FAIL;
   }
 )
 
-(define_expand "movsi"
-  [(set (match_operand:SI 0 "nonimmediate_operand")
-	(match_operand:SI 1 "general_operand"))]
-  ""
-  {
-    rl78_expand_movsi (operands);
-    DONE;
-  }
+(define_insn_and_split "movsi"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=vYS,v,Wfr")
+	(match_operand:SI 1 "general_operand" "viYS,Wfr,v"))]
+  ""
+  "#"
+  ""
+  [(set (match_operand:HI 2 "nonimmediate_operand")
+	(match_operand:HI 4 "general_operand"))
+   (set (match_operand:HI 3 "nonimmediate_operand")
+	(match_operand:HI 5 "general_operand"))]
+  "rl78_split_movsi (operands);"
+  [(set_attr "valloc" "op1")]
 )
 
 ;;---------- Conversions ------------------------
 
 (define_expand "zero_extendqihi2"
   [(set (match_operand:HI                 0 "nonimmediate_operand")
@@ -197,19 +222,39 @@ 
   ""
   "if (rl78_force_nonfar_3 (operands, gen_lshr<mode>3))
      DONE;"
 )
 
 (define_expand "ashrsi3"
-  [(set (match_operand:SI               0 "register_operand")
-	(ashiftrt:SI (match_operand:SI  1 "register_operand")
-		      (match_operand:SI 2 "immediate_operand")))
+  [(parallel [(set (match_operand:SI               0 "nonimmediate_operand")
+		   (ashiftrt:SI (match_operand:SI  1 "nonimmediate_operand")
+				(match_operand:SI  2 "nonmemory_operand")))
+	      (clobber (reg:HI X_REG))])
    ]
   ""
-  "if (GET_CODE (operands[2]) != CONST_INT)
-     FAIL;"
+  ""
+)
+
+(define_expand "lshrsi3"
+  [(parallel [(set (match_operand:SI               0 "nonimmediate_operand")
+		   (lshiftrt:SI (match_operand:SI  1 "nonimmediate_operand")
+				(match_operand:SI  2 "nonmemory_operand")))
+	      (clobber (reg:HI X_REG))])
+   ]
+  ""
+  ""
+)
+
+(define_expand "ashlsi3"
+  [(parallel [(set (match_operand:SI               0 "nonimmediate_operand")
+	(ashift:SI (match_operand:SI  1 "nonimmediate_operand")
+		      (match_operand:SI 2 "nonmemory_operand")))
+	      (clobber (reg:HI X_REG))])
+   ]
+  ""
+  ""
 )
 
 ;;---------- Branching ------------------------
 
 (define_expand "indirect_jump"
   [(set (pc)
@@ -251,6 +296,19 @@ 
 			       (match_operand:HI 2 "general_operand")])
               (label_ref (match_operand 3 "" ""))
 	      (pc)))]
   ""
   "rl78_expand_compare (operands);"
 )
+
+(define_expand "cbranchsi4"
+  [(parallel [(set (pc) (if_then_else
+			 (match_operator 0 "rl78_cmp_operator"
+					 [(match_operand:SI 1 "general_operand")
+					  (match_operand:SI 2 "nonmemory_operand")])
+			 (label_ref (match_operand 3 "" ""))
+			 (pc)))
+	      (clobber (reg:HI AX_REG))
+	      ])]
+  "1"
+  "rl78_expand_compare (operands);"
+)
Index: gcc/config/rl78/rl78.md
===================================================================
--- gcc/config/rl78/rl78.md	(revision 202510)
+++ gcc/config/rl78/rl78.md	(working copy)
@@ -32,13 +32,13 @@ 
    (HL_REG 6)
    (L_REG 6)
    (H_REG 7)
 
    (FP_REG 22)
    (SP_REG 32)
-   (CC_REG 33)
+   (CC_REG 34)
    (ES_REG 35)
    (CS_REG 36)
 
    (UNS_PROLOG	1)
    (UNS_EPILOG	1)
    (UNS_RETI	2)
@@ -202,39 +202,82 @@ 
 ;;======================================================================
 ;;
 ;; "macro" insns - cases where inline chunks of code are more
 ;; efficient than anything else.
 
 (define_expand "addsi3"
-  [(set (match_operand:SI          0 "register_operand" "=&v")
-	(plus:SI (match_operand:SI 1 "nonmemory_operand" "vi")
-		 (match_operand    2 "nonmemory_operand" "vi")))
+  [(set (match_operand:SI          0 "nonimmediate_operand" "=&vm")
+	(plus:SI (match_operand:SI 1 "general_operand"      "vim")
+		 (match_operand    2 "general_operand"      "vim")))
    ]
   ""
-  "if (!nonmemory_operand (operands[1], SImode))
-     operands[1] = force_reg (SImode, operands[1]);
-   if (!nonmemory_operand (operands[1], SImode))
-     operands[2] = force_reg (SImode, operands[2]);"
+  "emit_insn (gen_addsi3_internal_virt (operands[0], operands[1], operands[2]));
+   DONE;"
 )
 
-(define_insn "addsi3_internal"
-  [(set (match_operand:SI          0 "register_operand" "=&v")
-	(plus:SI (match_operand:SI 1 "nonmemory_operand" "vi")
-		 (match_operand:SI 2 "nonmemory_operand" "vi")))
+(define_insn "addsi3_internal_virt"
+  [(set (match_operand:SI          0 "nonimmediate_operand" "=v,&vm, vm")
+	(plus:SI (match_operand:SI 1 "general_operand"      "0, vim, vim")
+		 (match_operand    2 "general_operand"      "vim,vim,vim")))
+   (clobber (reg:HI AX_REG))
+   (clobber (reg:HI BC_REG))
    ]
+  "rl78_virt_insns_ok ()"
   ""
-  "; addSI macro %0 = %1 + %2
-	movw	ax, %h1
-	addw	ax, %h2
-	movw	%h0, ax
-	movw	ax,%H1
-	sknc
-	incw	ax
-	addw	ax,%H2
-	movw	%H0,ax
-	; end of addSI macro"
+  [(set_attr "valloc" "macax")]
+)
+
+(define_insn "addsi3_internal_real"
+  [(set (match_operand:SI          0 "nonimmediate_operand" "=v,&vU, vU")
+	(plus:SI (match_operand:SI 1 "general_operand"      "+0, viU, viU")
+		 (match_operand    2 "general_operand"      "viWabWhlWh1,viWabWhlWh1,viWabWhlWh1")))
+   (clobber (reg:HI AX_REG))
+   (clobber (reg:HI BC_REG))
+   ]
+  "rl78_real_insns_ok ()"
+  "@
+   movw ax,%h1 \;addw ax,%h2 \;movw %h0, ax \;movw ax,%H1 \;sknc \;incw ax \;addw ax,%H2 \;movw %H0,ax
+   movw ax,%h1 \;addw ax,%h2 \;movw %h0, ax \;movw ax,%H1 \;sknc \;incw ax \;addw ax,%H2 \;movw %H0,ax
+   movw ax,%h1 \;addw ax,%h2 \;movw bc,  ax \;movw ax,%H1 \;sknc \;incw ax \;addw ax,%H2 \;movw %H0,ax \;movw ax,bc \;movw %h0, ax"
+  [(set_attr "valloc" "macax")]
+)
+
+(define_expand "subsi3"
+  [(set (match_operand:SI           0 "nonimmediate_operand" "=&vm")
+	(minus:SI (match_operand:SI 1 "general_operand"      "vim")
+		  (match_operand    2 "general_operand"    "vim")))
+   ]
+  ""
+  "emit_insn (gen_subsi3_internal_virt (operands[0], operands[1], operands[2]));
+  DONE;"
+)
+
+(define_insn "subsi3_internal_virt"
+  [(set (match_operand:SI           0 "nonimmediate_operand" "=v,&vm, vm")
+	(minus:SI (match_operand:SI 1 "general_operand"      "0, vim, vim")
+		  (match_operand    2 "general_operand"      "vim,vim,vim")))
+   (clobber (reg:HI AX_REG))
+   (clobber (reg:HI BC_REG))
+   ]
+  "rl78_virt_insns_ok ()"
+  ""
+  [(set_attr "valloc" "macax")]
+)
+
+(define_insn "subsi3_internal_real"
+  [(set (match_operand:SI           0 "nonimmediate_operand" "=v,&vU, vU")
+	(minus:SI (match_operand:SI 1 "general_operand"      "+0, viU, viU")
+		  (match_operand    2 "general_operand"      "viWabWhlWh1,viWabWhlWh1,viWabWhlWh1")))
+   (clobber (reg:HI AX_REG))
+   (clobber (reg:HI BC_REG))
+   ]
+  "rl78_real_insns_ok ()"
+  "@
+   movw ax,%h1 \;subw ax,%h2 \;movw %h0, ax \;movw ax,%H1 \;sknc \;decw ax \;subw ax,%H2 \;movw %H0,ax
+   movw ax,%h1 \;subw ax,%h2 \;movw %h0, ax \;movw ax,%H1 \;sknc \;decw ax \;subw ax,%H2 \;movw %H0,ax
+   movw ax,%h1 \;subw ax,%h2 \;movw bc,  ax \;movw ax,%H1 \;sknc \;decw ax \;subw ax,%H2 \;movw %H0,ax \;movw ax,bc \;movw %h0, ax"
   [(set_attr "valloc" "macax")]
 )
 
 (define_expand "mulqi3"
   [(set (match_operand:QI          0 "register_operand" "")
 	(mult:QI  (match_operand:QI 1 "general_operand" "")
@@ -252,13 +295,13 @@ 
   "! RL78_MUL_NONE"
   ""
 )
 
 (define_expand "mulsi3"
   [(set (match_operand:SI          0 "register_operand" "=&v")
-	(mult:SI (match_operand:SI 1 "nonmemory_operand" "vi")
+	(mult:SI (match_operand:SI 1 "general_operand" "+vim")
 		 (match_operand:SI 2 "nonmemory_operand" "vi")))
    ]
   "! RL78_MUL_NONE"
   ""
 )
 
@@ -316,14 +359,14 @@ 
 
 ;; 0xFFFF0 is MACR(L).  0xFFFF2 is MACR(H) but we don't care about it
 ;; because we're only using the lower 16 bits (which is the upper 16
 ;; bits of the result).
 (define_insn "mulsi3_rl78"
   [(set (match_operand:SI          0 "register_operand" "=&v")
-	(mult:SI (match_operand:SI 1 "nonmemory_operand" "vi")
-		 (match_operand:SI 2 "nonmemory_operand" "vi")))
+	(mult:SI (match_operand:SI 1 "general_operand" "+viU")
+		 (match_operand:SI 2 "general_operand" "vi")))
    ]
   "RL78_MUL_RL78"
   "; mulsi macro %0 = %1 * %2
 	movw	ax, %h1
 	movw	bc, %h2
 	MULHU	; bcax = bc * ax
@@ -346,14 +389,14 @@ 
 ;; 0xFFFF6 is MDBL.  0xFFFF4 is MDBH.
 ;; 0xF00E0 is MDCL.  0xF00E2 is MDCH.
 ;; 0xF00E8 is MDUC.
 ;; Warning: this matches the silicon not the documentation.
 (define_insn "mulsi3_g13"
   [(set (match_operand:SI          0 "register_operand" "=&v")
-	(mult:SI (match_operand:SI 1 "nonmemory_operand" "vi")
-		 (match_operand:SI 2 "nonmemory_operand" "vi")))
+	(mult:SI (match_operand:SI 1 "general_operand" "viU")
+		 (match_operand:SI 2 "general_operand" "viU")))
    ]
   "RL78_MUL_G13"
   "; mulsi macro %0 = %1 * %2
 	mov	a, #0x00
 	mov	!0xf00e8, a	; MDUC
 	movw	ax, %h1
Index: gcc/config/rl78/rl78-real.md
===================================================================
--- gcc/config/rl78/rl78-real.md	(revision 202510)
+++ gcc/config/rl78/rl78-real.md	(working copy)
@@ -309,31 +309,153 @@ 
   "rl78_real_insns_ok ()"
   "@
    call\t!!%A1
    call\t%A1"
   )
 
+(define_insn "cbranchqi4_real_signed"
+  [(set (pc) (if_then_else
+	      (match_operator 0 "rl78_cmp_operator_signed"
+			      [(match_operand:QI 1 "general_operand" "A,A,A")
+			       (match_operand:QI 2 "general_operand" "ISqi,i,v")])
+              (label_ref (match_operand 3 "" ""))
+	      (pc)))]
+  "rl78_real_insns_ok ()"
+  "@
+   cmp\t%1, %2 \;xor1 CY,%1.7\;not1 CY\;sk%c0 \;br\t!!%3
+   cmp\t%1, %2 \;xor1 CY,%1.7\;sk%c0 \;br\t!!%3
+   cmp\t%1, %2 \;xor1 CY,%1.7\;xor1 CY,%2.7\;sk%c0 \;br\t!!%3"
+  )
+
+
 (define_insn "*cbranchqi4_real"
   [(set (pc) (if_then_else
 	      (match_operator 0 "rl78_cmp_operator_real"
-			      [(match_operand:QI 1 "general_operand" "Wabvaxbc,a,          v,bcdehl")
-			       (match_operand:QI 2 "general_operand" "M,       irWhlWh1Whb,i,a")])
+			      [(match_operand:QI 1 "general_operand" "Wabvaxbc,a,              v,bcdehl")
+			       (match_operand:QI 2 "general_operand" "M,       irvWabWhlWh1Whb,i,a")])
               (label_ref (match_operand 3 "" ""))
 	      (pc)))]
   "rl78_real_insns_ok ()"
   "@
    cmp0\t%1 \;sk%c0 \;br\t!!%3
    cmp\t%1, %2 \;sk%c0 \;br\t!!%3
    cmp\t%1, %2 \;sk%c0 \;br\t!!%3
    cmp\t%1, %2 \;sk%c0 \;br\t!!%3"
   )
 
-(define_insn "*cbranchhi4_real"
+(define_insn "cbranchhi4_real_signed"
   [(set (pc) (if_then_else
-	      (match_operator 0 "rl78_cmp_operator_real"
-			      [(match_operand:HI 1 "general_operand" "A")
-			       (match_operand:HI 2 "general_operand" "iBDTWhlWh1")])
-              (label_ref (match_operand 3 "" ""))
+	      (match_operator 0 "rl78_cmp_operator_signed"
+			      [(match_operand:HI 1 "general_operand" "A,A,A,vR")
+			       (match_operand:HI 2 "general_operand" "IShi,i,v,1")])
+              (label_ref (match_operand 3))
 	      (pc)))]
   "rl78_real_insns_ok ()"
+  "@
+   cmpw\t%1, %2 \;xor1 CY,%Q1.7\;not1 CY\;sk%c0 \;br\t!!%3
+   cmpw\t%1, %2 \;xor1 CY,%Q1.7\;sk%c0 \;br\t!!%3
+   cmpw\t%1, %2 \;xor1 CY,%Q1.7\;xor1 CY,%Q2.7\;sk%c0 \;br\t!!%3
+   %z0\t!!%3"
+  )
+
+(define_insn "cbranchhi4_real"
+  [(set (pc) (if_then_else
+	      (match_operator                    0 "rl78_cmp_operator_real"
+			      [(match_operand:HI 1 "general_operand" "A,vR")
+			       (match_operand:HI 2 "general_operand" "iBDTvWabWhlWh1,1")])
+              (label_ref (match_operand          3 "" ""))
+	      (pc)))]
+  "rl78_real_insns_ok ()"
+  "@
+  cmpw\t%1, %2 \;sk%c0 \;br\t!!%3
+  %z0\t!!%3"
+  )
+
+(define_insn "cbranchhi4_real_inverted"  
+  [(set (pc) (if_then_else
+	      (match_operator                    0 "rl78_cmp_operator_real"
+			      [(match_operand:HI 1 "general_operand" "A")
+			       (match_operand:HI 2 "general_operand" "iBDTvWabWhlWh1")])
+	      (pc)
+              (label_ref (match_operand          3 "" ""))))]
+  "rl78_real_insns_ok ()"
   "cmpw\t%1, %2 \;sk%c0 \;br\t!!%3"
   )
+
+(define_insn "cbranchsi4_real_lt"
+  [(set (pc) (if_then_else
+	      (lt (match_operand:SI 0 "general_operand" "U,vWabWhlWh1")
+		  (const_int 0))
+              (label_ref (match_operand 1 "" ""))
+	      (pc)))
+   (clobber (reg:HI AX_REG))
+   ]
+  "rl78_real_insns_ok ()"
+  "@
+   mov a, %E0 \;mov1 CY,a.7 \;sknc \;br\t!!%1
+   mov1 CY,%E0.7 \;sknc \;br\t!!%1"
+  )
+
+(define_insn "cbranchsi4_real_ge"
+  [(set (pc) (if_then_else
+	      (ge (match_operand:SI 0 "general_operand" "U,vWabWhlWh1")
+		  (const_int 0))
+              (label_ref (match_operand 1 "" ""))
+	      (pc)))
+   (clobber (reg:HI AX_REG))
+   ]
+  "rl78_real_insns_ok ()"
+  "@
+   mov a, %E0 \;mov1 CY,a.7 \;skc \;br\t!!%1
+   mov1 CY,%E0.7 \;skc \;br\t!!%1"
+  )
+
+(define_insn "cbranchsi4_real_signed"
+  [(set (pc) (if_then_else
+	      (match_operator 0 "rl78_cmp_operator_signed"
+			      [(match_operand:SI 1 "nonimmediate_operand" "vU,vU,vU")
+			       (match_operand:SI 2 "nonmemory_operand" "ISsi,i,v")])
+              (label_ref (match_operand 3 "" ""))
+	      (pc)))
+   (clobber (reg:HI AX_REG))
+   ]
+  "rl78_real_insns_ok ()"
+  "@
+   movw ax,%H1 \;cmpw  ax, %H2 \;xor1 CY,a.7\;not1 CY\;      movw ax,%h1 \;sknz \;cmpw  ax, %h2 \;sk%c0 \;br\t!!%3
+   movw ax,%H1 \;cmpw  ax, %H2 \;xor1 CY,a.7\;               movw ax,%h1 \;sknz \;cmpw  ax, %h2 \;sk%c0 \;br\t!!%3
+   movw ax,%H1 \;cmpw  ax, %H2 \;xor1 CY,a.7\;xor1 CY,%E2.7\;movw ax,%h1 \;sknz \;cmpw  ax, %h2 \;sk%c0 \;br\t!!%3"
+  )
+
+(define_insn "cbranchsi4_real"
+  [(set (pc) (if_then_else
+	      (match_operator 0 "rl78_cmp_operator_real"
+			      [(match_operand:SI 1 "general_operand" "vUi")
+			       (match_operand:SI 2 "general_operand" "iWhlWh1v")])
+              (label_ref (match_operand 3 "" ""))
+	      (pc)))
+   (clobber (reg:HI AX_REG))
+   ]
+  "rl78_real_insns_ok ()"
+  "movw ax,%H1 \;cmpw  ax, %H2 \;movw ax,%h1 \;sknz \;cmpw  ax, %h2 \;sk%c0 \;br\t!!%3"
+  )
+
+;; Peephole to match:
+;;
+;;     (set (mem (sp)) (ax))
+;;     (set (ax) (mem (sp)))
+;; or:
+;;     (set (mem (plus (sp) (const))) (ax))
+;;     (set (ax) (mem (plus (sp) (const))))
+;;
+;; which can be generated as the last instruction of the conversion
+;; of one virtual insn into a real insn and the first instruction of
+;; the conversion of the following virtual insn.
+
+(define_peephole2
+  [(set (match_operand:HI 0 "rl78_stack_based_mem")
+	(reg:HI AX_REG))
+   (set (reg:HI AX_REG)
+	(match_dup 0))]
+  ""
+  [(set (match_dup 0) (reg:HI AX_REG))]
+  )
+
Index: gcc/config/rl78/rl78-virt.md
===================================================================
--- gcc/config/rl78/rl78-virt.md	(revision 202510)
+++ gcc/config/rl78/rl78-virt.md	(working copy)
@@ -158,26 +158,136 @@ 
 		      (match_operand:QI  2 "general_operand" "vim")))
    ]
   "rl78_virt_insns_ok ()"
   "v.shr\t%0, %1, %2"
 )
 
-;; really a macro
-(define_insn "*ashrsi3_virt"
-  [(set (match_operand:SI               0 "register_operand" "=v,v,v")
-	(ashiftrt:SI (match_operand:SI  1 "register_operand" "0,v,0")
-		      (match_operand:SI 2 "immediate_operand" "M,K,i")))
+;; This is complex mostly because the RL78 has no SImode operations,
+;; and very limited HImode operations, and no variable shifts.  This
+;; pattern is optimized for each constant shift count and operand
+;; types, so as to use a hand-optimized pattern.  For readability, the
+;; usual \t\; syntax is not used here.  Also, there's no easy way to 
+;; constrain to avoid partial overlaps, hence the duplication.
+(define_insn "ashrsi3_virt"                                  ;;   0  1      2-7            8         9-15           16   17-23     24   25-31 var
+  [(set (match_operand:SI               0 "nonimmediate_operand" "=v,vU,&vU,v,  &vU,  &vU, v,  &vU,  v,  &vU, &vU,  vU,  v,&vU,    vU,  vU,   vU")
+	(ashiftrt:SI (match_operand:SI  1 "nonimmediate_operand" "0, 0,  vU,0,   vWab, U,  0,   vU,  0,   vWab,U,   vU,  0, vU,    vU,  vU,   0")
+		      (match_operand:SI 2 "nonmemory_operand"    "M, K,  K, Int3,Int3,Int3,Iv08,Iv08,Is09,Is09,Is09,Iv16,Is17,Is17,Iv24,Is25, iv")))
+   (clobber (reg:HI X_REG))
+    ]
+   ""
+   "@
+    ; ashrsi %0, 0
+
+   movw ax,%H1 | sarw ax,1 | movw %H0,ax | mov a,%Q1 | rorc a,1 | mov %Q0,a | mov a,%q1 | rorc a,1 | mov %q0,a
+   movw ax,%H1 | sarw ax,1 | movw %H0,ax | mov a,%Q1 | rorc a,1 | mov %Q0,a | mov a,%q1 | rorc a,1 | mov %q0,a
+
+   movw ax,%1 | shlw ax,%r2 | mov %0,a             | mov x,%Q1 | mov a,%H1 | shlw ax,%r2 | mov %Q0,a | movw ax,%H1 | sarw ax,%u2 | movw %H0,ax
+   movw ax,%1 | shlw ax,%r2 | mov %0,a             | mov x,%Q1 | mov a,%H1 | shlw ax,%r2 | mov %Q0,a | movw ax,%H1 | sarw ax,%u2 | movw %H0,ax
+   movw ax,%1 | shlw ax,%r2 | mov %0,a | mov a,%Q1 | mov x,a   | mov a,%H1 | shlw ax,%r2 | mov %Q0,a | movw ax,%H1 | sarw ax,%u2 | movw %H0,ax
+
+   mov x,%Q1            | mov a,%H1 | movw %0,ax | movw ax,%H1 | sarw ax,8 | movw %H0,ax
+   mov a,%Q1 | mov x, a | mov a,%H1 | movw %0,ax | movw ax,%H1 | sarw ax,8 | movw %H0,ax
+
+   mov x,%Q1           | mov a,%H1 | shlw ax,%r2 | mov %0,a | movw ax,%H1 | shlw ax,%r2 | mov %Q0,a | movw ax,%H1 | sarw ax,%u2 | movw %H0,ax
+   mov x,%Q1           | mov a,%H1 | shlw ax,%r2 | mov %0,a | movw ax,%H1 | shlw ax,%r2 | mov %Q0,a | movw ax,%H1 | sarw ax,%u2 | movw %H0,ax
+   mov a,%Q1 | mov x,a | mov a,%H1 | shlw ax,%r2 | mov %0,a | movw ax,%H1 | shlw ax,%r2 | mov %Q0,a | movw ax,%H1 | sarw ax,%u2 | movw %H0,ax
+
+   movw ax,%H1 | movw %0,ax | sarw ax,15 | movw %H0,ax
+
+   movw ax,%H1 | sarw ax,%S2 | movw %0,ax | sarw ax,15 | movw %H0,ax
+   movw ax,%H1 | sarw ax,%S2 | movw %0,ax | sarw ax,15 | movw %H0,ax
+
+   movw ax,%H1 | mov %0,a | sarw ax,15 | movw %H0,ax | mov %Q0,a
+
+   movw ax,%H1 | sar a,%s2 | mov %0,a | sarw ax,15 | movw %H0,ax | mov %Q0,a
+
+   mov b,%2 | cmp0 b | bz $2f | 1: | movw ax,%H1 | sarw ax,1 | movw %H0,ax | mov a,%Q1 | rorc a,1 | mov %Q0,a | mov a,%q1 | rorc a,1 | mov %q0,a | dec b | bnz $1b | 2:"
+  [(set_attr "valloc" "macax")]
+)
+
+;; Likewise.
+(define_insn "lshrsi3_virt"                                  ;;   0  1      2-7            8         9-15           16   17-23     24   25-31 var
+  [(set (match_operand:SI               0 "nonimmediate_operand" "=v,vU,&vU,v,  &vU,  &vU, v,  &vU,  v,  &vU, &vU,  vU,  v,&vU,    vU,  vU,   vU")
+	(lshiftrt:SI (match_operand:SI  1 "nonimmediate_operand" "0, 0,  vU,0,   vWab, U,  0,   vU,  0,   vWab,U,   vU,  0, vU,    vU,  vU,   0")
+		      (match_operand:SI 2 "nonmemory_operand"    "M, K,  K, Int3,Int3,Int3,Iv08,Iv08,Is09,Is09,Is09,Iv16,Is17,Is17,Iv24,Is25, iv")))
+   (clobber (reg:HI X_REG))
    ]
   ""
   "@
-   ; ashrsi %0, 0
-   movw\tax,%H1\;sarw\tax,1\;movw\t%H0,ax\;mov\ta,%Q1\;rorc\ta,1\;mov\t%Q0,a\;mov\ta,%q1\;rorc\ta,1\;mov\t%q0,a
-   mov\tb,%2\;1:\;movw\tax,%H1\;sarw\tax,1\;movw\t%H0,ax\;mov\ta,%Q1\;rorc\ta,1\;mov\t%Q0,a\;mov\ta,%q1\;rorc\ta,1\;mov\t%q0,a\;dec\tb\;bnz $1b"
+   ; lshrsi %0, 0
+
+   movw ax,%H1 | shrw ax,1 | movw %H0,ax | mov a,%Q1 | rorc a,1 | mov %Q0,a | mov a,%q1 | rorc a,1 | mov %q0,a
+   movw ax,%H1 | shrw ax,1 | movw %H0,ax | mov a,%Q1 | rorc a,1 | mov %Q0,a | mov a,%q1 | rorc a,1 | mov %q0,a
+
+   movw ax,%1 | shlw ax,%r2 | mov %0,a             | mov x,%Q1 | mov a,%H1 | shlw ax,%r2 | mov %Q0,a | movw ax,%H1 | shrw ax,%u2 | movw %H0,ax
+   movw ax,%1 | shlw ax,%r2 | mov %0,a             | mov x,%Q1 | mov a,%H1 | shlw ax,%r2 | mov %Q0,a | movw ax,%H1 | shrw ax,%u2 | movw %H0,ax
+   movw ax,%1 | shlw ax,%r2 | mov %0,a | mov a,%Q1 | mov x,a   | mov a,%H1 | shlw ax,%r2 | mov %Q0,a | movw ax,%H1 | shrw ax,%u2 | movw %H0,ax
+
+   mov x,%Q1            | mov a,%H1 | movw %0,ax | movw ax,%H1 | shrw ax,8 | movw %H0,ax
+   mov a,%Q1 | mov x, a | mov a,%H1 | movw %0,ax | movw ax,%H1 | shrw ax,8 | movw %H0,ax
+
+   mov x,%Q1           | mov a,%H1 | shlw ax,%r2 | mov %0,a | movw ax,%H1 | shlw ax,%r2 | mov %Q0,a | movw ax,%H1 | shrw ax,%u2 | movw %H0,ax
+   mov x,%Q1           | mov a,%H1 | shlw ax,%r2 | mov %0,a | movw ax,%H1 | shlw ax,%r2 | mov %Q0,a | movw ax,%H1 | shrw ax,%u2 | movw %H0,ax
+   mov a,%Q1 | mov x,a | mov a,%H1 | shlw ax,%r2 | mov %0,a | movw ax,%H1 | shlw ax,%r2 | mov %Q0,a | movw ax,%H1 | shrw ax,%u2 | movw %H0,ax
+
+   movw ax,%H1 | movw %0,ax | movw ax,#0 | movw %H0,ax
+
+   movw ax,%H1 | shrw ax,%S2 | movw %0,ax | movw ax,#0 | movw %H0,ax
+   movw ax,%H1 | shrw ax,%S2 | movw %0,ax | movw ax,#0 | movw %H0,ax
+
+   movw ax,%H1 | mov %0,a | movw ax,#0 | movw %H0,ax | mov %Q0,a
+
+   movw ax,%H1 | shr a,%s2 | mov %0,a | movw ax,#0 | movw %H0,ax | mov %Q0,a
+
+   mov b,%2 | cmp0 b | bz $2f | 1: | movw ax,%H1 | shrw ax,1 | movw %H0,ax | mov a,%Q1 | rorc a,1 | mov %Q0,a | mov a,%q1 | rorc a,1 | mov %q0,a | dec b | bnz $1b | 2:"
   [(set_attr "valloc" "macax")]
 )
 
+;; Likewise.
+(define_insn "ashlsi3_virt"                                ;;   0  1      2-7            8         9-15           16        17-23     24        25-31     var
+  [(set (match_operand:SI             0 "nonimmediate_operand" "=v,vU,&vU,v,  &vU,  &vU, v,  &vU,  v,  &vU, &vU,  v,   U,   v,&vU,    v,   U,   v,   U,   vWab,vU,  vU")
+	(ashift:SI (match_operand:SI  1 "nonimmediate_operand" "0, 0,  vU,0,   vWab, U,  0,   vU,  0,   vWab,U,   vU,  vU,  0, vU,    vU,  vU,  vU,  vU,  0,   vWab,U")
+		    (match_operand:SI 2 "nonmemory_operand"    "M, K,  K, Int3,Int3,Int3,Iv08,Iv08,Is09,Is09,Is09,Iv16,Iv16,Is17,Is17,Iv24,Iv24,Is25,Is25,iv,  iv,  iv")))
+   (clobber (reg:HI X_REG))
+   ]
+  ""
+  "@
+   ; lshrsi %0, 0
+
+   movw ax,%1 | shlw ax,1 | movw %0,ax | movw ax,%H1 | rolwc ax,1 | movw %H0,ax
+   movw ax,%1 | shlw ax,1 | movw %0,ax | movw ax,%H1 | rolwc ax,1 | movw %H0,ax
+
+   movw ax,%H1 | shlw ax,%u2 | mov %E0,a | mov x,%Q1           | mov a, %H1 | shlw ax,%S2 | mov %H0,a | movw ax,%1 | shlw ax,%u2 | movw %0,ax
+   movw ax,%H1 | shlw ax,%u2 | mov %E0,a | mov x,%Q1           | mov a, %H1 | shlw ax,%S2 | mov %H0,a | movw ax,%1 | shlw ax,%u2 | movw %0,ax
+   movw ax,%H1 | shlw ax,%u2 | mov %E0,a | mov a,%Q1 | mov x,a | mov a, %H1 | shlw ax,%S2 | mov %H0,a | movw ax,%1 | shlw ax,%u2 | movw %0,ax
+
+   mov x,%Q1           | mov a,%H1 | movw %H0,ax | movw ax,%1 | shlw ax,8 | movw %0,ax
+   mov a,%Q1 | mov x,a | mov a,%H1 | movw %H0,ax | movw ax,%1 | shlw ax,8 | movw %0,ax
+
+   mov x,%Q1           | mov a,%H1 | shlw ax,%s2 | movw %H0,ax | movw ax,%1 | shlw ax,%s2 | mov %H0,a | movw ax,%1 | shlw ax,%u2 | movw %0,ax
+   mov x,%Q1           | mov a,%H1 | shlw ax,%s2 | movw %H0,ax | movw ax,%1 | shlw ax,%s2 | mov %H0,a | movw ax,%1 | shlw ax,%u2 | movw %0,ax
+   mov a,%Q1 | mov x,a | mov a,%H1 | shlw ax,%s2 | movw %H0,ax | movw ax,%1 | shlw ax,%s2 | mov %H0,a | movw ax,%1 | shlw ax,%u2 | movw %0,ax
+
+   movw ax,%1 | movw %H0,ax | movw %0,#0
+   movw ax,%1 | movw %H0,ax | movw ax,#0 | movw %0,ax
+
+   movw ax,%1 | shlw ax,%S2 | movw %H0,ax | movw %0,#0
+   movw ax,%1 | shlw ax,%S2 | movw %H0,ax | movw ax,#0 | movw %0,ax
+
+   mov a,%1 | movw %H0,ax | mov %H0,#0 | movw %0,#0
+   mov a,%1 | movw %H0,ax | movw ax,#0 | mov %H0,a | movW %0,ax
+
+   mov a,%1 | shl a,%s2 | movw %H0,ax | mov %H0,#0 | movw %0,#0
+   mov a,%1 | shl a,%s2 | movw %H0,ax | movw ax,#0 | mov %H0,a | movW %0,ax
+
+   mov a,%2 | cmp0 a | bz $2f | mov d,a | movw ax,%H1 | movw bc,%1 | 1: | shlw bc,1 | rolwc ax,1 | dec d | bnz $1b | movw %H0,ax | movw ax,bc | movw %0,ax | 2:
+   mov a,%2 | mov d,a | movw ax,%H1 | movw bc,%1 | cmp0 0xFFEFD | bz $2f | 1: | shlw bc,1 | rolwc ax,1 | dec d | bnz $1b | 2: | movw %H0,ax | movw ax,bc | movw %0,ax
+   mov a,%2 | mov d,a | movw ax,%1 | movw bc,ax | movw ax,%H1 | cmp0 0xFFEFD | bz $2f | 1: | shlw bc,1 | rolwc ax,1 | dec d | bnz $1b | 2: | movw %H0,ax | movw ax,bc | movw %0,ax"
+   [(set_attr "valloc" "macax")]
+ )
+
 ;;---------- Branching ------------------------
 
 (define_insn "*indirect_jump_virt"
   [(set (pc)
 	(match_operand:HI 0 "nonimmediate_operand" "vm"))]
   "rl78_virt_insns_ok ()"
@@ -199,36 +309,74 @@ 
 	      (match_operand 2 "" "")))]
   "rl78_virt_insns_ok ()"
   "v.call\t%1"
   [(set_attr "valloc" "op1")]
   )
 
+;; QImode compare-and-branch for the signed comparison operators
+;; (rl78_cmp_operator_signed).  Only available while
+;; rl78_virt_insns_ok () holds.  Unlike *cbranchqi4_virt, operand 2 is
+;; restricted to nonmemory_operand.
+(define_insn "cbranchqi4_virt_signed"
+  [(set (pc) (if_then_else
+	      (match_operator 0 "rl78_cmp_operator_signed"
+			      [(match_operand:QI 1 "general_operand" "vim")
+			       (match_operand:QI 2 "nonmemory_operand" "vi")])
+              (label_ref (match_operand 3 "" ""))
+	      (pc)))]
+  "rl78_virt_insns_ok ()"
+  "v.cmp\t%1, %2\\n\tv.b%c0\t%3"
+  [(set_attr "valloc" "cmp")]
+  )
+
 (define_insn "*cbranchqi4_virt"
   [(set (pc) (if_then_else
 	      (match_operator 0 "rl78_cmp_operator_real"
 			      [(match_operand:QI 1 "general_operand" "vim")
 			       (match_operand:QI 2 "general_operand" "vim")])
               (label_ref (match_operand 3 "" ""))
 	      (pc)))]
   "rl78_virt_insns_ok ()"
   "v.cmp\t%1, %2\\n\tv.b%c0\t%3"
   [(set_attr "valloc" "cmp")]
   )
 
+;; HImode compare-and-branch for the signed comparison operators
+;; (rl78_cmp_operator_signed).  Only available while
+;; rl78_virt_insns_ok () holds.  Unlike *cbranchhi4_virt, operand 2 is
+;; restricted to nonmemory_operand.
+(define_insn "cbranchhi4_virt_signed"
+  [(set (pc) (if_then_else
+	      (match_operator 0 "rl78_cmp_operator_signed"
+			      [(match_operand:HI 1 "general_operand" "vim")
+			       (match_operand:HI 2 "nonmemory_operand" "vi")])
+              (label_ref (match_operand 3 "" ""))
+	      (pc)))]
+  "rl78_virt_insns_ok ()"
+  "v.cmpw\t%1, %2\\n\tv.b%c0\t%3"
+  [(set_attr "valloc" "cmp")]
+  )
+
 (define_insn "*cbranchhi4_virt"
   [(set (pc) (if_then_else
 	      (match_operator 0 "rl78_cmp_operator_real"
 			      [(match_operand:HI 1 "general_operand" "vim")
 			       (match_operand:HI 2 "general_operand" "vim")])
               (label_ref (match_operand 3 "" ""))
 	      (pc)))]
   "rl78_virt_insns_ok ()"
   "v.cmpw\t%1, %2\\n\tv.b%c0\t%3"
   [(set_attr "valloc" "cmp")]
   )
 
+;; SImode compare-and-branch, accepting any rl78_cmp_operator.  Only
+;; available while rl78_virt_insns_ok () holds.  Operand 2 is
+;; restricted to nonmemory_operand and the pattern declares AX as
+;; clobbered.
+(define_insn "cbranchsi4_virt"
+  [(set (pc) (if_then_else
+	      (match_operator 0 "rl78_cmp_operator"
+			      [(match_operand:SI 1 "general_operand" "vim")
+			       (match_operand:SI 2 "nonmemory_operand" "vi")])
+              (label_ref (match_operand 3 "" ""))
+	      (pc)))
+   (clobber (reg:HI AX_REG))
+   ]
+  "rl78_virt_insns_ok ()"
+  "v.cmpd\t%1, %2\\n\tv.b%c0\t%3"
+  [(set_attr "valloc" "macax")]
+  )
+
 ;;---------- Peepholes ------------------------
 
 (define_peephole2
   [(set (match_operand:QI 0 "" "")
 	(match_operand:QI 1 "" ""))
    (set (match_operand:QI 2 "" "")
Index: gcc/config/rl78/rl78.c
===================================================================
--- gcc/config/rl78/rl78.c	(revision 202510)
+++ gcc/config/rl78/rl78.c	(working copy)
@@ -47,12 +47,14 @@ 
 #include "target-def.h"
 #include "langhooks.h"
 #include "rl78-protos.h"
 #include "dumpfile.h"
 #include "tree-pass.h"
 #include "context.h"
+#include "tm-constrs.h" /* for satisfies_constraint_*().  */
+#include "insn-flags.h" /* for gen_*().  */
 
 static inline bool is_interrupt_func (const_tree decl);
 static inline bool is_brk_interrupt_func (const_tree decl);
 static void rl78_reorg (void);
 
 
@@ -165,12 +167,92 @@  public:
 rtl_opt_pass *
 make_pass_rl78_devirt (gcc::context *ctxt)
 {
   return new pass_rl78_devirt (ctxt);
 }
 
+/* Implementation of the move_elim pass.  Walk the insn stream looking
+   for a single-SET insn that merely copies back the value moved by
+   the single-SET insn immediately before it (A = B followed by
+   B = A) and delete the second, redundant, insn.  Moves that touch
+   volatile storage are never deleted.  Always returns 0.  */
+static unsigned int
+move_elim_pass (void)
+{
+  rtx insn, ninsn, prev = NULL_RTX;
+
+  for (insn = get_insns (); insn; insn = ninsn)
+    {
+      rtx set;
+
+      /* Fetch the successor first, since INSN may be deleted below.  */
+      ninsn = next_nonnote_nondebug_insn (insn);
+
+      if ((set = single_set (insn)) == NULL_RTX)
+	{
+	  /* Anything that is not a simple SET breaks the pairing.  */
+	  prev = NULL_RTX;
+	  continue;
+	}
+
+      /* If we have two SET insns in a row (without anything
+	 between them) and the source of the second one is the
+	 destination of the first one, and vice versa, then we
+	 can eliminate the second SET.  This is only safe when
+	 neither SET references volatile storage: a volatile access
+	 must be performed even if the value is already known.  */
+      if (prev
+	  && rtx_equal_p (SET_DEST (prev), SET_SRC (set))
+	  && rtx_equal_p (SET_DEST (set), SET_SRC (prev))
+	  && ! volatile_refs_p (prev)
+	  && ! volatile_refs_p (set))
+	{
+	  if (dump_file)
+	    fprintf (dump_file, " Delete insn %d because it is redundant\n",
+		     INSN_UID (insn));
+
+	  delete_insn (insn);
+	  prev = NULL_RTX;
+	}
+      else
+	prev = set;
+    }
+
+  if (dump_file)
+    print_rtl_with_bb (dump_file, get_insns (), 0);
+
+  return 0;
+}
+
+namespace {
+
+/* Static description of the move_elim RTL pass: it has both a gate
+   and an execute hook, is timed under TV_MACH_DEP, and requires,
+   provides and destroys no properties.  */
+const pass_data pass_data_rl78_move_elim =
+{
+  RTL_PASS, /* type */
+  "move_elim", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  true, /* has_gate */
+  true, /* has_execute */
+  TV_MACH_DEP, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+/* Pass object for move_elim.  It shares its gate with the devirt
+   pass (devirt_gate ()) and runs move_elim_pass ().  */
+class pass_rl78_move_elim : public rtl_opt_pass
+{
+public:
+  pass_rl78_move_elim(gcc::context *ctxt)
+    : rtl_opt_pass(pass_data_rl78_move_elim, ctxt)
+  {
+  }
+
+  /* opt_pass methods: */
+  bool gate () { return devirt_gate (); }
+  unsigned int execute () { return move_elim_pass (); }
+};
+
+} // anon namespace
+
+/* Allocate and return a new instance of the move_elim pass for
+   compiler context CTXT.  */
+rtl_opt_pass *
+make_pass_rl78_move_elim (gcc::context *ctxt)
+{
+  return new pass_rl78_move_elim (ctxt);
+}
 
 #undef  TARGET_ASM_FILE_START
 #define TARGET_ASM_FILE_START rl78_asm_file_start
 
 static void
 rl78_asm_file_start (void)
@@ -181,21 +263,31 @@  rl78_asm_file_start (void)
     {
       fprintf (asm_out_file, "r%d\t=\t0x%x\n", 8 + i, 0xffef0 + i);
       fprintf (asm_out_file, "r%d\t=\t0x%x\n", 16 + i, 0xffee8 + i);
     }
 
   opt_pass *rl78_devirt_pass = make_pass_rl78_devirt (g);
-  struct register_pass_info rl78_devirt_info =
+  static struct register_pass_info rl78_devirt_info =
     {
       rl78_devirt_pass,
-      "vartrack",
+      "pro_and_epilogue",
       1,
       PASS_POS_INSERT_BEFORE
     };
 
+  opt_pass *rl78_move_elim_pass = make_pass_rl78_move_elim (g);
+  static struct register_pass_info rl78_move_elim_info =
+    {
+      rl78_move_elim_pass,
+      "bbro",
+      1,
+      PASS_POS_INSERT_AFTER
+    };
+
   register_pass (& rl78_devirt_info);
+  register_pass (& rl78_move_elim_info);
 }
 
 
 #undef  TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE		rl78_option_override
 
@@ -329,12 +421,52 @@  rl78_expand_movsi (rtx *operands)
     {
       emit_move_insn (op00, op10);
       emit_move_insn (op02, op12);
     }
 }
 
+/* Split the SImode move OPERANDS[0] = OPERANDS[1] into two HImode
+   moves.  On return OPERANDS[2] and OPERANDS[3] hold the two
+   destination halves and OPERANDS[4] and OPERANDS[5] the matching
+   source halves (2 pairs with 4, 3 pairs with 5).
+
+   NOTE(review): when OPERANDS[0] and OPERANDS[1] are rtx_equal_p,
+   OPERANDS[2..5] are left untouched - confirm that the caller never
+   reads them in that case.  */
+void
+rl78_split_movsi (rtx *operands)
+{
+  rtx op00, op02, op10, op12;
+
+  /* Destination halves at byte offsets 0 and 2.  */
+  op00 = rl78_subreg (HImode, operands[0], SImode, 0);
+  op02 = rl78_subreg (HImode, operands[0], SImode, 2);
+  if (GET_CODE (operands[1]) == CONST
+      || GET_CODE (operands[1]) == SYMBOL_REF)
+    {
+      /* Symbolic sources cannot go through rl78_subreg here; describe
+	 each half as a CONST-wrapped 16-bit ZERO_EXTRACT at bit
+	 position 0 and 16 respectively.  */
+      op10 = gen_rtx_ZERO_EXTRACT (HImode, operands[1], GEN_INT (16), GEN_INT (0));
+      op10 = gen_rtx_CONST (HImode, op10);
+      op12 = gen_rtx_ZERO_EXTRACT (HImode, operands[1], GEN_INT (16), GEN_INT (16));
+      op12 = gen_rtx_CONST (HImode, op12);
+    }
+  else
+    {
+      op10 = rl78_subreg (HImode, operands[1], SImode, 0);
+      op12 = rl78_subreg (HImode, operands[1], SImode, 2);
+    }
+
+  if (rtx_equal_p (operands[0], operands[1]))
+    ;
+  else if (rtx_equal_p (op00, op12))
+    {
+      /* The offset-0 destination half is the offset-2 source half, so
+	 list the offset-2 pair first (presumably so that it is moved
+	 before its source is overwritten - confirm against the
+	 splitter pattern).  */
+      operands[2] = op02;
+      operands[4] = op12;
+      operands[3] = op00;
+      operands[5] = op10;
+    }
+  else
+    {
+      operands[2] = op00;
+      operands[4] = op10;
+      operands[3] = op02;
+      operands[5] = op12;
+    }
+}
+
+
 /* Used by various two-operand expanders which cannot accept all
    operands in the "far" namespace.  Force some such operands into
    registers so that each pattern has at most one far operand.  */
 int
 rl78_force_nonfar_2 (rtx *operands, rtx (*gen)(rtx,rtx))
 {
@@ -643,17 +775,17 @@  rl78_hl_b_c_addr_p (rtx op)
 
 /* Used in various constraints and predicates to match operands in the
    "far" address space.  */
 int
 rl78_far_p (rtx x)
 {
-  if (GET_CODE (x) != MEM)
+  if (! MEM_P (x))
     return 0;
 #if DEBUG0
-  fprintf(stderr, "\033[35mrl78_far_p: "); debug_rtx(x);
-  fprintf(stderr, " = %d\033[0m\n", MEM_ADDR_SPACE (x) == ADDR_SPACE_FAR);
+  fprintf (stderr, "\033[35mrl78_far_p: "); debug_rtx(x);
+  fprintf (stderr, " = %d\033[0m\n", MEM_ADDR_SPACE (x) == ADDR_SPACE_FAR);
 #endif
   return MEM_ADDR_SPACE (x) == ADDR_SPACE_FAR;
 }
 
 /* Return the appropriate mode for a named address pointer.  */
 #undef TARGET_ADDR_SPACE_POINTER_MODE
@@ -741,12 +873,16 @@  rl78_as_legitimate_address (enum machine
       return false;
     }
 
   if (strict && base && GET_CODE (base) == REG && REGNO (base) >= FIRST_PSEUDO_REGISTER)
     return false;
 
+  if (! cfun->machine->virt_insns_ok && base && GET_CODE (base) == REG
+      && REGNO (base) >= 8 && REGNO (base) <= 31)
+    return false;
+
   return true;
 }
 
 /* Determine if one named address space is a subset of another.  */
 #undef  TARGET_ADDR_SPACE_SUBSET_P
 #define TARGET_ADDR_SPACE_SUBSET_P rl78_addr_space_subset_p
@@ -786,14 +922,12 @@  rl78_addr_space_convert (rtx op, tree fr
       return result;
     }
   else if (to_as == ADDR_SPACE_FAR && from_as == ADDR_SPACE_GENERIC)
     {
       /* This always works.  */
       result = gen_reg_rtx (SImode);
-      debug_rtx(result);
-      debug_rtx(op);
       emit_move_insn (rl78_subreg (HImode, result, SImode, 0), op);
       emit_move_insn (rl78_subreg (HImode, result, SImode, 2), const0_rtx);
       return result;
     }
   else
     gcc_unreachable ();
@@ -995,13 +1129,13 @@  rl78_start_function (FILE *file, HOST_WI
   if (cfun->machine->framesize_regs)
     {
       fprintf (file, "\t; push %d:", cfun->machine->framesize_regs);
       for (i = 0; i < 16; i ++)
 	if (cfun->machine->need_to_push[i])
 	  fprintf (file, " %s", word_regnames[i*2]);
-      fprintf(file, "\n");
+      fprintf (file, "\n");
     }
 
   if (frame_pointer_needed)
     fprintf (file, "\t; $fp points here (r22)\n");
 
   if (cfun->machine->framesize_locals)
@@ -1091,18 +1225,23 @@  rl78_function_arg_boundary (enum machine
 
    A - address of a MEM
    S - SADDR form of a real register
    v - real register corresponding to a virtual register
    m - minus - negative of CONST_INT value.
    c - inverse of a conditional (NE vs EQ for example)
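+   z - collapsed conditional (prints "br" or "#comparison eliminated")
+   s - shift count mod 8 (CONST_INT operand)
+   S - shift count mod 16 (CONST_INT operand; SADDR form for a register)
+   r - reverse shift count (8-(count mod 8))
+   C - CONST_INT with bit 15 inverted, masked to 16 bits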
 
    h - bottom HI of an SI
    H - top HI of an SI
    q - bottom QI of an HI
    Q - top QI of an HI
    e - third QI of an SI (i.e. where the ES register gets values from)
+   E - fourth QI of an SI (i.e. MSB)
 
 */
 
 /* Implements the bulk of rl78_print_operand, below.  We do it this
    way because we need to test for a constant at the top level and
    insert the '#', but not test for it anywhere else as we recurse
@@ -1117,13 +1256,13 @@  rl78_print_operand_1 (FILE * file, rtx o
     case MEM:
       if (letter == 'A')
 	rl78_print_operand_1 (file, XEXP (op, 0), letter);
       else
 	{
 	  if (rl78_far_p (op))
-	    fprintf(file, "es:");
+	    fprintf (file, "es:");
 	  if (letter == 'H')
 	    {
 	      op = adjust_address (op, HImode, 2);
 	      letter = 0;
 	    }
 	  if (letter == 'h')
@@ -1143,37 +1282,42 @@  rl78_print_operand_1 (FILE * file, rtx o
 	    }
 	  if (letter == 'e')
 	    {
 	      op = adjust_address (op, QImode, 2);
 	      letter = 0;
 	    }
+	  if (letter == 'E')
+	    {
+	      op = adjust_address (op, QImode, 3);
+	      letter = 0;
+	    }
 	  if (CONSTANT_P (XEXP (op, 0)))
 	    {
-	      fprintf(file, "!");
+	      fprintf (file, "!");
 	      rl78_print_operand_1 (file, XEXP (op, 0), letter);
 	    }
 	  else if (GET_CODE (XEXP (op, 0)) == PLUS
 		   && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF)
 	    {
-	      fprintf(file, "!");
+	      fprintf (file, "!");
 	      rl78_print_operand_1 (file, XEXP (op, 0), letter);
 	    }
 	  else if (GET_CODE (XEXP (op, 0)) == PLUS
 		   && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
 		   && REGNO (XEXP (XEXP (op, 0), 0)) == 2)
 	    {
 	      rl78_print_operand_1 (file, XEXP (XEXP (op, 0), 1), 'u');
-	      fprintf(file, "[");
+	      fprintf (file, "[");
 	      rl78_print_operand_1 (file, XEXP (XEXP (op, 0), 0), 0);
-	      fprintf(file, "]");
+	      fprintf (file, "]");
 	    }
 	  else
 	    {
-	      fprintf(file, "[");
+	      fprintf (file, "[");
 	      rl78_print_operand_1 (file, XEXP (op, 0), letter);
-	      fprintf(file, "]");
+	      fprintf (file, "]");
 	    }
 	}
       break;
 
     case REG:
       if (letter == 'Q')
@@ -1181,12 +1325,14 @@  rl78_print_operand_1 (FILE * file, rtx o
       else if (letter == 'H')
 	fprintf (file, "%s", reg_names [REGNO (op) + 2]);
       else if (letter == 'q')
 	fprintf (file, "%s", reg_names [REGNO (op) & ~1]);
       else if (letter == 'e')
 	fprintf (file, "%s", reg_names [REGNO (op) + 2]);
+      else if (letter == 'E')
+	fprintf (file, "%s", reg_names [REGNO (op) + 3]);
       else if (letter == 'S')
 	fprintf (file, "0x%x", 0xffef8 + REGNO (op));
       else if (GET_MODE (op) == HImode
 	       && ! (REGNO (op) & ~0xfe))
 	{
 	  if (letter == 'v')
@@ -1206,16 +1352,26 @@  rl78_print_operand_1 (FILE * file, rtx o
       else if (letter == 'q')
 	fprintf (file, "%ld", INTVAL (op) & 0xff);
       else if (letter == 'h')
 	fprintf (file, "%ld", INTVAL (op) & 0xffff);
       else if (letter == 'e')
 	fprintf (file, "%ld", (INTVAL (op) >> 16) & 0xff);
+      else if (letter == 'E')
+	fprintf (file, "%ld", (INTVAL (op) >> 24) & 0xff);
       else if (letter == 'm')
 	fprintf (file, "%ld", - INTVAL (op));
+      else if (letter == 's')
+	fprintf (file, "%ld", INTVAL (op) % 8);
+      else if (letter == 'S')
+	fprintf (file, "%ld", INTVAL (op) % 16);
+      else if (letter == 'r')
+	fprintf (file, "%ld", 8 - (INTVAL (op) % 8));
+      else if (letter == 'C')
+	fprintf (file, "%ld", (INTVAL (op) ^ 0x8000) & 0xffff);
       else
-	fprintf(file, "%ld", INTVAL (op));
+	fprintf (file, "%ld", INTVAL (op));
       break;
 
     case CONST:
       rl78_print_operand_1 (file, XEXP (op, 0), letter);
       break;
 
@@ -1313,28 +1469,74 @@  rl78_print_operand_1 (FILE * file, rtx o
     case CODE_LABEL:
     case LABEL_REF:
       output_asm_label (op);
       break;
 
     case LTU:
-      fprintf (file, letter == 'c' ? "nc" : "c");
+      if (letter == 'z')
+	fprintf (file, "#comparison eliminated");
+      else
+	fprintf (file, letter == 'c' ? "nc" : "c");
       break;
     case LEU:
-      fprintf (file, letter == 'c' ? "h" : "nh");
+      if (letter == 'z')
+	fprintf (file, "br");
+      else
+	fprintf (file, letter == 'c' ? "h" : "nh");
       break;
     case GEU:
-      fprintf (file, letter == 'c' ? "c" : "nc");
+      if (letter == 'z')
+	fprintf (file, "br");
+      else
+	fprintf (file, letter == 'c' ? "c" : "nc");
       break;
     case GTU:
-      fprintf (file, letter == 'c' ? "nh" : "h");
+      if (letter == 'z')
+	fprintf (file, "#comparison eliminated");
+      else
+	fprintf (file, letter == 'c' ? "nh" : "h");
       break;
     case EQ:
-      fprintf (file, letter == 'c' ? "nz" : "z");
+      if (letter == 'z')
+	fprintf (file, "br");
+      else
+	fprintf (file, letter == 'c' ? "nz" : "z");
       break;
     case NE:
-      fprintf (file, letter == 'c' ? "z" : "nz");
+      if (letter == 'z')
+	fprintf (file, "#comparison eliminated");
+      else
+	fprintf (file, letter == 'c' ? "z" : "nz");
+      break;
+
+    /* Note: these assume appropriate adjustments were made so that
+       unsigned comparisons, which is all this chip has, will
+       work.  */
+    case LT:
+      if (letter == 'z')
+	fprintf (file, "#comparison eliminated");
+      else
+	fprintf (file, letter == 'c' ? "nc" : "c");
+      break;
+    case LE:
+      if (letter == 'z')
+	fprintf (file, "br");
+      else
+        fprintf (file, letter == 'c' ? "h" : "nh");
+      break;
+    case GE:
+      if (letter == 'z')
+	fprintf (file, "br");
+      else
+	fprintf (file, letter == 'c' ? "c" : "nc");
+      break;
+    case GT:
+      if (letter == 'z')
+	fprintf (file, "#comparison eliminated");
+      else
+	fprintf (file, letter == 'c' ? "nh" : "h");
       break;
 
     default:
       fprintf (file, "(%s)", GET_RTX_NAME (GET_CODE (op)));
       break;
     }
@@ -1343,13 +1545,13 @@  rl78_print_operand_1 (FILE * file, rtx o
 #undef  TARGET_PRINT_OPERAND
 #define TARGET_PRINT_OPERAND		rl78_print_operand
 
 static void
 rl78_print_operand (FILE * file, rtx op, int letter)
 {
-  if (CONSTANT_P (op) && letter != 'u')
+  if (CONSTANT_P (op) && letter != 'u' && letter != 's' && letter != 'r' && letter != 'S')
     fprintf (file, "#");
   rl78_print_operand_1 (file, op, letter);
 }
 
 #undef  TARGET_TRAMPOLINE_INIT
 #define TARGET_TRAMPOLINE_INIT rl78_trampoline_init
@@ -1388,72 +1590,14 @@  rl78_trampoline_adjust_address (rtx m_tr
 /* Expander for cbranchqi4 and cbranchhi4.  RL78 is missing some of
    the "normal" compares, specifically, it only has unsigned compares,
    so we must synthesize the missing ones.  */
 void
 rl78_expand_compare (rtx *operands)
 {
-  /* RL78 does not have signed comparisons.  We must modify the
-     operands to be in the unsigned range, and emit an unsigned
-     comparison.  */
-
-  enum machine_mode mode;
-  rtx high_bit;
-  int i;
-  RTX_CODE new_cond;
-
-  switch (GET_CODE (operands[0]))
-    {
-    case GE:
-      new_cond = GEU;
-      break;
-    case LE:
-      new_cond = LEU;
-      break;
-    case GT:
-      new_cond = GTU;
-      break;
-    case LT:
-      new_cond = LTU;
-      break;
-    default:
-      return;
-    }
-
-#if DEBUG0
-  fprintf (stderr, "\033[38;5;129mrl78_expand_compare\n");
-  debug_rtx (operands[0]);
-  fprintf (stderr, "\033[0m");
-#endif
-
-  mode = GET_MODE (operands[1]);
-  if (mode == VOIDmode)
-    mode = GET_MODE (operands[2]);
-  high_bit = GEN_INT (~0 << (GET_MODE_BITSIZE (mode) - 1));
-
-  /* 0: conditional 1,2: operands */
-  for (i = 1; i <= 2; i ++)
-    {
-      rtx r = operands[i];
-
-      if (GET_CODE (r) == CONST_INT)
-	r = GEN_INT (INTVAL (r) ^ INTVAL (high_bit));
-      else
-	{
-	  r = gen_rtx_PLUS (mode, operands[i], high_bit);
-	  r = copy_to_mode_reg (mode, r);
-	}
-      operands[i] = r;
-    }
-
-  operands[0] = gen_rtx_fmt_ee (new_cond, GET_MODE (operands[0]), operands[1], operands[2]);
-
-#if DEBUG0
-  fprintf (stderr, "\033[38;5;142mrl78_expand_compare\n");
-  debug_rtx (operands[0]);
-  fprintf (stderr, "\033[0m");
-#endif
+  if (GET_CODE (operands[2]) == MEM)
+    operands[2] = copy_to_mode_reg (GET_MODE (operands[2]), operands[2]);
 }
 
 
 
 /* Define this to 1 if you are debugging the peephole optimizers.  */
 #define DEBUG_PEEP 0
@@ -1470,16 +1614,16 @@  rl78_peep_movhi_p (rtx *operands)
 
   /* (set (op0) (op1))
      (set (op2) (op3)) */
 
 #if DEBUG_PEEP
   fprintf (stderr, "\033[33m");
-  debug_rtx(operands[0]);
-  debug_rtx(operands[1]);
-  debug_rtx(operands[2]);
-  debug_rtx(operands[3]);
+  debug_rtx (operands[0]);
+  debug_rtx (operands[1]);
+  debug_rtx (operands[2]);
+  debug_rtx (operands[3]);
   fprintf (stderr, "\033[0m");
 #endif
 
   if (rtx_equal_p (operands[0], operands[3]))
     {
 #if DEBUG_PEEP
@@ -1659,56 +1803,303 @@  During devirtualization, a simple regist
 would be better to run a full CSE/propogation pass on it through, or
 re-run regmove, but that has not yet been attempted.
 
  */
 #define DEBUG_ALLOC 0
 
+/* This array is used to hold knowledge about the contents of the
+   real registers (X ... H), the memory-based registers (r8 ... r31)
+   and the first NUM_STACK_LOCS bytes on the stack.  We use this to
+   avoid generating redundant move instructions.
+
+   A value in the range 0 .. 31 indicates register X .. r31.
+   A value in the range 32 .. 63 indicates stack byte (value - 32).
+   A value of NOT_KNOWN indicates that the contents of that location
+   are not known.  */
+
+/* Number of stack locations tracked in content_memory.  */
+#define NUM_STACK_LOCS	32
+/* Marker for "no information"; deliberately larger than any valid
+   index into content_memory.  */
+#define NOT_KNOWN       127
+
+/* Entries 0 .. 31 describe registers, entries 32 and up describe
+   stack locations.  */
+static unsigned char content_memory [32 + NUM_STACK_LOCS];
+
+/* A single pending update, queued by add_postponed_content_update
+   and applied by process_postponed_content_update.  No update is
+   pending while saved_update_index is NOT_KNOWN.  */
+static unsigned char saved_update_index = NOT_KNOWN;
+static unsigned char saved_update_value;
+static enum machine_mode saved_update_mode;
+
+
+/* Forget everything recorded about register and stack contents,
+   including any postponed update.  */
+static inline void
+clear_content_memory (void)
+{
+  saved_update_index = NOT_KNOWN;
+  memset (content_memory, NOT_KNOWN, sizeof (content_memory));
+
+  if (dump_file != NULL)
+    fprintf (dump_file, "  clear content memory\n");
+}
+
+/* Convert LOC into an index into the content_memory array.
+   A register numbered 0 .. 31 maps to its own number.  A stack
+   pointer based MEM (as accepted by rl78_stack_based_mem) at byte
+   offset N, with 0 <= N < NUM_STACK_LOCS, maps to 32 + N.
+   If LOC is NULL_RTX or cannot be converted, return NOT_KNOWN.  */
+
+static unsigned char
+get_content_index (rtx loc)
+{
+  enum machine_mode mode;
+  HOST_WIDE_INT offset;
+
+  if (loc == NULL_RTX)
+    return NOT_KNOWN;
+
+  if (REG_P (loc))
+    {
+      if (REGNO (loc) < 32)
+	return REGNO (loc);
+      return NOT_KNOWN;
+    }
+
+  mode = GET_MODE (loc);
+
+  if (! rl78_stack_based_mem (loc, mode))
+    return NOT_KNOWN;
+
+  loc = XEXP (loc, 0);
+
+  if (REG_P (loc))
+    /* loc = MEM (SP) */
+    return 32;
+
+  /* loc = MEM (PLUS (SP, INT)).  */
+  offset = INTVAL (XEXP (loc, 1));
+
+  /* Reject negative offsets as well as large ones: 32 + OFFSET would
+     otherwise alias one of the register indices (or wrap around in
+     the unsigned char return value).  */
+  if (offset >= 0 && offset < NUM_STACK_LOCS)
+    return 32 + offset;
+
+  return NOT_KNOWN;
+}
+
+/* Produce a human readable name for content INDEX viewed in mode MODE.
+   WARNING: the result may point into a static buffer which is
+   overwritten by the next call.  */
+
+static const char *
+get_content_name (unsigned char index, enum machine_mode mode)
+{
+  static char name_buf [128];
+
+  if (index == NOT_KNOWN)
+    return "Unknown";
+
+  /* A register in any mode but HImode: the name needs no formatting.  */
+  if (index <= 31 && mode != HImode)
+    return reg_names [index];
+
+  if (index <= 31)
+    /* HImode register pair, high half first.  */
+    sprintf (name_buf, "%s%s",
+	     reg_names [index + 1], reg_names [index]);
+  else
+    sprintf (name_buf, "stack slot %d", index - 32);
+
+  return name_buf;
+}
+
+#if DEBUG_ALLOC
+
+/* Debugging aid: print to FILE every content_memory entry whose
+   contents are currently known.  */
+static void
+display_content_memory (FILE * file)
+{
+  unsigned int i;
+
+  fprintf (file, " Known memory contents:\n");
+
+  for (i = 0; i < sizeof content_memory; i++)
+    if (content_memory[i] != NOT_KNOWN)
+      {
+	/* Two separate fprintf calls: get_content_name may return a
+	   pointer to its static buffer, so two results cannot be used
+	   in the same call.  */
+	fprintf (file, "   %s contains a copy of ", get_content_name (i, QImode));
+	fprintf (file, "%s\n", get_content_name (content_memory [i], QImode));
+      }
+}
+#endif
+
+/* Record that content slot INDEX now holds a copy of slot VAL (or
+   nothing known, if VAL is NOT_KNOWN).  A known relationship is
+   recorded in both directions.  For HImode the same is done for the
+   following byte (INDEX + 1 and VAL + 1).  Finally, every other slot
+   that was recorded as holding INDEX or VAL is reset to NOT_KNOWN.  */
+static void
+update_content (unsigned char index, unsigned char val, enum machine_mode mode)
+{
+  unsigned int i;
+
+  gcc_assert (index < sizeof content_memory);
+
+  content_memory [index] = val;
+  if (val != NOT_KNOWN)
+    content_memory [val] = index;
+
+  /* Make the entry in dump_file *before* VAL is increased below.  */
+  if (dump_file)
+    {
+      fprintf (dump_file, "  %s now contains ", get_content_name (index, mode));
+      if (val == NOT_KNOWN)
+	fprintf (dump_file, "Unknown\n");
+      else
+	fprintf (dump_file, "%s and vice versa\n", get_content_name (val, mode));
+    }
+  
+  if (mode == HImode)
+    {
+      /* Handle the second byte of the pair; VAL is temporarily
+	 advanced and restored afterwards.  */
+      val = val == NOT_KNOWN ? val : val + 1;
+
+      content_memory [index + 1] = val;
+      if (val != NOT_KNOWN)
+	{
+	  content_memory [val] = index + 1;
+	  -- val;
+	}
+    }
+
+  /* Any other places that had INDEX recorded as their contents are now invalid.  */
+  for (i = 0; i < sizeof content_memory; i++)
+    {
+      /* Skip the slots that were just written (both bytes for HImode).  */
+      if (i == index
+	  || (val != NOT_KNOWN && i == val))
+	{
+	  if (mode == HImode)
+	    ++ i;
+	  continue;
+	}
+	
+      if (content_memory[i] == index
+	  || (val != NOT_KNOWN && content_memory[i] == val))
+	{
+	  content_memory[i] = NOT_KNOWN;
+
+	  if (dump_file)
+	    fprintf (dump_file, "  %s cleared\n", get_content_name (i, mode));
+
+	  /* For HImode the following slot is cleared as well.  */
+	  if (mode == HImode)
+	    content_memory[++ i] = NOT_KNOWN;
+	}
+    }
+}
+
+/* Record that LOC contains VALUE.
+   For HImode locations record that LOC+1 contains VALUE+1.
+   If LOC is not a register or stack slot, do nothing.
+   If VALUE is not a register or stack slot (this includes NULL_RTX),
+   clear the recorded content of LOC.  */
+
+static void
+record_content (rtx loc, rtx value)
+{
+  enum machine_mode mode;
+  unsigned char index;
+  unsigned char val;
+
+  if ((index = get_content_index (loc)) == NOT_KNOWN)
+    return;
+
+  val = get_content_index (value);
+
+  mode = GET_MODE (loc);
+
+  /* A location copied onto itself carries no new information.  */
+  if (val == index)
+    {
+      if (! optimize)
+	return;
+
+      /* This should not happen when optimizing.  */
+#if 1
+      /* Currently only diagnosed on stderr; the table is left alone.  */
+      fprintf (stderr, "ASSIGNMENT of location to itself detected! [%s]\n",
+	       get_content_name (val, mode));
+      return;
+#else
+      gcc_unreachable ();
+#endif
+    }
+
+  update_content (index, val, mode);
+}
+
+/* Return true when the content table says that LOC currently holds a
+   copy of VALUE.  For an HImode LOC both bytes must match.  */
+
+static bool
+already_contains (rtx loc, rtx value)
+{
+  unsigned char loc_idx = get_content_index (loc);
+  unsigned char val_idx;
+
+  if (loc_idx == NOT_KNOWN)
+    return false;
+
+  val_idx = get_content_index (value);
+  if (val_idx == NOT_KNOWN)
+    return false;
+
+  if (content_memory [loc_idx] != val_idx)
+    return false;
+
+  /* The first byte matches; HImode also needs the second one.  */
+  return (GET_MODE (loc) != HImode
+	  || content_memory [loc_idx + 1] == val_idx + 1);
+}
+
 /* Rescans an insn to see if it's recognized again.  This is done
    carefully to ensure that all the constraint information is accurate
    for the newly matched insn.  */
 static bool
 insn_ok_now (rtx insn)
 {
+  rtx pattern = PATTERN (insn);
+
   INSN_CODE (insn) = -1;
-  if (recog (PATTERN (insn), insn, 0) > -1)
+
+  if (recog (pattern, insn, 0) > -1)
     {
       extract_insn (insn);
       if (constrain_operands (1))
 	{
 #if DEBUG_ALLOC
 	  fprintf (stderr, "\033[32m");
 	  debug_rtx (insn);
 	  fprintf (stderr, "\033[0m");
 #endif
+	  if (SET_P (pattern))
+	    record_content (SET_DEST (pattern), SET_SRC (pattern));
+
 	  return true;
 	}
     }
   else
     {
-      fprintf (stderr, "\033[41;30m Unrecognized insn \033[0m\n");
+      /* We need to re-recog the insn with virtual registers to get
+	 the operands */
+      cfun->machine->virt_insns_ok = 1;
+      if (recog (pattern, insn, 0) > -1)
+	{
+	  extract_insn (insn);
+	  if (constrain_operands (0))
+	    {
+	      cfun->machine->virt_insns_ok = 0;
+	      return false;
+	    }
+	}
+
+#if DEBUG_ALLOC
+      fprintf (stderr, "\033[41;30m Unrecognized *virtual* insn \033[0m\n");
       debug_rtx (insn);
+#endif
       gcc_unreachable ();
     }
+
 #if DEBUG_ALLOC
   fprintf (stderr, "\033[31m");
   debug_rtx (insn);
   fprintf (stderr, "\033[0m");
 #endif
   return false;
 }
 
 #if DEBUG_ALLOC
-#define WORKED fprintf (stderr, "\033[48;5;22m Worked at line %d \033[0m\n", __LINE__)
+#define WORKED      fprintf (stderr, "\033[48;5;22m Worked at line %d \033[0m\n", __LINE__)
 #define FAILEDSOFAR fprintf (stderr, "\033[48;5;52m FAILED at line %d \033[0m\n", __LINE__)
-#define FAILED fprintf (stderr, "\033[48;5;52m FAILED at line %d \033[0m\n", __LINE__), gcc_unreachable()
+#define FAILED      fprintf (stderr, "\033[48;5;52m FAILED at line %d \033[0m\n", __LINE__), gcc_unreachable()
 #define MAYBE_OK(insn) if (insn_ok_now (insn)) { WORKED; return; } else { FAILEDSOFAR; }
+#define MUST_BE_OK(insn) if (insn_ok_now (insn)) { WORKED; return; } FAILED
 #else
-#define WORKED
-#define FAILEDSOFAR
 #define FAILED gcc_unreachable ()
 #define MAYBE_OK(insn) if (insn_ok_now (insn)) return;
+#define MUST_BE_OK(insn) if (insn_ok_now (insn)) return; FAILED
 #endif
 
 /* Registers into which we move the contents of virtual registers.  */
 #define X gen_rtx_REG (QImode, 0)
 #define A gen_rtx_REG (QImode, 1)
 #define C gen_rtx_REG (QImode, 2)
@@ -1784,49 +2175,133 @@  rl78_hi8 (rtx addr)
       r = gen_rtx_CONST (QImode, r);
       return r;
     }
   return rl78_subreg (QImode, addr, SImode, 2);
 }
 
-/* Copy any register values into real registers and return an RTX for
-   the same memory, now addressed by real registers.  Any needed insns
-   are emitted before BEFORE.  */
+/* Queue, without applying it yet, the fact that TO will contain
+   VALUE.  Does nothing if TO is not a tracked location.  Only one
+   update may be queued at a time.  */
+static void
+add_postponed_content_update (rtx to, rtx value)
+{
+  unsigned char to_index = get_content_index (to);
+
+  if (to_index == NOT_KNOWN)
+    return;
+
+  /* The previous postponed update must have been processed.  */
+  gcc_assert (saved_update_index == NOT_KNOWN);
+
+  saved_update_mode  = GET_MODE (to);
+  saved_update_value = get_content_index (value);
+  saved_update_index = to_index;
+}
+
+/* Apply the update queued by add_postponed_content_update, if any,
+   and mark the queue as empty again.  */
+static void
+process_postponed_content_update (void)
+{
+  if (saved_update_index == NOT_KNOWN)
+    return;
+
+  update_content (saved_update_index, saved_update_value, saved_update_mode);
+  saved_update_index = NOT_KNOWN;
+}
+
+/* Generate and emit a move of (register) FROM into TO.  If WHERE is
+   NULL the insn is emitted at the current emit position; otherwise it
+   is emitted before WHERE if BEFORE is true and after WHERE if BEFORE
+   is false.  When optimizing and BEFORE is true, nothing is emitted
+   if TO is already known to contain FROM.  Returns TO if BEFORE is
+   true, FROM otherwise.  */
+static rtx
+gen_and_emit_move (rtx to, rtx from, rtx where, bool before)
+{
+  enum machine_mode mode = GET_MODE (to);
+
+  if (optimize && before && already_contains (to, from))
+    {
+#if DEBUG_ALLOC
+      display_content_memory (stderr);
+#endif
+      if (dump_file)
+	{
+	  /* Two fprintf calls because get_content_name may return a
+	     pointer to a static buffer.  */
+	  fprintf (dump_file, " Omit move of %s into ",
+		   get_content_name (get_content_index (from), mode));
+	  fprintf (dump_file, "%s as it already contains this value\n",
+		   get_content_name (get_content_index (to), mode));
+	}
+    }
+  else
+    {
+      /* QImode uses movqi; every other mode is moved with movhi.  */
+      rtx move = mode == QImode ? gen_movqi (to, from) : gen_movhi (to, from);
+      
+      EM (move);
+
+      if (where == NULL_RTX)
+	emit_insn (move);
+      else if (before)
+	emit_insn_before (move, where);
+      else
+	{
+	  rtx note = find_reg_note (where, REG_EH_REGION, NULL_RTX);
+
+	  /* If necessary move REG_EH_REGION notes forward.
+	     cf. compiling gcc.dg/pr44545.c.  */
+	  if (note != NULL_RTX)
+	    {
+	      add_reg_note (move, REG_EH_REGION, XEXP (note, 0));
+	      remove_note (where, note);
+	    }
+
+	  emit_insn_after (move, where);
+	}
+
+      /* A move placed before WHERE takes effect in the content table
+	 at once; one placed after WHERE is only queued (presumably
+	 because WHERE itself is still being processed - confirm).  */
+      if (before)
+	record_content (to, from);
+      else
+	add_postponed_content_update (to, from);
+    }
+  return before ? to : from;
+}
+
+/* If M is MEM(REG) or MEM(PLUS(REG,INT)) and REG is virtual then
+   copy it into NEWBASE and return the updated MEM.  Otherwise just
+   return M.  Any needed insns are emitted before BEFORE.  */
 static rtx
 transcode_memory_rtx (rtx m, rtx newbase, rtx before)
 {
   rtx base, index, addendr;
   int addend = 0;
 
-  if (GET_CODE (m) != MEM)
+  if (! MEM_P (m))
     return m;
 
   if (GET_MODE (XEXP (m, 0)) == SImode)
     {
       rtx seg = rl78_hi8 (XEXP (m, 0));
 #if DEBUG_ALLOC
       fprintf (stderr, "setting ES:\n");
       debug_rtx(seg);
 #endif
-      emit_insn_before (EM(gen_movqi (A, seg)), before);
-      emit_insn_before (EM(gen_movqi_es (A)), before);
+      emit_insn_before (EM (gen_movqi (A, seg)), before);
+      emit_insn_before (EM (gen_movqi_es (A)), before);
+      record_content (A, NULL_RTX);
+
       m = change_address (m, GET_MODE (m), rl78_lo16 (XEXP (m, 0)));
     }
 
-  characterize_address (XEXP (m, 0), &base, &index, &addendr);
+  characterize_address (XEXP (m, 0), & base, & index, & addendr);
   gcc_assert (index == NULL_RTX);
 
 #if DEBUG_ALLOC
-  fprintf (stderr, "\033[33m"); debug_rtx(m); fprintf (stderr, "\033[0m");
+  fprintf (stderr, "\033[33m"); debug_rtx (m); fprintf (stderr, "\033[0m");
   debug_rtx (base);
 #endif
   if (base == NULL_RTX)
     return m;
 
   if (addendr && GET_CODE (addendr) == CONST_INT)
     addend = INTVAL (addendr);
 
+  gcc_assert (REG_P (base));
+  gcc_assert (REG_P (newbase));
+
   if (REGNO (base) == SP_REG)
     {
       if (addend >= 0 && addend  <= 255)
 	return m;
     }
 
@@ -1841,23 +2316,28 @@  transcode_memory_rtx (rtx m, rtx newbase
       /* mov ax, vreg
 	 add ax, #imm
 	 mov hl, ax	*/
       EM (emit_insn_before (gen_movhi (AX, base), before));
       EM (emit_insn_before (gen_addhi3 (AX, AX, addendr), before));
       EM (emit_insn_before (gen_movhi (newbase, AX), before));
+      record_content (AX, NULL_RTX);
+      record_content (newbase, NULL_RTX);
+
       base = newbase;
       addend = 0;
     }
   else
     {
-      EM (emit_insn_before (gen_movhi (newbase, base), before));
-      base = newbase;
+      base = gen_and_emit_move (newbase, base, before, true);
     }
 
   if (addend)
-    base = gen_rtx_PLUS (HImode, base, GEN_INT (addend));
+    {
+      record_content (base, NULL_RTX);
+      base = gen_rtx_PLUS (HImode, base, GEN_INT (addend));
+    }
 
 #if DEBUG_ALLOC
   fprintf (stderr, "\033[33m");
   debug_rtx (m);
 #endif
   m = change_address (m, GET_MODE (m), base);
@@ -1871,55 +2351,54 @@  transcode_memory_rtx (rtx m, rtx newbase
 /* Copy SRC to accumulator (A or AX), placing any generated insns
    before BEFORE.  Returns accumulator RTX.  */
 
 static rtx
 move_to_acc (int opno, rtx before)
 {
-  rtx src = OP(opno);
+  rtx src = OP (opno);
   enum machine_mode mode = GET_MODE (src);
 
-  if (GET_CODE (src) == REG
-      && REGNO (src) < 2)
+  if (REG_P (src) && REGNO (src) < 2)
     return src;
 
   if (mode == VOIDmode)
     mode = recog_data.operand_mode[opno];
 
-  if (mode == QImode)
-    {
-      EM (emit_insn_before (gen_movqi (A, src), before));
-      return A;
-    }
-  else
-    {
-      EM (emit_insn_before (gen_movhi (AX, src), before));
-      return AX;
-    }
+  return gen_and_emit_move (mode == QImode ? A : AX, src, before, true);
+}
+
+/* Emit, before BEFORE, an explicit move of SRC into the accumulator (A
+   for QImode, AX otherwise), unless SRC is a register numbered below 2.  */
+static void
+force_into_acc (rtx src, rtx before)
+{
+  rtx pat;
+
+  if (REG_P (src) && REGNO (src) < 2)
+    return;
+
+  pat = (GET_MODE (src) == QImode
+	 ? gen_movqi (A, src) : gen_movhi (AX, src));
+  EM (pat);
+  emit_insn_before (pat, before);
+  record_content (AX, NULL_RTX);
 }
 
 /* Copy accumulator (A or AX) to DEST, placing any generated insns
    after AFTER.  Returns accumulator RTX.  */
 
 static rtx
-move_from_acc (rtx dest, rtx after)
+move_from_acc (unsigned int opno, rtx after)
 {
+  rtx dest = OP (opno);
   enum machine_mode mode = GET_MODE (dest);
 
   if (REG_P (dest) && REGNO (dest) < 2)
     return dest;
 
-  if (mode == QImode)
-    {
-      EM (emit_insn_after (gen_movqi (dest, A), after));
-      return A;
-    }
-  else
-    {
-      EM (emit_insn_after (gen_movhi (dest, AX), after));
-      return AX;
-    }
+  return gen_and_emit_move (dest, mode == QImode ? A : AX, after, false);
 }
 
 /* Copy accumulator (A or AX) to REGNO, placing any generated insns
    before BEFORE.  Returns reg RTX.  */
 
 static rtx
@@ -1927,51 +2406,37 @@  move_acc_to_reg (rtx acc, int regno, rtx
 {
   enum machine_mode mode = GET_MODE (acc);
   rtx reg;
 
   reg = gen_rtx_REG (mode, regno);
 
-  if (mode == QImode)
-    {
-      EM (emit_insn_before (gen_movqi (reg, A), before));
-      return reg;
-    }
-  else
-    {
-      EM (emit_insn_before (gen_movhi (reg, AX), before));
-      return reg;
-    }
+  return gen_and_emit_move (reg, acc, before, true);
 }
 
 /* Copy SRC to X, placing any generated insns before BEFORE.
    Returns X RTX.  */
 
 static rtx
 move_to_x (int opno, rtx before)
 {
-  rtx src = OP(opno);
+  rtx src = OP (opno);
   enum machine_mode mode = GET_MODE (src);
   rtx reg;
 
   if (mode == VOIDmode)
     mode = recog_data.operand_mode[opno];
   reg = (mode == QImode) ? X : AX;
 
   if (mode == QImode || ! is_virtual_register (OP (opno)))
     {
-      OP(opno) = move_to_acc (opno, before);
-      OP(opno) = move_acc_to_reg (OP(opno), X_REG, before);
+      OP (opno) = move_to_acc (opno, before);
+      OP (opno) = move_acc_to_reg (OP(opno), X_REG, before);
       return reg;
     }
 
-  if (mode == QImode)
-    EM (emit_insn_before (gen_movqi (reg, src), before));
-  else
-    EM (emit_insn_before (gen_movhi (reg, src), before));
-
-  return reg;
+  return gen_and_emit_move (reg, src, before, true);
 }
 
 /* Copy OP(opno) to H or HL, placing any generated insns before BEFORE.
    Returns H/HL RTX.  */
 
 static rtx
@@ -1989,18 +2454,13 @@  move_to_hl (int opno, rtx before)
     {
       OP (opno) = move_to_acc (opno, before);
       OP (opno) = move_acc_to_reg (OP (opno), L_REG, before);
       return reg;
     }
 
-  if (mode == QImode)
-    EM (emit_insn_before (gen_movqi (reg, src), before));
-  else
-    EM (emit_insn_before (gen_movhi (reg, src), before));
-
-  return reg;
+  return gen_and_emit_move (reg, src, before, true);
 }
 
 /* Copy OP(opno) to E or DE, placing any generated insns before BEFORE.
    Returns E/DE RTX.  */
 
 static rtx
@@ -2019,15 +2479,13 @@  move_to_de (int opno, rtx before)
     {
       OP (opno) = move_to_acc (opno, before);
       OP (opno) = move_acc_to_reg (OP (opno), E_REG, before);
     }
   else
     {
-      rtx move = mode == QImode ? gen_movqi (reg, src) : gen_movhi (reg, src);
-
-      EM (emit_insn_before (move, before));
+      gen_and_emit_move (reg, src, before, true);
     }
 
   return reg;
 }
 
 /* Devirtualize an insn of the form (SET (op) (unop (op))).  */
@@ -2035,69 +2493,110 @@  static void
 rl78_alloc_physical_registers_op1 (rtx insn)
 {
   /* op[0] = func op[1] */
 
   /* We first try using A as the destination, then copying it
      back.  */
-  if (rtx_equal_p (OP(0), OP(1)))
+  if (rtx_equal_p (OP (0), OP (1)))
     {
-      OP(0) =
-      OP(1) = transcode_memory_rtx (OP(1), DE, insn);
+      OP (0) =
+      OP (1) = transcode_memory_rtx (OP (1), DE, insn);
     }
   else
     {
-      OP(0) = transcode_memory_rtx (OP(0), BC, insn);
-      OP(1) = transcode_memory_rtx (OP(1), HL, insn);
+      /* If necessary, load the operands into BC and HL.
+	 Check to see if we already have OP (0) in HL
+	 and if so, swap the order.  */
+      if (MEM_P (OP (0))
+	  && already_contains (HL, XEXP (OP (0), 0)))
+	{
+	  OP (0) = transcode_memory_rtx (OP (0), HL, insn);
+	  OP (1) = transcode_memory_rtx (OP (1), BC, insn);
+	}
+      else
+	{
+	  OP (0) = transcode_memory_rtx (OP (0), BC, insn);
+	  OP (1) = transcode_memory_rtx (OP (1), HL, insn);
+	}
     }
 
   MAYBE_OK (insn);
 
-  OP(0) = move_from_acc (OP(0), insn);
+  OP (0) = move_from_acc (0, insn);
 
   MAYBE_OK (insn);
 
   /* Try copying the src to acc first, then.  This is for, for
      example, ZERO_EXTEND or NOT.  */
-  OP(1) = move_to_acc (1, insn);
+  OP (1) = move_to_acc (1, insn);
 
-  MAYBE_OK (insn);
+  MUST_BE_OK (insn);
+}
+
+/* Returns true if the first constraint alternative of operand OPNUM
+   contains a constraint of type CONSTRAINT, or if the operand has no
+   constraints at all.  Later alternatives are not examined.  Assumes
+   that the current insn has already been recognised and hence the
+   constraint data has been filled in.  */
+static bool
+has_constraint (unsigned int opnum, enum constraint_num constraint)
+{
+  const char *str = recog_data.constraints[opnum];
 
-  FAILED;
+  /* No constraints means anything is accepted.  This also covers an
+     empty first alternative.  */
+  if (str == NULL || *str == 0 || *str == ',')
+    return true;
+
+  /* Step through the alternative one constraint at a time (a
+     constraint may be several characters long), stopping at the end
+     of the string or at the ',' that closes the alternative.  */
+  while (*str != 0 && *str != ',')
+    {
+      unsigned int len = CONSTRAINT_LEN (*str, str);
+
+      /* A zero length would stop the scan from advancing.  */
+      gcc_assert (len > 0);
+
+      if (lookup_constraint (str) == constraint)
+	return true;
+
+      str += len;
+    }
+
+  /* CONSTRAINT does not appear in the alternative.  */
+  return false;
 }
 
-/* Devirtualize an insn of the form (SET (op) (unop (op) (op))).  */
+/* Devirtualize an insn of the form (SET (op) (binop (op) (op))).  */
 static void
 rl78_alloc_physical_registers_op2 (rtx insn)
 {
-  /* op[0] = op[1] func op[2] */
-  rtx prev = prev_nonnote_nondebug_insn (insn);
+  rtx prev;
   rtx first;
   bool hl_used;
+  int tmp_id;
+  rtx saved_op1;
 
-  if (rtx_equal_p (OP(0), OP(1)))
+  if (rtx_equal_p (OP (0), OP (1)))
+    {
+      OP (0) =
+      OP (1) = transcode_memory_rtx (OP (1), DE, insn);
+      OP (2) = transcode_memory_rtx (OP (2), HL, insn);
+    }
+  else if (rtx_equal_p (OP (0), OP (2)))
     {
-      OP(0) =
-      OP(1) = transcode_memory_rtx (OP(1), DE, insn);
-      prev = next_nonnote_nondebug_insn (prev);
-      OP(2) = transcode_memory_rtx (OP(2), HL, insn);
-      prev = prev_nonnote_nondebug_insn (prev);
-    }
-  else if (rtx_equal_p (OP(0), OP(2)))
-    {
-      OP(1) = transcode_memory_rtx (OP(1), DE, insn);
-      prev = next_nonnote_nondebug_insn (prev);
-      OP(0) =
-      OP(2) = transcode_memory_rtx (OP(2), HL, insn);
-      prev = prev_nonnote_nondebug_insn (prev);
+      OP (1) = transcode_memory_rtx (OP (1), DE, insn);
+      OP (0) =
+      OP (2) = transcode_memory_rtx (OP (2), HL, insn);
     }
   else
     {
-      OP(0) = transcode_memory_rtx (OP(0), BC, insn);
-      OP(1) = transcode_memory_rtx (OP(1), DE, insn);
-      prev = next_nonnote_nondebug_insn (prev);
-      OP(2) = transcode_memory_rtx (OP(2), HL, insn);
+      OP (0) = transcode_memory_rtx (OP (0), BC, insn);
+      OP (1) = transcode_memory_rtx (OP (1), DE, insn);
+      OP (2) = transcode_memory_rtx (OP (2), HL, insn);
     }
 
   MAYBE_OK (insn);
 
   prev = prev_nonnote_nondebug_insn (insn);
   if (recog_data.constraints[1][0] == '%'
@@ -2107,129 +2606,343 @@  rl78_alloc_physical_registers_op2 (rtx i
     {
       rtx tmp = OP (1);
       OP (1) = OP (2);
       OP (2) = tmp;
     }
 
-  /* Make a note of wether (H)L is being used.  It matters
-     because if OP(2) alsoneeds reloading, then we must take
+  /* Make a note of whether (H)L is being used.  It matters
+     because if OP (2) also needs reloading, then we must take
      care not to corrupt HL.  */
   hl_used = reg_mentioned_p (L, OP (0)) || reg_mentioned_p (L, OP (1));
 
-  OP(0) = move_from_acc (OP (0), insn);
-  OP(1) = move_to_acc (1, insn);
+  /* If HL is not currently being used and dest == op1 then there are
+     some possible optimizations available by reloading one of the
+     operands into HL, before trying to use the accumulator.  */
+  if (optimize
+      && ! hl_used
+      && rtx_equal_p (OP (0), OP (1)))
+    {
+      /* If op0 is a Ws1 type memory address then switching the base
+	 address register to HL might allow us to perform an in-memory
+	 operation.  (eg for the INCW instruction).
+	 
+	 FIXME: Adding the move into HL is costly if this optimization is not
+	 going to work, so for now, make sure that we know that the new insn will
+	 match the requirements of the addhi3_real pattern.  Really we ought to
+	 generate a candidate sequence, test that, and then install it if the
+	 results are good.  */
+      if (satisfies_constraint_Ws1 (OP (0))
+	  && has_constraint (0, CONSTRAINT_Wh1)
+	  && (satisfies_constraint_K (OP (2)) || satisfies_constraint_L (OP (2))))
+	{
+	  rtx base, index, addend, newbase;
+
+	  characterize_address (XEXP (OP (0), 0), & base, & index, & addend);
+	  gcc_assert (index == NULL_RTX);
+	  gcc_assert (REG_P (base) && REGNO (base) == SP_REG);
+
+	  /* Ws1 addressing allows an offset of 0, Wh1 addressing requires a non-zero offset.  */
+	  if (addend != NULL_RTX)
+	    {
+	      newbase = gen_and_emit_move (HL, base, insn, true);
+	      record_content (newbase, NULL_RTX);
+	      newbase = gen_rtx_PLUS (HImode, newbase, addend);
+      
+	      OP (0) = OP (1) = change_address (OP (0), VOIDmode, newbase);
+
+	      /* We do not want to fail here as this means that
+		 we have inserted useless insns into the stream.  */
+	      MUST_BE_OK (insn);
+	    }
+	}
+      else if (REG_P (OP (0))
+	       && satisfies_constraint_Ws1 (OP (2))
+	       && has_constraint (2, CONSTRAINT_Wh1))
+	{
+	  rtx base, index, addend, newbase;
+
+	  characterize_address (XEXP (OP (2), 0), & base, & index, & addend);
+	  gcc_assert (index == NULL_RTX);
+	  gcc_assert (REG_P (base) && REGNO (base) == SP_REG);
+
+	  /* Ws1 addressing allows an offset of 0, Wh1 addressing requires a non-zero offset.  */
+	  if (addend != NULL_RTX)
+	    {
+	      gen_and_emit_move (HL, base, insn, true);
+
+	      if (REGNO (OP (0)) != X_REG)
+		{
+		  OP (1) = move_to_acc (1, insn);
+		  OP (0) = move_from_acc (0, insn);
+		}
+
+	      record_content (HL, NULL_RTX);
+	      newbase = gen_rtx_PLUS (HImode, HL, addend);
+      
+	      OP (2) = change_address (OP (2), VOIDmode, newbase);
+
+	      /* We do not want to fail here as this means that
+		 we have inserted useless insns into the stream.  */
+	      MUST_BE_OK (insn);
+	    }
+	}
+    }
+
+
+  OP (0) = move_from_acc (0, insn);
+
+  tmp_id = get_max_insn_count ();
+  saved_op1 = OP (1);
+
+  if (rtx_equal_p (OP (1), OP (2)))
+    OP (2) = OP (1) = move_to_acc (1, insn);
+  else
+    OP (1) = move_to_acc (1, insn);
 
   MAYBE_OK (insn);
 
-  /* We have to copy op2 to HL, but that involves AX, which
+  /* If we omitted the move of OP1 into the accumulator (because
+     it was already there from a previous insn), then force the
+     generation of the move instruction now.  We know that we
+     are about to emit a move into HL (or DE) via AX, and hence
+     our optimization to remove the load of OP1 is no longer valid.  */
+  if (tmp_id == get_max_insn_count ())
+    force_into_acc (saved_op1, insn);
+
+  /* We have to copy op2 to HL (or DE), but that involves AX, which
      already has a live value.  Emit it before those insns.  */
 
   if (prev)
     first = next_nonnote_nondebug_insn (prev);
   else
     for (first = insn; prev_nonnote_nondebug_insn (first); first = prev_nonnote_nondebug_insn (first))
       ;
 
   OP (2) = hl_used ? move_to_de (2, first) : move_to_hl (2, first);
   
-  MAYBE_OK (insn);
-  
-  FAILED;
+  MUST_BE_OK (insn);
 }
 
-/* Devirtualize an insn of the form (SET () (unop (op))).  */
+/* Devirtualize an insn of the form (SET (PC) (MEM/REG)).  */
 
 static void
 rl78_alloc_physical_registers_ro1 (rtx insn)
 {
-  /* (void) op[0] */
-  OP(0) = transcode_memory_rtx (OP(0), BC, insn);
+  OP (0) = transcode_memory_rtx (OP (0), BC, insn);
 
   MAYBE_OK (insn);
 
-  OP(0) = move_to_acc (0, insn);
-
-  MAYBE_OK (insn);
+  OP (0) = move_to_acc (0, insn);
 
-  FAILED;
+  MUST_BE_OK (insn);
 }
 
 /* Devirtualize a compare insn.  */
+
 static void
 rl78_alloc_physical_registers_cmp (rtx insn)
 {
-  /* op[1] cmp_op[0] op[2] */
+  int tmp_id;
+  rtx saved_op1;
   rtx prev = prev_nonnote_nondebug_insn (insn);
   rtx first;
 
-  OP(1) = transcode_memory_rtx (OP(1), DE, insn);
-  OP(2) = transcode_memory_rtx (OP(2), HL, insn);
+  OP (1) = transcode_memory_rtx (OP (1), DE, insn);
+  OP (2) = transcode_memory_rtx (OP (2), HL, insn);
 
+  /* HI compares have to have OP(1) in AX, but QI
+     compares do not, so it is worth checking here.  */
   MAYBE_OK (insn);
 
-  OP(1) = move_to_acc (1, insn);
+  /* For an HImode compare, OP(1) must always be in AX.
+     But if OP(1) is a REG (and not AX), then we can avoid
+     a reload of OP(1) if we reload OP(2) into AX and invert
+     the comparison.  */
+  if (REG_P (OP (1))
+      && REGNO (OP (1)) != AX_REG
+      && GET_MODE (OP (1)) == HImode
+      && MEM_P (OP (2)))
+    {
+      rtx cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
+
+      OP (2) = move_to_acc (2, insn);
+
+      switch (GET_CODE (cmp))
+	{
+	case EQ:
+	case NE:
+	  break;
+	case LTU: cmp = gen_rtx_GTU (HImode, OP (2), OP (1)); break;
+	case GTU: cmp = gen_rtx_LTU (HImode, OP (2), OP (1)); break;
+	case LEU: cmp = gen_rtx_GEU (HImode, OP (2), OP (1)); break;
+	case GEU: cmp = gen_rtx_LEU (HImode, OP (2), OP (1)); break;
+
+	case LT:
+	case GT:
+	case LE:
+	case GE:
+#if DEBUG_ALLOC
+	  debug_rtx (insn);
+#endif
+	default:
+	  gcc_unreachable ();
+	}
+      
+      if (GET_CODE (cmp) == EQ || GET_CODE (cmp) == NE)
+	PATTERN (insn) = gen_cbranchhi4_real (cmp, OP (2), OP (1), OP (3));
+      else
+	PATTERN (insn) = gen_cbranchhi4_real_inverted (cmp, OP (2), OP (1), OP (3));
+
+      MUST_BE_OK (insn);
+    }
+
+  /* Surprisingly, gcc can generate a comparison of a register with itself, but this
+     should be handled by the second alternative of the cbranchhi_real pattern.  */
+  if (rtx_equal_p (OP (1), OP (2)))
+    {
+      OP (1) = OP (2) = BC;
+      MUST_BE_OK (insn);
+    }
+  
+  tmp_id = get_max_insn_count ();
+  saved_op1 = OP (1);
+
+  OP (1) = move_to_acc (1, insn);
 
   MAYBE_OK (insn);
 
+  /* If we omitted the move of OP1 into the accumulator (because
+     it was already there from a previous insn), then force the
+     generation of the move instruction now.  We know that we
+     are about to emit a move into HL via AX, and hence our
+     optimization to remove the load of OP1 is no longer valid.  */
+  if (tmp_id == get_max_insn_count ())
+    force_into_acc (saved_op1, insn);
+
   /* We have to copy op2 to HL, but that involves the acc, which
      already has a live value.  Emit it before those insns.  */
-
   if (prev)
     first = next_nonnote_nondebug_insn (prev);
   else
     for (first = insn; prev_nonnote_nondebug_insn (first); first = prev_nonnote_nondebug_insn (first))
       ;
-  OP(2) = move_to_hl (2, first);
-
-  MAYBE_OK (insn);
+  OP (2) = move_to_hl (2, first);
 
-  FAILED;
+  MUST_BE_OK (insn);
 }
 
 /* Like op2, but AX = A op X.  */
+
 static void
 rl78_alloc_physical_registers_umul (rtx insn)
 {
-  /* op[0] = op[1] func op[2] */
   rtx prev = prev_nonnote_nondebug_insn (insn);
   rtx first;
+  int tmp_id;
+  rtx saved_op1;
 
-  OP(0) = transcode_memory_rtx (OP(0), BC, insn);
-  OP(1) = transcode_memory_rtx (OP(1), DE, insn);
-  OP(2) = transcode_memory_rtx (OP(2), HL, insn);
+  OP (0) = transcode_memory_rtx (OP (0), BC, insn);
+  OP (1) = transcode_memory_rtx (OP (1), DE, insn);
+  OP (2) = transcode_memory_rtx (OP (2), HL, insn);
 
   MAYBE_OK (insn);
 
   if (recog_data.constraints[1][0] == '%'
-      && is_virtual_register (OP(1))
-      && !is_virtual_register (OP(2))
-      && !CONSTANT_P (OP(2)))
+      && is_virtual_register (OP (1))
+      && !is_virtual_register (OP (2))
+      && !CONSTANT_P (OP (2)))
     {
-      rtx tmp = OP(1);
-      OP(1) = OP(2);
-      OP(2) = tmp;
+      rtx tmp = OP (1);
+      OP (1) = OP (2);
+      OP (2) = tmp;
     }
 
-  OP(0) = move_from_acc (OP(0), insn);
-  OP(1) = move_to_acc (1, insn);
+  OP (0) = move_from_acc (0, insn);
+
+  tmp_id = get_max_insn_count ();
+  saved_op1 = OP (1);
+  
+  OP (1) = move_to_acc (1, insn);
 
   MAYBE_OK (insn);
 
+  /* If we omitted the move of OP1 into the accumulator (because
+     it was already there from a previous insn), then force the
+     generation of the move instruction now.  We know that we
+     are about to emit a move into X via the accumulator, and hence
+     our optimization to remove the load of OP1 is no longer valid.  */
+  if (tmp_id == get_max_insn_count ())
+    force_into_acc (saved_op1, insn);
+
   /* We have to copy op2 to X, but that involves the acc, which
      already has a live value.  Emit it before those insns.  */
 
   if (prev)
     first = next_nonnote_nondebug_insn (prev);
   else
     for (first = insn; prev_nonnote_nondebug_insn (first); first = prev_nonnote_nondebug_insn (first))
       ;
-  OP(2) = move_to_x (2, first);
+  OP (2) = move_to_x (2, first);
+
+  MUST_BE_OK (insn);
+}
+
+/* Devirtualize a MACAX insn (a macro that clobbers AX).  Each MEM operand,
+   scanning from operand 2 down to operand 0, is passed to
+   transcode_memory_rtx with HL, then DE, then BC as the new base.  */
+static void
+rl78_alloc_address_registers_macax (rtx insn)
+{
+  int which = 0, op;
+  bool replace_in_op0 = false;
+  bool replace_in_op1 = false;
+  rtx addr;
 
   MAYBE_OK (insn);
 
-  FAILED;
+  /* Two different MEMs are not allowed.  */
+  for (op = 2; op >= 0; op --)
+    {
+      if (! MEM_P (OP (op)))
+	continue;
+      if ((op == 0 && replace_in_op0) || (op == 1 && replace_in_op1))
+	continue;
+
+      switch (which)
+	{
+	case 0:
+	  /* If we replace a MEM, make sure that we replace it for all
+	     occurrences of the same MEM in the insn.  */
+	  replace_in_op0 = (op > 0 && rtx_equal_p (OP (op), OP (0)));
+	  replace_in_op1 = (op > 1 && rtx_equal_p (OP (op), OP (1)));
+
+	  OP (op) = transcode_memory_rtx (OP (op), HL, insn);
+	  /* Only apply REGNO to a REG: look through a PLUS first.  */
+	  addr = MEM_P (OP (op)) ? XEXP (OP (op), 0) : NULL_RTX;
+	  if (addr && GET_CODE (addr) == PLUS)
+	    addr = XEXP (addr, 0);
+	  if (op == 2 && addr && REG_P (addr) && REGNO (addr) == SP_REG)
+	    {
+	      emit_insn_before (gen_movhi (HL, gen_rtx_REG (HImode, SP_REG)), insn);
+	      OP (op) = replace_rtx (OP (op), gen_rtx_REG (HImode, SP_REG), HL);
+	    }
+	  if (replace_in_op0)
+	    OP (0) = OP (op);
+	  if (replace_in_op1)
+	    OP (1) = OP (op);
+	  break;
+	case 1:
+	  OP (op) = transcode_memory_rtx (OP (op), DE, insn);
+	  break;
+	case 2:
+	  OP (op) = transcode_memory_rtx (OP (op), BC, insn);
+	  break;
+	}
+      which ++;
+    }
+  MUST_BE_OK (insn);
 }
 
 /* Scan all insns and devirtualize them.  */
 static void
 rl78_alloc_physical_registers (void)
 {
@@ -2263,29 +2976,52 @@  rl78_alloc_physical_registers (void)
 	}
     }
 
   cfun->machine->virt_insns_ok = 0;
   cfun->machine->real_insns_ok = 1;
 
+  clear_content_memory ();
+
   for (insn = get_insns (); insn; insn = curr)
     {
+      rtx pattern;
+
       curr = insn ? next_nonnote_nondebug_insn (insn) : NULL;
 
       if (!INSN_P (insn))
-	continue;
-      if (GET_CODE (PATTERN (insn)) != SET
-	  && GET_CODE (PATTERN (insn)) != CALL)
-	  continue;
+	{
+	  if (LABEL_P (insn))
+	    clear_content_memory ();
+	    
+ 	  continue;
+	}
 
-      if (GET_CODE (PATTERN (insn)) == SET
-	  && GET_CODE (SET_SRC (PATTERN (insn))) == ASM_OPERANDS)
+      if (dump_file)
+	fprintf (dump_file, "Converting insn %d\n", INSN_UID (insn));
+
+      pattern = PATTERN (insn);
+      if (GET_CODE (pattern) == PARALLEL)
+	pattern = XVECEXP (pattern, 0, 0);
+      if (JUMP_P (insn) || CALL_P (insn) || GET_CODE (pattern) == CALL)
+	clear_content_memory ();
+      if (GET_CODE (pattern) != SET
+	  && GET_CODE (pattern) != CALL)
+	continue;
+      if (GET_CODE (SET_SRC (pattern)) == ASM_OPERANDS)
 	continue;
 
       valloc_method = get_attr_valloc (insn);
 
-      PATTERN (insn)= copy_rtx_if_shared (PATTERN (insn));
+      PATTERN (insn) = copy_rtx_if_shared (PATTERN (insn));
+
+      if (valloc_method == VALLOC_MACAX)
+	{
+	  record_content (AX, NULL_RTX);
+	  record_content (BC, NULL_RTX);
+	  record_content (DE, NULL_RTX);
+	}
 
       if (insn_ok_now (insn))
 	continue;
 
       INSN_CODE (insn) = -1;
 
@@ -2309,15 +3045,24 @@  rl78_alloc_physical_registers (void)
 	  rl78_alloc_physical_registers_cmp (insn);
 	  break;
 	case VALLOC_UMUL:
 	  rl78_alloc_physical_registers_umul (insn);
 	  break;
 	case VALLOC_MACAX:
-	  /* Macro that clobbers AX */
+	  /* Macro that clobbers AX.  */
+	  rl78_alloc_address_registers_macax (insn);
+	  record_content (AX, NULL_RTX);
+	  record_content (BC, NULL_RTX);
+	  record_content (DE, NULL_RTX);
 	  break;
 	}
+
+      if (JUMP_P (insn) || CALL_P (insn) || GET_CODE (pattern) == CALL)
+	clear_content_memory ();
+      else
+	process_postponed_content_update ();
     }
 #if DEBUG_ALLOC
   fprintf (stderr, "\033[0m");
 #endif
 }
 
@@ -2423,13 +3168,13 @@  rl78_calculate_death_notes (void)
       if (dump_file)
 	{
 	  fprintf (dump_file, "\n--------------------------------------------------");
 	  fprintf (dump_file, "\nDead:");
 	  for (i = 0; i < FIRST_PSEUDO_REGISTER; i ++)
 	    if (dead[i])
-	      fprintf(dump_file, " %s", reg_names[i]);
+	      fprintf (dump_file, " %s", reg_names[i]);
 	  fprintf (dump_file, "\n");
 	  print_rtl_single (dump_file, insn);
 	}
 
       switch (GET_CODE (insn))
 	{
@@ -2648,19 +3393,19 @@  rl78_propogate_register_origins (void)
 		      {
 			origins[i] = i;
 			age[i] = 0;
 		      }
 		}
 
-	      /* Special case - our ADDSI3 macro uses AX */
+	      /* Special case - our ADDSI3 macro uses AX and sometimes BC.  */
 	      if (get_attr_valloc (insn) == VALLOC_MACAX)
 		{
 		  if (dump_file)
-		    fprintf (dump_file, "Resetting origin of AX for macro.\n");
+		    fprintf (dump_file, "Resetting origin of AX/BC for macro.\n");
 		  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
-		    if (i <= 1 || origins[i] <= 1)
+		    if (i <= 3 || origins[i] <= 3)
 		      {
 			origins[i] = i;
 			age[i] = 0;
 		      }
 		}
 
@@ -2709,19 +3454,21 @@  rl78_remove_unused_sets (void)
 
       if (find_regno_note (insn, REG_UNUSED, REGNO (dest)))
 	delete_insn (insn);
     }
 }
 
-#undef  xTARGET_MACHINE_DEPENDENT_REORG
-#define xTARGET_MACHINE_DEPENDENT_REORG  rl78_reorg
-
 /* This is the top of the devritualization pass.  */
 static void
 rl78_reorg (void)
 {
+  /* split2 only happens when optimizing, but we need all movSIs to be
+     split now.  */
+  if (optimize <= 0)
+    split_all_insns ();
+
   rl78_alloc_physical_registers ();
 
   if (dump_file)
     {
       fprintf (dump_file, "\n================DEVIRT:=AFTER=ALLOC=PHYSICAL=REGISTERS================\n");
       print_rtl_with_bb (dump_file, get_insns (), 0);
@@ -2750,23 +3497,80 @@  rl78_reorg (void)
   df_scan_blocks ();
 
   if (optimize)
     df_analyze ();
 }
 
-#undef TARGET_RETURN_IN_MEMORY
+#undef  TARGET_RETURN_IN_MEMORY
 #define TARGET_RETURN_IN_MEMORY rl78_return_in_memory
 
 static bool
 rl78_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 {
   const HOST_WIDE_INT size = int_size_in_bytes (type);
   return (size == -1 || size > 8);
 }
 
 
+#undef  TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS rl78_rtx_costs
+
+/* Implement TARGET_RTX_COSTS.  For IF_THEN_ELSE and for SImode MULT, PLUS
+   and shifts return true, having set *TOTAL to the cost of X (except for
+   constant shift counts outside 0..31); otherwise return false.  */
+static bool
+rl78_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
+		int opno ATTRIBUTE_UNUSED, int *total,
+		bool speed ATTRIBUTE_UNUSED)
+{
+  if (code != IF_THEN_ELSE && GET_MODE (x) != SImode)
+    return false;
+  switch (code)
+    {
+    case IF_THEN_ELSE:
+      *total = COSTS_N_INSNS (10);
+      return true;
+    case MULT:
+      if (RL78_MUL_RL78)
+	*total = COSTS_N_INSNS (14);
+      else if (RL78_MUL_G13)
+	*total = COSTS_N_INSNS (29);
+      else
+	*total = COSTS_N_INSNS (500);
+      return true;
+    case PLUS:
+      *total = COSTS_N_INSNS (8);
+      return true;
+    case ASHIFT:
+    case ASHIFTRT:
+    case LSHIFTRT:
+      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+	{
+	  switch (INTVAL (XEXP (x, 1)))
+	    {
+	    case 0:  *total = COSTS_N_INSNS (0);	break;
+	    case 1:  *total = COSTS_N_INSNS (6);	break;
+	    case 2: case 3: case 4: case 5: case 6: case 7:
+	      *total = COSTS_N_INSNS (10); break;
+	    case 8:  *total = COSTS_N_INSNS (6);	break;
+	    case 9: case 10: case 11: case 12: case 13: case 14: case 15:
+	      *total = COSTS_N_INSNS (10); break;
+	    case 16: *total = COSTS_N_INSNS (3);	break;
+	    case 17: case 18: case 19: case 20: case 21: case 22: case 23:
+	      *total = COSTS_N_INSNS (4); break;
+	    case 24: *total = COSTS_N_INSNS (4);	break;
+	    case 25: case 26: case 27: case 28: case 29: case 30: case 31:
+	      *total = COSTS_N_INSNS (5); break;
+	    }
+	}
+      else
+	*total = COSTS_N_INSNS (10+4*16);
+      return true;
+    }
+  return false;
+}
 
 #undef  TARGET_UNWIND_WORD_MODE
 #define TARGET_UNWIND_WORD_MODE rl78_unwind_word_mode
 
 static enum machine_mode
 rl78_unwind_word_mode (void)