diff mbox

[rl78] libgcc optimizations

Message ID 201309140613.r8E6DMEa031955@greed.delorie.com
State New
Headers show

Commit Message

DJ Delorie Sept. 14, 2013, 6:13 a.m. UTC
Various optimizations.  Committed.

2013-09-14  DJ Delorie  <dj@redhat.com>
	    Nick Clifton  <nickc@redhat.com>

	* config/rl78/mulsi3.S: Remove a few unneeded moves and branches.
	* config/rl78/vregs.h: New.
	* config/rl78/signbit.S: New file.  Implements signbit function.
	* config/rl78/divmodsi.S: New.
	* config/rl78/divmodhi.S: New.
	* config/rl78/divmodqi.S: New.
	* config/rl78/t-rl78: Build them here...
	* config/rl78/lib2div.c: ...but not here.
diff mbox

Patch

Index: config/rl78/divmodsi.S
===================================================================
--- config/rl78/divmodsi.S	(revision 0)
+++ config/rl78/divmodsi.S	(revision 0)
@@ -0,0 +1,521 @@ 
+/* SImode div/mod functions for the GCC support library for the Renesas RL78 processors.
+   Copyright (C) 2012,2013 Free Software Foundation, Inc.
+   Contributed by Red Hat.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef __RL78_G10__
+
+#include "vregs.h"
+
+	;; make_generic WHICH, NEED_RESULT
+	;;
+	;; Expands to one unsigned 32-bit shift-and-subtract divider.
+	;; WHICH is a suffix appended to every local label so that the two
+	;; expansions below do not clash.  NEED_RESULT selects the variant:
+	;;   1 - defines generic_div, the quotient ends up in r8..r11
+	;;   0 - defines generic_mod, the remainder ends up in r8..r11
+	;; On entry the numerator is at [sp+8] and the denominator at
+	;; [sp+12]; the public wrappers see the same values at [sp+4] and
+	;; [sp+8] and get here with CALL.
+	;; DE is never written, the signed wrappers keep sign flags there.
+	.macro make_generic  which,need_result
+
+	;; Whichever value is the result (quot or num) is placed in r8.
+	.if \need_result
+	quot = r8
+	num = r12
+	den = r16
+	bit = r20
+	.else
+	num = r8
+	quot = r12
+	den = r16
+	bit = r20
+	.endif
+
+	quotH = quot+2
+	quotL = quot
+	quotB0 = quot
+	quotB1 = quot+1
+	quotB2 = quot+2
+	quotB3 = quot+3
+
+	numH = num+2
+	numL = num
+	numB0 = num
+	numB1 = num+1
+	numB2 = num+2
+	numB3 = num+3
+
+	;; The upper word of den is kept in the real BC register pair
+	;; instead of the virtual registers den+2/den+3.
+#define	denH bc
+	denL = den
+	denB0 = den
+	denB1 = den+1
+#define	denB2 c
+#define	denB3 b
+
+	bitH = bit+2
+	bitL = bit
+	bitB0 = bit
+	bitB1 = bit+1
+	bitB2 = bit+2
+	bitB3 = bit+3
+
+	;; Early exit for den > num: the quotient is 0 and the remainder
+	;; is the numerator itself.  Nothing has been pushed yet.
+num_lt_den\which:
+	.if \need_result
+	movw	r8, #0
+	movw	r10, #0
+	.else
+	movw	ax, [sp+8]
+	movw	r8, ax
+	movw	ax, [sp+10]
+	movw	r10, ax
+	.endif
+	ret
+
+	;; Shortcut used by the pre-shift code below: move den (and the
+	;; bit marker) up by 16 positions in one step.
+shift_den_bit16\which:
+	movw	ax, denL
+	movw	denH, ax
+	movw	denL, #0
+	.if \need_result
+	movw	ax, bitL
+	movw	bitH, ax
+	movw	bitL, #0
+	.else
+	;; the mod variant keeps a bit *count* in bitB0, not a mask
+	mov	a, bit
+	add	a, #16
+	mov	bit, a
+	.endif
+	br	$shift_den_bit\which
+
+	;; These routines leave DE alone - the signed functions use DE
+	;; to store sign information that must remain intact
+
+	.if \need_result
+
+generic_div:
+
+	.else
+
+generic_mod:
+
+	.endif
+
+	;; (quot,rem) = 8[sp] /% 12[sp]
+
+	;; 32-bit compare den > num: the low words are only compared
+	;; when the high words are equal.
+	movw	hl, sp
+	movw	ax, [hl+14] ; denH
+	cmpw	ax, [hl+10] ; numH
+	movw	ax, [hl+12] ; denL
+	sknz
+	cmpw	ax, [hl+8] ; numL
+	bh	$num_lt_den\which
+
+	;; den and bit live in r16..r23.  Save the words this routine
+	;; overwrites, using the bank 2 names for those locations.
+	;; Every exit taken after this point has to pop them again.
+	sel	rb2
+	push	ax		; r16/r17 = denL
+;	push	bc		; r18/r19 unused, denH is kept in the real BC
+	push	de		; r20/r21 = bitL
+	push	hl		; r22/r23 = bitH
+	sel	rb0
+
+	;; SP has moved by the three pushes, but HL still holds the entry
+	;; SP, so the operands are still at [hl+8] and [hl+12].
+
+	;; copy numerator
+	movw	ax, [hl+8]
+	movw	numL, ax
+	movw	ax, [hl+10]
+	movw	numH, ax
+
+	;; copy denominator
+	movw	ax, [hl+12]
+	movw	denL, ax
+	movw	ax, [hl+14]
+	movw	denH, ax
+
+	;; OR all four bytes of den together to test it for zero.
+	movw	ax, denL
+	or	a, denB2
+	or	a, denB3	; not x
+	cmpw	ax, #0
+	bnz	$den_not_zero\which
+	;; Division by zero: return 0 in r8..r11.  Leave through the
+	;; common exit so that the words pushed above are popped; a plain
+	;; RET here would return through one of the saved words.
+	.if \need_result
+	movw	quotL, #0
+	movw	quotH, #0
+	.else
+	movw	numL, #0
+	movw	numH, #0
+	.endif
+	br	$!main_loop_done_himode\which
+
+den_not_zero\which:
+	.if \need_result
+	;; zero out quot
+	movw	quotL, #0
+	movw	quotH, #0
+	.endif
+
+	;; initialize bit to 1
+	movw	bitL, #1
+	movw	bitH, #0
+
+; while (den < num && !(den & (1L << BITS_MINUS_1)))
+
+	.if 1
+	;; see if we can short-circuit a bunch of shifts:
+	;; if denH is zero and denL <= numH, den can move up 16 bits at once
+	movw	ax, denH
+	cmpw	ax, #0
+	bnz	$shift_den_bit\which
+	movw	ax, denL
+	cmpw	ax, numH
+	bnh	$shift_den_bit16\which
+	.endif
+
+shift_den_bit\which:
+	movw	ax, denH
+	mov1	cy,a.7		; top bit of den set: cannot shift further
+	bc	$enter_main_loop\which
+	cmpw	ax, numH
+	movw	ax, denL	; we re-use this below
+	sknz
+	cmpw	ax, numL
+	bh	$enter_main_loop\which
+
+	;; den <<= 1
+;	movw	ax, denL	; already has it from the cmpw above
+	shlw	ax, 1
+	movw	denL, ax
+;	movw	ax, denH
+	rolwc	denH, 1
+;	movw	denH, ax
+
+	;; bit <<= 1
+	.if \need_result
+	movw	ax, bitL
+	shlw	ax, 1
+	movw	bitL, ax
+	movw	ax, bitH
+	rolwc	ax, 1
+	movw	bitH, ax
+	.else
+	;; if we don't need to compute the quotient, we don't need an
+	;; actual bit *mask*, we just need to keep track of which bit
+	inc	bitB0
+	.endif
+
+	br	$shift_den_bit\which
+
+	;; while (bit)
+main_loop\which:
+
+	;; if (num >= den) (cmp den > num)
+	;; the final skz lets the equal case fall into the subtraction
+	movw	ax, numH
+	cmpw	ax, denH
+	movw	ax, numL
+	sknz
+	cmpw	ax, denL
+	skz
+	bnh	$next_loop\which
+
+	;; num -= den
+	;; the borrow out of the low word is applied by hand with decw
+;	movw	ax, numL	; already has it from the cmpw above
+	subw	ax, denL
+	movw	numL, ax
+	movw	ax, numH
+	sknc
+	decw	ax
+	subw	ax, denH
+	movw	numH, ax
+
+	.if \need_result
+	;; res |= bit
+	mov	a, quotB0
+	or	a, bitB0
+	mov	quotB0, a
+	mov	a, quotB1
+	or	a, bitB1
+	mov	quotB1, a
+	mov	a, quotB2
+	or	a, bitB2
+	mov	quotB2, a
+	mov	a, quotB3
+	or	a, bitB3
+	mov	quotB3, a
+	.endif
+
+next_loop\which:
+
+	;; den >>= 1
+	;; CY carries the bit shifted out of the upper word down through
+	;; the two low bytes
+	movw	ax, denH
+	shrw	ax, 1
+	movw	denH, ax
+	mov	a, denB1
+	rorc	a, 1
+	mov	denB1, a
+	mov	a, denB0
+	rorc	a, 1
+	mov	denB0, a
+
+	;; bit >>= 1
+	.if \need_result
+	movw	ax, bitH
+	shrw	ax, 1
+	movw	bitH, ax
+	mov	a, bitB1
+	rorc	a, 1
+	mov	bitB1, a
+	mov	a, bitB0
+	rorc	a, 1
+	mov	bitB0, a
+	.else
+	dec	bitB0
+	.endif
+
+enter_main_loop\which:
+	;; stay in the 32-bit loop while bit is still in its upper half
+	.if \need_result
+	movw	ax, bitH
+	cmpw	ax, #0
+	bnz	$main_loop\which
+	.else
+	cmp	bitB0, #15
+	bh	$main_loop\which
+	.endif
+	;; bit is HImode now; check others
+	movw	ax, numH	; numerator
+	cmpw	ax, #0
+	bnz	$bit_high_set\which
+	movw	ax, denH	; denominator
+	cmpw	ax, #0
+	bz	$switch_to_himode\which
+bit_high_set\which:
+	;; num or den still needs 32 bits: keep using the wide loop
+	;; until bit runs out
+	.if \need_result
+	movw	ax, bitL
+	cmpw	ax, #0
+	.else
+	cmp0	bitB0
+	.endif
+	bnz	$main_loop\which
+
+switch_to_himode\which:
+	.if \need_result
+	movw	ax, bitL
+	cmpw	ax, #0
+	.else
+	cmp0	bitB0
+	.endif
+	bz	$main_loop_done_himode\which
+
+	;; From here on the upper words of num and den (and of bit in
+	;; the div variant) are all zero, so 16-bit operations suffice
+	;; while (bit)
+main_loop_himode\which:
+
+	;; if (num >= den) (cmp den > num)
+	movw	ax, denL
+	cmpw	ax, numL
+	bh	$next_loop_himode\which
+
+	;; num -= den
+	movw	ax, numL
+	subw	ax, denL
+	movw	numL, ax
+	movw	ax, numH
+	sknc
+	decw	ax
+	subw	ax, denH
+	movw	numH, ax
+
+	.if \need_result
+	;; res |= bit
+	mov	a, quotB0
+	or	a, bitB0
+	mov	quotB0, a
+	mov	a, quotB1
+	or	a, bitB1
+	mov	quotB1, a
+	.endif
+
+next_loop_himode\which:
+
+	;; den >>= 1
+	movw	ax, denL
+	shrw	ax, 1
+	movw	denL, ax
+
+	.if \need_result
+	;; bit >>= 1
+	movw	ax, bitL
+	shrw	ax, 1
+	movw	bitL, ax
+	.else
+	dec	bitB0
+	.endif
+
+	.if \need_result
+	movw	ax, bitL
+	cmpw	ax, #0
+	.else
+	cmp0	bitB0
+	.endif
+	bnz	$main_loop_himode\which
+
+	;; Common exit: restore the words saved at entry, in reverse order.
+main_loop_done_himode\which:
+	sel	rb2
+	pop	hl		; r22/r23 = bitH
+	pop	de		; r20/r21 = bitL
+;	pop	bc		; r18/r19 was never pushed
+	pop	ax		; r16/r17 = denL
+	sel	rb0
+
+	ret
+	.endm
+
+	;; Expand the divider twice: labels suffixed _d build the quotient
+	;; (generic_div), labels suffixed _m only keep the remainder
+	;; (generic_mod).
+	make_generic _d 1
+	make_generic _m 0
+
+;----------------------------------------------------------------------
+
+	;; Unsigned 32-bit division; the quotient is returned in r8..r11.
+	;; generic_div addresses its operands at [sp+8]/[sp+12], i.e. with
+	;; one more return address on the stack than this entry point
+	;; sees, so it is reached with CALL rather than a plain branch.
+	.global	___udivsi3
+	.type	___udivsi3,@function
+___udivsi3:
+	;; r8 = 4[sp] / 8[sp]
+	call	$!generic_div
+	ret
+	.size	___udivsi3, . - ___udivsi3
+	
+
+	;; Unsigned 32-bit remainder; the result is returned in r8..r11.
+	;; generic_mod addresses its operands at [sp+8]/[sp+12], so it is
+	;; reached with CALL rather than a plain branch.
+	.global	___umodsi3
+	.type	___umodsi3,@function
+___umodsi3:
+	;; r8 = 4[sp] % 8[sp]
+	call	$!generic_mod
+	ret
+	.size	___umodsi3, . - ___umodsi3
+
+;----------------------------------------------------------------------
+
+	;; neg_ax: negate, in place, the 32-bit value whose address is in AX.
+	;; Computes 0 - value one word at a time; the borrow from the low
+	;; word is applied to the high word by the sknc/decw pair.
+	;; Overwrites AX and HL.
+	.macro neg_ax
+	movw	hl, ax
+	movw	ax, #0
+	subw	ax, [hl]	; low word = 0 - low word
+	movw	[hl], ax
+	movw	ax, #0
+	sknc			; no borrow: skip the decrement
+	decw	ax
+	subw	ax, [hl+2]	; high word = 0 - borrow - high word
+	movw	[hl+2], ax
+	.endm
+
+	;; Signed 32-bit division; the quotient is returned in r8..r11.
+	;; Negative operands are negated in place on the stack, the
+	;; unsigned generic_div is run, and the operands are negated back
+	;; afterwards.  D is set to 1 when the numerator was negated and E
+	;; when the denominator was; the quotient is negated when exactly
+	;; one of them is set.
+	.global	___divsi3
+	.type	___divsi3,@function
+___divsi3:
+	;; r8 = 4[sp] / 8[sp]
+	movw	de, #0
+	mov	a, [sp+7]	; top byte of the numerator
+	mov1	cy, a.7
+	bc	$div_signed_num
+	mov	a, [sp+11]	; top byte of the denominator
+	mov1	cy, a.7
+	bc	$div_signed_den
+	;; both operands non-negative: plain unsigned divide
+	call	$!generic_div
+	ret
+	
+div_signed_num:
+	;; neg [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	d, #1
+	mov	a, [sp+11]
+	mov1	cy, a.7
+	bnc	$div_unsigned_den
+div_signed_den:	
+	;; neg [sp+8]
+	movw	ax, sp
+	addw	ax, #8
+	neg_ax
+	mov	e, #1
+div_unsigned_den:	
+	call	$!generic_div
+
+	mov	a, d
+	cmp0	a
+	bz	$div_skip_restore_num
+	;;  We have to restore the numerator [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	a, d		; neg_ax overwrote A, reload the flag
+div_skip_restore_num:	
+	;; A holds D here; negate the quotient only if D and E differ
+	xor	a, e
+	bz	$div_no_neg
+	movw	ax, #r8
+	neg_ax
+div_no_neg:
+	mov	a, e
+	cmp0	a
+	bz	$div_skip_restore_den
+	;;  We have to restore the denominator [sp+8]
+	movw	ax, sp
+	addw	ax, #8
+	neg_ax
+div_skip_restore_den:
+	ret
+	.size	___divsi3, . - ___divsi3
+	
+
+	;; Signed 32-bit remainder; the result is returned in r8..r11.
+	;; Negative operands are negated in place on the stack, the
+	;; unsigned generic_mod is run, and the operands are negated back
+	;; afterwards.  D is set to 1 when the numerator was negated and E
+	;; when the denominator was; the remainder is negated only when
+	;; the numerator was negative.
+	.global	___modsi3
+	.type	___modsi3,@function
+___modsi3:
+	;; r8 = 4[sp] % 8[sp]
+	movw	de, #0
+	mov	a, [sp+7]	; top byte of the numerator
+	mov1	cy, a.7
+	bc	$mod_signed_num
+	mov	a, [sp+11]	; top byte of the denominator
+	mov1	cy, a.7
+	bc	$mod_signed_den
+	;; both operands non-negative: plain unsigned remainder
+	call	$!generic_mod
+	ret
+	
+mod_signed_num:
+	;; neg [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	d, #1
+	mov	a, [sp+11]
+	mov1	cy, a.7
+	bnc	$mod_unsigned_den
+mod_signed_den:	
+	;; neg [sp+8]
+	movw	ax, sp
+	addw	ax, #8
+	neg_ax
+	mov	e, #1
+mod_unsigned_den:	
+	call	$!generic_mod
+
+	mov	a, d
+	cmp0	a
+	bz	$mod_no_neg
+	;; numerator was negative: negate the remainder in r8..r11
+	movw	ax, #r8
+	neg_ax
+	;;  We have to restore [sp+4] as well.
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+mod_no_neg:
+ .if 1
+	;; restore the denominator [sp+8] if it was negated
+	mov	a, e
+	cmp0	a
+	bz	$mod_skip_restore_den
+	movw	ax, sp
+	addw	ax, #8
+	neg_ax
+mod_skip_restore_den:	
+ .endif	
+	ret
+	.size	___modsi3, . - ___modsi3
+
+#endif
Index: config/rl78/t-rl78
===================================================================
--- config/rl78/t-rl78	(revision 202587)
+++ config/rl78/t-rl78	(working copy)
@@ -22,7 +22,11 @@  LIB2ADD = \
 	$(srcdir)/config/rl78/trampoline.S \
 	$(srcdir)/config/rl78/lib2div.c \
 	$(srcdir)/config/rl78/lib2mul.c \
 	$(srcdir)/config/rl78/lib2shift.c \
 	$(srcdir)/config/rl78/lshrsi3.S \
 	$(srcdir)/config/rl78/mulsi3.S \
+	$(srcdir)/config/rl78/divmodsi.S \
+	$(srcdir)/config/rl78/divmodhi.S \
+	$(srcdir)/config/rl78/divmodqi.S \
+	$(srcdir)/config/rl78/signbit.S \
 	$(srcdir)/config/rl78/cmpsi2.S
Index: config/rl78/signbit.S
===================================================================
--- config/rl78/signbit.S	(revision 0)
+++ config/rl78/signbit.S	(revision 0)
@@ -0,0 +1,67 @@ 
+;   Copyright (C) 2012,2013 Free Software Foundation, Inc.
+;   Contributed by Red Hat.
+; 
+; This file is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License as published by the
+; Free Software Foundation; either version 3, or (at your option) any
+; later version.
+; 
+; This file is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; General Public License for more details.
+; 
+; Under Section 7 of GPL version 3, you are granted additional
+; permissions described in the GCC Runtime Library Exception, version
+; 3.1, as published by the Free Software Foundation.
+;
+; You should have received a copy of the GNU General Public License and
+; a copy of the GCC Runtime Library Exception along with this program;
+; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+; <http://www.gnu.org/licenses/>.
+
+#include "vregs.h"
+	
+;; int signbitf (float X)
+;; int signbit  (double X)
+;; int signbitl (long double X)
+;;
+;;     `signbit' returns a nonzero value if the value of X has its sign
+;;     bit set.
+;;
+;;     This is not the same as `x < 0.0', because IEEE 754 floating point
+;;     allows zero to be signed.  The comparison `-0.0 < 0.0' is false,
+;;     but `signbit (-0.0)' will return a nonzero value.
+
+;----------------------------------------------------------------------
+
+	.text
+
+	;; _signbit and _signbitf share one body: return 1 in r8 when bit 7
+	;; of the byte at [sp+7] is set, otherwise 0.
+	;; NOTE(review): [sp+7] is the last byte of a 4-byte argument at
+	;; [sp+4], so this assumes double has the same 32-bit layout as
+	;; float here - confirm against the target ABI.
+	.global	_signbit
+_signbit:
+	.global	_signbitf
+_signbitf:
+	;; X is at [sp+4]
+	;; result is in R8..R9
+
+	movw	r8, #0
+	mov	a, [sp+7]
+	mov1	cy, a.7		; CY = sign bit
+	sknc			; sign clear: skip the next instruction, keep 0
+	movw	r8, #1
+	ret
+	.size	_signbit, . - _signbit
+	.size	_signbitf, . - _signbitf
+
+	;; _signbitl: return 1 in r8 when bit 7 of the byte at [sp+11] is
+	;; set, otherwise 0.
+	;; NOTE(review): [sp+11] is the last byte of an 8-byte argument at
+	;; [sp+4], so this assumes a 64-bit long double - confirm against
+	;; the target ABI.
+	.global	_signbitl
+_signbitl:
+	;; X is at [sp+4]
+	;; result is in R8..R9
+
+	movw	r8, #0
+	mov	a, [sp+11]
+	mov1	cy, a.7		; CY = sign bit
+	sknc			; sign clear: skip the next instruction, keep 0
+	movw	r8, #1
+	ret
+	.size	_signbitl, . - _signbitl
Index: config/rl78/divmodhi.S
===================================================================
--- config/rl78/divmodhi.S	(revision 0)
+++ config/rl78/divmodhi.S	(revision 0)
@@ -0,0 +1,337 @@ 
+/* HImode div/mod functions for the GCC support library for the Renesas RL78 processors.
+   Copyright (C) 2012,2013 Free Software Foundation, Inc.
+   Contributed by Red Hat.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef __RL78_G10__
+
+#include "vregs.h"
+
+	;; make_generic WHICH, NEED_RESULT
+	;;
+	;; Expands to one unsigned 16-bit shift-and-subtract divider.
+	;; WHICH is a suffix appended to every local label so that the two
+	;; expansions below do not clash.  NEED_RESULT selects the variant:
+	;;   1 - defines generic_div, the quotient ends up in r8
+	;;   0 - defines generic_mod, the remainder ends up in r8
+	;; On entry the numerator is at [sp+8] and the denominator at
+	;; [sp+10]; the public wrappers see the same values at [sp+4] and
+	;; [sp+6] and get here with CALL.
+	.macro make_generic  which,need_result
+
+	;; Whichever value is the result (quot or num) is placed in r8.
+	.if \need_result
+	quot = r8
+	num = r10
+	den = r12
+	bit = r14
+	.else
+	num = r8
+	quot = r10
+	den = r12
+	bit = r14
+	.endif
+
+	quotB0 = quot
+	quotB1 = quot+1
+	
+	numB0 = num
+	numB1 = num+1
+	
+	denB0 = den
+	denB1 = den+1
+	
+	bitB0 = bit
+	bitB1 = bit+1
+
+	;; From here on the preprocessor replaces bit/bitB0/bitB1 with the
+	;; real BC register pair, so the r14 assignments above go unused.
+#if 1
+#define bit	bc
+#define bitB0	c
+#define bitB1	b
+#endif
+
+	;; Early exit for den > num: the quotient is 0 and the remainder
+	;; is the numerator itself.
+num_lt_den\which:
+	.if \need_result
+	movw	r8, #0
+	.else
+	movw	ax, [sp+8]
+	movw	r8, ax
+	.endif
+	ret
+
+	;; These routines leave DE alone - the signed functions use DE
+	;; to store sign information that must remain intact
+
+	.if \need_result
+
+generic_div:
+
+	.else
+
+generic_mod:
+
+	.endif
+
+	;; (quot,rem) = 8[sp] /% 10[sp]
+
+	movw	hl, sp
+	movw	ax, [hl+10] ; den
+	cmpw	ax, [hl+8] ; num
+	bh	$num_lt_den\which
+
+	;; nothing is pushed here, so the operands stay at [hl+8] and [hl+10]
+
+	;; copy numerator
+	movw	ax, [hl+8]
+	movw	num, ax
+
+	;; copy denominator
+	movw	ax, [hl+10]
+	movw	den, ax
+
+	;; a zero denominator only clears num and returns; in the div
+	;; variant num is r10, so r8 is not written on this path
+	movw	ax, den
+	cmpw	ax, #0
+	bnz	$den_not_zero\which
+	movw	num, #0
+	ret
+
+den_not_zero\which:
+	.if \need_result
+	;; zero out quot
+	movw	quot, #0
+	.endif
+
+	;; initialize bit to 1
+	movw	bit, #1
+
+; while (den < num && !(den & (1L << BITS_MINUS_1)))
+
+shift_den_bit\which:	
+	movw	ax, den
+	mov1	cy,a.7		; top bit of den set: cannot shift further
+	bc	$enter_main_loop\which
+	cmpw	ax, num
+	bh	$enter_main_loop\which
+
+	;; den <<= 1
+;	movw	ax, den		; already has it from the cmpw above
+	shlw	ax, 1
+	movw	den, ax
+
+	;; bit <<= 1
+	.if \need_result
+#ifdef bit
+	shlw	bit, 1
+#else
+	movw	ax, bit
+	shlw	ax, 1
+	movw	bit, ax
+#endif
+	.else
+	;; if we don't need to compute the quotient, we don't need an
+	;; actual bit *mask*, we just need to keep track of which bit
+	inc	bitB0
+	.endif
+
+	br	$shift_den_bit\which
+
+	;; while (bit)
+main_loop\which:
+
+	;; if (num >= den) (cmp den > num)
+	movw	ax, den
+	cmpw	ax, num
+	bh	$next_loop\which
+
+	;; num -= den
+	movw	ax, num
+	subw	ax, den
+	movw	num, ax
+
+	.if \need_result
+	;; res |= bit
+	mov	a, quotB0
+	or	a, bitB0
+	mov	quotB0, a
+	mov	a, quotB1
+	or	a, bitB1
+	mov	quotB1, a
+	.endif
+
+next_loop\which:	
+
+	;; den >>= 1
+	movw	ax, den
+	shrw	ax, 1
+	movw	den, ax
+
+	.if \need_result
+	;; bit >>= 1
+	movw	ax, bit
+	shrw	ax, 1
+	movw	bit, ax
+	.else
+	dec	bitB0
+	.endif
+
+enter_main_loop\which:
+	;; loop until the mask (div) or the count (mod) reaches zero
+	.if \need_result
+	movw	ax, bit
+	cmpw	ax, #0
+	.else
+	cmp0	bitB0
+	.endif
+	bnz	$main_loop\which
+
+main_loop_done\which:	
+	ret
+	.endm
+
+	;; Expand the divider twice: labels suffixed _d build the quotient
+	;; (generic_div), labels suffixed _m only keep the remainder
+	;; (generic_mod).
+	make_generic _d 1
+	make_generic _m 0
+
+;----------------------------------------------------------------------
+
+	;; Unsigned 16-bit division; the quotient is returned in r8.
+	;; generic_div addresses its operands at [sp+8]/[sp+10], so it is
+	;; reached with CALL rather than a plain branch.
+	.global	___udivhi3
+	.type	___udivhi3,@function
+___udivhi3:
+	;; r8 = 4[sp] / 6[sp]
+	call	$!generic_div
+	ret
+	.size	___udivhi3, . - ___udivhi3
+	
+
+	;; Unsigned 16-bit remainder; the result is returned in r8.
+	;; generic_mod addresses its operands at [sp+8]/[sp+10], so it is
+	;; reached with CALL rather than a plain branch.
+	.global	___umodhi3
+	.type	___umodhi3,@function
+___umodhi3:
+	;; r8 = 4[sp] % 6[sp]
+	call	$!generic_mod
+	ret
+	.size	___umodhi3, . - ___umodhi3
+
+;----------------------------------------------------------------------
+
+	;; neg_ax: negate, in place, the 16-bit value whose address is in AX
+	;; (stores 0 - value).  Overwrites AX and HL.
+	.macro neg_ax
+	movw	hl, ax
+	movw	ax, #0
+	subw	ax, [hl]
+	movw	[hl], ax
+	.endm
+
+	;; Signed 16-bit division; the quotient is returned in r8.
+	;; Negative operands are negated in place on the stack, the
+	;; unsigned generic_div is run, and the operands are negated back
+	;; afterwards.  D is set to 1 when the numerator was negated and E
+	;; when the denominator was; the quotient is negated when exactly
+	;; one of them is set.
+	.global	___divhi3
+	.type	___divhi3,@function
+___divhi3:
+	;; r8 = 4[sp] / 6[sp]
+	movw	de, #0
+	mov	a, [sp+5]	; top byte of the numerator
+	mov1	cy, a.7
+	bc	$div_signed_num
+	mov	a, [sp+7]	; top byte of the denominator
+	mov1	cy, a.7
+	bc	$div_signed_den
+	;; both operands non-negative: plain unsigned divide
+	call	$!generic_div
+	ret
+	
+div_signed_num:
+	;; neg [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	d, #1
+	mov	a, [sp+7]
+	mov1	cy, a.7
+	bnc	$div_unsigned_den
+div_signed_den:	
+	;; neg [sp+6]
+	movw	ax, sp
+	addw	ax, #6
+	neg_ax
+	mov	e, #1
+div_unsigned_den:	
+	call	$!generic_div
+
+	mov	a, d
+	cmp0	a
+	bz	$div_skip_restore_num
+	;;  We have to restore the numerator [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	a, d		; neg_ax overwrote A, reload the flag
+div_skip_restore_num:	
+	;; A holds D here; negate the quotient only if D and E differ
+	xor	a, e
+	bz	$div_no_neg
+	movw	ax, #r8
+	neg_ax
+div_no_neg:
+	;; restore the denominator [sp+6] if it was negated
+	mov	a, e
+	cmp0	a
+	bz	$div_skip_restore_den
+	movw	ax, sp
+	addw	ax, #6
+	neg_ax
+div_skip_restore_den:	
+	ret
+	.size	___divhi3, . - ___divhi3
+	
+
+	;; Signed 16-bit remainder; the result is returned in r8.
+	;; Negative operands are negated in place on the stack, the
+	;; unsigned generic_mod is run, and the operands are negated back
+	;; afterwards.  D is set to 1 when the numerator was negated and E
+	;; when the denominator was; the remainder is negated only when
+	;; the numerator was negative.
+	.global	___modhi3
+	.type	___modhi3,@function
+___modhi3:
+	;; r8 = 4[sp] % 6[sp]
+	movw	de, #0
+	mov	a, [sp+5]	; top byte of the numerator
+	mov1	cy, a.7
+	bc	$mod_signed_num
+	mov	a, [sp+7]	; top byte of the denominator
+	mov1	cy, a.7
+	bc	$mod_signed_den
+	;; both operands non-negative: plain unsigned remainder
+	call	$!generic_mod
+	ret
+
+mod_signed_num:
+	;; neg [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	d, #1
+	mov	a, [sp+7]
+	mov1	cy, a.7
+	bnc	$mod_unsigned_den
+mod_signed_den:
+	;; neg [sp+6]
+	movw	ax, sp
+	addw	ax, #6
+	neg_ax
+	;; record the negation; without this flag the restore at
+	;; mod_no_neg never runs and the stacked denominator stays negated
+	mov	e, #1
+mod_unsigned_den:
+	call	$!generic_mod
+
+	mov	a, d
+	cmp0	a
+	bz	$mod_no_neg
+	;; numerator was negative: negate the remainder in r8
+	movw	ax, #r8
+	neg_ax
+	;;  Also restore numerator
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+mod_no_neg:
+	;; restore the denominator [sp+6] if it was negated
+	mov	a, e
+	cmp0	a
+	bz	$mod_skip_restore_den
+	movw	ax, sp
+	addw	ax, #6
+	neg_ax
+mod_skip_restore_den:
+	ret
+	.size	___modhi3, . - ___modhi3
+
+#endif
Index: config/rl78/lib2div.c
===================================================================
--- config/rl78/lib2div.c	(revision 202587)
+++ config/rl78/lib2div.c	(working copy)
@@ -31,12 +31,14 @@  typedef          int  sint08_type   __at
 typedef unsigned int  uint08_type   __attribute__ ((mode (QI)));
 typedef int           word_type     __attribute__ ((mode (__word__)));
 
 #define C3B(a,b,c) a##b##c
 #define C3(a,b,c) C3B(a,b,c)
 
+#if 0
+
 #define UINT_TYPE	uint32_type
 #define SINT_TYPE	sint32_type
 #define BITS_MINUS_1	31
 #define NAME_MODE	si
 
 #include "rl78-divmod.h"
@@ -62,12 +64,14 @@  typedef int           word_type     __at
 #define SINT_TYPE	sint08_type
 #define BITS_MINUS_1	7
 #define NAME_MODE	qi
 
 #include "rl78-divmod.h"
 
+#endif
+
 /* See the comment by the definition of LIBGCC2_UNITS_PER_WORD in
    m32c.h for why we are creating extra versions of some of the
    functions defined in libgcc2.c.  */
 
 #define LIBGCC2_UNITS_PER_WORD 2
 
Index: config/rl78/vregs.h
===================================================================
--- config/rl78/vregs.h	(revision 0)
+++ config/rl78/vregs.h	(revision 0)
@@ -0,0 +1,32 @@ 
+
+; Addresses of the byte-wide virtual registers used by the libgcc
+; assembly routines.  Each name is a plain assembler symbol, so a
+; 16-bit access through rN also covers rN+1.
+;
+; real
+; GAS defines r0..r7 as aliases for real registers; we want the saddr
+; forms here.
+r_0	=	0xffef8
+r_1	=	0xffef9
+r_2	=	0xffefa
+r_3	=	0xffefb
+r_4	=	0xffefc
+r_5	=	0xffefd
+r_6	=	0xffefe
+r_7	=	0xffeff
+
+; clobberable
+; NOTE(review): presumably these may be overwritten by a callee without
+; being saved - confirm against the RL78 GCC ABI.
+r8	=	0xffef0
+r9	=	0xffef1
+r10	=	0xffef2
+r11	=	0xffef3
+r12	=	0xffef4
+r13	=	0xffef5
+r14	=	0xffef6
+r15	=	0xffef7
+; preserved
+; NOTE(review): presumably a routine that writes these must save and
+; restore them - confirm against the RL78 GCC ABI.
+r16	=	0xffee8
+r17	=	0xffee9
+r18	=	0xffeea
+r19	=	0xffeeb
+r20	=	0xffeec
+r21	=	0xffeed
+r22	=	0xffeee
+r23	=	0xffeef
+
Index: config/rl78/mulsi3.S
===================================================================
--- config/rl78/mulsi3.S	(revision 202587)
+++ config/rl78/mulsi3.S	(working copy)
@@ -67,56 +67,52 @@  r23	=	0xffeef
 	.global	___mulsi3		; (USI a, USI b)
 ___mulsi3:
 	;; A is at [sp+4]
 	;; B is at [sp+8]
 	;; result is in R8..R11
 
-	movw	ax, sp
-	addw	ax, #4
-	movw	hl, ax
-
 	sel	rb2
 	push	ax
 	push	bc
 	sel	rb0
 
 	clrw	ax
 	movw	r8, ax
 	movw	r16, ax
 
-	movw	ax, [hl+6]
+	movw	ax, [sp+14]
 	cmpw	ax, #0
 	bz	$1f
 	cmpw	ax, #0xffff
 	bnz	$2f
-	movw	ax, [hl]
+	movw	ax, [sp+8]
 	sel	rb1
 	subw	ax, r_0
 	sel	rb0
 	br	$1f
 2:	
 	movw	bc, ax
-	movw	ax, [hl]
+	movw	ax, [sp+8]
 	cmpw	ax, #0
 	skz
 	call	!.Lmul_hi
 1:	
 
-	movw	ax, [hl+2]
+	movw	ax, [sp+10]
 	cmpw	ax, #0
 	bz	$1f
 	cmpw	ax, #0xffff
 	bnz	$2f
-	movw	ax, [hl+4]
+	movw	ax, [sp+12]
 	sel	rb1
 	subw	ax, r_0
 	sel	rb0
 	br	$1f
 2:	
 	movw	bc, ax
-	movw	ax, [hl+4]
+	movw	ax, [sp+12]
 	cmpw	ax, #0
 	skz
 	call	!.Lmul_hi
 1:	
 
 	movw	ax, r8
@@ -127,15 +123,15 @@  ___mulsi3:
 	;; now do R16:R8 += op1L * op2L
 
 	;; op1 is in AX.0 (needs to shrw)
 	;; op2 is in BC.2 and BC.1 (bc can shlw/rolcw)
 	;; res is in AX.2 and AX.1 (needs to addw)
 
-	movw	ax, [hl]
+	movw	ax, [sp+8]
 	movw	r10, ax		; BC.1
-	movw	ax, [hl+4]
+	movw	ax, [sp+12]
 
 	cmpw	ax, r10
 	bc	$.Lmul_hisi_top
 	movw	bc, r10
 	movw	r10, ax
 	movw	ax, bc
@@ -188,12 +184,19 @@  ___mulsi3:
 	sel	rb0
 
 	ret
 
 ;----------------------------------------------------------------------
 
+	.global	___mulhi3
+___mulhi3:
+	movw	r8, #0
+	movw	ax, [sp+6]
+	movw	bc, ax
+	movw	ax, [sp+4]
+
 	;; R8 += AX * BC
 .Lmul_hi:
 	cmpw	ax, bc
 	skc
 	xchw	ax, bc
 	br	$.Lmul_hi_loop
@@ -215,21 +218,6 @@  ___mulsi3:
 	bc	$.Lmul_hi_top
 	cmpw	ax, #0
 	bnz	$.Lmul_hi_no_add
 
 .Lmul_hi_done:
 	ret
-
-;----------------------------------------------------------------------
-
-	.global	___mulhi3
-___mulhi3:
-	sel	rb1
-	clrw	ax
-	sel	rb0
-	movw	ax, sp
-	addw	ax, #4
-	movw	hl, ax
-	movw	ax, [hl+2]
-	movw	bc, ax
-	movw	ax, [hl]
-	br	$.Lmul_hi
Index: config/rl78/divmodqi.S
===================================================================
--- config/rl78/divmodqi.S	(revision 0)
+++ config/rl78/divmodqi.S	(revision 0)
@@ -0,0 +1,310 @@ 
+/* QImode div/mod functions for the GCC support library for the Renesas RL78 processors.
+   Copyright (C) 2012,2013 Free Software Foundation, Inc.
+   Contributed by Red Hat.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef __RL78_G10__
+
+#include "vregs.h"
+
+	;; make_generic WHICH, NEED_RESULT
+	;;
+	;; Expands to one unsigned 8-bit shift-and-subtract divider.
+	;; WHICH is a suffix appended to every local label so that the two
+	;; expansions below do not clash.  NEED_RESULT selects the variant:
+	;;   1 - defines generic_div, the quotient ends up in r8
+	;;   0 - defines generic_mod, the remainder ends up in r8
+	;; The caller passes HL such that the numerator is at [hl+4] and
+	;; the denominator at [hl+6].
+	.macro make_generic  which,need_result
+
+	;; Whichever value is the result (quot or num) is placed in r8.
+	.if \need_result
+	quot = r8
+	num = r10
+	den = r12
+	bit = r14
+	.else
+	num = r8
+	quot = r10
+	den = r12
+	bit = r14
+	.endif
+
+	;; From here on the preprocessor maps bit to B and den to C, so
+	;; that the pair can be shifted together as BC (bitden); the r12
+	;; and r14 assignments above go unused.
+#if 1
+#define bit	b
+#define den	c
+#define bitden	bc
+#endif
+
+	;; Early exit for num < den: the quotient is 0 and the remainder
+	;; is the numerator itself.
+num_lt_den\which:
+	.if \need_result
+	mov	r8, #0
+	.else
+	mov	a, [hl+4]
+	mov	r8, a
+	.endif
+	ret
+
+	;; Early exit for num == den: quotient 1, remainder 0.  This is
+	;; tested before the zero check, so 0 / 0 also ends up here.
+num_eq_den\which:
+	.if \need_result
+	mov	r8, #1
+	.else
+	mov	r8, #0
+	.endif
+	ret
+	
+	;; Division by zero returns 0xff in r8 for both variants.
+den_is_zero\which:
+	mov	r8, #0xff
+	ret
+
+	;; These routines leave DE alone - the signed functions use DE
+	;; to store sign information that must remain intact
+
+	.if \need_result
+
+generic_div:
+
+	.else
+
+generic_mod:
+
+	.endif
+
+	;; (quot,rem) = 4[hl] /% 6[hl]
+
+	mov	a, [hl+4] ; num
+	cmp	a, [hl+6] ; den
+	bz	$num_eq_den\which
+	bnh	$num_lt_den\which
+	
+	;; copy numerator
+;	mov	a, [hl+4]	; already there from above
+	mov	num, a
+
+	;; copy denominator
+	mov	a, [hl+6]
+	mov	den, a
+
+	cmp0	den
+	bz	$den_is_zero\which
+
+den_not_zero\which:
+	.if \need_result
+	;; zero out quot
+	mov	quot, #0
+	.endif
+
+	;; initialize bit to 1
+	mov	bit, #1
+
+; while (den < num && !(den & (1L << BITS_MINUS_1)))
+
+	;; the loop body is a macro so it can be emitted twice per
+	;; iteration below
+shift_den_bit\which:
+	.macro	sdb_one\which
+	mov	a, den
+	mov1	cy,a.7		; top bit of den set: cannot shift further
+	bc	$enter_main_loop\which
+	cmp	a, num
+	bh	$enter_main_loop\which
+
+	;; den <<= 1
+;	mov	a, den		; already has it from the cmp above
+	shl	a, 1
+	mov	den, a
+
+	;; bit <<= 1
+	shl	bit, 1
+	.endm
+
+	sdb_one\which
+	sdb_one\which
+
+	br	$shift_den_bit\which
+
+	;; while (bit)
+main_loop\which:
+
+	;; if (num >= den) (cmp den > num)
+	mov	a, den
+	cmp	a, num
+	bh	$next_loop\which
+
+	;; num -= den
+	mov	a, num
+	sub	a, den
+	mov	num, a
+
+	.if \need_result
+	;; res |= bit
+	mov	a, quot
+	or	a, bit
+	mov	quot, a
+	.endif
+
+next_loop\which:	
+
+	;; den, bit >>= 1
+	;; one 16-bit shift of BC moves both.  The low bit of bit (B)
+	;; falls into the top of den (C) only when bit was 1, and then
+	;; bit becomes 0 and the loop below ends without using den again.
+	movw	ax, bitden
+	shrw	ax, 1
+	movw	bitden, ax
+
+enter_main_loop\which:
+	cmp0	bit
+	bnz	$main_loop\which
+
+main_loop_done\which:	
+	ret
+	.endm
+
+	;; Expand the divider twice: labels suffixed _d build the quotient
+	;; (generic_div), labels suffixed _m only keep the remainder
+	;; (generic_mod).
+	make_generic _d 1
+	make_generic _m 0
+
+;----------------------------------------------------------------------
+
+	;; Unsigned 8-bit division; the quotient is returned in r8.
+	;; generic_div addresses its operands through HL, so this entry
+	;; point can branch to it and let it return straight to the caller.
+	.global	___udivqi3
+	.type	___udivqi3,@function
+___udivqi3:
+	;; r8 = 4[sp] / 6[sp]
+	movw	hl, sp
+	br	$!generic_div
+	.size	___udivqi3, . - ___udivqi3
+	
+
+	;; Unsigned 8-bit remainder; the result is returned in r8.
+	;; generic_mod addresses its operands through HL, so this entry
+	;; point can branch to it and let it return straight to the caller.
+	.global	___umodqi3
+	.type	___umodqi3,@function
+___umodqi3:
+	;; r8 = 4[sp] % 6[sp]
+	movw	hl, sp
+	br	$!generic_mod
+	.size	___umodqi3, . - ___umodqi3
+
+;----------------------------------------------------------------------
+
+	;; neg_ax: negate, in place, the byte whose address is in AX
+	;; (stores 0 - value).  Overwrites AX and HL.
+	.macro neg_ax
+	movw	hl, ax
+	mov	a, #0
+	sub	a, [hl]
+	mov	[hl], a
+	.endm
+
+	;; Signed 8-bit division; the quotient is returned in r8.
+	;; Negative operands are negated in place on the stack, the
+	;; unsigned generic_div is run, and the operands are negated back
+	;; afterwards.  D is set to 1 when the numerator was negated and E
+	;; when the denominator was; the quotient is negated when exactly
+	;; one of them is set.
+	;; HL is loaded with the entry SP because generic_div reads its
+	;; operands from [hl+4] and [hl+6].
+	.global	___divqi3
+	.type	___divqi3,@function
+___divqi3:
+	;; r8 = 4[sp] / 6[sp]
+	movw	hl, sp
+	movw	de, #0
+	mov	a, [sp+4]	; numerator
+	mov1	cy, a.7
+	bc	$div_signed_num
+	mov	a, [sp+6]	; denominator
+	mov1	cy, a.7
+	bc	$div_signed_den
+	;; both operands non-negative: branch to the unsigned divide and
+	;; let it return to our caller
+	br	$!generic_div
+
+div_signed_num:
+	;; neg [sp+4]
+	mov	a, #0
+	sub	a, [hl+4]
+	mov	[hl+4], a
+	mov	d, #1
+	mov	a, [sp+6]
+	;; test the sign bit (bit 7) of the denominator; testing bit 6
+	;; misclassifies +64..+127 and -128..-65
+	mov1	cy, a.7
+	bnc	$div_unsigned_den
+div_signed_den:
+	;; neg [sp+6]
+	mov	a, #0
+	sub	a, [hl+6]
+	mov	[hl+6], a
+	mov	e, #1
+div_unsigned_den:
+	call	$!generic_div
+
+	mov	a, d
+	cmp0	a
+	bz	$div_skip_restore_num
+	;;  We have to restore the numerator [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	a, d		; neg_ax overwrote A, reload the flag
+div_skip_restore_num:
+	;; A holds D here; negate the quotient only if D and E differ
+	xor	a, e
+	bz	$div_no_neg
+	movw	ax, #r8
+	neg_ax
+div_no_neg:
+	;; restore the denominator [sp+6] if it was negated
+	mov	a, e
+	cmp0	a
+	bz	$div_skip_restore_den
+	movw	ax, sp
+	addw	ax, #6
+	neg_ax
+div_skip_restore_den:
+	ret
+	.size	___divqi3, . - ___divqi3
+	
+
+	;; Signed 8-bit remainder; the result is returned in r8.
+	;; Negative operands are negated in place on the stack, the
+	;; unsigned generic_mod is run, and the operands are negated back
+	;; afterwards.  D is set to 1 when the numerator was negated and E
+	;; when the denominator was; the remainder is negated only when
+	;; the numerator was negative.
+	;; HL is loaded with the entry SP because generic_mod reads its
+	;; operands from [hl+4] and [hl+6].
+	.global	___modqi3
+	.type	___modqi3,@function
+___modqi3:
+	;; r8 = 4[sp] % 6[sp]
+	movw	hl, sp
+	movw	de, #0
+	mov	a, [hl+4]	; numerator
+	mov1	cy, a.7
+	bc	$mod_signed_num
+	mov	a, [hl+6]	; denominator
+	mov1	cy, a.7
+	bc	$mod_signed_den
+	;; both operands non-negative: branch to the unsigned routine and
+	;; let it return to our caller
+	br	$!generic_mod
+	
+mod_signed_num:
+	;; neg [sp+4]
+	mov	a, #0
+	sub	a, [hl+4]
+	mov	[hl+4], a
+	mov	d, #1
+	mov	a, [hl+6]
+	mov1	cy, a.7
+	bnc	$mod_unsigned_den
+mod_signed_den:	
+	;; neg [sp+6]
+	mov	a, #0
+	sub	a, [hl+6]
+	mov	[hl+6], a
+	mov	e, #1
+mod_unsigned_den:	
+	call	$!generic_mod
+
+	mov	a, d
+	cmp0	a
+	bz	$mod_no_neg
+	;; numerator was negative: negate the remainder in r8
+	mov	a, #0
+	sub	a, r8
+	mov	r8, a
+	;;  Also restore numerator
+	movw 	ax, sp
+	addw	ax, #4
+	neg_ax
+mod_no_neg:
+	;; restore the denominator [sp+6] if it was negated
+	mov	a, e
+	cmp0	a
+	bz	$mod_skip_restore_den
+	movw	ax, sp
+	addw	ax, #6
+	neg_ax
+mod_skip_restore_den:	
+	ret
+	.size	___modqi3, . - ___modqi3
+
+#endif