===================================================================
@@ -446,3 +446,34 @@ (define_insn "<code_stdname><mode>3_insn
"%~call __<code_stdname>di3"
[(set_attr "adjust_len" "call")
(set_attr "cc" "clobber")])
+
+;; "umulsidi3"
+;; "mulsidi3"
+(define_expand "<extend_u>mulsidi3"
+ [(parallel [(match_operand:DI 0 "register_operand" "")
+ (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")
+ ;; Just to mention the iterator
+ (clobber (any_extend:SI (match_dup 1)))])]
+ "avr_have_dimode"
+ {
+ emit_move_insn (gen_rtx_REG (SImode, 22), operands[1]); // A --> R22..R25
+ emit_move_insn (gen_rtx_REG (SImode, 18), operands[2]); // B --> R18..R21
+ emit_insn (gen_<extend_u>mulsidi3_insn());
+ // Use emit_move_insn and not open-coded expand because of missing movdi
+ emit_move_insn (operands[0], gen_rtx_REG (DImode, ACC_A));
+ DONE;
+ })
+
+;; "umulsidi3_insn"
+;; "mulsidi3_insn"
+(define_insn "<extend_u>mulsidi3_insn"
+ [(set (reg:DI ACC_A)
+ (mult:DI (any_extend:DI (reg:SI 18))
+ (any_extend:DI (reg:SI 22))))
+ (clobber (reg:HI REG_X)) ; the library function also sets X and Z,
+ (clobber (reg:HI REG_Z))] ; cf. the ABI note on __umulsidi3
+ "avr_have_dimode"
+ "%~call __<extend_u>mulsidi3"
+ [(set_attr "adjust_len" "call")
+ (set_attr "cc" "clobber")])
===================================================================
@@ -669,6 +669,210 @@ ENDF __mulusa3_round
#undef GUARD
+/***********************************************************
+ Fixed unsigned saturated Multiplication 8.8 x 8.8
+***********************************************************/
+
+#define C0 22 /* C0..C3: the 32-bit product returned by __(u)mulhisi3 */
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+#define SS __tmp_reg__ /* scratch holding the saturation sign */
+
+#if defined (L_usmuluha3)
+DEFUN __usmuluha3
+ ;; Widening multiply
+#ifdef __AVR_HAVE_MUL__
+ ;; Adjust interface
+ movw R26, R22
+ movw R18, R24
+#endif /* HAVE MUL */
+ XCALL __umulhisi3
+ tst C3 ; overflow if high byte of the 16.16 product is non-zero
+ brne .Lmax
+ ;; Round, target is in C1..C2
+ lsl C0 ; C = rounding bit
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ brcs .Lmax ; rounding overflowed into the integral part
+ ;; Move result into place
+ mov C3, C2
+ mov C2, C1
+ ret
+.Lmax:
+ ;; Saturate
+ ldi C2, 0xff
+ ldi C3, 0xff
+ ret
+ENDF __usmuluha3
+#endif /* L_usmuluha3 */
+
+/***********************************************************
+ Fixed signed saturated Multiplication s8.7 x s8.7
+***********************************************************/
+
+#if defined (L_ssmulha3)
+DEFUN __ssmulha3
+ ;; Widening multiply
+#ifdef __AVR_HAVE_MUL__
+ ;; Adjust interface
+ movw R26, R22
+ movw R18, R24
+#endif /* HAVE MUL */
+ XCALL __mulhisi3
+ ;; Adjust decimal point
+ lsl C0
+ rol C1
+ rol C2
+ brvs .LsatC3.3 ; shift overflowed: saturate according to sign in C3
+ ;; The 9 MSBs must be the same
+ rol C3
+ sbc SS, SS ; SS = 0x00 or 0xff according to C (the sign)
+ cp C3, SS
+ brne .LsatSS
+ ;; Round
+ lsl C0 ; C = rounding bit
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ brvs .Lmax ; rounding overflowed: saturate to 0x7fff
+ ;; Move result into place
+ mov C3, C2
+ mov C2, C1
+ ret
+.Lmax:
+ ;; Load 0x7fff
+ clr C3
+.LsatC3.3:
+ ;; C3 < 0 --> 0x8000
+ ;; C3 >= 0 --> 0x7fff
+ mov SS, C3
+.LsatSS:
+ ;; Load min / max value:
+ ;; SS = -1 --> 0x8000
+ ;; SS = 0 --> 0x7fff
+ ldi C3, 0x7f
+ ldi C2, 0xff
+ sbrc SS, 7 ; if the saturation value shall be negative...
+ adiw C2, 1 ; ...bump 0x7fff to 0x8000
+ ret
+ENDF __ssmulha3
+#endif /* L_ssmulha3 */
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef SS
+
+/***********************************************************
+ Fixed unsigned saturated Multiplication 16.16 x 16.16
+***********************************************************/
+
+#define C0 18 /* C0..C7: the 64-bit product returned by __(u)mulsidi3 */
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+#define C4 C0+4
+#define C5 C0+5
+#define C6 C0+6
+#define C7 C0+7
+#define SS __tmp_reg__ /* scratch holding the saturation sign */
+
+#if defined (L_usmulusa3)
+;; R22[4] = R22[4] *{usat} R18[4]
+;; Ordinary ABI function
+DEFUN __usmulusa3
+ ;; Widening multiply
+ XCALL __umulsidi3
+ or C7, C6 ; Z iff the two high bytes of the 32.32 product are 0
+ brne .Lmax
+ ;; Round, target is in C2..C5
+ lsl C1 ; C = rounding bit
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+ adc C4, __zero_reg__
+ adc C5, __zero_reg__
+ brcs .Lmax ; rounding overflowed into the integral part
+ ;; Move result into place
+ wmov C6, C4
+ wmov C4, C2
+ ret
+.Lmax:
+ ;; Saturate
+ ldi C7, 0xff
+ ldi C6, 0xff
+ wmov C4, C6 ; C4:C5 = 0xffff, too --> result = 0xffffffff
+ ret
+ENDF __usmulusa3
+#endif /* L_usmulusa3 */
+
+/***********************************************************
+ Fixed signed saturated Multiplication s16.15 x s16.15
+***********************************************************/
+
+#if defined (L_ssmulsa3)
+;; R22[4] = R22[4] *{ssat} R18[4]
+;; Ordinary ABI function
+DEFUN __ssmulsa3
+ ;; Widening multiply
+ XCALL __mulsidi3
+ ;; Adjust decimal point
+ lsl C1
+ rol C2
+ rol C3
+ rol C4
+ rol C5
+ brvs .LsatC7.7 ; shift overflowed: saturate according to sign in C7
+ ;; The 17 MSBs must be the same
+ rol C6
+ rol C7
+ sbc SS, SS ; SS = 0x00 or 0xff according to C (the sign)
+ cp C6, SS
+ cpc C7, SS
+ brne .LsatSS
+ ;; Round
+ lsl C1 ; C = rounding bit
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+ adc C4, __zero_reg__
+ adc C5, __zero_reg__
+ brvs .Lmax ; rounding overflowed: saturate to 0x7fffffff
+ ;; Move result into place
+ wmov C6, C4
+ wmov C4, C2
+ ret
+
+.Lmax:
+ ;; Load 0x7fffffff
+ clr C7
+.LsatC7.7:
+ ;; C7 < 0 --> 0x80000000
+ ;; C7 >= 0 --> 0x7fffffff
+ lsl C7 ; C = sign of C7
+ sbc SS, SS ; SS = 0x00 (C7 >= 0) or 0xff (C7 < 0)
+.LsatSS:
+ ;; Load min / max value:
+ ;; SS = -1 --> 0x80000000
+ ;; SS = 0 --> 0x7fffffff
+ com SS ; invert: SS = 0x00 --> min case, 0xff --> max case
+ mov C4, SS
+ mov C5, C4
+ wmov C6, C4
+ subi C7, 0x80 ; 0xff... --> 0x7fffffff, 0x00... --> 0x80000000
+ ret
+ENDF __ssmulsa3
+#endif /* L_ssmulsa3 */
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#undef SS
+
/*******************************************************
Fractional Division 8 / 8
*******************************************************/
===================================================================
@@ -79,6 +79,41 @@ see the files COPYING3 and COPYING.RUNTI
#define XJMP rjmp
#endif
+;; Prologue stuff
+
+.macro do_prologue_saves n_pushed n_frame=0
+ ldi r26, lo8(\n_frame) ; X = size of the stack frame to allocate
+ ldi r27, hi8(\n_frame)
+ ldi r30, lo8(gs(.L_prologue_saves.\@)) ; Z = where __prologue_saves__ returns to
+ ldi r31, hi8(gs(.L_prologue_saves.\@))
+ XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2) ; skip 18 - n_pushed pushes
+.L_prologue_saves.\@:
+.endm
+
+;; Epilogue stuff
+
+.macro do_epilogue_restores n_pushed n_frame=0
+ in r28, __SP_L__ ; Y = current stack pointer
+#ifdef __AVR_HAVE_SPH__
+ in r29, __SP_H__
+.if \n_frame > 63
+ subi r28, lo8(-\n_frame) ; Y += n_frame by subtracting the negative
+ sbci r29, hi8(-\n_frame)
+.elseif \n_frame > 0
+ adiw r28, \n_frame ; n_frame <= 63 fits ADIW's immediate
+.endif
+#else
+ clr r29 ; no SPH: high byte of Y is 0
+.if \n_frame > 0
+ subi r28, lo8(-\n_frame)
+.endif
+#endif /* HAVE SPH */
+ ldi r30, \n_pushed ; Z(lo) = number of registers to restore
+ XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
+.endm
+
+;; Support function entry and exit for convenience
+
.macro DEFUN name
.global \name
.func \name
@@ -98,6 +133,9 @@ see the files COPYING3 and COPYING.RUNTI
.endfunc
.endm
+;; Skip next instruction, typically a jump target
+#define skip cpse 0,0 /* R0 == R0 always holds, so the skip always happens */
+
;; Negate a 2-byte value held in consecutive registers
.macro NEG2 reg
com \reg+1
@@ -736,8 +774,6 @@ ENDF __mulsqipsi3
Multiplication 64 x 64
*******************************************************/
-#if defined (L_muldi3)
-
;; A[] = A[] * B[]
;; A[0..7]: In: Multiplicand
@@ -774,6 +810,8 @@ ENDF __mulsqipsi3
#define C6 C4+2
#define C7 C4+3
+#if defined (L_muldi3)
+
;; A[] *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function
@@ -818,7 +856,7 @@ DEFUN __muldi3
wmov 26, B2
;; 0 * 1
- rcall __muldi3_6
+ XCALL __muldi3_6
pop A0
pop A1
@@ -829,7 +867,7 @@ DEFUN __muldi3
pop r26
pop r27
;; 1 * 0
- rcall __muldi3_6
+ XCALL __muldi3_6
pop A0
pop A1
@@ -852,7 +890,12 @@ DEFUN __muldi3
pop r28
pop r29
ret
+ENDF __muldi3
+#endif /* L_muldi3 */
+#if defined (L_muldi3_6)
+;; A helper for some 64-bit multiplications with MUL available
+DEFUN __muldi3_6
__muldi3_6:
XCALL __umulhisi3
add C2, 22
@@ -862,7 +905,8 @@ __muldi3_6:
brcc 0f
adiw C6, 1
0: ret
-ENDF __muldi3
+ENDF __muldi3_6
+#endif /* L_muldi3_6 */
#undef C7
#undef C6
@@ -875,6 +919,8 @@ ENDF __muldi3
#else /* !HAVE_MUL */
+#if defined (L_muldi3)
+
#define C0 26
#define C1 C0+1
#define C2 C0+2
@@ -952,6 +998,7 @@ ENDF __muldi3
#undef C1
#undef C0
+#endif /* L_muldi3 */
#endif /* HAVE_MUL */
#undef B7
@@ -972,7 +1019,240 @@ ENDF __muldi3
#undef A1
#undef A0
-#endif /* L_muldi3 */
+/*******************************************************
+ Widening Multiplication 64 = 32 x 32 with MUL
+*******************************************************/
+
+#if defined (__AVR_HAVE_MUL__)
+#define A0 r22
+#define A1 r23
+#define A2 r24
+#define A3 r25
+
+#define B0 r18
+#define B1 r19
+#define B2 r20
+#define B3 r21
+
+#define C0 18
+#define C1 C0+1
+#define C2 20
+#define C3 C2+1
+#define C4 28
+#define C5 C4+1
+#define C6 C4+2
+#define C7 C4+3
+
+#if defined (L_umulsidi3)
+
+;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
+
+;; R18[8] = R22[4] * R18[4]
+;;
+;; Ordinary ABI Function, but additionally sets
+;; X = R20[2] = B2[2]
+;; Z = R22[2] = A0[2]
+DEFUN __umulsidi3
+ clt ; T = 0: no sign correction for A
+ ;; FALLTHRU
+ENDF __umulsidi3
+ ;; T = sign (A)
+DEFUN __umulsidi3_helper
+ push 29 $ push 28 ; Y
+ wmov 30, A2
+ ;; Counting in Words, we have to perform 4 Multiplications
+ ;; 0 * 0
+ wmov 26, A0
+ XCALL __umulhisi3
+ push 23 $ push 22 ; C0
+ wmov 28, B0
+ wmov 18, B2
+ wmov C2, 24
+ push 27 $ push 26 ; A0
+ push 19 $ push 18 ; B2
+ ;;
+ ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
+ ;; B2 C2 -- -- -- B0 A2
+ ;; 1 * 1
+ wmov 26, 30 ; A2
+ XCALL __umulhisi3
+ ;; Sign-extend A. T holds the sign of A
+ brtc 0f ; T = 0: A >= 0, no correction
+ ;; Subtract B from the high part of the result
+ sub 22, 28
+ sbc 23, 29
+ sbc 24, 18
+ sbc 25, 19
+0: wmov 18, 28 ;; B0
+ wmov C4, 22
+ wmov C6, 24
+ ;;
+ ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
+ ;; B0 C2 -- -- A2 C4 C6
+ ;;
+ ;; 1 * 0
+ XCALL __muldi3_6
+ ;; 0 * 1
+ pop 26 $ pop 27 ;; B2
+ pop 18 $ pop 19 ;; A0
+ XCALL __muldi3_6
+
+ ;; Move result C into place and save A0 in Z
+ wmov 22, C4
+ wmov 24, C6
+ wmov 30, 18 ; A0
+ pop C0 $ pop C1
+
+ ;; Epilogue
+ pop 28 $ pop 29 ;; Y
+ ret
+ENDF __umulsidi3_helper
+#endif /* L_umulsidi3 */
+
+
+#if defined (L_mulsidi3)
+
+;; Signed widening 64 = 32 * 32 Multiplication
+;;
+;; R18[8] = R22[4] * R18[4]
+;; Ordinary ABI Function
+DEFUN __mulsidi3
+ bst A3, 7 ; T = sign of A, consumed by __umulsidi3_helper
+ sbrs B3, 7 ; Enhanced core has no skip bug
+ XJMP __umulsidi3_helper
+
+ ;; B needs sign-extension
+ push A3
+ push A2
+ XCALL __umulsidi3_helper
+ ;; A0 survived in Z
+ sub r22, r30 ; B < 0: subtract A from the high part of the result
+ sbc r23, r31
+ pop r26 ; A2
+ pop r27 ; A3
+ sbc r24, r26
+ sbc r25, r27
+ ret
+ENDF __mulsidi3
+#endif /* L_mulsidi3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#endif /* HAVE_MUL */
+
+/**********************************************************
+ Widening Multiplication 64 = 32 x 32 without MUL
+**********************************************************/
+
+#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+#define AA0 22
+#define AA1 AA0+1
+#define AA2 AA0+2
+#define AA3 AA0+3
+
+#define BB0 18
+#define BB1 BB0+1
+#define BB2 BB0+2
+#define BB3 BB0+3
+
+#define Mask r30 /* 0xff for signed, 0x7f for unsigned extension */
+
+;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
+;;
+;; R18[8] = R22[4] * R18[4]
+;; Ordinary ABI Function
+DEFUN __mulsidi3
+ set ; T = 1: signed
+ skip
+ ;; FALLTHRU
+ENDF __mulsidi3
+
+DEFUN __umulsidi3
+ clt ; skipped
+ ;; Save 10 Registers: R10..R17, R28, R29
+ do_prologue_saves 10
+ ldi Mask, 0xff
+ bld Mask, 7 ; Mask.7 = T: 0xff if signed, 0x7f if unsigned
+ ;; Move B into place...
+ wmov B0, BB0
+ wmov B2, BB2
+ ;; ...and extend it
+ and BB3, Mask ; kill the sign bit in the unsigned case
+ lsl BB3 ; C = sign of B (0 if unsigned)
+ sbc B4, B4 ; B4 = 0x00 or 0xff: the extension byte
+ mov B5, B4
+ wmov B6, B4
+ ;; Move A into place...
+ wmov A0, AA0
+ wmov A2, AA2
+ ;; ...and extend it
+ and AA3, Mask ; kill the sign bit in the unsigned case
+ lsl AA3 ; C = sign of A (0 if unsigned)
+ sbc A4, A4 ; A4 = 0x00 or 0xff: the extension byte
+ mov A5, A4
+ wmov A6, A4
+ XCALL __muldi3
+ do_epilogue_restores 10
+ENDF __umulsidi3
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef A4
+#undef A5
+#undef A6
+#undef A7
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef B4
+#undef B5
+#undef B6
+#undef B7
+#undef AA0
+#undef AA1
+#undef AA2
+#undef AA3
+#undef BB0
+#undef BB1
+#undef BB2
+#undef BB3
+#undef Mask
+#endif /* L_mulsidi3 && !HAVE_MUL */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1625,14 +1905,10 @@ DEFUN __divdi3_moddi3
0: ;; The Prologue
;; Save 12 Registers: Y, 17...8
- ;; No Frame needed (X = 0)
- clr r26
- clr r27
- ldi r30, lo8(gs(1f))
- ldi r31, hi8(gs(1f))
- XJMP __prologue_saves__ + ((18 - 12) * 2)
+ ;; No Frame needed
+ do_prologue_saves 12
-1: ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
+ ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
;; SS.6 will contain the Sign of the Remainder (A.sign)
mov SS, A7
asr SS
@@ -1672,15 +1948,8 @@ DEFUN __divdi3_moddi3
#endif /* __AVR_HAVE_JMP_CALL__ */
XCALL __negdi2
-4: ;; Epilogue: Restore the Z = 12 Registers and return
- in r28, __SP_L__
-#if defined (__AVR_HAVE_SPH__)
- in r29, __SP_H__
-#else
- clr r29
-#endif /* #SP = 8/16 */
- ldi r30, 12
- XJMP __epilogue_restores__ + ((18 - 12) * 2)
+4: ;; Epilogue: Restore 12 Registers and return
+ do_epilogue_restores 12
ENDF __divdi3_moddi3
===================================================================
@@ -18,7 +18,8 @@ LIB1ASMFUNCS = \
_udivmodsi4 \
_divmodsi4 \
_divdi3 _udivdi3 \
- _muldi3 \
+ _muldi3 _muldi3_6 \
+ _mulsidi3 _umulsidi3 \
_udivmod64 \
_negsi2 _negdi2 \
_prologue \
@@ -69,6 +70,8 @@ LIB1ASMFUNCS += \
_mulhq3 _muluhq3 \
_mulha3 _muluha3 _muluha3_round \
_mulsa3 _mulusa3 \
+ _usmuluha3 _ssmulha3 \
+ _usmulusa3 _ssmulsa3 \
_divqq3 _udivuqq3 _divqq_helper \
_divhq3 _udivuhq3 \
_divha3 _udivuha3 \
@@ -260,3 +263,15 @@ LIB2FUNCS_EXCLUDE += \
LIB2FUNCS_EXCLUDE += \
$(foreach func,_div,\
$(foreach mode,$(sdiv_modes) $(udiv_modes),$(func_X)))
+
+# The saturated multiplications below are provided by LIB1ASMFUNCS above.
+ssmul_modes = HA SA
+usmul_modes = UHA USA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_usmul,\
+ $(foreach mode,$(usmul_modes),$(func_X)))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_ssmul,\
+ $(foreach mode,$(ssmul_modes),$(func_X)))