Patchwork [avr] Ad PR54222: Move decimal point of signed accum one bit right.

login
register
mail settings
Submitter Georg-Johann Lay
Date Nov. 21, 2012, 2:38 p.m.
Message ID <50ACE777.1000708@gjlay.de>
Download mbox | patch
Permalink /patch/200784/
State New
Headers show

Comments

Georg-Johann Lay - Nov. 21, 2012, 2:38 p.m.
This patch restores the GCC default layout of HA, SA and DA mode.

The original fixed point support tried to be binary compatible with fixed-point
support already provided by some non-FSF ports, but that turned out to result
in wrong code in some situations.

The reason is that these ports adjusted the signed accum fixed-point modes so
that the decimal point is byte-aligned, which results in smaller code.

This means GCC's fixed-point engine is not generic enough to handle all mode
adjustments from <target>-modes.def, thus this patch switches the modes to the
GCC default.

There are no new regressions.

However, I could not run all the tests I usually do, because my framework for
testing arithmetic crashes with PR54814 (spill fail).

Ok for trunk?

Johann



libgcc/
	Adjust decimal point of signed accum mode to GCC default.

	PR target/54222
	* config/avr/t-avr (LIB1ASMFUNCS): Add _fractsfsq _fractsfusq,
	_divqq_helper.
	* config/avr/lib1funcs-fixed.S (__fractqqsf, __fracthqsf)
	(__fractsasf, __fractsfha, __fractusqsf, __fractsfsa)
	(__mulha3, __mulsa3)
	(__divqq3, __divha3, __divsa3): Adjust to new position of
	decimal point of signed accum types.
	
	(__mulusa3_round): New function.
	(__mulusa3): Use it.
	(__divqq_helper): New function.
	(__udivuqq3): Use it.

gcc/
	Adjust decimal point of signed accum mode to GCC default.

	PR target/54222
	* config/avr/avr-modes.def (HA, SA, DA): Remove mode adjustments.
	(TA): Move decimal point one bit to the right.
	* config/avr/avr.c (avr_out_fract): Rewrite.
Denis Chertykov - Nov. 22, 2012, 5:04 a.m.
2012/11/21 Georg-Johann Lay <avr@gjlay.de>:
> This patch restores the GCC default layout of HA, SA and DA mode.
>
> The original fixed point support tried to be binary compatible with fixed-point
> support already provided by some non-FSF ports, but that turned out to result
> in wrong code in some situations.
>
> Reason is that these ports adjusted the signed accum fixed-point modes so that
> the decimal point is byte aligned which results in smaller code.
>
> This means GCC's fixed-point engine is not generic enough to handle all mode
> adjustments from <target>-modes.def, thus this patch switches the modes to the
> GCC default.
>
> There are no new regressions.
>
> However, I could not drive all the tests I usually do, because my framework to
> test arithmetics crashes with PR54814 (spill fail).
>
> Ok for trunk?
>

Approved.

Denis.

Patch

Index: libgcc/config/avr/lib1funcs-fixed.S
===================================================================
--- libgcc/config/avr/lib1funcs-fixed.S	(revision 193557)
+++ libgcc/config/avr/lib1funcs-fixed.S	(working copy)
@@ -43,8 +43,8 @@  DEFUN __fractqqsf
     ;; Move in place for SA -> SF conversion
     clr     r22
     mov     r23, r24
-    lsl     r23
     ;; Sign-extend
+    lsl     r24
     sbc     r24, r24
     mov     r25, r24
     XJMP    __fractsasf
@@ -67,9 +67,8 @@  ENDF __fractuqqsf
 DEFUN __fracthqsf
     ;; Move in place for SA -> SF conversion
     wmov    22, 24
-    lsl     r22
-    rol     r23
     ;; Sign-extend
+    lsl     r25
     sbc     r24, r24
     mov     r25, r24
     XJMP    __fractsasf
@@ -140,11 +139,13 @@  ENDF __fractusqsf
 #if defined (L_fractsasf)
 DEFUN __fractsasf
     XCALL   __floatsisf
-    ;; Divide non-zero results by 2^16 to move the
+    ;; Divide non-zero results by 2^15 to move the
     ;; decimal point into place
-    cpse    r25, __zero_reg__
-    subi    r25, exp_hi (16)
-    ret
+    tst     r25
+    breq    0f
+    subi    r24, exp_lo (15)
+    sbci    r25, exp_hi (15)
+0:  ret
 ENDF __fractsasf
 #endif  /* L_fractsasf */
 
@@ -186,8 +187,9 @@  ENDF __fractsfuqq
 
 #if defined (L_fractsfha)
 DEFUN __fractsfha
-    ;; Multiply with 2^24 to get a HA result in r25:r24
-    subi    r25, exp_hi (-24)
+    ;; Multiply with 2^{16+7} to get a HA result in r25:r24
+    subi    r24, exp_lo (-23)
+    sbci    r25, exp_hi (-23)
     XJMP    __fixsfsi
 ENDF __fractsfha
 #endif  /* L_fractsfha */
@@ -201,8 +203,7 @@  ENDF __fractsfuha
 #endif  /* L_fractsfuha */
 
 #if defined (L_fractsfhq)
-DEFUN __fractsfsq
-ENDF  __fractsfsq
+FALIAS __fractsfsq
 
 DEFUN __fractsfhq
     ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
@@ -214,8 +215,7 @@  ENDF __fractsfhq
 #endif  /* L_fractsfhq */
 
 #if defined (L_fractsfuhq)
-DEFUN __fractsfusq
-ENDF  __fractsfusq
+FALIAS __fractsfusq
 
 DEFUN __fractsfuhq
     ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
@@ -227,8 +227,9 @@  ENDF __fractsfuhq
 
 #if defined (L_fractsfsa)
 DEFUN __fractsfsa
-    ;; Multiply with 2^16 to get a SA result in r25:r22
-    subi    r25, exp_hi (-16)
+    ;; Multiply with 2^15 to get a SA result in r25:r22
+    subi    r24, exp_lo (-15)
+    sbci    r25, exp_hi (-15)
     XJMP    __fixsfsi
 ENDF __fractsfsa
 #endif  /* L_fractsfsa */
@@ -325,6 +326,9 @@  ENDF __muluhq3
 ;;; Rounding:  -0.5 LSB  <=  error  <=  0.5 LSB
 DEFUN   __mulha3
     XCALL   __mulhisi3
+    lsl     r22
+    rol     r23
+    rol     r24
     XJMP    __muluha3_round
 ENDF __mulha3
 #endif  /* L_mulha3 */
@@ -359,6 +363,9 @@  ENDF __muluha3_round
     Fixed  Multiplication  16.16 x 16.16
 *******************************************************/
 
+;; Bits outside the result (below LSB), used in the signed version
+#define GUARD __tmp_reg__
+
 #if defined (__AVR_HAVE_MUL__)
 
 ;; Multiplier
@@ -381,9 +388,16 @@  ENDF __muluha3_round
 
 #if defined (L_mulusa3)
 ;;; (C3:C0) = (A3:A0) * (B3:B0)
-;;; Clobbers: __tmp_reg__
-;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
-DEFUN   __mulusa3
+DEFUN __mulusa3
+    set
+    ;; Fallthru
+ENDF  __mulusa3
+
+;;; Round for last digit iff T = 1
+;;; Return guard bits in GUARD (__tmp_reg__).
+;;; Rounding, T = 0:  -1.0 LSB  <  error  <=  0   LSB
+;;; Rounding, T = 1:  -0.5 LSB  <  error  <=  0.5 LSB
+DEFUN __mulusa3_round
     ;; Some of the MUL instructions have LSBs outside the result.
     ;; Don't ignore these LSBs in order to tame rounding error.
     ;; Use C2/C3 for these LSBs.
@@ -395,9 +409,12 @@  DEFUN   __mulusa3
     mul A1, B0  $  add  C3, r0  $  adc C0, r1
     mul A0, B1  $  add  C3, r0  $  adc C0, r1  $  rol C1
     
-    ;; Round
+    ;; Round if T = 1.  Store guarding bits outside the result for rounding
+    ;; and left-shift by the signed version (function below).
+    brtc 0f
     sbrc C3, 7
     adiw C0, 1
+0:  push C3
     
     ;; The following MULs don't have LSBs outside the result.
     ;; C2/C3 is the high part.
@@ -420,25 +437,42 @@  DEFUN   __mulusa3
     mul  A2, B3  $  add C3, r0
     mul  A3, B2  $  add C3, r0
 
+    ;; Guard bits used in the signed version below.
+    pop  GUARD
     clr  __zero_reg__
     ret
-ENDF __mulusa3
+ENDF __mulusa3_round
 #endif /* L_mulusa3 */
 
 #if defined (L_mulsa3)
 ;;; (C3:C0) = (A3:A0) * (B3:B0)
-;;; Clobbers: __tmp_reg__
+;;; Clobbers: __tmp_reg__, T
 ;;; Rounding:  -0.5 LSB  <=  error  <=  0.5 LSB
 DEFUN __mulsa3
-    XCALL   __mulusa3
+    clt
+    XCALL   __mulusa3_round
+    ;; A posteriori sign extension of the operands
     tst     B3
-    brpl    1f
+    brpl 1f
     sub     C2, A0
     sbc     C3, A1
 1:  sbrs    A3, 7
-    ret
+    rjmp 2f
     sub     C2, B0
     sbc     C3, B1
+2:
+    ;;  Shift 1 bit left to adjust for 15 fractional bits
+    lsl     GUARD
+    rol     C0
+    rol     C1
+    rol     C2
+    rol     C3
+    ;; Round last digit
+    lsl     GUARD
+    adc     C0, __zero_reg__
+    adc     C1, __zero_reg__
+    adc     C2, __zero_reg__
+    adc     C3, __zero_reg__
     ret
 ENDF __mulsa3
 #endif /* L_mulsa3 */
@@ -492,27 +526,56 @@  ENDF __mulsa3
 DEFUN   __mulsa3
     push    B0
     push    B1
-    bst     B3, 7
-    XCALL   __mulusa3
-    ;; A survived in  31:30:27:26
-    rcall 1f
-    pop     AA1
-    pop     AA0
+    push    B3
+    clt
+    XCALL   __mulusa3_round
+    pop     r30
+    ;; sign-extend B
+    bst     r30, 7
+    brtc 1f
+    ;; A1, A0 survived in  R27:R26
+    sub     C2, AA0
+    sbc     C3, AA1
+1:
+    pop     AA1  ;; B1
+    pop     AA0  ;; B0
+
+    ;; sign-extend A.  A3 survived in  R31
     bst     AA3, 7
-1:  brtc  9f
-    ;; 1-extend A/B
+    brtc 2f
     sub     C2, AA0
     sbc     C3, AA1
-9:  ret
+2:
+    ;;  Shift 1 bit left to adjust for 15 fractional bits
+    lsl     GUARD
+    rol     C0
+    rol     C1
+    rol     C2
+    rol     C3
+    ;; Round last digit
+    lsl     GUARD
+    adc     C0, __zero_reg__
+    adc     C1, __zero_reg__
+    adc     C2, __zero_reg__
+    adc     C3, __zero_reg__
+    ret
 ENDF __mulsa3
 #endif  /* L_mulsa3 */
 
 #if defined (L_mulusa3)
 ;;; (R25:R22)  *=  (R21:R18)
-;;; Clobbers: ABI, called by optabs and __mulsua
+;;; Clobbers: ABI, called by optabs
 ;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
-;;; Does not clobber T and A[] survives in 26, 27, 30, 31
-DEFUN   __mulusa3
+DEFUN __mulusa3
+    set
+    ;; Fallthru
+ENDF  __mulusa3
+
+;;; A[] survives in 26, 27, 30, 31
+;;; Also used by __mulsa3 with T = 0
+;;; Round if T = 1
+;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
+DEFUN __mulusa3_round
     push    CC2
     push    CC3
     ; clear result
@@ -560,21 +623,26 @@  DEFUN   __mulusa3
     sbci    B0, 0
     brne 5b
 
-    ;; Move result into place and round
+    ;; Save guard bits and set carry for rounding
+    push    B3
     lsl     B3
+    ;; Move result into place
     wmov    C2, CC2
     wmov    C0, CC0
     clr     __zero_reg__
+    brtc 6f
+    ;; Round iff T = 1
     adc     C0, __zero_reg__
     adc     C1, __zero_reg__
     adc     C2, __zero_reg__
     adc     C3, __zero_reg__
-    
+6:  
+    pop     GUARD
     ;; Epilogue
     pop     CC3
     pop     CC2
     ret
-ENDF __mulusa3
+ENDF __mulusa3_round
 #endif  /* L_mulusa3 */
 
 #undef A0
@@ -600,6 +668,8 @@  ENDF __mulusa3
 
 #endif /* __AVR_HAVE_MUL__ */
 
+#undef GUARD
+
 /*******************************************************
       Fractional Division 8 / 8
 *******************************************************/
@@ -607,30 +677,38 @@  ENDF __mulusa3
 #define r_divd  r25     /* dividend */
 #define r_quo   r24     /* quotient */
 #define r_div   r22     /* divisor */
+#define r_sign  __tmp_reg__
 
 #if defined (L_divqq3)
 DEFUN   __divqq3
-    mov     r0, r_divd
-    eor     r0, r_div
+    mov     r_sign, r_divd
+    eor     r_sign, r_div
     sbrc    r_div, 7
     neg     r_div
     sbrc    r_divd, 7
     neg     r_divd
-    cp      r_divd, r_div
-    breq    __divqq3_minus1  ; if equal return -1
-    XCALL   __udivuqq3
+    XCALL   __divqq_helper
     lsr     r_quo
-    sbrc    r0, 7   ; negate result if needed
+    sbrc    r_sign, 7   ; negate result if needed
     neg     r_quo
     ret
-__divqq3_minus1:
-    ldi     r_quo, 0x80
-    ret
 ENDF __divqq3
-#endif  /* defined (L_divqq3) */
+#endif  /* L_divqq3 */
 
 #if defined (L_udivuqq3)
 DEFUN   __udivuqq3
+    cp      r_divd, r_div
+    brsh    0f
+    XJMP __divqq_helper
+    ;; Result is out of [0, 1)  ==>  Return 1 - eps.
+0:  ldi     r_quo, 0xff
+    ret
+ENDF __udivuqq3
+#endif  /* L_udivuqq3 */
+
+
+#if defined (L_divqq_helper)
+DEFUN   __divqq_helper
     clr     r_quo           ; clear quotient
     inc     __zero_reg__    ; init loop counter, used per shift
 __udivuqq3_loop:
@@ -649,12 +727,13 @@  __udivuqq3_cont:
     com     r_quo           ; complement result
                             ; because C flag was complemented in loop
     ret
-ENDF __udivuqq3
-#endif  /* defined (L_udivuqq3) */
+ENDF __divqq_helper
+#endif  /* L_divqq_helper */
 
 #undef  r_divd
 #undef  r_quo
 #undef  r_div
+#undef  r_sign
 
 
 /*******************************************************
@@ -746,6 +825,8 @@  DEFUN   __divha3
     NEG2    r_divdL
 2:
     XCALL   __udivuha3
+    lsr     r_quoH  ; adjust to 7 fractional bits
+    ror     r_quoL
     sbrs    r0, 7   ; negate result if needed
     ret
     NEG2    r_quoL
@@ -806,6 +887,10 @@  DEFUN   __divsa3
     NEG4    r_arg1L
 2:
     XCALL   __udivusa3
+    lsr     r_quoHH ; adjust to 15 fractional bits
+    ror     r_quoHL
+    ror     r_quoH
+    ror     r_quoL
     sbrs    r0, 7   ; negate result if needed
     ret
     ;; negate r_quoL
@@ -1024,8 +1109,8 @@  DEFUN __usadd_8
     XCALL   __adddi3
     brcs 0f
     ret
-	;; A[] = 0xffffffff
-0:  XJMP    __sbc_8
+0:  ;; A[] = 0xffffffff
+    XJMP    __sbc_8
 ENDF __usadd_8
 #endif /* L_usadd_8 */
 
@@ -1038,8 +1123,8 @@  DEFUN __ussub_8
     XCALL   __subdi3
     brcs 0f
     ret
-	;; A[] = 0
-0:  XJMP    __clr_8
+0:  ;; A[] = 0
+    XJMP    __clr_8
 ENDF __ussub_8
 #endif /* L_ussub_8 */
 
@@ -1049,9 +1134,9 @@  FALIAS __ssaddda3
 FALIAS __ssadddq3
 
 DEFUN __ssadd_8
-    ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
     XCALL   __adddi3
     brvc 0f
+    ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
     cpi     B7, 0x80
     XCALL   __sbc_8
     subi    A7, 0x80
@@ -1067,7 +1152,7 @@  FALIAS __sssubdq3
 DEFUN __sssub_8
     XCALL   __subdi3
     brvc 0f
-	;; A = (B < 0) ? INT64_MAX : INT64_MIN
+    ;; A = (B < 0) ? INT64_MAX : INT64_MIN
     ldi     A7, 0x7f
     cp      A7, B7
     XCALL   __sbc_8
Index: libgcc/config/avr/t-avr
===================================================================
--- libgcc/config/avr/t-avr	(revision 193557)
+++ libgcc/config/avr/t-avr	(working copy)
@@ -64,12 +64,12 @@  LIB1ASMFUNCS += \
 	\
 	_fractsfqq _fractsfuqq \
 	_fractsfhq _fractsfuhq _fractsfha _fractsfuha \
-	_fractsfsa _fractsfusa \
+	_fractsfsq _fractsfusq _fractsfsa _fractsfusa \
 	_mulqq3 \
 	_mulhq3 _muluhq3 \
 	_mulha3 _muluha3 _muluha3_round \
 	_mulsa3 _mulusa3 \
-	_divqq3 _udivuqq3 \
+	_divqq3 _udivuqq3 _divqq_helper \
 	_divhq3 _udivuhq3 \
 	_divha3 _udivuha3 \
 	_divsa3 _udivusa3 \
Index: gcc/config/avr/avr-modes.def
===================================================================
--- gcc/config/avr/avr-modes.def	(revision 193557)
+++ gcc/config/avr/avr-modes.def	(working copy)
@@ -1,26 +1,13 @@ 
 FRACTIONAL_INT_MODE (PSI, 24, 3);
 
-/* On 8 bit machines it requires fewer instructions for fixed point
-   routines if the decimal place is on a byte boundary which is not
-   the default for signed accum types.  */
-
-ADJUST_IBIT (HA, 7);
-ADJUST_FBIT (HA, 8);
-
-ADJUST_IBIT (SA, 15);
-ADJUST_FBIT (SA, 16);
-
-ADJUST_IBIT (DA, 31);
-ADJUST_FBIT (DA, 32);
-
 /* Make TA and UTA 64 bits wide.
    128 bit wide modes would be insane on a 8-bit machine.
    This needs special treatment in avr.c and avr-lib.h.  */
 
 ADJUST_BYTESIZE  (TA, 8);
 ADJUST_ALIGNMENT (TA, 1);
-ADJUST_IBIT (TA, 15);
-ADJUST_FBIT (TA, 48);
+ADJUST_IBIT (TA, 16);
+ADJUST_FBIT (TA, 47);
 
 ADJUST_BYTESIZE  (UTA, 8);
 ADJUST_ALIGNMENT (UTA, 1);
Index: gcc/config/avr/avr.c
===================================================================
--- gcc/config/avr/avr.c	(revision 193557)
+++ gcc/config/avr/avr.c	(working copy)
@@ -6974,6 +6974,332 @@  avr_out_addto_sp (rtx *op, int *plen)
 }
 
 
+/* Outputs instructions needed for fixed point type conversion.
+   This includes converting between any fixed point type, as well
+   as converting to any integer type.  Conversion between integer
+   types is not supported.
+
+   Converting signed fractional types requires a bit shift if converting
+   to or from any unsigned fractional type because the decimal place is
+   shifted by 1 bit.  When the destination is a signed fractional, the sign
+   is stored in either the carry or T bit.  */
+
+const char*
+avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen)
+{
+  size_t i;
+  rtx xop[6];
+  RTX_CODE shift = UNKNOWN;
+  bool sign_in_carry = false;
+  bool msb_in_carry = false;
+  bool lsb_in_carry = false;
+  const char *code_ashift = "lsl %0";
+
+  
+#define MAY_CLOBBER(RR)                                                 \
+  /* Shorthand used below.  */                                          \
+  ((sign_bytes                                                          \
+    && IN_RANGE (RR, dest.regno_msb - sign_bytes + 1, dest.regno_msb))  \
+   || (reg_unused_after (insn, all_regs_rtx[RR])                        \
+       && !IN_RANGE (RR, dest.regno, dest.regno_msb)))
+
+  struct
+  {
+    /* bytes       : Length of operand in bytes.
+       ibyte       : Length of integral part in bytes.
+       fbyte, fbit : Length of fractional part in bytes, bits.  */
+
+    bool sbit;
+    unsigned fbit, bytes, ibyte, fbyte;
+    unsigned regno, regno_msb;
+  } dest, src, *val[2] = { &dest, &src };
+
+  if (plen)
+    *plen = 0;
+
+  /* Step 0:  Determine information on source and destination operand we
+     ======   will need in the remainder.  */
+
+  for (i = 0; i < sizeof (val) / sizeof (*val); i++)
+    {
+      enum machine_mode mode;
+
+      xop[i] = operands[i];
+
+      mode = GET_MODE (xop[i]);
+
+      val[i]->bytes = GET_MODE_SIZE (mode);
+      val[i]->regno = REGNO (xop[i]);
+      val[i]->regno_msb = REGNO (xop[i]) + val[i]->bytes - 1;
+
+      if (SCALAR_INT_MODE_P (mode))
+        {
+          val[i]->sbit = intsigned;
+          val[i]->fbit = 0;
+        }
+      else if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
+        {
+          val[i]->sbit = SIGNED_SCALAR_FIXED_POINT_MODE_P (mode);
+          val[i]->fbit = GET_MODE_FBIT (mode);
+        }
+      else
+        fatal_insn ("unsupported fixed-point conversion", insn);
+
+      val[i]->fbyte = (1 + val[i]->fbit) / BITS_PER_UNIT;
+      val[i]->ibyte = val[i]->bytes - val[i]->fbyte;
+    }
+
+  // Byte offset of the decimal point taking into account different place
+  // of the decimal point in input and output and different register numbers
+  // of input and output.
+  int offset = dest.regno - src.regno + dest.fbyte - src.fbyte;
+
+  // Number of destination bytes that will come from sign / zero extension.
+  int sign_bytes = (dest.ibyte - src.ibyte) * (dest.ibyte > src.ibyte);
+
+  // Number of bytes at the low end to be filled with zeros.
+  int zero_bytes = (dest.fbyte - src.fbyte) * (dest.fbyte > src.fbyte);
+
+  // Do we have a 16-Bit register that is cleared?
+  rtx clrw = NULL_RTX;
+      
+  bool sign_extend = src.sbit && sign_bytes;
+
+  if (0 == dest.fbit % 8 && 7 == src.fbit % 8)
+    shift = ASHIFT;
+  else if (7 == dest.fbit % 8 && 0 == src.fbit % 8)
+    shift = ASHIFTRT;
+  else if (dest.fbit % 8 == src.fbit % 8)
+    shift = UNKNOWN;
+  else
+    gcc_unreachable();
+
+  /* Step 1: Clear bytes at the low end and copy payload bits from source
+     ======  to destination.  */
+
+  int step = offset < 0 ? 1 : -1;
+  unsigned d0 = offset < 0 ? dest.regno : dest.regno_msb;
+
+  // We cleared at least that number of registers.
+  int clr_n = 0;
+
+  for (; d0 >= dest.regno && d0 <= dest.regno_msb; d0 += step)
+    {
+      // Next regno of destination is needed for MOVW
+      unsigned d1 = d0 + step;
+
+      // Current and next regno of source
+      unsigned s0 = d0 - offset;
+      unsigned s1 = s0 + step;
+
+      // Must current resp. next regno be CLRed?  This applies to the low
+      // bytes of the destination that have no associated source bytes.
+      bool clr0 = s0 < src.regno;
+      bool clr1 = s1 < src.regno && d1 >= dest.regno;
+
+      // First gather what code to emit (if any) and additional step to
+      // apply if a MOVW is in use.  xop[2] is destination rtx and xop[3]
+      // is the source rtx for the current loop iteration.
+      const char *code = NULL;
+      int stepw = 0;
+      
+      if (clr0)
+        {
+          if (AVR_HAVE_MOVW && clr1 && clrw)
+            {
+              xop[2] = all_regs_rtx[d0 & ~1];
+              xop[3] = clrw;
+              code = "movw %2,%3";
+              stepw = step;
+            }
+          else
+            {
+              xop[2] = all_regs_rtx[d0];
+              code = "clr %2";
+
+              if (++clr_n >= 2
+                  && !clrw
+                  && d0 % 2 == (step > 0))
+                {
+                  clrw = all_regs_rtx[d0 & ~1];
+                }
+            }
+        }
+      else if (offset && s0 <= src.regno_msb)
+        {
+          int movw = AVR_HAVE_MOVW && offset % 2 == 0
+            && d0 % 2 == (offset > 0)
+            && d1 <= dest.regno_msb && d1 >= dest.regno
+            && s1 <= src.regno_msb  && s1 >= src.regno;
+
+          xop[2] = all_regs_rtx[d0 & ~movw];
+          xop[3] = all_regs_rtx[s0 & ~movw];
+          code = movw ? "movw %2,%3" : "mov %2,%3";
+          stepw = step * movw;
+        }
+
+      if (code)
+        {
+          if (sign_extend && shift != ASHIFT && !sign_in_carry
+              && (d0 == src.regno_msb || d0 + stepw == src.regno_msb))
+            {
+              /* We are going to override the sign bit.  If we sign-extend,
+                 store the sign in the Carry flag.  This is not needed if
+                 the destination will be ASHIFTed in the remainder because
+                 the ASHIFT will set Carry without extra instruction.  */
+
+              avr_asm_len ("lsl %0", &all_regs_rtx[src.regno_msb], plen, 1);
+              sign_in_carry = true;
+            }
+
+          unsigned src_msb = dest.regno_msb - sign_bytes - offset + 1;
+
+          if (!sign_extend && shift == ASHIFTRT && !msb_in_carry
+              && src.ibyte > dest.ibyte
+              && (d0 == src_msb || d0 + stepw == src_msb))
+            {
+              /* We are going to override the MSB.  If we shift right,
+                 store the MSB in the Carry flag.  This is only needed if
+                 we don't sign-extend because with sign-extension the MSB
+                 (the sign) will be produced by the sign extension.  */
+
+              avr_asm_len ("lsr %0", &all_regs_rtx[src_msb], plen, 1);
+              msb_in_carry = true;
+            }
+
+          unsigned src_lsb = dest.regno - offset -1;
+
+          if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry
+              && (d0 == src_lsb || d0 + stepw == src_lsb))
+            {
+              /* We are going to override the new LSB; store it into carry.  */
+
+              avr_asm_len ("lsl %0", &all_regs_rtx[src_lsb], plen, 1);
+              code_ashift = "rol %0";
+              lsb_in_carry = true;
+            }
+
+          avr_asm_len (code, xop, plen, 1);
+          d0 += stepw;
+        }
+    }
+
+  /* Step 2:  Shift destination left by 1 bit position.  This might be needed
+     ======   for signed input and unsigned output.  */
+
+  if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry)
+    {
+      unsigned s0 = dest.regno - offset -1;
+
+      if (MAY_CLOBBER (s0))
+        avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1);
+      else
+        avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
+                     "lsl __tmp_reg__", &all_regs_rtx[s0], plen, 2);
+
+      code_ashift = "rol %0";
+      lsb_in_carry = true;
+    }
+
+  if (shift == ASHIFT)
+    {
+      for (d0 = dest.regno + zero_bytes;
+           d0 <= dest.regno_msb - sign_bytes; d0++)
+        {
+          avr_asm_len (code_ashift, &all_regs_rtx[d0], plen, 1);
+          code_ashift = "rol %0";
+        }
+
+      lsb_in_carry = false;
+      sign_in_carry = true;
+    }
+
+  /* Step 4a:  Store MSB in carry if we don't already have it or will produce
+     =======   it in sign-extension below.  */
+
+  if (!sign_extend && shift == ASHIFTRT && !msb_in_carry
+      && src.ibyte > dest.ibyte)
+    {
+      unsigned s0 = dest.regno_msb - sign_bytes - offset + 1;
+
+      if (MAY_CLOBBER (s0))
+        avr_asm_len ("lsr %0 ; 4A", &all_regs_rtx[s0], plen, 1);
+      else
+        avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
+                     "lsr __tmp_reg__", &all_regs_rtx[s0], plen, 2);
+
+      msb_in_carry = true;
+    }
+
+  /* Step 3:  Sign-extend or zero-extend the destination as needed.
+     ======   */
+
+  if (sign_extend && !sign_in_carry)
+    {
+      unsigned s0 = src.regno_msb;
+      
+      if (MAY_CLOBBER (s0))
+        avr_asm_len ("lsl %0", &all_regs_rtx[src.regno_msb], plen, 1);
+      else
+        avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
+                     "lsl __tmp_reg__", &all_regs_rtx[src.regno_msb], plen, 2);
+
+      sign_in_carry = true;
+  }
+
+  gcc_assert (sign_in_carry + msb_in_carry + lsb_in_carry <= 1);
+
+  unsigned copies = 0;
+  rtx movw = sign_extend ? NULL_RTX : clrw;
+
+  for (d0 = dest.regno_msb - sign_bytes + 1; d0 <= dest.regno_msb; d0++)
+    {
+      if (AVR_HAVE_MOVW && movw
+          && d0 % 2 == 0 && d0 + 1 <= dest.regno_msb)
+        {
+          xop[2] = all_regs_rtx[d0];
+          xop[3] = movw;
+          avr_asm_len ("movw %2,%3", xop, plen, 1);
+          d0++;
+        }
+      else
+        {
+          avr_asm_len (sign_extend ? "sbc %0,%0" : "clr %0",
+                       &all_regs_rtx[d0], plen, 1);
+
+          if (++copies >= 2 && !movw && d0 % 2 == 1)
+            movw = all_regs_rtx[d0-1];
+        }
+    } /* for */
+
+
+  /* Step 4:  Right shift the destination.  This might be needed for
+     ======   conversions from unsigned to signed.  */
+
+  if (shift == ASHIFTRT)
+    {
+      const char *code_ashiftrt = "lsr %0";
+
+      if (sign_extend || msb_in_carry)
+        code_ashiftrt = "ror %0";
+
+      if (src.sbit && src.ibyte == dest.ibyte)
+        code_ashiftrt = "asr %0";
+
+      for (d0 = dest.regno_msb - sign_bytes;
+           d0 >= dest.regno + zero_bytes - 1 && d0 >= dest.regno; d0--)
+        {
+          avr_asm_len (code_ashiftrt, &all_regs_rtx[d0], plen, 1);
+          code_ashiftrt = "ror %0";
+        }
+    }
+
+#undef MAY_CLOBBER
+
+  return "";
+}
+
+
 /* Create RTL split patterns for byte sized rotate expressions.  This
   produces a series of move instructions and considers overlap situations.
   Overlapping non-HImode operands need a scratch register.  */
@@ -7123,348 +7449,6 @@  avr_rotate_bytes (rtx operands[])
 }
 
 
-/* Outputs instructions needed for fixed point type conversion.
-   This includes converting between any fixed point type, as well
-   as converting to any integer type.  Conversion between integer
-   types is not supported.
-
-   The number of instructions generated depends on the types
-   being converted and the registers assigned to them.
-
-   The number of instructions required to complete the conversion
-   is least if the registers for source and destination are overlapping
-   and are aligned at the decimal place as actual movement of data is
-   completely avoided.  In some cases, the conversion may already be
-   complete without any instructions needed.
-
-   When converting to signed types from signed types, sign extension
-   is implemented.
-
-   Converting signed fractional types requires a bit shift if converting
-   to or from any unsigned fractional type because the decimal place is
-   shifted by 1 bit.  When the destination is a signed fractional, the sign
-   is stored in either the carry or T bit.  */
-
-const char*
-avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen)
-{
-  int i;
-  bool sbit[2];
-  /* ilen: Length of integral part (in bytes)
-     flen: Length of fractional part (in bytes)
-     tlen: Length of operand (in bytes)
-     blen: Length of operand (in bits) */
-  int ilen[2], flen[2], tlen[2], blen[2];
-  int rdest, rsource, offset;
-  int start, end, dir;
-  bool sign_in_T = false, sign_in_Carry = false, sign_done = false;
-  bool widening_sign_extend = false;
-  int clrword = -1, lastclr = 0, clr = 0;
-  rtx xop[6];
-
-  const int dest = 0;
-  const int src = 1;
-
-  xop[dest] = operands[dest];
-  xop[src] = operands[src];
-
-  if (plen)
-    *plen = 0;
-
-  /* Determine format (integer and fractional parts)
-     of types needing conversion.  */
-
-  for (i = 0; i < 2; i++)
-    {
-      enum machine_mode mode = GET_MODE (xop[i]);
-
-      tlen[i] = GET_MODE_SIZE (mode);
-      blen[i] = GET_MODE_BITSIZE (mode);
-
-      if (SCALAR_INT_MODE_P (mode))
-        {
-          sbit[i] = intsigned;
-          ilen[i] = GET_MODE_SIZE (mode);
-          flen[i] = 0;
-        }
-      else if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
-        {
-          sbit[i] = SIGNED_SCALAR_FIXED_POINT_MODE_P (mode);
-          ilen[i] = (GET_MODE_IBIT (mode) + 1) / 8;
-          flen[i] = (GET_MODE_FBIT (mode) + 1) / 8;
-        }
-      else
-        fatal_insn ("unsupported fixed-point conversion", insn);
-    }
-
-  /* Perform sign extension if source and dest are both signed,
-     and there are more integer parts in dest than in source.  */
-
-  widening_sign_extend = sbit[dest] && sbit[src] && ilen[dest] > ilen[src];
-
-  rdest = REGNO (xop[dest]);
-  rsource = REGNO (xop[src]);
-  offset = flen[src] - flen[dest];
-
-  /* Position of MSB resp. sign bit.  */
-
-  xop[2] = GEN_INT (blen[dest] - 1);
-  xop[3] = GEN_INT (blen[src] - 1);
-
-  /* Store the sign bit if the destination is a signed fract and the source
-     has a sign in the integer part.  */
-
-  if (sbit[dest] && ilen[dest] == 0 && sbit[src] && ilen[src] > 0)
-    {
-      /* To avoid using BST and BLD if the source and destination registers
-         overlap or the source is unused after, we can use LSL to store the
-         sign bit in carry since we don't need the integral part of the source.
-         Restoring the sign from carry saves one BLD instruction below.  */
-
-      if (reg_unused_after (insn, xop[src])
-          || (rdest < rsource + tlen[src]
-              && rdest + tlen[dest] > rsource))
-        {
-          avr_asm_len ("lsl %T1%t3", xop, plen, 1);
-          sign_in_Carry = true;
-        }
-      else
-        {
-          avr_asm_len ("bst %T1%T3", xop, plen, 1);
-          sign_in_T = true;
-        }
-    }
-
-  /* Pick the correct direction to shift bytes.  */
-
-  if (rdest < rsource + offset)
-    {
-      dir = 1;
-      start = 0;
-      end = tlen[dest];
-    }
-  else
-    {
-      dir = -1;
-      start = tlen[dest] - 1;
-      end = -1;
-    }
-
-  /* Perform conversion by moving registers into place, clearing
-     destination registers that do not overlap with any source.  */
-
-  for (i = start; i != end; i += dir)
-    {
-      int destloc = rdest + i;
-      int sourceloc = rsource + i + offset;
-
-      /* Source register location is outside range of source register,
-         so clear this byte in the dest.  */
-
-      if (sourceloc < rsource
-          || sourceloc >= rsource + tlen[src])
-        {
-          if (AVR_HAVE_MOVW
-              && i + dir != end
-              && (sourceloc + dir < rsource
-                  || sourceloc + dir >= rsource + tlen[src])
-              && ((dir == 1 && !(destloc % 2) && !(sourceloc % 2))
-                  || (dir == -1 && (destloc % 2) && (sourceloc % 2)))
-              && clrword != -1)
-            {
-              /* Use already cleared word to clear two bytes at a time.  */
-
-              int even_i = i & ~1;
-              int even_clrword = clrword & ~1;
-
-              xop[4] = GEN_INT (8 * even_i);
-              xop[5] = GEN_INT (8 * even_clrword);
-              avr_asm_len ("movw %T0%t4,%T0%t5", xop, plen, 1);
-              i += dir;
-            }
-          else
-            {
-              if (i == tlen[dest] - 1
-                  && widening_sign_extend
-                  && blen[src] - 1 - 8 * offset < 0)
-                {
-                  /* The SBRC below that sign-extends would come
-                     up with a negative bit number because the sign
-                     bit is out of reach.  ALso avoid some early-clobber
-                     situations because of premature CLR.  */
-
-                  if (reg_unused_after (insn, xop[src]))
-                    avr_asm_len ("lsl %T1%t3" CR_TAB
-                                 "sbc %T0%t2,%T0%t2", xop, plen, 2);
-                  else
-                    avr_asm_len ("mov __tmp_reg__,%T1%t3"  CR_TAB
-                                 "lsl __tmp_reg__"         CR_TAB
-                                 "sbc %T0%t2,%T0%t2", xop, plen, 3);
-                  sign_done = true;
-
-                  continue;
-                }
-              
-              /* Do not clear the register if it is going to get
-                 sign extended with a MOV later.  */
-
-              if (sbit[dest] && sbit[src]
-                  && i != tlen[dest] - 1
-                  && i >= flen[dest])
-                {
-                  continue;
-                }
-
-              xop[4] = GEN_INT (8 * i);
-              avr_asm_len ("clr %T0%t4", xop, plen, 1);
-
-              /* If the last byte was cleared too, we have a cleared
-                 word we can MOVW to clear two bytes at a time.  */
-
-              if (lastclr) 
-                clrword = i;
-
-              clr = 1;
-            }
-        }
-      else if (destloc == sourceloc)
-        {
-          /* Source byte is already in destination:  Nothing needed.  */
-
-          continue;
-        }
-      else
-        {
-          /* Registers do not line up and source register location
-             is within range:  Perform move, shifting with MOV or MOVW.  */
-
-          if (AVR_HAVE_MOVW
-              && i + dir != end
-              && sourceloc + dir >= rsource
-              && sourceloc + dir < rsource + tlen[src]
-              && ((dir == 1 && !(destloc % 2) && !(sourceloc % 2))
-                  || (dir == -1 && (destloc % 2) && (sourceloc % 2))))
-            {
-              int even_i = i & ~1;
-              int even_i_plus_offset = (i + offset) & ~1;
-
-              xop[4] = GEN_INT (8 * even_i);
-              xop[5] = GEN_INT (8 * even_i_plus_offset);
-              avr_asm_len ("movw %T0%t4,%T1%t5", xop, plen, 1);
-              i += dir;
-            }
-          else
-            {
-              xop[4] = GEN_INT (8 * i);
-              xop[5] = GEN_INT (8 * (i + offset));
-              avr_asm_len ("mov %T0%t4,%T1%t5", xop, plen, 1);
-            }
-        }
-
-      lastclr = clr;
-      clr = 0;
-    }
-      
-  /* Perform sign extension if source and dest are both signed,
-     and there are more integer parts in dest than in source.  */
-
-  if (widening_sign_extend)
-    {
-      if (!sign_done)
-        {
-          xop[4] = GEN_INT (blen[src] - 1 - 8 * offset);
-
-          /* Register was cleared above, so can become 0xff and extended.
-             Note:  Instead of the CLR/SBRC/COM the sign extension could
-             be performed after the LSL below by means of a SBC if only
-             one byte has to be shifted left.  */
-
-          avr_asm_len ("sbrc %T0%T4" CR_TAB
-                       "com %T0%t2", xop, plen, 2);
-        }
-
-      /* Sign extend additional bytes by MOV and MOVW.  */
-
-      start = tlen[dest] - 2;
-      end = flen[dest] + ilen[src] - 1;
-
-      for (i = start; i != end; i--)
-        {
-          if (AVR_HAVE_MOVW && i != start && i-1 != end)
-            {
-              i--;
-              xop[4] = GEN_INT (8 * i);
-              xop[5] = GEN_INT (8 * (tlen[dest] - 2));
-              avr_asm_len ("movw %T0%t4,%T0%t5", xop, plen, 1);
-            }
-          else
-            {
-              xop[4] = GEN_INT (8 * i);
-              xop[5] = GEN_INT (8 * (tlen[dest] - 1));
-              avr_asm_len ("mov %T0%t4,%T0%t5", xop, plen, 1);
-            }
-        }
-    }
-
-  /* If destination is a signed fract, and the source was not, a shift
-     by 1 bit is needed.  Also restore sign from carry or T.  */
-
-  if (sbit[dest] && !ilen[dest] && (!sbit[src] || ilen[src]))
-    {
-      /* We have flen[src] non-zero fractional bytes to shift.
-         Because of the right shift, handle one byte more so that the
-         LSB won't be lost.  */
-
-      int nonzero = flen[src] + 1;
-
-      /* If the LSB is in the T flag and there are no fractional
-         bits, the high byte is zero and no shift needed.  */
-      
-      if (flen[src] == 0 && sign_in_T)
-        nonzero = 0;
-
-      start = flen[dest] - 1;
-      end = start - nonzero;
-
-      for (i = start; i > end && i >= 0; i--)
-        {
-          xop[4] = GEN_INT (8 * i);
-          if (i == start && !sign_in_Carry)
-            avr_asm_len ("lsr %T0%t4", xop, plen, 1);
-          else
-            avr_asm_len ("ror %T0%t4", xop, plen, 1);
-        }
-
-      if (sign_in_T)
-        {
-          avr_asm_len ("bld %T0%T2", xop, plen, 1);
-        }
-    }
-  else if (sbit[src] && !ilen[src] && (!sbit[dest] || ilen[dest]))
-    {
-      /* If source was a signed fract and dest was not, shift 1 bit
-         other way.  */
-
-      start = flen[dest] - flen[src];
-
-      if (start < 0)
-        start = 0;
-
-      for (i = start; i < flen[dest]; i++)
-        {
-          xop[4] = GEN_INT (8 * i);
-
-          if (i == start)
-            avr_asm_len ("lsl %T0%t4", xop, plen, 1);
-          else
-            avr_asm_len ("rol %T0%t4", xop, plen, 1);
-        }
-    }
-
-  return "";
-}
-
-
 /* Modifies the length assigned to instruction INSN
    LEN is the initially computed length of the insn.  */