Patchwork [Patch,AVR] : Implement __builtin_avr_fmul* if no hardware multiplier

login
register
mail settings
Submitter Georg-Johann Lay
Date July 4, 2011, 12:01 p.m.
Message ID <4E11AB88.4090106@gjlay.de>
Download mbox | patch
Permalink /patch/103094/
State New
Headers show

Comments

Georg-Johann Lay - July 4, 2011, 12:01 p.m.
The current implementation of __builtin_avr_fmul/fmuls/fmulsu has a
gap if no hardware multiplier is available.

This patch closes that gap by providing libgcc implementations named
__fmul, __fmuls resp. __fmulsu.

The implementations yield the same result as the respective FMUL*
instructions and have been tested against these instructions for all
possible combinations of input values on an atmega88 device.

Johann


	* doc/extend.texi (AVR Built-in Functions): Update documentation
	of __builtin_avr_fmul*.
	* config/avr/avr.c (avr_init_builtins): Don't depend on
	AVR_HAVE_MUL.
	* config/avr/avr-c.c (avr_cpu_cpp_builtins): Ditto.
	* config/avr/avr.md (fmul): Rename to fmul_insn.
	(fmuls): Rename to fmuls_insn.
	(fmulsu): Rename to fmulsu_insn.
	(fmul,fmuls,fmulsu): New expander.
	(*fmul.call,*fmuls.call,*fmulsu.call): New Insn.
	* config/avr/t-avr (LIB1ASMFUNCS): Add _fmul, _fmuls, _fmulsu.
	* config/avr/libgcc.S (__fmul): New function.
	(__fmuls): New function.
	(__fmulsu,__fmulsu_exit): New function.
Denis Chertykov - July 4, 2011, 12:16 p.m.
2011/7/4 Georg-Johann Lay <avr@gjlay.de>:
> The current implementation of __builtin_avr_fmul/fmuls/fmulsu has a
> gap if no hardware multiplier is available.
>
> This patch closes that gap by providing libgcc implementations named
> __fmul, __fmuls resp. __fmulsu.
>
> The implementations yield the same result as the respective FMUL*
> instructions and have been tested against these instructions for all
> possible combinations of input values on an atmega88 device.
>
> Johann
>
>
>        * doc/extend.texi (AVR Built-in Functions): Update documentation
>        of __builtin_avr_fmul*.
>        * config/avr/avr.c (avr_init_builtins): Don't depend on
>        AVR_HAVE_MUL.
>        * config/avr/avr-c.c (avr_cpu_cpp_builtins): Ditto.
>        * config/avr/avr.md (fmul): Rename to fmul_insn.
>        (fmuls): Rename to fmuls_insn.
>        (fmulsu): Rename to fmulsu_insn.
>        (fmul,fmuls,fmulsu): New expander.
>        (*fmul.call,*fmuls.call,*fmulsu.call): New Insn.
>        * config/avr/t-avr (LIB1ASMFUNCS): Add _fmul, _fmuls, _fmulsu.
>        * config/avr/libgcc.S (__fmul): New function.
>        (__fmuls): New function.
>        (__fmulsu,__fmulsu_exit): New function.
>

Approved.

Denis.

Patch

Index: doc/extend.texi
===================================================================
--- doc/extend.texi	(revision 175800)
+++ doc/extend.texi	(working copy)
@@ -8226,8 +8226,8 @@  or if not a specific built-in is impleme
 The following built-in functions map to the respective machine
 instruction, i.e. @code{nop}, @code{sei}, @code{cli}, @code{sleep},
 @code{wdr}, @code{swap}, @code{fmul}, @code{fmuls}
-resp. @code{fmulsu}. The latter three are only available if the AVR
-device actually supports multiplication.
+resp. @code{fmulsu}. The three @code{fmul*} built-ins are implemented
+as library calls if no hardware multiplier is available.
 
 @smallexample
 void __builtin_avr_nop (void)
Index: config/avr/libgcc.S
===================================================================
--- config/avr/libgcc.S	(revision 175628)
+++ config/avr/libgcc.S	(working copy)
@@ -1417,3 +1417,91 @@  DEFUN __ashldi3
     ret
 ENDF __ashldi3
 #endif /* defined (L_ashldi3) */
+
+
+/***********************************************************/    
+;;; Softmul versions of FMUL, FMULS and FMULSU to implement
+;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
+/***********************************************************/    
+
+#define A1 24
+#define B1 25
+#define C0 22
+#define C1 23
+#define A0 __tmp_reg__
+
+#ifdef L_fmuls
+;;; r23:r22 = fmuls (r24, r25) like in the FMULS instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmuls
+    ;; A0.7 = sign (A1) ^ sign (B1), i.e. set iff the result must be negated
+    mov  A0, A1
+    eor  A0, B1
+    ;; B1 = |B1|: the NEG is skipped if bit 7 of B1 is clear
+    sbrc B1, 7
+    neg  B1
+    XJMP __fmulsu_exit
+ENDF __fmuls
+#endif /* L_fmuls */
+
+#ifdef L_fmulsu
+;;; r23:r22 = fmulsu (r24, r25) like in the FMULSU instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmulsu
+    ;; A0.7 = sign (A1), i.e. set iff the result must be negated
+    mov  A0, A1
+;; FALLTHRU into __fmulsu_exit
+ENDF __fmulsu
+
+;; Helper for __fmuls and __fmulsu; expects the negate flag in A0.7
+DEFUN __fmulsu_exit
+    ;; A1 = |A1|: the NEG is skipped if bit 7 of A1 is clear
+    sbrc A1, 7
+    neg  A1
+#ifdef __AVR_HAVE_JMP_CALL__
+    ;; Some cores have problems skipping a 2-word instruction
+    tst  A0
+    brmi 1f
+#else
+    sbrs A0, 7
+#endif /* __AVR_HAVE_JMP_CALL__ */
+    XJMP  __fmul
+1:  XCALL __fmul
+    ;; Reached only if A0.7 = 1: C = -C (16-bit two-complement negate)
+    com  C1
+    neg  C0
+    sbci C1, -1
+    ret
+ENDF __fmulsu_exit
+#endif /* L_fmulsu */
+
+
+#ifdef L_fmul
+;;; r23:r22 = fmul (r24, r25) like in the FMUL instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmul
+    ; clear result C = C1:C0 and the low byte of the 16-bit addend A = A1:A0
+    clr   C0
+    clr   C1
+    clr   A0
+1:  tst   B1
+    ;; 1.0 = 0x80, so test bit 7 of B to see if A must be added to C.
+2:  brpl  3f
+    ;; C += A
+    add   C0, A0
+    adc   C1, A1
+3:  ;; A >>= 1
+    lsr   A1
+    ror   A0
+    ;; B <<= 1; loop again while B is non-zero
+    lsl   B1
+    brne  2b
+    ret
+ENDF __fmul
+#endif /* L_fmul */
+
+#undef A0
+#undef A1
+#undef B1
+#undef C0
+#undef C1
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 175628)
+++ config/avr/avr.md	(working copy)
@@ -3394,7 +3394,27 @@  (define_insn "wdr"
    (set_attr "cc" "none")])
   
 ;; FMUL
-(define_insn "fmul"
+(define_expand "fmul"
+  [(set (reg:QI 24)
+        (match_operand:QI 1 "register_operand" ""))
+   (set (reg:QI 25)
+        (match_operand:QI 2 "register_operand" ""))
+   (parallel [(set (reg:HI 22)
+                   (unspec:HI [(reg:QI 24)
+                               (reg:QI 25)] UNSPEC_FMUL))
+              (clobber (reg:HI 24))])
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 22))]
+  ""
+  {
+    if (AVR_HAVE_MUL)
+      {
+        emit_insn (gen_fmul_insn (operand0, operand1, operand2));
+        DONE;
+      }
+  })
+
+(define_insn "fmul_insn"
   [(set (match_operand:HI 0 "register_operand" "=r")
         (unspec:HI [(match_operand:QI 1 "register_operand" "a")
                     (match_operand:QI 2 "register_operand" "a")]
@@ -3406,8 +3426,38 @@  (define_insn "fmul"
   [(set_attr "length" "3")
    (set_attr "cc" "clobber")])
 
+(define_insn "*fmul.call"
+  [(set (reg:HI 22)
+        (unspec:HI [(reg:QI 24)
+                    (reg:QI 25)] UNSPEC_FMUL))
+   (clobber (reg:HI 24))]
+  "!AVR_HAVE_MUL"
+  "%~call __fmul"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
 ;; FMULS
-(define_insn "fmuls"
+(define_expand "fmuls"
+  [(set (reg:QI 24)
+        (match_operand:QI 1 "register_operand" ""))
+   (set (reg:QI 25)
+        (match_operand:QI 2 "register_operand" ""))
+   (parallel [(set (reg:HI 22)
+                   (unspec:HI [(reg:QI 24)
+                               (reg:QI 25)] UNSPEC_FMULS))
+              (clobber (reg:HI 24))])
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 22))]
+  ""
+  {
+    if (AVR_HAVE_MUL)
+      {
+        emit_insn (gen_fmuls_insn (operand0, operand1, operand2));
+        DONE;
+      }
+  })
+
+(define_insn "fmuls_insn"
   [(set (match_operand:HI 0 "register_operand" "=r")
         (unspec:HI [(match_operand:QI 1 "register_operand" "a")
                     (match_operand:QI 2 "register_operand" "a")]
@@ -3419,8 +3469,38 @@  (define_insn "fmuls"
   [(set_attr "length" "3")
    (set_attr "cc" "clobber")])
 
+(define_insn "*fmuls.call"
+  [(set (reg:HI 22)
+        (unspec:HI [(reg:QI 24)
+                    (reg:QI 25)] UNSPEC_FMULS))
+   (clobber (reg:HI 24))]
+  "!AVR_HAVE_MUL"
+  "%~call __fmuls"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
 ;; FMULSU
-(define_insn "fmulsu"
+(define_expand "fmulsu"
+  [(set (reg:QI 24)
+        (match_operand:QI 1 "register_operand" ""))
+   (set (reg:QI 25)
+        (match_operand:QI 2 "register_operand" ""))
+   (parallel [(set (reg:HI 22)
+                   (unspec:HI [(reg:QI 24)
+                               (reg:QI 25)] UNSPEC_FMULSU))
+              (clobber (reg:HI 24))])
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 22))]
+  ""
+  {
+    if (AVR_HAVE_MUL)
+      {
+        emit_insn (gen_fmulsu_insn (operand0, operand1, operand2));
+        DONE;
+      }
+  })
+
+(define_insn "fmulsu_insn"
   [(set (match_operand:HI 0 "register_operand" "=r")
         (unspec:HI [(match_operand:QI 1 "register_operand" "a")
                     (match_operand:QI 2 "register_operand" "a")]
@@ -3432,6 +3512,16 @@  (define_insn "fmulsu"
   [(set_attr "length" "3")
    (set_attr "cc" "clobber")])
 
+(define_insn "*fmulsu.call"
+  [(set (reg:HI 22)
+        (unspec:HI [(reg:QI 24)
+                    (reg:QI 25)] UNSPEC_FMULSU))
+   (clobber (reg:HI 24))]
+  "!AVR_HAVE_MUL"
+  "%~call __fmulsu"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
 
 ;; Some combiner patterns dealing with bits.
 ;; See PR42210
Index: config/avr/t-avr
===================================================================
--- config/avr/t-avr	(revision 175628)
+++ config/avr/t-avr	(working copy)
@@ -78,7 +78,8 @@  LIB1ASMFUNCS = \
 	_bswapdi2 \
 	_ashldi3 \
 	_ashrdi3 \
-	_lshrdi3
+	_lshrdi3 \
+	_fmul _fmuls _fmulsu
 
 LIB2FUNCS_EXCLUDE = \
 	_clz
Index: config/avr/avr-c.c
===================================================================
--- config/avr/avr-c.c	(revision 175628)
+++ config/avr/avr-c.c	(working copy)
@@ -94,10 +94,7 @@  avr_cpu_cpp_builtins (struct cpp_reader
   cpp_define (pfile, "__BUILTIN_AVR_SWAP");
   cpp_define (pfile, "__BUILTIN_AVR_DELAY_CYCLES");
 
-  if (AVR_HAVE_MUL)
-    {
-      cpp_define (pfile, "__BUILTIN_AVR_FMUL");
-      cpp_define (pfile, "__BUILTIN_AVR_FMULS");
-      cpp_define (pfile, "__BUILTIN_AVR_FMULSU");
-    }
+  cpp_define (pfile, "__BUILTIN_AVR_FMUL");
+  cpp_define (pfile, "__BUILTIN_AVR_FMULS");
+  cpp_define (pfile, "__BUILTIN_AVR_FMULSU");
 }
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c	(revision 175629)
+++ config/avr/avr.c	(working copy)
@@ -6536,19 +6536,12 @@  avr_init_builtins (void)
   DEF_BUILTIN ("__builtin_avr_delay_cycles", void_ftype_ulong, 
                AVR_BUILTIN_DELAY_CYCLES);
 
-  if (AVR_HAVE_MUL)
-    {
-      /* FIXME: If !AVR_HAVE_MUL, make respective functions available
-         in libgcc. For fmul and fmuls this is straight forward with
-         upcoming fixed point support. */
-      
-      DEF_BUILTIN ("__builtin_avr_fmul", uint_ftype_uchar_uchar, 
-                   AVR_BUILTIN_FMUL);
-      DEF_BUILTIN ("__builtin_avr_fmuls", int_ftype_char_char, 
-                   AVR_BUILTIN_FMULS);
-      DEF_BUILTIN ("__builtin_avr_fmulsu", int_ftype_char_uchar, 
-                   AVR_BUILTIN_FMULSU);
-    }
+  DEF_BUILTIN ("__builtin_avr_fmul", uint_ftype_uchar_uchar, 
+               AVR_BUILTIN_FMUL);
+  DEF_BUILTIN ("__builtin_avr_fmuls", int_ftype_char_char, 
+               AVR_BUILTIN_FMULS);
+  DEF_BUILTIN ("__builtin_avr_fmulsu", int_ftype_char_uchar, 
+               AVR_BUILTIN_FMULSU);
 }
 
 #undef DEF_BUILTIN