diff mbox

[avr] : ad PR54222: Add saturated multiply

Message ID 5119470C.2050501@gjlay.de
State New
Headers show

Commit Message

Georg-Johann Lay Feb. 11, 2013, 7:31 p.m. UTC
This patch adds optimized support for (short) (un)signed _Sat _Accum
multiplication (HA, UHA, SA, USA).

The 32-bit multiply uses 64 = 32 * 32 widening multiplication.  As a spin-off,
the widening mul is available as [u]mulsidi3 default pattern.


Okay for trunk?

Johann


gcc/
	PR target/54222
	* config/avr/avr-dimode.md (umulsidi3, mulsidi3): New expanders.
	(umulsidi3_insn, mulsidi3_insn): New insns.

libgcc/
	PR target/54222
	* config/avr/t-avr (LIB2FUNCS_EXCLUDE): Add: _usmulUHA, _usmulUSA,
	_ssmulHA, _ssmulSA.
	(LIB1ASMFUNCS): Add: _muldi3_6, _mulsidi3, _umulsidi3, _usmuluha3,
	_ssmulha3, _usmulusa3, _ssmulsa3.
	* config/avr/lib1funcs.S (__muldi3_6): Break out of __muldi3.
	(__muldi3): XCALL __muldi3_6 instead of rcall.
	(__umulsidi3, __mulsidi3): New functions.
	(do_prologue_saves, do_epilogue_restores): New .macros.
	(__divdi3_moddi3): Use them.
	* config/avr/lib1funcs-fixed.S (__usmuluha3, __ssmulha3)
	(__usmulusa3, __ssmulsa3): New functions.

Comments

Denis Chertykov Feb. 12, 2013, 9:49 a.m. UTC | #1
2013/2/11 Georg-Johann Lay <avr@gjlay.de>:
> This patch adds optimized support for (short) (un)signed _Sat _Accum
> multiplication (HA, UHA, SA, USA).
>
> The 32-bit multiply uses 64 = 32 * 32 widening multiplication.  As a spin-off,
> the widening mul is available as [u]mulsidi3 default pattern.
>
>
> Okay for trunk?
>
> Johann
>
>
> gcc/
>         PR target/54222
>         * config/avr/avr-dimode.md (umulsidi3, mulsidi3): New expanders.
>         (umulsidi3_insn, mulsidi3_insn): New insns.
>
> libgcc/
>         PR target/54222
>         * config/avr/t-avr (LIB2FUNCS_EXCLUDE): Add: _usmulUHA, _usmulUSA,
>         _ssmulHA, _ssmulSA.
>         (LIB1ASMFUNCS): Add: _muldi3_6, _mulsidi3, _umulsidi3, _usmuluha3,
>         _ssmulha3, _usmulusa3, _ssmulsa3.
>         * config/avr/lib1funcs.S (__muldi3_6): Break out of __muldi3.
>         (__muldi3): XCALL __muldi3_6 instead of rcall.
>         (__umulsidi3, __mulsidi3): New functions.
>         (do_prologue_saves, do_epilogue_restores): New .macros.
>         (__divdi3_moddi3): Use them.
>         * config/avr/lib1funcs-fixed.S (__usmuluha3, __ssmulha3)
>         (__usmulusa3, __ssmulsa3): New functions.
>

Approved.

Denis.
diff mbox

Patch

Index: gcc/config/avr/avr-dimode.md
===================================================================
--- gcc/config/avr/avr-dimode.md	(revision 195877)
+++ gcc/config/avr/avr-dimode.md	(working copy)
@@ -446,3 +446,34 @@  (define_insn "<code_stdname><mode>3_insn
   "%~call __<code_stdname>di3"
   [(set_attr "adjust_len" "call")
    (set_attr "cc" "clobber")])
+
+;; "umulsidi3"
+;; "mulsidi3"
+(define_expand "<extend_u>mulsidi3"  ;; Widening multiply DI = SI * SI by means of a libgcc call
+  [(parallel [(match_operand:DI 0 "register_operand" "")  ;; 64-bit product
+              (match_operand:SI 1 "general_operand" "")   ;; Factor, moved to hard reg 22 below
+              (match_operand:SI 2 "general_operand" "")   ;; Factor, moved to hard reg 18 below
+              ;; Dummy clobber, just to mention the any_extend iterator
+              (clobber (any_extend:SI (match_dup 1)))])]
+  "avr_have_dimode"
+  {
+    emit_move_insn (gen_rtx_REG (SImode, 22), operands[1]);
+    emit_move_insn (gen_rtx_REG (SImode, 18), operands[2]);
+    emit_insn (gen_<extend_u>mulsidi3_insn());
+    // Use emit_move_insn and not open-coded expand because of missing movdi
+    emit_move_insn (operands[0], gen_rtx_REG (DImode, ACC_A));
+    DONE;
+  })
+
+;; "umulsidi3_insn"
+;; "mulsidi3_insn"
+(define_insn "<extend_u>mulsidi3_insn"  ;; The libgcc call itself, all operands in hard registers
+  [(set (reg:DI ACC_A)                  ;; 64-bit product
+        (mult:DI (any_extend:DI (reg:SI 18))
+                 (any_extend:DI (reg:SI 22))))
+   (clobber (reg:HI REG_X))             ;; X and Z are overwritten by the libgcc routine
+   (clobber (reg:HI REG_Z))]
+  "avr_have_dimode"
+  "%~call __<extend_u>mulsidi3"
+  [(set_attr "adjust_len" "call")
+   (set_attr "cc" "clobber")])
Index: libgcc/config/avr/lib1funcs-fixed.S
===================================================================
--- libgcc/config/avr/lib1funcs-fixed.S	(revision 195878)
+++ libgcc/config/avr/lib1funcs-fixed.S	(working copy)
@@ -669,6 +669,210 @@  ENDF __mulusa3_round
 
 #undef GUARD
 
+/***********************************************************
+    Fixed  unsigned saturated Multiplication  8.8 x 8.8
+***********************************************************/
+
+#define C0  22
+#define C1  C0+1
+#define C2  C0+2
+#define C3  C0+3
+#define SS __tmp_reg__
+
+#if defined (L_usmuluha3)
+DEFUN __usmuluha3
+    ;; Widening multiply, the 32-bit product is then used from C0..C3
+#ifdef __AVR_HAVE_MUL__
+    ;; Adjust interface: copy the factors from R22 and R24 to R26 and R18
+    movw    R26, R22
+    movw    R18, R24
+#endif /* HAVE MUL */
+    XCALL   __umulhisi3
+    tst     C3                  ; Anything in the top byte means overflow
+    brne .Lmax
+    ;; Round, target is in C1..C2
+    lsl     C0                  ; Carry = bit 7 of C0, i.e. half an LSB of the target
+    adc     C1, __zero_reg__
+    adc     C2, __zero_reg__
+    brcs .Lmax                  ; Rounding carried out of C2: overflow
+    ;; Move result into place, i.e. from C1..C2 to C2..C3
+    mov     C3, C2
+    mov     C2, C1
+    ret
+.Lmax:
+    ;; Saturate to the largest value 0xffff
+    ldi     C2, 0xff
+    ldi     C3, 0xff
+    ret
+ENDF  __usmuluha3
+#endif /* L_usmuluha3 */
+
+/***********************************************************
+    Fixed signed saturated Multiplication  s8.7 x s8.7
+***********************************************************/
+
+#if defined (L_ssmulha3)
+DEFUN __ssmulha3
+    ;; Widening multiply, the 32-bit product is then used from C0..C3
+#ifdef __AVR_HAVE_MUL__
+    ;; Adjust interface: copy the factors from R22 and R24 to R26 and R18
+    movw    R26, R22
+    movw    R18, R24
+#endif /* HAVE MUL */
+    XCALL   __mulhisi3
+    ;; Adjust decimal point: shift the product left by one bit
+    lsl     C0
+    rol     C1
+    rol     C2
+    brvs .LsatC3.3              ; V = N xor C after rol: old bits 7 and 6 of C2 differ
+    ;; The 9 MSBs must be the same
+    rol     C3                  ; Carry = old sign bit of C3
+    sbc     SS, SS              ; SS = 0x00 or 0xff according to that carry
+    cp      C3, SS
+    brne .LsatSS
+    ;; Round by adding bit 7 of C0 to C1..C2
+    lsl     C0
+    adc     C1, __zero_reg__
+    adc     C2, __zero_reg__
+    brvs .Lmax                  ; Signed overflow while rounding up
+    ;; Move result into place, i.e. from C1..C2 to C2..C3
+    mov    C3, C2
+    mov    C2, C1
+    ret
+.Lmax:
+    ;; Load 0x7fff
+    clr     C3                  ; A non-negative C3 selects 0x7fff below
+.LsatC3.3:
+    ;; C3 <  0 -->  0x8000
+    ;; C3 >= 0 -->  0x7fff
+    mov     SS, C3
+.LsatSS:
+    ;; Load min / max value, only bit 7 of SS is inspected:
+    ;; SS = -1  -->  0x8000
+    ;; SS =  0  -->  0x7fff
+    ldi     C3, 0x7f
+    ldi     C2, 0xff
+    sbrc    SS, 7               ; Skip the increment if bit 7 of SS is clear
+    adiw    C2, 1               ; 0x7fff + 1 = 0x8000
+    ret
+ENDF  __ssmulha3
+#endif /* L_ssmulha3 */
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef SS
+
+/***********************************************************
+    Fixed  unsigned saturated Multiplication  16.16 x 16.16
+***********************************************************/
+
+#define C0  18
+#define C1  C0+1
+#define C2  C0+2
+#define C3  C0+3
+#define C4  C0+4
+#define C5  C0+5
+#define C6  C0+6
+#define C7  C0+7
+#define SS __tmp_reg__
+
+#if defined (L_usmulusa3)
+;; R22[4] = R22[4] *{usat} R18[4]
+;; Ordinary ABI function
+DEFUN __usmulusa3
+    ;; Widening multiply, the 64-bit product is then used from C0..C7
+    XCALL   __umulsidi3
+    or      C7, C6              ; Anything in the top two bytes means overflow
+    brne .Lmax
+    ;; Round, target is in C2..C5
+    lsl     C1                  ; Carry = bit 7 of C1, i.e. half an LSB of the target
+    adc     C2, __zero_reg__
+    adc     C3, __zero_reg__
+    adc     C4, __zero_reg__
+    adc     C5, __zero_reg__
+    brcs .Lmax                  ; Rounding carried out of C5: overflow
+    ;; Move result into place, i.e. from C2..C5 to C4..C7
+    wmov    C6, C4
+    wmov    C4, C2
+    ret
+.Lmax:
+    ;; Saturate to the largest value 0xffffffff
+    ldi     C7, 0xff
+    ldi     C6, 0xff
+    wmov    C4, C6
+    ret
+ENDF  __usmulusa3
+#endif /* L_usmulusa3 */
+
+/***********************************************************
+    Fixed signed saturated Multiplication  s16.15 x s16.15
+***********************************************************/
+
+#if defined (L_ssmulsa3)
+;; R22[4] = R22[4] *{ssat} R18[4]
+;; Ordinary ABI function
+DEFUN __ssmulsa3
+    ;; Widening multiply, the 64-bit product is then used from C0..C7
+    XCALL   __mulsidi3
+    ;; Adjust decimal point: shift the product left by one bit
+    lsl     C1
+    rol     C2
+    rol     C3
+    rol     C4
+    rol     C5
+    brvs .LsatC7.7              ; V = N xor C after rol: old bits 7 and 6 of C5 differ
+    ;; The 17 MSBs must be the same
+    rol     C6
+    rol     C7                  ; Carry = old sign bit of C7
+    sbc     SS, SS              ; SS = 0x00 or 0xff according to that carry
+    cp      C6, SS
+    cpc     C7, SS
+    brne .LsatSS
+    ;; Round by adding bit 7 of C1 to C2..C5
+    lsl     C1
+    adc     C2, __zero_reg__
+    adc     C3, __zero_reg__
+    adc     C4, __zero_reg__
+    adc     C5, __zero_reg__
+    brvs .Lmax                  ; Signed overflow while rounding up
+    ;; Move result into place, i.e. from C2..C5 to C4..C7
+    wmov    C6, C4
+    wmov    C4, C2
+    ret
+
+.Lmax:
+    ;; Load 0x7fffffff
+    clr     C7                  ; A non-negative C7 selects 0x7fffffff below
+.LsatC7.7:
+    ;; C7 <  0 -->  0x80000000
+    ;; C7 >= 0 -->  0x7fffffff
+    lsl     C7                  ; Carry = sign bit of C7
+    sbc     SS, SS
+.LsatSS:
+    ;; Load min / max value:
+    ;; SS = -1  -->  0x80000000
+    ;; SS =  0  -->  0x7fffffff
+    com     SS                  ; 0x00 for the minimum, 0xff for the maximum
+    mov     C4, SS
+    mov     C5, C4
+    wmov    C6, C4              ; All four result bytes now equal SS
+    subi    C7, 0x80            ; Top byte: 0x00 -> 0x80, 0xff -> 0x7f
+    ret
+ENDF  __ssmulsa3
+#endif /* L_ssmulsa3 */
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#undef SS
+
 /*******************************************************
       Fractional Division 8 / 8
 *******************************************************/
Index: libgcc/config/avr/lib1funcs.S
===================================================================
--- libgcc/config/avr/lib1funcs.S	(revision 195878)
+++ libgcc/config/avr/lib1funcs.S	(working copy)
@@ -79,6 +79,41 @@  see the files COPYING3 and COPYING.RUNTI
 #define XJMP  rjmp
 #endif
 
+;; Prologue stuff: enter __prologue_saves__ at the offset that saves n_pushed
+;; registers.  X = frame size, Z = address of the local label ending the macro.
+.macro do_prologue_saves n_pushed n_frame=0
+    ldi r26, lo8(\n_frame)
+    ldi r27, hi8(\n_frame)
+    ldi r30, lo8(gs(.L_prologue_saves.\@))
+    ldi r31, hi8(gs(.L_prologue_saves.\@))
+    XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
+.L_prologue_saves.\@:
+.endm
+
+;; Epilogue stuff: set Y = SP + n_frame and R30 = n_pushed, then jump into
+;; __epilogue_restores__ which restores the registers and returns.
+.macro do_epilogue_restores n_pushed n_frame=0
+    in      r28, __SP_L__
+#ifdef __AVR_HAVE_SPH__
+    in      r29, __SP_H__
+.if \n_frame > 63
+    subi    r28, lo8(-\n_frame)     ; Y += n_frame, too big for adiw
+    sbci    r29, hi8(-\n_frame)
+.elseif \n_frame > 0
+    adiw    r28, \n_frame           ; Y += n_frame
+.endif
+#else
+    clr     r29                     ; No SP_H: the high byte of Y is zero
+.if \n_frame > 0
+    subi    r28, lo8(-\n_frame)     ; Y += n_frame, low byte only
+.endif
+#endif /* HAVE SPH */
+    ldi     r30, \n_pushed
+    XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
+.endm
+
+;; Support function entry and exit for convenience
+
 .macro DEFUN name
 .global \name
 .func \name
@@ -98,6 +133,9 @@  see the files COPYING3 and COPYING.RUNTI
 .endfunc
 .endm
 
+;; Skip next instruction (R0 always equals itself), typically a jump target
+#define skip cpse 0,0
+
 ;; Negate a 2-byte value held in consecutive registers
 .macro NEG2  reg
     com     \reg+1
@@ -736,8 +774,6 @@  ENDF __mulsqipsi3
        Multiplication 64 x 64
 *******************************************************/
 
-#if defined (L_muldi3)
-
 ;; A[] = A[] * B[]
 
 ;; A[0..7]: In: Multiplicand
@@ -774,6 +810,8 @@  ENDF __mulsqipsi3
 #define C6  C4+2
 #define C7  C4+3
 
+#if defined (L_muldi3)
+
 ;; A[]     *= B[]
 ;; R25:R18 *= R17:R10
 ;; Ordinary ABI-Function
@@ -818,7 +856,7 @@  DEFUN __muldi3
 
     wmov    26, B2
     ;; 0 * 1
-    rcall   __muldi3_6
+    XCALL   __muldi3_6
 
     pop     A0
     pop     A1
@@ -829,7 +867,7 @@  DEFUN __muldi3
     pop     r26
     pop     r27
     ;; 1 * 0
-    rcall   __muldi3_6
+    XCALL   __muldi3_6
 
     pop     A0
     pop     A1
@@ -852,7 +890,12 @@  DEFUN __muldi3
     pop     r28
     pop     r29
     ret
+ENDF __muldi3
+#endif /* L_muldi3 */
 
+#if defined (L_muldi3_6)
+;; A helper for some 64-bit multiplications with MUL available
+DEFUN __muldi3_6
 __muldi3_6:
     XCALL   __umulhisi3
     add     C2, 22
@@ -862,7 +905,8 @@  __muldi3_6:
     brcc    0f
     adiw    C6, 1
 0:  ret
-ENDF __muldi3
+ENDF __muldi3_6
+#endif /* L_muldi3_6 */
 
 #undef C7
 #undef C6
@@ -875,6 +919,8 @@  ENDF __muldi3
 
 #else /* !HAVE_MUL */
 
+#if defined (L_muldi3)
+
 #define C0  26
 #define C1  C0+1
 #define C2  C0+2
@@ -952,6 +998,7 @@  ENDF __muldi3
 #undef C1
 #undef C0
 
+#endif /* L_muldi3 */
 #endif /* HAVE_MUL */
 
 #undef B7
@@ -972,7 +1019,240 @@  ENDF __muldi3
 #undef A1
 #undef A0
 
-#endif /* L_muldi3 */
+/*******************************************************
+   Widening Multiplication 64 = 32 x 32  with  MUL
+*******************************************************/
+
+#if defined (__AVR_HAVE_MUL__)
+#define A0 r22
+#define A1 r23 
+#define A2 r24
+#define A3 r25
+ 
+#define B0 r18
+#define B1 r19
+#define B2 r20
+#define B3 r21
+ 
+#define C0  18
+#define C1  C0+1
+#define C2  20
+#define C3  C2+1
+#define C4  28
+#define C5  C4+1
+#define C6  C4+2
+#define C7  C4+3
+
+#if defined (L_umulsidi3)
+
+;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
+
+;; R18[8] = R22[4] * R18[4]
+;;
+;; Ordinary ABI Function, but additionally sets
+;; X = R20[2] = B2[2]
+;; Z = R22[2] = A0[2]
+DEFUN __umulsidi3
+    clt                 ; T = 0: A is treated as unsigned
+    ;; FALLTHRU
+ENDF  __umulsidi3
+    ;; T = sign (A)
+DEFUN __umulsidi3_helper
+    push    29  $  push    28 ; Y
+    wmov    30, A2      ; Z = high word of A
+    ;; Counting in Words, we have to perform 4 Multiplications
+    ;; 0 * 0
+    wmov    26, A0
+    XCALL __umulhisi3
+    push    23  $  push    22 ; C0
+    wmov    28, B0      ; Y = low word of B
+    wmov    18, B2      ; High word of B is the next factor
+    wmov    C2, 24      ; High word of 0 * 0 goes to C2..C3
+    push    27  $  push    26 ; A0
+    push    19  $  push    18 ; B2
+    ;;
+    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
+    ;;  B2  C2  --  --  --  B0  A2
+    ;; 1 * 1
+    wmov    26, 30      ; A2
+    XCALL __umulhisi3
+    ;; Sign-extend A.  T holds the sign of A
+    brtc    0f          ; T clear: no correction needed
+    ;; Subtract B from the high part of the result
+    sub     22, 28
+    sbc     23, 29
+    sbc     24, 18
+    sbc     25, 19
+0:  wmov    18, 28      ;; B0
+    wmov    C4, 22
+    wmov    C6, 24
+    ;;
+    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
+    ;;  B0  C2  --  --  A2  C4  C6
+    ;;
+    ;; 1 * 0
+    XCALL __muldi3_6
+    ;; 0 * 1
+    pop     26  $   pop 27  ;; B2
+    pop     18  $   pop 19  ;; A0
+    XCALL __muldi3_6
+
+    ;; Move result C into place and save A0 in Z
+    wmov    22, C4
+    wmov    24, C6
+    wmov    30, 18 ; A0
+    pop     C0  $   pop C1  ;; Low word of 0 * 0, pushed above
+
+    ;; Epilogue
+    pop     28  $   pop 29  ;; Y
+    ret
+ENDF __umulsidi3_helper
+#endif /* L_umulsidi3 */
+
+
+#if defined (L_mulsidi3)
+
+;; Signed widening 64 = 32 * 32 Multiplication
+;;
+;; R18[8] = R22[4] * R18[4]
+;; Ordinary ABI Function
+DEFUN __mulsidi3
+    bst     A3, 7           ; T = sign (A), evaluated by the helper
+    sbrs    B3, 7           ; Enhanced core has no skip bug
+    XJMP __umulsidi3_helper ; B >= 0: the helper returns to our caller
+
+    ;; B needs sign-extension: A is subtracted from the high 32 bits below
+    push    A3
+    push    A2
+    XCALL __umulsidi3_helper
+    ;; A0 survived in Z
+    sub     r22, r30
+    sbc     r23, r31
+    pop     r26             ; A2, pushed above
+    pop     r27             ; A3, pushed above
+    sbc     r24, r26
+    sbc     r25, r27
+    ret
+ENDF __mulsidi3
+#endif /* L_mulsidi3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#endif /* HAVE_MUL */
+
+/**********************************************************
+    Widening Multiplication 64 = 32 x 32  without  MUL
+**********************************************************/
+
+#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+#define AA0 22
+#define AA1 AA0+1
+#define AA2 AA0+2
+#define AA3 AA0+3
+
+#define BB0 18
+#define BB1 BB0+1
+#define BB2 BB0+2
+#define BB3 BB0+3
+
+#define Mask r30
+
+;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
+;;
+;; R18[8] = R22[4] * R18[4]
+;; Ordinary ABI Function
+DEFUN __mulsidi3
+    set     ; T = 1: signed
+    skip
+    ;; FALLTHRU
+ENDF  __mulsidi3
+
+DEFUN __umulsidi3
+    clt     ; skipped when entered through __mulsidi3, T = 0: unsigned
+    ;; Save 10 Registers: R10..R17, R28, R29
+    do_prologue_saves 10
+    ldi     Mask, 0xff
+    bld     Mask, 7         ; Mask = 0xff if signed, 0x7f if unsigned
+    ;; Move B into place...
+    wmov    B0, BB0
+    wmov    B2, BB2
+    ;; ...and extend it
+    and     BB3, Mask       ; Unsigned: drop the top bit so that the extension is 0
+    lsl     BB3             ; Carry = top bit after masking
+    sbc     B4, B4          ; B4 = 0xff or 0x00
+    mov     B5, B4
+    wmov    B6, B4
+    ;; Move A into place...
+    wmov    A0, AA0
+    wmov    A2, AA2
+    ;; ...and extend it
+    and     AA3, Mask
+    lsl     AA3
+    sbc     A4, A4          ; A4 = 0xff or 0x00
+    mov     A5, A4
+    wmov    A6, A4
+    XCALL   __muldi3        ; 64-bit multiply of the extended operands
+    do_epilogue_restores 10
+ENDF __umulsidi3
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef A4
+#undef A5
+#undef A6
+#undef A7
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef B4
+#undef B5
+#undef B6
+#undef B7
+#undef AA0
+#undef AA1
+#undef AA2
+#undef AA3
+#undef BB0
+#undef BB1
+#undef BB2
+#undef BB3
+#undef Mask
+#endif /* L_mulsidi3 && !HAVE_MUL */
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 	
@@ -1625,14 +1905,10 @@  DEFUN  __divdi3_moddi3
 
 0:  ;; The Prologue
     ;; Save 12 Registers:  Y, 17...8
-    ;; No Frame needed (X = 0)
-    clr r26
-    clr r27
-    ldi r30, lo8(gs(1f))
-    ldi r31, hi8(gs(1f))
-    XJMP __prologue_saves__ + ((18 - 12) * 2)
+    ;; No Frame needed
+    do_prologue_saves 12
 
-1:  ;; SS.7 will contain the Sign of the Quotient  (A.sign * B.sign)
+    ;; SS.7 will contain the Sign of the Quotient  (A.sign * B.sign)
     ;; SS.6 will contain the Sign of the Remainder (A.sign)
     mov     SS, A7
     asr     SS
@@ -1672,15 +1948,8 @@  DEFUN  __divdi3_moddi3
 #endif /* __AVR_HAVE_JMP_CALL__ */
     XCALL   __negdi2
 
-4:  ;; Epilogue: Restore the Z = 12 Registers and return
-    in r28, __SP_L__
-#if defined (__AVR_HAVE_SPH__)
-    in r29, __SP_H__
-#else
-    clr r29
-#endif /* #SP = 8/16 */
-    ldi r30, 12
-    XJMP __epilogue_restores__ + ((18 - 12) * 2)
+4:  ;; Epilogue: Restore 12 Registers and return
+    do_epilogue_restores 12
 
 ENDF __divdi3_moddi3
 
Index: libgcc/config/avr/t-avr
===================================================================
--- libgcc/config/avr/t-avr	(revision 195878)
+++ libgcc/config/avr/t-avr	(working copy)
@@ -18,7 +18,8 @@  LIB1ASMFUNCS = \
 	_udivmodsi4 \
 	_divmodsi4 \
 	_divdi3 _udivdi3 \
-	_muldi3 \
+	_muldi3 _muldi3_6 \
+	_mulsidi3 _umulsidi3 \
 	_udivmod64 \
 	_negsi2 _negdi2 \
 	_prologue \
@@ -69,6 +70,8 @@  LIB1ASMFUNCS += \
 	_mulhq3 _muluhq3 \
 	_mulha3 _muluha3 _muluha3_round \
 	_mulsa3 _mulusa3 \
+	_usmuluha3 _ssmulha3 \
+	_usmulusa3 _ssmulsa3 \
 	_divqq3 _udivuqq3 _divqq_helper \
 	_divhq3 _udivuhq3 \
 	_divha3 _udivuha3 \
@@ -260,3 +263,15 @@  LIB2FUNCS_EXCLUDE += \
 LIB2FUNCS_EXCLUDE += \
 	$(foreach func,_div,\
 	$(foreach mode,$(sdiv_modes) $(udiv_modes),$(func_X)))
+
+# Saturated multiplications that lib1funcs-fixed.S implements in assembly
+ssmul_modes =  HA  SA
+usmul_modes = UHA USA
+
+LIB2FUNCS_EXCLUDE += \
+	$(foreach func,_usmul,\
+	$(foreach mode,$(usmul_modes),$(func_X)))
+
+LIB2FUNCS_EXCLUDE += \
+	$(foreach func,_ssmul,\
+	$(foreach mode,$(ssmul_modes),$(func_X)))