diff mbox series

[avr,committed] Use monic denominator polynomials to save a multiplication.

Message ID 8bf79b39-f852-747b-7a35-60a74e15b4e8@gjlay.de
State New
Headers show
Series [avr,committed] Use monic denominator polynomials to save a multiplication. | expand

Commit Message

Georg-Johann Lay Oct. 5, 2023, 1:05 p.m. UTC
This is a small tweak in LibF7 to save one multiplication in computation
of denominator polynomials.  The polynomials are monic now, and
f7_horner needs one multiplication less.

Johann

--

LibF7: Use monic denominator polynomials to save a multiplication.

libgcc/config/avr/libf7/
	* libf7.h (F7_FLAGNO_plusx, F7_FLAG_plusx): New macros.
	* libf7.c (f7_horner): Handle F7_FLAG_plusx in highest coefficient.
	* libf7-const.def [F7MOD_atan_]: Denominator: Set F7_FLAG_plusx
	and omit highest term.
	[F7MOD_asinacos_]: Use rational function with normalized denominator.
diff mbox series

Patch

diff --git a/libgcc/config/avr/libf7/libf7-const.def b/libgcc/config/avr/libf7/libf7-const.def
index 8764c81ffa4..0e4c4d8701e 100644
--- a/libgcc/config/avr/libf7/libf7-const.def
+++ b/libgcc/config/avr/libf7/libf7-const.def
@@ -121,8 +121,7 @@  F7_CONST_DEF (X, 0, 
0xd6,0xa5,0x2d,0x73,0x34,0xd8,0x60, 11)
  F7_CONST_DEF (X, 0, 0xe5,0x08,0xb8,0x24,0x20,0x81,0xe7, 11)
  F7_CONST_DEF (X, 0, 0xe3,0xb3,0x35,0xfa,0xbf,0x1f,0x81, 10)
  F7_CONST_DEF (X, 0, 0xd3,0x89,0x2b,0xb6,0x3e,0x2e,0x05, 8)
-F7_CONST_DEF (X, 0, 0x9f,0xab,0xe9,0xd9,0x35,0xed,0x27, 5)
-F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0)
+F7_CONST_DEF (X, 8, 0x9f,0xab,0xe9,0xd9,0x35,0xed,0x27, 5)
  #endif

  #elif defined (SWIFT_3_4)
@@ -147,24 +146,22 @@  F7_CONST_DEF (pi_6, 0, 
0x86,0x0a,0x91,0xc1,0x6b,0x9b,0x2c, -1)
  #endif // which MiniMax

  #elif defined (F7MOD_asinacos_)
-// Relative error < 5.6E-18, quality = 1.00000037 (ideal = 1).
+// f(x) = asin(w) / w,  w = sqrt(x/2),  w in [0, 0.5].
+// Relative error < 4.9E-18, Q10 = 21.7
  #if defined (FOR_NUMERATOR)
-// 0.99999999999999999442491073135027586203 - 
1.035234033892197627842731209x + 
0.35290206232981519813422591897720574012x^2 - 
0.04333483170641685705612351801x^3 + 
0.0012557428614630796315205218507940285622x^4 + 
0.0000084705471128435769021718764878041684288x^5
-// p = Poly ([Decimal('0.99999999999999999442491073135027586203'), 
Decimal('-1.0352340338921976278427312087167692142'), 
Decimal('0.35290206232981519813422591897720574012'), 
Decimal('-0.043334831706416857056123518013656946650'), 
Decimal('0.0012557428614630796315205218507940285622'), 
Decimal('0.0000084705471128435769021718764878041684288')])
-F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0)
-F7_CONST_DEF (X, 1, 0x84,0x82,0x8c,0x7f,0xa2,0xf6,0x65, 0)
-F7_CONST_DEF (X, 0, 0xb4,0xaf,0x94,0x40,0xcb,0x86,0x69, -2)
-F7_CONST_DEF (X, 1, 0xb1,0x7f,0xdd,0x4f,0x4e,0xbe,0x1d, -5)
-F7_CONST_DEF (X, 0, 0xa4,0x97,0xbd,0x0b,0x59,0xc9,0x25, -10)
-F7_CONST_DEF (X, 0, 0x8e,0x1c,0xb9,0x0b,0x50,0x6c,0xce, -17)
+// -41050.4389591195072042579 + 43293.8985171424974364797 x - 
15230.0535110759003163511 x^2 + 1996.35047839480810448269 x^3 - 
72.2973010025603956782375 x^4
+F7_CONST_DEF (X, 1, 0xa0,0x5a,0x70,0x5f,0x9f,0xf6,0x90, 15)
+F7_CONST_DEF (X, 0, 0xa9,0x1d,0xe6,0x05,0x38,0x2d,0xec, 15)
+F7_CONST_DEF (X, 1, 0xed,0xf8,0x36,0xcb,0x9b,0x83,0xdd, 13)
+F7_CONST_DEF (X, 0, 0xf9,0x8b,0x37,0x1e,0x77,0x74,0xf9, 10)
+F7_CONST_DEF (X, 1, 0x90,0x98,0x37,0xd6,0x46,0x21,0x3c, 6)
  #elif defined (FOR_DENOMINATOR)
-// 1 - 1.118567367225532923662371649x + 
0.42736600959872448854098334016758333519x^2 - 
0.06355588484963171659942148390x^3 + 
0.0028820878185134035637440105959294542908x^4
-// q = Poly ([Decimal('1'), 
Decimal('-1.1185673672255329236623716486696411533'), 
Decimal('0.42736600959872448854098334016758333519'), 
Decimal('-0.063555884849631716599421483898013782858'), 
Decimal('0.0028820878185134035637440105959294542908')])
-F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0)
-F7_CONST_DEF (X, 1, 0x8f,0x2d,0x37,0x2a,0x4d,0xa1,0x57, 0)
-F7_CONST_DEF (X, 0, 0xda,0xcf,0xb7,0xb5,0x4c,0x0d,0xee, -2)
-F7_CONST_DEF (X, 1, 0x82,0x29,0x96,0x77,0x2e,0x19,0xc7, -4)
-F7_CONST_DEF (X, 0, 0xbc,0xe1,0x68,0xec,0xba,0x20,0x29, -9)
+// -41050.4389591195074048679 + 46714.7684304025268691353 x - 
18353.2551497967388796235 x^2 + 2878.9626098308300020834 x^3 - 
150.822900775648362380508 x^4 + x^5
+F7_CONST_DEF (X, 1, 0xa0,0x5a,0x70,0x5f,0x9f,0xf6,0x91, 15)
+F7_CONST_DEF (X, 0, 0xb6,0x7a,0xc4,0xb7,0xda,0xd8,0x1b, 15)
+F7_CONST_DEF (X, 1, 0x8f,0x62,0x82,0xa2,0xfe,0x81,0x26, 14)
+F7_CONST_DEF (X, 0, 0xb3,0xef,0x66,0xd9,0x90,0xe3,0x91, 11)
+F7_CONST_DEF (X, 9, 0x96,0xd2,0xa9,0xa0,0x0f,0x43,0x44, 7)
  #endif

  #elif defined (F7MOD_sincos_)
diff --git a/libgcc/config/avr/libf7/libf7.c b/libgcc/config/avr/libf7/libf7.c
index 8fb57ef90cc..373a8a55d90 100644
--- a/libgcc/config/avr/libf7/libf7.c
+++ b/libgcc/config/avr/libf7/libf7.c
@@ -1527,6 +1527,9 @@  void f7_horner (f7_t *cc, const f7_t *xx, uint8_t 
n_coeff, const f7_t *coeff,

    f7_copy_flash (yy, pcoeff);

+  if (yy->flags & F7_FLAG_plusx)
+    f7_Iadd (yy, xx);
+
    while (1)
      {
        --pcoeff;
diff --git a/libgcc/config/avr/libf7/libf7.h b/libgcc/config/avr/libf7/libf7.h
index 03fe6abe839..3f81b5f1f88 100644
--- a/libgcc/config/avr/libf7/libf7.h
+++ b/libgcc/config/avr/libf7/libf7.h
@@ -47,6 +47,11 @@ 
         --  f7_t.is_nan (NaN)
         --  f7_t.is_inf (+Inf or -Inf)
         --  f7_t.sign (negative or -Inf).
+       --  _plusx: This flag is used by f7_horner.  It is set in some
+	   polynomial coefficients from libf7-const.def to indicate that
+	   the respective polynomial has a leading coefficient of 1.
+	   The flag is set in the second-highest coefficient, and the leading
+	   coefficient is omitted.

     B)  The flags that are returned by f7_classify().  This are the
         flags from A) together with
@@ -56,6 +61,7 @@ 
  #define F7_FLAGNO_sign  0
  #define F7_FLAGNO_zero  1
  #define F7_FLAGNO_nan   2
+#define F7_FLAGNO_plusx 3
  #define F7_FLAGNO_inf   7

  #define F7_HAVE_Inf 1
@@ -64,6 +70,7 @@ 
  #define F7_FLAG_sign            (1 << F7_FLAGNO_sign)
  #define F7_FLAG_zero            (1 << F7_FLAGNO_zero)
  #define F7_FLAG_nan             (1 << F7_FLAGNO_nan)
+#define F7_FLAG_plusx           (1 << F7_FLAGNO_plusx)
  #define F7_FLAG_inf   (F7_HAVE_Inf << F7_FLAGNO_inf)

  // Flags that might be set in f7_t.flags.