Patch Detail
get: Show a patch.
patch: Update a patch.
put: Update a patch.
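
These are the standard Patchwork REST operations on a patch resource: GET returns the JSON object shown below, while PATCH and PUT modify mutable fields (state, delegate, archived) and require an authenticated maintainer. A minimal sketch in Python using the requests library; the token value and the "under-review" state slug are illustrative assumptions, not taken from this page:

    import requests

    BASE = "http://patchwork.ozlabs.org/api"
    PATCH_ID = 1475740
    # Hypothetical token; a real one comes from your Patchwork user profile.
    TOKEN = "0123456789abcdef"

    # get: Show a patch.
    resp = requests.get(f"{BASE}/patches/{PATCH_ID}/")
    resp.raise_for_status()
    patch = resp.json()
    print(patch["name"], "-", patch["state"])

    # patch: Update a patch (partial update; needs maintainer rights).
    resp = requests.patch(
        f"{BASE}/patches/{PATCH_ID}/",
        headers={"Authorization": f"Token {TOKEN}"},
        json={"state": "under-review"},
    )
    resp.raise_for_status()
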
GET /api/patches/1475740/?format=api
{ "id": 1475740, "url": "http://patchwork.ozlabs.org/api/patches/1475740/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20210508014802.892561-38-richard.henderson@linaro.org/", "project": { "id": 14, "url": "http://patchwork.ozlabs.org/api/projects/14/?format=api", "name": "QEMU Development", "link_name": "qemu-devel", "list_id": "qemu-devel.nongnu.org", "list_email": "qemu-devel@nongnu.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20210508014802.892561-38-richard.henderson@linaro.org>", "list_archive_url": null, "date": "2021-05-08T01:47:27", "name": "[37/72] softfloat: Move muladd_floats to softfloat-parts.c.inc", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "e954cbeacdbc0c2e03dfc81adbeabfbbf8c47dc8", "submitter": { "id": 72104, "url": "http://patchwork.ozlabs.org/api/people/72104/?format=api", "name": "Richard Henderson", "email": "richard.henderson@linaro.org" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20210508014802.892561-38-richard.henderson@linaro.org/mbox/", "series": [ { "id": 242770, "url": "http://patchwork.ozlabs.org/api/series/242770/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=242770", "date": "2021-05-08T01:46:53", "name": "Convert floatx80 and float128 to FloatParts", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/242770/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/1475740/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/1475740/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>", "X-Original-To": "incoming@patchwork.ozlabs.org", "Delivered-To": "patchwork-incoming@bilbo.ozlabs.org", "Authentication-Results": [ "ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org\n (client-ip=209.51.188.17; helo=lists.gnu.org;\n envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n receiver=<UNKNOWN>)", "ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n unprotected) header.d=linaro.org header.i=@linaro.org header.a=rsa-sha256\n header.s=google header.b=hYjABf28;\n\tdkim-atps=neutral" ], "Received": [ "from lists.gnu.org (lists.gnu.org [209.51.188.17])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 4FcWBt33cTz9sWp\n\tfor <incoming@patchwork.ozlabs.org>; Sat, 8 May 2021 12:16:06 +1000 (AEST)", "from localhost ([::1]:58922 helo=lists1p.gnu.org)\n\tby lists.gnu.org with esmtp (Exim 4.90_1)\n\t(envelope-from <qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>)\n\tid 1lfCVg-0002GV-EC\n\tfor incoming@patchwork.ozlabs.org; Fri, 07 May 2021 22:16:04 -0400", "from eggs.gnu.org ([2001:470:142:3::10]:41018)\n by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)\n (Exim 4.90_1) (envelope-from <richard.henderson@linaro.org>)\n id 1lfC5L-0004x9-Gf\n for qemu-devel@nongnu.org; Fri, 07 May 2021 21:48:52 -0400", "from mail-pg1-x532.google.com ([2607:f8b0:4864:20::532]:39456)\n by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_128_GCM_SHA256:128)\n (Exim 4.90_1) (envelope-from <richard.henderson@linaro.org>)\n id 1lfC50-0003ft-5D\n for qemu-devel@nongnu.org; Fri, 07 May 2021 21:48:51 -0400", "by 
mail-pg1-x532.google.com with SMTP id s22so8629125pgk.6\n for <qemu-devel@nongnu.org>; Fri, 07 May 2021 18:48:28 -0700 (PDT)", "from localhost.localdomain ([71.212.144.24])\n by smtp.gmail.com with ESMTPSA id t4sm5819681pfq.165.2021.05.07.18.48.27\n (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n Fri, 07 May 2021 18:48:27 -0700 (PDT)" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google;\n h=from:to:cc:subject:date:message-id:in-reply-to:references\n :mime-version:content-transfer-encoding;\n bh=8Xkz1ybjWLeUjZCsE7o98Dkj/GU255PMCjv80HfDnbE=;\n b=hYjABf281qbp4B54Q9Y39BvAXJi6ngFPgDGw8dWDxKbfnV7rRwFc0Oqd5RNXyNjwXn\n dw79swvbeAibOF3VvJPwzMNcsK56EfIKMEabVTTWdEda1hp8vE9yQIM4qFIG8qKB6U8z\n 7Md+GnRT2HLajIchjbXE8Wr1aohmg4OF0EvoSdwR1HW1+ClYxrz4QrkzuP/XufLfadkA\n sNwQ/Ls0cptupKZ5qljsXz5nzkH5VKufV6dj8tXTUW3pLZWUxaPkhFdIjfVjZUDC3q7c\n bWOfxo+7MkjKKEKxhqXkFfozZNvvnczDRb2RaZg/E5cTOi6+waCF4no5+82WqvKlI4hs\n MnDA==", "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=1e100.net; s=20161025;\n h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to\n :references:mime-version:content-transfer-encoding;\n bh=8Xkz1ybjWLeUjZCsE7o98Dkj/GU255PMCjv80HfDnbE=;\n b=ECjfhtFGgKdTdOlE/S23unlxXIXDTM9yNbhOlDzPmZrQz+iACJi4LFseqjBmyPbabM\n xW9EjmYP3QvR0odvoVuu+PehfAj0yRxQt5K16lr/exvDJFRtP7zxmgMnDVQRkU3sLGYz\n GUSHfXuvQDczDOTfxw2XEK1omsA7ROV8HeLhrrJ2n4mH3otoPoyJJ1XqphuptH2lu+mQ\n htmBPYGFPhWonyILSzsFvqCfFQwbwWYmB4uRu4bTo5G9JEtDhYmNYEyV27wlyBygUnEu\n vEkGN4vUDwsVuAc4IwxD26nn8ky+lwo54QJZTA+Hqp+mcxMU6LQFlGJ5Nkj/0depdIGQ\n gKHw==", "X-Gm-Message-State": "AOAM53269JfdI2t36Ls5GpIY10nCYt10mhN7k12D0wbqTVLgxE6+Ck17\n RTon9IYF4UXnsjqOsw6s9wFWQ2UHWwuvEw==", "X-Google-Smtp-Source": "\n ABdhPJx+L3CeUZifByVD+t6BWeudEabo8jAvP9Zn0j838uPsyp28nzKL8f274aSlELc83b0eqbQ+ZQ==", "X-Received": "by 2002:aa7:8503:0:b029:27d:497f:1da6 with SMTP id\n v3-20020aa785030000b029027d497f1da6mr13642574pfn.28.1620438507603;\n Fri, 07 May 2021 18:48:27 -0700 (PDT)", "From": "Richard Henderson <richard.henderson@linaro.org>", "To": "qemu-devel@nongnu.org", "Subject": "[PATCH 37/72] softfloat: Move muladd_floats to softfloat-parts.c.inc", "Date": "Fri, 7 May 2021 18:47:27 -0700", "Message-Id": "<20210508014802.892561-38-richard.henderson@linaro.org>", "X-Mailer": "git-send-email 2.25.1", "In-Reply-To": "<20210508014802.892561-1-richard.henderson@linaro.org>", "References": "<20210508014802.892561-1-richard.henderson@linaro.org>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "Received-SPF": "pass client-ip=2607:f8b0:4864:20::532;\n envelope-from=richard.henderson@linaro.org; helo=mail-pg1-x532.google.com", "X-Spam_score_int": "-20", "X-Spam_score": "-2.1", "X-Spam_bar": "--", "X-Spam_report": "(-2.1 / 5.0 requ) BAYES_00=-1.9, DKIM_SIGNED=0.1,\n DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1,\n RCVD_IN_DNSWL_NONE=-0.0001, SPF_HELO_NONE=0.001,\n SPF_PASS=-0.001 autolearn=ham autolearn_force=no", "X-Spam_action": "no action", "X-BeenThere": "qemu-devel@nongnu.org", "X-Mailman-Version": "2.1.23", "Precedence": "list", "List-Id": "<qemu-devel.nongnu.org>", "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>", "List-Archive": "<https://lists.nongnu.org/archive/html/qemu-devel>", "List-Post": "<mailto:qemu-devel@nongnu.org>", "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>", "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n 
<mailto:qemu-devel-request@nongnu.org?subject=subscribe>", "Cc": "alex.bennee@linaro.org, david@redhat.com", "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org", "Sender": "\"Qemu-devel\"\n <qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>" }, "content": "Rename to parts$N_muladd.\nImplement float128_muladd with FloatParts128.\n\nSigned-off-by: Richard Henderson <richard.henderson@linaro.org>\n---\n include/fpu/softfloat.h | 2 +\n fpu/softfloat.c | 406 ++++++++++++++++++--------------------\n tests/fp/fp-bench.c | 8 +-\n tests/fp/fp-test.c | 2 +-\n fpu/softfloat-parts.c.inc | 126 ++++++++++++\n tests/fp/wrap.c.inc | 12 ++\n 6 files changed, 342 insertions(+), 214 deletions(-)", "diff": "diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h\nindex 019c2ec66d..53f2c2ea3c 100644\n--- a/include/fpu/softfloat.h\n+++ b/include/fpu/softfloat.h\n@@ -1197,6 +1197,8 @@ float128 float128_round_to_int(float128, float_status *status);\n float128 float128_add(float128, float128, float_status *status);\n float128 float128_sub(float128, float128, float_status *status);\n float128 float128_mul(float128, float128, float_status *status);\n+float128 float128_muladd(float128, float128, float128, int,\n+ float_status *status);\n float128 float128_div(float128, float128, float_status *status);\n float128 float128_rem(float128, float128, float_status *status);\n float128 float128_sqrt(float128, float_status *status);\ndiff --git a/fpu/softfloat.c b/fpu/softfloat.c\nindex 4f498c11e5..a9ee8498ae 100644\n--- a/fpu/softfloat.c\n+++ b/fpu/softfloat.c\n@@ -715,6 +715,10 @@ static float128 float128_pack_raw(const FloatParts128 *p)\n #define PARTS_GENERIC_64_128(NAME, P) \\\n QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)\n \n+#define PARTS_GENERIC_64_128_256(NAME, P) \\\n+ QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \\\n+ (FloatParts128 *, parts128_##NAME), parts64_##NAME)\n+\n #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)\n #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)\n \n@@ -760,15 +764,17 @@ static void parts128_uncanon(FloatParts128 *p, float_status *status,\n \n static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);\n static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);\n+static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);\n \n #define parts_add_normal(A, B) \\\n- PARTS_GENERIC_64_128(add_normal, A)(A, B)\n+ PARTS_GENERIC_64_128_256(add_normal, A)(A, B)\n \n static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);\n static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);\n+static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);\n \n #define parts_sub_normal(A, B) \\\n- PARTS_GENERIC_64_128(sub_normal, A)(A, B)\n+ PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)\n \n static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,\n float_status *s, bool subtract);\n@@ -786,6 +792,16 @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,\n #define parts_mul(A, B, S) \\\n PARTS_GENERIC_64_128(mul, A)(A, B, S)\n \n+static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,\n+ FloatParts64 *c, int flags,\n+ float_status *s);\n+static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,\n+ FloatParts128 *c, int flags,\n+ float_status *s);\n+\n+#define parts_muladd(A, B, C, Z, S) \\\n+ PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)\n+\n /*\n * Helper functions for 
softfloat-parts.c.inc, per-size operations.\n */\n@@ -793,6 +809,10 @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,\n #define FRAC_GENERIC_64_128(NAME, P) \\\n QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)\n \n+#define FRAC_GENERIC_64_128_256(NAME, P) \\\n+ QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \\\n+ (FloatParts128 *, frac128_##NAME), frac64_##NAME)\n+\n static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)\n {\n return uadd64_overflow(a->frac, b->frac, &r->frac);\n@@ -806,7 +826,17 @@ static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)\n return c;\n }\n \n-#define frac_add(R, A, B) FRAC_GENERIC_64_128(add, R)(R, A, B)\n+static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)\n+{\n+ bool c = 0;\n+ r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);\n+ r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);\n+ r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);\n+ r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);\n+ return c;\n+}\n+\n+#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)\n \n static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)\n {\n@@ -901,7 +931,16 @@ static void frac128_neg(FloatParts128 *a)\n a->frac_hi = usub64_borrow(0, a->frac_hi, &c);\n }\n \n-#define frac_neg(A) FRAC_GENERIC_64_128(neg, A)(A)\n+static void frac256_neg(FloatParts256 *a)\n+{\n+ bool c = 0;\n+ a->frac_lo = usub64_borrow(0, a->frac_lo, &c);\n+ a->frac_lm = usub64_borrow(0, a->frac_lm, &c);\n+ a->frac_hm = usub64_borrow(0, a->frac_hm, &c);\n+ a->frac_hi = usub64_borrow(0, a->frac_hi, &c);\n+}\n+\n+#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)\n \n static int frac64_normalize(FloatParts64 *a)\n {\n@@ -932,7 +971,55 @@ static int frac128_normalize(FloatParts128 *a)\n return 128;\n }\n \n-#define frac_normalize(A) FRAC_GENERIC_64_128(normalize, A)(A)\n+static int frac256_normalize(FloatParts256 *a)\n+{\n+ uint64_t a0 = a->frac_hi, a1 = a->frac_hm;\n+ uint64_t a2 = a->frac_lm, a3 = a->frac_lo;\n+ int ret, shl, shr;\n+\n+ if (likely(a0)) {\n+ shl = clz64(a0);\n+ if (shl == 0) {\n+ return 0;\n+ }\n+ ret = shl;\n+ } else {\n+ if (a1) {\n+ ret = 64;\n+ a0 = a1, a1 = a2, a2 = a3, a3 = 0;\n+ } else if (a2) {\n+ ret = 128;\n+ a0 = a2, a1 = a3, a2 = 0, a3 = 0;\n+ } else if (a3) {\n+ ret = 192;\n+ a0 = a3, a1 = 0, a2 = 0, a3 = 0;\n+ } else {\n+ ret = 256;\n+ a0 = 0, a1 = 0, a2 = 0, a3 = 0;\n+ goto done;\n+ }\n+ shl = clz64(a0);\n+ if (shl == 0) {\n+ goto done;\n+ }\n+ ret += shl;\n+ }\n+\n+ shr = -shl & 63;\n+ a0 = (a0 << shl) | (a1 >> shr);\n+ a1 = (a1 << shl) | (a2 >> shr);\n+ a2 = (a2 << shl) | (a3 >> shr);\n+ a3 = (a3 << shl);\n+\n+ done:\n+ a->frac_hi = a0;\n+ a->frac_hm = a1;\n+ a->frac_lm = a2;\n+ a->frac_lo = a3;\n+ return ret;\n+}\n+\n+#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)\n \n static void frac64_shl(FloatParts64 *a, int c)\n {\n@@ -968,7 +1055,51 @@ static void frac128_shrjam(FloatParts128 *a, int c)\n shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);\n }\n \n-#define frac_shrjam(A, C) FRAC_GENERIC_64_128(shrjam, A)(A, C)\n+static void frac256_shrjam(FloatParts256 *a, int c)\n+{\n+ uint64_t a0 = a->frac_hi, a1 = a->frac_hm;\n+ uint64_t a2 = a->frac_lm, a3 = a->frac_lo;\n+ uint64_t sticky = 0;\n+ int invc;\n+\n+ if (unlikely(c == 0)) {\n+ return;\n+ } else if (likely(c < 64)) {\n+ /* nothing */\n+ } else if (likely(c < 256)) {\n+ if (unlikely(c & 128)) {\n+ sticky |= a2 | a3;\n+ a3 
= a1, a2 = a0, a1 = 0, a0 = 0;\n+ }\n+ if (unlikely(c & 64)) {\n+ sticky |= a3;\n+ a3 = a2, a2 = a1, a1 = a0, a0 = 0;\n+ }\n+ c &= 63;\n+ if (c == 0) {\n+ goto done;\n+ }\n+ } else {\n+ sticky = a0 | a1 | a2 | a3;\n+ a0 = a1 = a2 = a3 = 0;\n+ goto done;\n+ }\n+\n+ invc = -c & 63;\n+ sticky |= a3 << invc;\n+ a3 = (a3 >> c) | (a2 << invc);\n+ a2 = (a2 >> c) | (a1 << invc);\n+ a1 = (a1 >> c) | (a0 << invc);\n+ a0 = (a0 >> c);\n+\n+ done:\n+ a->frac_lo = a3 | (sticky != 0);\n+ a->frac_lm = a2;\n+ a->frac_hm = a1;\n+ a->frac_hi = a0;\n+}\n+\n+#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)\n \n static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)\n {\n@@ -983,7 +1114,17 @@ static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)\n return c;\n }\n \n-#define frac_sub(R, A, B) FRAC_GENERIC_64_128(sub, R)(R, A, B)\n+static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)\n+{\n+ bool c = 0;\n+ r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);\n+ r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);\n+ r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);\n+ r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);\n+ return c;\n+}\n+\n+#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)\n \n static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)\n {\n@@ -998,6 +1139,22 @@ static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)\n \n #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)\n \n+static void frac64_widen(FloatParts128 *r, FloatParts64 *a)\n+{\n+ r->frac_hi = a->frac;\n+ r->frac_lo = 0;\n+}\n+\n+static void frac128_widen(FloatParts256 *r, FloatParts128 *a)\n+{\n+ r->frac_hi = a->frac_hi;\n+ r->frac_hm = a->frac_lo;\n+ r->frac_lm = 0;\n+ r->frac_lo = 0;\n+}\n+\n+#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)\n+\n #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)\n #define FloatPartsN glue(FloatParts,N)\n #define FloatPartsW glue(FloatParts,W)\n@@ -1016,6 +1173,12 @@ static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)\n #include \"softfloat-parts-addsub.c.inc\"\n #include \"softfloat-parts.c.inc\"\n \n+#undef N\n+#undef W\n+#define N 256\n+\n+#include \"softfloat-parts-addsub.c.inc\"\n+\n #undef N\n #undef W\n #undef partsN\n@@ -1386,230 +1549,48 @@ float128_mul(float128 a, float128 b, float_status *status)\n }\n \n /*\n- * Returns the result of multiplying the floating-point values `a' and\n- * `b' then adding 'c', with no intermediate rounding step after the\n- * multiplication. The operation is performed according to the\n- * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.\n- * The flags argument allows the caller to select negation of the\n- * addend, the intermediate product, or the final result. 
(The\n- * difference between this and having the caller do a separate\n- * negation is that negating externally will flip the sign bit on\n- * NaNs.)\n+ * Fused multiply-add\n */\n \n-static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c,\n- int flags, float_status *s)\n-{\n- bool inf_zero, p_sign;\n- bool sign_flip = flags & float_muladd_negate_result;\n- FloatClass p_class;\n- uint64_t hi, lo;\n- int p_exp;\n- int ab_mask, abc_mask;\n-\n- ab_mask = float_cmask(a.cls) | float_cmask(b.cls);\n- abc_mask = float_cmask(c.cls) | ab_mask;\n- inf_zero = ab_mask == float_cmask_infzero;\n-\n- /* It is implementation-defined whether the cases of (0,inf,qnan)\n- * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN\n- * they return if they do), so we have to hand this information\n- * off to the target-specific pick-a-NaN routine.\n- */\n- if (unlikely(abc_mask & float_cmask_anynan)) {\n- return *parts_pick_nan_muladd(&a, &b, &c, s, ab_mask, abc_mask);\n- }\n-\n- if (inf_zero) {\n- float_raise(float_flag_invalid, s);\n- parts_default_nan(&a, s);\n- return a;\n- }\n-\n- if (flags & float_muladd_negate_c) {\n- c.sign ^= 1;\n- }\n-\n- p_sign = a.sign ^ b.sign;\n-\n- if (flags & float_muladd_negate_product) {\n- p_sign ^= 1;\n- }\n-\n- if (ab_mask & float_cmask_inf) {\n- p_class = float_class_inf;\n- } else if (ab_mask & float_cmask_zero) {\n- p_class = float_class_zero;\n- } else {\n- p_class = float_class_normal;\n- }\n-\n- if (c.cls == float_class_inf) {\n- if (p_class == float_class_inf && p_sign != c.sign) {\n- float_raise(float_flag_invalid, s);\n- parts_default_nan(&c, s);\n- } else {\n- c.sign ^= sign_flip;\n- }\n- return c;\n- }\n-\n- if (p_class == float_class_inf) {\n- a.cls = float_class_inf;\n- a.sign = p_sign ^ sign_flip;\n- return a;\n- }\n-\n- if (p_class == float_class_zero) {\n- if (c.cls == float_class_zero) {\n- if (p_sign != c.sign) {\n- p_sign = s->float_rounding_mode == float_round_down;\n- }\n- c.sign = p_sign;\n- } else if (flags & float_muladd_halve_result) {\n- c.exp -= 1;\n- }\n- c.sign ^= sign_flip;\n- return c;\n- }\n-\n- /* a & b should be normals now... */\n- assert(a.cls == float_class_normal &&\n- b.cls == float_class_normal);\n-\n- p_exp = a.exp + b.exp;\n-\n- mul64To128(a.frac, b.frac, &hi, &lo);\n-\n- /* Renormalize to the msb. 
*/\n- if (hi & DECOMPOSED_IMPLICIT_BIT) {\n- p_exp += 1;\n- } else {\n- shortShift128Left(hi, lo, 1, &hi, &lo);\n- }\n-\n- /* + add/sub */\n- if (c.cls != float_class_zero) {\n- int exp_diff = p_exp - c.exp;\n- if (p_sign == c.sign) {\n- /* Addition */\n- if (exp_diff <= 0) {\n- shift64RightJamming(hi, -exp_diff, &hi);\n- p_exp = c.exp;\n- if (uadd64_overflow(hi, c.frac, &hi)) {\n- shift64RightJamming(hi, 1, &hi);\n- hi |= DECOMPOSED_IMPLICIT_BIT;\n- p_exp += 1;\n- }\n- } else {\n- uint64_t c_hi, c_lo, over;\n- shift128RightJamming(c.frac, 0, exp_diff, &c_hi, &c_lo);\n- add192(0, hi, lo, 0, c_hi, c_lo, &over, &hi, &lo);\n- if (over) {\n- shift64RightJamming(hi, 1, &hi);\n- hi |= DECOMPOSED_IMPLICIT_BIT;\n- p_exp += 1;\n- }\n- }\n- } else {\n- /* Subtraction */\n- uint64_t c_hi = c.frac, c_lo = 0;\n-\n- if (exp_diff <= 0) {\n- shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);\n- if (exp_diff == 0\n- &&\n- (hi > c_hi || (hi == c_hi && lo >= c_lo))) {\n- sub128(hi, lo, c_hi, c_lo, &hi, &lo);\n- } else {\n- sub128(c_hi, c_lo, hi, lo, &hi, &lo);\n- p_sign ^= 1;\n- p_exp = c.exp;\n- }\n- } else {\n- shift128RightJamming(c_hi, c_lo,\n- exp_diff,\n- &c_hi, &c_lo);\n- sub128(hi, lo, c_hi, c_lo, &hi, &lo);\n- }\n-\n- if (hi == 0 && lo == 0) {\n- a.cls = float_class_zero;\n- a.sign = s->float_rounding_mode == float_round_down;\n- a.sign ^= sign_flip;\n- return a;\n- } else {\n- int shift;\n- if (hi != 0) {\n- shift = clz64(hi);\n- } else {\n- shift = clz64(lo) + 64;\n- }\n- /* Normalizing to a binary point of 124 is the\n- correct adjust for the exponent. However since we're\n- shifting, we might as well put the binary point back\n- at 63 where we really want it. Therefore shift as\n- if we're leaving 1 bit at the top of the word, but\n- adjust the exponent as if we're leaving 3 bits. 
*/\n- shift128Left(hi, lo, shift, &hi, &lo);\n- p_exp -= shift;\n- }\n- }\n- }\n- hi |= (lo != 0);\n-\n- if (flags & float_muladd_halve_result) {\n- p_exp -= 1;\n- }\n-\n- /* finally prepare our result */\n- a.cls = float_class_normal;\n- a.sign = p_sign ^ sign_flip;\n- a.exp = p_exp;\n- a.frac = hi;\n-\n- return a;\n-}\n-\n float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,\n- int flags, float_status *status)\n+ int flags, float_status *status)\n {\n- FloatParts64 pa, pb, pc, pr;\n+ FloatParts64 pa, pb, pc, *pr;\n \n float16_unpack_canonical(&pa, a, status);\n float16_unpack_canonical(&pb, b, status);\n float16_unpack_canonical(&pc, c, status);\n- pr = muladd_floats(pa, pb, pc, flags, status);\n+ pr = parts_muladd(&pa, &pb, &pc, flags, status);\n \n- return float16_round_pack_canonical(&pr, status);\n+ return float16_round_pack_canonical(pr, status);\n }\n \n static float32 QEMU_SOFTFLOAT_ATTR\n soft_f32_muladd(float32 a, float32 b, float32 c, int flags,\n float_status *status)\n {\n- FloatParts64 pa, pb, pc, pr;\n+ FloatParts64 pa, pb, pc, *pr;\n \n float32_unpack_canonical(&pa, a, status);\n float32_unpack_canonical(&pb, b, status);\n float32_unpack_canonical(&pc, c, status);\n- pr = muladd_floats(pa, pb, pc, flags, status);\n+ pr = parts_muladd(&pa, &pb, &pc, flags, status);\n \n- return float32_round_pack_canonical(&pr, status);\n+ return float32_round_pack_canonical(pr, status);\n }\n \n static float64 QEMU_SOFTFLOAT_ATTR\n soft_f64_muladd(float64 a, float64 b, float64 c, int flags,\n float_status *status)\n {\n- FloatParts64 pa, pb, pc, pr;\n+ FloatParts64 pa, pb, pc, *pr;\n \n float64_unpack_canonical(&pa, a, status);\n float64_unpack_canonical(&pb, b, status);\n float64_unpack_canonical(&pc, c, status);\n- pr = muladd_floats(pa, pb, pc, flags, status);\n+ pr = parts_muladd(&pa, &pb, &pc, flags, status);\n \n- return float64_round_pack_canonical(&pr, status);\n+ return float64_round_pack_canonical(pr, status);\n }\n \n static bool force_soft_fma;\n@@ -1756,23 +1737,30 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)\n return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);\n }\n \n-/*\n- * Returns the result of multiplying the bfloat16 values `a'\n- * and `b' then adding 'c', with no intermediate rounding step after the\n- * multiplication.\n- */\n-\n bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,\n int flags, float_status *status)\n {\n- FloatParts64 pa, pb, pc, pr;\n+ FloatParts64 pa, pb, pc, *pr;\n \n bfloat16_unpack_canonical(&pa, a, status);\n bfloat16_unpack_canonical(&pb, b, status);\n bfloat16_unpack_canonical(&pc, c, status);\n- pr = muladd_floats(pa, pb, pc, flags, status);\n+ pr = parts_muladd(&pa, &pb, &pc, flags, status);\n \n- return bfloat16_round_pack_canonical(&pr, status);\n+ return bfloat16_round_pack_canonical(pr, status);\n+}\n+\n+float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,\n+ int flags, float_status *status)\n+{\n+ FloatParts128 pa, pb, pc, *pr;\n+\n+ float128_unpack_canonical(&pa, a, status);\n+ float128_unpack_canonical(&pb, b, status);\n+ float128_unpack_canonical(&pc, c, status);\n+ pr = parts_muladd(&pa, &pb, &pc, flags, status);\n+\n+ return float128_round_pack_canonical(pr, status);\n }\n \n /*\ndiff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c\nindex d319993280..c24baf8535 100644\n--- a/tests/fp/fp-bench.c\n+++ b/tests/fp/fp-bench.c\n@@ -386,7 +386,7 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)\n for (i = 0; i < 
OPS_PER_ITER; i++) {\n float128 a = ops[0].f128;\n float128 b = ops[1].f128;\n- /* float128 c = ops[2].f128; */\n+ float128 c = ops[2].f128;\n \n switch (op) {\n case OP_ADD:\n@@ -401,9 +401,9 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)\n case OP_DIV:\n res.f128 = float128_div(a, b, &soft_status);\n break;\n- /* case OP_FMA: */\n- /* res.f128 = float128_muladd(a, b, c, 0, &soft_status); */\n- /* break; */\n+ case OP_FMA:\n+ res.f128 = float128_muladd(a, b, c, 0, &soft_status);\n+ break;\n case OP_SQRT:\n res.f128 = float128_sqrt(a, &soft_status);\n break;\ndiff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c\nindex 5a4cad8c8b..ff131afbde 100644\n--- a/tests/fp/fp-test.c\n+++ b/tests/fp/fp-test.c\n@@ -717,7 +717,7 @@ static void do_testfloat(int op, int rmode, bool exact)\n test_abz_f128(true_abz_f128M, subj_abz_f128M);\n break;\n case F128_MULADD:\n- not_implemented();\n+ test_abcz_f128(slow_f128M_mulAdd, qemu_f128M_mulAdd);\n break;\n case F128_SQRT:\n test_az_f128(slow_f128M_sqrt, qemu_f128M_sqrt);\ndiff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc\nindex 9a67ab2bea..a203811299 100644\n--- a/fpu/softfloat-parts.c.inc\n+++ b/fpu/softfloat-parts.c.inc\n@@ -413,3 +413,129 @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,\n a->sign = sign;\n return a;\n }\n+\n+/*\n+ * Returns the result of multiplying the floating-point values `a' and\n+ * `b' then adding 'c', with no intermediate rounding step after the\n+ * multiplication. The operation is performed according to the\n+ * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.\n+ * The flags argument allows the caller to select negation of the\n+ * addend, the intermediate product, or the final result. (The\n+ * difference between this and having the caller do a separate\n+ * negation is that negating externally will flip the sign bit on NaNs.)\n+ *\n+ * Requires A and C extracted into a double-sized structure to provide the\n+ * extra space for the widening multiply.\n+ */\n+static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,\n+ FloatPartsN *c, int flags, float_status *s)\n+{\n+ int ab_mask, abc_mask;\n+ FloatPartsW p_widen, c_widen;\n+\n+ ab_mask = float_cmask(a->cls) | float_cmask(b->cls);\n+ abc_mask = float_cmask(c->cls) | ab_mask;\n+\n+ /*\n+ * It is implementation-defined whether the cases of (0,inf,qnan)\n+ * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN\n+ * they return if they do), so we have to hand this information\n+ * off to the target-specific pick-a-NaN routine.\n+ */\n+ if (unlikely(abc_mask & float_cmask_anynan)) {\n+ return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);\n+ }\n+\n+ if (flags & float_muladd_negate_c) {\n+ c->sign ^= 1;\n+ }\n+\n+ /* Compute the sign of the product into A. 
*/\n+ a->sign ^= b->sign;\n+ if (flags & float_muladd_negate_product) {\n+ a->sign ^= 1;\n+ }\n+\n+ if (unlikely(ab_mask != float_cmask_normal)) {\n+ if (unlikely(ab_mask == float_cmask_infzero)) {\n+ goto d_nan;\n+ }\n+\n+ if (ab_mask & float_cmask_inf) {\n+ if (c->cls == float_class_inf && a->sign != c->sign) {\n+ goto d_nan;\n+ }\n+ goto return_inf;\n+ }\n+\n+ g_assert(ab_mask & float_cmask_zero);\n+ if (c->cls == float_class_normal) {\n+ *a = *c;\n+ goto return_normal;\n+ }\n+ if (c->cls == float_class_zero) {\n+ if (a->sign != c->sign) {\n+ goto return_sub_zero;\n+ }\n+ goto return_zero;\n+ }\n+ g_assert(c->cls == float_class_inf);\n+ }\n+\n+ if (unlikely(c->cls == float_class_inf)) {\n+ a->sign = c->sign;\n+ goto return_inf;\n+ }\n+\n+ /* Perform the multiplication step. */\n+ p_widen.sign = a->sign;\n+ p_widen.exp = a->exp + b->exp + 1;\n+ frac_mulw(&p_widen, a, b);\n+ if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {\n+ frac_add(&p_widen, &p_widen, &p_widen);\n+ p_widen.exp -= 1;\n+ }\n+\n+ /* Perform the addition step. */\n+ if (c->cls != float_class_zero) {\n+ /* Zero-extend C to less significant bits. */\n+ frac_widen(&c_widen, c);\n+ c_widen.exp = c->exp;\n+\n+ if (a->sign == c->sign) {\n+ parts_add_normal(&p_widen, &c_widen);\n+ } else if (!parts_sub_normal(&p_widen, &c_widen)) {\n+ goto return_sub_zero;\n+ }\n+ }\n+\n+ /* Narrow with sticky bit, for proper rounding later. */\n+ frac_truncjam(a, &p_widen);\n+ a->sign = p_widen.sign;\n+ a->exp = p_widen.exp;\n+\n+ return_normal:\n+ if (flags & float_muladd_halve_result) {\n+ a->exp -= 1;\n+ }\n+ finish_sign:\n+ if (flags & float_muladd_negate_result) {\n+ a->sign ^= 1;\n+ }\n+ return a;\n+\n+ return_sub_zero:\n+ a->sign = s->float_rounding_mode == float_round_down;\n+ return_zero:\n+ a->cls = float_class_zero;\n+ goto finish_sign;\n+\n+ return_inf:\n+ a->cls = float_class_inf;\n+ goto finish_sign;\n+\n+ d_nan:\n+ float_raise(float_flag_invalid, s);\n+ parts_default_nan(a, s);\n+ return a;\n+}\ndiff --git a/tests/fp/wrap.c.inc b/tests/fp/wrap.c.inc\nindex 0cbd20013e..cb1bb77e4c 100644\n--- a/tests/fp/wrap.c.inc\n+++ b/tests/fp/wrap.c.inc\n@@ -574,6 +574,18 @@ WRAP_MULADD(qemu_f32_mulAdd, float32_muladd, float32)\n WRAP_MULADD(qemu_f64_mulAdd, float64_muladd, float64)\n #undef WRAP_MULADD\n \n+static void qemu_f128M_mulAdd(const float128_t *ap, const float128_t *bp,\n+ const float128_t *cp, float128_t *res)\n+{\n+ float128 a, b, c, ret;\n+\n+ a = soft_to_qemu128(*ap);\n+ b = soft_to_qemu128(*bp);\n+ c = soft_to_qemu128(*cp);\n+ ret = float128_muladd(a, b, c, 0, &qsf);\n+ *res = qemu_to_soft128(ret);\n+}\n+\n #define WRAP_CMP16(name, func, retcond) \\\n static bool name(float16_t a, float16_t b) \\\n { \\\n", "prefixes": [ "37/72" ] }