Patch Detail
get: Show a patch.
patch: Update a patch.
put: Update a patch.
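
These are the standard Patchwork REST operations on a patch resource: GET returns the JSON object shown below, while PATCH and PUT modify mutable fields (state, delegate, archived) and require an authenticated maintainer. A minimal sketch in Python using the requests library; the token value and the "under-review" state slug are illustrative assumptions, not taken from this page:

    import requests

    BASE = "http://patchwork.ozlabs.org/api"
    PATCH_ID = 1475740
    # Hypothetical token; a real one comes from your Patchwork user profile.
    TOKEN = "0123456789abcdef"

    # get: Show a patch.
    resp = requests.get(f"{BASE}/patches/{PATCH_ID}/")
    resp.raise_for_status()
    patch = resp.json()
    print(patch["name"], "-", patch["state"])

    # patch: Update a patch (partial update; needs maintainer rights).
    resp = requests.patch(
        f"{BASE}/patches/{PATCH_ID}/",
        headers={"Authorization": f"Token {TOKEN}"},
        json={"state": "under-review"},
    )
    resp.raise_for_status()
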
GET /api/patches/1475740/?format=api
{ "id": 1475740, "url": "http://patchwork.ozlabs.org/api/patches/1475740/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20210508014802.892561-38-richard.henderson@linaro.org/", "project": { "id": 14, "url": "http://patchwork.ozlabs.org/api/projects/14/?format=api", "name": "QEMU Development", "link_name": "qemu-devel", "list_id": "qemu-devel.nongnu.org", "list_email": "qemu-devel@nongnu.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20210508014802.892561-38-richard.henderson@linaro.org>", "list_archive_url": null, "date": "2021-05-08T01:47:27", "name": "[37/72] softfloat: Move muladd_floats to softfloat-parts.c.inc", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "e954cbeacdbc0c2e03dfc81adbeabfbbf8c47dc8", "submitter": { "id": 72104, "url": "http://patchwork.ozlabs.org/api/people/72104/?format=api", "name": "Richard Henderson", "email": "richard.henderson@linaro.org" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20210508014802.892561-38-richard.henderson@linaro.org/mbox/", "series": [ { "id": 242770, "url": "http://patchwork.ozlabs.org/api/series/242770/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=242770", "date": "2021-05-08T01:46:53", "name": "Convert floatx80 and float128 to FloatParts", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/242770/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/1475740/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/1475740/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>", "X-Original-To": "incoming@patchwork.ozlabs.org", "Delivered-To": "patchwork-incoming@bilbo.ozlabs.org", "Authentication-Results": [ "ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=nongnu.org\n (client-ip=209.51.188.17; helo=lists.gnu.org;\n envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n receiver=<UNKNOWN>)", "ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n unprotected) header.d=linaro.org header.i=@linaro.org header.a=rsa-sha256\n header.s=google header.b=hYjABf28;\n\tdkim-atps=neutral" ], "Received": [ "from lists.gnu.org (lists.gnu.org [209.51.188.17])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 4FcWBt33cTz9sWp\n\tfor <incoming@patchwork.ozlabs.org>; Sat, 8 May 2021 12:16:06 +1000 (AEST)", "from localhost ([::1]:58922 helo=lists1p.gnu.org)\n\tby lists.gnu.org with esmtp (Exim 4.90_1)\n\t(envelope-from <qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>)\n\tid 1lfCVg-0002GV-EC\n\tfor incoming@patchwork.ozlabs.org; Fri, 07 May 2021 22:16:04 -0400", "from eggs.gnu.org ([2001:470:142:3::10]:41018)\n by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)\n (Exim 4.90_1) (envelope-from <richard.henderson@linaro.org>)\n id 1lfC5L-0004x9-Gf\n for qemu-devel@nongnu.org; Fri, 07 May 2021 21:48:52 -0400", "from mail-pg1-x532.google.com ([2607:f8b0:4864:20::532]:39456)\n by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_128_GCM_SHA256:128)\n (Exim 4.90_1) (envelope-from <richard.henderson@linaro.org>)\n id 1lfC50-0003ft-5D\n for qemu-devel@nongnu.org; Fri, 07 May 2021 21:48:51 -0400", "by 
mail-pg1-x532.google.com with SMTP id s22so8629125pgk.6\n for <qemu-devel@nongnu.org>; Fri, 07 May 2021 18:48:28 -0700 (PDT)", "from localhost.localdomain ([71.212.144.24])\n by smtp.gmail.com with ESMTPSA id t4sm5819681pfq.165.2021.05.07.18.48.27\n (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n Fri, 07 May 2021 18:48:27 -0700 (PDT)" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google;\n h=from:to:cc:subject:date:message-id:in-reply-to:references\n :mime-version:content-transfer-encoding;\n bh=8Xkz1ybjWLeUjZCsE7o98Dkj/GU255PMCjv80HfDnbE=;\n b=hYjABf281qbp4B54Q9Y39BvAXJi6ngFPgDGw8dWDxKbfnV7rRwFc0Oqd5RNXyNjwXn\n dw79swvbeAibOF3VvJPwzMNcsK56EfIKMEabVTTWdEda1hp8vE9yQIM4qFIG8qKB6U8z\n 7Md+GnRT2HLajIchjbXE8Wr1aohmg4OF0EvoSdwR1HW1+ClYxrz4QrkzuP/XufLfadkA\n sNwQ/Ls0cptupKZ5qljsXz5nzkH5VKufV6dj8tXTUW3pLZWUxaPkhFdIjfVjZUDC3q7c\n bWOfxo+7MkjKKEKxhqXkFfozZNvvnczDRb2RaZg/E5cTOi6+waCF4no5+82WqvKlI4hs\n MnDA==", "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=1e100.net; s=20161025;\n h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to\n :references:mime-version:content-transfer-encoding;\n bh=8Xkz1ybjWLeUjZCsE7o98Dkj/GU255PMCjv80HfDnbE=;\n b=ECjfhtFGgKdTdOlE/S23unlxXIXDTM9yNbhOlDzPmZrQz+iACJi4LFseqjBmyPbabM\n xW9EjmYP3QvR0odvoVuu+PehfAj0yRxQt5K16lr/exvDJFRtP7zxmgMnDVQRkU3sLGYz\n GUSHfXuvQDczDOTfxw2XEK1omsA7ROV8HeLhrrJ2n4mH3otoPoyJJ1XqphuptH2lu+mQ\n htmBPYGFPhWonyILSzsFvqCfFQwbwWYmB4uRu4bTo5G9JEtDhYmNYEyV27wlyBygUnEu\n vEkGN4vUDwsVuAc4IwxD26nn8ky+lwo54QJZTA+Hqp+mcxMU6LQFlGJ5Nkj/0depdIGQ\n gKHw==", "X-Gm-Message-State": "AOAM53269JfdI2t36Ls5GpIY10nCYt10mhN7k12D0wbqTVLgxE6+Ck17\n RTon9IYF4UXnsjqOsw6s9wFWQ2UHWwuvEw==", "X-Google-Smtp-Source": "\n ABdhPJx+L3CeUZifByVD+t6BWeudEabo8jAvP9Zn0j838uPsyp28nzKL8f274aSlELc83b0eqbQ+ZQ==", "X-Received": "by 2002:aa7:8503:0:b029:27d:497f:1da6 with SMTP id\n v3-20020aa785030000b029027d497f1da6mr13642574pfn.28.1620438507603;\n Fri, 07 May 2021 18:48:27 -0700 (PDT)", "From": "Richard Henderson <richard.henderson@linaro.org>", "To": "qemu-devel@nongnu.org", "Subject": "[PATCH 37/72] softfloat: Move muladd_floats to softfloat-parts.c.inc", "Date": "Fri, 7 May 2021 18:47:27 -0700", "Message-Id": "<20210508014802.892561-38-richard.henderson@linaro.org>", "X-Mailer": "git-send-email 2.25.1", "In-Reply-To": "<20210508014802.892561-1-richard.henderson@linaro.org>", "References": "<20210508014802.892561-1-richard.henderson@linaro.org>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "Received-SPF": "pass client-ip=2607:f8b0:4864:20::532;\n envelope-from=richard.henderson@linaro.org; helo=mail-pg1-x532.google.com", "X-Spam_score_int": "-20", "X-Spam_score": "-2.1", "X-Spam_bar": "--", "X-Spam_report": "(-2.1 / 5.0 requ) BAYES_00=-1.9, DKIM_SIGNED=0.1,\n DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1,\n RCVD_IN_DNSWL_NONE=-0.0001, SPF_HELO_NONE=0.001,\n SPF_PASS=-0.001 autolearn=ham autolearn_force=no", "X-Spam_action": "no action", "X-BeenThere": "qemu-devel@nongnu.org", "X-Mailman-Version": "2.1.23", "Precedence": "list", "List-Id": "<qemu-devel.nongnu.org>", "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>", "List-Archive": "<https://lists.nongnu.org/archive/html/qemu-devel>", "List-Post": "<mailto:qemu-devel@nongnu.org>", "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>", "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n 
<mailto:qemu-devel-request@nongnu.org?subject=subscribe>", "Cc": "alex.bennee@linaro.org, david@redhat.com", "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org", "Sender": "\"Qemu-devel\"\n <qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>" }, "content": "Rename to parts$N_muladd.\nImplement float128_muladd with FloatParts128.\n\nSigned-off-by: Richard Henderson <richard.henderson@linaro.org>\n---\n include/fpu/softfloat.h | 2 +\n fpu/softfloat.c | 406 ++++++++++++++++++--------------------\n tests/fp/fp-bench.c | 8 +-\n tests/fp/fp-test.c | 2 +-\n fpu/softfloat-parts.c.inc | 126 ++++++++++++\n tests/fp/wrap.c.inc | 12 ++\n 6 files changed, 342 insertions(+), 214 deletions(-)", "diff": "diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h\nindex 019c2ec66d..53f2c2ea3c 100644\n--- a/include/fpu/softfloat.h\n+++ b/include/fpu/softfloat.h\n@@ -1197,6 +1197,8 @@ float128 float128_round_to_int(float128, float_status *status);\n float128 float128_add(float128, float128, float_status *status);\n float128 float128_sub(float128, float128, float_status *status);\n float128 float128_mul(float128, float128, float_status *status);\n+float128 float128_muladd(float128, float128, float128, int,\n+ float_status *status);\n float128 float128_div(float128, float128, float_status *status);\n float128 float128_rem(float128, float128, float_status *status);\n float128 float128_sqrt(float128, float_status *status);\ndiff --git a/fpu/softfloat.c b/fpu/softfloat.c\nindex 4f498c11e5..a9ee8498ae 100644\n--- a/fpu/softfloat.c\n+++ b/fpu/softfloat.c\n@@ -715,6 +715,10 @@ static float128 float128_pack_raw(const FloatParts128 *p)\n #define PARTS_GENERIC_64_128(NAME, P) \\\n QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)\n \n+#define PARTS_GENERIC_64_128_256(NAME, P) \\\n+ QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \\\n+ (FloatParts128 *, parts128_##NAME), parts64_##NAME)\n+\n #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)\n #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)\n \n@@ -760,15 +764,17 @@ static void parts128_uncanon(FloatParts128 *p, float_status *status,\n \n static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);\n static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);\n+static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);\n \n #define parts_add_normal(A, B) \\\n- PARTS_GENERIC_64_128(add_normal, A)(A, B)\n+ PARTS_GENERIC_64_128_256(add_normal, A)(A, B)\n \n static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);\n static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);\n+static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);\n \n #define parts_sub_normal(A, B) \\\n- PARTS_GENERIC_64_128(sub_normal, A)(A, B)\n+ PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)\n \n static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,\n float_status *s, bool subtract);\n@@ -786,6 +792,16 @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,\n #define parts_mul(A, B, S) \\\n PARTS_GENERIC_64_128(mul, A)(A, B, S)\n \n+static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,\n+ FloatParts64 *c, int flags,\n+ float_status *s);\n+static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,\n+ FloatParts128 *c, int flags,\n+ float_status *s);\n+\n+#define parts_muladd(A, B, C, Z, S) \\\n+ PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)\n+\n /*\n * Helper functions for 
softfloat-parts.c.inc, per-size operations.\n */\n@@ -793,6 +809,10 @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,\n #define FRAC_GENERIC_64_128(NAME, P) \\\n QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)\n \n+#define FRAC_GENERIC_64_128_256(NAME, P) \\\n+ QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \\\n+ (FloatParts128 *, frac128_##NAME), frac64_##NAME)\n+\n static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)\n {\n return uadd64_overflow(a->frac, b->frac, &r->frac);\n@@ -806,7 +826,17 @@ static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)\n return c;\n }\n \n-#define frac_add(R, A, B) FRAC_GENERIC_64_128(add, R)(R, A, B)\n+static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)\n+{\n+ bool c = 0;\n+ r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);\n+ r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);\n+ r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);\n+ r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);\n+ return c;\n+}\n+\n+#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)\n \n static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)\n {\n@@ -901,7 +931,16 @@ static void frac128_neg(FloatParts128 *a)\n a->frac_hi = usub64_borrow(0, a->frac_hi, &c);\n }\n \n-#define frac_neg(A) FRAC_GENERIC_64_128(neg, A)(A)\n+static void frac256_neg(FloatParts256 *a)\n+{\n+ bool c = 0;\n+ a->frac_lo = usub64_borrow(0, a->frac_lo, &c);\n+ a->frac_lm = usub64_borrow(0, a->frac_lm, &c);\n+ a->frac_hm = usub64_borrow(0, a->frac_hm, &c);\n+ a->frac_hi = usub64_borrow(0, a->frac_hi, &c);\n+}\n+\n+#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)\n \n static int frac64_normalize(FloatParts64 *a)\n {\n@@ -932,7 +971,55 @@ static int frac128_normalize(FloatParts128 *a)\n return 128;\n }\n \n-#define frac_normalize(A) FRAC_GENERIC_64_128(normalize, A)(A)\n+static int frac256_normalize(FloatParts256 *a)\n+{\n+ uint64_t a0 = a->frac_hi, a1 = a->frac_hm;\n+ uint64_t a2 = a->frac_lm, a3 = a->frac_lo;\n+ int ret, shl, shr;\n+\n+ if (likely(a0)) {\n+ shl = clz64(a0);\n+ if (shl == 0) {\n+ return 0;\n+ }\n+ ret = shl;\n+ } else {\n+ if (a1) {\n+ ret = 64;\n+ a0 = a1, a1 = a2, a2 = a3, a3 = 0;\n+ } else if (a2) {\n+ ret = 128;\n+ a0 = a2, a1 = a3, a2 = 0, a3 = 0;\n+ } else if (a3) {\n+ ret = 192;\n+ a0 = a3, a1 = 0, a2 = 0, a3 = 0;\n+ } else {\n+ ret = 256;\n+ a0 = 0, a1 = 0, a2 = 0, a3 = 0;\n+ goto done;\n+ }\n+ shl = clz64(a0);\n+ if (shl == 0) {\n+ goto done;\n+ }\n+ ret += shl;\n+ }\n+\n+ shr = -shl & 63;\n+ a0 = (a0 << shl) | (a1 >> shr);\n+ a1 = (a1 << shl) | (a2 >> shr);\n+ a2 = (a2 << shl) | (a3 >> shr);\n+ a3 = (a3 << shl);\n+\n+ done:\n+ a->frac_hi = a0;\n+ a->frac_hm = a1;\n+ a->frac_lm = a2;\n+ a->frac_lo = a3;\n+ return ret;\n+}\n+\n+#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)\n \n static void frac64_shl(FloatParts64 *a, int c)\n {\n@@ -968,7 +1055,51 @@ static void frac128_shrjam(FloatParts128 *a, int c)\n shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);\n }\n \n-#define frac_shrjam(A, C) FRAC_GENERIC_64_128(shrjam, A)(A, C)\n+static void frac256_shrjam(FloatParts256 *a, int c)\n+{\n+ uint64_t a0 = a->frac_hi, a1 = a->frac_hm;\n+ uint64_t a2 = a->frac_lm, a3 = a->frac_lo;\n+ uint64_t sticky = 0;\n+ int invc;\n+\n+ if (unlikely(c == 0)) {\n+ return;\n+ } else if (likely(c < 64)) {\n+ /* nothing */\n+ } else if (likely(c < 256)) {\n+ if (unlikely(c & 128)) {\n+ sticky |= a2 | a3;\n+ a3 
= a1, a2 = a0, a1 = 0, a0 = 0;\n+ }\n+ if (unlikely(c & 64)) {\n+ sticky |= a3;\n+ a3 = a2, a2 = a1, a1 = a0, a0 = 0;\n+ }\n+ c &= 63;\n+ if (c == 0) {\n+ goto done;\n+ }\n+ } else {\n+ sticky = a0 | a1 | a2 | a3;\n+ a0 = a1 = a2 = a3 = 0;\n+ goto done;\n+ }\n+\n+ invc = -c & 63;\n+ sticky |= a3 << invc;\n+ a3 = (a3 >> c) | (a2 << invc);\n+ a2 = (a2 >> c) | (a1 << invc);\n+ a1 = (a1 >> c) | (a0 << invc);\n+ a0 = (a0 >> c);\n+\n+ done:\n+ a->frac_lo = a3 | (sticky != 0);\n+ a->frac_lm = a2;\n+ a->frac_hm = a1;\n+ a->frac_hi = a0;\n+}\n+\n+#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)\n \n static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)\n {\n@@ -983,7 +1114,17 @@ static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)\n return c;\n }\n \n-#define frac_sub(R, A, B) FRAC_GENERIC_64_128(sub, R)(R, A, B)\n+static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)\n+{\n+ bool c = 0;\n+ r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);\n+ r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);\n+ r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);\n+ r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);\n+ return c;\n+}\n+\n+#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)\n \n static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)\n {\n@@ -998,6 +1139,22 @@ static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)\n \n #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)\n \n+static void frac64_widen(FloatParts128 *r, FloatParts64 *a)\n+{\n+ r->frac_hi = a->frac;\n+ r->frac_lo = 0;\n+}\n+\n+static void frac128_widen(FloatParts256 *r, FloatParts128 *a)\n+{\n+ r->frac_hi = a->frac_hi;\n+ r->frac_hm = a->frac_lo;\n+ r->frac_lm = 0;\n+ r->frac_lo = 0;\n+}\n+\n+#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)\n+\n #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)\n #define FloatPartsN glue(FloatParts,N)\n #define FloatPartsW glue(FloatParts,W)\n@@ -1016,6 +1173,12 @@ static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)\n #include \"softfloat-parts-addsub.c.inc\"\n #include \"softfloat-parts.c.inc\"\n \n+#undef N\n+#undef W\n+#define N 256\n+\n+#include \"softfloat-parts-addsub.c.inc\"\n+\n #undef N\n #undef W\n #undef partsN\n@@ -1386,230 +1549,48 @@ float128_mul(float128 a, float128 b, float_status *status)\n }\n \n /*\n- * Returns the result of multiplying the floating-point values `a' and\n- * `b' then adding 'c', with no intermediate rounding step after the\n- * multiplication. The operation is performed according to the\n- * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.\n- * The flags argument allows the caller to select negation of the\n- * addend, the intermediate product, or the final result. 
(The\n- * difference between this and having the caller do a separate\n- * negation is that negating externally will flip the sign bit on\n- * NaNs.)\n+ * Fused multiply-add\n */\n \n-static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c,\n- int flags, float_status *s)\n-{\n- bool inf_zero, p_sign;\n- bool sign_flip = flags & float_muladd_negate_result;\n- FloatClass p_class;\n- uint64_t hi, lo;\n- int p_exp;\n- int ab_mask, abc_mask;\n-\n- ab_mask = float_cmask(a.cls) | float_cmask(b.cls);\n- abc_mask = float_cmask(c.cls) | ab_mask;\n- inf_zero = ab_mask == float_cmask_infzero;\n-\n- /* It is implementation-defined whether the cases of (0,inf,qnan)\n- * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN\n- * they return if they do), so we have to hand this information\n- * off to the target-specific pick-a-NaN routine.\n- */\n- if (unlikely(abc_mask & float_cmask_anynan)) {\n- return *parts_pick_nan_muladd(&a, &b, &c, s, ab_mask, abc_mask);\n- }\n-\n- if (inf_zero) {\n- float_raise(float_flag_invalid, s);\n- parts_default_nan(&a, s);\n- return a;\n- }\n-\n- if (flags & float_muladd_negate_c) {\n- c.sign ^= 1;\n- }\n-\n- p_sign = a.sign ^ b.sign;\n-\n- if (flags & float_muladd_negate_product) {\n- p_sign ^= 1;\n- }\n-\n- if (ab_mask & float_cmask_inf) {\n- p_class = float_class_inf;\n- } else if (ab_mask & float_cmask_zero) {\n- p_class = float_class_zero;\n- } else {\n- p_class = float_class_normal;\n- }\n-\n- if (c.cls == float_class_inf) {\n- if (p_class == float_class_inf && p_sign != c.sign) {\n- float_raise(float_flag_invalid, s);\n- parts_default_nan(&c, s);\n- } else {\n- c.sign ^= sign_flip;\n- }\n- return c;\n- }\n-\n- if (p_class == float_class_inf) {\n- a.cls = float_class_inf;\n- a.sign = p_sign ^ sign_flip;\n- return a;\n- }\n-\n- if (p_class == float_class_zero) {\n- if (c.cls == float_class_zero) {\n- if (p_sign != c.sign) {\n- p_sign = s->float_rounding_mode == float_round_down;\n- }\n- c.sign = p_sign;\n- } else if (flags & float_muladd_halve_result) {\n- c.exp -= 1;\n- }\n- c.sign ^= sign_flip;\n- return c;\n- }\n-\n- /* a & b should be normals now... */\n- assert(a.cls == float_class_normal &&\n- b.cls == float_class_normal);\n-\n- p_exp = a.exp + b.exp;\n-\n- mul64To128(a.frac, b.frac, &hi, &lo);\n-\n- /* Renormalize to the msb. 
*/\n- if (hi & DECOMPOSED_IMPLICIT_BIT) {\n- p_exp += 1;\n- } else {\n- shortShift128Left(hi, lo, 1, &hi, &lo);\n- }\n-\n- /* + add/sub */\n- if (c.cls != float_class_zero) {\n- int exp_diff = p_exp - c.exp;\n- if (p_sign == c.sign) {\n- /* Addition */\n- if (exp_diff <= 0) {\n- shift64RightJamming(hi, -exp_diff, &hi);\n- p_exp = c.exp;\n- if (uadd64_overflow(hi, c.frac, &hi)) {\n- shift64RightJamming(hi, 1, &hi);\n- hi |= DECOMPOSED_IMPLICIT_BIT;\n- p_exp += 1;\n- }\n- } else {\n- uint64_t c_hi, c_lo, over;\n- shift128RightJamming(c.frac, 0, exp_diff, &c_hi, &c_lo);\n- add192(0, hi, lo, 0, c_hi, c_lo, &over, &hi, &lo);\n- if (over) {\n- shift64RightJamming(hi, 1, &hi);\n- hi |= DECOMPOSED_IMPLICIT_BIT;\n- p_exp += 1;\n- }\n- }\n- } else {\n- /* Subtraction */\n- uint64_t c_hi = c.frac, c_lo = 0;\n-\n- if (exp_diff <= 0) {\n- shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);\n- if (exp_diff == 0\n- &&\n- (hi > c_hi || (hi == c_hi && lo >= c_lo))) {\n- sub128(hi, lo, c_hi, c_lo, &hi, &lo);\n- } else {\n- sub128(c_hi, c_lo, hi, lo, &hi, &lo);\n- p_sign ^= 1;\n- p_exp = c.exp;\n- }\n- } else {\n- shift128RightJamming(c_hi, c_lo,\n- exp_diff,\n- &c_hi, &c_lo);\n- sub128(hi, lo, c_hi, c_lo, &hi, &lo);\n- }\n-\n- if (hi == 0 && lo == 0) {\n- a.cls = float_class_zero;\n- a.sign = s->float_rounding_mode == float_round_down;\n- a.sign ^= sign_flip;\n- return a;\n- } else {\n- int shift;\n- if (hi != 0) {\n- shift = clz64(hi);\n- } else {\n- shift = clz64(lo) + 64;\n- }\n- /* Normalizing to a binary point of 124 is the\n- correct adjust for the exponent. However since we're\n- shifting, we might as well put the binary point back\n- at 63 where we really want it. Therefore shift as\n- if we're leaving 1 bit at the top of the word, but\n- adjust the exponent as if we're leaving 3 bits. 
*/\n- shift128Left(hi, lo, shift, &hi, &lo);\n- p_exp -= shift;\n- }\n- }\n- }\n- hi |= (lo != 0);\n-\n- if (flags & float_muladd_halve_result) {\n- p_exp -= 1;\n- }\n-\n- /* finally prepare our result */\n- a.cls = float_class_normal;\n- a.sign = p_sign ^ sign_flip;\n- a.exp = p_exp;\n- a.frac = hi;\n-\n- return a;\n-}\n-\n float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,\n- int flags, float_status *status)\n+ int flags, float_status *status)\n {\n- FloatParts64 pa, pb, pc, pr;\n+ FloatParts64 pa, pb, pc, *pr;\n \n float16_unpack_canonical(&pa, a, status);\n float16_unpack_canonical(&pb, b, status);\n float16_unpack_canonical(&pc, c, status);\n- pr = muladd_floats(pa, pb, pc, flags, status);\n+ pr = parts_muladd(&pa, &pb, &pc, flags, status);\n \n- return float16_round_pack_canonical(&pr, status);\n+ return float16_round_pack_canonical(pr, status);\n }\n \n static float32 QEMU_SOFTFLOAT_ATTR\n soft_f32_muladd(float32 a, float32 b, float32 c, int flags,\n float_status *status)\n {\n- FloatParts64 pa, pb, pc, pr;\n+ FloatParts64 pa, pb, pc, *pr;\n \n float32_unpack_canonical(&pa, a, status);\n float32_unpack_canonical(&pb, b, status);\n float32_unpack_canonical(&pc, c, status);\n- pr = muladd_floats(pa, pb, pc, flags, status);\n+ pr = parts_muladd(&pa, &pb, &pc, flags, status);\n \n- return float32_round_pack_canonical(&pr, status);\n+ return float32_round_pack_canonical(pr, status);\n }\n \n static float64 QEMU_SOFTFLOAT_ATTR\n soft_f64_muladd(float64 a, float64 b, float64 c, int flags,\n float_status *status)\n {\n- FloatParts64 pa, pb, pc, pr;\n+ FloatParts64 pa, pb, pc, *pr;\n \n float64_unpack_canonical(&pa, a, status);\n float64_unpack_canonical(&pb, b, status);\n float64_unpack_canonical(&pc, c, status);\n- pr = muladd_floats(pa, pb, pc, flags, status);\n+ pr = parts_muladd(&pa, &pb, &pc, flags, status);\n \n- return float64_round_pack_canonical(&pr, status);\n+ return float64_round_pack_canonical(pr, status);\n }\n \n static bool force_soft_fma;\n@@ -1756,23 +1737,30 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)\n return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);\n }\n \n-/*\n- * Returns the result of multiplying the bfloat16 values `a'\n- * and `b' then adding 'c', with no intermediate rounding step after the\n- * multiplication.\n- */\n-\n bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,\n int flags, float_status *status)\n {\n- FloatParts64 pa, pb, pc, pr;\n+ FloatParts64 pa, pb, pc, *pr;\n \n bfloat16_unpack_canonical(&pa, a, status);\n bfloat16_unpack_canonical(&pb, b, status);\n bfloat16_unpack_canonical(&pc, c, status);\n- pr = muladd_floats(pa, pb, pc, flags, status);\n+ pr = parts_muladd(&pa, &pb, &pc, flags, status);\n \n- return bfloat16_round_pack_canonical(&pr, status);\n+ return bfloat16_round_pack_canonical(pr, status);\n+}\n+\n+float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,\n+ int flags, float_status *status)\n+{\n+ FloatParts128 pa, pb, pc, *pr;\n+\n+ float128_unpack_canonical(&pa, a, status);\n+ float128_unpack_canonical(&pb, b, status);\n+ float128_unpack_canonical(&pc, c, status);\n+ pr = parts_muladd(&pa, &pb, &pc, flags, status);\n+\n+ return float128_round_pack_canonical(pr, status);\n }\n \n /*\ndiff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c\nindex d319993280..c24baf8535 100644\n--- a/tests/fp/fp-bench.c\n+++ b/tests/fp/fp-bench.c\n@@ -386,7 +386,7 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)\n for (i = 0; i < 
OPS_PER_ITER; i++) {\n float128 a = ops[0].f128;\n float128 b = ops[1].f128;\n- /* float128 c = ops[2].f128; */\n+ float128 c = ops[2].f128;\n \n switch (op) {\n case OP_ADD:\n@@ -401,9 +401,9 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)\n case OP_DIV:\n res.f128 = float128_div(a, b, &soft_status);\n break;\n- /* case OP_FMA: */\n- /* res.f128 = float128_muladd(a, b, c, 0, &soft_status); */\n- /* break; */\n+ case OP_FMA:\n+ res.f128 = float128_muladd(a, b, c, 0, &soft_status);\n+ break;\n case OP_SQRT:\n res.f128 = float128_sqrt(a, &soft_status);\n break;\ndiff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c\nindex 5a4cad8c8b..ff131afbde 100644\n--- a/tests/fp/fp-test.c\n+++ b/tests/fp/fp-test.c\n@@ -717,7 +717,7 @@ static void do_testfloat(int op, int rmode, bool exact)\n test_abz_f128(true_abz_f128M, subj_abz_f128M);\n break;\n case F128_MULADD:\n- not_implemented();\n+ test_abcz_f128(slow_f128M_mulAdd, qemu_f128M_mulAdd);\n break;\n case F128_SQRT:\n test_az_f128(slow_f128M_sqrt, qemu_f128M_sqrt);\ndiff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc\nindex 9a67ab2bea..a203811299 100644\n--- a/fpu/softfloat-parts.c.inc\n+++ b/fpu/softfloat-parts.c.inc\n@@ -413,3 +413,129 @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,\n a->sign = sign;\n return a;\n }\n+\n+/*\n+ * Returns the result of multiplying the floating-point values `a' and\n+ * `b' then adding 'c', with no intermediate rounding step after the\n+ * multiplication. The operation is performed according to the\n+ * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.\n+ * The flags argument allows the caller to select negation of the\n+ * addend, the intermediate product, or the final result. (The\n+ * difference between this and having the caller do a separate\n+ * negation is that negating externally will flip the sign bit on NaNs.)\n+ *\n+ * Requires A and C extracted into a double-sized structure to provide the\n+ * extra space for the widening multiply.\n+ */\n+static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,\n+ FloatPartsN *c, int flags, float_status *s)\n+{\n+ int ab_mask, abc_mask;\n+ FloatPartsW p_widen, c_widen;\n+\n+ ab_mask = float_cmask(a->cls) | float_cmask(b->cls);\n+ abc_mask = float_cmask(c->cls) | ab_mask;\n+\n+ /*\n+ * It is implementation-defined whether the cases of (0,inf,qnan)\n+ * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN\n+ * they return if they do), so we have to hand this information\n+ * off to the target-specific pick-a-NaN routine.\n+ */\n+ if (unlikely(abc_mask & float_cmask_anynan)) {\n+ return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);\n+ }\n+\n+ if (flags & float_muladd_negate_c) {\n+ c->sign ^= 1;\n+ }\n+\n+ /* Compute the sign of the product into A. 
*/\n+ a->sign ^= b->sign;\n+ if (flags & float_muladd_negate_product) {\n+ a->sign ^= 1;\n+ }\n+\n+ if (unlikely(ab_mask != float_cmask_normal)) {\n+ if (unlikely(ab_mask == float_cmask_infzero)) {\n+ goto d_nan;\n+ }\n+\n+ if (ab_mask & float_cmask_inf) {\n+ if (c->cls == float_class_inf && a->sign != c->sign) {\n+ goto d_nan;\n+ }\n+ goto return_inf;\n+ }\n+\n+ g_assert(ab_mask & float_cmask_zero);\n+ if (c->cls == float_class_normal) {\n+ *a = *c;\n+ goto return_normal;\n+ }\n+ if (c->cls == float_class_zero) {\n+ if (a->sign != c->sign) {\n+ goto return_sub_zero;\n+ }\n+ goto return_zero;\n+ }\n+ g_assert(c->cls == float_class_inf);\n+ }\n+\n+ if (unlikely(c->cls == float_class_inf)) {\n+ a->sign = c->sign;\n+ goto return_inf;\n+ }\n+\n+ /* Perform the multiplication step. */\n+ p_widen.sign = a->sign;\n+ p_widen.exp = a->exp + b->exp + 1;\n+ frac_mulw(&p_widen, a, b);\n+ if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {\n+ frac_add(&p_widen, &p_widen, &p_widen);\n+ p_widen.exp -= 1;\n+ }\n+\n+ /* Perform the addition step. */\n+ if (c->cls != float_class_zero) {\n+ /* Zero-extend C to less significant bits. */\n+ frac_widen(&c_widen, c);\n+ c_widen.exp = c->exp;\n+\n+ if (a->sign == c->sign) {\n+ parts_add_normal(&p_widen, &c_widen);\n+ } else if (!parts_sub_normal(&p_widen, &c_widen)) {\n+ goto return_sub_zero;\n+ }\n+ }\n+\n+ /* Narrow with sticky bit, for proper rounding later. */\n+ frac_truncjam(a, &p_widen);\n+ a->sign = p_widen.sign;\n+ a->exp = p_widen.exp;\n+\n+ return_normal:\n+ if (flags & float_muladd_halve_result) {\n+ a->exp -= 1;\n+ }\n+ finish_sign:\n+ if (flags & float_muladd_negate_result) {\n+ a->sign ^= 1;\n+ }\n+ return a;\n+\n+ return_sub_zero:\n+ a->sign = s->float_rounding_mode == float_round_down;\n+ return_zero:\n+ a->cls = float_class_zero;\n+ goto finish_sign;\n+\n+ return_inf:\n+ a->cls = float_class_inf;\n+ goto finish_sign;\n+\n+ d_nan:\n+ float_raise(float_flag_invalid, s);\n+ parts_default_nan(a, s);\n+ return a;\n+}\ndiff --git a/tests/fp/wrap.c.inc b/tests/fp/wrap.c.inc\nindex 0cbd20013e..cb1bb77e4c 100644\n--- a/tests/fp/wrap.c.inc\n+++ b/tests/fp/wrap.c.inc\n@@ -574,6 +574,18 @@ WRAP_MULADD(qemu_f32_mulAdd, float32_muladd, float32)\n WRAP_MULADD(qemu_f64_mulAdd, float64_muladd, float64)\n #undef WRAP_MULADD\n \n+static void qemu_f128M_mulAdd(const float128_t *ap, const float128_t *bp,\n+ const float128_t *cp, float128_t *res)\n+{\n+ float128 a, b, c, ret;\n+\n+ a = soft_to_qemu128(*ap);\n+ b = soft_to_qemu128(*bp);\n+ c = soft_to_qemu128(*cp);\n+ ret = float128_muladd(a, b, c, 0, &qsf);\n+ *res = qemu_to_soft128(ret);\n+}\n+\n #define WRAP_CMP16(name, func, retcond) \\\n static bool name(float16_t a, float16_t b) \\\n { \\\n", "prefixes": [ "37/72" ] }