Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/1.1/patches/2223401/?format=api
{ "id": 2223401, "url": "http://patchwork.ozlabs.org/api/1.1/patches/2223401/?format=api", "web_url": "http://patchwork.ozlabs.org/project/glibc/patch/20260415083244.2560-1-pierre.blanchard@arm.com/", "project": { "id": 41, "url": "http://patchwork.ozlabs.org/api/1.1/projects/41/?format=api", "name": "GNU C Library", "link_name": "glibc", "list_id": "libc-alpha.sourceware.org", "list_email": "libc-alpha@sourceware.org", "web_url": "", "scm_url": "", "webscm_url": "" }, "msgid": "<20260415083244.2560-1-pierre.blanchard@arm.com>", "date": "2026-04-15T08:32:41", "name": "[v2,1/4] AArch64: Improve AdvSIMD and SVE pow(f).", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "35bcf8e0c6af66e94470de28618f866971281bb7", "submitter": { "id": 89873, "url": "http://patchwork.ozlabs.org/api/1.1/people/89873/?format=api", "name": "Pierre Blanchard", "email": "pierre.blanchard@arm.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/glibc/patch/20260415083244.2560-1-pierre.blanchard@arm.com/mbox/", "series": [ { "id": 499946, "url": "http://patchwork.ozlabs.org/api/1.1/series/499946/?format=api", "web_url": "http://patchwork.ozlabs.org/project/glibc/list/?series=499946", "date": "2026-04-15T08:32:43", "name": "[v2,1/4] AArch64: Improve AdvSIMD and SVE pow(f).", "version": 2, "mbox": "http://patchwork.ozlabs.org/series/499946/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2223401/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2223401/checks/", "tags": {}, "headers": { "Return-Path": "<libc-alpha-bounces~incoming=patchwork.ozlabs.org@sourceware.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "libc-alpha@sourceware.org" ], "Delivered-To": [ "patchwork-incoming@legolas.ozlabs.org", "libc-alpha@sourceware.org" ], "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (1024-bit key;\n unprotected) header.d=arm.com header.i=@arm.com header.a=rsa-sha256\n header.s=selector1 header.b=NjIUWe/0;\n\tdkim=pass (1024-bit key) header.d=arm.com header.i=@arm.com\n header.a=rsa-sha256 header.s=selector1 header.b=NjIUWe/0;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=sourceware.org\n (client-ip=2620:52:6:3111::32; helo=vm01.sourceware.org;\n envelope-from=libc-alpha-bounces~incoming=patchwork.ozlabs.org@sourceware.org;\n receiver=patchwork.ozlabs.org)", "sourceware.org;\n\tdkim=pass (1024-bit key,\n unprotected) header.d=arm.com header.i=@arm.com header.a=rsa-sha256\n header.s=selector1 header.b=NjIUWe/0;\n\tdkim=pass (1024-bit key) header.d=arm.com header.i=@arm.com\n header.a=rsa-sha256 header.s=selector1 header.b=NjIUWe/0", "sourceware.org;\n dmarc=pass (p=none dis=none) header.from=arm.com", "sourceware.org; spf=pass smtp.mailfrom=arm.com", "server2.sourceware.org;\n arc=pass smtp.remote-ip=52.101.72.38" ], "Received": [ "from vm01.sourceware.org (vm01.sourceware.org\n [IPv6:2620:52:6:3111::32])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fwZFl4TGBz1yHd\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 15 Apr 2026 18:34:51 +1000 (AEST)", "from vm01.sourceware.org (localhost [127.0.0.1])\n\tby sourceware.org (Postfix) with ESMTP id 73DE74BA2E11\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 15 Apr 2026 08:34:49 +0000 (GMT)", "from AM0PR02CU008.outbound.protection.outlook.com\n (mail-westeuropeazon11013038.outbound.protection.outlook.com [52.101.72.38])\n by sourceware.org (Postfix) with ESMTPS id 6C0964BA2E04\n for <libc-alpha@sourceware.org>; Wed, 15 Apr 2026 08:34:09 +0000 (GMT)", "from DU2PR04CA0199.eurprd04.prod.outlook.com (2603:10a6:10:28d::24)\n by AS8PR08MB6327.eurprd08.prod.outlook.com (2603:10a6:20b:31a::11)\n with Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.9769.48; Wed, 15 Apr\n 2026 08:34:01 +0000", "from DU2PEPF0001E9C1.eurprd03.prod.outlook.com\n (2603:10a6:10:28d:cafe::5f) by DU2PR04CA0199.outlook.office365.com\n (2603:10a6:10:28d::24) with Microsoft SMTP Server (version=TLS1_3,\n cipher=TLS_AES_256_GCM_SHA384) id 15.20.9769.48 via Frontend Transport; Wed,\n 15 Apr 2026 08:34:01 +0000", "from outbound-uk1.az.dlp.m.darktrace.com (4.158.2.129) by\n DU2PEPF0001E9C1.mail.protection.outlook.com (10.167.8.70) with Microsoft SMTP\n Server (version=TLS1_3, cipher=TLS_AES_256_GCM_SHA384) id 15.20.9745.21 via\n Frontend Transport; Wed, 15 Apr 2026 08:34:01 +0000", "from CWLP265CA0392.GBRP265.PROD.OUTLOOK.COM (2603:10a6:400:1d6::17)\n by DU0PR08MB7639.eurprd08.prod.outlook.com (2603:10a6:10:320::11)\n with Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.9769.48; Wed, 15 Apr\n 2026 08:32:56 +0000", "from AMS1EPF00000044.eurprd04.prod.outlook.com\n (2603:10a6:400:1d6:cafe::bd) by CWLP265CA0392.outlook.office365.com\n (2603:10a6:400:1d6::17) with Microsoft SMTP Server (version=TLS1_3,\n cipher=TLS_AES_256_GCM_SHA384) id 15.20.9769.48 via Frontend Transport; Wed,\n 15 Apr 2026 08:32:55 +0000", "from nebula.arm.com (172.205.89.229) by\n AMS1EPF00000044.mail.protection.outlook.com (10.167.16.41) with Microsoft\n SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id\n 15.20.9769.17 via Frontend Transport; Wed, 15 Apr 2026 08:32:55 +0000", "from AZ-NEU-EXJ02.Arm.com (10.240.25.139) by AZ-NEU-EX03.Arm.com\n (10.240.25.137) with Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.2562.29; Wed, 15 Apr\n 2026 08:32:54 +0000", "from AZ-NEU-EX03.Arm.com (10.240.25.137) by AZ-NEU-EXJ02.Arm.com\n (10.240.25.139) with Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.2562.29; Wed, 15 Apr\n 2026 08:32:53 +0000", "from ip-10-252-30-205.eu-west-1.compute.internal (10.252.0.220) by\n mail.arm.com (10.240.25.137) with Microsoft SMTP Server id 15.2.2562.29 via\n Frontend Transport; Wed, 15 Apr 2026 08:32:53 +0000" ], "DKIM-Filter": [ "OpenDKIM Filter v2.11.0 sourceware.org 73DE74BA2E11", "OpenDKIM Filter v2.11.0 sourceware.org 6C0964BA2E04" ], "DMARC-Filter": "OpenDMARC Filter v1.4.2 sourceware.org 6C0964BA2E04", "ARC-Filter": "OpenARC Filter v1.0.0 sourceware.org 6C0964BA2E04", "ARC-Seal": [ "i=3; a=rsa-sha256; d=sourceware.org; s=key; t=1776242049; cv=pass;\n b=sQHlKJz0K8QTCbUitM2+SellVqqY7PT0QT6ylOEO8Sz+Qy8YZZBBdcPO3IP1FTXBSfvrx2vfcQDhPhBo5n9+/w2illU/Ltc3XxRjnfFn/7MYLG1rCf0qfOZU3Mbd13EChhQouarOQ8FYhhpglv3KuTMMEu4i/aRMc+xervZVjlo=", "i=2; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=pass;\n b=evnzabBsMix6z/NfjoAefAcbxx+FU957UBMbJrYwhS91PrSxiQIMTHnsUSrhK3cbxxOIrHUZjSAgGSrcGTNbK2CB6JA5uHTbelupzE3qdj1VTcgMskqb69upVVyAIjNCWyqGXH2KLA323EPTpN+OHATkFJv4/rTIwGHZidPglCaxfA0HkVjkHkw6TmFBOSQgdRkx8jnn7Q/s/IsKivbd2a+9wtp2K0McT5oUOwbdFJAf7jTRWzCGTwDO6SpsKy+oFzHrgatxj0T3eOSxcH4yVdpcy8qOybOCS7Np0x2ZdpZZrbR+Le79t3/VGB1V/TSbUNYOXGu1zj/6meqWrKbtNg==", "i=1; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=none;\n b=lDX5kaocTuJ1NwrHrDBCPJgH4RgvVkoDObBocZkxdBDXRckI5bWXu+Hk6WthSe7fhpeD2CU7IGA/WfKdLBjd19ZuJjjGllD+alzvKk1V7Ns1AgulH2MDD+TjG1wYFDXRhmtL30wpwt9SZkJWel30zqW4Hx2VB6nkbIreDJ2LyTWiLNHmGqGFl43fqyrTrQ4FMBjAT8XqKixYx78yzCZH0N2eHZIXg+c2CChVWG1yG6BheGEZ39zC2iWHLpH2wbo5qB0j0cz8mXPYBsl6XtnZF+WDjd9fqSvZKvl8oRAgt0melXUBWYx3q44FnG/rdrOl6nc2Z8V1SiPxL3PaUS6bbw==" ], "ARC-Message-Signature": [ "i=3; a=rsa-sha256; d=sourceware.org; s=key;\n t=1776242049; c=relaxed/simple;\n bh=mmW8Cj7Kom6dVWb6Xk7lSeGC7Mf7I92dpO8gaHjpM8s=;\n h=DKIM-Signature:DKIM-Signature:From:To:Subject:Date:Message-ID:\n MIME-Version;\n b=DK8DHvkRyuAZxa6VSdpKRTvVGjj29nwQEHg3HObtj/+htfOncqPM5yGuPlJPZZ8gvs7izYXon33xuUAo8aq1IVisuxOxZTQQiTUDnhTFDKXppaoiyzByn6ETapql/AETDe5gmFuSg1vwxvdiPhiMHa4CKL7Sk/Wi9n8mDiId1Jk=", "i=2; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com;\n s=arcselector10001;\n h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1;\n bh=fAqB3mZsrLrPqG7QtLPnJ9YNnj3ckx0Gx3FH7tu8338=;\n b=J2fipOpo7D0coQa+KehNczXvZ64zkOUqt1WRpA1tWYIs42VDvM9SgSpkm+9c2XlqvTNR4u0hjeHKQJSxxBgEHwHY7/+GZV8xpRLT9guZszRbYPnqO+/q7Vlr7I00ycubqq3nug6db8kB/vN4sueLq+UsbNe/Wfij/wWZw84QCtyzH5+eSLWBUL/DA97AHJ3TrOJ8Cl5eVSzBRnIjbPPnZjP0cbHhAWW8Fv0tPkwtXJWfo3H+J8WMazXVPQLc5DynRSPBG+z+owKK4Otg6Y3ZzQyDRzN1r0fLB+PRIoh0n7C2bWI3MkMVJgeeq9lNLjgs5aHQZetV4kDdmJgCYOwMhQ==", "i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com;\n s=arcselector10001;\n h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1;\n bh=fAqB3mZsrLrPqG7QtLPnJ9YNnj3ckx0Gx3FH7tu8338=;\n b=OLoeGcbS2+fGG53m5F5JFdE3V1y81RWv/zrcN6xbFiLQ0oWZ3Dp4YTsTTRwFBvz1bCrq+muwuOlk8oFNI0LVGMo17jvdKl6PDleh5TD4+G+gNcEDv8vf5gdbqZ7I83krUrCfiz0c37Dh77WDz0JKDHdFmF08p/QWTOTh9vjmHmLRIHOiiejZBHCSrmh23bCXh13gnUVXUE7oG9qO4PdHxepXc7MdOobTGWjCybYTKt+glCq9/dckaswVa7U6/DirkOt10Eg4KeLAYuhk/l3MHevDN0sWPQFiBFMGz0pn6bCQHW9YUGD8qavsFUc29mxCtgS32HttGgPeyw4Gax5srg==" ], "ARC-Authentication-Results": [ "i=3; server2.sourceware.org", "i=2; mx.microsoft.com 1; spf=pass (sender ip is\n 4.158.2.129) smtp.rcpttodomain=sourceware.org smtp.mailfrom=arm.com;\n dmarc=pass (p=none sp=none pct=100) action=none header.from=arm.com;\n dkim=pass (signature was verified) header.d=arm.com; arc=pass (0 oda=1 ltdi=1\n spf=[1,1,smtp.mailfrom=arm.com] dmarc=[1,1,header.from=arm.com])", "i=1; mx.microsoft.com 1; spf=pass (sender ip is\n 172.205.89.229) smtp.rcpttodomain=sourceware.org smtp.mailfrom=arm.com;\n dmarc=pass (p=none sp=none pct=100) action=none header.from=arm.com;\n dkim=none (message not signed); arc=none (0)" ], "DKIM-Signature": [ "v=1; a=rsa-sha256; c=relaxed/relaxed; d=arm.com; s=selector1;\n h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck;\n bh=fAqB3mZsrLrPqG7QtLPnJ9YNnj3ckx0Gx3FH7tu8338=;\n b=NjIUWe/0vCqtAyl+QDwX5j5W/zAzvmfpe9R5/Hl1OGjxIPKJ/5C1DEo9LmNBoyBvdiFw8fw5oAX+RgmRu5iso8DEZXxPWbBZYQgJYzw6EJnIwhATc8uUYWs7Eq4JH7FYm1/qDAJm2ciE/xJEeF7RiP6wxmj4WHzkaVkZrBFTefo=", "v=1; a=rsa-sha256; c=relaxed/relaxed; d=arm.com; s=selector1;\n h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck;\n bh=fAqB3mZsrLrPqG7QtLPnJ9YNnj3ckx0Gx3FH7tu8338=;\n b=NjIUWe/0vCqtAyl+QDwX5j5W/zAzvmfpe9R5/Hl1OGjxIPKJ/5C1DEo9LmNBoyBvdiFw8fw5oAX+RgmRu5iso8DEZXxPWbBZYQgJYzw6EJnIwhATc8uUYWs7Eq4JH7FYm1/qDAJm2ciE/xJEeF7RiP6wxmj4WHzkaVkZrBFTefo=" ], "X-MS-Exchange-Authentication-Results": [ "spf=pass (sender IP is 4.158.2.129)\n smtp.mailfrom=arm.com; dkim=pass (signature was verified)\n header.d=arm.com;dmarc=pass action=none header.from=arm.com;", "spf=pass (sender IP is 172.205.89.229)\n smtp.mailfrom=arm.com; dkim=none (message not signed)\n header.d=none;dmarc=pass action=none header.from=arm.com;" ], "Received-SPF": [ "Pass (protection.outlook.com: domain of arm.com designates\n 4.158.2.129 as permitted sender) receiver=protection.outlook.com;\n client-ip=4.158.2.129; helo=outbound-uk1.az.dlp.m.darktrace.com; pr=C", "Pass (protection.outlook.com: domain of arm.com designates\n 172.205.89.229 as permitted sender) receiver=protection.outlook.com;\n client-ip=172.205.89.229; helo=nebula.arm.com; pr=C" ], "From": "Pierre Blanchard <pierre.blanchard@arm.com>", "To": "<libc-alpha@sourceware.org>", "CC": "Pierre Blanchard <pierre.blanchard@arm.com>", "Subject": "[PATCH v2 1/4] AArch64: Improve AdvSIMD and SVE pow(f).", "Date": "Wed, 15 Apr 2026 08:32:41 +0000", "Message-ID": "<20260415083244.2560-1-pierre.blanchard@arm.com>", "X-Mailer": "git-send-email 2.34.1", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "Content-Type": "text/plain", "X-EOPAttributedMessage": "1", "X-MS-TrafficTypeDiagnostic": "\n AMS1EPF00000044:EE_|DU0PR08MB7639:EE_|DU2PEPF0001E9C1:EE_|AS8PR08MB6327:EE_", "X-MS-Office365-Filtering-Correlation-Id": "10986b3f-2908-4ef7-01c9-08de9ac9bec4", "x-checkrecipientrouted": "true", "NoDisclaimer": "true", "X-MS-Exchange-SenderADCheck": "1", "X-MS-Exchange-AntiSpam-Relay": "0", "X-Microsoft-Antispam-Untrusted": "BCL:0;\n ARA:13230040|36860700016|1800799024|82310400026|376014|18002099003|13003099007|56012099003;", "X-Microsoft-Antispam-Message-Info-Original": "\n 0n9IU5gwcK1ItvqpjSa3fr0hZS6Qve25/MJR6iSRlpp/PAyN4zr5ptzaWNGErkWIzqsNywYK3dV1IIILOrwm7K8taj8NlSWtgb2w+rpoNQCISO8G9NZTxbs/oXFAMmtZSOJR6Y3Uhf/AMxWj4GQacCnRY5kx3cxbO/Jle9Sabt3HOHhBrIFbRsEbAvSMx+FysI+NjAa38En5/ALSMLq9C6dG+uCfSRXU3Z9wH1FzJhcUBOT9UzFnNij0AYcpbjB7cAAV4uvpT6oc9HxxNYzTI2r939q6K9kp3PqqnmzOYkw7bAyxZIYwoEuLp/Y/aZhh0IpPpzKH7JZpxuu7AMUNRyB5VImmUbQMq5vMcszEiCJgibHCBTYN4rXGpWpQp/fjGL6QaksRCGocs29C5kJJYtqPCdDTgHuIY5TKbQttJcBmZmBigFpOR+pooaDwCQ/3AI7VPfoX/b9gjobT4MIZdVd+8CXNSvfjNSdBi+qc5YhIw0NwfyOeyT0sVK6+EGFEUFB7ljfTjNtCzfeYq8nneSyKlMXrdD4pCHkKiKRPHiZ4nvMaP276f3cKba1kMbpToMHzauCvPBtmI3SDmXx5dMDRfy6rW+Y2of28O0ccWIhr5UJxrW1OWWedsEX0XJgC4D9890XL3WOpAwIP0C/qw0wvwMNpa7oXshf4j9Sz5ouv4oFCSyzmW4KNVnKIXBfgVGaHEAkuoiSsvlLEPp/qmKTl+/PHrk+VwIlc0FdpQqB5SoNcKYOw0s7+z2SV1kNsuH2xmQdXxBTqhb2gEvTLKA==", "X-Forefront-Antispam-Report-Untrusted": "CIP:172.205.89.229; CTRY:IE; LANG:en;\n SCL:1; SRV:; IPV:NLI; SFV:NSPM; H:nebula.arm.com; PTR:InfoDomainNonexistent;\n CAT:NONE;\n SFS:(13230040)(36860700016)(1800799024)(82310400026)(376014)(18002099003)(13003099007)(56012099003);\n DIR:OUT; SFP:1101;", "X-Exchange-RoutingPolicyChecked": "\n oCC7Ei60ao/+pmNjxxqGGYgaITWcxzw4qntRxpW+cWmDsZD1BgrIW1f8kzNbWgi+St+t0jO2hUpMhLTGSqMo6yztmv4ef38vh1PNoldcRraOVzP9azWJdEQcCoGI95TK+g4GA95vsHoZKTCGHDZKChQu8w162WQrTReIIkXUkFTJFQhaV7xQ4WUAtb79AOn3y+VW3rt66uCBcJTH/7bpurADUROYCVSLot0y8/wOijPGUXIgvBKvYo0bXMLdgO4bWfRgS+xVbKCDmYeADJqZFwsaDWeesstGjgABzyxo1qip7ZqrwsxfOU3JHMKap9E3nPFCitCYgnrqT/SggnT+iw==", "X-MS-Exchange-Transport-CrossTenantHeadersStamped": [ "DU0PR08MB7639", "AS8PR08MB6327" ], "X-MS-Exchange-Transport-CrossTenantHeadersStripped": "\n DU2PEPF0001E9C1.eurprd03.prod.outlook.com", "X-MS-PublicTrafficType": "Email", "X-MS-Office365-Filtering-Correlation-Id-Prvs": "\n 8ffaef08-12cf-40b1-c472-08de9ac997b6", "X-Microsoft-Antispam": "BCL:0;\n ARA:13230040|14060799003|376014|36860700016|1800799024|82310400026|35042699022|13003099007|18002099003|56012099003;", "X-Microsoft-Antispam-Message-Info": "\n d/eLu3tDuc6qloSGfPEhCNAEBIcHa/9mJe6hvEiP+jdo9gKYJm4lduu/iFOcG/21AciNnfmtUaUP+dLwi6K7iZOCLRVwRVLkho8nIf6CxRlI6y1LcoOevbu0Ia3lJQodN8si3umcLtCqwoPX933SN/Ule+7JL0lDmU9AoPrqPJL7oGt3E/AL5jFM8pNLmcZxuLNX4DSf+Sc32kq3lSyh1Vx5Xo6pVrkcukUlTSG9zQ7AR22GWByxV68ZKui/fWW5eyi+MAERWyOZCFJelCXb8V0eGdfkw+c/C+bVPcjze32vHdvo5M74pW25W4cN9wF9KjFpSpt/EyQ1VTR3ePhEcXmEJqgAPGy8fzg09Q9/Q6TyZWizC7D14eGVQX+9OOATYyWcY5KJVVTTIU1SF0CNqfRQRGPpf7kWutNf705dHg41hHB93oiaBNxotYwxIctPA3EH6xnLMx1am5ob49vR+U/24xBLIzmn3YZ7EqnTNCtTWV45rP45U6PW+//Bf3wxQkuMqTh+mJbDunwsc85S9tgqaBPzDEjRdAm9/7vwdeLbEtzmse2dqczJkpFS/r/wZgTe0l7o9762blVD8Z0fjgCYSSTX3hBqhIbKTCaSzjCrsTy8u+CfC95cybE9l3jaT+mLkgFzdvV4zh0u76H/7tLJp63cGTgTkIOICA0HYyKf20J2FnFe5+VayFLcOPuaBZ7oLlX2bC+p+jwuUuoBbdZVTBkvg12rStQzKtpzJ+gr3tV9f7Gr3alFc1Taf9lDdWwkRZqt3Ifjvh3RS9H/gw==", "X-Forefront-Antispam-Report": "CIP:4.158.2.129; CTRY:GB; LANG:en; SCL:1; SRV:;\n IPV:NLI; SFV:NSPM; H:outbound-uk1.az.dlp.m.darktrace.com;\n PTR:InfoDomainNonexistent; CAT:NONE;\n SFS:(13230040)(14060799003)(376014)(36860700016)(1800799024)(82310400026)(35042699022)(13003099007)(18002099003)(56012099003);\n DIR:OUT; SFP:1101;", "X-MS-Exchange-AntiSpam-MessageData-ChunkCount": "1", "X-MS-Exchange-AntiSpam-MessageData-0": "\n sorwVX8vKAjZRXahutWFfHK2poHh0BAcZouyOV9HIS2nb4fohyn9HzvzYigARsBhIhWHDiFEL1dyykzNNPkTdgQYhT5KpGv3JRB1c3NFCz8MpKL/NzUUCygOOj/9szqM/jHDmI7NMngpqM3zWYm7W8Ste+LHA7YVFExgjsYiL0W3l/KSraXr3BkEvzXi9V96tWXJVSP7u95BGysOLxaiEKXrFC4CvtgpuTURljnrMTRrUFUa5qw/MUaiyF2S67OFaOXMyJZzOHo87Bk62usITfvv8CbUdAFhl9gYfhJxyHQTBbaVCU5cvZE4Xp82oXB0WglqQOSLT5Sqrm0YY1vn0RszzfZzj1Q428oESgv4AI2CeDBTIYD3nWxKJxKT2WMJ3LFgeb49lJkiBIzy66TTyTFvxovGaZbGWQaX2Jq8JRmwSR+9hfExilPqoNeVrtfk", "X-OriginatorOrg": "arm.com", "X-MS-Exchange-CrossTenant-OriginalArrivalTime": "15 Apr 2026 08:34:01.2277 (UTC)", "X-MS-Exchange-CrossTenant-Network-Message-Id": "\n 10986b3f-2908-4ef7-01c9-08de9ac9bec4", "X-MS-Exchange-CrossTenant-Id": "f34e5979-57d9-4aaa-ad4d-b122a662184d", "X-MS-Exchange-CrossTenant-OriginalAttributedTenantConnectingIp": "\n TenantId=f34e5979-57d9-4aaa-ad4d-b122a662184d; Ip=[4.158.2.129];\n Helo=[outbound-uk1.az.dlp.m.darktrace.com]", "X-MS-Exchange-CrossTenant-AuthSource": "\n DU2PEPF0001E9C1.eurprd03.prod.outlook.com", "X-MS-Exchange-CrossTenant-AuthAs": "Anonymous", "X-MS-Exchange-CrossTenant-FromEntityHeader": "HybridOnPrem", "X-BeenThere": "libc-alpha@sourceware.org", "X-Mailman-Version": "2.1.30", "Precedence": "list", "List-Id": "Libc-alpha mailing list <libc-alpha.sourceware.org>", "List-Unsubscribe": "<https://sourceware.org/mailman/options/libc-alpha>,\n <mailto:libc-alpha-request@sourceware.org?subject=unsubscribe>", "List-Archive": "<https://sourceware.org/pipermail/libc-alpha/>", "List-Post": "<mailto:libc-alpha@sourceware.org>", "List-Help": "<mailto:libc-alpha-request@sourceware.org?subject=help>", "List-Subscribe": "<https://sourceware.org/mailman/listinfo/libc-alpha>,\n <mailto:libc-alpha-request@sourceware.org?subject=subscribe>", "Errors-To": "libc-alpha-bounces~incoming=patchwork.ozlabs.org@sourceware.org" }, "content": "Optimize handling of subnormal x and/or negative x.\n\nSome cleanup in attributes, macros and improving overall consistency.\n\nMove core computation to header\nIntroduce config parameter to turn sign_bias on/off.\n\nPerformance improvement on V1 with GCC@15:\n- AdvSIMD pow: 10-15% on subnormals.\n- AdvSIMD powf: 30 to 70% on subnormals or x < 0, <=3% on x > 0.\n- SVE pow: 10-15% on subnormals, <=3% otherwise.\n- SVE powf: no significant variations in codegen/perf.\n---\nOk for master? If so please commit for me as I don't have commit rights.\nLikewise if OK for backport this optimization to release branches as far\nback as 2.40 (release that introduced AArch64 vector pow and powf).\nThanks,\nPierre\n sysdeps/aarch64/fpu/finite_pow.h | 158 +-------------\n sysdeps/aarch64/fpu/pow_advsimd.c | 294 +++++++++----------------\n sysdeps/aarch64/fpu/pow_common.h | 59 +++++\n sysdeps/aarch64/fpu/pow_sve.c | 307 +--------------------------\n sysdeps/aarch64/fpu/powf_advsimd.c | 262 +++++++++--------------\n sysdeps/aarch64/fpu/powf_common.h | 51 +++++\n sysdeps/aarch64/fpu/powf_sve.c | 193 +----------------\n sysdeps/aarch64/fpu/sv_pow_inline.h | 282 ++++++++++++++++++++++++\n sysdeps/aarch64/fpu/sv_powf_inline.h | 189 +++++++++++++++++\n sysdeps/aarch64/fpu/v_pow_inline.h | 193 +++++++++++++++++\n sysdeps/aarch64/fpu/v_powf_inline.h | 116 ++++++++++\n sysdeps/aarch64/fpu/v_powrf_inline.h | 239 +++++++++++++++++++++\n 12 files changed, 1349 insertions(+), 994 deletions(-)\n create mode 100644 sysdeps/aarch64/fpu/pow_common.h\n create mode 100644 sysdeps/aarch64/fpu/powf_common.h\n create mode 100644 sysdeps/aarch64/fpu/sv_pow_inline.h\n create mode 100644 sysdeps/aarch64/fpu/sv_powf_inline.h\n create mode 100644 sysdeps/aarch64/fpu/v_pow_inline.h\n create mode 100644 sysdeps/aarch64/fpu/v_powf_inline.h\n create mode 100644 sysdeps/aarch64/fpu/v_powrf_inline.h", "diff": "diff --git a/sysdeps/aarch64/fpu/finite_pow.h b/sysdeps/aarch64/fpu/finite_pow.h\nindex 44aaa1c4ad..a9a9b38282 100644\n--- a/sysdeps/aarch64/fpu/finite_pow.h\n+++ b/sysdeps/aarch64/fpu/finite_pow.h\n@@ -18,8 +18,7 @@\n <https://www.gnu.org/licenses/>. */\n \n #include \"math_config.h\"\n-\n-/* Scalar version of pow used for fallbacks in vector implementations. */\n+#include \"pow_common.h\"\n \n /* Data is defined in v_pow_log_data.c. */\n #define N_LOG (1 << V_POW_LOG_TABLE_BITS)\n@@ -46,13 +45,6 @@\n #define BigPowY 0x43e\t/* top12(0x1.749p62). */\n #define ThresPowY 0x080 /* BigPowY - SmallPowY. */\n \n-/* Top 12 bits of a double (sign and exponent bits). */\n-static inline uint32_t\n-top12 (double x)\n-{\n- return asuint64 (x) >> 52;\n-}\n-\n /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about\n additional 15 bits precision. IX is the bit representation of x, but\n normalized in the subnormal range using the sign bit for the exponent. */\n@@ -180,151 +172,3 @@ exp_inline (double x, double xtail, uint32_t sign_bias)\n is no spurious underflow here even without fma. */\n return scale + scale * tmp;\n }\n-\n-/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.\n- A version of exp_inline that is not inlined and for which sign_bias is\n- equal to 0. */\n-static double NOINLINE\n-exp_nosignbias (double x, double xtail)\n-{\n- uint32_t abstop = top12 (x) & 0x7ff;\n- if (__glibc_unlikely (abstop - SmallExp >= ThresExp))\n- {\n- /* Avoid spurious underflow for tiny x. */\n- if (abstop - SmallExp >= 0x80000000)\n-\treturn 1.0;\n- /* Note: inf and nan are already handled. */\n- if (abstop >= top12 (1024.0))\n-\treturn asuint64 (x) >> 63 ? 0.0 : INFINITY;\n- /* Large x is special cased below. */\n- abstop = 0;\n- }\n-\n- /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */\n- /* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N]. */\n- double z = InvLn2N * x;\n- double kd = round (z);\n- uint64_t ki = lround (z);\n- double r = x - kd * Ln2HiN - kd * Ln2LoN;\n- /* The code assumes 2^-200 < |xtail| < 2^-8/N. */\n- r += xtail;\n- /* 2^(k/N) ~= scale. */\n- uint64_t idx = ki & (N_EXP - 1);\n- uint64_t top = ki << (52 - V_POW_EXP_TABLE_BITS);\n- /* This is only a valid scale when -1023*N < k < 1024*N. */\n- uint64_t sbits = SBits[idx] + top;\n- /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */\n- double r2 = r * r;\n- double tmp = r + r2 * Cs[0] + r * r2 * (Cs[1] + r * Cs[2]);\n- if (__glibc_unlikely (abstop == 0))\n- return special_case (tmp, sbits, ki);\n- double scale = asdouble (sbits);\n- /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there\n- is no spurious underflow here even without fma. */\n- return scale + scale * tmp;\n-}\n-\n-/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is\n- the bit representation of a non-zero finite floating-point value. */\n-static inline int\n-checkint (uint64_t iy)\n-{\n- int e = iy >> 52 & 0x7ff;\n- if (e < 0x3ff)\n- return 0;\n- if (e > 0x3ff + 52)\n- return 2;\n- if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))\n- return 0;\n- if (iy & (1ULL << (0x3ff + 52 - e)))\n- return 1;\n- return 2;\n-}\n-\n-/* Returns 1 if input is the bit representation of 0, infinity or nan. */\n-static inline int\n-zeroinfnan (uint64_t i)\n-{\n- return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;\n-}\n-\n-static double NOINLINE\n-pow_scalar_special_case (double x, double y)\n-{\n- uint32_t sign_bias = 0;\n- uint64_t ix, iy;\n- uint32_t topx, topy;\n-\n- ix = asuint64 (x);\n- iy = asuint64 (y);\n- topx = top12 (x);\n- topy = top12 (y);\n- if (__glibc_unlikely (topx - SmallPowX >= ThresPowX\n-\t\t|| (topy & 0x7ff) - SmallPowY >= ThresPowY))\n- {\n- /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0\n-\t and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */\n- /* Special cases: (x < 0x1p-126 or inf or nan) or\n-\t (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */\n- if (__glibc_unlikely (zeroinfnan (iy)))\n-\t{\n-\t if (2 * iy == 0)\n-\t return issignaling (x) ? x + y : 1.0;\n-\t if (ix == asuint64 (1.0))\n-\t return issignaling (y) ? x + y : 1.0;\n-\t if (2 * ix > 2 * asuint64 (INFINITY)\n-\t || 2 * iy > 2 * asuint64 (INFINITY))\n-\t return x + y;\n-\t if (2 * ix == 2 * asuint64 (1.0))\n-\t return 1.0;\n-\t if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))\n-\t return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */\n-\t return y * y;\n-\t}\n- if (__glibc_unlikely (zeroinfnan (ix)))\n-\t{\n-\t double x2 = x * x;\n-\t if (ix >> 63 && checkint (iy) == 1)\n-\t {\n-\t x2 = -x2;\n-\t sign_bias = 1;\n-\t }\n-\t return iy >> 63 ? 1 / x2 : x2;\n-\t}\n- /* Here x and y are non-zero finite. */\n- if (ix >> 63)\n-\t{\n-\t /* Finite x < 0. */\n-\t int yint = checkint (iy);\n-\t if (yint == 0)\n-\t return __builtin_nan (\"\");\n-\t if (yint == 1)\n-\t sign_bias = SignBias;\n-\t ix &= 0x7fffffffffffffff;\n-\t topx &= 0x7ff;\n-\t}\n- if ((topy & 0x7ff) - SmallPowY >= ThresPowY)\n-\t{\n-\t /* Note: sign_bias == 0 here because y is not odd. */\n-\t if (ix == asuint64 (1.0))\n-\t return 1.0;\n-\t /* |y| < 2^-65, x^y ~= 1 + y*log(x). */\n-\t if ((topy & 0x7ff) < SmallPowY)\n-\t return 1.0;\n-\t return (ix > asuint64 (1.0)) == (topy < 0x800) ? INFINITY : 0;\n-\t}\n- if (topx == 0)\n-\t{\n-\t /* Normalize subnormal x so exponent becomes negative. */\n-\t ix = asuint64 (x * 0x1p52);\n-\t ix &= 0x7fffffffffffffff;\n-\t ix -= 52ULL << 52;\n-\t}\n- }\n-\n- double lo;\n- double hi = log_inline (ix, &lo);\n- double ehi = y * hi;\n- double elo = y * lo + fma (y, hi, -ehi);\n- return exp_inline (ehi, elo, sign_bias);\n-}\ndiff --git a/sysdeps/aarch64/fpu/pow_advsimd.c b/sysdeps/aarch64/fpu/pow_advsimd.c\nindex 6b84e00814..51a7941f94 100644\n--- a/sysdeps/aarch64/fpu/pow_advsimd.c\n+++ b/sysdeps/aarch64/fpu/pow_advsimd.c\n@@ -19,186 +19,109 @@\n \n #include \"v_math.h\"\n \n-/* Defines parameters of the approximation and scalar fallback. */\n-#include \"finite_pow.h\"\n+#include \"v_pow_inline.h\"\n \n-#define VecSmallPowX v_u64 (SmallPowX)\n-#define VecThresPowX v_u64 (ThresPowX)\n-#define VecSmallPowY v_u64 (SmallPowY)\n-#define VecThresPowY v_u64 (ThresPowY)\n-\n-static const struct data\n-{\n- uint64x2_t inf;\n- float64x2_t small_powx;\n- uint64x2_t offset, mask;\n- uint64x2_t mask_sub_0, mask_sub_1;\n- float64x2_t log_c0, log_c2, log_c4, log_c5;\n- double log_c1, log_c3;\n- double ln2_lo, ln2_hi;\n- uint64x2_t small_exp, thres_exp;\n- double ln2_lo_n, ln2_hi_n;\n- double inv_ln2_n, exp_c2;\n- float64x2_t exp_c0, exp_c1;\n-} data = {\n- /* Power threshold. */\n- .inf = V2 (0x7ff0000000000000),\n- .small_powx = V2 (0x1p-126),\n- .offset = V2 (Off),\n- .mask = V2 (0xfffULL << 52),\n- .mask_sub_0 = V2 (1ULL << 52),\n- .mask_sub_1 = V2 (52ULL << 52),\n- /* Coefficients copied from v_pow_log_data.c\n- relative error: 0x1.11922ap-70 in [-0x1.6bp-8, 0x1.6bp-8]\n- Coefficients are scaled to match the scaling during evaluation. */\n- .log_c0 = V2 (0x1.555555555556p-2 * -2),\n- .log_c1 = -0x1.0000000000006p-2 * -2,\n- .log_c2 = V2 (0x1.999999959554ep-3 * 4),\n- .log_c3 = -0x1.555555529a47ap-3 * 4,\n- .log_c4 = V2 (0x1.2495b9b4845e9p-3 * -8),\n- .log_c5 = V2 (-0x1.0002b8b263fc3p-3 * -8),\n- .ln2_hi = 0x1.62e42fefa3800p-1,\n- .ln2_lo = 0x1.ef35793c76730p-45,\n- /* Polynomial coefficients: abs error: 1.43*2^-58, ulp error: 0.549\n- (0.550 without fma) if |x| < ln2/512. */\n- .exp_c0 = V2 (0x1.fffffffffffd4p-2),\n- .exp_c1 = V2 (0x1.5555571d6ef9p-3),\n- .exp_c2 = 0x1.5555576a5adcep-5,\n- .small_exp = V2 (0x3c90000000000000),\n- .thres_exp = V2 (0x03f0000000000000),\n- .inv_ln2_n = 0x1.71547652b82fep8, /* N/ln2. */\n- .ln2_hi_n = 0x1.62e42fefc0000p-9, /* ln2/N. */\n- .ln2_lo_n = -0x1.c610ca86c3899p-45,\n-};\n-\n-/* This version implements an algorithm close to scalar pow but\n- - does not implement the trick in the exp's specialcase subroutine to avoid\n- double-rounding,\n- - does not use a tail in the exponential core computation,\n- - and pow's exp polynomial order and table bits might differ.\n-\n- Maximum measured error is 1.04 ULPs:\n- _ZGVnN2vv_pow(0x1.024a3e56b3c3p-136, 0x1.87910248b58acp-13)\n- got 0x1.f71162f473251p-1\n- want 0x1.f71162f473252p-1. */\n-\n-static inline float64x2_t\n-v_masked_lookup_f64 (const double *table, uint64x2_t i)\n+static double NOINLINE\n+pow_scalar_special_case (double x, double y)\n {\n- return (float64x2_t){\n- table[(i[0] >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1)],\n- table[(i[1] >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1)]\n- };\n-}\n+ uint32_t sign_bias = 0;\n+ uint64_t ix, iy;\n+ uint32_t topx, topy;\n+\n+ ix = asuint64 (x);\n+ iy = asuint64 (y);\n+ topx = top12 (x);\n+ topy = top12 (y);\n+ /* Special cases: (x < 0x1p-126 or inf or nan) or\n+ (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */\n+ if (__glibc_unlikely (topx - SmallPowX >= ThresPowX\n+\t\t|| (topy & 0x7ff) - SmallPowY >= ThresPowY))\n+ {\n+ /* |y| is 0, Inf or NaN. */\n+ if (__glibc_unlikely (zeroinfnan (iy)))\n+\t{\n+\t if (2 * iy == 0)\n+\t return __issignaling (x) ? x + y : 1.0;\n+\t if (ix == asuint64 (1.0))\n+\t return __issignaling (y) ? x + y : 1.0;\n+\t if (2 * ix > 2 * asuint64 (INFINITY)\n+\t || 2 * iy > 2 * asuint64 (INFINITY))\n+\t return x + y;\n+\t if (2 * ix == 2 * asuint64 (1.0))\n+\t return 1.0;\n+\t if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))\n+\t return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */\n+\t return y * y;\n+\t}\n+ /* |x| is 0, Inf or NaN. */\n+ if (__glibc_unlikely (zeroinfnan (ix)))\n+\t{\n+\t double x2 = x * x;\n+\t if (ix >> 63 && checkint (iy) == 1)\n+\t {\n+\t x2 = -x2;\n+\t sign_bias = 1;\n+\t }\n+\t return iy >> 63 ? 1 / x2 : x2;\n+\t}\n+ /* Here x and y are non-zero finite. */\n+ /* Finite negative x returns NaN unless y is integer. */\n+ if (ix >> 63)\n+\t{\n+\t /* Finite x < 0. */\n+\t int yint = checkint (iy);\n+\t if (yint == 0)\n+\t return __builtin_nan (\"\");\n+\t if (yint == 1)\n+\t sign_bias = SignBias;\n+\t ix &= 0x7fffffffffffffff;\n+\t topx &= 0x7ff;\n+\t}\n+ /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0\n+\t and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */\n+ if ((topy & 0x7ff) - SmallPowY >= ThresPowY)\n+\t{\n+\t /* Note: sign_bias == 0 here because y is not odd. */\n+\t if (ix == asuint64 (1.0))\n+\t return 1.0;\n+\t /* |y| < 2^-65, x^y ~= 1 + y*log(x). */\n+\t if ((topy & 0x7ff) < SmallPowY)\n+\t return 1.0;\n+\t return (ix > asuint64 (1.0)) == (topy < 0x800) ? INFINITY : 0;\n+\t}\n+ if (topx == 0)\n+\t{\n+\t /* Normalize subnormal x so exponent becomes negative. */\n+\t ix = asuint64 (x * 0x1p52);\n+\t ix &= 0x7fffffffffffffff;\n+\t ix -= 52ULL << 52;\n+\t}\n+ }\n \n-/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about\n- additional 15 bits precision. IX is the bit representation of x, but\n- normalized in the subnormal range using the sign bit for the exponent. */\n-static inline float64x2_t\n-v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d)\n-{\n- /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.\n- The range is split into N subintervals.\n- The ith subinterval contains z and c is near its center. */\n- uint64x2_t tmp = vsubq_u64 (ix, d->offset);\n- int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52);\n- uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->mask));\n- float64x2_t z = vreinterpretq_f64_u64 (iz);\n- float64x2_t kd = vcvtq_f64_s64 (k);\n- /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */\n- float64x2_t invc = v_masked_lookup_f64 (__v_pow_log_data.invc, tmp);\n- float64x2_t logc = v_masked_lookup_f64 (__v_pow_log_data.logc, tmp);\n- float64x2_t logctail = v_masked_lookup_f64 (__v_pow_log_data.logctail, tmp);\n- /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and\n- |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */\n- float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, invc);\n- /* k*Ln2 + log(c) + r. */\n- float64x2_t ln2 = vld1q_f64 (&d->ln2_lo);\n- float64x2_t t1 = vfmaq_laneq_f64 (logc, kd, ln2, 1);\n- float64x2_t t2 = vaddq_f64 (t1, r);\n- float64x2_t lo1 = vfmaq_laneq_f64 (logctail, kd, ln2, 0);\n- float64x2_t lo2 = vaddq_f64 (vsubq_f64 (t1, t2), r);\n- /* Evaluation is optimized assuming superscalar pipelined execution. */\n- float64x2_t ar = vmulq_f64 (v_f64 (-0.5), r);\n- float64x2_t ar2 = vmulq_f64 (r, ar);\n- float64x2_t ar3 = vmulq_f64 (r, ar2);\n- /* k*Ln2 + log(c) + r + A[0]*r*r. */\n- float64x2_t hi = vaddq_f64 (t2, ar2);\n- float64x2_t lo3 = vfmaq_f64 (vnegq_f64 (ar2), ar, r);\n- float64x2_t lo4 = vaddq_f64 (vsubq_f64 (t2, hi), ar2);\n- /* p = log1p(r) - r - A[0]*r*r. */\n- float64x2_t odd_coeffs = vld1q_f64 (&d->log_c1);\n- float64x2_t a56 = vfmaq_f64 (d->log_c4, r, d->log_c5);\n- float64x2_t a34 = vfmaq_laneq_f64 (d->log_c2, r, odd_coeffs, 1);\n- float64x2_t a12 = vfmaq_laneq_f64 (d->log_c0, r, odd_coeffs, 0);\n- float64x2_t p = vfmaq_f64 (a34, ar2, a56);\n- p = vfmaq_f64 (a12, ar2, p);\n- p = vmulq_f64 (ar3, p);\n- float64x2_t lo\n- = vaddq_f64 (vaddq_f64 (vaddq_f64 (vaddq_f64 (lo1, lo2), lo3), lo4), p);\n- float64x2_t y = vaddq_f64 (hi, lo);\n- *tail = vaddq_f64 (vsubq_f64 (hi, y), lo);\n- return y;\n+ /* Core computation of exp (y * log (x)). */\n+ double lo;\n+ double hi = log_inline (ix, &lo);\n+ double ehi = y * hi;\n+ double elo = y * lo + fma (y, hi, -ehi);\n+ return exp_inline (ehi, elo, sign_bias);\n }\n \n static float64x2_t VPCS_ATTR NOINLINE\n-exp_special_case (float64x2_t x, float64x2_t xtail)\n-{\n- return (float64x2_t){ exp_nosignbias (x[0], xtail[0]),\n-\t\t\texp_nosignbias (x[1], xtail[1]) };\n-}\n-\n-/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. */\n-static inline float64x2_t\n-v_exp_inline (float64x2_t x, float64x2_t neg_xtail, const struct data *d)\n-{\n- /* Fallback to scalar exp_inline for all lanes if any lane\n- contains value of x s.t. |x| <= 2^-54 or >= 512. */\n- uint64x2_t uoflowx = vcgeq_u64 (\n- vsubq_u64 (vreinterpretq_u64_f64 (vabsq_f64 (x)), d->small_exp),\n- d->thres_exp);\n- if (__glibc_unlikely (v_any_u64 (uoflowx)))\n- return exp_special_case (x, vnegq_f64 (neg_xtail));\n-\n- /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */\n- /* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N]. */\n- /* z - kd is in [-1, 1] in non-nearest rounding modes. */\n- float64x2_t exp_consts = vld1q_f64 (&d->inv_ln2_n);\n- float64x2_t z = vmulq_laneq_f64 (x, exp_consts, 0);\n- float64x2_t kd = vrndnq_f64 (z);\n- uint64x2_t ki = vreinterpretq_u64_s64 (vcvtaq_s64_f64 (z));\n- float64x2_t ln2_n = vld1q_f64 (&d->ln2_lo_n);\n- float64x2_t r = vfmsq_laneq_f64 (x, kd, ln2_n, 1);\n- r = vfmsq_laneq_f64 (r, kd, ln2_n, 0);\n- /* The code assumes 2^-200 < |xtail| < 2^-8/N. */\n- r = vsubq_f64 (r, neg_xtail);\n- /* 2^(k/N) ~= scale. */\n- uint64x2_t idx = vandq_u64 (ki, v_u64 (N_EXP - 1));\n- uint64x2_t top = vshlq_n_u64 (ki, 52 - V_POW_EXP_TABLE_BITS);\n- /* This is only a valid scale when -1023*N < k < 1024*N. */\n- uint64x2_t sbits = v_lookup_u64 (SBits, idx);\n- sbits = vaddq_u64 (sbits, top);\n- /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */\n- float64x2_t r2 = vmulq_f64 (r, r);\n- float64x2_t tmp = vfmaq_laneq_f64 (d->exp_c1, r, exp_consts, 1);\n- tmp = vfmaq_f64 (d->exp_c0, r, tmp);\n- tmp = vfmaq_f64 (r, r2, tmp);\n- float64x2_t scale = vreinterpretq_f64_u64 (sbits);\n- /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there\n- is no spurious underflow here even without fma. */\n- return vfmaq_f64 (scale, scale, tmp);\n-}\n-\n-static float64x2_t NOINLINE VPCS_ATTR\n scalar_fallback (float64x2_t x, float64x2_t y)\n {\n return (float64x2_t){ pow_scalar_special_case (x[0], y[0]),\n \t\t\tpow_scalar_special_case (x[1], y[1]) };\n }\n \n+/* Implementation of AdvSIMD pow.\n+ Maximum measured error is 1.04 ULPs:\n+ _ZGVnN2vv_pow(0x1.024a3e56b3c3p-136, 0x1.87910248b58acp-13)\n+ got 0x1.f71162f473251p-1\n+ want 0x1.f71162f473252p-1. */\n float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y)\n {\n const struct data *d = ptr_barrier (&data);\n+\n /* Case of x <= 0 is too complicated to be vectorised efficiently here,\n fallback to scalar pow for all lanes if any x < 0 detected. */\n if (v_any_u64 (vclezq_s64 (vreinterpretq_s64_f64 (x))))\n@@ -206,43 +129,30 @@ float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y)\n \n uint64x2_t vix = vreinterpretq_u64_f64 (x);\n uint64x2_t viy = vreinterpretq_u64_f64 (y);\n- uint64x2_t iay = vandq_u64 (viy, d->inf);\n \n /* Special cases of x or y.\n The case y==0 does not trigger a special case, since in this case it is\n necessary to fix the result only if x is a signalling nan, which already\n triggers a special case. We test y==0 directly in the scalar fallback. */\n- uint64x2_t iax = vandq_u64 (vix, d->inf);\n- uint64x2_t specialx = vcgeq_u64 (iax, d->inf);\n- uint64x2_t specialy = vcgeq_u64 (iay, d->inf);\n- uint64x2_t special = vorrq_u64 (specialx, specialy);\n+ uint64x2_t x_is_inf_or_nan = vcgeq_u64 (vandq_u64 (vix, d->inf), d->inf);\n+ uint64x2_t y_is_inf_or_nan = vcgeq_u64 (vandq_u64 (viy, d->inf), d->inf);\n+ uint64x2_t special = vorrq_u64 (x_is_inf_or_nan, y_is_inf_or_nan);\n+\n /* Fallback to scalar on all lanes if any lane is inf or nan. */\n if (__glibc_unlikely (v_any_u64 (special)))\n return scalar_fallback (x, y);\n \n- /* Small cases of x: |x| < 0x1p-126. */\n- uint64x2_t smallx = vcaltq_f64 (x, d->small_powx);\n- if (__glibc_unlikely (v_any_u64 (smallx)))\n+ /* Cases of subnormal x: |x| < 0x1p-1022. */\n+ uint64x2_t x_is_subnormal = vcaltq_f64 (x, d->subnormal_bound);\n+ if (__glibc_unlikely (v_any_u64 (x_is_subnormal)))\n {\n- /* Update ix if top 12 bits of x are 0. */\n- uint64x2_t sub_x = vceqzq_u64 (vshrq_n_u64 (vix, 52));\n- if (__glibc_unlikely (v_any_u64 (sub_x)))\n-\t{\n-\t /* Normalize subnormal x so exponent becomes negative. */\n-\t uint64x2_t vix_norm = vreinterpretq_u64_f64 (\n-\t vabsq_f64 (vmulq_f64 (x, vcvtq_f64_u64 (d->mask_sub_0))));\n-\t vix_norm = vsubq_u64 (vix_norm, d->mask_sub_1);\n-\t vix = vbslq_u64 (sub_x, vix_norm, vix);\n-\t}\n+ /* Normalize subnormal x so exponent becomes negative. */\n+ uint64x2_t vix_norm = vreinterpretq_u64_f64 (\n+\t vabsq_f64 (vmulq_f64 (x, d->subnormal_scale)));\n+ vix_norm = vsubq_u64 (vix_norm, d->subnormal_bias);\n+ x = vbslq_f64 (x_is_subnormal, vreinterpretq_f64_u64 (vix_norm), x);\n }\n \n- /* Vector Log(ix, &lo). */\n- float64x2_t vlo;\n- float64x2_t vhi = v_log_inline (vix, &vlo, d);\n-\n- /* Vector Exp(y_loghi, y_loglo). */\n- float64x2_t vehi = vmulq_f64 (y, vhi);\n- float64x2_t vemi = vfmsq_f64 (vehi, y, vhi);\n- float64x2_t neg_velo = vfmsq_f64 (vemi, y, vlo);\n- return v_exp_inline (vehi, neg_velo, d);\n+ /* Core computation of exp (y * log (x)). */\n+ return v_pow_inline (x, y, d);\n }\ndiff --git a/sysdeps/aarch64/fpu/pow_common.h b/sysdeps/aarch64/fpu/pow_common.h\nnew file mode 100644\nindex 0000000000..d0e6407aea\n--- /dev/null\n+++ b/sysdeps/aarch64/fpu/pow_common.h\n@@ -0,0 +1,59 @@\n+/* Common helper functions for double-precision pow variants.\n+\n+ Copyright (C) 2026 Free Software Foundation, Inc.\n+ This file is part of the GNU C Library.\n+\n+ The GNU C Library is free software; you can redistribute it and/or\n+ modify it under the terms of the GNU Lesser General Public\n+ License as published by the Free Software Foundation; either\n+ version 2.1 of the License, or (at your option) any later version.\n+\n+ The GNU C Library is distributed in the hope that it will be useful,\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n+ Lesser General Public License for more details.\n+\n+ You should have received a copy of the GNU Lesser General Public\n+ License along with the GNU C Library; if not, see\n+ <https://www.gnu.org/licenses/>. */\n+\n+#ifndef _AARCH64_GLIBC_POW_COMMON_H\n+#define _AARCH64_GLIBC_POW_COMMON_H\n+\n+#include <math.h>\n+#include <stdint.h>\n+\n+#include \"math_config.h\"\n+\n+/* Top 12 bits of a double (sign and exponent bits). */\n+static inline uint32_t\n+top12 (double x)\n+{\n+ return asuint64 (x) >> 52;\n+}\n+\n+/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is\n+ the bit representation of a non-zero finite floating-point value. */\n+static inline int\n+checkint (uint64_t iy)\n+{\n+ int e = iy >> 52 & 0x7ff;\n+ if (e < 0x3ff)\n+ return 0;\n+ if (e > 0x3ff + 52)\n+ return 2;\n+ if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))\n+ return 0;\n+ if (iy & (1ULL << (0x3ff + 52 - e)))\n+ return 1;\n+ return 2;\n+}\n+\n+/* Returns 1 if input is the bit representation of 0, infinity or nan. */\n+static inline int\n+zeroinfnan (uint64_t i)\n+{\n+ return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;\n+}\n+\n+#endif\ndiff --git a/sysdeps/aarch64/fpu/pow_sve.c b/sysdeps/aarch64/fpu/pow_sve.c\nindex 19062b5375..463a8cff0c 100644\n--- a/sysdeps/aarch64/fpu/pow_sve.c\n+++ b/sysdeps/aarch64/fpu/pow_sve.c\n@@ -42,295 +42,8 @@\n #include \"math_config.h\"\n #include \"sv_math.h\"\n \n-/* Data is defined in v_pow_log_data.c. */\n-#define N_LOG (1 << V_POW_LOG_TABLE_BITS)\n-#define Off 0x3fe6955500000000\n-\n-/* Data is defined in v_pow_exp_data.c. */\n-#define N_EXP (1 << V_POW_EXP_TABLE_BITS)\n-#define SignBias (0x800 << V_POW_EXP_TABLE_BITS)\n-#define SmallExp 0x3c9 /* top12(0x1p-54). */\n-#define BigExp 0x408 /* top12(512.). */\n-#define ThresExp 0x03f /* BigExp - SmallExp. */\n-#define HugeExp 0x409 /* top12(1024.). */\n-\n-/* Constants associated with pow. */\n-#define SmallBoundX 0x1p-126\n-#define SmallPowX 0x001 /* top12(0x1p-126). */\n-#define BigPowX 0x7ff\t/* top12(INFINITY). */\n-#define ThresPowX 0x7fe /* BigPowX - SmallPowX. */\n-#define SmallPowY 0x3be /* top12(0x1.e7b6p-65). */\n-#define BigPowY 0x43e\t/* top12(0x1.749p62). */\n-#define ThresPowY 0x080 /* BigPowY - SmallPowY. */\n-\n-static const struct data\n-{\n- double log_c0, log_c2, log_c4, log_c6, ln2_hi, ln2_lo;\n- double log_c1, log_c3, log_c5, off;\n- double n_over_ln2, exp_c2, ln2_over_n_hi, ln2_over_n_lo;\n- double exp_c0, exp_c1;\n-} data = {\n- .log_c0 = -0x1p-1,\n- .log_c1 = -0x1.555555555556p-1,\n- .log_c2 = 0x1.0000000000006p-1,\n- .log_c3 = 0x1.999999959554ep-1,\n- .log_c4 = -0x1.555555529a47ap-1,\n- .log_c5 = -0x1.2495b9b4845e9p0,\n- .log_c6 = 0x1.0002b8b263fc3p0,\n- .off = Off,\n- .exp_c0 = 0x1.fffffffffffd4p-2,\n- .exp_c1 = 0x1.5555571d6ef9p-3,\n- .exp_c2 = 0x1.5555576a5adcep-5,\n- .ln2_hi = 0x1.62e42fefa3800p-1,\n- .ln2_lo = 0x1.ef35793c76730p-45,\n- .n_over_ln2 = 0x1.71547652b82fep0 * N_EXP,\n- .ln2_over_n_hi = 0x1.62e42fefc0000p-9,\n- .ln2_over_n_lo = -0x1.c610ca86c3899p-45,\n-};\n-\n-/* Check if x is an integer. */\n-static inline svbool_t\n-sv_isint (svbool_t pg, svfloat64_t x)\n-{\n- return svcmpeq (pg, svrintz_z (pg, x), x);\n-}\n-\n-/* Check if x is real not integer valued. */\n-static inline svbool_t\n-sv_isnotint (svbool_t pg, svfloat64_t x)\n-{\n- return svcmpne (pg, svrintz_z (pg, x), x);\n-}\n-\n-/* Check if x is an odd integer. */\n-static inline svbool_t\n-sv_isodd (svbool_t pg, svfloat64_t x)\n-{\n- svfloat64_t y = svmul_x (svptrue_b64 (), x, 0.5);\n- return sv_isnotint (pg, y);\n-}\n-\n-/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is\n- the bit representation of a non-zero finite floating-point value. */\n-static inline int\n-checkint (uint64_t iy)\n-{\n- int e = iy >> 52 & 0x7ff;\n- if (e < 0x3ff)\n- return 0;\n- if (e > 0x3ff + 52)\n- return 2;\n- if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))\n- return 0;\n- if (iy & (1ULL << (0x3ff + 52 - e)))\n- return 1;\n- return 2;\n-}\n-\n-/* Top 12 bits (sign and exponent of each double float lane). */\n-static inline svuint64_t\n-sv_top12 (svfloat64_t x)\n-{\n- return svlsr_x (svptrue_b64 (), svreinterpret_u64 (x), 52);\n-}\n-\n-/* Returns 1 if input is the bit representation of 0, infinity or nan. */\n-static inline int\n-zeroinfnan (uint64_t i)\n-{\n- return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;\n-}\n-\n-/* Returns 1 if input is the bit representation of 0, infinity or nan. */\n-static inline svbool_t\n-sv_zeroinfnan (svbool_t pg, svuint64_t i)\n-{\n- return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),\n-\t\t 2 * asuint64 (INFINITY) - 1);\n-}\n-\n-/* Handle cases that may overflow or underflow when computing the result that\n- is scale*(1+TMP) without intermediate rounding. The bit representation of\n- scale is in SBITS, however it has a computed exponent that may have\n- overflown into the sign bit so that needs to be adjusted before using it as\n- a double. (int32_t)KI is the k used in the argument reduction and exponent\n- adjustment of scale, positive k here means the result may overflow and\n- negative k means the result may underflow. */\n-static inline svfloat64_t\n-specialcase (svfloat64_t tmp, svuint64_t sbits, svuint64_t ki, svbool_t cmp)\n-{\n- svbool_t p_pos = svcmpge_n_f64 (cmp, svreinterpret_f64_u64 (ki), 0.0);\n-\n- /* Scale up or down depending on sign of k. */\n- svint64_t offset\n- = svsel_s64 (p_pos, sv_s64 (1009ull << 52), sv_s64 (-1022ull << 52));\n- svfloat64_t factor\n- = svsel_f64 (p_pos, sv_f64 (0x1p1009), sv_f64 (0x1p-1022));\n-\n- svuint64_t offset_sbits\n- = svsub_u64_x (cmp, sbits, svreinterpret_u64_s64 (offset));\n- svfloat64_t scale = svreinterpret_f64_u64 (offset_sbits);\n- svfloat64_t res = svmad_f64_x (cmp, scale, tmp, scale);\n- return svmul_f64_x (cmp, res, factor);\n-}\n-\n-/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about\n- additional 15 bits precision. IX is the bit representation of x, but\n- normalized in the subnormal range using the sign bit for the exponent. */\n-static inline svfloat64_t\n-sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail,\n-\t const struct data *d)\n-{\n- /* x = 2^k z; where z is in range [Off,2*Off) and exact.\n- The range is split into N subintervals.\n- The ith subinterval contains z and c is near its center. */\n- svuint64_t tmp = svsub_x (pg, ix, d->off);\n- svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, 52 - V_POW_LOG_TABLE_BITS),\n-\t\t\t sv_u64 (N_LOG - 1));\n- svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52);\n- svuint64_t iz = svsub_x (pg, ix, svlsl_x (pg, svreinterpret_u64 (k), 52));\n- svfloat64_t z = svreinterpret_f64 (iz);\n- svfloat64_t kd = svcvt_f64_x (pg, k);\n-\n- /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */\n- /* SVE lookup requires 3 separate lookup tables, as opposed to scalar version\n- that uses array of structures. We also do the lookup earlier in the code\n- to make sure it finishes as early as possible. */\n- svfloat64_t invc = svld1_gather_index (pg, __v_pow_log_data.invc, i);\n- svfloat64_t logc = svld1_gather_index (pg, __v_pow_log_data.logc, i);\n- svfloat64_t logctail = svld1_gather_index (pg, __v_pow_log_data.logctail, i);\n-\n- /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and\n- |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */\n- svfloat64_t r = svmad_x (pg, z, invc, -1.0);\n- /* k*Ln2 + log(c) + r. */\n-\n- svfloat64_t ln2_hilo = svld1rq_f64 (svptrue_b64 (), &d->ln2_hi);\n- svfloat64_t t1 = svmla_lane_f64 (logc, kd, ln2_hilo, 0);\n- svfloat64_t t2 = svadd_x (pg, t1, r);\n- svfloat64_t lo1 = svmla_lane_f64 (logctail, kd, ln2_hilo, 1);\n- svfloat64_t lo2 = svadd_x (pg, svsub_x (pg, t1, t2), r);\n-\n- /* Evaluation is optimized assuming superscalar pipelined execution. */\n-\n- svfloat64_t log_c02 = svld1rq_f64 (svptrue_b64 (), &d->log_c0);\n- svfloat64_t ar = svmul_lane_f64 (r, log_c02, 0);\n- svfloat64_t ar2 = svmul_x (svptrue_b64 (), r, ar);\n- svfloat64_t ar3 = svmul_x (svptrue_b64 (), r, ar2);\n- /* k*Ln2 + log(c) + r + A[0]*r*r. */\n- svfloat64_t hi = svadd_x (pg, t2, ar2);\n- svfloat64_t lo3 = svmls_x (pg, ar2, ar, r);\n- svfloat64_t lo4 = svadd_x (pg, svsub_x (pg, t2, hi), ar2);\n- /* p = log1p(r) - r - A[0]*r*r. */\n- /* p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r *\n- A[6])))). */\n-\n- svfloat64_t log_c46 = svld1rq_f64 (svptrue_b64 (), &d->log_c4);\n- svfloat64_t a56 = svmla_lane_f64 (sv_f64 (d->log_c5), r, log_c46, 1);\n- svfloat64_t a34 = svmla_lane_f64 (sv_f64 (d->log_c3), r, log_c46, 0);\n- svfloat64_t a12 = svmla_lane_f64 (sv_f64 (d->log_c1), r, log_c02, 1);\n- svfloat64_t p = svmla_x (pg, a34, ar2, a56);\n- p = svmla_x (pg, a12, ar2, p);\n- p = svmul_x (svptrue_b64 (), ar3, p);\n- svfloat64_t lo = svadd_x (\n- pg, svadd_x (pg, svsub_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p);\n- svfloat64_t y = svadd_x (pg, hi, lo);\n- *tail = svadd_x (pg, svsub_x (pg, hi, y), lo);\n- return y;\n-}\n-\n-static inline svfloat64_t\n-sv_exp_core (svbool_t pg, svfloat64_t x, svfloat64_t xtail,\n-\t svuint64_t sign_bias, svfloat64_t *tmp, svuint64_t *sbits,\n-\t svuint64_t *ki, const struct data *d)\n-{\n- /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */\n- /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */\n- svfloat64_t n_over_ln2_and_c2 = svld1rq_f64 (svptrue_b64 (), &d->n_over_ln2);\n- svfloat64_t z = svmul_lane_f64 (x, n_over_ln2_and_c2, 0);\n- /* z - kd is in [-1, 1] in non-nearest rounding modes. */\n- svfloat64_t kd = svrinta_x (pg, z);\n- *ki = svreinterpret_u64 (svcvt_s64_x (pg, kd));\n-\n- svfloat64_t ln2_over_n_hilo\n- = svld1rq_f64 (svptrue_b64 (), &d->ln2_over_n_hi);\n- svfloat64_t r = x;\n- r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 0);\n- r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 1);\n- /* The code assumes 2^-200 < |xtail| < 2^-8/N. */\n- r = svadd_x (pg, r, xtail);\n- /* 2^(k/N) ~= scale. */\n- svuint64_t idx = svand_x (pg, *ki, N_EXP - 1);\n- svuint64_t top\n- = svlsl_x (pg, svadd_x (pg, *ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS);\n- /* This is only a valid scale when -1023*N < k < 1024*N. */\n- *sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx);\n- *sbits = svadd_x (pg, *sbits, top);\n- /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */\n- svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);\n- *tmp = svmla_lane_f64 (sv_f64 (d->exp_c1), r, n_over_ln2_and_c2, 1);\n- *tmp = svmla_x (pg, sv_f64 (d->exp_c0), r, *tmp);\n- *tmp = svmla_x (pg, r, r2, *tmp);\n- svfloat64_t scale = svreinterpret_f64 (*sbits);\n- /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there\n- is no spurious underflow here even without fma. */\n- z = svmla_x (pg, scale, scale, *tmp);\n- return z;\n-}\n-\n-/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.\n- The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */\n-static inline svfloat64_t\n-sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail,\n-\t svuint64_t sign_bias, const struct data *d)\n-{\n- /* 3 types of special cases: tiny (uflow and spurious uflow), huge (oflow)\n- and other cases of large values of x (scale * (1 + TMP) oflow). */\n- svuint64_t abstop = svand_x (pg, sv_top12 (x), 0x7ff);\n- /* |x| is large (|x| >= 512) or tiny (|x| <= 0x1p-54). */\n- svbool_t uoflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), ThresExp);\n-\n- svfloat64_t tmp;\n- svuint64_t sbits, ki;\n- if (__glibc_unlikely (svptest_any (pg, uoflow)))\n- {\n- svfloat64_t z\n-\t = sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d);\n-\n- /* |x| is tiny (|x| <= 0x1p-54). */\n- svbool_t uflow\n-\t = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000);\n- uflow = svand_z (pg, uoflow, uflow);\n- /* |x| is huge (|x| >= 1024). */\n- svbool_t oflow = svcmpge (pg, abstop, HugeExp);\n- oflow = svand_z (pg, uoflow, svbic_z (pg, oflow, uflow));\n-\n- /* Handle underflow and overlow in scale.\n-\t For large |x| values (512 < |x| < 1024), scale * (1 + TMP) can\n-\t overflow or underflow. */\n- svbool_t special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow));\n- if (__glibc_unlikely (svptest_any (pg, special)))\n-\tz = svsel (special, specialcase (tmp, sbits, ki, special), z);\n-\n- /* Handle underflow and overflow in exp. */\n- svbool_t x_is_neg = svcmplt (pg, x, 0);\n- svuint64_t sign_mask\n-\t = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS);\n- svfloat64_t res_uoflow\n-\t = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY));\n- res_uoflow = svreinterpret_f64 (\n-\t svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask));\n- /* Avoid spurious underflow for tiny x. */\n- svfloat64_t res_spurious_uflow\n-\t = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000));\n-\n- z = svsel (oflow, res_uoflow, z);\n- z = svsel (uflow, res_spurious_uflow, z);\n- return z;\n- }\n-\n- return sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d);\n-}\n+#define WANT_SV_POW_SIGN_BIAS 1\n+#include \"sv_pow_inline.h\"\n \n static inline double\n pow_specialcase (double x, double y)\n@@ -398,18 +111,14 @@ svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)\n sign_bias = svsel (yisodd_xisneg, sv_u64 (SignBias), sv_u64 (0));\n }\n \n- /* Small cases of x: |x| < 0x1p-126. */\n- svbool_t xsmall = svaclt (yint_or_xpos, x, SmallBoundX);\n- if (__glibc_unlikely (svptest_any (yint_or_xpos, xsmall)))\n+ /* Cases of subnormal x: |x| < 0x1p-1022. */\n+ svbool_t x_is_subnormal = svaclt (yint_or_xpos, x, 0x1p-1022);\n+ if (__glibc_unlikely (svptest_any (yint_or_xpos, x_is_subnormal)))\n {\n /* Normalize subnormal x so exponent becomes negative. */\n- svuint64_t vtopx = svlsr_x (svptrue_b64 (), vix, 52);\n- svbool_t topx_is_null = svcmpeq (xsmall, vtopx, 0);\n-\n- svuint64_t vix_norm = svreinterpret_u64 (svmul_m (xsmall, x, 0x1p52));\n- vix_norm = svand_m (xsmall, vix_norm, 0x7fffffffffffffff);\n- vix_norm = svsub_m (xsmall, vix_norm, 52ULL << 52);\n- vix = svsel (topx_is_null, vix_norm, vix);\n+ vix = svreinterpret_u64 (svmul_m (x_is_subnormal, x, 0x1p52));\n+ vix = svand_m (x_is_subnormal, vix, 0x7fffffffffffffff);\n+ vix = svsub_m (x_is_subnormal, vix, 52ULL << 52);\n }\n \n /* y_hi = log(ix, &y_lo). */\ndiff --git a/sysdeps/aarch64/fpu/powf_advsimd.c b/sysdeps/aarch64/fpu/powf_advsimd.c\nindex b30ac7758b..fe3e91f80f 100644\n--- a/sysdeps/aarch64/fpu/powf_advsimd.c\n+++ b/sysdeps/aarch64/fpu/powf_advsimd.c\n@@ -17,194 +17,132 @@\n License along with the GNU C Library; if not, see\n <https://www.gnu.org/licenses/>. */\n \n-#include \"math_config.h\"\n+#include \"flt-32/math_config.h\"\n+#include \"powf_common.h\"\n #include \"v_math.h\"\n+#include \"v_powf_inline.h\"\n \n-#define Min v_u32 (0x00800000)\n-#define Max v_u32 (0x7f800000)\n-#define Thresh v_u32 (0x7f000000) /* Max - Min. */\n-#define MantissaMask v_u32 (0x007fffff)\n-\n-#define A d->log2_poly\n-#define C d->exp2f_poly\n-\n-/* 2.6 ulp ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2). */\n-#define Off v_u32 (0x3f35d000)\n-\n-#define V_POWF_LOG2_TABLE_BITS 5\n-#define V_EXP2F_TABLE_BITS 5\n-#define Log2IdxMask ((1 << V_POWF_LOG2_TABLE_BITS) - 1)\n-#define Scale ((double) (1 << V_EXP2F_TABLE_BITS))\n-\n-static const struct data\n-{\n- struct\n- {\n- double invc, logc;\n- } log2_tab[1 << V_POWF_LOG2_TABLE_BITS];\n- float64x2_t log2_poly[4];\n- uint64_t exp2f_tab[1 << V_EXP2F_TABLE_BITS];\n- float64x2_t exp2f_poly[3];\n-} data = {\n- .log2_tab = {{0x1.6489890582816p+0, -0x1.e960f97b22702p-2 * Scale},\n-\t {0x1.5cf19b35e3472p+0, -0x1.c993406cd4db6p-2 * Scale},\n-\t {0x1.55aac0e956d65p+0, -0x1.aa711d9a7d0f3p-2 * Scale},\n-\t {0x1.4eb0022977e01p+0, -0x1.8bf37bacdce9bp-2 * Scale},\n-\t {0x1.47fcccda1dd1fp+0, -0x1.6e13b3519946ep-2 * Scale},\n-\t {0x1.418ceabab68c1p+0, -0x1.50cb8281e4089p-2 * Scale},\n-\t {0x1.3b5c788f1edb3p+0, -0x1.341504a237e2bp-2 * Scale},\n-\t {0x1.3567de48e9c9ap+0, -0x1.17eaab624ffbbp-2 * Scale},\n-\t {0x1.2fabc80fd19bap+0, -0x1.f88e708f8c853p-3 * Scale},\n-\t {0x1.2a25200ce536bp+0, -0x1.c24b6da113914p-3 * Scale},\n-\t {0x1.24d108e0152e3p+0, -0x1.8d02ee397cb1dp-3 * Scale},\n-\t {0x1.1facd8ab2fbe1p+0, -0x1.58ac1223408b3p-3 * Scale},\n-\t {0x1.1ab614a03efdfp+0, -0x1.253e6fd190e89p-3 * Scale},\n-\t {0x1.15ea6d03af9ffp+0, -0x1.e5641882c12ffp-4 * Scale},\n-\t {0x1.1147b994bb776p+0, -0x1.81fea712926f7p-4 * Scale},\n-\t {0x1.0ccbf650593aap+0, -0x1.203e240de64a3p-4 * Scale},\n-\t {0x1.0875408477302p+0, -0x1.8029b86a78281p-5 * Scale},\n-\t {0x1.0441d42a93328p+0, -0x1.85d713190fb9p-6 * Scale},\n-\t {0x1p+0, 0x0p+0 * Scale},\n-\t {0x1.f1d006c855e86p-1, 0x1.4c1cc07312997p-5 * Scale},\n-\t {0x1.e28c3341aa301p-1, 0x1.5e1848ccec948p-4 * Scale},\n-\t {0x1.d4bdf9aa64747p-1, 0x1.04cfcb7f1196fp-3 * Scale},\n-\t {0x1.c7b45a24e5803p-1, 0x1.582813d463c21p-3 * Scale},\n-\t {0x1.bb5f5eb2ed60ap-1, 0x1.a936fa68760ccp-3 * Scale},\n-\t {0x1.afb0bff8fe6b4p-1, 0x1.f81bc31d6cc4ep-3 * Scale},\n-\t {0x1.a49badf7ab1f5p-1, 0x1.2279a09fae6b1p-2 * Scale},\n-\t {0x1.9a14a111fc4c9p-1, 0x1.47ec0b6df5526p-2 * Scale},\n-\t {0x1.901131f5b2fdcp-1, 0x1.6c71762280f1p-2 * Scale},\n-\t {0x1.8687f73f6d865p-1, 0x1.90155070798dap-2 * Scale},\n-\t {0x1.7d7067eb77986p-1, 0x1.b2e23b1d3068cp-2 * Scale},\n-\t {0x1.74c2c1cf97b65p-1, 0x1.d4e21b0daa86ap-2 * Scale},\n-\t {0x1.6c77f37cff2a1p-1, 0x1.f61e2a2f67f3fp-2 * Scale},},\n- .log2_poly = { /* rel err: 1.5 * 2^-30. */\n-\t\t V2 (-0x1.6ff5daa3b3d7cp-2 * Scale),\n-\t\t V2 (0x1.ec81d03c01aebp-2 * Scale),\n-\t\t V2 (-0x1.71547bb43f101p-1 * Scale),\n-\t\t V2 (0x1.7154764a815cbp0 * Scale)},\n- .exp2f_tab = {0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f,\n-\t\t0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa,\n-\t\t0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715,\n-\t\t0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,\n-\t\t0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429,\n-\t\t0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74,\n-\t\t0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db,\n-\t\t0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,\n-\t\t0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c,\n-\t\t0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f,\n-\t\t0x3fefa4afa2a490da, 0x3fefd0765b6e4540,},\n- .exp2f_poly = { /* rel err: 1.69 * 2^-34. */\n-\t\t V2 (0x1.c6af84b912394p-5 / Scale / Scale / Scale),\n-\t\t V2 (0x1.ebfce50fac4f3p-3 / Scale / Scale),\n-\t\t V2 (0x1.62e42ff0c52d6p-1 / Scale)}};\n-\n-static float32x4_t VPCS_ATTR NOINLINE\n-special_case (float32x4_t x, float32x4_t y, float32x4_t ret, uint32x4_t cmp)\n+/* Check if x is an integer. */\n+static inline uint32x4_t\n+visint (float32x4_t x)\n {\n- return v_call2_f32 (powf, x, y, ret, cmp);\n+ return vceqq_f32 (vrndq_f32 (x), x);\n }\n \n-static inline float64x2_t\n-ylogx_core (const struct data *d, float64x2_t iz, float64x2_t k,\n-\t float64x2_t invc, float64x2_t logc, float64x2_t y)\n+/* Check if x is real not integer valued. */\n+static inline uint32x4_t\n+visnotint (float32x4_t x)\n {\n-\n- /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. */\n- float64x2_t r = vfmaq_f64 (v_f64 (-1.0), iz, invc);\n- float64x2_t y0 = vaddq_f64 (logc, k);\n-\n- /* Polynomial to approximate log1p(r)/ln2. */\n- float64x2_t logx = vfmaq_f64 (A[1], r, A[0]);\n- logx = vfmaq_f64 (A[2], logx, r);\n- logx = vfmaq_f64 (A[3], logx, r);\n- logx = vfmaq_f64 (y0, logx, r);\n-\n- return vmulq_f64 (logx, y);\n+ return vmvnq_u32 (vceqq_f32 (vrndq_f32 (x), x));\n }\n \n-static inline float64x2_t\n-log2_lookup (const struct data *d, uint32_t i)\n+/* Check if x is an odd integer. */\n+static inline uint32x4_t\n+visodd (float32x4_t x)\n {\n- return vld1q_f64 (\n- &d->log2_tab[(i >> (23 - V_POWF_LOG2_TABLE_BITS)) & Log2IdxMask].invc);\n+ float32x4_t y = vmulq_n_f32 (x, 0.5f);\n+ return visnotint (y);\n }\n \n-static inline uint64x1_t\n-exp2f_lookup (const struct data *d, uint64_t i)\n+/* A scalar subroutine used to fix main power special cases. Similar to the\n+ preamble of scalar powf except that we do not update ix and sign_bias. This\n+ is done in the preamble of the SVE powf. */\n+static inline float\n+powf_specialcase (float x, float y)\n {\n- return vld1_u64 (&d->exp2f_tab[i % (1 << V_EXP2F_TABLE_BITS)]);\n+ uint32_t ix = asuint (x);\n+ uint32_t iy = asuint (y);\n+ /* Either x or y is 0 or inf or nan. */\n+ if (__glibc_unlikely (zeroinfnan (iy)))\n+ {\n+ if (2 * iy == 0)\n+\treturn __issignalingf (x) ? x + y : 1.0f;\n+ if (ix == 0x3f800000)\n+\treturn __issignalingf (y) ? x + y : 1.0f;\n+ if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)\n+\treturn x + y;\n+ if (2 * ix == 2 * 0x3f800000)\n+\treturn 1.0f;\n+ if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))\n+\treturn 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */\n+ return y * y;\n+ }\n+ if (__glibc_unlikely (zeroinfnan (ix)))\n+ {\n+ float x2 = x * x;\n+ if (ix & 0x80000000 && checkint (iy) == 1)\n+\tx2 = -x2;\n+ return iy & 0x80000000 ? 1 / x2 : x2;\n+ }\n+ return x;\n }\n \n-static inline float32x2_t\n-powf_core (const struct data *d, float64x2_t ylogx)\n+/* Special case function wrapper. */\n+static float32x4_t VPCS_ATTR NOINLINE\n+special_case (float32x4_t x, float32x4_t y, float32x4_t ret, uint32x4_t cmp)\n {\n- /* N*x = k + r with r in [-1/2, 1/2]. */\n- float64x2_t kd = vrndnq_f64 (ylogx);\n- int64x2_t ki = vcvtaq_s64_f64 (ylogx);\n- float64x2_t r = vsubq_f64 (ylogx, kd);\n-\n- /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */\n- uint64x2_t t = vcombine_u64 (exp2f_lookup (d, vgetq_lane_s64 (ki, 0)),\n-\t\t\t exp2f_lookup (d, vgetq_lane_s64 (ki, 1)));\n- t = vaddq_u64 (\n- t, vreinterpretq_u64_s64 (vshlq_n_s64 (ki, 52 - V_EXP2F_TABLE_BITS)));\n- float64x2_t s = vreinterpretq_f64_u64 (t);\n- float64x2_t p = vfmaq_f64 (C[1], r, C[0]);\n- p = vfmaq_f64 (C[2], r, p);\n- p = vfmaq_f64 (s, p, vmulq_f64 (s, r));\n- return vcvt_f32_f64 (p);\n+ return v_call2_f32 (powf_specialcase, x, y, ret, cmp);\n }\n \n-float32x4_t VPCS_ATTR V_NAME_F2 (pow) (float32x4_t x, float32x4_t y)\n+/* Power implementation for x containing negative or subnormal lanes. */\n+static inline float32x4_t\n+v_powf_x_is_neg_or_small (float32x4_t x, float32x4_t y, const struct data *d)\n {\n- const struct data *d = ptr_barrier (&data);\n- uint32x4_t u = vreinterpretq_u32_f32 (x);\n- uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (u, Min), Thresh);\n- uint32x4_t tmp = vsubq_u32 (u, Off);\n- uint32x4_t top = vbicq_u32 (tmp, MantissaMask);\n- float32x4_t iz = vreinterpretq_f32_u32 (vsubq_u32 (u, top));\n- int32x4_t k = vshrq_n_s32 (vreinterpretq_s32_u32 (top),\n-\t\t\t 23 - V_EXP2F_TABLE_BITS); /* arithmetic shift. */\n-\n- /* Use double precision for each lane: split input vectors into lo and hi\n- halves and promote. */\n- float64x2_t tab0 = log2_lookup (d, vgetq_lane_u32 (tmp, 0)),\n-\t tab1 = log2_lookup (d, vgetq_lane_u32 (tmp, 1)),\n-\t tab2 = log2_lookup (d, vgetq_lane_u32 (tmp, 2)),\n-\t tab3 = log2_lookup (d, vgetq_lane_u32 (tmp, 3));\n+ uint32x4_t xisneg = vcltzq_f32 (x);\n+ uint32x4_t xsmall = vcaltq_f32 (x, v_f32 (0x1p-126f));\n \n- float64x2_t iz_lo = vcvt_f64_f32 (vget_low_f32 (iz)),\n-\t iz_hi = vcvt_high_f64_f32 (iz);\n+ /* Set sign_bias depending on sign of x and nature of y. */\n+ uint32x4_t yisodd_xisneg = vandq_u32 (visodd (y), xisneg);\n \n- float64x2_t k_lo = vcvtq_f64_s64 (vmovl_s32 (vget_low_s32 (k))),\n-\t k_hi = vcvtq_f64_s64 (vmovl_high_s32 (k));\n+ /* Set variable to SignBias if x is negative and y is odd. */\n+ uint32x4_t sign_bias = vandq_u32 (d->sign_bias, yisodd_xisneg);\n \n- float64x2_t invc_lo = vzip1q_f64 (tab0, tab1),\n-\t invc_hi = vzip1q_f64 (tab2, tab3),\n-\t logc_lo = vzip2q_f64 (tab0, tab1),\n-\t logc_hi = vzip2q_f64 (tab2, tab3);\n+ /* Normalize subnormals. */\n+ float32x4_t a = vabsq_f32 (x);\n+ uint32x4_t ia_norm = vreinterpretq_u32_f32 (vmulq_f32 (a, d->norm));\n+ ia_norm = vsubq_u32 (ia_norm, d->subnormal_bias);\n+ a = vbslq_f32 (xsmall, vreinterpretq_f32_u32 (ia_norm), a);\n \n- float64x2_t y_lo = vcvt_f64_f32 (vget_low_f32 (y)),\n-\t y_hi = vcvt_high_f64_f32 (y);\n+ /* Evaluate exp (y * log(x)) using |x| and sign bias correction. */\n+ float32x4_t ret = v_powf_core (a, y, sign_bias, d);\n \n- float64x2_t ylogx_lo = ylogx_core (d, iz_lo, k_lo, invc_lo, logc_lo, y_lo);\n- float64x2_t ylogx_hi = ylogx_core (d, iz_hi, k_hi, invc_hi, logc_hi, y_hi);\n-\n- uint32x4_t ylogx_top = vuzp2q_u32 (vreinterpretq_u32_f64 (ylogx_lo),\n-\t\t\t\t vreinterpretq_u32_f64 (ylogx_hi));\n-\n- cmp = vorrq_u32 (\n- cmp, vcgeq_u32 (vandq_u32 (vshrq_n_u32 (ylogx_top, 15), v_u32 (0xffff)),\n-\t\t vdupq_n_u32 (asuint64 (126.0 * (1 << V_EXP2F_TABLE_BITS))\n-\t\t\t\t >> 47)));\n+ /* Cases of finite y and finite negative x. */\n+ uint32x4_t yint_or_xpos = vornq_u32 (visint (y), xisneg);\n+ return vbslq_f32 (yint_or_xpos, ret, d->nan);\n+}\n \n- float32x2_t p_lo = powf_core (d, ylogx_lo);\n- float32x2_t p_hi = powf_core (d, ylogx_hi);\n+/* Implementation of AdvSIMD powf.\n+ The theoretical maximum error is under 2.60 ULPs.\n+ Maximum measured error is 2.57 ULPs:\n+ V_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12)\n+ got 0x1.fff868p+127\n+ want 0x1.fff862p+127. */\n+float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (pow) (float32x4_t x, float32x4_t y)\n+{\n+ const struct data *d = ptr_barrier (&data);\n \n+ /* Special cases of x or y: zero, inf and nan. */\n+ uint32x4_t ix = vreinterpretq_u32_f32 (x);\n+ uint32x4_t iy = vreinterpretq_u32_f32 (y);\n+ uint32x4_t xspecial = v_zeroinfnan (d, ix);\n+ uint32x4_t yspecial = v_zeroinfnan (d, iy);\n+ uint32x4_t cmp = vorrq_u32 (xspecial, yspecial);\n+\n+ /* Evaluate pow(x, y) for x containing negative or subnormal lanes. */\n+ uint32x4_t x_is_neg_or_sub = vcltq_f32 (x, v_f32 (0x1p-126f));\n+ if (__glibc_unlikely (v_any_u32 (x_is_neg_or_sub)))\n+ {\n+ float32x4_t ret = v_powf_x_is_neg_or_small (x, y, d);\n+ if (__glibc_unlikely (v_any_u32 (cmp)))\n+\treturn special_case (x, y, ret, cmp);\n+ return ret;\n+ }\n+\n+ /* Else evaluate pow(x, y) for normal and positive x only.\n+ Use the powrf helper routine. */\n if (__glibc_unlikely (v_any_u32 (cmp)))\n- return special_case (x, y, vcombine_f32 (p_lo, p_hi), cmp);\n- return vcombine_f32 (p_lo, p_hi);\n+ return special_case (x, y, v_powrf_core (x, y, d), cmp);\n+ return v_powrf_core (x, y, d);\n }\n libmvec_hidden_def (V_NAME_F2 (pow))\n HALF_WIDTH_ALIAS_F2(pow)\ndiff --git a/sysdeps/aarch64/fpu/powf_common.h b/sysdeps/aarch64/fpu/powf_common.h\nnew file mode 100644\nindex 0000000000..b2c75b5e71\n--- /dev/null\n+++ b/sysdeps/aarch64/fpu/powf_common.h\n@@ -0,0 +1,51 @@\n+/* Common helper functions for single-precision pow variants.\n+\n+ Copyright (C) 2026 Free Software Foundation, Inc.\n+ This file is part of the GNU C Library.\n+\n+ The GNU C Library is free software; you can redistribute it and/or\n+ modify it under the terms of the GNU Lesser General Public\n+ License as published by the Free Software Foundation; either\n+ version 2.1 of the License, or (at your option) any later version.\n+\n+ The GNU C Library is distributed in the hope that it will be useful,\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n+ Lesser General Public License for more details.\n+\n+ You should have received a copy of the GNU Lesser General Public\n+ License along with the GNU C Library; if not, see\n+ <https://www.gnu.org/licenses/>. */\n+\n+#ifndef _AARCH64_GLIBC_POWF_COMMON_H\n+#define _AARCH64_GLIBC_POWF_COMMON_H\n+\n+#include <stdint.h>\n+\n+#include \"flt-32/math_config.h\"\n+\n+/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is\n+ the bit representation of a non-zero finite floating-point value. */\n+static inline int\n+checkint (uint32_t iy)\n+{\n+ int e = iy >> 23 & 0xff;\n+ if (e < 0x7f)\n+ return 0;\n+ if (e > 0x7f + 23)\n+ return 2;\n+ if (iy & ((1 << (0x7f + 23 - e)) - 1))\n+ return 0;\n+ if (iy & (1 << (0x7f + 23 - e)))\n+ return 1;\n+ return 2;\n+}\n+\n+/* Returns 1 if input is the bit representation of 0, infinity or nan. */\n+static inline int\n+zeroinfnan (uint32_t ix)\n+{\n+ return 2 * ix - 1 >= 2u * 0x7f800000 - 1;\n+}\n+\n+#endif\ndiff --git a/sysdeps/aarch64/fpu/powf_sve.c b/sysdeps/aarch64/fpu/powf_sve.c\nindex 46b006c845..44a70105af 100644\n--- a/sysdeps/aarch64/fpu/powf_sve.c\n+++ b/sysdeps/aarch64/fpu/powf_sve.c\n@@ -17,100 +17,11 @@\n License along with the GNU C Library; if not, see\n <https://www.gnu.org/licenses/>. */\n \n-#include \"../ieee754/flt-32/math_config.h\"\n+#include \"flt-32/math_config.h\"\n #include \"sv_math.h\"\n \n-/* The following data is used in the SVE pow core computation\n- and special case detection. */\n-#define Tinvc __v_powf_data.invc\n-#define Tlogc __v_powf_data.logc\n-#define Texp __v_powf_data.scale\n-#define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11))\n-#define Norm 0x1p23f /* 0x4b000000. */\n-\n-/* Overall ULP error bound for pow is 2.6 ulp\n- ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2). */\n-static const struct data\n-{\n- double log_poly[4];\n- double exp_poly[3];\n- float uflow_bound, oflow_bound, small_bound;\n- uint32_t sign_bias, subnormal_bias, off;\n-} data = {\n- /* rel err: 1.5 * 2^-30. Each coefficients is multiplied the value of\n- V_POWF_EXP2_N. */\n- .log_poly = { -0x1.6ff5daa3b3d7cp+3, 0x1.ec81d03c01aebp+3,\n-\t\t-0x1.71547bb43f101p+4, 0x1.7154764a815cbp+5 },\n- /* rel err: 1.69 * 2^-34. */\n- .exp_poly = {\n- 0x1.c6af84b912394p-20, /* A0 / V_POWF_EXP2_N^3. */\n- 0x1.ebfce50fac4f3p-13, /* A1 / V_POWF_EXP2_N^2. */\n- 0x1.62e42ff0c52d6p-6, /* A3 / V_POWF_EXP2_N. */\n- },\n- .uflow_bound = -0x1.2cp+12f, /* -150.0 * V_POWF_EXP2_N. */\n- .oflow_bound = 0x1p+12f, /* 128.0 * V_POWF_EXP2_N. */\n- .small_bound = 0x1p-126f,\n- .off = 0x3f35d000,\n- .sign_bias = SignBias,\n- .subnormal_bias = 0x0b800000, /* 23 << 23. */\n-};\n-\n-#define A(i) sv_f64 (d->log_poly[i])\n-#define C(i) sv_f64 (d->exp_poly[i])\n-\n-/* Check if x is an integer. */\n-static inline svbool_t\n-svisint (svbool_t pg, svfloat32_t x)\n-{\n- return svcmpeq (pg, svrintz_z (pg, x), x);\n-}\n-\n-/* Check if x is real not integer valued. */\n-static inline svbool_t\n-svisnotint (svbool_t pg, svfloat32_t x)\n-{\n- return svcmpne (pg, svrintz_z (pg, x), x);\n-}\n-\n-/* Check if x is an odd integer. */\n-static inline svbool_t\n-svisodd (svbool_t pg, svfloat32_t x)\n-{\n- svfloat32_t y = svmul_x (pg, x, 0.5f);\n- return svisnotint (pg, y);\n-}\n-\n-/* Check if zero, inf or nan. */\n-static inline svbool_t\n-sv_zeroinfnan (svbool_t pg, svuint32_t i)\n-{\n- return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),\n-\t\t 2u * 0x7f800000 - 1);\n-}\n-\n-/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is\n- the bit representation of a non-zero finite floating-point value. */\n-static inline int\n-checkint (uint32_t iy)\n-{\n- int e = iy >> 23 & 0xff;\n- if (e < 0x7f)\n- return 0;\n- if (e > 0x7f + 23)\n- return 2;\n- if (iy & ((1 << (0x7f + 23 - e)) - 1))\n- return 0;\n- if (iy & (1 << (0x7f + 23 - e)))\n- return 1;\n- return 2;\n-}\n-\n-/* Check if zero, inf or nan. */\n-static inline int\n-zeroinfnan (uint32_t ix)\n-{\n- return 2 * ix - 1 >= 2u * 0x7f800000 - 1;\n-}\n+#define WANT_SV_POWF_SIGN_BIAS 1\n+#include \"sv_powf_inline.h\"\n \n /* A scalar subroutine used to fix main power special cases. Similar to the\n preamble of scalar powf except that we do not update ix and sign_bias. This\n@@ -152,91 +63,6 @@ sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)\n return sv_call2_f32 (powf_specialcase, x1, x2, y, cmp);\n }\n \n-/* Compute core for half of the lanes in double precision. */\n-static inline svfloat64_t\n-sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,\n-\t\t svfloat64_t y, svuint64_t sign_bias, svfloat64_t *pylogx,\n-\t\t const struct data *d)\n-{\n- svfloat64_t invc = svld1_gather_index (pg, Tinvc, i);\n- svfloat64_t logc = svld1_gather_index (pg, Tlogc, i);\n-\n- /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. */\n- svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), z, invc);\n- svfloat64_t y0 = svadd_x (pg, logc, svcvt_f64_x (pg, k));\n-\n- /* Polynomial to approximate log1p(r)/ln2. */\n- svfloat64_t logx = A (0);\n- logx = svmad_x (pg, r, logx, A (1));\n- logx = svmad_x (pg, r, logx, A (2));\n- logx = svmad_x (pg, r, logx, A (3));\n- logx = svmad_x (pg, r, logx, y0);\n- *pylogx = svmul_x (pg, y, logx);\n-\n- /* z - kd is in [-1, 1] in non-nearest rounding modes. */\n- svfloat64_t kd = svrinta_x (svptrue_b64 (), *pylogx);\n- svuint64_t ki = svreinterpret_u64 (svcvt_s64_x (svptrue_b64 (), kd));\n-\n- r = svsub_x (pg, *pylogx, kd);\n-\n- /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */\n- svuint64_t t = svld1_gather_index (\n- svptrue_b64 (), Texp, svand_x (svptrue_b64 (), ki, V_POWF_EXP2_N - 1));\n- svuint64_t ski = svadd_x (svptrue_b64 (), ki, sign_bias);\n- t = svadd_x (svptrue_b64 (), t,\n-\t svlsl_x (svptrue_b64 (), ski, 52 - V_POWF_EXP2_TABLE_BITS));\n- svfloat64_t s = svreinterpret_f64 (t);\n-\n- svfloat64_t p = C (0);\n- p = svmla_x (pg, C (1), p, r);\n- p = svmla_x (pg, C (2), p, r);\n- p = svmla_x (pg, s, p, svmul_x (svptrue_b64 (), s, r));\n-\n- return p;\n-}\n-\n-/* Widen vector to double precision and compute core on both halves of the\n- vector. Lower cost of promotion by considering all lanes active. */\n-static inline svfloat32_t\n-sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,\n-\t svfloat32_t y, svuint32_t sign_bias, svfloat32_t *pylogx,\n-\t const struct data *d)\n-{\n- const svbool_t ptrue = svptrue_b64 ();\n-\n- /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two\n- in order to perform core computation in double precision. */\n- const svbool_t pg_lo = svunpklo (pg);\n- const svbool_t pg_hi = svunpkhi (pg);\n- svfloat64_t y_lo = svcvt_f64_x (\n- ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));\n- svfloat64_t y_hi = svcvt_f64_x (\n- ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));\n- svfloat64_t z_lo = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpklo (iz)));\n- svfloat64_t z_hi = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpkhi (iz)));\n- svuint64_t i_lo = svunpklo (i);\n- svuint64_t i_hi = svunpkhi (i);\n- svint64_t k_lo = svunpklo (k);\n- svint64_t k_hi = svunpkhi (k);\n- svuint64_t sign_bias_lo = svunpklo (sign_bias);\n- svuint64_t sign_bias_hi = svunpkhi (sign_bias);\n-\n- /* Compute each part in double precision. */\n- svfloat64_t ylogx_lo, ylogx_hi;\n- svfloat64_t lo = sv_powf_core_ext (pg_lo, i_lo, z_lo, k_lo, y_lo,\n-\t\t\t\t sign_bias_lo, &ylogx_lo, d);\n- svfloat64_t hi = sv_powf_core_ext (pg_hi, i_hi, z_hi, k_hi, y_hi,\n-\t\t\t\t sign_bias_hi, &ylogx_hi, d);\n-\n- /* Convert back to single-precision and interleave. */\n- svfloat32_t ylogx_lo_32 = svcvt_f32_x (ptrue, ylogx_lo);\n- svfloat32_t ylogx_hi_32 = svcvt_f32_x (ptrue, ylogx_hi);\n- *pylogx = svuzp1 (ylogx_lo_32, ylogx_hi_32);\n- svfloat32_t lo_32 = svcvt_f32_x (ptrue, lo);\n- svfloat32_t hi_32 = svcvt_f32_x (ptrue, hi);\n- return svuzp1 (lo_32, hi_32);\n-}\n-\n /* Implementation of SVE powf.\n Provides the same accuracy as AdvSIMD powf, since it relies on the same\n algorithm. The theoretical maximum error is under 2.60 ULPs.\n@@ -273,15 +99,14 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)\n svbool_t yspecial = sv_zeroinfnan (pg, viy0);\n svbool_t cmp = svorr_z (pg, xspecial, yspecial);\n \n- /* Small cases of x: |x| < 0x1p-126. */\n- svbool_t xsmall = svaclt (yint_or_xpos, x, d->small_bound);\n- if (__glibc_unlikely (svptest_any (yint_or_xpos, xsmall)))\n+ /* Cases of subnormal x: |x| < 0x1p-126. */\n+ svbool_t x_is_subnormal = svaclt (yint_or_xpos, x, d->small_bound);\n+ if (__glibc_unlikely (svptest_any (yint_or_xpos, x_is_subnormal)))\n {\n /* Normalize subnormal x so exponent becomes negative. */\n- svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm));\n- vix_norm = svand_x (xsmall, vix_norm, 0x7fffffff);\n- vix_norm = svsub_x (xsmall, vix_norm, d->subnormal_bias);\n- vix = svsel (xsmall, vix_norm, vix);\n+ vix = svreinterpret_u32 (svmul_m (x_is_subnormal, x, 0x1p23f));\n+ vix = svand_m (x_is_subnormal, vix, 0x7fffffff);\n+ vix = svsub_m (x_is_subnormal, vix, d->subnormal_bias);\n }\n /* Part of core computation carried in working precision. */\n svuint32_t tmp = svsub_x (yint_or_xpos, vix, d->off);\ndiff --git a/sysdeps/aarch64/fpu/sv_pow_inline.h b/sysdeps/aarch64/fpu/sv_pow_inline.h\nnew file mode 100644\nindex 0000000000..ebe71d1d1e\n--- /dev/null\n+++ b/sysdeps/aarch64/fpu/sv_pow_inline.h\n@@ -0,0 +1,282 @@\n+/* Helper for SVE double-precision pow\n+\n+ Copyright (C) 2025-2026 Free Software Foundation, Inc.\n+ This file is part of the GNU C Library.\n+\n+ The GNU C Library is free software; you can redistribute it and/or\n+ modify it under the terms of the GNU Lesser General Public\n+ License as published by the Free Software Foundation; either\n+ version 2.1 of the License, or (at your option) any later version.\n+\n+ The GNU C Library is distributed in the hope that it will be useful,\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n+ Lesser General Public License for more details.\n+\n+ You should have received a copy of the GNU Lesser General Public\n+ License along with the GNU C Library; if not, see\n+ <https://www.gnu.org/licenses/>. */\n+\n+#include \"pow_common.h\"\n+\n+#ifndef WANT_SV_POW_SIGN_BIAS\n+# error \\\n+ \"Cannot use sv_pow_inline.h without specifying whether you need sign_bias.\"\n+#endif\n+\n+/* Data is defined in v_pow_log_data.c. */\n+#define N_LOG (1 << V_POW_LOG_TABLE_BITS)\n+#define Off 0x3fe6955500000000\n+\n+/* Data is defined in v_pow_exp_data.c. */\n+#define N_EXP (1 << V_POW_EXP_TABLE_BITS)\n+#define SignBias (0x800 << V_POW_EXP_TABLE_BITS)\n+#define SmallExp 0x3c9 /* top12(0x1p-54). */\n+#define BigExp 0x408 /* top12(512.). */\n+#define ThresExp 0x03f /* BigExp - SmallExp. */\n+#define HugeExp 0x409 /* top12(1024.). */\n+\n+static const struct data\n+{\n+ double log_c0, log_c2, log_c4, log_c6, ln2_hi, ln2_lo;\n+ double log_c1, log_c3, log_c5, off;\n+ double n_over_ln2, exp_c2, ln2_over_n_hi, ln2_over_n_lo;\n+ double exp_c0, exp_c1;\n+} data = {\n+ .log_c0 = -0x1p-1,\n+ .log_c1 = -0x1.555555555556p-1,\n+ .log_c2 = 0x1.0000000000006p-1,\n+ .log_c3 = 0x1.999999959554ep-1,\n+ .log_c4 = -0x1.555555529a47ap-1,\n+ .log_c5 = -0x1.2495b9b4845e9p0,\n+ .log_c6 = 0x1.0002b8b263fc3p0,\n+ .off = Off,\n+ .exp_c0 = 0x1.fffffffffffd4p-2,\n+ .exp_c1 = 0x1.5555571d6ef9p-3,\n+ .exp_c2 = 0x1.5555576a5adcep-5,\n+ .ln2_hi = 0x1.62e42fefa3800p-1,\n+ .ln2_lo = 0x1.ef35793c76730p-45,\n+ .n_over_ln2 = 0x1.71547652b82fep0 * N_EXP,\n+ .ln2_over_n_hi = 0x1.62e42fefc0000p-9,\n+ .ln2_over_n_lo = -0x1.c610ca86c3899p-45,\n+};\n+\n+/* Check if x is an integer. */\n+static inline svbool_t\n+sv_isint (svbool_t pg, svfloat64_t x)\n+{\n+ return svcmpeq (pg, svrintz_z (pg, x), x);\n+}\n+\n+/* Check if x is real not integer valued. */\n+static inline svbool_t\n+sv_isnotint (svbool_t pg, svfloat64_t x)\n+{\n+ return svcmpne (pg, svrintz_z (pg, x), x);\n+}\n+\n+/* Check if x is an odd integer. */\n+static inline svbool_t\n+sv_isodd (svbool_t pg, svfloat64_t x)\n+{\n+ svfloat64_t y = svmul_x (svptrue_b64 (), x, 0.5);\n+ return sv_isnotint (pg, y);\n+}\n+\n+/* Top 12 bits (sign and exponent of each double float lane). */\n+static inline svuint64_t\n+sv_top12 (svfloat64_t x)\n+{\n+ return svlsr_x (svptrue_b64 (), svreinterpret_u64 (x), 52);\n+}\n+\n+/* Returns 1 if input is the bit representation of 0, infinity or nan. */\n+static inline svbool_t\n+sv_zeroinfnan (svbool_t pg, svuint64_t i)\n+{\n+ return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),\n+\t\t 2 * asuint64 (INFINITY) - 1);\n+}\n+\n+/* Handle cases that may overflow or underflow when computing the result that\n+ is scale*(1+TMP) without intermediate rounding. The bit representation of\n+ scale is in SBITS, however it has a computed exponent that may have\n+ overflown into the sign bit so that needs to be adjusted before using it as\n+ a double. (int32_t)KI is the k used in the argument reduction and exponent\n+ adjustment of scale, positive k here means the result may overflow and\n+ negative k means the result may underflow. */\n+static inline svfloat64_t\n+specialcase (svfloat64_t tmp, svuint64_t sbits, svuint64_t ki, svbool_t cmp)\n+{\n+ svbool_t p_pos = svcmpge_n_f64 (cmp, svreinterpret_f64_u64 (ki), 0.0);\n+\n+ /* Scale up or down depending on sign of k. */\n+ svint64_t offset\n+ = svsel_s64 (p_pos, sv_s64 (1009ull << 52), sv_s64 (-1022ull << 52));\n+ svfloat64_t factor\n+ = svsel_f64 (p_pos, sv_f64 (0x1p1009), sv_f64 (0x1p-1022));\n+\n+ svuint64_t offset_sbits\n+ = svsub_u64_x (cmp, sbits, svreinterpret_u64_s64 (offset));\n+ svfloat64_t scale = svreinterpret_f64_u64 (offset_sbits);\n+ svfloat64_t res = svmad_f64_x (cmp, scale, tmp, scale);\n+ return svmul_f64_x (cmp, res, factor);\n+}\n+\n+/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about\n+ additional 15 bits precision. IX is the bit representation of x, but\n+ normalized in the subnormal range using the sign bit for the exponent. */\n+static inline svfloat64_t\n+sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail,\n+\t const struct data *d)\n+{\n+ /* x = 2^k z; where z is in range [Off,2*Off) and exact.\n+ The range is split into N subintervals.\n+ The ith subinterval contains z and c is near its center. */\n+ svuint64_t tmp = svsub_x (pg, ix, d->off);\n+ svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, 52 - V_POW_LOG_TABLE_BITS),\n+\t\t\t sv_u64 (N_LOG - 1));\n+ svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52);\n+ svuint64_t iz = svsub_x (pg, ix, svlsl_x (pg, svreinterpret_u64 (k), 52));\n+ svfloat64_t z = svreinterpret_f64 (iz);\n+ svfloat64_t kd = svcvt_f64_x (pg, k);\n+\n+ /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */\n+ /* SVE lookup requires 3 separate lookup tables, as opposed to scalar version\n+ that uses array of structures. We also do the lookup earlier in the code\n+ to make sure it finishes as early as possible. */\n+ svfloat64_t invc = svld1_gather_index (pg, __v_pow_log_data.invc, i);\n+ svfloat64_t logc = svld1_gather_index (pg, __v_pow_log_data.logc, i);\n+ svfloat64_t logctail = svld1_gather_index (pg, __v_pow_log_data.logctail, i);\n+\n+ /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and\n+ |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */\n+ svfloat64_t r = svmad_x (pg, z, invc, -1.0);\n+ /* k*Ln2 + log(c) + r. */\n+\n+ svfloat64_t ln2_hilo = svld1rq_f64 (svptrue_b64 (), &d->ln2_hi);\n+ svfloat64_t t1 = svmla_lane_f64 (logc, kd, ln2_hilo, 0);\n+ svfloat64_t t2 = svadd_x (pg, t1, r);\n+ svfloat64_t lo1 = svmla_lane_f64 (logctail, kd, ln2_hilo, 1);\n+ svfloat64_t lo2 = svadd_x (pg, svsub_x (pg, t1, t2), r);\n+\n+ /* Evaluation is optimized assuming superscalar pipelined execution. */\n+\n+ svfloat64_t log_c02 = svld1rq_f64 (svptrue_b64 (), &d->log_c0);\n+ svfloat64_t ar = svmul_lane_f64 (r, log_c02, 0);\n+ svfloat64_t ar2 = svmul_x (svptrue_b64 (), r, ar);\n+ svfloat64_t ar3 = svmul_x (svptrue_b64 (), r, ar2);\n+ /* k*Ln2 + log(c) + r + A[0]*r*r. */\n+ svfloat64_t hi = svadd_x (pg, t2, ar2);\n+ svfloat64_t lo3 = svmls_x (pg, ar2, ar, r);\n+ svfloat64_t lo4 = svadd_x (pg, svsub_x (pg, t2, hi), ar2);\n+ /* p = log1p(r) - r - A[0]*r*r. */\n+ /* p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r *\n+ A[6])))). */\n+\n+ svfloat64_t log_c46 = svld1rq_f64 (svptrue_b64 (), &d->log_c4);\n+ svfloat64_t a56 = svmla_lane_f64 (sv_f64 (d->log_c5), r, log_c46, 1);\n+ svfloat64_t a34 = svmla_lane_f64 (sv_f64 (d->log_c3), r, log_c46, 0);\n+ svfloat64_t a12 = svmla_lane_f64 (sv_f64 (d->log_c1), r, log_c02, 1);\n+ svfloat64_t p = svmla_x (pg, a34, ar2, a56);\n+ p = svmla_x (pg, a12, ar2, p);\n+ p = svmul_x (svptrue_b64 (), ar3, p);\n+ svfloat64_t lo = svadd_x (\n+ pg, svadd_x (pg, svsub_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p);\n+ svfloat64_t y = svadd_x (pg, hi, lo);\n+ *tail = svadd_x (pg, svsub_x (pg, hi, y), lo);\n+ return y;\n+}\n+\n+static inline svfloat64_t\n+sv_exp_core (svbool_t pg, svfloat64_t x, svfloat64_t xtail,\n+\t svuint64_t sign_bias, svfloat64_t *tmp, svuint64_t *sbits,\n+\t svuint64_t *ki, const struct data *d)\n+{\n+ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */\n+ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */\n+ svfloat64_t n_over_ln2_and_c2 = svld1rq_f64 (svptrue_b64 (), &d->n_over_ln2);\n+ svfloat64_t z = svmul_lane_f64 (x, n_over_ln2_and_c2, 0);\n+ /* z - kd is in [-1, 1] in non-nearest rounding modes. */\n+ svfloat64_t kd = svrinta_x (pg, z);\n+ *ki = svreinterpret_u64 (svcvt_s64_x (pg, kd));\n+\n+ svfloat64_t ln2_over_n_hilo\n+ = svld1rq_f64 (svptrue_b64 (), &d->ln2_over_n_hi);\n+ svfloat64_t r = x;\n+ r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 0);\n+ r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 1);\n+ /* The code assumes 2^-200 < |xtail| < 2^-8/N. */\n+ r = svadd_x (pg, r, xtail);\n+ /* 2^(k/N) ~= scale. */\n+ svuint64_t idx = svand_x (pg, *ki, N_EXP - 1);\n+ svuint64_t top\n+ = svlsl_x (pg, svadd_x (pg, *ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS);\n+ /* This is only a valid scale when -1023*N < k < 1024*N. */\n+ *sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx);\n+ *sbits = svadd_x (pg, *sbits, top);\n+ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */\n+ svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);\n+ *tmp = svmla_lane_f64 (sv_f64 (d->exp_c1), r, n_over_ln2_and_c2, 1);\n+ *tmp = svmla_x (pg, sv_f64 (d->exp_c0), r, *tmp);\n+ *tmp = svmla_x (pg, r, r2, *tmp);\n+ svfloat64_t scale = svreinterpret_f64 (*sbits);\n+ /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there\n+ is no spurious underflow here even without fma. */\n+ z = svmla_x (pg, scale, scale, *tmp);\n+ return z;\n+}\n+\n+/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.\n+ The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */\n+static inline svfloat64_t\n+sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail,\n+\t svuint64_t sign_bias, const struct data *d)\n+{\n+ /* 3 types of special cases: tiny (uflow and spurious uflow), huge (oflow)\n+ and other cases of large values of x (scale * (1 + TMP) oflow). */\n+ svuint64_t abstop = svand_x (pg, sv_top12 (x), 0x7ff);\n+ /* |x| is large (|x| >= 512) or tiny (|x| <= 0x1p-54). */\n+ svbool_t uoflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), ThresExp);\n+\n+ svfloat64_t tmp;\n+ svuint64_t sbits, ki;\n+ if (__glibc_unlikely (svptest_any (pg, uoflow)))\n+ {\n+ svfloat64_t z\n+\t = sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d);\n+\n+ /* |x| is tiny (|x| <= 0x1p-54). */\n+ svbool_t uflow\n+\t = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000);\n+ uflow = svand_z (pg, uoflow, uflow);\n+ /* |x| is huge (|x| >= 1024). */\n+ svbool_t oflow = svcmpge (pg, abstop, HugeExp);\n+ oflow = svand_z (pg, uoflow, svbic_z (pg, oflow, uflow));\n+\n+ /* Handle underflow and overlow in scale.\n+\t For large |x| values (512 < |x| < 1024), scale * (1 + TMP) can\n+\t overflow or underflow. */\n+ svbool_t special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow));\n+ if (__glibc_unlikely (svptest_any (pg, special)))\n+\tz = svsel (special, specialcase (tmp, sbits, ki, special), z);\n+\n+ /* Handle underflow and overflow in exp. */\n+ svbool_t x_is_neg = svcmplt (pg, x, 0);\n+ svuint64_t sign_mask\n+\t = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS);\n+ svfloat64_t res_uoflow\n+\t = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY));\n+ res_uoflow = svreinterpret_f64 (\n+\t svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask));\n+ /* Avoid spurious underflow for tiny x. */\n+ svfloat64_t res_spurious_uflow\n+\t = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000));\n+\n+ z = svsel (oflow, res_uoflow, z);\n+ z = svsel (uflow, res_spurious_uflow, z);\n+ return z;\n+ }\n+\n+ return sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d);\n+}\ndiff --git a/sysdeps/aarch64/fpu/sv_powf_inline.h b/sysdeps/aarch64/fpu/sv_powf_inline.h\nnew file mode 100644\nindex 0000000000..be3f0cfeed\n--- /dev/null\n+++ b/sysdeps/aarch64/fpu/sv_powf_inline.h\n@@ -0,0 +1,189 @@\n+/* Helper for SVE single-precision pow\n+\n+ Copyright (C) 2025-2026 Free Software Foundation, Inc.\n+ This file is part of the GNU C Library.\n+\n+ The GNU C Library is free software; you can redistribute it and/or\n+ modify it under the terms of the GNU Lesser General Public\n+ License as published by the Free Software Foundation; either\n+ version 2.1 of the License, or (at your option) any later version.\n+\n+ The GNU C Library is distributed in the hope that it will be useful,\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n+ Lesser General Public License for more details.\n+\n+ You should have received a copy of the GNU Lesser General Public\n+ License along with the GNU C Library; if not, see\n+ <https://www.gnu.org/licenses/>. */\n+\n+#include \"powf_common.h\"\n+\n+#ifndef WANT_SV_POWF_SIGN_BIAS\n+# error \\\n+ \"Cannot use sv_powf_inline.h without specifying whether you need sign_bias.\"\n+#endif\n+\n+/* The following data is used in the SVE pow core computation\n+ and special case detection. */\n+#define Tinvc __v_powf_data.invc\n+#define Tlogc __v_powf_data.logc\n+#define Texp __v_powf_data.scale\n+#define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11))\n+static const struct data\n+{\n+ double log_poly[4];\n+ double exp_poly[3];\n+ float uflow_bound, oflow_bound, small_bound;\n+ uint32_t sign_bias, subnormal_bias, off;\n+} data = {\n+ /* rel err: 1.5 * 2^-30. Each coefficients is multiplied the value of\n+ V_POWF_EXP2_N. */\n+ .log_poly = { -0x1.6ff5daa3b3d7cp+3, 0x1.ec81d03c01aebp+3,\n+\t\t-0x1.71547bb43f101p+4, 0x1.7154764a815cbp+5 },\n+ /* rel err: 1.69 * 2^-34. */\n+ .exp_poly = {\n+ 0x1.c6af84b912394p-20, /* A0 / V_POWF_EXP2_N^3. */\n+ 0x1.ebfce50fac4f3p-13, /* A1 / V_POWF_EXP2_N^2. */\n+ 0x1.62e42ff0c52d6p-6, /* A3 / V_POWF_EXP2_N. */\n+ },\n+ .uflow_bound = -0x1.2cp+12f, /* -150.0 * V_POWF_EXP2_N. */\n+ .oflow_bound = 0x1p+12f, /* 128.0 * V_POWF_EXP2_N. */\n+ .small_bound = 0x1p-126f,\n+ .off = 0x3f35d000,\n+ .sign_bias = SignBias,\n+ .subnormal_bias = 0x0b800000, /* 23 << 23. */\n+};\n+\n+/* Check if x is an integer. */\n+static inline svbool_t\n+svisint (svbool_t pg, svfloat32_t x)\n+{\n+ return svcmpeq (pg, svrintz_z (pg, x), x);\n+}\n+\n+/* Check if x is real not integer valued. */\n+static inline svbool_t\n+svisnotint (svbool_t pg, svfloat32_t x)\n+{\n+ return svcmpne (pg, svrintz_z (pg, x), x);\n+}\n+\n+/* Check if x is an odd integer. */\n+static inline svbool_t\n+svisodd (svbool_t pg, svfloat32_t x)\n+{\n+ svfloat32_t y = svmul_x (pg, x, 0.5f);\n+ return svisnotint (pg, y);\n+}\n+\n+/* Check if zero, inf or nan. */\n+static inline svbool_t\n+sv_zeroinfnan (svbool_t pg, svuint32_t i)\n+{\n+ return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),\n+\t\t 2u * 0x7f800000 - 1);\n+}\n+\n+/* Compute core for half of the lanes in double precision. */\n+static inline svfloat64_t\n+sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,\n+#if WANT_SV_POWF_SIGN_BIAS\n+\t\t svfloat64_t y, svuint64_t sign_bias, svfloat64_t *pylogx,\n+#else\n+\t\t svfloat64_t y, svfloat64_t *pylogx,\n+#endif\n+\t\t const struct data *d)\n+{\n+ svfloat64_t invc = svld1_gather_index (pg, Tinvc, i);\n+ svfloat64_t logc = svld1_gather_index (pg, Tlogc, i);\n+\n+ /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. */\n+ svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), z, invc);\n+ svfloat64_t y0 = svadd_x (pg, logc, svcvt_f64_x (pg, k));\n+\n+ /* Polynomial to approximate log1p(r)/ln2. */\n+ svfloat64_t logx = sv_f64 (d->log_poly[0]);\n+ logx = svmad_x (pg, r, logx, sv_f64 (d->log_poly[1]));\n+ logx = svmad_x (pg, r, logx, sv_f64 (d->log_poly[2]));\n+ logx = svmad_x (pg, r, logx, sv_f64 (d->log_poly[3]));\n+ logx = svmad_x (pg, r, logx, y0);\n+ *pylogx = svmul_x (pg, y, logx);\n+\n+ /* z - kd is in [-1, 1] in non-nearest rounding modes. */\n+ svfloat64_t kd = svrinta_x (svptrue_b64 (), *pylogx);\n+ svuint64_t ki = svreinterpret_u64 (svcvt_s64_x (svptrue_b64 (), kd));\n+\n+ r = svsub_x (pg, *pylogx, kd);\n+\n+ /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */\n+ svuint64_t t = svld1_gather_index (\n+ svptrue_b64 (), Texp, svand_x (svptrue_b64 (), ki, V_POWF_EXP2_N - 1));\n+#if WANT_SV_POWF_SIGN_BIAS\n+ svuint64_t ski = svadd_x (svptrue_b64 (), ki, sign_bias);\n+ t = svadd_x (svptrue_b64 (), t,\n+\t svlsl_x (svptrue_b64 (), ski, 52 - V_POWF_EXP2_TABLE_BITS));\n+#else\n+ t = svadd_x (svptrue_b64 (), t,\n+\t svlsl_x (svptrue_b64 (), ki, 52 - V_POWF_EXP2_TABLE_BITS));\n+#endif\n+ svfloat64_t s = svreinterpret_f64 (t);\n+\n+ svfloat64_t p = sv_f64 (d->exp_poly[0]);\n+ p = svmla_x (pg, sv_f64 (d->exp_poly[1]), p, r);\n+ p = svmla_x (pg, sv_f64 (d->exp_poly[2]), p, r);\n+ p = svmla_x (pg, s, p, svmul_x (svptrue_b64 (), s, r));\n+\n+ return p;\n+}\n+\n+/* Widen vector to double precision and compute core on both halves of the\n+ vector. Lower cost of promotion by considering all lanes active. */\n+static inline svfloat32_t\n+sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,\n+\t svfloat32_t y, svuint32_t sign_bias, svfloat32_t *pylogx,\n+\t const struct data *d)\n+{\n+ const svbool_t ptrue = svptrue_b64 ();\n+\n+ /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two\n+ in order to perform core computation in double precision. */\n+ const svbool_t pg_lo = svunpklo (pg);\n+ const svbool_t pg_hi = svunpkhi (pg);\n+ svfloat64_t y_lo = svcvt_f64_x (\n+ ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));\n+ svfloat64_t y_hi = svcvt_f64_x (\n+ ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));\n+ svfloat64_t z_lo = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpklo (iz)));\n+ svfloat64_t z_hi = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpkhi (iz)));\n+ svuint64_t i_lo = svunpklo (i);\n+ svuint64_t i_hi = svunpkhi (i);\n+ svint64_t k_lo = svunpklo (k);\n+ svint64_t k_hi = svunpkhi (k);\n+#if WANT_SV_POWF_SIGN_BIAS\n+ svuint64_t sign_bias_lo = svunpklo (sign_bias);\n+ svuint64_t sign_bias_hi = svunpkhi (sign_bias);\n+#endif\n+\n+ /* Compute each part in double precision. */\n+ svfloat64_t ylogx_lo, ylogx_hi;\n+#if WANT_SV_POWF_SIGN_BIAS\n+ svfloat64_t lo = sv_powf_core_ext (pg_lo, i_lo, z_lo, k_lo, y_lo,\n+\t\t\t\t sign_bias_lo, &ylogx_lo, d);\n+ svfloat64_t hi = sv_powf_core_ext (pg_hi, i_hi, z_hi, k_hi, y_hi,\n+\t\t\t\t sign_bias_hi, &ylogx_hi, d);\n+#else\n+ svfloat64_t lo\n+ = sv_powf_core_ext (pg_lo, i_lo, z_lo, k_lo, y_lo, &ylogx_lo, d);\n+ svfloat64_t hi\n+ = sv_powf_core_ext (pg_hi, i_hi, z_hi, k_hi, y_hi, &ylogx_hi, d);\n+#endif\n+\n+ /* Convert back to single-precision and interleave. */\n+ svfloat32_t ylogx_lo_32 = svcvt_f32_x (ptrue, ylogx_lo);\n+ svfloat32_t ylogx_hi_32 = svcvt_f32_x (ptrue, ylogx_hi);\n+ *pylogx = svuzp1 (ylogx_lo_32, ylogx_hi_32);\n+ svfloat32_t lo_32 = svcvt_f32_x (ptrue, lo);\n+ svfloat32_t hi_32 = svcvt_f32_x (ptrue, hi);\n+ return svuzp1 (lo_32, hi_32);\n+}\ndiff --git a/sysdeps/aarch64/fpu/v_pow_inline.h b/sysdeps/aarch64/fpu/v_pow_inline.h\nnew file mode 100644\nindex 0000000000..5020fda843\n--- /dev/null\n+++ b/sysdeps/aarch64/fpu/v_pow_inline.h\n@@ -0,0 +1,193 @@\n+/* Helper for AdvSIMD double-precision pow\n+\n+ Copyright (C) 2025 Free Software Foundation, Inc.\n+ This file is part of the GNU C Library.\n+\n+ The GNU C Library is free software; you can redistribute it and/or\n+ modify it under the terms of the GNU Lesser General Public\n+ License as published by the Free Software Foundation; either\n+ version 2.1 of the License, or (at your option) any later version.\n+\n+ The GNU C Library is distributed in the hope that it will be useful,\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n+ Lesser General Public License for more details.\n+\n+ You should have received a copy of the GNU Lesser General Public\n+ License along with the GNU C Library; if not, see\n+ <https://www.gnu.org/licenses/>. */\n+\n+/* This header defines parameters of the approximation and scalar fallback. */\n+#include \"finite_pow.h\"\n+\n+static const struct data\n+{\n+ uint64x2_t inf;\n+ float64x2_t subnormal_bound, subnormal_scale;\n+ uint64x2_t subnormal_bias;\n+ uint64x2_t offset, mask;\n+ float64x2_t log_c0, log_c2, log_c4, log_c5;\n+ double log_c1, log_c3;\n+ double ln2_lo, ln2_hi;\n+ uint64x2_t small_exp, thres_exp;\n+ double ln2_lo_n, ln2_hi_n;\n+ double inv_ln2_n, exp_c2;\n+ float64x2_t exp_c0, exp_c1;\n+} data = {\n+ /* Power thresholds. */\n+ .inf = V2 (0x7ff0000000000000),\n+ .subnormal_bound = V2 (0x1p-1022),\n+ /* Subnormal x update. */\n+ .subnormal_scale = V2 (0x1p52),\n+ .subnormal_bias = V2 (52ULL << 52),\n+ /* Constants for logarithm. */\n+ .offset = V2 (Off),\n+ .mask = V2 (0xfffULL << 52),\n+ /* Coefficients copied from v_pow_log_data.c\n+ relative error: 0x1.11922ap-70 in [-0x1.6bp-8, 0x1.6bp-8]\n+ Coefficients are scaled to match the scaling during evaluation. */\n+ .log_c0 = V2 (0x1.555555555556p-2 * -2),\n+ .log_c1 = -0x1.0000000000006p-2 * -2,\n+ .log_c2 = V2 (0x1.999999959554ep-3 * 4),\n+ .log_c3 = -0x1.555555529a47ap-3 * 4,\n+ .log_c4 = V2 (0x1.2495b9b4845e9p-3 * -8),\n+ .log_c5 = V2 (-0x1.0002b8b263fc3p-3 * -8),\n+ .ln2_hi = 0x1.62e42fefa3800p-1,\n+ .ln2_lo = 0x1.ef35793c76730p-45,\n+ /* Polynomial coefficients: abs error: 1.43*2^-58, ulp error: 0.549\n+ (0.550 without fma) if |x| < ln2/512. */\n+ .exp_c0 = V2 (0x1.fffffffffffd4p-2),\n+ .exp_c1 = V2 (0x1.5555571d6ef9p-3),\n+ .exp_c2 = 0x1.5555576a5adcep-5,\n+ .small_exp = V2 (0x3c90000000000000),\n+ .thres_exp = V2 (0x03f0000000000000),\n+ .inv_ln2_n = 0x1.71547652b82fep8, /* N/ln2. */\n+ .ln2_hi_n = 0x1.62e42fefc0000p-9, /* ln2/N. */\n+ .ln2_lo_n = -0x1.c610ca86c3899p-45,\n+};\n+\n+static inline float64x2_t VPCS_ATTR\n+v_masked_lookup_f64 (const double *table, uint64x2_t i)\n+{\n+ return (float64x2_t){\n+ table[(i[0] >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1)],\n+ table[(i[1] >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1)]\n+ };\n+}\n+\n+/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about\n+ additional 15 bits precision. IX is the bit representation of x, but\n+ normalized in the subnormal range using the sign bit for the exponent. */\n+static inline float64x2_t VPCS_ATTR\n+v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d)\n+{\n+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.\n+ The range is split into N subintervals.\n+ The ith subinterval contains z and c is near its center. */\n+ uint64x2_t tmp = vsubq_u64 (ix, d->offset);\n+ int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52);\n+ uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->mask));\n+ float64x2_t z = vreinterpretq_f64_u64 (iz);\n+ float64x2_t kd = vcvtq_f64_s64 (k);\n+ /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */\n+ float64x2_t invc = v_masked_lookup_f64 (__v_pow_log_data.invc, tmp);\n+ float64x2_t logc = v_masked_lookup_f64 (__v_pow_log_data.logc, tmp);\n+ float64x2_t logctail = v_masked_lookup_f64 (__v_pow_log_data.logctail, tmp);\n+ /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and\n+ |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */\n+ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, invc);\n+ /* k*Ln2 + log(c) + r. */\n+ float64x2_t ln2 = vld1q_f64 (&d->ln2_lo);\n+ float64x2_t t1 = vfmaq_laneq_f64 (logc, kd, ln2, 1);\n+ float64x2_t t2 = vaddq_f64 (t1, r);\n+ float64x2_t lo1 = vfmaq_laneq_f64 (logctail, kd, ln2, 0);\n+ float64x2_t lo2 = vaddq_f64 (vsubq_f64 (t1, t2), r);\n+ /* Evaluation is optimized assuming superscalar pipelined execution. */\n+ float64x2_t ar = vmulq_f64 (v_f64 (-0.5), r);\n+ float64x2_t ar2 = vmulq_f64 (r, ar);\n+ float64x2_t ar3 = vmulq_f64 (r, ar2);\n+ /* k*Ln2 + log(c) + r + A[0]*r*r. */\n+ float64x2_t hi = vaddq_f64 (t2, ar2);\n+ float64x2_t lo3 = vfmaq_f64 (vnegq_f64 (ar2), ar, r);\n+ float64x2_t lo4 = vaddq_f64 (vsubq_f64 (t2, hi), ar2);\n+ /* p = log1p(r) - r - A[0]*r*r. */\n+ float64x2_t odd_coeffs = vld1q_f64 (&d->log_c1);\n+ float64x2_t a56 = vfmaq_f64 (d->log_c4, r, d->log_c5);\n+ float64x2_t a34 = vfmaq_laneq_f64 (d->log_c2, r, odd_coeffs, 1);\n+ float64x2_t a12 = vfmaq_laneq_f64 (d->log_c0, r, odd_coeffs, 0);\n+ float64x2_t p = vfmaq_f64 (a34, ar2, a56);\n+ p = vfmaq_f64 (a12, ar2, p);\n+ p = vmulq_f64 (ar3, p);\n+ float64x2_t lo\n+ = vaddq_f64 (vaddq_f64 (vaddq_f64 (vaddq_f64 (lo1, lo2), lo3), lo4), p);\n+ float64x2_t y = vaddq_f64 (hi, lo);\n+ *tail = vaddq_f64 (vsubq_f64 (hi, y), lo);\n+ return y;\n+}\n+\n+static float64x2_t VPCS_ATTR NOINLINE\n+exp_special_case (float64x2_t x, float64x2_t xtail)\n+{\n+ return (float64x2_t){ exp_inline (x[0], xtail[0], 0),\n+\t\t\texp_inline (x[1], xtail[1], 0) };\n+}\n+\n+/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. */\n+static inline float64x2_t VPCS_ATTR\n+v_exp_inline (float64x2_t x, float64x2_t neg_xtail, const struct data *d)\n+{\n+ /* Fallback to scalar exp_inline for all lanes if any lane\n+ contains value of x s.t. |x| <= 2^-54 or >= 512. */\n+ uint64x2_t uoflowx = vcgeq_u64 (\n+ vsubq_u64 (vreinterpretq_u64_f64 (vabsq_f64 (x)), d->small_exp),\n+ d->thres_exp);\n+ if (__glibc_unlikely (v_any_u64 (uoflowx)))\n+ return exp_special_case (x, vnegq_f64 (neg_xtail));\n+\n+ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */\n+ /* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N]. */\n+ /* z - kd is in [-1, 1] in non-nearest rounding modes. */\n+ float64x2_t exp_consts = vld1q_f64 (&d->inv_ln2_n);\n+ float64x2_t z = vmulq_laneq_f64 (x, exp_consts, 0);\n+ float64x2_t kd = vrndnq_f64 (z);\n+ uint64x2_t ki = vreinterpretq_u64_s64 (vcvtaq_s64_f64 (z));\n+ float64x2_t ln2_n = vld1q_f64 (&d->ln2_lo_n);\n+ float64x2_t r = vfmsq_laneq_f64 (x, kd, ln2_n, 1);\n+ r = vfmsq_laneq_f64 (r, kd, ln2_n, 0);\n+ /* The code assumes 2^-200 < |xtail| < 2^-8/N. */\n+ r = vsubq_f64 (r, neg_xtail);\n+ /* 2^(k/N) ~= scale. */\n+ uint64x2_t idx = vandq_u64 (ki, v_u64 (N_EXP - 1));\n+ uint64x2_t top = vshlq_n_u64 (ki, 52 - V_POW_EXP_TABLE_BITS);\n+ /* This is only a valid scale when -1023*N < k < 1024*N. */\n+ uint64x2_t sbits = v_lookup_u64 (SBits, idx);\n+ sbits = vaddq_u64 (sbits, top);\n+ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */\n+ float64x2_t r2 = vmulq_f64 (r, r);\n+ float64x2_t tmp = vfmaq_laneq_f64 (d->exp_c1, r, exp_consts, 1);\n+ tmp = vfmaq_f64 (d->exp_c0, r, tmp);\n+ tmp = vfmaq_f64 (r, r2, tmp);\n+ float64x2_t scale = vreinterpretq_f64_u64 (sbits);\n+ /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there\n+ is no spurious underflow here even without fma. */\n+ return vfmaq_f64 (scale, scale, tmp);\n+}\n+\n+/* This version of AdvSIMD pow implements an algorithm close to AOR scalar pow\n+ but:\n+ - it does not prevent double-rounding in the exp's specialcase subroutine,\n+ - it does not use a tail in the exponential core computation,\n+ - and pow's exp polynomial order and table bits might differ. */\n+static inline float64x2_t VPCS_ATTR\n+v_pow_inline (float64x2_t x, float64x2_t y, const struct data *d)\n+{\n+ /* Vector Log(ix, &lo). */\n+ float64x2_t vlo;\n+ float64x2_t vhi = v_log_inline (vreinterpretq_u64_f64 (x), &vlo, d);\n+\n+ /* Vector Exp(y_loghi, y_loglo). */\n+ float64x2_t vehi = vmulq_f64 (y, vhi);\n+ float64x2_t vemi = vfmsq_f64 (vehi, y, vhi);\n+ float64x2_t neg_velo = vfmsq_f64 (vemi, y, vlo);\n+ return v_exp_inline (vehi, neg_velo, d);\n+}\ndiff --git a/sysdeps/aarch64/fpu/v_powf_inline.h b/sysdeps/aarch64/fpu/v_powf_inline.h\nnew file mode 100644\nindex 0000000000..f4c86bf403\n--- /dev/null\n+++ b/sysdeps/aarch64/fpu/v_powf_inline.h\n@@ -0,0 +1,116 @@\n+/* Helper for AdvSIMD single-precision pow\n+\n+ Copyright (C) 2025 Free Software Foundation, Inc.\n+ This file is part of the GNU C Library.\n+\n+ The GNU C Library is free software; you can redistribute it and/or\n+ modify it under the terms of the GNU Lesser General Public\n+ License as published by the Free Software Foundation; either\n+ version 2.1 of the License, or (at your option) any later version.\n+\n+ The GNU C Library is distributed in the hope that it will be useful,\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n+ Lesser General Public License for more details.\n+\n+ You should have received a copy of the GNU Lesser General Public\n+ License along with the GNU C Library; if not, see\n+ <https://www.gnu.org/licenses/>. */\n+\n+/* Define data structure and some routines specific to positive x. */\n+#include \"v_powrf_inline.h\"\n+\n+static inline float64x2_t\n+exp2_core_with_sbias (const struct data *d, float64x2_t ylogx,\n+\t\t uint64x2_t sign_bias)\n+{\n+ /* N*x = k + r with r in [-1/2, 1/2]. */\n+ float64x2_t kd = vrndnq_f64 (ylogx);\n+ int64x2_t ki = vcvtaq_s64_f64 (ylogx);\n+ float64x2_t r = vsubq_f64 (ylogx, kd);\n+\n+ /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */\n+ uint64x2_t t = vcombine_u64 (exp2_lookup (d, vgetq_lane_s64 (ki, 0)),\n+\t\t\t exp2_lookup (d, vgetq_lane_s64 (ki, 1)));\n+ int64x2_t ski = vaddq_s64 (ki, vreinterpretq_s64_u64 (sign_bias));\n+ t = vaddq_u64 (t, vreinterpretq_u64_s64 (\n+\t\t\tvshlq_n_s64 (ski, 52 - V_POWF_EXP2_TABLE_BITS)));\n+ float64x2_t s = vreinterpretq_f64_u64 (t);\n+ float64x2_t p = vfmaq_f64 (d->exp2_poly[1], r, d->exp2_poly[0]);\n+ p = vfmaq_f64 (d->exp2_poly[2], r, p);\n+ p = vfmaq_f64 (s, p, vmulq_f64 (s, r));\n+ return p;\n+}\n+\n+static inline float32x4_t\n+powf_core (const struct data *d, float32x4_t *ylogx, uint32x4_t tmp,\n+\t float32x4_t iz, float32x4_t y, int32x4_t k, uint32x4_t sign_bias)\n+{\n+ /* Use double precision for each lane: split input vectors into lo and hi\n+ halves and promote. */\n+ float64x2_t tab0 = log2_lookup (d, vgetq_lane_u32 (tmp, 0)),\n+\t tab1 = log2_lookup (d, vgetq_lane_u32 (tmp, 1)),\n+\t tab2 = log2_lookup (d, vgetq_lane_u32 (tmp, 2)),\n+\t tab3 = log2_lookup (d, vgetq_lane_u32 (tmp, 3));\n+\n+ float64x2_t iz_lo = vcvt_f64_f32 (vget_low_f32 (iz)),\n+\t iz_hi = vcvt_high_f64_f32 (iz);\n+\n+ float64x2_t k_lo = vcvtq_f64_s64 (vmovl_s32 (vget_low_s32 (k))),\n+\t k_hi = vcvtq_f64_s64 (vmovl_high_s32 (k));\n+\n+ float64x2_t invc_lo = vzip1q_f64 (tab0, tab1),\n+\t invc_hi = vzip1q_f64 (tab2, tab3),\n+\t logc_lo = vzip2q_f64 (tab0, tab1),\n+\t logc_hi = vzip2q_f64 (tab2, tab3);\n+\n+ float64x2_t y_lo = vcvt_f64_f32 (vget_low_f32 (y)),\n+\t y_hi = vcvt_high_f64_f32 (y);\n+\n+ float64x2_t ylogx_lo = ylogx_core (d, iz_lo, k_lo, invc_lo, logc_lo, y_lo);\n+ float64x2_t ylogx_hi = ylogx_core (d, iz_hi, k_hi, invc_hi, logc_hi, y_hi);\n+\n+ uint64x2_t sign_bias_lo = vmovl_u32 (vget_low_u32 (sign_bias));\n+ uint64x2_t sign_bias_hi = vmovl_high_u32 (sign_bias);\n+\n+ float32x2_t p_lo\n+ = vcvt_f32_f64 (exp2_core_with_sbias (d, ylogx_lo, sign_bias_lo));\n+ float32x2_t p_hi\n+ = vcvt_f32_f64 (exp2_core_with_sbias (d, ylogx_hi, sign_bias_hi));\n+\n+ *ylogx = vcombine_f32 (vcvt_f32_f64 (ylogx_lo), vcvt_f32_f64 (ylogx_hi));\n+\n+ return vcombine_f32 (p_lo, p_hi);\n+}\n+\n+/* Power implementation without assumptions on x or y.\n+ Evaluate powr(|x|) = exp (y * log(|x|)), and handle\n+ sign of x using the sign bias.\n+ Handle underflow and overflow in exponential. */\n+static inline float32x4_t\n+v_powf_core (float32x4_t x, float32x4_t y, uint32x4_t sign_bias,\n+\t const struct data *d)\n+{\n+ uint32x4_t ix = vreinterpretq_u32_f32 (x);\n+\n+ /* Part of core computation carried in working precision. */\n+ uint32x4_t tmp = vsubq_u32 (ix, d->off);\n+ uint32x4_t top = vbicq_u32 (tmp, v_u32 (MantissaMask));\n+ float32x4_t iz = vreinterpretq_f32_u32 (vsubq_u32 (ix, top));\n+ int32x4_t k\n+ = vshrq_n_s32 (vreinterpretq_s32_u32 (top),\n+\t\t 23 - V_POWF_EXP2_TABLE_BITS); /* arithmetic shift. */\n+\n+ /* Compute core in extended precision and return intermediate ylogx results\n+ to handle cases of underflow and overflow in exp. */\n+ float32x4_t ylogx;\n+ float32x4_t ret = powf_core (d, &ylogx, tmp, iz, y, k, sign_bias);\n+\n+ /* Handle exp special cases of underflow and overflow. */\n+ uint32x4_t sign = vshlq_n_u32 (sign_bias, 20 - V_POWF_EXP2_TABLE_BITS);\n+ float32x4_t ret_oflow = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->inf));\n+ float32x4_t ret_uflow = vreinterpretq_f32_u32 (sign);\n+ ret = vbslq_f32 (vcleq_f32 (ylogx, d->uflow_bound), ret_uflow, ret);\n+ ret = vbslq_f32 (vcgtq_f32 (ylogx, d->oflow_bound), ret_oflow, ret);\n+ return ret;\n+}\ndiff --git a/sysdeps/aarch64/fpu/v_powrf_inline.h b/sysdeps/aarch64/fpu/v_powrf_inline.h\nnew file mode 100644\nindex 0000000000..0168c32338\n--- /dev/null\n+++ b/sysdeps/aarch64/fpu/v_powrf_inline.h\n@@ -0,0 +1,239 @@\n+/* Helper for AdvSIMD single-precision powr\n+\n+ Copyright (C) 2025 Free Software Foundation, Inc.\n+ This file is part of the GNU C Library.\n+\n+ The GNU C Library is free software; you can redistribute it and/or\n+ modify it under the terms of the GNU Lesser General Public\n+ License as published by the Free Software Foundation; either\n+ version 2.1 of the License, or (at your option) any later version.\n+\n+ The GNU C Library is distributed in the hope that it will be useful,\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n+ Lesser General Public License for more details.\n+\n+ You should have received a copy of the GNU Lesser General Public\n+ License along with the GNU C Library; if not, see\n+ <https://www.gnu.org/licenses/>. */\n+\n+#define Log2IdxMask (V_POWF_LOG2_N - 1)\n+#define Exp2IdxMask (V_POWF_EXP2_N - 1)\n+#define Scale ((double) V_POWF_EXP2_N)\n+#define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11))\n+\n+#define MantissaMask 0x007fffff\n+\n+static const struct data\n+{\n+ uint32x4_t one, special_bound, sign_bias;\n+ float32x4_t norm;\n+ uint32x4_t subnormal_bias;\n+ uint32x4_t off;\n+ float32x4_t uflow_bound, oflow_bound;\n+ uint32x4_t inf;\n+ float32x4_t nan;\n+ struct\n+ {\n+ double invc, logc;\n+ } log2_tab[V_POWF_LOG2_N];\n+ float64x2_t log2_poly[4];\n+ uint64_t exp2_tab[V_POWF_EXP2_N];\n+ float64x2_t exp2_poly[3];\n+} data = {\n+ /* Table and polynomial for log2 approximation. */\n+ .log2_tab = {\n+ {0x1.6489890582816p+0, -0x1.e960f97b22702p-2 * Scale},\n+ {0x1.5cf19b35e3472p+0, -0x1.c993406cd4db6p-2 * Scale},\n+ {0x1.55aac0e956d65p+0, -0x1.aa711d9a7d0f3p-2 * Scale},\n+ {0x1.4eb0022977e01p+0, -0x1.8bf37bacdce9bp-2 * Scale},\n+ {0x1.47fcccda1dd1fp+0, -0x1.6e13b3519946ep-2 * Scale},\n+ {0x1.418ceabab68c1p+0, -0x1.50cb8281e4089p-2 * Scale},\n+ {0x1.3b5c788f1edb3p+0, -0x1.341504a237e2bp-2 * Scale},\n+ {0x1.3567de48e9c9ap+0, -0x1.17eaab624ffbbp-2 * Scale},\n+ {0x1.2fabc80fd19bap+0, -0x1.f88e708f8c853p-3 * Scale},\n+ {0x1.2a25200ce536bp+0, -0x1.c24b6da113914p-3 * Scale},\n+ {0x1.24d108e0152e3p+0, -0x1.8d02ee397cb1dp-3 * Scale},\n+ {0x1.1facd8ab2fbe1p+0, -0x1.58ac1223408b3p-3 * Scale},\n+ {0x1.1ab614a03efdfp+0, -0x1.253e6fd190e89p-3 * Scale},\n+ {0x1.15ea6d03af9ffp+0, -0x1.e5641882c12ffp-4 * Scale},\n+ {0x1.1147b994bb776p+0, -0x1.81fea712926f7p-4 * Scale},\n+ {0x1.0ccbf650593aap+0, -0x1.203e240de64a3p-4 * Scale},\n+ {0x1.0875408477302p+0, -0x1.8029b86a78281p-5 * Scale},\n+ {0x1.0441d42a93328p+0, -0x1.85d713190fb9p-6 * Scale},\n+ {0x1p+0, 0x0p+0 * Scale},\n+ {0x1.f1d006c855e86p-1, 0x1.4c1cc07312997p-5 * Scale},\n+ {0x1.e28c3341aa301p-1, 0x1.5e1848ccec948p-4 * Scale},\n+ {0x1.d4bdf9aa64747p-1, 0x1.04cfcb7f1196fp-3 * Scale},\n+ {0x1.c7b45a24e5803p-1, 0x1.582813d463c21p-3 * Scale},\n+ {0x1.bb5f5eb2ed60ap-1, 0x1.a936fa68760ccp-3 * Scale},\n+ {0x1.afb0bff8fe6b4p-1, 0x1.f81bc31d6cc4ep-3 * Scale},\n+ {0x1.a49badf7ab1f5p-1, 0x1.2279a09fae6b1p-2 * Scale},\n+ {0x1.9a14a111fc4c9p-1, 0x1.47ec0b6df5526p-2 * Scale},\n+ {0x1.901131f5b2fdcp-1, 0x1.6c71762280f1p-2 * Scale},\n+ {0x1.8687f73f6d865p-1, 0x1.90155070798dap-2 * Scale},\n+ {0x1.7d7067eb77986p-1, 0x1.b2e23b1d3068cp-2 * Scale},\n+ {0x1.74c2c1cf97b65p-1, 0x1.d4e21b0daa86ap-2 * Scale},\n+ {0x1.6c77f37cff2a1p-1, 0x1.f61e2a2f67f3fp-2 * Scale},\n+ },\n+ .log2_poly = { /* rel err: 1.5 * 2^-30. */\n+ V2 (-0x1.6ff5daa3b3d7cp-2 * Scale),\n+ V2 (0x1.ec81d03c01aebp-2 * Scale),\n+ V2 (-0x1.71547bb43f101p-1 * Scale),\n+ V2 (0x1.7154764a815cbp0 * Scale)\n+ },\n+ /* Table and polynomial for exp2 approximation. */\n+ .exp2_tab = {\n+ 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f,\n+ 0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa,\n+ 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715,\n+ 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,\n+ 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429,\n+ 0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74,\n+ 0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db,\n+ 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,\n+ 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c,\n+ 0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f,\n+ 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,\n+ },\n+ .exp2_poly = { /* rel err: 1.69 * 2^-34. */\n+ V2 (0x1.c6af84b912394p-5 / Scale / Scale / Scale),\n+ V2 (0x1.ebfce50fac4f3p-3 / Scale / Scale),\n+ V2 (0x1.62e42ff0c52d6p-1 / Scale),\n+ },\n+ .one = V4 (1),\n+ .special_bound = V4 (2u * 0x7f800000 - 1),\n+ .norm = V4 (0x1p23f),\n+ .subnormal_bias = V4 (0x0b800000), /* 23 << 23. */\n+ .off = V4 (0x3f35d000),\n+ .sign_bias = V4 (SignBias),\n+ .inf = V4 (0x7f800000),\n+ .nan = V4 (__builtin_nanf (\"\")),\n+ /* 2.6 ulp ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2). */\n+ .uflow_bound = V4 (-0x1.2cp+12f), /* -150.0 * V_POWF_EXP2_N. */\n+ .oflow_bound = V4 (0x1p+12f), /* 128.0 * V_POWF_EXP2_N. */\n+};\n+\n+/* Check if zero, inf or nan. */\n+static inline uint32x4_t\n+v_zeroinfnan (const struct data *d, uint32x4_t i)\n+{\n+ return vcgeq_u32 (vsubq_u32 (vaddq_u32 (i, i), d->one), d->special_bound);\n+}\n+\n+static inline float64x2_t\n+ylogx_core (const struct data *d, float64x2_t iz, float64x2_t k,\n+\t float64x2_t invc, float64x2_t logc, float64x2_t y)\n+{\n+\n+ /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. */\n+ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), iz, invc);\n+ float64x2_t y0 = vaddq_f64 (logc, k);\n+\n+ /* Polynomial to approximate log1p(r)/ln2. */\n+ float64x2_t logx = vfmaq_f64 (d->log2_poly[1], r, d->log2_poly[0]);\n+ logx = vfmaq_f64 (d->log2_poly[2], logx, r);\n+ logx = vfmaq_f64 (d->log2_poly[3], logx, r);\n+ logx = vfmaq_f64 (y0, logx, r);\n+\n+ return vmulq_f64 (logx, y);\n+}\n+\n+static inline float64x2_t\n+log2_lookup (const struct data *d, uint32_t i)\n+{\n+ return vld1q_f64 (\n+ &d->log2_tab[(i >> (23 - V_POWF_LOG2_TABLE_BITS)) & Log2IdxMask].invc);\n+}\n+\n+static inline uint64x1_t\n+exp2_lookup (const struct data *d, uint64_t i)\n+{\n+ return vld1_u64 (&d->exp2_tab[i & Exp2IdxMask]);\n+}\n+\n+static inline float64x2_t\n+exp2_core (const struct data *d, float64x2_t ylogx)\n+{\n+ /* N*x = k + r with r in [-1/2, 1/2]. */\n+ float64x2_t kd = vrndnq_f64 (ylogx);\n+ int64x2_t ki = vcvtaq_s64_f64 (ylogx);\n+ float64x2_t r = vsubq_f64 (ylogx, kd);\n+\n+ /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */\n+ uint64x2_t t = vcombine_u64 (exp2_lookup (d, vgetq_lane_s64 (ki, 0)),\n+\t\t\t exp2_lookup (d, vgetq_lane_s64 (ki, 1)));\n+ t = vaddq_u64 (t, vreinterpretq_u64_s64 (\n+\t\t\tvshlq_n_s64 (ki, 52 - V_POWF_EXP2_TABLE_BITS)));\n+ float64x2_t s = vreinterpretq_f64_u64 (t);\n+ float64x2_t p = vfmaq_f64 (d->exp2_poly[1], r, d->exp2_poly[0]);\n+ p = vfmaq_f64 (d->exp2_poly[2], r, p);\n+ p = vfmaq_f64 (s, p, vmulq_f64 (s, r));\n+ return p;\n+}\n+\n+static inline float32x4_t\n+powrf_core (const struct data *d, float32x4_t *ylogx, uint32x4_t tmp,\n+\t float32x4_t iz, float32x4_t y, int32x4_t k)\n+{\n+ /* Use double precision for each lane: split input vectors into lo and hi\n+ halves and promote. */\n+ float64x2_t tab0 = log2_lookup (d, vgetq_lane_u32 (tmp, 0)),\n+\t tab1 = log2_lookup (d, vgetq_lane_u32 (tmp, 1)),\n+\t tab2 = log2_lookup (d, vgetq_lane_u32 (tmp, 2)),\n+\t tab3 = log2_lookup (d, vgetq_lane_u32 (tmp, 3));\n+\n+ float64x2_t iz_lo = vcvt_f64_f32 (vget_low_f32 (iz)),\n+\t iz_hi = vcvt_high_f64_f32 (iz);\n+\n+ float64x2_t k_lo = vcvtq_f64_s64 (vmovl_s32 (vget_low_s32 (k))),\n+\t k_hi = vcvtq_f64_s64 (vmovl_high_s32 (k));\n+\n+ float64x2_t invc_lo = vzip1q_f64 (tab0, tab1),\n+\t invc_hi = vzip1q_f64 (tab2, tab3),\n+\t logc_lo = vzip2q_f64 (tab0, tab1),\n+\t logc_hi = vzip2q_f64 (tab2, tab3);\n+\n+ float64x2_t y_lo = vcvt_f64_f32 (vget_low_f32 (y)),\n+\t y_hi = vcvt_high_f64_f32 (y);\n+\n+ float64x2_t ylogx_lo = ylogx_core (d, iz_lo, k_lo, invc_lo, logc_lo, y_lo);\n+ float64x2_t ylogx_hi = ylogx_core (d, iz_hi, k_hi, invc_hi, logc_hi, y_hi);\n+\n+ float32x2_t p_lo = vcvt_f32_f64 (exp2_core (d, ylogx_lo));\n+ float32x2_t p_hi = vcvt_f32_f64 (exp2_core (d, ylogx_hi));\n+\n+ *ylogx = vcombine_f32 (vcvt_f32_f64 (ylogx_lo), vcvt_f32_f64 (ylogx_hi));\n+\n+ return vcombine_f32 (p_lo, p_hi);\n+}\n+\n+/* Power implementation without assumptions on x or y.\n+ Evaluate powr(|x|) = exp (y * log(|x|)), and handle\n+ sign of x using the sign bias.\n+ Handle underflow and overflow in exponential. */\n+static inline float32x4_t\n+v_powrf_core (float32x4_t x, float32x4_t y, const struct data *d)\n+{\n+ uint32x4_t ix = vreinterpretq_u32_f32 (x);\n+\n+ /* Part of core computation carried in working precision. */\n+ uint32x4_t tmp = vsubq_u32 (ix, d->off);\n+ uint32x4_t top = vbicq_u32 (tmp, v_u32 (MantissaMask));\n+ float32x4_t iz = vreinterpretq_f32_u32 (vsubq_u32 (ix, top));\n+ int32x4_t k\n+ = vshrq_n_s32 (vreinterpretq_s32_u32 (top),\n+\t\t 23 - V_POWF_EXP2_TABLE_BITS); /* arithmetic shift. */\n+\n+ /* Compute core in extended precision and return intermediate ylogx results\n+ to handle cases of underflow and overflow in exp. */\n+ float32x4_t ylogx;\n+ float32x4_t ret = powrf_core (d, &ylogx, tmp, iz, y, k);\n+\n+ /* Handle exp special cases of underflow and overflow. */\n+ float32x4_t ret_oflow = vreinterpretq_f32_u32 (d->inf);\n+ float32x4_t ret_uflow = v_f32 (0);\n+ ret = vbslq_f32 (vcleq_f32 (ylogx, d->uflow_bound), ret_uflow, ret);\n+ ret = vbslq_f32 (vcgtq_f32 (ylogx, d->oflow_bound), ret_oflow, ret);\n+ return ret;\n+}\n", "prefixes": [ "v2", "1/4" ] }