Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/1.0/patches/2175666/?format=api
{ "id": 2175666, "url": "http://patchwork.ozlabs.org/api/1.0/patches/2175666/?format=api", "project": { "id": 17, "url": "http://patchwork.ozlabs.org/api/1.0/projects/17/?format=api", "name": "GNU Compiler Collection", "link_name": "gcc", "list_id": "gcc-patches.gcc.gnu.org", "list_email": "gcc-patches@gcc.gnu.org", "web_url": null, "scm_url": null, "webscm_url": null }, "msgid": "<20251218171459.75831-3-claudio.bantaloukas@arm.com>", "date": "2025-12-18T17:14:52", "name": "[v5,2/9] aarch64: extend sme intrinsics to mfp8", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "393ea3cb3c6c30633f113b431208febcd22968ef", "submitter": { "id": 88972, "url": "http://patchwork.ozlabs.org/api/1.0/people/88972/?format=api", "name": "Claudio Bantaloukas", "email": "claudio.bantaloukas@arm.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/gcc/patch/20251218171459.75831-3-claudio.bantaloukas@arm.com/mbox/", "series": [ { "id": 485887, "url": "http://patchwork.ozlabs.org/api/1.0/series/485887/?format=api", "date": "2025-12-18T17:14:53", "name": "aarch64: Add fp8 sme 2.1 features per ACLE 2024Q4", "version": 5, "mbox": "http://patchwork.ozlabs.org/series/485887/mbox/" } ], "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2175666/checks/", "tags": {}, "headers": { "Return-Path": "<gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "gcc-patches@gcc.gnu.org" ], "Delivered-To": [ "patchwork-incoming@legolas.ozlabs.org", "gcc-patches@gcc.gnu.org" ], "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (1024-bit key;\n unprotected) header.d=arm.com header.i=@arm.com header.a=rsa-sha256\n header.s=selector1 header.b=rTK/6tId;\n\tdkim=pass (1024-bit key) header.d=arm.com header.i=@arm.com\n header.a=rsa-sha256 header.s=selector1 header.b=rTK/6tId;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) 
smtp.mailfrom=gcc.gnu.org\n (client-ip=38.145.34.32; helo=vm01.sourceware.org;\n envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org;\n receiver=patchwork.ozlabs.org)", "sourceware.org;\n\tdkim=pass (1024-bit key,\n unprotected) header.d=arm.com header.i=@arm.com header.a=rsa-sha256\n header.s=selector1 header.b=rTK/6tId;\n\tdkim=pass (1024-bit key) header.d=arm.com header.i=@arm.com\n header.a=rsa-sha256 header.s=selector1 header.b=rTK/6tId", "sourceware.org;\n dmarc=pass (p=none dis=none) header.from=arm.com", "sourceware.org; spf=pass smtp.mailfrom=arm.com", "server2.sourceware.org;\n arc=pass smtp.remote-ip=40.107.130.23" ], "Received": [ "from vm01.sourceware.org (vm01.sourceware.org [38.145.34.32])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4dXHWz1Y46z1xty\n\tfor <incoming@patchwork.ozlabs.org>; Fri, 19 Dec 2025 04:21:35 +1100 (AEDT)", "from vm01.sourceware.org (localhost [127.0.0.1])\n\tby sourceware.org (Postfix) with ESMTP id 74A164BA2E33\n\tfor <incoming@patchwork.ozlabs.org>; Thu, 18 Dec 2025 17:21:32 +0000 (GMT)", "from MRWPR03CU001.outbound.protection.outlook.com\n (mail-francesouthazon11011023.outbound.protection.outlook.com\n [40.107.130.23])\n by sourceware.org (Postfix) with ESMTPS id 832B94BA2E28\n for <gcc-patches@gcc.gnu.org>; Thu, 18 Dec 2025 17:17:06 +0000 (GMT)", "from AM0P190CA0005.EURP190.PROD.OUTLOOK.COM (2603:10a6:208:190::15)\n by GV2PR08MB11253.eurprd08.prod.outlook.com (2603:10a6:150:2f6::15)\n with Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.9412.13; Thu, 18 Dec\n 2025 17:16:59 +0000", "from AM4PEPF00027A69.eurprd04.prod.outlook.com\n (2603:10a6:208:190:cafe::f9) by AM0P190CA0005.outlook.office365.com\n (2603:10a6:208:190::15) with Microsoft SMTP Server 
(version=TLS1_3,\n cipher=TLS_AES_256_GCM_SHA384) id 15.20.9434.6 via Frontend Transport; Thu,\n 18 Dec 2025 17:16:57 +0000", "from outbound-uk1.az.dlp.m.darktrace.com (4.158.2.129) by\n AM4PEPF00027A69.mail.protection.outlook.com (10.167.16.87) with Microsoft\n SMTP Server (version=TLS1_3, cipher=TLS_AES_256_GCM_SHA384) id 15.20.9434.6\n via Frontend Transport; Thu, 18 Dec 2025 17:16:58 +0000", "from DB9PR05CA0017.eurprd05.prod.outlook.com (2603:10a6:10:1da::22)\n by DB9PR08MB9708.eurprd08.prod.outlook.com (2603:10a6:10:460::16)\n with Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.9434.8; Thu, 18 Dec\n 2025 17:15:18 +0000", "from DB1PEPF000509F3.eurprd02.prod.outlook.com\n (2603:10a6:10:1da:cafe::70) by DB9PR05CA0017.outlook.office365.com\n (2603:10a6:10:1da::22) with Microsoft SMTP Server (version=TLS1_3,\n cipher=TLS_AES_256_GCM_SHA384) id 15.20.9434.8 via Frontend Transport; Thu,\n 18 Dec 2025 17:15:18 +0000", "from nebula.arm.com (172.205.89.229) by\n DB1PEPF000509F3.mail.protection.outlook.com (10.167.242.149) with Microsoft\n SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id\n 15.20.9434.6 via Frontend Transport; Thu, 18 Dec 2025 17:15:18 +0000", "from AZ-NEU-EX04.Arm.com (10.240.25.138) by AZ-NEU-EX03.Arm.com\n (10.240.25.137) with Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.2562.29; Thu, 18 Dec\n 2025 17:15:07 +0000", "from e72c20ac6da1.eu-west-1.compute.internal (10.249.56.29) by\n mail.arm.com (10.240.25.138) with Microsoft SMTP Server (version=TLS1_2,\n cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.2562.29 via Frontend\n Transport; Thu, 18 Dec 2025 17:15:07 +0000" ], "DKIM-Filter": [ "OpenDKIM Filter v2.11.0 sourceware.org 74A164BA2E33", "OpenDKIM Filter v2.11.0 sourceware.org 832B94BA2E28" ], "DMARC-Filter": "OpenDMARC Filter v1.4.2 sourceware.org 832B94BA2E28", "ARC-Filter": "OpenARC Filter v1.0.0 sourceware.org 
832B94BA2E28", "ARC-Seal": [ "i=3; a=rsa-sha256; d=sourceware.org; s=key; t=1766078227; cv=pass;\n b=d1SehQELrs3P5zMd5cXlmdWYCxfLwKscM+afOITQiFit2oBXKD0V9B+xZvU+MMQuaaSuSCdHciu6gWJVkubSRS9tueXKr3PXDinKnGyNhSgFiUN9gQHkkguiNlbyih/lQDY3AKCvx/MjK930GbkERaRz/UNQBio8E987GfgtEZA=", "i=2; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=pass;\n b=wHWxDo+sldNZ6e1xroPMXxDXI/MrMMNdT/WK7yKeldh9HVTIc5dE1I/3AAoz8MTLlAKvkdhwTVNRRsG25w4yXXBA0ptTO2Sc0idYjW0ChFaBoaUmQOWHy5iXs1c1p77lgAnWXv8la9156BJJNBHYqsBL6RFLpyUBVazboTZUOTKP2lhIpdAxhsxGLcmyvgkXvfLEIsK9dRNuZqh44dsG/zXTi2NDQvKJF45QKOlWbKxUcp3xhVGoxcTLVpjfQT4omQVy0TIySSTSzJ1iT32GnrbaRkLEDO3//hBvblsko9CttjjuCTZp3dFUERcfNqYq+70OdWnIRo1IxulMWdex6w==", "i=1; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=none;\n b=I8zUlQNYMFIhZe1LodPZixhbNR5JHfY4/4KOUQ+5aJTVHO72S07a6YS6fGtkwwMADdpfC0P0Jgj1MEWRy9SKblUnZLy96jdSQA+UY1Z9egtsg3qFxknJwAvfjtxq3pyXYbY8R/4VAdRrajzhlEk/KVYFQS5xU29GwpBuDgZqLmLI5PU2tgzcTEKOq6i0K3N8wYVC5QguF+BzvsmZS0wnEe9hT53ur7Mmm36U8kJrwGa8yneaKtKMF4rKCx5sOrluxN8CFe+rYUjqIpZJ+d8Ii0Ag0S79kmowXCGe2aUTwsgVaXweupxLOEPIGDCVwoEMo1hgutu33k2Kv2RRbfZ5eA==" ], "ARC-Message-Signature": [ "i=3; a=rsa-sha256; d=sourceware.org; s=key;\n t=1766078227; c=relaxed/simple;\n bh=7AXI9Bf7PWkxEwODLGsc/1wNJoWZgLGKtx1f/jJF9gI=;\n h=DKIM-Signature:DKIM-Signature:From:To:Subject:Date:Message-ID:\n MIME-Version;\n b=XCdMmMeekU4pwCHONS8c7tJWj97HMjoUKIvon7BiOgXSEeI0ymW/qrAi5yoDuNkPGOolng0B4Wl8+BMHki+lGxmG8HnDLgFwXrrvAaB/epaJeGJ+cTnNXNvXmUOJgC5QBxY8yyMfDvSI8AAm66N8j0f1cpl8S6NDbhA0m3tN3gI=", "i=2; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com;\n s=arcselector10001;\n h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1;\n bh=u17N086EWe8S85YzDxGLnzWvmQnQkMO0wAw/bPxi23Q=;\n 
b=r3mJpJ3JyrBQMg1e1CQ3MZl1wswDRIH9J1DffGmUvzPH/ZMGawKWGbZrgzKBa89nu6s/W1gysctBaqX2a8uo8sSF/bZ29XYnR+HKXwYmivWx7SJtOFeKr9Dkc/BfjU9AAxYiE9v/c5LHCdvNy6ijz0TnFeIMAOlpYqD0M8+/sI1JldlWyyBrvSfMWVNjQLxqB81LZM8N5w9ZyjYNcfki+A845ta11SkEi0OjJ/UUuVOoRycVLFq2Q3q998H1Khwkqa3El+BC+0YzAJCEjILb4JgUZxy+qYHifkctE4Xr+LeWF+F+U7OKbk26bMwFsnIxxQHGQFcwiI7DumwbRVLw2w==", "i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com;\n s=arcselector10001;\n h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1;\n bh=u17N086EWe8S85YzDxGLnzWvmQnQkMO0wAw/bPxi23Q=;\n b=Nz0RAyQ3TkS690/gRlUaLHZzoUNUPO9rv47kIhIwwdeMzWSfAoxcMUrxBAH0t5jhcmrnTSKN8GiA165Cxc0lvuPyvlLDdsSrrbwAA7MnHX0A8n+x0qBFkDquwOdx3sLrgui5Rf1MW3CVSlF05/bq+N+3fo4i2u/Gex7QJWNlySlMKfBynL63qSbQ6XqqxKlaNpB3DbF0uZa4o//5ksIQrpl2Wsjff+r0ae2rXzNGGbOnrHDIf5Ad6hEH1oZhLYrDj+T+hlTMdfg51MTQhpdDymIH4upBH1PtFQ1UQOofOtlhEweKU3+KPScjCihQGwWtSRSUfEbiYDoxcFHwThpRrQ==" ], "ARC-Authentication-Results": [ "i=3; server2.sourceware.org", "i=2; mx.microsoft.com 1; spf=pass (sender ip is\n 4.158.2.129) smtp.rcpttodomain=oss.qualcomm.com smtp.mailfrom=arm.com;\n dmarc=pass (p=none sp=none pct=100) action=none header.from=arm.com;\n dkim=pass (signature was verified) header.d=arm.com; arc=pass (0 oda=1 ltdi=1\n spf=[1,1,smtp.mailfrom=arm.com] dmarc=[1,1,header.from=arm.com])", "i=1; mx.microsoft.com 1; spf=pass (sender ip is\n 172.205.89.229) smtp.rcpttodomain=gcc.gnu.org smtp.mailfrom=arm.com;\n dmarc=pass (p=none sp=none pct=100) action=none header.from=arm.com;\n dkim=none (message not signed); arc=none (0)" ], "DKIM-Signature": [ "v=1; a=rsa-sha256; c=relaxed/relaxed; d=arm.com; s=selector1;\n h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck;\n bh=u17N086EWe8S85YzDxGLnzWvmQnQkMO0wAw/bPxi23Q=;\n 
b=rTK/6tId5G/5CryuWErLmJXx3nlIb8eOxuUHwFQtO+4YpHG55LOkXwSB99Vkglw17STQuKobPn8m2TXwhvrkmivi/ESziIX14CJhb+zSv0kmZ7gzufdd71wf1qBSAd+N5Va51JOCNEZdSCi82JHkHyiWLmAcxM9rKpUuPFbWgtg=", "v=1; a=rsa-sha256; c=relaxed/relaxed; d=arm.com; s=selector1;\n h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck;\n bh=u17N086EWe8S85YzDxGLnzWvmQnQkMO0wAw/bPxi23Q=;\n b=rTK/6tId5G/5CryuWErLmJXx3nlIb8eOxuUHwFQtO+4YpHG55LOkXwSB99Vkglw17STQuKobPn8m2TXwhvrkmivi/ESziIX14CJhb+zSv0kmZ7gzufdd71wf1qBSAd+N5Va51JOCNEZdSCi82JHkHyiWLmAcxM9rKpUuPFbWgtg=" ], "X-MS-Exchange-Authentication-Results": [ "spf=pass (sender IP is 4.158.2.129)\n smtp.mailfrom=arm.com; dkim=pass (signature was verified)\n header.d=arm.com;dmarc=pass action=none header.from=arm.com;", "spf=pass (sender IP is 172.205.89.229)\n smtp.mailfrom=arm.com; dkim=none (message not signed)\n header.d=none;dmarc=pass action=none header.from=arm.com;" ], "Received-SPF": [ "Pass (protection.outlook.com: domain of arm.com designates\n 4.158.2.129 as permitted sender) receiver=protection.outlook.com;\n client-ip=4.158.2.129; helo=outbound-uk1.az.dlp.m.darktrace.com; pr=C", "Pass (protection.outlook.com: domain of arm.com designates\n 172.205.89.229 as permitted sender) receiver=protection.outlook.com;\n client-ip=172.205.89.229; helo=nebula.arm.com; pr=C" ], "From": "Claudio Bantaloukas <claudio.bantaloukas@arm.com>", "To": "Gcc Patches ML <gcc-patches@gcc.gnu.org>", "CC": "Alex Coplan <alex.coplan@arm.com>, Alice Carlotti\n <alice.carlotti@arm.com>, Andrew Pinski <andrew.pinski@oss.qualcomm.com>,\n Kyrylo Tkachov <ktkachov@nvidia.com>, Richard Earnshaw\n <richard.earnshaw@arm.com>, Tamar Christina <tamar.christina@arm.com>, \"Wilco\n Dijkstra\" <wilco.dijkstra@arm.com>, Claudio Bantaloukas\n <claudio.bantaloukas@arm.com>", "Subject": "[PATCH v5 2/9] aarch64: extend sme intrinsics to mfp8", "Date": "Thu, 18 Dec 2025 17:14:52 +0000", "Message-ID": "<20251218171459.75831-3-claudio.bantaloukas@arm.com>", 
"X-Mailer": "git-send-email 2.51.0", "In-Reply-To": "<20251218171459.75831-1-claudio.bantaloukas@arm.com>", "References": "<20251218171459.75831-1-claudio.bantaloukas@arm.com>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "Content-Type": "text/plain", "X-EOPAttributedMessage": "1", "X-MS-TrafficTypeDiagnostic": "\n DB1PEPF000509F3:EE_|DB9PR08MB9708:EE_|AM4PEPF00027A69:EE_|GV2PR08MB11253:EE_", "X-MS-Office365-Filtering-Correlation-Id": "9cf6c92e-c3cd-4979-66ca-08de3e59407a", "x-checkrecipientrouted": "true", "NoDisclaimer": "true", "X-MS-Exchange-SenderADCheck": "1", "X-MS-Exchange-AntiSpam-Relay": "0", "X-Microsoft-Antispam-Untrusted": "BCL:0;\n ARA:13230040|82310400026|376014|1800799024|36860700013|13003099007;", "X-Microsoft-Antispam-Message-Info-Original": "\n JMujjNnH+fvhU8G+tl67RpC1+IGb1y6G+utqJ8qXOYQZsgNqmRjrzTYGgho0/TOSdKMSJKVBaCbKxzo86O125CNtXDjOGH7KZp/aEtt09WImZaQGPeqtCM6iEEdApg+7pt1NwgNBhWRx1Z4uFx9WV9r1IDvt/dUMKte+YFXZuJ+kC/jkLzERyr0NaCUSxD5qByiWlHLFuMao4ioZhysHVGjitH2nE5ay+/0MOPfuWvPi62ixhCuZC82rDHImyapNlOPODXOwhjvLSVG9O4AiEg+E7748r/6y6KNLnM6nkeo0zVAHKipjkMZChY3uALaVZV0pYMX25G07s49j4d/xabbDwbZlv8CMzB5M7Ol0We4EwnkV0okb1FWr9WCd2/Ow0H8kro3Nm1EAdWKwl3X3do9A3fl53iXbZGx9jNbzV1dPFf2F3YjOkpz9vnSrz486TvvoU31jRvXs+F55ePnsqLE5WrWcbmkzwGywzfTvIWKx6vzSe1Z79XEyJd54MbtK5mUtNTXMo1Sgldy/mxd7FJBncCyw2KWyLOIo1KfxDoWVBm92ofZp90hH2JtGlDXu3dGKqhbTKLuf63FQrB4sk4axheuhWzrduNAW6OZQvirwqrxOIz24cdM0akqMIznI8BZaUAcnqvnYw75HOkB5ygqiz/XryAlOISPYvnPlDNQ5NF5AiytqHhQcvNV1AebJRvELAs/4kQ908QoAJK/iIQ6gxhwNHooxdJlDNhs0KaHeSZZwEgqMKs1yTOsPGfe0dULq7z/vNaYDB2Eln5xQhwjb8oHzY/4haUuzZDbT3u80jvmxPcaaMseaMox5VyQYs3Gr3Q9AW257FOLJPw5NWMEtcAnjZbsQihMGjymUwKHFcUFcqYhicJ28PYklcl+8c8M/78CgVwxUEqKLO1Y/8enOKPiEx1loUg+ZYthdL2CGPlR/nPCCz7HZz4WAnCovW+V/gq5RTdJXWwTiehqD4W2JXsYWO3TfjALfdM4EskgyYODte8L8MJNA1fkZggRiKcRA2JPBPEeo7RvkA/jzR/Y4OoUPkeXQYVScBtBMCr8ApzLqhhvYJZZBTHiDvThbkTAqCFhClFjqiAYdHEExpK2geUkTQJt54Hka/l8F5X9sv9oK10ki9WKVo43Tz7C37D+HUmWThQg2h7TDKm1HaaFm5QRbi4rEFmvtw9QMop5rpC8FdJBE6xqrPP
vGjRCZM1VfuaMUv6OswNHXmUQUO4gMN4mbHzKWsLnwO4lk0YHbATYvDadY0W+CWI8oMIIJthlX3X9TKIbA/ODtOhELPjPSKjSOSYsFXNFZ2zn9dW+tT2uY2p0yg2td6Gm5v/mpz13Hn1qrgc8D32g0WjlRv5j3Ol6ir2dI0vnDVNtphz4ojH3dDSffZnAblIlmOd64jM1M0llTXrl9vJ3F2gjKIxQxLtTR8H+GD4qj65zXIv8WWjFQPFizV7GDQcwhM58KY/b0I2+e8KaJ1S+EZDJaEcP0uzQKlb7CYLQ+jyNywTZfYo3G725MaXq/CqgfQnhXKIYaAhTPLHjdaP8mePc4NBWybf5QBxH5Mp+4iRaGjpU=", "X-Forefront-Antispam-Report-Untrusted": "CIP:172.205.89.229; CTRY:IE; LANG:en;\n SCL:1; SRV:; IPV:NLI; SFV:NSPM; H:nebula.arm.com; PTR:InfoDomainNonexistent;\n CAT:NONE;\n SFS:(13230040)(82310400026)(376014)(1800799024)(36860700013)(13003099007);\n DIR:OUT; SFP:1101;", "X-MS-Exchange-Transport-CrossTenantHeadersStamped": [ "DB9PR08MB9708", "GV2PR08MB11253" ], "X-MS-Exchange-Transport-CrossTenantHeadersStripped": "\n AM4PEPF00027A69.eurprd04.prod.outlook.com", "X-MS-PublicTrafficType": "Email", "X-MS-Office365-Filtering-Correlation-Id-Prvs": "\n 85ea4b7c-285d-476b-05d5-08de3e5904e2", "X-Microsoft-Antispam": "BCL:0;\n ARA:13230040|1800799024|376014|36860700013|35042699022|14060799003|82310400026|13003099007;", "X-Microsoft-Antispam-Message-Info": "\n 
DqZT1NHnCmyVuy2aZKjkGBgtFcGuAmKveNz3+uiC699rudHxPnruthKlkuWeCh4Yvt1l70oRjNhvsgYN0AmM+ZAwzqO+39c0JFB83A5W2pns03UIFTPfakTvutylzA4Y4k4TjgLCks8+IpyQxvY/Lmoj+z1vP8fqC7RUVAd65mODJRosDCuWREu//VPJtWD7rhSxAP+m+el2fxEAz67CilhuqfQa+3zp+tCplwfvinngat1KgS2JchnW+Sn/rjFMlhmrCqzIoNN/fZdaWb5mn/pnD5XfnWru5oO1huRF2z9EPM4riiD5BQ20vvNln5RU5AKVvBHdTAJd9q/AKbjHaqwDX6mrsUUFS2eqzBUgny2yDOU/p1DBt4FReyssCMQvDP+FYF5yTS2Ncyh/v3h64OOwMUZpaNA+Zhqm8IUVta1POi7onOPCOQ9y7e1tsYoCXJJhPd/DeL5/BfLbXr3EaS+DAK1HSeoOe6fGtJHa4KpsK8XeApmC6j+eQBBzGzC4Y2t3YT7iu+Riqa0W32DzqRtGVtDICjuzk4YbeTZJEg4KoiAmlwIe3bIzkza3tSHwkQ3DqoJvoKyC/L0oEsnOBulKsz0/xN8sGFuLPsbcbV58YsH/n1gYh6Eqi1Wd3wIa5hDzBAIh9tix6W6qsmQD3nuoTnHlHZtou1NWSN48UlV4g+2lyZwNNSe/K+qmJGGWYkVWytCzbn5AZp95j7MMIvrCzZNu5f+R8mE+PMXk74xRYrpP2RIKGrlRsAaWeSz3gm0oBuZfjh71UUzLu22hg/UaiucyDOI+Foz9jMckU5YRN6s+X3XUSrf1YZz+3DzuAMPCO9nhsBKbtfjcRe6+VXwYzj+y8TrEpiYsGDZGpjcZ45R7A5Y/9em3vC45/t7oDE85WmCWr4dfUJTQWxXz9l30OXJ1Io9NtUtNCn12iXQ1xt7dSiJ8lN3HG/WkeskgmLKFj009u9Z0dsJupA/qDBvs+SlABKBeYyCgy39H1UCSeiDzzuxjOtR+JRLHXRRGBNHDutJjilhi9I4Ub24phXHasuCno9Q3bwQAep/leCf/3R2d1HNaV2+a+zg1Q90IhPFcfskrg9y6+dqoX3P/NCjSmZrCH7z7/oRZV/Jb3bg/8tt6AlXR+MK9IFGQjw/yftGj+GlxWSxG0Z3X/DmL2RMdEbi6SZDkVhCoTvwrv3xtapAKL2MZhFGOWJFvdaynU8Uk7ZzceCX5hPmq0wZ5rJ5YV2mQ5BSA1eClggkkhUd/JxoM1ALKe9T5HGnHiZZrtZnS0zwudNGrhEgT/qaiFpxVwggt3U4Aay3xqbfWsZOn/mPpRuqeCTUOUhehFaYeJQam0sDsI546QCFYMm2wqbCTp+dTzn1Rrl1nz7dfPtqUDCl9/jk8dmWcu5rAaPoBiKyNw3sHsoYBY7ZJqEF70vXSQ3+sLRzul9BRD8DolMIeuhodr+EgmUlFfaLo9A/nI8Bx57CGbOONjzIZQ4ER61m1vy9ptz/ofUp+i+1tzE8et6EaoVbESswkNJW7WN4b8xgkLNo2khTSIAWSXgE+Z4R9e15lVfisyuk3nVHKM5Q=", "X-Forefront-Antispam-Report": "CIP:4.158.2.129; CTRY:GB; LANG:en; SCL:1; SRV:;\n IPV:NLI; SFV:NSPM; H:outbound-uk1.az.dlp.m.darktrace.com;\n PTR:InfoDomainNonexistent; CAT:NONE;\n SFS:(13230040)(1800799024)(376014)(36860700013)(35042699022)(14060799003)(82310400026)(13003099007);\n DIR:OUT; SFP:1101;", "X-OriginatorOrg": "arm.com", "X-MS-Exchange-CrossTenant-OriginalArrivalTime": "18 Dec 2025 17:16:58.6867 
(UTC)", "X-MS-Exchange-CrossTenant-Network-Message-Id": "\n 9cf6c92e-c3cd-4979-66ca-08de3e59407a", "X-MS-Exchange-CrossTenant-Id": "f34e5979-57d9-4aaa-ad4d-b122a662184d", "X-MS-Exchange-CrossTenant-OriginalAttributedTenantConnectingIp": "\n TenantId=f34e5979-57d9-4aaa-ad4d-b122a662184d; Ip=[4.158.2.129];\n Helo=[outbound-uk1.az.dlp.m.darktrace.com]", "X-MS-Exchange-CrossTenant-AuthSource": "\n AM4PEPF00027A69.eurprd04.prod.outlook.com", "X-MS-Exchange-CrossTenant-AuthAs": "Anonymous", "X-MS-Exchange-CrossTenant-FromEntityHeader": "HybridOnPrem", "X-BeenThere": "gcc-patches@gcc.gnu.org", "X-Mailman-Version": "2.1.30", "Precedence": "list", "List-Id": "Gcc-patches mailing list <gcc-patches.gcc.gnu.org>", "List-Unsubscribe": "<https://gcc.gnu.org/mailman/options/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>", "List-Archive": "<https://gcc.gnu.org/pipermail/gcc-patches/>", "List-Post": "<mailto:gcc-patches@gcc.gnu.org>", "List-Help": "<mailto:gcc-patches-request@gcc.gnu.org?subject=help>", "List-Subscribe": "<https://gcc.gnu.org/mailman/listinfo/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>", "Errors-To": "gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org" }, "content": "This patch extends the following intrinsics to support svmfloat8_t types and\nadds tests based on the equivalent ones for svuint8_t.\n\nSME:\n- svread_hor_za8[_mf8]_m, svread_hor_za128[_mf8]_m and related ver.\n- svwrite_hor_za8[_mf8]_m, svwrite_hor_za128[_mf8]_m and related ver.\n\nSME2:\n- svread_hor_za8_mf8_vg2, svread_hor_za8_mf8_vg4 and related ver.\n- svwrite_hor_za8[_mf8]_vg2, svwrite_hor_za8[_mf8]_vg4 and related ver.\n- svread_za8[_mf8]_vg1x2, svread_za8[_mf8]_vg1x4.\n- svwrite_za8[_mf8]_vg1x2, svwrite_za8[_mf8]_vg1x4.\n- svsel[_mf8_x2], svsel[_mf8_x4].\n- svzip[_mf8_x2], svzip[_mf8_x4].\n- svzipq[_mf8_x2], svzipq[_mf8_x4].\n- svuzp[_mf8_x2], svuzp[_mf8_x4].\n- svuzpq[_mf8_x2], svuzpq[_mf8_x4].\n- svld1[_mf8]_x2, 
svld1[_mf8]_x4.\n- svld1_vnum[_mf8]_x2, svld1_vnum[_mf8]_x4.\n\nSVE2.1/SME2:\n- svldnt1[_mf8]_x2, svldnt1[_mf8]_x4.\n- svldnt1_vnum[_mf8]_x2, svldnt1_vnum[_mf8]_x4.\n- svrevd[_mf8]_m, svrevd[_mf8]_z, svrevd[_mf8]_x.\n- svst1[_mf8_x2], svst1[_mf8_x4].\n- svst1_vnum[_mf8_x2], svst1_vnum[_mf8_x4].\n- svstnt1[_mf8_x2], svstnt1[_mf8_x4].\n- svstnt1_vnum[_mf8_x2], svstnt1_vnum[_mf8_x4].\n\nSME2.1:\n- svreadz_hor_za8_u8, svreadz_hor_za8_u8_vg2, svreadz_hor_za8_u8_vg4 and related\n ver.\n- svreadz_hor_za128_u8, svreadz_ver_za128_u8.\n- svreadz_za8_u8_vg1x2, svreadz_za8_u8_vg1x4.\n\nThis change follows ACLE 2024Q4.\n\ngcc/\n\t* config/aarch64/aarch64-sve-builtins.cc (TYPES_za_bhsd_data): Add\n\tD (za8, mf8) combination to za_bhsd_data.\n\ngcc/testsuite/\n\t* gcc.target/aarch64/sme/acle-asm/revd_mf8.c: Added test file.\n\t* gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/readz_ver_za128.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/sel_mf8_x2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/sel_mf8_x4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/st1_mf8_x2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/st1_mf8_x4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/zip_mf8_x2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/zip_mf8_x4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x2.c: Likewise.\n\t* 
gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x4.c: Likewise.\n\t* gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x2.c: Likewise.\n\t* gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x4.c: Likewise.\n\t* gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x2.c: Likewise.\n\t* gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x4.c: Likewise.\n\t* gcc.target/aarch64/sve2/acle/asm/revd_mf8.c: Likewise.\n\t* gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x2.c: Likewise.\n\t* gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x4.c: Likewise.\n\t* gcc.target/aarch64/sme/acle-asm/read_hor_za128.c: Added mf8 tests.\n\t* gcc.target/aarch64/sme/acle-asm/read_hor_za8.c: Likewise.\n\t* gcc.target/aarch64/sme/acle-asm/read_ver_za128.c: Likewise.\n\t* gcc.target/aarch64/sme/acle-asm/read_ver_za8.c: Likewise.\n\t* gcc.target/aarch64/sme/acle-asm/write_hor_za128.c: Likewise.\n\t* gcc.target/aarch64/sme/acle-asm/write_hor_za8.c: Likewise.\n\t* gcc.target/aarch64/sme/acle-asm/write_ver_za128.c: Likewise.\n\t* gcc.target/aarch64/sme/acle-asm/write_ver_za8.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c: Likewise.\n\t* 
gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c: Likewise.\n\t* gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c: Likewise.\n---\n gcc/config/aarch64/aarch64-sve-builtins.cc | 4 +-\n .../aarch64/sme/acle-asm/read_hor_za128.c | 31 ++\n .../aarch64/sme/acle-asm/read_hor_za8.c | 31 ++\n .../aarch64/sme/acle-asm/read_ver_za128.c | 31 ++\n .../aarch64/sme/acle-asm/read_ver_za8.c | 31 ++\n .../aarch64/sme/acle-asm/revd_mf8.c | 76 ++++\n .../aarch64/sme/acle-asm/write_hor_za128.c | 10 +\n .../aarch64/sme/acle-asm/write_hor_za8.c | 10 +\n .../aarch64/sme/acle-asm/write_ver_za128.c | 10 +\n .../aarch64/sme/acle-asm/write_ver_za8.c | 10 +\n .../aarch64/sme2/acle-asm/ld1_mf8_x2.c | 262 +++++++++++++\n .../aarch64/sme2/acle-asm/ld1_mf8_x4.c | 354 +++++++++++++++++\n .../aarch64/sme2/acle-asm/ldnt1_mf8_x2.c | 262 +++++++++++++\n .../aarch64/sme2/acle-asm/ldnt1_mf8_x4.c | 354 +++++++++++++++++\n .../aarch64/sme2/acle-asm/read_hor_za8_vg2.c | 78 ++++\n .../aarch64/sme2/acle-asm/read_hor_za8_vg4.c | 91 +++++\n .../aarch64/sme2/acle-asm/read_ver_za8_vg2.c | 78 ++++\n .../aarch64/sme2/acle-asm/read_ver_za8_vg4.c | 91 +++++\n .../aarch64/sme2/acle-asm/read_za8_vg1x2.c | 48 +++\n .../aarch64/sme2/acle-asm/read_za8_vg1x4.c | 54 +++\n .../aarch64/sme2/acle-asm/readz_hor_za128.c | 10 +\n .../aarch64/sme2/acle-asm/readz_hor_za8.c | 10 +\n .../aarch64/sme2/acle-asm/readz_hor_za8_vg2.c | 78 ++++\n .../aarch64/sme2/acle-asm/readz_hor_za8_vg4.c | 91 +++++\n .../aarch64/sme2/acle-asm/readz_ver_za128.c | 197 ++++++++++\n .../aarch64/sme2/acle-asm/readz_ver_za8.c | 10 +\n .../aarch64/sme2/acle-asm/readz_ver_za8_vg2.c | 77 ++++\n 
.../aarch64/sme2/acle-asm/readz_ver_za8_vg4.c | 90 +++++\n .../aarch64/sme2/acle-asm/readz_za8_vg1x2.c | 48 +++\n .../aarch64/sme2/acle-asm/readz_za8_vg1x4.c | 56 +++\n .../aarch64/sme2/acle-asm/sel_mf8_x2.c | 92 +++++\n .../aarch64/sme2/acle-asm/sel_mf8_x4.c | 92 +++++\n .../aarch64/sme2/acle-asm/st1_mf8_x2.c | 262 +++++++++++++\n .../aarch64/sme2/acle-asm/st1_mf8_x4.c | 354 +++++++++++++++++\n .../aarch64/sme2/acle-asm/stnt1_mf8_x2.c | 262 +++++++++++++\n .../aarch64/sme2/acle-asm/stnt1_mf8_x4.c | 354 +++++++++++++++++\n .../aarch64/sme2/acle-asm/uzp_mf8_x2.c | 77 ++++\n .../aarch64/sme2/acle-asm/uzp_mf8_x4.c | 73 ++++\n .../aarch64/sme2/acle-asm/uzpq_mf8_x2.c | 77 ++++\n .../aarch64/sme2/acle-asm/uzpq_mf8_x4.c | 73 ++++\n .../aarch64/sme2/acle-asm/write_hor_za8_vg2.c | 78 ++++\n .../aarch64/sme2/acle-asm/write_hor_za8_vg4.c | 91 +++++\n .../aarch64/sme2/acle-asm/write_ver_za8_vg2.c | 78 ++++\n .../aarch64/sme2/acle-asm/write_ver_za8_vg4.c | 91 +++++\n .../aarch64/sme2/acle-asm/write_za8_vg1x2.c | 48 +++\n .../aarch64/sme2/acle-asm/write_za8_vg1x4.c | 54 +++\n .../aarch64/sme2/acle-asm/zip_mf8_x2.c | 77 ++++\n .../aarch64/sme2/acle-asm/zip_mf8_x4.c | 73 ++++\n .../aarch64/sme2/acle-asm/zipq_mf8_x2.c | 77 ++++\n .../aarch64/sme2/acle-asm/zipq_mf8_x4.c | 73 ++++\n .../aarch64/sve2/acle/asm/ld1_mf8_x2.c | 269 +++++++++++++\n .../aarch64/sve2/acle/asm/ld1_mf8_x4.c | 361 ++++++++++++++++++\n .../aarch64/sve2/acle/asm/ldnt1_mf8_x2.c | 269 +++++++++++++\n .../aarch64/sve2/acle/asm/ldnt1_mf8_x4.c | 361 ++++++++++++++++++\n .../aarch64/sve2/acle/asm/revd_mf8.c | 80 ++++\n .../aarch64/sve2/acle/asm/stnt1_mf8_x2.c | 269 +++++++++++++\n .../aarch64/sve2/acle/asm/stnt1_mf8_x4.c | 361 ++++++++++++++++++\n 57 files changed, 7007 insertions(+), 2 deletions(-)\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_mf8.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x2.c\n create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x4.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x2.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x4.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za128.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x2.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x4.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x2.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x4.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x2.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x4.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x2.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x4.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x2.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x4.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x2.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x4.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x2.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x4.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x2.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x4.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x2.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x4.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_mf8.c\n create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x2.c\n create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x4.c", "diff": "diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc\nindex dbd80cab627..e8eeedb4d36 100644\n--- a/gcc/config/aarch64/aarch64-sve-builtins.cc\n+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc\n@@ -640,7 +640,7 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {\n #define TYPES_d_za(S, D) \\\n S (za64)\n \n-/* { _za8 } x { _s8 _u8 }\n+/* { _za8 } x { _mf8 _s8 _u8 }\n \n { _za16 } x { _bf16 _f16 _s16 _u16 }\n \n@@ -648,7 +648,7 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {\n \n { _za64 } x { _f64 _s64 _u64 }. */\n #define TYPES_za_bhsd_data(S, D) \\\n- D (za8, s8), D (za8, u8), \\\n+ D (za8, mf8), D (za8, s8), D (za8, u8), \\\n D (za16, bf16), D (za16, f16), D (za16, s16), D (za16, u16), \\\n D (za32, f32), D (za32, s32), D (za32, u32), \\\n D (za64, f64), D (za64, s64), D (za64, u64)\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c\nindex c8eef3b16fd..fedefe5b824 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c\n@@ -103,6 +103,16 @@ TEST_READ_ZA (read_za128_u8_0_w0_tied, svuint8_t,\n \t z0 = svread_hor_za128_u8_m (z0, p0, 0, w0),\n \t z0 = svread_hor_za128_m (z0, p0, 0, w0))\n \n+/*\n+** read_za128_mf8_0_w0_tied:\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tz0\\.q, p0/m, za0h\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (read_za128_mf8_0_w0_tied, svmfloat8_t,\n+\t z0 = svread_hor_za128_mf8_m (z0, p0, 0, w0),\n+\t z0 = svread_hor_za128_m (z0, p0, 0, w0))\n+\n /*\n ** read_za128_u8_0_w0_untied:\n ** (\n@@ -124,6 +134,27 @@ TEST_READ_ZA (read_za128_u8_0_w0_untied, svuint8_t,\n \t z0 = svread_hor_za128_u8_m (z1, p0, 0, w0),\n \t z0 = svread_hor_za128_m (z1, p0, 0, w0))\n \n+/*\n+** read_za128_mf8_0_w0_untied:\n+** (\n+**\tmov\t(w1[2-5]), 
w0\n+**\tmov\tz0\\.d, z1\\.d\n+**\tmova\tz0\\.q, p0/m, za0h\\.q\\[\\1, 0\\]\n+** |\n+**\tmov\tz0\\.d, z1\\.d\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tz0\\.q, p0/m, za0h\\.q\\[\\2, 0\\]\n+** |\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tz1\\.q, p0/m, za0h\\.q\\[\\3, 0\\]\n+**\tmov\tz0\\.d, z1\\.d\n+** )\n+**\tret\n+*/\n+TEST_READ_ZA (read_za128_mf8_0_w0_untied, svmfloat8_t,\n+\t z0 = svread_hor_za128_mf8_m (z1, p0, 0, w0),\n+\t z0 = svread_hor_za128_m (z1, p0, 0, w0))\n+\n /*\n ** read_za128_s16_0_w0_tied:\n **\tmov\t(w1[2-5]), w0\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c\nindex 0ad5a953f6b..7c04ef30fd0 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c\n@@ -103,6 +103,16 @@ TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t,\n \t z0 = svread_hor_za8_u8_m (z0, p0, 0, w0),\n \t z0 = svread_hor_za8_m (z0, p0, 0, w0))\n \n+/*\n+** read_za8_mf8_0_w0_tied:\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tz0\\.b, p0/m, za0h\\.b\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (read_za8_mf8_0_w0_tied, svmfloat8_t,\n+\t z0 = svread_hor_za8_mf8_m (z0, p0, 0, w0),\n+\t z0 = svread_hor_za8_m (z0, p0, 0, w0))\n+\n /*\n ** read_za8_u8_0_w0_untied:\n ** (\n@@ -123,3 +133,24 @@ TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t,\n TEST_READ_ZA (read_za8_u8_0_w0_untied, svuint8_t,\n \t z0 = svread_hor_za8_u8_m (z1, p0, 0, w0),\n \t z0 = svread_hor_za8_m (z1, p0, 0, w0))\n+\n+/*\n+** read_za8_mf8_0_w0_untied:\n+** (\n+**\tmov\t(w1[2-5]), w0\n+**\tmov\tz0\\.d, z1\\.d\n+**\tmova\tz0\\.b, p0/m, za0h\\.b\\[\\1, 0\\]\n+** |\n+**\tmov\tz0\\.d, z1\\.d\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tz0\\.b, p0/m, za0h\\.b\\[\\2, 0\\]\n+** |\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tz1\\.b, p0/m, za0h\\.b\\[\\3, 0\\]\n+**\tmov\tz0\\.d, z1\\.d\n+** )\n+**\tret\n+*/\n+TEST_READ_ZA (read_za8_mf8_0_w0_untied, svmfloat8_t,\n+\t z0 = svread_hor_za8_mf8_m (z1, p0, 0, 
w0),\n+\t z0 = svread_hor_za8_m (z1, p0, 0, w0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c\nindex 93d5d60ea57..c4214d19e5d 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c\n@@ -103,6 +103,16 @@ TEST_READ_ZA (read_za128_u8_0_w0_tied, svuint8_t,\n \t z0 = svread_ver_za128_u8_m (z0, p0, 0, w0),\n \t z0 = svread_ver_za128_m (z0, p0, 0, w0))\n \n+/*\n+** read_za128_mf8_0_w0_tied:\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tz0\\.q, p0/m, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (read_za128_mf8_0_w0_tied, svmfloat8_t,\n+\t z0 = svread_ver_za128_mf8_m (z0, p0, 0, w0),\n+\t z0 = svread_ver_za128_m (z0, p0, 0, w0))\n+\n /*\n ** read_za128_u8_0_w0_untied:\n ** (\n@@ -124,6 +134,27 @@ TEST_READ_ZA (read_za128_u8_0_w0_untied, svuint8_t,\n \t z0 = svread_ver_za128_u8_m (z1, p0, 0, w0),\n \t z0 = svread_ver_za128_m (z1, p0, 0, w0))\n \n+/*\n+** read_za128_mf8_0_w0_untied:\n+** (\n+**\tmov\t(w1[2-5]), w0\n+**\tmov\tz0\\.d, z1\\.d\n+**\tmova\tz0\\.q, p0/m, za0v\\.q\\[\\1, 0\\]\n+** |\n+**\tmov\tz0\\.d, z1\\.d\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tz0\\.q, p0/m, za0v\\.q\\[\\2, 0\\]\n+** |\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tz1\\.q, p0/m, za0v\\.q\\[\\3, 0\\]\n+**\tmov\tz0\\.d, z1\\.d\n+** )\n+**\tret\n+*/\n+TEST_READ_ZA (read_za128_mf8_0_w0_untied, svmfloat8_t,\n+\t z0 = svread_ver_za128_mf8_m (z1, p0, 0, w0),\n+\t z0 = svread_ver_za128_m (z1, p0, 0, w0))\n+\n /*\n ** read_za128_s16_0_w0_tied:\n **\tmov\t(w1[2-5]), w0\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c\nindex 87564d1fa68..3859b2351fb 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c\n@@ -103,6 +103,16 @@ TEST_READ_ZA (read_za8_u8_0_w0_tied, 
svuint8_t,\n \t z0 = svread_ver_za8_u8_m (z0, p0, 0, w0),\n \t z0 = svread_ver_za8_m (z0, p0, 0, w0))\n \n+/*\n+** read_za8_mf8_0_w0_tied:\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tz0\\.b, p0/m, za0v\\.b\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (read_za8_mf8_0_w0_tied, svmfloat8_t,\n+\t z0 = svread_ver_za8_mf8_m (z0, p0, 0, w0),\n+\t z0 = svread_ver_za8_m (z0, p0, 0, w0))\n+\n /*\n ** read_za8_u8_0_w0_untied:\n ** (\n@@ -123,3 +133,24 @@ TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t,\n TEST_READ_ZA (read_za8_u8_0_w0_untied, svuint8_t,\n \t z0 = svread_ver_za8_u8_m (z1, p0, 0, w0),\n \t z0 = svread_ver_za8_m (z1, p0, 0, w0))\n+\n+/*\n+** read_za8_mf8_0_w0_untied:\n+** (\n+**\tmov\t(w1[2-5]), w0\n+**\tmov\tz0\\.d, z1\\.d\n+**\tmova\tz0\\.b, p0/m, za0v\\.b\\[\\1, 0\\]\n+** |\n+**\tmov\tz0\\.d, z1\\.d\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tz0\\.b, p0/m, za0v\\.b\\[\\2, 0\\]\n+** |\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tz1\\.b, p0/m, za0v\\.b\\[\\3, 0\\]\n+**\tmov\tz0\\.d, z1\\.d\n+** )\n+**\tret\n+*/\n+TEST_READ_ZA (read_za8_mf8_0_w0_untied, svmfloat8_t,\n+\t z0 = svread_ver_za8_mf8_m (z1, p0, 0, w0),\n+\t z0 = svread_ver_za8_m (z1, p0, 0, w0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_mf8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_mf8.c\nnew file mode 100644\nindex 00000000000..611714b539b\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/revd_mf8.c\n@@ -0,0 +1,76 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sme_acle.h\"\n+\n+/*\n+** revd_mf8_m_tied12:\n+**\trevd\tz0\\.q, p0/m, z0\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_m_tied12, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_m (z0, p0, z0),\n+\t\tz0 = svrevd_m (z0, p0, z0))\n+\n+/*\n+** revd_mf8_m_tied1:\n+**\trevd\tz0\\.q, p0/m, z1\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_m_tied1, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_m (z0, p0, z1),\n+\t\tz0 = svrevd_m (z0, p0, z1))\n+\n+/*\n+** 
revd_mf8_m_tied2:\n+**\tmov\t(z[0-9]+)\\.d, z0\\.d\n+**\tmovprfx\tz0, z1\n+**\trevd\tz0\\.q, p0/m, \\1\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_m_tied2, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_m (z1, p0, z0),\n+\t\tz0 = svrevd_m (z1, p0, z0))\n+\n+/*\n+** revd_mf8_m_untied:\n+**\tmovprfx\tz0, z2\n+**\trevd\tz0\\.q, p0/m, z1\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_m_untied, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_m (z2, p0, z1),\n+\t\tz0 = svrevd_m (z2, p0, z1))\n+\n+/* Awkward register allocation. Don't require specific output. */\n+TEST_UNIFORM_Z (revd_mf8_z_tied1, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_z (p0, z0),\n+\t\tz0 = svrevd_z (p0, z0))\n+\n+/*\n+** revd_mf8_z_untied:\n+**\tmovi?\t[vdz]0\\.?(?:[0-9]*[bhsd])?, #?0\n+**\trevd\tz0\\.q, p0/m, z1\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_z_untied, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_z (p0, z1),\n+\t\tz0 = svrevd_z (p0, z1))\n+\n+/*\n+** revd_mf8_x_tied1:\n+**\trevd\tz0\\.q, p0/m, z0\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_x_tied1, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_x (p0, z0),\n+\t\tz0 = svrevd_x (p0, z0))\n+\n+/*\n+** revd_mf8_x_untied:\n+**\tmovprfx\tz0, z1\n+**\trevd\tz0\\.q, p0/m, z1\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_x_untied, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_x (p0, z1),\n+\t\tz0 = svrevd_x (p0, z1))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c\nindex 119a2535e99..09447b35619 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c\n@@ -92,6 +92,16 @@ TEST_WRITE_ZA (write_za128_u8_0_w0_z0, svuint8_t,\n \t svwrite_hor_za128_u8_m (0, w0, p0, z0),\n \t svwrite_hor_za128_m (0, w0, p0, z0))\n \n+/*\n+** write_za128_mf8_0_w0_z0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tza0h\\.q\\[\\1, 0\\], p0/m, z0\\.q\n+**\tret\n+*/\n+TEST_WRITE_ZA (write_za128_mf8_0_w0_z0, svmfloat8_t,\n+\t svwrite_hor_za128_mf8_m (0, w0, 
p0, z0),\n+\t svwrite_hor_za128_m (0, w0, p0, z0))\n+\n /*\n ** write_za128_s16_0_w0_z0:\n **\tmov\t(w1[2-5]), w0\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c\nindex 683e1a64ab3..6529f9597fc 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c\n@@ -91,3 +91,13 @@ TEST_WRITE_ZA (write_za8_s8_0_w0_z1, svint8_t,\n TEST_WRITE_ZA (write_za8_u8_0_w0_z0, svuint8_t,\n \t svwrite_hor_za8_u8_m (0, w0, p0, z0),\n \t svwrite_hor_za8_m (0, w0, p0, z0))\n+\n+/*\n+** write_za8_mf8_0_w0_z0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tza0h\\.b\\[\\1, 0\\], p0/m, z0\\.b\n+**\tret\n+*/\n+TEST_WRITE_ZA (write_za8_mf8_0_w0_z0, svmfloat8_t,\n+\t svwrite_hor_za8_mf8_m (0, w0, p0, z0),\n+\t svwrite_hor_za8_m (0, w0, p0, z0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c\nindex 9622e99dde1..6c0d334c3dc 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c\n@@ -92,6 +92,16 @@ TEST_WRITE_ZA (write_za128_u8_0_w0_z0, svuint8_t,\n \t svwrite_ver_za128_u8_m (0, w0, p0, z0),\n \t svwrite_ver_za128_m (0, w0, p0, z0))\n \n+/*\n+** write_za128_mf8_0_w0_z0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tza0v\\.q\\[\\1, 0\\], p0/m, z0\\.q\n+**\tret\n+*/\n+TEST_WRITE_ZA (write_za128_mf8_0_w0_z0, svmfloat8_t,\n+\t svwrite_ver_za128_mf8_m (0, w0, p0, z0),\n+\t svwrite_ver_za128_m (0, w0, p0, z0))\n+\n /*\n ** write_za128_s16_0_w0_z0:\n **\tmov\t(w1[2-5]), w0\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c\nindex dd61828219c..0e7cda809f2 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c\n+++ 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c\n@@ -91,3 +91,13 @@ TEST_WRITE_ZA (write_za8_s8_0_w0_z1, svint8_t,\n TEST_WRITE_ZA (write_za8_u8_0_w0_z0, svuint8_t,\n \t svwrite_ver_za8_u8_m (0, w0, p0, z0),\n \t svwrite_ver_za8_m (0, w0, p0, z0))\n+\n+/*\n+** write_za8_mf8_0_w0_z0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmova\tza0v\\.b\\[\\1, 0\\], p0/m, z0\\.b\n+**\tret\n+*/\n+TEST_WRITE_ZA (write_za8_mf8_0_w0_z0, svmfloat8_t,\n+\t svwrite_ver_za8_mf8_m (0, w0, p0, z0),\n+\t svwrite_ver_za8_m (0, w0, p0, z0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x2.c\nnew file mode 100644\nindex 00000000000..6891c5c009a\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x2.c\n@@ -0,0 +1,262 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! ilp32 } } } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** ld1_mf8_base:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_base, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0),\n+\t\t z0 = svld1_x2 (pn8, x0))\n+\n+/*\n+** ld1_mf8_index:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_index, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 + x1),\n+\t\t z0 = svld1_x2 (pn8, x0 + x1))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_mf8_1:\n+**\tincb\tx0\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 + svcntb ()),\n+\t\t z0 = svld1_x2 (pn8, x0 + svcntb ()))\n+\n+/*\n+** ld1_mf8_2:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 2),\n+\t\t z0 = svld1_x2 (pn8, x0 + svcntb () * 2))\n+\n+/*\n+** ld1_mf8_14:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 14),\n+\t\t z0 = svld1_x2 (pn8, x0 + svcntb () * 14))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 16),\n+\t\t z0 = svld1_x2 (pn8, x0 + svcntb () * 16))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_mf8_m1:\n+**\tdecb\tx0\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 - svcntb ()),\n+\t\t z0 = svld1_x2 (pn8, x0 - svcntb ()))\n+\n+/*\n+** ld1_mf8_m2:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 2),\n+\t\t z0 = svld1_x2 (pn8, x0 - svcntb () * 2))\n+\n+/*\n+** ld1_mf8_m16:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 16),\n+\t\t z0 = svld1_x2 (pn8, x0 - svcntb () * 16))\n+\n+/*\n+** ld1_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[\\1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 18),\n+\t\t z0 = svld1_x2 (pn8, x0 - svcntb () * 18))\n+\n+/*\n+** ld1_mf8_z17:\n+**\tld1b\t{z[^\\n]+}, pn8/z, \\[x0\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_z17, svmfloat8x2_t, mfloat8_t,\n+\t\t z17 = svld1_mf8_x2 (pn8, x0),\n+\t\t z17 = svld1_x2 (pn8, x0))\n+\n+/*\n+** ld1_mf8_z22:\n+**\tld1b\t{z22\\.b(?: - |, )z23\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_z22, svmfloat8x2_t, mfloat8_t,\n+\t\t z22 = svld1_mf8_x2 (pn8, x0),\n+\t\t z22 = svld1_x2 (pn8, x0))\n+\n+/*\n+** ld1_mf8_z28:\n+**\tld1b\t{z28\\.b(?: - |, )z29\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_z28, svmfloat8x2_t, mfloat8_t,\n+\t\t z28 = svld1_mf8_x2 (pn8, x0),\n+\t\t z28 = svld1_x2 (pn8, x0))\n+\n+/*\n+** ld1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, p0\\.b\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_pn0, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 
(pn0, x0),\n+\t\t z0 = svld1_x2 (pn0, x0))\n+\n+/*\n+** ld1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_pn7, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn7, x0),\n+\t\t z0 = svld1_x2 (pn7, x0))\n+\n+/*\n+** ld1_mf8_pn15:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn15/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_pn15, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn15, x0),\n+\t\t z0 = svld1_x2 (pn15, x0))\n+\n+/*\n+** ld1_vnum_mf8_0:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, 0),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, 0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, 1),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, 1))\n+\n+/*\n+** ld1_vnum_mf8_2:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, 2),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, 2))\n+\n+/*\n+** ld1_vnum_mf8_14:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, 14),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, 14))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_vnum_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, 16),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, 16))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, -1),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, -1))\n+\n+/*\n+** ld1_vnum_mf8_m2:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, -2),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, -2))\n+\n+/*\n+** ld1_vnum_mf8_m16:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, -16),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, -16))\n+\n+/*\n+** ld1_vnum_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[\\1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, -18),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, -18))\n+\n+/*\n+** ld1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, x1),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, x1))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x4.c\nnew file mode 
100644\nindex 00000000000..a95a33e6665\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ld1_mf8_x4.c\n@@ -0,0 +1,354 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! ilp32 } } } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** ld1_mf8_base:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_base, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0),\n+\t\t z0 = svld1_x4 (pn8, x0))\n+\n+/*\n+** ld1_mf8_index:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_index, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + x1),\n+\t\t z0 = svld1_x4 (pn8, x0 + x1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_mf8_1:\n+**\tincb\tx0\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + svcntb ()),\n+\t\t z0 = svld1_x4 (pn8, x0 + svcntb ()))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 2),\n+\t\t z0 = svld1_x4 (pn8, x0 + svcntb () * 2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 3),\n+\t\t z0 = svld1_x4 (pn8, x0 + svcntb () * 3))\n+\n+/*\n+** ld1_mf8_4:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 4),\n+\t\t z0 = svld1_x4 (pn8, x0 + svcntb () * 4))\n+\n+/*\n+** ld1_mf8_28:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 28),\n+\t\t z0 = svld1_x4 (pn8, x0 + svcntb () * 28))\n+\n+/*\n+** ld1_mf8_32:\n+**\t[^{]*\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 32),\n+\t\t z0 = svld1_x4 (pn8, x0 + svcntb () * 32))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_mf8_m1:\n+**\tdecb\tx0\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 - svcntb ()),\n+\t\t z0 = svld1_x4 (pn8, x0 - svcntb ()))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 2),\n+\t\t z0 = svld1_x4 (pn8, x0 - svcntb () * 2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 3),\n+\t\t z0 = svld1_x4 (pn8, x0 - svcntb () * 3))\n+\n+/*\n+** ld1_mf8_m4:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+ TEST_LOAD_COUNT (ld1_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 4),\n+\t\t z0 = svld1_x4 (pn8, x0 - svcntb () * 4))\n+\n+/*\n+** ld1_mf8_m32:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 32),\n+\t\t z0 = svld1_x4 (pn8, x0 - svcntb () * 32))\n+\n+/*\n+** ld1_mf8_m36:\n+**\t[^{]*\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 36),\n+\t\t z0 = svld1_x4 (pn8, x0 - svcntb () * 36))\n+\n+/*\n+** ld1_mf8_z17:\n+**\tld1b\t{z[^\\n]+}, pn8/z, \\[x0\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_z17, svmfloat8x4_t, mfloat8_t,\n+\t\t z17 = svld1_mf8_x4 (pn8, x0),\n+\t\t z17 = svld1_x4 (pn8, x0))\n+\n+/*\n+** ld1_mf8_z22:\n+**\tld1b\t{z[^\\n]+}, pn8/z, \\[x0\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_z22, svmfloat8x4_t, mfloat8_t,\n+\t\t z22 = svld1_mf8_x4 (pn8, x0),\n+\t\t z22 = svld1_x4 (pn8, x0))\n+\n+/*\n+** ld1_mf8_z28:\n+**\tld1b\t{z28\\.b(?: - |, )z31\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_z28, svmfloat8x4_t, mfloat8_t,\n+\t\t z28 = svld1_mf8_x4 (pn8, x0),\n+\t\t z28 = svld1_x4 (pn8, x0))\n+\n+/*\n+** ld1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, p0\\.b\n+**\tld1b\t{z0\\.b(?: - |, )z3\\.b}, pn\\1/z, 
\\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_pn0, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn0, x0),\n+\t\t z0 = svld1_x4 (pn0, x0))\n+\n+/*\n+** ld1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tld1b\t{z0\\.b(?: - |, )z3\\.b}, pn\\1/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_pn7, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn7, x0),\n+\t\t z0 = svld1_x4 (pn7, x0))\n+\n+/*\n+** ld1_mf8_pn15:\n+**\tld1b\t{z0\\.b(?: - |, )z3\\.b}, pn15/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_pn15, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn15, x0),\n+\t\t z0 = svld1_x4 (pn15, x0))\n+\n+/*\n+** ld1_vnum_mf8_0:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 0),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 1),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_vnum_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 2),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_vnum_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 3),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 3))\n+\n+/*\n+** ld1_vnum_mf8_4:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 4),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 4))\n+\n+/*\n+** ld1_vnum_mf8_28:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 28),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 28))\n+\n+/*\n+** ld1_vnum_mf8_32:\n+**\t[^{]*\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 32),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 32))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, -1),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, -1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_vnum_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, -2),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, -2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_vnum_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, -3),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, -3))\n+\n+/*\n+** ld1_vnum_mf8_m4:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, -4),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, -4))\n+\n+/*\n+** ld1_vnum_mf8_m32:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, -32),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, -32))\n+\n+/*\n+** ld1_vnum_mf8_m36:\n+**\t[^{]*\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, -36),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, -36))\n+\n+/*\n+** ld1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, x1),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, x1))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x2.c\nnew file mode 100644\nindex 00000000000..1855dd115c7\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x2.c\n@@ -0,0 +1,262 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! 
ilp32 } } } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** ldnt1_mf8_base:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_base, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0),\n+\t\t z0 = svldnt1_x2 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_index:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_index, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 + x1),\n+\t\t z0 = svldnt1_x2 (pn8, x0 + x1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_mf8_1:\n+**\tincb\tx0\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb ()),\n+\t\t z0 = svldnt1_x2 (pn8, x0 + svcntb ()))\n+\n+/*\n+** ldnt1_mf8_2:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 2),\n+\t\t z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2))\n+\n+/*\n+** ldnt1_mf8_14:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 14),\n+\t\t z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 16),\n+\t\t z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ldnt1_mf8_m1:\n+**\tdecb\tx0\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb ()),\n+\t\t z0 = svldnt1_x2 (pn8, x0 - svcntb ()))\n+\n+/*\n+** ldnt1_mf8_m2:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 2),\n+\t\t z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2))\n+\n+/*\n+** ldnt1_mf8_m16:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 16),\n+\t\t z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16))\n+\n+/*\n+** ldnt1_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[\\1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 18),\n+\t\t z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18))\n+\n+/*\n+** ldnt1_mf8_z17:\n+**\tldnt1b\t{z[^\\n]+}, pn8/z, \\[x0\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_z17, svmfloat8x2_t, mfloat8_t,\n+\t\t z17 = svldnt1_mf8_x2 (pn8, x0),\n+\t\t z17 = svldnt1_x2 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_z22:\n+**\tldnt1b\t{z22\\.b(?: - |, )z23\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_z22, svmfloat8x2_t, mfloat8_t,\n+\t\t z22 = svldnt1_mf8_x2 (pn8, x0),\n+\t\t z22 = svldnt1_x2 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_z28:\n+**\tldnt1b\t{z28\\.b(?: - |, )z29\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_z28, svmfloat8x2_t, mfloat8_t,\n+\t\t z28 = svldnt1_mf8_x2 (pn8, x0),\n+\t\t z28 = svldnt1_x2 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, p0\\.b\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1/z, 
\\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_pn0, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn0, x0),\n+\t\t z0 = svldnt1_x2 (pn0, x0))\n+\n+/*\n+** ldnt1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_pn7, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn7, x0),\n+\t\t z0 = svldnt1_x2 (pn7, x0))\n+\n+/*\n+** ldnt1_mf8_pn15:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn15/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_pn15, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn15, x0),\n+\t\t z0 = svldnt1_x2 (pn15, x0))\n+\n+/*\n+** ldnt1_vnum_mf8_0:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 0),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, 0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 1),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, 1))\n+\n+/*\n+** ldnt1_vnum_mf8_2:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 2),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, 2))\n+\n+/*\n+** ldnt1_vnum_mf8_14:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 14),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, 14))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ldnt1_vnum_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 16),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, 16))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -1),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, -1))\n+\n+/*\n+** ldnt1_vnum_mf8_m2:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -2),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, -2))\n+\n+/*\n+** ldnt1_vnum_mf8_m16:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -16),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, -16))\n+\n+/*\n+** ldnt1_vnum_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[\\1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -18),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, -18))\n+\n+/*\n+** ldnt1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, x1),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, x1))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x4.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x4.c\nnew file mode 100644\nindex 00000000000..0fad26f4616\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/ldnt1_mf8_x4.c\n@@ -0,0 +1,354 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! ilp32 } } } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** ldnt1_mf8_base:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_base, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0),\n+\t\t z0 = svldnt1_x4 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_index:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_index, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + x1),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + x1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_mf8_1:\n+**\tincb\tx0\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb ()),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + svcntb ()))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 2),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ldnt1_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 3),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3))\n+\n+/*\n+** ldnt1_mf8_4:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 4),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4))\n+\n+/*\n+** ldnt1_mf8_28:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 28),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28))\n+\n+/*\n+** ldnt1_mf8_32:\n+**\t[^{]*\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 32),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_mf8_m1:\n+**\tdecb\tx0\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb ()),\n+\t\t z0 = svldnt1_x4 (pn8, x0 - svcntb ()))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 2),\n+\t\t z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ldnt1_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 3),\n+\t\t z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3))\n+\n+/*\n+** ldnt1_mf8_m4:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 4),\n+\t\t z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4))\n+\n+/*\n+** ldnt1_mf8_m32:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 32),\n+\t\t z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32))\n+\n+/*\n+** ldnt1_mf8_m36:\n+**\t[^{]*\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 36),\n+\t\t z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36))\n+\n+/*\n+** ldnt1_mf8_z17:\n+**\tldnt1b\t{z[^\\n]+}, pn8/z, \\[x0\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_z17, svmfloat8x4_t, mfloat8_t,\n+\t\t z17 = svldnt1_mf8_x4 (pn8, x0),\n+\t\t z17 = svldnt1_x4 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_z22:\n+**\tldnt1b\t{z[^\\n]+}, pn8/z, \\[x0\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_z22, svmfloat8x4_t, mfloat8_t,\n+\t\t z22 = svldnt1_mf8_x4 (pn8, x0),\n+\t\t z22 = svldnt1_x4 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_z28:\n+**\tldnt1b\t{z28\\.b(?: - |, )z31\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_z28, svmfloat8x4_t, mfloat8_t,\n+\t\t z28 = svldnt1_mf8_x4 (pn8, x0),\n+\t\t z28 = svldnt1_x4 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b,
p0\\.b\n+**\tldnt1b\t{z0\\.b(?: - |, )z3\\.b}, pn\\1/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_pn0, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn0, x0),\n+\t\t z0 = svldnt1_x4 (pn0, x0))\n+\n+/*\n+** ldnt1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tldnt1b\t{z0\\.b(?: - |, )z3\\.b}, pn\\1/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_pn7, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn7, x0),\n+\t\t z0 = svldnt1_x4 (pn7, x0))\n+\n+/*\n+** ldnt1_mf8_pn15:\n+**\tldnt1b\t{z0\\.b(?: - |, )z3\\.b}, pn15/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_pn15, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn15, x0),\n+\t\t z0 = svldnt1_x4 (pn15, x0))\n+\n+/*\n+** ldnt1_vnum_mf8_0:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 0),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 1),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_vnum_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 2),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ldnt1_vnum_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 3),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 3))\n+\n+/*\n+** ldnt1_vnum_mf8_4:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 4),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 4))\n+\n+/*\n+** ldnt1_vnum_mf8_28:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 28),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 28))\n+\n+/*\n+** ldnt1_vnum_mf8_32:\n+**\t[^{]*\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 32),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 32))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -1),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, -1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_vnum_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -2),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, -2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ldnt1_vnum_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -3),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, -3))\n+\n+/*\n+** ldnt1_vnum_mf8_m4:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -4),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, -4))\n+\n+/*\n+** ldnt1_vnum_mf8_m32:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -32),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, -32))\n+\n+/*\n+** ldnt1_vnum_mf8_m36:\n+**\t[^{]*\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -36),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, -36))\n+\n+/*\n+** ldnt1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, x1),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, x1))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c\nindex ec31a68b46e..724ba852ef4 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg2.c\n@@ -22,6 +22,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x2_t,\n \t\t z4 = svread_hor_za8_u8_vg2 (0, 1),\n \t\t z4 = svread_hor_za8_u8_vg2 
(0, 1))\n \n+/*\n+** read_za8_mf8_z4_0_1:\n+**\tmov\t(w1[2-5]), #?1\n+**\tmova\t{z4\\.b - z5\\.b}, za0h\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_1, svmfloat8x2_t,\n+\t\t z4 = svread_hor_za8_mf8_vg2 (0, 1),\n+\t\t z4 = svread_hor_za8_mf8_vg2 (0, 1))\n+\n /*\n ** read_za8_s8_z28_0_w11:\n **\tmov\t(w1[2-5]), w11\n@@ -50,6 +60,15 @@ TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x2_t,\n \t\t z18 = svread_hor_za8_u8_vg2 (0, w15),\n \t\t z18 = svread_hor_za8_u8_vg2 (0, w15))\n \n+/*\n+** read_za8_mf8_z18_0_w15:\n+**\tmova\t{z18\\.b - z19\\.b}, za0h\\.b\\[w15, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z18_0_w15, svmfloat8x2_t,\n+\t\t z18 = svread_hor_za8_mf8_vg2 (0, w15),\n+\t\t z18 = svread_hor_za8_mf8_vg2 (0, w15))\n+\n /*\n ** read_za8_s8_z23_0_w12p14:\n **\tmova\t{[^\\n]+}, za0h\\.b\\[w12, 14:15\\]\n@@ -71,6 +90,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x2_t,\n \t\t z4 = svread_hor_za8_u8_vg2 (0, w12 + 1),\n \t\t z4 = svread_hor_za8_u8_vg2 (0, w12 + 1))\n \n+/*\n+** read_za8_mf8_z4_0_w12p1:\n+**\tadd\t(w[0-9]+), w12, #?1\n+**\tmova\t{z4\\.b - z5\\.b}, za0h\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12p1, svmfloat8x2_t,\n+\t\t z4 = svread_hor_za8_mf8_vg2 (0, w12 + 1),\n+\t\t z4 = svread_hor_za8_mf8_vg2 (0, w12 + 1))\n+\n /*\n ** read_za8_s8_z28_0_w12p2:\n **\tmova\t{z28\\.b - z29\\.b}, za0h\\.b\\[w12, 2:3\\]\n@@ -90,6 +119,16 @@ TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x2_t,\n \t\t z0 = svread_hor_za8_u8_vg2 (0, w15 + 3),\n \t\t z0 = svread_hor_za8_u8_vg2 (0, w15 + 3))\n \n+/*\n+** read_za8_mf8_z0_0_w15p3:\n+**\tadd\t(w[0-9]+), w15, #?3\n+**\tmova\t{z0\\.b - z1\\.b}, za0h\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z0_0_w15p3, svmfloat8x2_t,\n+\t\t z0 = svread_hor_za8_mf8_vg2 (0, w15 + 3),\n+\t\t z0 = svread_hor_za8_mf8_vg2 (0, w15 + 3))\n+\n /*\n ** read_za8_u8_z4_0_w15p12:\n **\tmova\t{z4\\.b - z5\\.b}, za0h\\.b\\[w15, 12:13\\]\n@@ -99,6 +138,15 @@ 
TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x2_t,\n \t\t z4 = svread_hor_za8_u8_vg2 (0, w15 + 12),\n \t\t z4 = svread_hor_za8_u8_vg2 (0, w15 + 12))\n \n+/*\n+** read_za8_mf8_z4_0_w15p12:\n+**\tmova\t{z4\\.b - z5\\.b}, za0h\\.b\\[w15, 12:13\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w15p12, svmfloat8x2_t,\n+\t\t z4 = svread_hor_za8_mf8_vg2 (0, w15 + 12),\n+\t\t z4 = svread_hor_za8_mf8_vg2 (0, w15 + 12))\n+\n /*\n ** read_za8_u8_z28_0_w12p15:\n **\tadd\t(w[0-9]+), w12, #?15\n@@ -109,6 +157,16 @@ TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p15, svuint8x2_t,\n \t\t z28 = svread_hor_za8_u8_vg2 (0, w12 + 15),\n \t\t z28 = svread_hor_za8_u8_vg2 (0, w12 + 15))\n \n+/*\n+** read_za8_mf8_z28_0_w12p15:\n+**\tadd\t(w[0-9]+), w12, #?15\n+**\tmova\t{z28\\.b - z29\\.b}, za0h\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z28_0_w12p15, svmfloat8x2_t,\n+\t\t z28 = svread_hor_za8_mf8_vg2 (0, w12 + 15),\n+\t\t z28 = svread_hor_za8_mf8_vg2 (0, w12 + 15))\n+\n /*\n ** read_za8_s8_z0_0_w15p16:\n **\tadd\t(w[0-9]+), w15, #?16\n@@ -129,6 +187,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x2_t,\n \t\t z4 = svread_hor_za8_u8_vg2 (0, w12 - 1),\n \t\t z4 = svread_hor_za8_u8_vg2 (0, w12 - 1))\n \n+/*\n+** read_za8_mf8_z4_0_w12m1:\n+**\tsub\t(w[0-9]+), w12, #?1\n+**\tmova\t{z4\\.b - z5\\.b}, za0h\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12m1, svmfloat8x2_t,\n+\t\t z4 = svread_hor_za8_mf8_vg2 (0, w12 - 1),\n+\t\t z4 = svread_hor_za8_mf8_vg2 (0, w12 - 1))\n+\n /*\n ** read_za8_u8_z18_0_w16:\n **\tmov\t(w1[2-5]), w16\n@@ -138,3 +206,13 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x2_t,\n TEST_READ_ZA_XN (read_za8_u8_z18_0_w16, svuint8x2_t,\n \t\t z18 = svread_hor_za8_u8_vg2 (0, w16),\n \t\t z18 = svread_hor_za8_u8_vg2 (0, w16))\n+\n+/*\n+** read_za8_mf8_z18_0_w16:\n+**\tmov\t(w1[2-5]), w16\n+**\tmova\t{z18\\.b - z19\\.b}, za0h\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z18_0_w16, svmfloat8x2_t,\n+\t\t 
z18 = svread_hor_za8_mf8_vg2 (0, w16),\n+\t\t z18 = svread_hor_za8_mf8_vg2 (0, w16))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c\nindex 261cbead442..2c3132dc6a8 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_hor_za8_vg4.c\n@@ -22,6 +22,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x4_t,\n \t\t z4 = svread_hor_za8_u8_vg4 (0, 1),\n \t\t z4 = svread_hor_za8_u8_vg4 (0, 1))\n \n+/*\n+** read_za8_mf8_z4_0_1:\n+**\tmov\t(w1[2-5]), #?1\n+**\tmova\t{z4\\.b - z7\\.b}, za0h\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_1, svmfloat8x4_t,\n+\t\t z4 = svread_hor_za8_mf8_vg4 (0, 1),\n+\t\t z4 = svread_hor_za8_mf8_vg4 (0, 1))\n+\n /*\n ** read_za8_s8_z28_0_w11:\n **\tmov\t(w1[2-5]), w11\n@@ -54,6 +64,19 @@ TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x4_t,\n \t\t z18 = svread_hor_za8_u8_vg4 (0, w15),\n \t\t z18 = svread_hor_za8_u8_vg4 (0, w15))\n \n+/*\n+** read_za8_mf8_z18_0_w15:\n+**\tmova\t{[^\\n]+}, za0h\\.b\\[w15, 0:3\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z18_0_w15, svmfloat8x4_t,\n+\t\t z18 = svread_hor_za8_mf8_vg4 (0, w15),\n+\t\t z18 = svread_hor_za8_mf8_vg4 (0, w15))\n+\n /*\n ** read_za8_s8_z23_0_w12p12:\n **\tmova\t{[^\\n]+}, za0h\\.b\\[w12, 12:15\\]\n@@ -77,6 +100,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x4_t,\n \t\t z4 = svread_hor_za8_u8_vg4 (0, w12 + 1),\n \t\t z4 = svread_hor_za8_u8_vg4 (0, w12 + 1))\n \n+/*\n+** read_za8_mf8_z4_0_w12p1:\n+**\tadd\t(w[0-9]+), w12, #?1\n+**\tmova\t{z4\\.b - z7\\.b}, za0h\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12p1, svmfloat8x4_t,\n+\t\t z4 = svread_hor_za8_mf8_vg4 (0, w12 + 1),\n+\t\t z4 = svread_hor_za8_mf8_vg4 (0, w12 + 1))\n+\n /*\n ** read_za8_s8_z28_0_w12p2:\n 
**\tadd\t(w[0-9]+), w12, #?2\n@@ -97,6 +130,16 @@ TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x4_t,\n \t\t z0 = svread_hor_za8_u8_vg4 (0, w15 + 3),\n \t\t z0 = svread_hor_za8_u8_vg4 (0, w15 + 3))\n \n+/*\n+** read_za8_mf8_z0_0_w15p3:\n+**\tadd\t(w[0-9]+), w15, #?3\n+**\tmova\t{z0\\.b - z3\\.b}, za0h\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z0_0_w15p3, svmfloat8x4_t,\n+\t\t z0 = svread_hor_za8_mf8_vg4 (0, w15 + 3),\n+\t\t z0 = svread_hor_za8_mf8_vg4 (0, w15 + 3))\n+\n /*\n ** read_za8_u8_z0_0_w12p4:\n **\tmova\t{z0\\.b - z3\\.b}, za0h\\.b\\[w12, 4:7\\]\n@@ -106,6 +149,15 @@ TEST_READ_ZA_XN (read_za8_u8_z0_0_w12p4, svuint8x4_t,\n \t\t z0 = svread_hor_za8_u8_vg4 (0, w12 + 4),\n \t\t z0 = svread_hor_za8_u8_vg4 (0, w12 + 4))\n \n+/*\n+** read_za8_mf8_z0_0_w12p4:\n+**\tmova\t{z0\\.b - z3\\.b}, za0h\\.b\\[w12, 4:7\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z0_0_w12p4, svmfloat8x4_t,\n+\t\t z0 = svread_hor_za8_mf8_vg4 (0, w12 + 4),\n+\t\t z0 = svread_hor_za8_mf8_vg4 (0, w12 + 4))\n+\n /*\n ** read_za8_u8_z4_0_w15p12:\n **\tmova\t{z4\\.b - z7\\.b}, za0h\\.b\\[w15, 12:15\\]\n@@ -115,6 +167,15 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x4_t,\n \t\t z4 = svread_hor_za8_u8_vg4 (0, w15 + 12),\n \t\t z4 = svread_hor_za8_u8_vg4 (0, w15 + 12))\n \n+/*\n+** read_za8_mf8_z4_0_w15p12:\n+**\tmova\t{z4\\.b - z7\\.b}, za0h\\.b\\[w15, 12:15\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w15p12, svmfloat8x4_t,\n+\t\t z4 = svread_hor_za8_mf8_vg4 (0, w15 + 12),\n+\t\t z4 = svread_hor_za8_mf8_vg4 (0, w15 + 12))\n+\n /*\n ** read_za8_u8_z28_0_w12p14:\n **\tadd\t(w[0-9]+), w12, #?14\n@@ -125,6 +186,16 @@ TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p14, svuint8x4_t,\n \t\t z28 = svread_hor_za8_u8_vg4 (0, w12 + 14),\n \t\t z28 = svread_hor_za8_u8_vg4 (0, w12 + 14))\n \n+/*\n+** read_za8_mf8_z28_0_w12p14:\n+**\tadd\t(w[0-9]+), w12, #?14\n+**\tmova\t{z28\\.b - z31\\.b}, za0h\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z28_0_w12p14, 
svmfloat8x4_t,\n+\t\t z28 = svread_hor_za8_mf8_vg4 (0, w12 + 14),\n+\t\t z28 = svread_hor_za8_mf8_vg4 (0, w12 + 14))\n+\n /*\n ** read_za8_s8_z0_0_w15p16:\n **\tadd\t(w[0-9]+), w15, #?16\n@@ -145,6 +216,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x4_t,\n \t\t z4 = svread_hor_za8_u8_vg4 (0, w12 - 1),\n \t\t z4 = svread_hor_za8_u8_vg4 (0, w12 - 1))\n \n+/*\n+** read_za8_mf8_z4_0_w12m1:\n+**\tsub\t(w[0-9]+), w12, #?1\n+**\tmova\t{z4\\.b - z7\\.b}, za0h\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12m1, svmfloat8x4_t,\n+\t\t z4 = svread_hor_za8_mf8_vg4 (0, w12 - 1),\n+\t\t z4 = svread_hor_za8_mf8_vg4 (0, w12 - 1))\n+\n /*\n ** read_za8_u8_z28_0_w16:\n **\tmov\t(w1[2-5]), w16\n@@ -154,3 +235,13 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x4_t,\n TEST_READ_ZA_XN (read_za8_u8_z28_0_w16, svuint8x4_t,\n \t\t z28 = svread_hor_za8_u8_vg4 (0, w16),\n \t\t z28 = svread_hor_za8_u8_vg4 (0, w16))\n+\n+/*\n+** read_za8_mf8_z28_0_w16:\n+**\tmov\t(w1[2-5]), w16\n+**\tmova\t{z28\\.b - z31\\.b}, za0h\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z28_0_w16, svmfloat8x4_t,\n+\t\t z28 = svread_hor_za8_mf8_vg4 (0, w16),\n+\t\t z28 = svread_hor_za8_mf8_vg4 (0, w16))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c\nindex 55970616ba8..5cd101a4988 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg2.c\n@@ -22,6 +22,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x2_t,\n \t\t z4 = svread_ver_za8_u8_vg2 (0, 1),\n \t\t z4 = svread_ver_za8_u8_vg2 (0, 1))\n \n+/*\n+** read_za8_mf8_z4_0_1:\n+**\tmov\t(w1[2-5]), #?1\n+**\tmova\t{z4\\.b - z5\\.b}, za0v\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_1, svmfloat8x2_t,\n+\t\t z4 = svread_ver_za8_mf8_vg2 (0, 1),\n+\t\t z4 = svread_ver_za8_mf8_vg2 (0, 1))\n+\n /*\n **
read_za8_s8_z28_0_w11:\n **\tmov\t(w1[2-5]), w11\n@@ -50,6 +60,15 @@ TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x2_t,\n \t\t z18 = svread_ver_za8_u8_vg2 (0, w15),\n \t\t z18 = svread_ver_za8_u8_vg2 (0, w15))\n \n+/*\n+** read_za8_mf8_z18_0_w15:\n+**\tmova\t{z18\\.b - z19\\.b}, za0v\\.b\\[w15, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z18_0_w15, svmfloat8x2_t,\n+\t\t z18 = svread_ver_za8_mf8_vg2 (0, w15),\n+\t\t z18 = svread_ver_za8_mf8_vg2 (0, w15))\n+\n /*\n ** read_za8_s8_z23_0_w12p14:\n **\tmova\t{[^\\n]+}, za0v\\.b\\[w12, 14:15\\]\n@@ -71,6 +90,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x2_t,\n \t\t z4 = svread_ver_za8_u8_vg2 (0, w12 + 1),\n \t\t z4 = svread_ver_za8_u8_vg2 (0, w12 + 1))\n \n+/*\n+** read_za8_mf8_z4_0_w12p1:\n+**\tadd\t(w[0-9]+), w12, #?1\n+**\tmova\t{z4\\.b - z5\\.b}, za0v\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12p1, svmfloat8x2_t,\n+\t\t z4 = svread_ver_za8_mf8_vg2 (0, w12 + 1),\n+\t\t z4 = svread_ver_za8_mf8_vg2 (0, w12 + 1))\n+\n /*\n ** read_za8_s8_z28_0_w12p2:\n **\tmova\t{z28\\.b - z29\\.b}, za0v\\.b\\[w12, 2:3\\]\n@@ -90,6 +119,16 @@ TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x2_t,\n \t\t z0 = svread_ver_za8_u8_vg2 (0, w15 + 3),\n \t\t z0 = svread_ver_za8_u8_vg2 (0, w15 + 3))\n \n+/*\n+** read_za8_mf8_z0_0_w15p3:\n+**\tadd\t(w[0-9]+), w15, #?3\n+**\tmova\t{z0\\.b - z1\\.b}, za0v\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z0_0_w15p3, svmfloat8x2_t,\n+\t\t z0 = svread_ver_za8_mf8_vg2 (0, w15 + 3),\n+\t\t z0 = svread_ver_za8_mf8_vg2 (0, w15 + 3))\n+\n /*\n ** read_za8_u8_z4_0_w15p12:\n **\tmova\t{z4\\.b - z5\\.b}, za0v\\.b\\[w15, 12:13\\]\n@@ -99,6 +138,15 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x2_t,\n \t\t z4 = svread_ver_za8_u8_vg2 (0, w15 + 12),\n \t\t z4 = svread_ver_za8_u8_vg2 (0, w15 + 12))\n \n+/*\n+** read_za8_mf8_z4_0_w15p12:\n+**\tmova\t{z4\\.b - z5\\.b}, za0v\\.b\\[w15, 12:13\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN 
(read_za8_mf8_z4_0_w15p12, svmfloat8x2_t,\n+\t\t z4 = svread_ver_za8_mf8_vg2 (0, w15 + 12),\n+\t\t z4 = svread_ver_za8_mf8_vg2 (0, w15 + 12))\n+\n /*\n ** read_za8_u8_z28_0_w12p15:\n **\tadd\t(w[0-9]+), w12, #?15\n@@ -109,6 +157,16 @@ TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p15, svuint8x2_t,\n \t\t z28 = svread_ver_za8_u8_vg2 (0, w12 + 15),\n \t\t z28 = svread_ver_za8_u8_vg2 (0, w12 + 15))\n \n+/*\n+** read_za8_mf8_z28_0_w12p15:\n+**\tadd\t(w[0-9]+), w12, #?15\n+**\tmova\t{z28\\.b - z29\\.b}, za0v\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z28_0_w12p15, svmfloat8x2_t,\n+\t\t z28 = svread_ver_za8_mf8_vg2 (0, w12 + 15),\n+\t\t z28 = svread_ver_za8_mf8_vg2 (0, w12 + 15))\n+\n /*\n ** read_za8_s8_z0_0_w15p16:\n **\tadd\t(w[0-9]+), w15, #?16\n@@ -129,6 +187,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x2_t,\n \t\t z4 = svread_ver_za8_u8_vg2 (0, w12 - 1),\n \t\t z4 = svread_ver_za8_u8_vg2 (0, w12 - 1))\n \n+/*\n+** read_za8_mf8_z4_0_w12m1:\n+**\tsub\t(w[0-9]+), w12, #?1\n+**\tmova\t{z4\\.b - z5\\.b}, za0v\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12m1, svmfloat8x2_t,\n+\t\t z4 = svread_ver_za8_mf8_vg2 (0, w12 - 1),\n+\t\t z4 = svread_ver_za8_mf8_vg2 (0, w12 - 1))\n+\n /*\n ** read_za8_u8_z18_0_w16:\n **\tmov\t(w1[2-5]), w16\n@@ -138,3 +206,13 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x2_t,\n TEST_READ_ZA_XN (read_za8_u8_z18_0_w16, svuint8x2_t,\n \t\t z18 = svread_ver_za8_u8_vg2 (0, w16),\n \t\t z18 = svread_ver_za8_u8_vg2 (0, w16))\n+\n+/*\n+** read_za8_mf8_z18_0_w16:\n+**\tmov\t(w1[2-5]), w16\n+**\tmova\t{z18\\.b - z19\\.b}, za0v\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z18_0_w16, svmfloat8x2_t,\n+\t\t z18 = svread_ver_za8_mf8_vg2 (0, w16),\n+\t\t z18 = svread_ver_za8_mf8_vg2 (0, w16))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c\nindex 6fd8a976d4f..daae8bc7285 100644\n--- 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_ver_za8_vg4.c\n@@ -22,6 +22,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_1, svuint8x4_t,\n \t\t z4 = svread_ver_za8_u8_vg4 (0, 1),\n \t\t z4 = svread_ver_za8_u8_vg4 (0, 1))\n \n+/*\n+** read_za8_mf8_z4_0_1:\n+**\tmov\t(w1[2-5]), #?1\n+**\tmova\t{z4\\.b - z7\\.b}, za0v\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_1, svmfloat8x4_t,\n+\t\t z4 = svread_ver_za8_mf8_vg4 (0, 1),\n+\t\t z4 = svread_ver_za8_mf8_vg4 (0, 1))\n+\n /*\n ** read_za8_s8_z28_0_w11:\n **\tmov\t(w1[2-5]), w11\n@@ -54,6 +64,19 @@ TEST_READ_ZA_XN (read_za8_u8_z18_0_w15, svuint8x4_t,\n \t\t z18 = svread_ver_za8_u8_vg4 (0, w15),\n \t\t z18 = svread_ver_za8_u8_vg4 (0, w15))\n \n+/*\n+** read_za8_mf8_z18_0_w15:\n+**\tmova\t{[^\\n]+}, za0v\\.b\\[w15, 0:3\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z18_0_w15, svmfloat8x4_t,\n+\t\t z18 = svread_ver_za8_mf8_vg4 (0, w15),\n+\t\t z18 = svread_ver_za8_mf8_vg4 (0, w15))\n+\n /*\n ** read_za8_s8_z23_0_w12p12:\n **\tmova\t{[^\\n]+}, za0v\\.b\\[w12, 12:15\\]\n@@ -77,6 +100,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12p1, svuint8x4_t,\n \t\t z4 = svread_ver_za8_u8_vg4 (0, w12 + 1),\n \t\t z4 = svread_ver_za8_u8_vg4 (0, w12 + 1))\n \n+/*\n+** read_za8_mf8_z4_0_w12p1:\n+**\tadd\t(w[0-9]+), w12, #?1\n+**\tmova\t{z4\\.b - z7\\.b}, za0v\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12p1, svmfloat8x4_t,\n+\t\t z4 = svread_ver_za8_mf8_vg4 (0, w12 + 1),\n+\t\t z4 = svread_ver_za8_mf8_vg4 (0, w12 + 1))\n+\n /*\n ** read_za8_s8_z28_0_w12p2:\n **\tadd\t(w[0-9]+), w12, #?2\n@@ -97,6 +130,16 @@ TEST_READ_ZA_XN (read_za8_u8_z0_0_w15p3, svuint8x4_t,\n \t\t z0 = svread_ver_za8_u8_vg4 (0, w15 + 3),\n \t\t z0 = svread_ver_za8_u8_vg4 (0, w15 + 3))\n \n+/*\n+** read_za8_mf8_z0_0_w15p3:\n+**\tadd\t(w[0-9]+), w15, #?3\n+**\tmova\t{z0\\.b - 
z3\\.b}, za0v\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z0_0_w15p3, svmfloat8x4_t,\n+\t\t z0 = svread_ver_za8_mf8_vg4 (0, w15 + 3),\n+\t\t z0 = svread_ver_za8_mf8_vg4 (0, w15 + 3))\n+\n /*\n ** read_za8_u8_z0_0_w12p4:\n **\tmova\t{z0\\.b - z3\\.b}, za0v\\.b\\[w12, 4:7\\]\n@@ -106,6 +149,15 @@ TEST_READ_ZA_XN (read_za8_u8_z0_0_w12p4, svuint8x4_t,\n \t\t z0 = svread_ver_za8_u8_vg4 (0, w12 + 4),\n \t\t z0 = svread_ver_za8_u8_vg4 (0, w12 + 4))\n \n+/*\n+** read_za8_mf8_z0_0_w12p4:\n+**\tmova\t{z0\\.b - z3\\.b}, za0v\\.b\\[w12, 4:7\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z0_0_w12p4, svmfloat8x4_t,\n+\t\t z0 = svread_ver_za8_mf8_vg4 (0, w12 + 4),\n+\t\t z0 = svread_ver_za8_mf8_vg4 (0, w12 + 4))\n+\n /*\n ** read_za8_u8_z4_0_w15p12:\n **\tmova\t{z4\\.b - z7\\.b}, za0v\\.b\\[w15, 12:15\\]\n@@ -115,6 +167,15 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w15p12, svuint8x4_t,\n \t\t z4 = svread_ver_za8_u8_vg4 (0, w15 + 12),\n \t\t z4 = svread_ver_za8_u8_vg4 (0, w15 + 12))\n \n+/*\n+** read_za8_mf8_z4_0_w15p12:\n+**\tmova\t{z4\\.b - z7\\.b}, za0v\\.b\\[w15, 12:15\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w15p12, svmfloat8x4_t,\n+\t\t z4 = svread_ver_za8_mf8_vg4 (0, w15 + 12),\n+\t\t z4 = svread_ver_za8_mf8_vg4 (0, w15 + 12))\n+\n /*\n ** read_za8_u8_z28_0_w12p14:\n **\tadd\t(w[0-9]+), w12, #?14\n@@ -125,6 +186,16 @@ TEST_READ_ZA_XN (read_za8_u8_z28_0_w12p14, svuint8x4_t,\n \t\t z28 = svread_ver_za8_u8_vg4 (0, w12 + 14),\n \t\t z28 = svread_ver_za8_u8_vg4 (0, w12 + 14))\n \n+/*\n+** read_za8_mf8_z28_0_w12p14:\n+**\tadd\t(w[0-9]+), w12, #?14\n+**\tmova\t{z28\\.b - z31\\.b}, za0v\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z28_0_w12p14, svmfloat8x4_t,\n+\t\t z28 = svread_ver_za8_mf8_vg4 (0, w12 + 14),\n+\t\t z28 = svread_ver_za8_mf8_vg4 (0, w12 + 14))\n+\n /*\n ** read_za8_s8_z0_0_w15p16:\n **\tadd\t(w[0-9]+), w15, #?16\n@@ -145,6 +216,16 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x4_t,\n \t\t z4 = 
svread_ver_za8_u8_vg4 (0, w12 - 1),\n \t\t z4 = svread_ver_za8_u8_vg4 (0, w12 - 1))\n \n+/*\n+** read_za8_mf8_z4_0_w12m1:\n+**\tsub\t(w[0-9]+), w12, #?1\n+**\tmova\t{z4\\.b - z7\\.b}, za0v\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z4_0_w12m1, svmfloat8x4_t,\n+\t\t z4 = svread_ver_za8_mf8_vg4 (0, w12 - 1),\n+\t\t z4 = svread_ver_za8_mf8_vg4 (0, w12 - 1))\n+\n /*\n ** read_za8_u8_z28_0_w16:\n **\tmov\t(w1[2-5]), w16\n@@ -154,3 +235,13 @@ TEST_READ_ZA_XN (read_za8_u8_z4_0_w12m1, svuint8x4_t,\n TEST_READ_ZA_XN (read_za8_u8_z28_0_w16, svuint8x4_t,\n \t\t z28 = svread_ver_za8_u8_vg4 (0, w16),\n \t\t z28 = svread_ver_za8_u8_vg4 (0, w16))\n+\n+/*\n+** read_za8_mf8_z28_0_w16:\n+**\tmov\t(w1[2-5]), w16\n+**\tmova\t{z28\\.b - z31\\.b}, za0v\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_za8_mf8_z28_0_w16, svmfloat8x4_t,\n+\t\t z28 = svread_ver_za8_mf8_vg4 (0, w16),\n+\t\t z28 = svread_ver_za8_mf8_vg4 (0, w16))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c\nindex 9b151abf4fa..819bf786a4f 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x2.c\n@@ -32,6 +32,16 @@ TEST_READ_ZA_XN (read_w7_z0, svuint8x2_t,\n \t\t z0 = svread_za8_u8_vg1x2 (w7),\n \t\t z0 = svread_za8_u8_vg1x2 (w7))\n \n+/*\n+** read_mf8_w7_z0:\n+**\tmov\t(w8|w9|w10|w11), w7\n+**\tmova\t{z0\\.d - z1\\.d}, za\\.d\\[\\1, 0, vgx2\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_mf8_w7_z0, svmfloat8x2_t,\n+\t\t z0 = svread_za8_mf8_vg1x2 (w7),\n+\t\t z0 = svread_za8_mf8_vg1x2 (w7))\n+\n /*\n ** read_w8_z0:\n **\tmova\t{z0\\.d - z1\\.d}, za\\.d\\[w8, 0, vgx2\\]\n@@ -61,6 +71,16 @@ TEST_READ_ZA_XN (read_w12_z0, svuint8x2_t,\n \t\t z0 = svread_za8_u8_vg1x2 (w12),\n \t\t z0 = svread_za8_u8_vg1x2 (w12))\n \n+/*\n+** read_mf8_w12_z0:\n+**\tmov\t(w8|w9|w10|w11), w12\n+**\tmova\t{z0\\.d - z1\\.d}, za\\.d\\[\\1, 0, 
vgx2\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_mf8_w12_z0, svmfloat8x2_t,\n+\t\t z0 = svread_za8_mf8_vg1x2 (w12),\n+\t\t z0 = svread_za8_mf8_vg1x2 (w12))\n+\n /*\n ** read_w8p7_z0:\n **\tmova\t{z0\\.d - z1\\.d}, za\\.d\\[w8, 7, vgx2\\]\n@@ -90,6 +110,16 @@ TEST_READ_ZA_XN (read_w8m1_z0, svuint8x2_t,\n \t\t z0 = svread_za8_u8_vg1x2 (w8 - 1),\n \t\t z0 = svread_za8_u8_vg1x2 (w8 - 1))\n \n+/*\n+** read_mf8_w8m1_z0:\n+**\tsub\t(w8|w9|w10|w11), w8, #?1\n+**\tmova\t{z0\\.d - z1\\.d}, za\\.d\\[\\1, 0, vgx2\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_mf8_w8m1_z0, svmfloat8x2_t,\n+\t\t z0 = svread_za8_mf8_vg1x2 (w8 - 1),\n+\t\t z0 = svread_za8_mf8_vg1x2 (w8 - 1))\n+\n /*\n ** read_w8_z18:\n **\tmova\t{z18\\.d - z19\\.d}, za\\.d\\[w8, 0, vgx2\\]\n@@ -99,6 +129,15 @@ TEST_READ_ZA_XN (read_w8_z18, svuint8x2_t,\n \t\t z18 = svread_za8_u8_vg1x2 (w8),\n \t\t z18 = svread_za8_u8_vg1x2 (w8))\n \n+/*\n+** read_mf8_w8_z18:\n+**\tmova\t{z18\\.d - z19\\.d}, za\\.d\\[w8, 0, vgx2\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_mf8_w8_z18, svmfloat8x2_t,\n+\t\t z18 = svread_za8_mf8_vg1x2 (w8),\n+\t\t z18 = svread_za8_mf8_vg1x2 (w8))\n+\n /* Leave the assembler to check for correctness for misaligned registers. 
*/\n \n /*\n@@ -120,3 +159,12 @@ TEST_READ_ZA_XN (read_w8_z23, svint8x2_t,\n TEST_READ_ZA_XN (read_w8_z28, svuint8x2_t,\n \t\t z28 = svread_za8_u8_vg1x2 (w8),\n \t\t z28 = svread_za8_u8_vg1x2 (w8))\n+\n+/*\n+** read_mf8_w8_z28:\n+**\tmova\t{z28\\.d - z29\\.d}, za\\.d\\[w8, 0, vgx2\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_mf8_w8_z28, svmfloat8x2_t,\n+\t\t z28 = svread_za8_mf8_vg1x2 (w8),\n+\t\t z28 = svread_za8_mf8_vg1x2 (w8))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c\nindex 80c81dde097..f8c6d2a3d43 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/read_za8_vg1x4.c\n@@ -22,6 +22,16 @@ TEST_READ_ZA_XN (read_w0_z0, svuint8x4_t,\n \t\t z0 = svread_za8_u8_vg1x4 (w0),\n \t\t z0 = svread_za8_u8_vg1x4 (w0))\n \n+/*\n+** read_mf8_w0_z0:\n+**\tmov\t(w8|w9|w10|w11), w0\n+**\tmova\t{z0\\.d - z3\\.d}, za\\.d\\[\\1, 0, vgx4\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_mf8_w0_z0, svmfloat8x4_t,\n+\t\t z0 = svread_za8_mf8_vg1x4 (w0),\n+\t\t z0 = svread_za8_mf8_vg1x4 (w0))\n+\n /*\n ** read_w7_z0:\n **\tmov\t(w8|w9|w10|w11), w7\n@@ -50,6 +60,14 @@ TEST_READ_ZA_XN (read_w11_z0, svuint8x4_t,\n \t\t z0 = svread_za8_u8_vg1x4 (w11),\n \t\t z0 = svread_za8_u8_vg1x4 (w11))\n \n+/*\n+** read_mf8_w11_z0:\n+**\tmova\t{z0\\.d - z3\\.d}, za\\.d\\[w11, 0, vgx4\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_mf8_w11_z0, svmfloat8x4_t,\n+\t\t z0 = svread_za8_mf8_vg1x4 (w11),\n+\t\t z0 = svread_za8_mf8_vg1x4 (w11))\n \n /*\n ** read_w12_z0:\n@@ -80,6 +98,16 @@ TEST_READ_ZA_XN (read_w8p8_z0, svuint8x4_t,\n \t\t z0 = svread_za8_u8_vg1x4 (w8 + 8),\n \t\t z0 = svread_za8_u8_vg1x4 (w8 + 8))\n \n+/*\n+** read_mf8_w8p8_z0:\n+**\tadd\t(w8|w9|w10|w11), w8, #?8\n+**\tmova\t{z0\\.d - z3\\.d}, za\\.d\\[\\1, 0, vgx4\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_mf8_w8p8_z0, svmfloat8x4_t,\n+\t\t z0 = svread_za8_mf8_vg1x4 (w8 + 8),\n+\t\t z0 
= svread_za8_mf8_vg1x4 (w8 + 8))\n+\n /*\n ** read_w8m1_z0:\n **\tsub\t(w8|w9|w10|w11), w8, #?1\n@@ -114,6 +142,19 @@ TEST_READ_ZA_XN (read_w8_z18, svuint8x4_t,\n \t\t z18 = svread_za8_u8_vg1x4 (w8),\n \t\t z18 = svread_za8_u8_vg1x4 (w8))\n \n+/*\n+** read_mf8_w8_z18:\n+**\tmova\t[^\\n]+, za\\.d\\[w8, 0, vgx4\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_mf8_w8_z18, svmfloat8x4_t,\n+\t\t z18 = svread_za8_mf8_vg1x4 (w8),\n+\t\t z18 = svread_za8_mf8_vg1x4 (w8))\n+\n /*\n ** read_w8_z23:\n **\tmova\t[^\\n]+, za\\.d\\[w8, 0, vgx4\\]\n@@ -127,6 +168,19 @@ TEST_READ_ZA_XN (read_w8_z23, svuint8x4_t,\n \t\t z23 = svread_za8_u8_vg1x4 (w8),\n \t\t z23 = svread_za8_u8_vg1x4 (w8))\n \n+/*\n+** read_mf8_w8_z23:\n+**\tmova\t[^\\n]+, za\\.d\\[w8, 0, vgx4\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_READ_ZA_XN (read_mf8_w8_z23, svmfloat8x4_t,\n+\t\t z23 = svread_za8_mf8_vg1x4 (w8),\n+\t\t z23 = svread_za8_mf8_vg1x4 (w8))\n+\n /*\n ** read_w8_z28:\n **\tmova\t{z28\\.d - z31\\.d}, za\\.d\\[w8, 0, vgx4\\]\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c\nindex 8b6644f1d6e..aa29879331e 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c\n@@ -86,6 +86,16 @@ TEST_READ_ZA (readz_za128_u8_0_w0, svuint8_t,\n \t z0 = svreadz_hor_za128_u8 (0, w0),\n \t z0 = svreadz_hor_za128_u8 (0, w0))\n \n+/*\n+** readz_za128_mf8_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0h\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_mf8_0_w0, svmfloat8_t,\n+\t z0 = svreadz_hor_za128_mf8 (0, w0),\n+\t z0 = svreadz_hor_za128_mf8 (0, w0))\n+\n /*\n ** readz_za128_s16_0_w0:\n **\tmov\t(w1[2-5]), w0\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c\nindex 6fea16459e2..f6f595f5697 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c\n@@ -85,3 +85,13 @@ TEST_READ_ZA (readz_za8_s8_0_w0m1, svint8_t,\n TEST_READ_ZA (readz_za8_u8_0_w0, svuint8_t,\n \t z0 = svreadz_hor_za8_u8 (0, w0),\n \t z0 = svreadz_hor_za8_u8 (0, w0))\n+\n+/*\n+** readz_za8_mf8_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.b, za0h\\.b\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za8_mf8_0_w0, svmfloat8_t,\n+\t z0 = svreadz_hor_za8_mf8 (0, w0),\n+\t z0 = svreadz_hor_za8_mf8 (0, w0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c\nindex a1a63104ad4..d09687e3674 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c\n@@ -26,6 +26,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_1, svuint8x2_t,\n \t\t z4 = svreadz_hor_za8_u8_vg2 (0, 1),\n \t\t z4 = svreadz_hor_za8_u8_vg2 (0, 1))\n \n+/*\n+** readz_za8_mf8_z4_0_1:\n+**\tmov\t(w1[2-5]), #?1\n+**\tmovaz\t{z4\\.b - z5\\.b}, za0h\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_1, svmfloat8x2_t,\n+\t\t z4 = svreadz_hor_za8_mf8_vg2 (0, 1),\n+\t\t z4 = svreadz_hor_za8_mf8_vg2 (0, 1))\n+\n /*\n ** readz_za8_s8_z28_0_w11:\n **\tmov\t(w1[2-5]), w11\n@@ -54,6 +64,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z18_0_w15, svuint8x2_t,\n \t\t z18 = svreadz_hor_za8_u8_vg2 (0, w15),\n \t\t z18 = svreadz_hor_za8_u8_vg2 (0, w15))\n \n+/*\n+** readz_za8_mf8_z18_0_w15:\n+**\tmovaz\t{z18\\.b - z19\\.b}, za0h\\.b\\[w15, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w15, svmfloat8x2_t,\n+\t\t z18 = svreadz_hor_za8_mf8_vg2 (0, w15),\n+\t\t z18 = svreadz_hor_za8_mf8_vg2 (0, w15))\n+\n /*\n ** readz_za8_s8_z23_0_w12p14:\n **\tmovaz\t{[^\\n]+}, 
za0h\\.b\\[w12, 14:15\\]\n@@ -75,6 +94,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12p1, svuint8x2_t,\n \t\t z4 = svreadz_hor_za8_u8_vg2 (0, w12 + 1),\n \t\t z4 = svreadz_hor_za8_u8_vg2 (0, w12 + 1))\n \n+/*\n+** readz_za8_mf8_z4_0_w12p1:\n+**\tadd\t(w[0-9]+), w12, #?1\n+**\tmovaz\t{z4\\.b - z5\\.b}, za0h\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12p1, svmfloat8x2_t,\n+\t\t z4 = svreadz_hor_za8_mf8_vg2 (0, w12 + 1),\n+\t\t z4 = svreadz_hor_za8_mf8_vg2 (0, w12 + 1))\n+\n /*\n ** readz_za8_s8_z28_0_w12p2:\n **\tmovaz\t{z28\\.b - z29\\.b}, za0h\\.b\\[w12, 2:3\\]\n@@ -94,6 +123,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z0_0_w15p3, svuint8x2_t,\n \t\t z0 = svreadz_hor_za8_u8_vg2 (0, w15 + 3),\n \t\t z0 = svreadz_hor_za8_u8_vg2 (0, w15 + 3))\n \n+/*\n+** readz_za8_mf8_z0_0_w15p3:\n+**\tadd\t(w[0-9]+), w15, #?3\n+**\tmovaz\t{z0\\.b - z1\\.b}, za0h\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w15p3, svmfloat8x2_t,\n+\t\t z0 = svreadz_hor_za8_mf8_vg2 (0, w15 + 3),\n+\t\t z0 = svreadz_hor_za8_mf8_vg2 (0, w15 + 3))\n+\n /*\n ** readz_za8_u8_z4_0_w15p12:\n **\tmovaz\t{z4\\.b - z5\\.b}, za0h\\.b\\[w15, 12:13\\]\n@@ -103,6 +142,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w15p12, svuint8x2_t,\n \t\t z4 = svreadz_hor_za8_u8_vg2 (0, w15 + 12),\n \t\t z4 = svreadz_hor_za8_u8_vg2 (0, w15 + 12))\n \n+/*\n+** readz_za8_mf8_z4_0_w15p12:\n+**\tmovaz\t{z4\\.b - z5\\.b}, za0h\\.b\\[w15, 12:13\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w15p12, svmfloat8x2_t,\n+\t\t z4 = svreadz_hor_za8_mf8_vg2 (0, w15 + 12),\n+\t\t z4 = svreadz_hor_za8_mf8_vg2 (0, w15 + 12))\n+\n /*\n ** readz_za8_u8_z28_0_w12p15:\n **\tadd\t(w[0-9]+), w12, #?15\n@@ -113,6 +161,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z28_0_w12p15, svuint8x2_t,\n \t\t z28 = svreadz_hor_za8_u8_vg2 (0, w12 + 15),\n \t\t z28 = svreadz_hor_za8_u8_vg2 (0, w12 + 15))\n \n+/*\n+** readz_za8_mf8_z28_0_w12p15:\n+**\tadd\t(w[0-9]+), w12, #?15\n+**\tmovaz\t{z28\\.b - z29\\.b}, za0h\\.b\\[\\1, 
0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w12p15, svmfloat8x2_t,\n+\t\t z28 = svreadz_hor_za8_mf8_vg2 (0, w12 + 15),\n+\t\t z28 = svreadz_hor_za8_mf8_vg2 (0, w12 + 15))\n+\n /*\n ** readz_za8_s8_z0_0_w15p16:\n **\tadd\t(w[0-9]+), w15, #?16\n@@ -133,6 +191,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x2_t,\n \t\t z4 = svreadz_hor_za8_u8_vg2 (0, w12 - 1),\n \t\t z4 = svreadz_hor_za8_u8_vg2 (0, w12 - 1))\n \n+/*\n+** readz_za8_mf8_z4_0_w12m1:\n+**\tsub\t(w[0-9]+), w12, #?1\n+**\tmovaz\t{z4\\.b - z5\\.b}, za0h\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12m1, svmfloat8x2_t,\n+\t\t z4 = svreadz_hor_za8_mf8_vg2 (0, w12 - 1),\n+\t\t z4 = svreadz_hor_za8_mf8_vg2 (0, w12 - 1))\n+\n /*\n ** readz_za8_u8_z18_0_w16:\n **\tmov\t(w1[2-5]), w16\n@@ -142,3 +210,13 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x2_t,\n TEST_READ_ZA_XN (readz_za8_u8_z18_0_w16, svuint8x2_t,\n \t\t z18 = svreadz_hor_za8_u8_vg2 (0, w16),\n \t\t z18 = svreadz_hor_za8_u8_vg2 (0, w16))\n+\n+/*\n+** readz_za8_mf8_z18_0_w16:\n+**\tmov\t(w1[2-5]), w16\n+**\tmovaz\t{z18\\.b - z19\\.b}, za0h\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w16, svmfloat8x2_t,\n+\t\t z18 = svreadz_hor_za8_mf8_vg2 (0, w16),\n+\t\t z18 = svreadz_hor_za8_mf8_vg2 (0, w16))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c\nindex ca71bc513e3..eec47bf3152 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c\n@@ -26,6 +26,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_1, svuint8x4_t,\n \t\t z4 = svreadz_hor_za8_u8_vg4 (0, 1),\n \t\t z4 = svreadz_hor_za8_u8_vg4 (0, 1))\n \n+/*\n+** readz_za8_mf8_z4_0_1:\n+**\tmov\t(w1[2-5]), #?1\n+**\tmovaz\t{z4\\.b - z7\\.b}, za0h\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_1, svmfloat8x4_t,\n+\t\t 
z4 = svreadz_hor_za8_mf8_vg4 (0, 1),\n+\t\t z4 = svreadz_hor_za8_mf8_vg4 (0, 1))\n+\n /*\n ** readz_za8_s8_z28_0_w11:\n **\tmov\t(w1[2-5]), w11\n@@ -58,6 +68,19 @@ TEST_READ_ZA_XN (readz_za8_u8_z18_0_w15, svuint8x4_t,\n \t\t z18 = svreadz_hor_za8_u8_vg4 (0, w15),\n \t\t z18 = svreadz_hor_za8_u8_vg4 (0, w15))\n \n+/*\n+** readz_za8_mf8_z18_0_w15:\n+**\tmovaz\t{[^\\n]+}, za0h\\.b\\[w15, 0:3\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w15, svmfloat8x4_t,\n+\t\t z18 = svreadz_hor_za8_mf8_vg4 (0, w15),\n+\t\t z18 = svreadz_hor_za8_mf8_vg4 (0, w15))\n+\n /*\n ** readz_za8_s8_z23_0_w12p12:\n **\tmovaz\t{[^\\n]+}, za0h\\.b\\[w12, 12:15\\]\n@@ -81,6 +104,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12p1, svuint8x4_t,\n \t\t z4 = svreadz_hor_za8_u8_vg4 (0, w12 + 1),\n \t\t z4 = svreadz_hor_za8_u8_vg4 (0, w12 + 1))\n \n+/*\n+** readz_za8_mf8_z4_0_w12p1:\n+**\tadd\t(w[0-9]+), w12, #?1\n+**\tmovaz\t{z4\\.b - z7\\.b}, za0h\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12p1, svmfloat8x4_t,\n+\t\t z4 = svreadz_hor_za8_mf8_vg4 (0, w12 + 1),\n+\t\t z4 = svreadz_hor_za8_mf8_vg4 (0, w12 + 1))\n+\n /*\n ** readz_za8_s8_z28_0_w12p2:\n **\tadd\t(w[0-9]+), w12, #?2\n@@ -101,6 +134,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z0_0_w15p3, svuint8x4_t,\n \t\t z0 = svreadz_hor_za8_u8_vg4 (0, w15 + 3),\n \t\t z0 = svreadz_hor_za8_u8_vg4 (0, w15 + 3))\n \n+/*\n+** readz_za8_mf8_z0_0_w15p3:\n+**\tadd\t(w[0-9]+), w15, #?3\n+**\tmovaz\t{z0\\.b - z3\\.b}, za0h\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w15p3, svmfloat8x4_t,\n+\t\t z0 = svreadz_hor_za8_mf8_vg4 (0, w15 + 3),\n+\t\t z0 = svreadz_hor_za8_mf8_vg4 (0, w15 + 3))\n+\n /*\n ** readz_za8_u8_z0_0_w12p4:\n **\tmovaz\t{z0\\.b - z3\\.b}, za0h\\.b\\[w12, 4:7\\]\n@@ -110,6 +153,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z0_0_w12p4, svuint8x4_t,\n \t\t z0 = svreadz_hor_za8_u8_vg4 (0, w12 + 4),\n \t\t z0 = 
svreadz_hor_za8_u8_vg4 (0, w12 + 4))\n \n+/*\n+** readz_za8_mf8_z0_0_w12p4:\n+**\tmovaz\t{z0\\.b - z3\\.b}, za0h\\.b\\[w12, 4:7\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w12p4, svmfloat8x4_t,\n+\t\t z0 = svreadz_hor_za8_mf8_vg4 (0, w12 + 4),\n+\t\t z0 = svreadz_hor_za8_mf8_vg4 (0, w12 + 4))\n+\n /*\n ** readz_za8_u8_z4_0_w15p12:\n **\tmovaz\t{z4\\.b - z7\\.b}, za0h\\.b\\[w15, 12:15\\]\n@@ -119,6 +171,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w15p12, svuint8x4_t,\n \t\t z4 = svreadz_hor_za8_u8_vg4 (0, w15 + 12),\n \t\t z4 = svreadz_hor_za8_u8_vg4 (0, w15 + 12))\n \n+/*\n+** readz_za8_mf8_z4_0_w15p12:\n+**\tmovaz\t{z4\\.b - z7\\.b}, za0h\\.b\\[w15, 12:15\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w15p12, svmfloat8x4_t,\n+\t\t z4 = svreadz_hor_za8_mf8_vg4 (0, w15 + 12),\n+\t\t z4 = svreadz_hor_za8_mf8_vg4 (0, w15 + 12))\n+\n /*\n ** readz_za8_u8_z28_0_w12p14:\n **\tadd\t(w[0-9]+), w12, #?14\n@@ -129,6 +190,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z28_0_w12p14, svuint8x4_t,\n \t\t z28 = svreadz_hor_za8_u8_vg4 (0, w12 + 14),\n \t\t z28 = svreadz_hor_za8_u8_vg4 (0, w12 + 14))\n \n+/*\n+** readz_za8_mf8_z28_0_w12p14:\n+**\tadd\t(w[0-9]+), w12, #?14\n+**\tmovaz\t{z28\\.b - z31\\.b}, za0h\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w12p14, svmfloat8x4_t,\n+\t\t z28 = svreadz_hor_za8_mf8_vg4 (0, w12 + 14),\n+\t\t z28 = svreadz_hor_za8_mf8_vg4 (0, w12 + 14))\n+\n /*\n ** readz_za8_s8_z0_0_w15p16:\n **\tadd\t(w[0-9]+), w15, #?16\n@@ -149,6 +220,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x4_t,\n \t\t z4 = svreadz_hor_za8_u8_vg4 (0, w12 - 1),\n \t\t z4 = svreadz_hor_za8_u8_vg4 (0, w12 - 1))\n \n+/*\n+** readz_za8_mf8_z4_0_w12m1:\n+**\tsub\t(w[0-9]+), w12, #?1\n+**\tmovaz\t{z4\\.b - z7\\.b}, za0h\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12m1, svmfloat8x4_t,\n+\t\t z4 = svreadz_hor_za8_mf8_vg4 (0, w12 - 1),\n+\t\t z4 = svreadz_hor_za8_mf8_vg4 (0, w12 - 1))\n+\n /*\n ** 
readz_za8_u8_z28_0_w16:\n **\tmov\t(w1[2-5]), w16\n@@ -158,3 +239,13 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x4_t,\n TEST_READ_ZA_XN (readz_za8_u8_z28_0_w16, svuint8x4_t,\n \t\t z28 = svreadz_hor_za8_u8_vg4 (0, w16),\n \t\t z28 = svreadz_hor_za8_u8_vg4 (0, w16))\n+\n+/*\n+** readz_za8_mf8_z28_0_w16:\n+**\tmov\t(w1[2-5]), w16\n+**\tmovaz\t{z28\\.b - z31\\.b}, za0h\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w16, svmfloat8x4_t,\n+\t\t z28 = svreadz_hor_za8_mf8_vg4 (0, w16),\n+\t\t z28 = svreadz_hor_za8_mf8_vg4 (0, w16))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za128.c\nnew file mode 100644\nindex 00000000000..401543cbbcd\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za128.c\n@@ -0,0 +1,197 @@\n+/* { dg-do assemble { target aarch64_asm_sme2p1_ok } } */\n+/* { dg-do compile { target { ! aarch64_asm_sme2p1_ok } } } */\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+#pragma GCC target \"+sme2p1\"\n+\n+/*\n+** readz_za128_s8_0_0:\n+**\tmov\t(w1[2-5]), (?:wzr|#?0)\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_s8_0_0, svint8_t,\n+\t z0 = svreadz_ver_za128_s8 (0, 0),\n+\t z0 = svreadz_ver_za128_s8 (0, 0))\n+\n+/*\n+** readz_za128_s8_0_1:\n+**\tmov\t(w1[2-5]), #?1\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_s8_0_1, svint8_t,\n+\t z0 = svreadz_ver_za128_s8 (0, 1),\n+\t z0 = svreadz_ver_za128_s8 (0, 1))\n+\n+/*\n+** readz_za128_s8_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_s8_0_w0, svint8_t,\n+\t z0 = svreadz_ver_za128_s8 (0, w0),\n+\t z0 = svreadz_ver_za128_s8 (0, w0))\n+\n+/*\n+** readz_za128_s8_0_w0p1:\n+**\tadd\t(w1[2-5]), w0, #?1\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 
0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_s8_0_w0p1, svint8_t,\n+\t z0 = svreadz_ver_za128_s8 (0, w0 + 1),\n+\t z0 = svreadz_ver_za128_s8 (0, w0 + 1))\n+\n+/*\n+** readz_za128_s8_0_w0m1:\n+**\tsub\t(w1[2-5]), w0, #?1\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_s8_0_w0m1, svint8_t,\n+\t z0 = svreadz_ver_za128_s8 (0, w0 - 1),\n+\t z0 = svreadz_ver_za128_s8 (0, w0 - 1))\n+\n+/*\n+** readz_za128_s8_1_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za1v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_s8_1_w0, svint8_t,\n+\t z0 = svreadz_ver_za128_s8 (1, w0),\n+\t z0 = svreadz_ver_za128_s8 (1, w0))\n+\n+/*\n+** readz_za128_s8_15_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za15v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_s8_15_w0, svint8_t,\n+\t z0 = svreadz_ver_za128_s8 (15, w0),\n+\t z0 = svreadz_ver_za128_s8 (15, w0))\n+\n+/*\n+** readz_za128_u8_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_u8_0_w0, svuint8_t,\n+\t z0 = svreadz_ver_za128_u8 (0, w0),\n+\t z0 = svreadz_ver_za128_u8 (0, w0))\n+\n+/*\n+** readz_za128_mf8_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_mf8_0_w0, svmfloat8_t,\n+\t z0 = svreadz_ver_za128_mf8 (0, w0),\n+\t z0 = svreadz_ver_za128_mf8 (0, w0))\n+\n+/*\n+** readz_za128_s16_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_s16_0_w0, svint16_t,\n+\t z0 = svreadz_ver_za128_s16 (0, w0),\n+\t z0 = svreadz_ver_za128_s16 (0, w0))\n+\n+/*\n+** readz_za128_u16_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_u16_0_w0, svuint16_t,\n+\t z0 = svreadz_ver_za128_u16 (0, w0),\n+\t z0 = svreadz_ver_za128_u16 (0, w0))\n+\n+/*\n+** readz_za128_f16_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 
0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_f16_0_w0, svfloat16_t,\n+\t z0 = svreadz_ver_za128_f16 (0, w0),\n+\t z0 = svreadz_ver_za128_f16 (0, w0))\n+\n+/*\n+** readz_za128_bf16_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_bf16_0_w0, svbfloat16_t,\n+\t z0 = svreadz_ver_za128_bf16 (0, w0),\n+\t z0 = svreadz_ver_za128_bf16 (0, w0))\n+\n+/*\n+** readz_za128_s32_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_s32_0_w0, svint32_t,\n+\t z0 = svreadz_ver_za128_s32 (0, w0),\n+\t z0 = svreadz_ver_za128_s32 (0, w0))\n+\n+/*\n+** readz_za128_u32_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_u32_0_w0, svuint32_t,\n+\t z0 = svreadz_ver_za128_u32 (0, w0),\n+\t z0 = svreadz_ver_za128_u32 (0, w0))\n+\n+/*\n+** readz_za128_f32_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_f32_0_w0, svfloat32_t,\n+\t z0 = svreadz_ver_za128_f32 (0, w0),\n+\t z0 = svreadz_ver_za128_f32 (0, w0))\n+\n+/*\n+** readz_za128_s64_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_s64_0_w0, svint64_t,\n+\t z0 = svreadz_ver_za128_s64 (0, w0),\n+\t z0 = svreadz_ver_za128_s64 (0, w0))\n+\n+/*\n+** readz_za128_u64_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_u64_0_w0, svuint64_t,\n+\t z0 = svreadz_ver_za128_u64 (0, w0),\n+\t z0 = svreadz_ver_za128_u64 (0, w0))\n+\n+/*\n+** readz_za128_f64_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.q, za0v\\.q\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za128_f64_0_w0, svfloat64_t,\n+\t z0 = svreadz_ver_za128_f64 (0, w0),\n+\t z0 = svreadz_ver_za128_f64 (0, w0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c\nindex 4bd5ae783ef..66c42cecd31 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c\n@@ -85,3 +85,13 @@ TEST_READ_ZA (readz_za8_s8_0_w0m1, svint8_t,\n TEST_READ_ZA (readz_za8_u8_0_w0, svuint8_t,\n \t z0 = svreadz_ver_za8_u8 (0, w0),\n \t z0 = svreadz_ver_za8_u8 (0, w0))\n+\n+/*\n+** readz_za8_mf8_0_w0:\n+**\tmov\t(w1[2-5]), w0\n+**\tmovaz\tz0\\.b, za0v\\.b\\[\\1, 0\\]\n+**\tret\n+*/\n+TEST_READ_ZA (readz_za8_mf8_0_w0, svmfloat8_t,\n+\t z0 = svreadz_ver_za8_mf8 (0, w0),\n+\t z0 = svreadz_ver_za8_mf8 (0, w0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c\nindex 940a5619a13..daa6b131587 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c\n@@ -26,6 +26,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_1, svuint8x2_t,\n \t\t z4 = svreadz_ver_za8_u8_vg2 (0, 1),\n \t\t z4 = svreadz_ver_za8_u8_vg2 (0, 1))\n \n+/*\n+** readz_za8_mf8_z4_0_1:\n+**\tmov\t(w1[2-5]), #?1\n+**\tmovaz\t{z4\\.b - z5\\.b}, za0v\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_1, svmfloat8x2_t,\n+\t\t z4 = svreadz_ver_za8_mf8_vg2 (0, 1),\n+\t\t z4 = svreadz_ver_za8_mf8_vg2 (0, 1))\n+\n /*\n ** readz_za8_s8_z28_0_w11:\n **\tmov\t(w1[2-5]), w11\n@@ -54,6 +64,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z18_0_w15, svuint8x2_t,\n \t\t z18 = svreadz_ver_za8_u8_vg2 (0, w15),\n \t\t z18 = svreadz_ver_za8_u8_vg2 (0, w15))\n \n+/*\n+** readz_za8_mf8_z18_0_w15:\n+**\tmovaz\t{z18\\.b - z19\\.b}, za0v\\.b\\[w15, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w15, svmfloat8x2_t,\n+\t\t z18 = svreadz_ver_za8_mf8_vg2 (0, w15),\n+\t\t z18 = svreadz_ver_za8_mf8_vg2 (0, w15))\n+\n /*\n ** readz_za8_s8_z23_0_w12p14:\n **\tmovaz\t{[^\\n]+}, 
za0v\\.b\\[w12, 14:15\\]\n@@ -75,6 +94,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12p1, svuint8x2_t,\n \t\t z4 = svreadz_ver_za8_u8_vg2 (0, w12 + 1),\n \t\t z4 = svreadz_ver_za8_u8_vg2 (0, w12 + 1))\n \n+/*\n+** readz_za8_mf8_z4_0_w12p1:\n+**\tadd\t(w[0-9]+), w12, #?1\n+**\tmovaz\t{z4\\.b - z5\\.b}, za0v\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12p1, svmfloat8x2_t,\n+\t\t z4 = svreadz_ver_za8_mf8_vg2 (0, w12 + 1),\n+\t\t z4 = svreadz_ver_za8_mf8_vg2 (0, w12 + 1))\n+\n /*\n ** readz_za8_s8_z28_0_w12p2:\n **\tmovaz\t{z28\\.b - z29\\.b}, za0v\\.b\\[w12, 2:3\\]\n@@ -94,6 +123,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z0_0_w15p3, svuint8x2_t,\n \t\t z0 = svreadz_ver_za8_u8_vg2 (0, w15 + 3),\n \t\t z0 = svreadz_ver_za8_u8_vg2 (0, w15 + 3))\n \n+/*\n+** readz_za8_mf8_z0_0_w15p3:\n+**\tadd\t(w[0-9]+), w15, #?3\n+**\tmovaz\t{z0\\.b - z1\\.b}, za0v\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w15p3, svmfloat8x2_t,\n+\t\t z0 = svreadz_ver_za8_mf8_vg2 (0, w15 + 3),\n+\t\t z0 = svreadz_ver_za8_mf8_vg2 (0, w15 + 3))\n+\n /*\n ** readz_za8_u8_z4_0_w15p12:\n **\tmovaz\t{z4\\.b - z5\\.b}, za0v\\.b\\[w15, 12:13\\]\n@@ -103,6 +142,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w15p12, svuint8x2_t,\n \t\t z4 = svreadz_ver_za8_u8_vg2 (0, w15 + 12),\n \t\t z4 = svreadz_ver_za8_u8_vg2 (0, w15 + 12))\n \n+/*\n+** readz_za8_mf8_z4_0_w15p12:\n+**\tmovaz\t{z4\\.b - z5\\.b}, za0v\\.b\\[w15, 12:13\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w15p12, svmfloat8x2_t,\n+\t\t z4 = svreadz_ver_za8_mf8_vg2 (0, w15 + 12),\n+\t\t z4 = svreadz_ver_za8_mf8_vg2 (0, w15 + 12))\n+\n /*\n ** readz_za8_u8_z28_0_w12p15:\n **\tadd\t(w[0-9]+), w12, #?15\n@@ -113,6 +161,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z28_0_w12p15, svuint8x2_t,\n \t\t z28 = svreadz_ver_za8_u8_vg2 (0, w12 + 15),\n \t\t z28 = svreadz_ver_za8_u8_vg2 (0, w12 + 15))\n \n+/*\n+** readz_za8_mf8_z28_0_w12p15:\n+**\tadd\t(w[0-9]+), w12, #?15\n+**\tmovaz\t{z28\\.b - z29\\.b}, za0v\\.b\\[\\1, 
0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w12p15, svmfloat8x2_t,\n+\t\t z28 = svreadz_ver_za8_mf8_vg2 (0, w12 + 15),\n+\t\t z28 = svreadz_ver_za8_mf8_vg2 (0, w12 + 15))\n+\n /*\n ** readz_za8_s8_z0_0_w15p16:\n **\tadd\t(w[0-9]+), w15, #?16\n@@ -133,6 +191,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x2_t,\n \t\t z4 = svreadz_ver_za8_u8_vg2 (0, w12 - 1),\n \t\t z4 = svreadz_ver_za8_u8_vg2 (0, w12 - 1))\n \n+/*\n+** readz_za8_mf8_z4_0_w12m1:\n+**\tsub\t(w[0-9]+), w12, #?1\n+**\tmovaz\t{z4\\.b - z5\\.b}, za0v\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12m1, svmfloat8x2_t,\n+\t\t z4 = svreadz_ver_za8_mf8_vg2 (0, w12 - 1),\n+\t\t z4 = svreadz_ver_za8_mf8_vg2 (0, w12 - 1))\n+\n /*\n ** readz_za8_u8_z18_0_w16:\n **\tmov\t(w1[2-5]), w16\n@@ -142,3 +210,12 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x2_t,\n TEST_READ_ZA_XN (readz_za8_u8_z18_0_w16, svuint8x2_t,\n \t\t z18 = svreadz_ver_za8_u8_vg2 (0, w16),\n \t\t z18 = svreadz_ver_za8_u8_vg2 (0, w16))\n+/*\n+** readz_za8_mf8_z18_0_w16:\n+**\tmov\t(w1[2-5]), w16\n+**\tmovaz\t{z18\\.b - z19\\.b}, za0v\\.b\\[\\1, 0:1\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w16, svmfloat8x2_t,\n+\t\t z18 = svreadz_ver_za8_mf8_vg2 (0, w16),\n+\t\t z18 = svreadz_ver_za8_mf8_vg2 (0, w16))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c\nindex 9f776ded80f..f3c06d8f029 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c\n@@ -26,6 +26,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_1, svuint8x4_t,\n \t\t z4 = svreadz_ver_za8_u8_vg4 (0, 1),\n \t\t z4 = svreadz_ver_za8_u8_vg4 (0, 1))\n \n+/*\n+** readz_za8_mf8_z4_0_1:\n+**\tmov\t(w1[2-5]), #?1\n+**\tmovaz\t{z4\\.b - z7\\.b}, za0v\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_1, svmfloat8x4_t,\n+\t\t z4 
= svreadz_ver_za8_mf8_vg4 (0, 1),\n+\t\t z4 = svreadz_ver_za8_mf8_vg4 (0, 1))\n+\n /*\n ** readz_za8_s8_z28_0_w11:\n **\tmov\t(w1[2-5]), w11\n@@ -58,6 +68,19 @@ TEST_READ_ZA_XN (readz_za8_u8_z18_0_w15, svuint8x4_t,\n \t\t z18 = svreadz_ver_za8_u8_vg4 (0, w15),\n \t\t z18 = svreadz_ver_za8_u8_vg4 (0, w15))\n \n+/*\n+** readz_za8_mf8_z18_0_w15:\n+**\tmovaz\t{[^\\n]+}, za0v\\.b\\[w15, 0:3\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z18_0_w15, svmfloat8x4_t,\n+\t\t z18 = svreadz_ver_za8_mf8_vg4 (0, w15),\n+\t\t z18 = svreadz_ver_za8_mf8_vg4 (0, w15))\n+\n /*\n ** readz_za8_s8_z23_0_w12p12:\n **\tmovaz\t{[^\\n]+}, za0v\\.b\\[w12, 12:15\\]\n@@ -81,6 +104,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12p1, svuint8x4_t,\n \t\t z4 = svreadz_ver_za8_u8_vg4 (0, w12 + 1),\n \t\t z4 = svreadz_ver_za8_u8_vg4 (0, w12 + 1))\n \n+/*\n+** readz_za8_mf8_z4_0_w12p1:\n+**\tadd\t(w[0-9]+), w12, #?1\n+**\tmovaz\t{z4\\.b - z7\\.b}, za0v\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12p1, svmfloat8x4_t,\n+\t\t z4 = svreadz_ver_za8_mf8_vg4 (0, w12 + 1),\n+\t\t z4 = svreadz_ver_za8_mf8_vg4 (0, w12 + 1))\n+\n /*\n ** readz_za8_s8_z28_0_w12p2:\n **\tadd\t(w[0-9]+), w12, #?2\n@@ -101,6 +134,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z0_0_w15p3, svuint8x4_t,\n \t\t z0 = svreadz_ver_za8_u8_vg4 (0, w15 + 3),\n \t\t z0 = svreadz_ver_za8_u8_vg4 (0, w15 + 3))\n \n+/*\n+** readz_za8_mf8_z0_0_w15p3:\n+**\tadd\t(w[0-9]+), w15, #?3\n+**\tmovaz\t{z0\\.b - z3\\.b}, za0v\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w15p3, svmfloat8x4_t,\n+\t\t z0 = svreadz_ver_za8_mf8_vg4 (0, w15 + 3),\n+\t\t z0 = svreadz_ver_za8_mf8_vg4 (0, w15 + 3))\n+\n /*\n ** readz_za8_u8_z0_0_w12p4:\n **\tmovaz\t{z0\\.b - z3\\.b}, za0v\\.b\\[w12, 4:7\\]\n@@ -110,6 +153,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z0_0_w12p4, svuint8x4_t,\n \t\t z0 = svreadz_ver_za8_u8_vg4 (0, w12 + 4),\n \t\t z0 = svreadz_ver_za8_u8_vg4 
(0, w12 + 4))\n \n+/*\n+** readz_za8_mf8_z0_0_w12p4:\n+**\tmovaz\t{z0\\.b - z3\\.b}, za0v\\.b\\[w12, 4:7\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z0_0_w12p4, svmfloat8x4_t,\n+\t\t z0 = svreadz_ver_za8_mf8_vg4 (0, w12 + 4),\n+\t\t z0 = svreadz_ver_za8_mf8_vg4 (0, w12 + 4))\n+\n /*\n ** readz_za8_u8_z4_0_w15p12:\n **\tmovaz\t{z4\\.b - z7\\.b}, za0v\\.b\\[w15, 12:15\\]\n@@ -119,6 +171,15 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w15p12, svuint8x4_t,\n \t\t z4 = svreadz_ver_za8_u8_vg4 (0, w15 + 12),\n \t\t z4 = svreadz_ver_za8_u8_vg4 (0, w15 + 12))\n \n+/*\n+** readz_za8_mf8_z4_0_w15p12:\n+**\tmovaz\t{z4\\.b - z7\\.b}, za0v\\.b\\[w15, 12:15\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w15p12, svmfloat8x4_t,\n+\t\t z4 = svreadz_ver_za8_mf8_vg4 (0, w15 + 12),\n+\t\t z4 = svreadz_ver_za8_mf8_vg4 (0, w15 + 12))\n+\n /*\n ** readz_za8_u8_z28_0_w12p14:\n **\tadd\t(w[0-9]+), w12, #?14\n@@ -129,6 +190,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z28_0_w12p14, svuint8x4_t,\n \t\t z28 = svreadz_ver_za8_u8_vg4 (0, w12 + 14),\n \t\t z28 = svreadz_ver_za8_u8_vg4 (0, w12 + 14))\n \n+/*\n+** readz_za8_mf8_z28_0_w12p14:\n+**\tadd\t(w[0-9]+), w12, #?14\n+**\tmovaz\t{z28\\.b - z31\\.b}, za0v\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w12p14, svmfloat8x4_t,\n+\t\t z28 = svreadz_ver_za8_mf8_vg4 (0, w12 + 14),\n+\t\t z28 = svreadz_ver_za8_mf8_vg4 (0, w12 + 14))\n+\n /*\n ** readz_za8_s8_z0_0_w15p16:\n **\tadd\t(w[0-9]+), w15, #?16\n@@ -149,6 +220,16 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x4_t,\n \t\t z4 = svreadz_ver_za8_u8_vg4 (0, w12 - 1),\n \t\t z4 = svreadz_ver_za8_u8_vg4 (0, w12 - 1))\n \n+/*\n+** readz_za8_mf8_z4_0_w12m1:\n+**\tsub\t(w[0-9]+), w12, #?1\n+**\tmovaz\t{z4\\.b - z7\\.b}, za0v\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z4_0_w12m1, svmfloat8x4_t,\n+\t\t z4 = svreadz_ver_za8_mf8_vg4 (0, w12 - 1),\n+\t\t z4 = svreadz_ver_za8_mf8_vg4 (0, w12 - 1))\n+\n /*\n ** readz_za8_u8_z28_0_w16:\n 
**\tmov\t(w1[2-5]), w16\n@@ -158,3 +239,12 @@ TEST_READ_ZA_XN (readz_za8_u8_z4_0_w12m1, svuint8x4_t,\n TEST_READ_ZA_XN (readz_za8_u8_z28_0_w16, svuint8x4_t,\n \t\t z28 = svreadz_ver_za8_u8_vg4 (0, w16),\n \t\t z28 = svreadz_ver_za8_u8_vg4 (0, w16))\n+/*\n+** readz_za8_mf8_z28_0_w16:\n+**\tmov\t(w1[2-5]), w16\n+**\tmovaz\t{z28\\.b - z31\\.b}, za0v\\.b\\[\\1, 0:3\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_za8_mf8_z28_0_w16, svmfloat8x4_t,\n+\t\t z28 = svreadz_ver_za8_mf8_vg4 (0, w16),\n+\t\t z28 = svreadz_ver_za8_mf8_vg4 (0, w16))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c\nindex 7bdb17d7e79..f4d40315acd 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c\n@@ -36,6 +36,16 @@ TEST_READ_ZA_XN (readz_w7_z0, svuint8x2_t,\n \t\t z0 = svreadz_za8_u8_vg1x2 (w7),\n \t\t z0 = svreadz_za8_u8_vg1x2 (w7))\n \n+/*\n+** readz_mf8_w7_z0:\n+**\tmov\t(w8|w9|w10|w11), w7\n+**\tmovaz\t{z0\\.d - z1\\.d}, za\\.d\\[\\1, 0, vgx2\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_mf8_w7_z0, svmfloat8x2_t,\n+\t\t z0 = svreadz_za8_mf8_vg1x2 (w7),\n+\t\t z0 = svreadz_za8_mf8_vg1x2 (w7))\n+\n /*\n ** readz_w8_z0:\n **\tmovaz\t{z0\\.d - z1\\.d}, za\\.d\\[w8, 0, vgx2\\]\n@@ -65,6 +75,16 @@ TEST_READ_ZA_XN (readz_w12_z0, svuint8x2_t,\n \t\t z0 = svreadz_za8_u8_vg1x2 (w12),\n \t\t z0 = svreadz_za8_u8_vg1x2 (w12))\n \n+/*\n+** readz_mf8_w12_z0:\n+**\tmov\t(w8|w9|w10|w11), w12\n+**\tmovaz\t{z0\\.d - z1\\.d}, za\\.d\\[\\1, 0, vgx2\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_mf8_w12_z0, svmfloat8x2_t,\n+\t\t z0 = svreadz_za8_mf8_vg1x2 (w12),\n+\t\t z0 = svreadz_za8_mf8_vg1x2 (w12))\n+\n /*\n ** readz_w8p7_z0:\n **\tmovaz\t{z0\\.d - z1\\.d}, za\\.d\\[w8, 7, vgx2\\]\n@@ -94,6 +114,16 @@ TEST_READ_ZA_XN (readz_w8m1_z0, svuint8x2_t,\n \t\t z0 = svreadz_za8_u8_vg1x2 (w8 - 1),\n \t\t z0 = svreadz_za8_u8_vg1x2 (w8 - 
1))\n \n+/*\n+** readz_mf8_w8m1_z0:\n+**\tsub\t(w8|w9|w10|w11), w8, #?1\n+**\tmovaz\t{z0\\.d - z1\\.d}, za\\.d\\[\\1, 0, vgx2\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_mf8_w8m1_z0, svmfloat8x2_t,\n+\t\t z0 = svreadz_za8_mf8_vg1x2 (w8 - 1),\n+\t\t z0 = svreadz_za8_mf8_vg1x2 (w8 - 1))\n+\n /*\n ** readz_w8_z18:\n **\tmovaz\t{z18\\.d - z19\\.d}, za\\.d\\[w8, 0, vgx2\\]\n@@ -103,6 +133,15 @@ TEST_READ_ZA_XN (readz_w8_z18, svuint8x2_t,\n \t\t z18 = svreadz_za8_u8_vg1x2 (w8),\n \t\t z18 = svreadz_za8_u8_vg1x2 (w8))\n \n+/*\n+** readz_mf8_w8_z18:\n+**\tmovaz\t{z18\\.d - z19\\.d}, za\\.d\\[w8, 0, vgx2\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_mf8_w8_z18, svmfloat8x2_t,\n+\t\t z18 = svreadz_za8_mf8_vg1x2 (w8),\n+\t\t z18 = svreadz_za8_mf8_vg1x2 (w8))\n+\n /* Leave the assembler to check for correctness for misaligned registers. */\n \n /*\n@@ -124,3 +163,12 @@ TEST_READ_ZA_XN (readz_w8_z23, svint8x2_t,\n TEST_READ_ZA_XN (readz_w8_z28, svuint8x2_t,\n \t\t z28 = svreadz_za8_u8_vg1x2 (w8),\n \t\t z28 = svreadz_za8_u8_vg1x2 (w8))\n+\n+/*\n+** readz_mf8_w8_z28:\n+**\tmovaz\t{z28\\.d - z29\\.d}, za\\.d\\[w8, 0, vgx2\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_mf8_w8_z28, svmfloat8x2_t,\n+\t\t z28 = svreadz_za8_mf8_vg1x2 (w8),\n+\t\t z28 = svreadz_za8_mf8_vg1x2 (w8))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c\nindex 02beaae85c6..d9be244c62c 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c\n@@ -26,6 +26,16 @@ TEST_READ_ZA_XN (readz_w0_z0, svuint8x4_t,\n \t\t z0 = svreadz_za8_u8_vg1x4 (w0),\n \t\t z0 = svreadz_za8_u8_vg1x4 (w0))\n \n+/*\n+** readz_mf8_w0_z0:\n+**\tmov\t(w8|w9|w10|w11), w0\n+**\tmovaz\t{z0\\.d - z3\\.d}, za\\.d\\[\\1, 0, vgx4\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_mf8_w0_z0, svmfloat8x4_t,\n+\t\t z0 = svreadz_za8_mf8_vg1x4 (w0),\n+\t\t z0 = 
svreadz_za8_mf8_vg1x4 (w0))\n+\n /*\n ** readz_w7_z0:\n **\tmov\t(w8|w9|w10|w11), w7\n@@ -55,6 +65,16 @@ TEST_READ_ZA_XN (readz_w11_z0, svuint8x4_t,\n \t\t z0 = svreadz_za8_u8_vg1x4 (w11))\n \n \n+/*\n+** readz_mf8_w11_z0:\n+**\tmovaz\t{z0\\.d - z3\\.d}, za\\.d\\[w11, 0, vgx4\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_mf8_w11_z0, svmfloat8x4_t,\n+\t\t z0 = svreadz_za8_mf8_vg1x4 (w11),\n+\t\t z0 = svreadz_za8_mf8_vg1x4 (w11))\n+\n+\n /*\n ** readz_w12_z0:\n **\tmov\t(w8|w9|w10|w11), w12\n@@ -84,6 +104,16 @@ TEST_READ_ZA_XN (readz_w8p8_z0, svuint8x4_t,\n \t\t z0 = svreadz_za8_u8_vg1x4 (w8 + 8),\n \t\t z0 = svreadz_za8_u8_vg1x4 (w8 + 8))\n \n+/*\n+** readz_mf8_w8p8_z0:\n+**\tadd\t(w8|w9|w10|w11), w8, #?8\n+**\tmovaz\t{z0\\.d - z3\\.d}, za\\.d\\[\\1, 0, vgx4\\]\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_mf8_w8p8_z0, svmfloat8x4_t,\n+\t\t z0 = svreadz_za8_mf8_vg1x4 (w8 + 8),\n+\t\t z0 = svreadz_za8_mf8_vg1x4 (w8 + 8))\n+\n /*\n ** readz_w8m1_z0:\n **\tsub\t(w8|w9|w10|w11), w8, #?1\n@@ -118,6 +148,19 @@ TEST_READ_ZA_XN (readz_w8_z18, svuint8x4_t,\n \t\t z18 = svreadz_za8_u8_vg1x4 (w8),\n \t\t z18 = svreadz_za8_u8_vg1x4 (w8))\n \n+/*\n+** readz_mf8_w8_z18:\n+**\tmovaz\t[^\\n]+, za\\.d\\[w8, 0, vgx4\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_mf8_w8_z18, svmfloat8x4_t,\n+\t\t z18 = svreadz_za8_mf8_vg1x4 (w8),\n+\t\t z18 = svreadz_za8_mf8_vg1x4 (w8))\n+\n /*\n ** readz_w8_z23:\n **\tmovaz\t[^\\n]+, za\\.d\\[w8, 0, vgx4\\]\n@@ -131,6 +174,19 @@ TEST_READ_ZA_XN (readz_w8_z23, svuint8x4_t,\n \t\t z23 = svreadz_za8_u8_vg1x4 (w8),\n \t\t z23 = svreadz_za8_u8_vg1x4 (w8))\n \n+/*\n+** readz_mf8_w8_z23:\n+**\tmovaz\t[^\\n]+, za\\.d\\[w8, 0, vgx4\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_READ_ZA_XN (readz_mf8_w8_z23, svmfloat8x4_t,\n+\t\t z23 = svreadz_za8_mf8_vg1x4 (w8),\n+\t\t z23 = svreadz_za8_mf8_vg1x4 (w8))\n+\n /*\n ** readz_w8_z28:\n 
**\tmovaz\t{z28\\.d - z31\\.d}, za\\.d\\[w8, 0, vgx4\\]\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x2.c\nnew file mode 100644\nindex 00000000000..1192aa84dc2\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x2.c\n@@ -0,0 +1,92 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** sel_z0_pn0_z0_z4:\n+**\tmov\tp([0-9]+)\\.b, p0\\.b\n+**\tsel\t{z0\\.b - z1\\.b}, pn\\1, {z0\\.b - z1\\.b}, {z4\\.b - z5\\.b}\n+**\tret\n+*/\n+TEST_XN (sel_z0_pn0_z0_z4, svmfloat8x2_t, z0,\n+\t svsel_mf8_x2 (pn0, z0, z4),\n+\t svsel (pn0, z0, z4))\n+\n+/*\n+** sel_z0_pn7_z0_z4:\n+**\tmov\tp([0-9]+)\\.b, p7\\.b\n+**\tsel\t{z0\\.b - z1\\.b}, pn\\1, {z0\\.b - z1\\.b}, {z4\\.b - z5\\.b}\n+**\tret\n+*/\n+TEST_XN (sel_z0_pn7_z0_z4, svmfloat8x2_t, z0,\n+\t svsel_mf8_x2 (pn7, z0, z4),\n+\t svsel (pn7, z0, z4))\n+\n+/*\n+** sel_z0_pn8_z4_z28:\n+**\tsel\t{z0\\.b - z1\\.b}, pn8, {z4\\.b - z5\\.b}, {z28\\.b - z29\\.b}\n+**\tret\n+*/\n+TEST_XN (sel_z0_pn8_z4_z28, svmfloat8x2_t, z0,\n+\t svsel_mf8_x2 (pn8, z4, z28),\n+\t svsel (pn8, z4, z28))\n+\n+/*\n+** sel_z4_pn8_z18_z0:\n+**\tsel\t{z4\\.b - z5\\.b}, pn8, {z18\\.b - z19\\.b}, {z0\\.b - z1\\.b}\n+**\tret\n+*/\n+TEST_XN (sel_z4_pn8_z18_z0, svmfloat8x2_t, z4,\n+\t svsel_mf8_x2 (pn8, z18, z0),\n+\t svsel (pn8, z18, z0))\n+\n+/*\n+** sel_z18_pn15_z28_z4:\n+**\tsel\t{z18\\.b - z19\\.b}, pn15, {z28\\.b - z29\\.b}, {z4\\.b - z5\\.b}\n+**\tret\n+*/\n+TEST_XN (sel_z18_pn15_z28_z4, svmfloat8x2_t, z18,\n+\t svsel_mf8_x2 (pn15, z28, z4),\n+\t svsel (pn15, z28, z4))\n+\n+/*\n+** sel_z18_pn8_z18_z4:\n+**\tsel\t{z18\\.b - z19\\.b}, pn8, {z18\\.b - z19\\.b}, {z4\\.b - z5\\.b}\n+**\tret\n+*/\n+TEST_XN (sel_z18_pn8_z18_z4, svmfloat8x2_t, z18,\n+\t svsel_mf8_x2 (pn8, z18, z4),\n+\t svsel (pn8, z18, z4))\n+\n+/*\n+** sel_z23_pn15_z0_z18:\n+**\tsel\t[^\\n]+, pn15, {z0\\.b - z1\\.b}, 
{z18\\.b - z19\\.b}\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (sel_z23_pn15_z0_z18, svmfloat8x2_t, z23,\n+\t svsel_mf8_x2 (pn15, z0, z18),\n+\t svsel (pn15, z0, z18))\n+\n+/*\n+** sel_z0_pn15_z23_z28:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tsel\t{z0\\.b - z1\\.b}, pn15, {[^}]+}, {z28\\.b - z29\\.b}\n+**\tret\n+*/\n+TEST_XN (sel_z0_pn15_z23_z28, svmfloat8x2_t, z0,\n+\t svsel_mf8_x2 (pn15, z23, z28),\n+\t svsel (pn15, z23, z28))\n+\n+/*\n+** sel_z0_pn8_z28_z23:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tsel\t{z0\\.b - z1\\.b}, pn8, {z28\\.b - z29\\.b}, {[^}]+}\n+**\tret\n+*/\n+TEST_XN (sel_z0_pn8_z28_z23, svmfloat8x2_t, z0,\n+\t svsel_mf8_x2 (pn8, z28, z23),\n+\t svsel (pn8, z28, z23))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x4.c\nnew file mode 100644\nindex 00000000000..ddcba0318d9\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sel_mf8_x4.c\n@@ -0,0 +1,92 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** sel_z0_pn0_z0_z4:\n+**\tmov\tp([0-9]+)\\.b, p0\\.b\n+**\tsel\t{z0\\.b - z3\\.b}, pn\\1, {z0\\.b - z3\\.b}, {z4\\.b - z7\\.b}\n+**\tret\n+*/\n+TEST_XN (sel_z0_pn0_z0_z4, svmfloat8x4_t, z0,\n+\t svsel_mf8_x4 (pn0, z0, z4),\n+\t svsel (pn0, z0, z4))\n+\n+/*\n+** sel_z0_pn7_z0_z4:\n+**\tmov\tp([0-9]+)\\.b, p7\\.b\n+**\tsel\t{z0\\.b - z3\\.b}, pn\\1, {z0\\.b - z3\\.b}, {z4\\.b - z7\\.b}\n+**\tret\n+*/\n+TEST_XN (sel_z0_pn7_z0_z4, svmfloat8x4_t, z0,\n+\t svsel_mf8_x4 (pn7, z0, z4),\n+\t svsel (pn7, z0, z4))\n+\n+/*\n+** sel_z0_pn8_z4_z28:\n+**\tsel\t{z0\\.b - z3\\.b}, pn8, {z4\\.b - z7\\.b}, {z28\\.b - z31\\.b}\n+**\tret\n+*/\n+TEST_XN (sel_z0_pn8_z4_z28, svmfloat8x4_t, z0,\n+\t svsel_mf8_x4 (pn8, z4, z28),\n+\t svsel (pn8, z4, z28))\n+\n+/*\n+** sel_z4_pn8_z18_z0:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tsel\t{z4\\.b - 
z7\\.b}, pn8, {[^}]+}, {z0\\.b - z3\\.b}\n+**\tret\n+*/\n+TEST_XN (sel_z4_pn8_z18_z0, svmfloat8x4_t, z4,\n+\t svsel_mf8_x4 (pn8, z18, z0),\n+\t svsel (pn8, z18, z0))\n+\n+/*\n+** sel_z18_pn15_z28_z4:\n+**\tsel\t{[^}]+}, pn15, {z28\\.b - z31\\.b}, {z4\\.b - z7\\.b}\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (sel_z18_pn15_z28_z4, svmfloat8x4_t, z18,\n+\t svsel_mf8_x4 (pn15, z28, z4),\n+\t svsel (pn15, z28, z4))\n+\n+/*\n+** sel_z18_pn8_z18_z4:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tsel\t{[^}]+}, pn8, {[^}]+}, {z4\\.b - z7\\.b}\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (sel_z18_pn8_z18_z4, svmfloat8x4_t, z18,\n+\t svsel_mf8_x4 (pn8, z18, z4),\n+\t svsel (pn8, z18, z4))\n+\n+/*\n+** sel_z23_pn15_z0_z18:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tsel\t[^\\n]+, pn15, {z0\\.b - z3\\.b}, {[^}]+}\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (sel_z23_pn15_z0_z18, svmfloat8x4_t, z23,\n+\t svsel_mf8_x4 (pn15, z0, z18),\n+\t svsel (pn15, z0, z18))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x2.c\nnew file mode 100644\nindex 00000000000..c778c139e8e\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x2.c\n@@ -0,0 +1,262 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! 
ilp32 } } } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** st1_mf8_base:\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_base, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0, z0),\n+\t\t svst1 (pn8, x0, z0))\n+\n+/*\n+** st1_mf8_index:\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_index, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0 + x1, z0),\n+\t\t svst1 (pn8, x0 + x1, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** st1_mf8_1:\n+**\tincb\tx0\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0 + svcntb (), z0),\n+\t\t svst1 (pn8, x0 + svcntb (), z0))\n+\n+/*\n+** st1_mf8_2:\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0 + svcntb () * 2, z0),\n+\t\t svst1 (pn8, x0 + svcntb () * 2, z0))\n+\n+/*\n+** st1_mf8_14:\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0 + svcntb () * 14, z0),\n+\t\t svst1 (pn8, x0 + svcntb () * 14, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** st1_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0 + svcntb () * 16, z0),\n+\t\t svst1 (pn8, x0 + svcntb () * 16, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** st1_mf8_m1:\n+**\tdecb\tx0\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0 - svcntb (), z0),\n+\t\t svst1 (pn8, x0 - svcntb (), z0))\n+\n+/*\n+** st1_mf8_m2:\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0 - svcntb () * 2, z0),\n+\t\t svst1 (pn8, x0 - svcntb () * 2, z0))\n+\n+/*\n+** st1_mf8_m16:\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0 - svcntb () * 16, z0),\n+\t\t svst1 (pn8, x0 - svcntb () * 16, z0))\n+\n+/*\n+** st1_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[\\1\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0 - svcntb () * 18, z0),\n+\t\t svst1 (pn8, x0 - svcntb () * 18, z0))\n+\n+/*\n+** st1_mf8_z17:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tst1b\t{z[^\\n]+}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_z17, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0, z17),\n+\t\t svst1 (pn8, x0, z17))\n+\n+/*\n+** st1_mf8_z22:\n+**\tst1b\t{z22\\.b(?: - |, )z23\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_z22, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0, z22),\n+\t\t svst1 (pn8, x0, z22))\n+\n+/*\n+** st1_mf8_z28:\n+**\tst1b\t{z28\\.b(?: - |, )z29\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_z28, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn8, x0, z28),\n+\t\t svst1 (pn8, x0, z28))\n+\n+/*\n+** st1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, p0\\.b\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_pn0, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn0, x0, z0),\n+\t\t svst1 (pn0, x0, 
z0))\n+\n+/*\n+** st1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_pn7, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn7, x0, z0),\n+\t\t svst1 (pn7, x0, z0))\n+\n+/*\n+** st1_mf8_pn15:\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn15, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_pn15, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_mf8_x2 (pn15, x0, z0),\n+\t\t svst1 (pn15, x0, z0))\n+\n+/*\n+** st1_vnum_mf8_0:\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x2 (pn8, x0, 0, z0),\n+\t\t svst1_vnum (pn8, x0, 0, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** st1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x2 (pn8, x0, 1, z0),\n+\t\t svst1_vnum (pn8, x0, 1, z0))\n+\n+/*\n+** st1_vnum_mf8_2:\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x2 (pn8, x0, 2, z0),\n+\t\t svst1_vnum (pn8, x0, 2, z0))\n+\n+/*\n+** st1_vnum_mf8_14:\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x2 (pn8, x0, 14, z0),\n+\t\t svst1_vnum (pn8, x0, 14, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** st1_vnum_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x2 (pn8, x0, 16, z0),\n+\t\t svst1_vnum (pn8, x0, 16, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** st1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x2 (pn8, x0, -1, z0),\n+\t\t svst1_vnum (pn8, x0, -1, z0))\n+\n+/*\n+** st1_vnum_mf8_m2:\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x2 (pn8, x0, -2, z0),\n+\t\t svst1_vnum (pn8, x0, -2, z0))\n+\n+/*\n+** st1_vnum_mf8_m16:\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x2 (pn8, x0, -16, z0),\n+\t\t svst1_vnum (pn8, x0, -16, z0))\n+\n+/*\n+** st1_vnum_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[\\1\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x2 (pn8, x0, -18, z0),\n+\t\t svst1_vnum (pn8, x0, -18, z0))\n+\n+/*\n+** st1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tst1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x2 (pn8, x0, x1, z0),\n+\t\t svst1_vnum (pn8, x0, x1, z0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x4.c\nnew file mode 100644\nindex 00000000000..5f60757f07b\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/st1_mf8_x4.c\n@@ -0,0 +1,354 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! 
ilp32 } } } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** st1_mf8_base:\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_base, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0, z0),\n+\t\t svst1 (pn8, x0, z0))\n+\n+/*\n+** st1_mf8_index:\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_index, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 + x1, z0),\n+\t\t svst1 (pn8, x0 + x1, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** st1_mf8_1:\n+**\tincb\tx0\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 + svcntb (), z0),\n+\t\t svst1 (pn8, x0 + svcntb (), z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** st1_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 + svcntb () * 2, z0),\n+\t\t svst1 (pn8, x0 + svcntb () * 2, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** st1_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 + svcntb () * 3, z0),\n+\t\t svst1 (pn8, x0 + svcntb () * 3, z0))\n+\n+/*\n+** st1_mf8_4:\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 + svcntb () * 4, z0),\n+\t\t svst1 (pn8, x0 + svcntb () * 4, z0))\n+\n+/*\n+** st1_mf8_28:\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 + svcntb () * 28, z0),\n+\t\t svst1 (pn8, x0 + svcntb () * 28, z0))\n+\n+/*\n+** st1_mf8_32:\n+**\t[^{]*\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 + svcntb () * 32, z0),\n+\t\t svst1 (pn8, x0 + svcntb () * 32, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** st1_mf8_m1:\n+**\tdecb\tx0\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 - svcntb (), z0),\n+\t\t svst1 (pn8, x0 - svcntb (), z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** st1_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 - svcntb () * 2, z0),\n+\t\t svst1 (pn8, x0 - svcntb () * 2, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** st1_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 - svcntb () * 3, z0),\n+\t\t svst1 (pn8, x0 - svcntb () * 3, z0))\n+\n+/*\n+** st1_mf8_m4:\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 - svcntb () * 4, z0),\n+\t\t svst1 (pn8, x0 - svcntb () * 4, z0))\n+\n+/*\n+** st1_mf8_m32:\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 - svcntb () * 32, z0),\n+\t\t svst1 (pn8, x0 - svcntb () * 32, z0))\n+\n+/*\n+** st1_mf8_m36:\n+**\t[^{]*\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0 - svcntb () * 36, z0),\n+\t\t svst1 (pn8, x0 - svcntb () * 36, z0))\n+\n+/*\n+** st1_mf8_z17:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tst1b\t{z[^\\n]+}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_z17, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0, z17),\n+\t\t svst1 (pn8, x0, z17))\n+\n+/*\n+** st1_mf8_z22:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tst1b\t{z[^\\n]+}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_z22, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0, z22),\n+\t\t svst1 (pn8, x0, z22))\n+\n+/*\n+** st1_mf8_z28:\n+**\tst1b\t{z28\\.b - z31\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_z28, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn8, x0, z28),\n+\t\t svst1 (pn8, x0, z28))\n+\n+/*\n+** st1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, p0\\.b\n+**\tst1b\t{z0\\.b - z3\\.b}, pn\\1, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_pn0, svmfloat8x4_t, mfloat8_t,\n+\t\t 
svst1_mf8_x4 (pn0, x0, z0),\n+\t\t svst1 (pn0, x0, z0))\n+\n+/*\n+** st1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tst1b\t{z0\\.b - z3\\.b}, pn\\1, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_pn7, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn7, x0, z0),\n+\t\t svst1 (pn7, x0, z0))\n+\n+/*\n+** st1_mf8_pn15:\n+**\tst1b\t{z0\\.b - z3\\.b}, pn15, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_mf8_pn15, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_mf8_x4 (pn15, x0, z0),\n+\t\t svst1 (pn15, x0, z0))\n+\n+/*\n+** st1_vnum_mf8_0:\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, 0, z0),\n+\t\t svst1_vnum (pn8, x0, 0, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** st1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, 1, z0),\n+\t\t svst1_vnum (pn8, x0, 1, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** st1_vnum_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, 2, z0),\n+\t\t svst1_vnum (pn8, x0, 2, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** st1_vnum_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, 3, z0),\n+\t\t svst1_vnum (pn8, x0, 3, z0))\n+\n+/*\n+** st1_vnum_mf8_4:\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, 4, z0),\n+\t\t svst1_vnum (pn8, x0, 4, z0))\n+\n+/*\n+** st1_vnum_mf8_28:\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, 28, z0),\n+\t\t svst1_vnum (pn8, x0, 28, z0))\n+\n+/*\n+** st1_vnum_mf8_32:\n+**\t[^{]*\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, 32, z0),\n+\t\t svst1_vnum (pn8, x0, 32, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** st1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, -1, z0),\n+\t\t svst1_vnum (pn8, x0, -1, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** st1_vnum_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, -2, z0),\n+\t\t svst1_vnum (pn8, x0, -2, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** st1_vnum_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, -3, z0),\n+\t\t svst1_vnum (pn8, x0, -3, z0))\n+\n+/*\n+** st1_vnum_mf8_m4:\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, -4, z0),\n+\t\t svst1_vnum (pn8, x0, -4, z0))\n+\n+/*\n+** st1_vnum_mf8_m32:\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, -32, z0),\n+\t\t svst1_vnum (pn8, x0, -32, z0))\n+\n+/*\n+** st1_vnum_mf8_m36:\n+**\t[^{]*\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, -36, z0),\n+\t\t svst1_vnum (pn8, x0, -36, z0))\n+\n+/*\n+** st1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tst1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_STORE_COUNT (st1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,\n+\t\t svst1_vnum_mf8_x4 (pn8, x0, x1, z0),\n+\t\t svst1_vnum (pn8, x0, x1, z0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x2.c\nnew file mode 100644\nindex 00000000000..f9a90fbe9b0\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x2.c\n@@ -0,0 +1,262 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! 
ilp32 } } } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** stnt1_mf8_base:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_base, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0, z0),\n+\t\t svstnt1 (pn8, x0, z0))\n+\n+/*\n+** stnt1_mf8_index:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_index, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 + x1, z0),\n+\t\t svstnt1 (pn8, x0 + x1, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_mf8_1:\n+**\tincb\tx0\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 + svcntb (), z0),\n+\t\t svstnt1 (pn8, x0 + svcntb (), z0))\n+\n+/*\n+** stnt1_mf8_2:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 + svcntb () * 2, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 2, z0))\n+\n+/*\n+** stnt1_mf8_14:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 + svcntb () * 14, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 14, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 + svcntb () * 16, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 16, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_mf8_m1:\n+**\tdecb\tx0\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 - svcntb (), z0),\n+\t\t svstnt1 (pn8, x0 - svcntb (), z0))\n+\n+/*\n+** stnt1_mf8_m2:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 - svcntb () * 2, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 2, z0))\n+\n+/*\n+** stnt1_mf8_m16:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 - svcntb () * 16, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 16, z0))\n+\n+/*\n+** stnt1_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[\\1\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 - svcntb () * 18, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 18, z0))\n+\n+/*\n+** stnt1_mf8_z17:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tstnt1b\t{z[^\\n]+}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_z17, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0, z17),\n+\t\t svstnt1 (pn8, x0, z17))\n+\n+/*\n+** stnt1_mf8_z22:\n+**\tstnt1b\t{z22\\.b(?: - |, )z23\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_z22, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0, z22),\n+\t\t svstnt1 (pn8, x0, z22))\n+\n+/*\n+** stnt1_mf8_z28:\n+**\tstnt1b\t{z28\\.b(?: - |, )z29\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_z28, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0, z28),\n+\t\t svstnt1 (pn8, x0, z28))\n+\n+/*\n+** stnt1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, p0\\.b\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_pn0, svmfloat8x2_t, 
mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn0, x0, z0),\n+\t\t svstnt1 (pn0, x0, z0))\n+\n+/*\n+** stnt1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_pn7, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn7, x0, z0),\n+\t\t svstnt1 (pn7, x0, z0))\n+\n+/*\n+** stnt1_mf8_pn15:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn15, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_pn15, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn15, x0, z0),\n+\t\t svstnt1 (pn15, x0, z0))\n+\n+/*\n+** stnt1_vnum_mf8_0:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, 0, z0),\n+\t\t svstnt1_vnum (pn8, x0, 0, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, 1, z0),\n+\t\t svstnt1_vnum (pn8, x0, 1, z0))\n+\n+/*\n+** stnt1_vnum_mf8_2:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, 2, z0),\n+\t\t svstnt1_vnum (pn8, x0, 2, z0))\n+\n+/*\n+** stnt1_vnum_mf8_14:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, 14, z0),\n+\t\t svstnt1_vnum (pn8, x0, 14, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_vnum_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, 16, z0),\n+\t\t svstnt1_vnum (pn8, x0, 16, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, -1, z0),\n+\t\t svstnt1_vnum (pn8, x0, -1, z0))\n+\n+/*\n+** stnt1_vnum_mf8_m2:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, -2, z0),\n+\t\t svstnt1_vnum (pn8, x0, -2, z0))\n+\n+/*\n+** stnt1_vnum_mf8_m16:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, -16, z0),\n+\t\t svstnt1_vnum (pn8, x0, -16, z0))\n+\n+/*\n+** stnt1_vnum_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[\\1\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, -18, z0),\n+\t\t svstnt1_vnum (pn8, x0, -18, z0))\n+\n+/*\n+** stnt1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, x1, z0),\n+\t\t svstnt1_vnum (pn8, x0, x1, z0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x4.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x4.c\nnew file mode 100644\nindex 00000000000..a204f796982\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/stnt1_mf8_x4.c\n@@ -0,0 +1,354 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! ilp32 } } } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** stnt1_mf8_base:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_base, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0, z0),\n+\t\t svstnt1 (pn8, x0, z0))\n+\n+/*\n+** stnt1_mf8_index:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_index, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + x1, z0),\n+\t\t svstnt1 (pn8, x0 + x1, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_mf8_1:\n+**\tincb\tx0\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + svcntb (), z0),\n+\t\t svstnt1 (pn8, x0 + svcntb (), z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + svcntb () * 2, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 2, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + svcntb () * 3, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 3, z0))\n+\n+/*\n+** stnt1_mf8_4:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + svcntb () * 4, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 4, z0))\n+\n+/*\n+** stnt1_mf8_28:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + svcntb () * 28, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 28, z0))\n+\n+/*\n+** stnt1_mf8_32:\n+**\t[^{]*\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + svcntb () * 32, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 32, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_mf8_m1:\n+**\tdecb\tx0\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 - svcntb (), z0),\n+\t\t svstnt1 (pn8, x0 - svcntb (), z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 - svcntb () * 2, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 2, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 - svcntb () * 3, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 3, z0))\n+\n+/*\n+** stnt1_mf8_m4:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 - svcntb () * 4, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 4, z0))\n+\n+/*\n+** stnt1_mf8_m32:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 - svcntb () * 32, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 32, z0))\n+\n+/*\n+** stnt1_mf8_m36:\n+**\t[^{]*\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 - svcntb () * 36, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 36, z0))\n+\n+/*\n+** stnt1_mf8_z17:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tstnt1b\t{z[^\\n]+}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_z17, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0, z17),\n+\t\t svstnt1 (pn8, x0, z17))\n+\n+/*\n+** stnt1_mf8_z22:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tstnt1b\t{z[^\\n]+}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_z22, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0, z22),\n+\t\t svstnt1 (pn8, x0, z22))\n+\n+/*\n+** stnt1_mf8_z28:\n+**\tstnt1b\t{z28\\.b - z31\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_z28, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0, z28),\n+\t\t svstnt1 (pn8, x0, z28))\n+\n+/*\n+** stnt1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, p0\\.b\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn\\1, 
\\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_pn0, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn0, x0, z0),\n+\t\t svstnt1 (pn0, x0, z0))\n+\n+/*\n+** stnt1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn\\1, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_pn7, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn7, x0, z0),\n+\t\t svstnt1 (pn7, x0, z0))\n+\n+/*\n+** stnt1_mf8_pn15:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn15, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_pn15, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn15, x0, z0),\n+\t\t svstnt1 (pn15, x0, z0))\n+\n+/*\n+** stnt1_vnum_mf8_0:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 0, z0),\n+\t\t svstnt1_vnum (pn8, x0, 0, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 1, z0),\n+\t\t svstnt1_vnum (pn8, x0, 1, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_vnum_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 2, z0),\n+\t\t svstnt1_vnum (pn8, x0, 2, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_vnum_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 3, z0),\n+\t\t svstnt1_vnum (pn8, x0, 3, z0))\n+\n+/*\n+** stnt1_vnum_mf8_4:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 4, z0),\n+\t\t svstnt1_vnum (pn8, x0, 4, z0))\n+\n+/*\n+** stnt1_vnum_mf8_28:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 28, z0),\n+\t\t svstnt1_vnum (pn8, x0, 28, z0))\n+\n+/*\n+** stnt1_vnum_mf8_32:\n+**\t[^{]*\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 32, z0),\n+\t\t svstnt1_vnum (pn8, x0, 32, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, -1, z0),\n+\t\t svstnt1_vnum (pn8, x0, -1, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_vnum_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, -2, z0),\n+\t\t svstnt1_vnum (pn8, x0, -2, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_vnum_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, -3, z0),\n+\t\t svstnt1_vnum (pn8, x0, -3, z0))\n+\n+/*\n+** stnt1_vnum_mf8_m4:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, -4, z0),\n+\t\t svstnt1_vnum (pn8, x0, -4, z0))\n+\n+/*\n+** stnt1_vnum_mf8_m32:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, -32, z0),\n+\t\t svstnt1_vnum (pn8, x0, -32, z0))\n+\n+/*\n+** stnt1_vnum_mf8_m36:\n+**\t[^{]*\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, -36, z0),\n+\t\t svstnt1_vnum (pn8, x0, -36, z0))\n+\n+/*\n+** stnt1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, x1, z0),\n+\t\t svstnt1_vnum (pn8, x0, x1, z0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x2.c\nnew file mode 100644\nindex 00000000000..f107b4c7a18\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x2.c\n@@ -0,0 +1,77 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** uzp_z0_z0:\n+**\tuzp\t{z0\\.b - z1\\.b}, z0\\.b, z1\\.b\n+**\tret\n+*/\n+TEST_XN (uzp_z0_z0, 
svmfloat8x2_t, z0,\n+\t svuzp_mf8_x2 (z0),\n+\t svuzp (z0))\n+\n+/*\n+** uzp_z0_z4:\n+**\tuzp\t{z0\\.b - z1\\.b}, z4\\.b, z5\\.b\n+**\tret\n+*/\n+TEST_XN (uzp_z0_z4, svmfloat8x2_t, z0,\n+\t svuzp_mf8_x2 (z4),\n+\t svuzp (z4))\n+\n+/*\n+** uzp_z4_z18:\n+**\tuzp\t{z4\\.b - z5\\.b}, z18\\.b, z19\\.b\n+**\tret\n+*/\n+TEST_XN (uzp_z4_z18, svmfloat8x2_t, z4,\n+\t svuzp_mf8_x2 (z18),\n+\t svuzp (z18))\n+\n+/*\n+** uzp_z18_z23:\n+**\tuzp\t{z18\\.b - z19\\.b}, z23\\.b, z24\\.b\n+**\tret\n+*/\n+TEST_XN (uzp_z18_z23, svmfloat8x2_t, z18,\n+\t svuzp_mf8_x2 (z23),\n+\t svuzp (z23))\n+\n+/*\n+** uzp_z23_z28:\n+**\tuzp\t[^\\n]+, z28\\.b, z29\\.b\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (uzp_z23_z28, svmfloat8x2_t, z23,\n+\t svuzp_mf8_x2 (z28),\n+\t svuzp (z28))\n+\n+/*\n+** uzp_z28_z0:\n+**\tuzp\t{z28\\.b - z29\\.b}, z0\\.b, z1\\.b\n+**\tret\n+*/\n+TEST_XN (uzp_z28_z0, svmfloat8x2_t, z28,\n+\t svuzp_mf8_x2 (z0),\n+\t svuzp (z0))\n+\n+/*\n+** uzp_z28_z0_z23:\t{ xfail aarch64_big_endian }\n+**\tuzp\t{z28\\.b - z29\\.b}, z0\\.b, z23\\.b\n+**\tret\n+*/\n+TEST_XN (uzp_z28_z0_z23, svmfloat8x2_t, z28,\n+\t svuzp_mf8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),\n+\t svuzp (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))\n+\n+/*\n+** uzp_z28_z5_z19:\n+**\tuzp\t{z28\\.b - z29\\.b}, z5\\.b, z19\\.b\n+**\tret\n+*/\n+TEST_XN (uzp_z28_z5_z19, svmfloat8x2_t, z28,\n+\t svuzp_mf8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),\n+\t svuzp (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x4.c\nnew file mode 100644\nindex 00000000000..bbaf26c85a5\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_mf8_x4.c\n@@ -0,0 +1,73 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** uzp_z0_z0:\n+**\tuzp\t{z0\\.b - z3\\.b}, {z0\\.b - z3\\.b}\n+**\tret\n+*/\n+TEST_XN 
(uzp_z0_z0, svmfloat8x4_t, z0,\n+\t svuzp_mf8_x4 (z0),\n+\t svuzp (z0))\n+\n+/*\n+** uzp_z0_z4:\n+**\tuzp\t{z0\\.b - z3\\.b}, {z4\\.b - z7\\.b}\n+**\tret\n+*/\n+TEST_XN (uzp_z0_z4, svmfloat8x4_t, z0,\n+\t svuzp_mf8_x4 (z4),\n+\t svuzp (z4))\n+\n+/*\n+** uzp_z4_z18:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tuzp\t{z4\\.b - z7\\.b}, [^\\n]+\n+**\tret\n+*/\n+TEST_XN (uzp_z4_z18, svmfloat8x4_t, z4,\n+\t svuzp_mf8_x4 (z18),\n+\t svuzp (z18))\n+\n+/*\n+** uzp_z18_z23:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tuzp\t{z[^\\n]+}, {z[^\\n]+}\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (uzp_z18_z23, svmfloat8x4_t, z18,\n+\t svuzp_mf8_x4 (z23),\n+\t svuzp (z23))\n+\n+/*\n+** uzp_z23_z28:\n+**\tuzp\t[^\\n]+, {z28\\.b - z31\\.b}\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (uzp_z23_z28, svmfloat8x4_t, z23,\n+\t svuzp_mf8_x4 (z28),\n+\t svuzp (z28))\n+\n+/*\n+** uzp_z28_z0:\n+**\tuzp\t{z28\\.b - z31\\.b}, {z0\\.b - z3\\.b}\n+**\tret\n+*/\n+TEST_XN (uzp_z28_z0, svmfloat8x4_t, z28,\n+\t svuzp_mf8_x4 (z0),\n+\t svuzp (z0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x2.c\nnew file mode 100644\nindex 00000000000..cef514c46e8\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x2.c\n@@ -0,0 +1,77 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** uzpq_z0_z0:\n+**\tuzp\t{z0\\.q - z1\\.q}, z0\\.q, z1\\.q\n+**\tret\n+*/\n+TEST_XN (uzpq_z0_z0, svmfloat8x2_t, z0,\n+\t svuzpq_mf8_x2 (z0),\n+\t svuzpq (z0))\n+\n+/*\n+** uzpq_z0_z4:\n+**\tuzp\t{z0\\.q - z1\\.q}, z4\\.q, z5\\.q\n+**\tret\n+*/\n+TEST_XN (uzpq_z0_z4, svmfloat8x2_t, z0,\n+\t svuzpq_mf8_x2 (z4),\n+\t svuzpq (z4))\n+\n+/*\n+** uzpq_z4_z18:\n+**\tuzp\t{z4\\.q 
- z5\\.q}, z18\\.q, z19\\.q\n+**\tret\n+*/\n+TEST_XN (uzpq_z4_z18, svmfloat8x2_t, z4,\n+\t svuzpq_mf8_x2 (z18),\n+\t svuzpq (z18))\n+\n+/*\n+** uzpq_z18_z23:\n+**\tuzp\t{z18\\.q - z19\\.q}, z23\\.q, z24\\.q\n+**\tret\n+*/\n+TEST_XN (uzpq_z18_z23, svmfloat8x2_t, z18,\n+\t svuzpq_mf8_x2 (z23),\n+\t svuzpq (z23))\n+\n+/*\n+** uzpq_z23_z28:\n+**\tuzp\t[^\\n]+, z28\\.q, z29\\.q\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (uzpq_z23_z28, svmfloat8x2_t, z23,\n+\t svuzpq_mf8_x2 (z28),\n+\t svuzpq (z28))\n+\n+/*\n+** uzpq_z28_z0:\n+**\tuzp\t{z28\\.q - z29\\.q}, z0\\.q, z1\\.q\n+**\tret\n+*/\n+TEST_XN (uzpq_z28_z0, svmfloat8x2_t, z28,\n+\t svuzpq_mf8_x2 (z0),\n+\t svuzpq (z0))\n+\n+/*\n+** uzpq_z28_z0_z23:\t{ xfail aarch64_big_endian }\n+**\tuzp\t{z28\\.q - z29\\.q}, z0\\.q, z23\\.q\n+**\tret\n+*/\n+TEST_XN (uzpq_z28_z0_z23, svmfloat8x2_t, z28,\n+\t svuzpq_mf8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),\n+\t svuzpq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))\n+\n+/*\n+** uzpq_z28_z5_z19:\n+**\tuzp\t{z28\\.q - z29\\.q}, z5\\.q, z19\\.q\n+**\tret\n+*/\n+TEST_XN (uzpq_z28_z5_z19, svmfloat8x2_t, z28,\n+\t svuzpq_mf8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),\n+\t svuzpq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x4.c\nnew file mode 100644\nindex 00000000000..6b348c95f83\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_mf8_x4.c\n@@ -0,0 +1,73 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** uzpq_z0_z0:\n+**\tuzp\t{z0\\.q - z3\\.q}, {z0\\.q - z3\\.q}\n+**\tret\n+*/\n+TEST_XN (uzpq_z0_z0, svmfloat8x4_t, z0,\n+\t svuzpq_mf8_x4 (z0),\n+\t svuzpq (z0))\n+\n+/*\n+** uzpq_z0_z4:\n+**\tuzp\t{z0\\.q - z3\\.q}, {z4\\.q - z7\\.q}\n+**\tret\n+*/\n+TEST_XN (uzpq_z0_z4, svmfloat8x4_t, z0,\n+\t svuzpq_mf8_x4 (z4),\n+\t svuzpq 
(z4))\n+\n+/*\n+** uzpq_z4_z18:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tuzp\t{z4\\.q - z7\\.q}, [^\\n]+\n+**\tret\n+*/\n+TEST_XN (uzpq_z4_z18, svmfloat8x4_t, z4,\n+\t svuzpq_mf8_x4 (z18),\n+\t svuzpq (z18))\n+\n+/*\n+** uzpq_z18_z23:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tuzp\t{z[^\\n]+}, {z[^\\n]+}\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (uzpq_z18_z23, svmfloat8x4_t, z18,\n+\t svuzpq_mf8_x4 (z23),\n+\t svuzpq (z23))\n+\n+/*\n+** uzpq_z23_z28:\n+**\tuzp\t[^\\n]+, {z28\\.q - z31\\.q}\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (uzpq_z23_z28, svmfloat8x4_t, z23,\n+\t svuzpq_mf8_x4 (z28),\n+\t svuzpq (z28))\n+\n+/*\n+** uzpq_z28_z0:\n+**\tuzp\t{z28\\.q - z31\\.q}, {z0\\.q - z3\\.q}\n+**\tret\n+*/\n+TEST_XN (uzpq_z28_z0, svmfloat8x4_t, z28,\n+\t svuzpq_mf8_x4 (z0),\n+\t svuzpq (z0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c\nindex a2af846b60b..8df504cb423 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg2.c\n@@ -22,6 +22,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x2_t,\n \t svwrite_hor_za8_u8_vg2 (0, 1, z4),\n \t svwrite_hor_za8_u8_vg2 (0, 1, z4))\n \n+/*\n+** write_za8_mf8_z4_0_1:\n+**\tmov\t(w1[2-5]), #?1\n+**\tmova\tza0h\\.b\\[\\1, 0:1\\], {z4\\.b - z5\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_1, svmfloat8x2_t,\n+\t svwrite_hor_za8_mf8_vg2 (0, 1, z4),\n+\t svwrite_hor_za8_mf8_vg2 (0, 1, z4))\n+\n /*\n ** write_za8_s8_z28_0_w11:\n **\tmov\t(w1[2-5]), w11\n@@ -50,6 +60,15 @@ TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x2_t,\n \t svwrite_hor_za8_u8_vg2 (0, w15, z18),\n \t svwrite_hor_za8_u8_vg2 (0, w15, z18))\n \n+/*\n+** 
write_za8_mf8_z18_0_w15:\n+**\tmova\tza0h\\.b\\[w15, 0:1\\], {z18\\.b - z19\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z18_0_w15, svmfloat8x2_t,\n+\t svwrite_hor_za8_mf8_vg2 (0, w15, z18),\n+\t svwrite_hor_za8_mf8_vg2 (0, w15, z18))\n+\n /*\n ** write_za8_s8_z23_0_w12p14:\n **\tmov\t[^\\n]+\n@@ -71,6 +90,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x2_t,\n \t svwrite_hor_za8_u8_vg2 (0, w12 + 1, z4),\n \t svwrite_hor_za8_u8_vg2 (0, w12 + 1, z4))\n \n+/*\n+** write_za8_mf8_z4_0_w12p1:\n+**\tadd\t(w[0-9]+), w12, #?1\n+**\tmova\tza0h\\.b\\[\\1, 0:1\\], {z4\\.b - z5\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_w12p1, svmfloat8x2_t,\n+\t svwrite_hor_za8_mf8_vg2 (0, w12 + 1, z4),\n+\t svwrite_hor_za8_mf8_vg2 (0, w12 + 1, z4))\n+\n /*\n ** write_za8_s8_z28_0_w12p2:\n **\tmova\tza0h\\.b\\[w12, 2:3\\], {z28\\.b - z29\\.b}\n@@ -90,6 +119,16 @@ TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x2_t,\n \t svwrite_hor_za8_u8_vg2 (0, w15 + 3, z0),\n \t svwrite_hor_za8_u8_vg2 (0, w15 + 3, z0))\n \n+/*\n+** write_za8_mf8_z0_0_w15p3:\n+**\tadd\t(w[0-9]+), w15, #?3\n+**\tmova\tza0h\\.b\\[\\1, 0:1\\], {z0\\.b - z1\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z0_0_w15p3, svmfloat8x2_t,\n+\t svwrite_hor_za8_mf8_vg2 (0, w15 + 3, z0),\n+\t svwrite_hor_za8_mf8_vg2 (0, w15 + 3, z0))\n+\n /*\n ** write_za8_u8_z4_0_w15p12:\n **\tmova\tza0h\\.b\\[w15, 12:13\\], {z4\\.b - z5\\.b}\n@@ -99,6 +138,15 @@ TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x2_t,\n \t svwrite_hor_za8_u8_vg2 (0, w15 + 12, z4),\n \t svwrite_hor_za8_u8_vg2 (0, w15 + 12, z4))\n \n+/*\n+** write_za8_mf8_z4_0_w15p12:\n+**\tmova\tza0h\\.b\\[w15, 12:13\\], {z4\\.b - z5\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_w15p12, svmfloat8x2_t,\n+\t svwrite_hor_za8_mf8_vg2 (0, w15 + 12, z4),\n+\t svwrite_hor_za8_mf8_vg2 (0, w15 + 12, z4))\n+\n /*\n ** write_za8_u8_z28_0_w12p15:\n **\tadd\t(w[0-9]+), w12, #?15\n@@ -109,6 +157,16 @@ TEST_ZA_XN (write_za8_u8_z28_0_w12p15, svuint8x2_t,\n \t svwrite_hor_za8_u8_vg2 (0, w12 
+ 15, z28),\n \t svwrite_hor_za8_u8_vg2 (0, w12 + 15, z28))\n \n+/*\n+** write_za8_mf8_z28_0_w12p15:\n+**\tadd\t(w[0-9]+), w12, #?15\n+**\tmova\tza0h\\.b\\[\\1, 0:1\\], {z28\\.b - z29\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z28_0_w12p15, svmfloat8x2_t,\n+\t svwrite_hor_za8_mf8_vg2 (0, w12 + 15, z28),\n+\t svwrite_hor_za8_mf8_vg2 (0, w12 + 15, z28))\n+\n /*\n ** write_za8_s8_z0_0_w15p16:\n **\tadd\t(w[0-9]+), w15, #?16\n@@ -129,6 +187,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x2_t,\n \t svwrite_hor_za8_u8_vg2 (0, w12 - 1, z4),\n \t svwrite_hor_za8_u8_vg2 (0, w12 - 1, z4))\n \n+/*\n+** write_za8_mf8_z4_0_w12m1:\n+**\tsub\t(w[0-9]+), w12, #?1\n+**\tmova\tza0h\\.b\\[\\1, 0:1\\], {z4\\.b - z5\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_w12m1, svmfloat8x2_t,\n+\t svwrite_hor_za8_mf8_vg2 (0, w12 - 1, z4),\n+\t svwrite_hor_za8_mf8_vg2 (0, w12 - 1, z4))\n+\n /*\n ** write_za8_u8_z18_0_w16:\n **\tmov\t(w1[2-5]), w16\n@@ -138,3 +206,13 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x2_t,\n TEST_ZA_XN (write_za8_u8_z18_0_w16, svuint8x2_t,\n \t svwrite_hor_za8_u8_vg2 (0, w16, z18),\n \t svwrite_hor_za8_u8_vg2 (0, w16, z18))\n+\n+/*\n+** write_za8_mf8_z18_0_w16:\n+**\tmov\t(w1[2-5]), w16\n+**\tmova\tza0h\\.b\\[\\1, 0:1\\], {z18\\.b - z19\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z18_0_w16, svmfloat8x2_t,\n+\t svwrite_hor_za8_mf8_vg2 (0, w16, z18),\n+\t svwrite_hor_za8_mf8_vg2 (0, w16, z18))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c\nindex e333ce699e3..70a2e95db96 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_hor_za8_vg4.c\n@@ -22,6 +22,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x4_t,\n \t svwrite_hor_za8_u8_vg4 (0, 1, z4),\n \t svwrite_hor_za8_u8_vg4 (0, 1, z4))\n \n+/*\n+** write_za8_mf8_z4_0_1:\n+**\tmov\t(w1[2-5]), 
#?1\n+**\tmova\tza0h\\.b\\[\\1, 0:3\\], {z4\\.b - z7\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_1, svmfloat8x4_t,\n+\t svwrite_hor_za8_mf8_vg4 (0, 1, z4),\n+\t svwrite_hor_za8_mf8_vg4 (0, 1, z4))\n+\n /*\n ** write_za8_s8_z28_0_w11:\n **\tmov\t(w1[2-5]), w11\n@@ -54,6 +64,19 @@ TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x4_t,\n \t svwrite_hor_za8_u8_vg4 (0, w15, z18),\n \t svwrite_hor_za8_u8_vg4 (0, w15, z18))\n \n+/*\n+** write_za8_mf8_z18_0_w15:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmova\tza0h\\.b\\[w15, 0:3\\], {[^\\n]+}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z18_0_w15, svmfloat8x4_t,\n+\t svwrite_hor_za8_mf8_vg4 (0, w15, z18),\n+\t svwrite_hor_za8_mf8_vg4 (0, w15, z18))\n+\n /*\n ** write_za8_s8_z23_0_w12p12:\n **\tmov\t[^\\n]+\n@@ -77,6 +100,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x4_t,\n \t svwrite_hor_za8_u8_vg4 (0, w12 + 1, z4),\n \t svwrite_hor_za8_u8_vg4 (0, w12 + 1, z4))\n \n+/*\n+** write_za8_mf8_z4_0_w12p1:\n+**\tadd\t(w[0-9]+), w12, #?1\n+**\tmova\tza0h\\.b\\[\\1, 0:3\\], {z4\\.b - z7\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_w12p1, svmfloat8x4_t,\n+\t svwrite_hor_za8_mf8_vg4 (0, w12 + 1, z4),\n+\t svwrite_hor_za8_mf8_vg4 (0, w12 + 1, z4))\n+\n /*\n ** write_za8_s8_z28_0_w12p2:\n **\tadd\t(w[0-9]+), w12, #?2\n@@ -97,6 +130,16 @@ TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x4_t,\n \t svwrite_hor_za8_u8_vg4 (0, w15 + 3, z0),\n \t svwrite_hor_za8_u8_vg4 (0, w15 + 3, z0))\n \n+/*\n+** write_za8_mf8_z0_0_w15p3:\n+**\tadd\t(w[0-9]+), w15, #?3\n+**\tmova\tza0h\\.b\\[\\1, 0:3\\], {z0\\.b - z3\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z0_0_w15p3, svmfloat8x4_t,\n+\t svwrite_hor_za8_mf8_vg4 (0, w15 + 3, z0),\n+\t svwrite_hor_za8_mf8_vg4 (0, w15 + 3, z0))\n+\n /*\n ** write_za8_u8_z0_0_w12p4:\n **\tmova\tza0h\\.b\\[w12, 4:7\\], {z0\\.b - z3\\.b}\n@@ -106,6 +149,15 @@ TEST_ZA_XN (write_za8_u8_z0_0_w12p4, svuint8x4_t,\n \t svwrite_hor_za8_u8_vg4 (0, w12 + 4, z0),\n \t 
svwrite_hor_za8_u8_vg4 (0, w12 + 4, z0))\n \n+/*\n+** write_za8_mf8_z0_0_w12p4:\n+**\tmova\tza0h\\.b\\[w12, 4:7\\], {z0\\.b - z3\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z0_0_w12p4, svmfloat8x4_t,\n+\t svwrite_hor_za8_mf8_vg4 (0, w12 + 4, z0),\n+\t svwrite_hor_za8_mf8_vg4 (0, w12 + 4, z0))\n+\n /*\n ** write_za8_u8_z4_0_w15p12:\n **\tmova\tza0h\\.b\\[w15, 12:15\\], {z4\\.b - z7\\.b}\n@@ -115,6 +167,15 @@ TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x4_t,\n \t svwrite_hor_za8_u8_vg4 (0, w15 + 12, z4),\n \t svwrite_hor_za8_u8_vg4 (0, w15 + 12, z4))\n \n+/*\n+** write_za8_mf8_z4_0_w15p12:\n+**\tmova\tza0h\\.b\\[w15, 12:15\\], {z4\\.b - z7\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_w15p12, svmfloat8x4_t,\n+\t svwrite_hor_za8_mf8_vg4 (0, w15 + 12, z4),\n+\t svwrite_hor_za8_mf8_vg4 (0, w15 + 12, z4))\n+\n /*\n ** write_za8_u8_z28_0_w12p14:\n **\tadd\t(w[0-9]+), w12, #?14\n@@ -125,6 +186,16 @@ TEST_ZA_XN (write_za8_u8_z28_0_w12p14, svuint8x4_t,\n \t svwrite_hor_za8_u8_vg4 (0, w12 + 14, z28),\n \t svwrite_hor_za8_u8_vg4 (0, w12 + 14, z28))\n \n+/*\n+** write_za8_mf8_z28_0_w12p14:\n+**\tadd\t(w[0-9]+), w12, #?14\n+**\tmova\tza0h\\.b\\[\\1, 0:3\\], {z28\\.b - z31\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z28_0_w12p14, svmfloat8x4_t,\n+\t svwrite_hor_za8_mf8_vg4 (0, w12 + 14, z28),\n+\t svwrite_hor_za8_mf8_vg4 (0, w12 + 14, z28))\n+\n /*\n ** write_za8_s8_z0_0_w15p16:\n **\tadd\t(w[0-9]+), w15, #?16\n@@ -145,6 +216,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x4_t,\n \t svwrite_hor_za8_u8_vg4 (0, w12 - 1, z4),\n \t svwrite_hor_za8_u8_vg4 (0, w12 - 1, z4))\n \n+/*\n+** write_za8_mf8_z4_0_w12m1:\n+**\tsub\t(w[0-9]+), w12, #?1\n+**\tmova\tza0h\\.b\\[\\1, 0:3\\], {z4\\.b - z7\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_w12m1, svmfloat8x4_t,\n+\t svwrite_hor_za8_mf8_vg4 (0, w12 - 1, z4),\n+\t svwrite_hor_za8_mf8_vg4 (0, w12 - 1, z4))\n+\n /*\n ** write_za8_u8_z28_0_w16:\n **\tmov\t(w1[2-5]), w16\n@@ -154,3 +235,13 @@ TEST_ZA_XN 
(write_za8_u8_z4_0_w12m1, svuint8x4_t,\n TEST_ZA_XN (write_za8_u8_z28_0_w16, svuint8x4_t,\n \t svwrite_hor_za8_u8_vg4 (0, w16, z28),\n \t svwrite_hor_za8_u8_vg4 (0, w16, z28))\n+\n+/*\n+** write_za8_mf8_z28_0_w16:\n+**\tmov\t(w1[2-5]), w16\n+**\tmova\tza0h\\.b\\[\\1, 0:3\\], {z28\\.b - z31\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z28_0_w16, svmfloat8x4_t,\n+\t svwrite_hor_za8_mf8_vg4 (0, w16, z28),\n+\t svwrite_hor_za8_mf8_vg4 (0, w16, z28))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c\nindex ce3dbdd8729..a576b753301 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg2.c\n@@ -22,6 +22,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x2_t,\n \t svwrite_ver_za8_u8_vg2 (0, 1, z4),\n \t svwrite_ver_za8_u8_vg2 (0, 1, z4))\n \n+/*\n+** write_za8_mf8_z4_0_1:\n+**\tmov\t(w1[2-5]), #?1\n+**\tmova\tza0v\\.b\\[\\1, 0:1\\], {z4\\.b - z5\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_1, svmfloat8x2_t,\n+\t svwrite_ver_za8_mf8_vg2 (0, 1, z4),\n+\t svwrite_ver_za8_mf8_vg2 (0, 1, z4))\n+\n /*\n ** write_za8_s8_z28_0_w11:\n **\tmov\t(w1[2-5]), w11\n@@ -50,6 +60,15 @@ TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x2_t,\n \t svwrite_ver_za8_u8_vg2 (0, w15, z18),\n \t svwrite_ver_za8_u8_vg2 (0, w15, z18))\n \n+/*\n+** write_za8_mf8_z18_0_w15:\n+**\tmova\tza0v\\.b\\[w15, 0:1\\], {z18\\.b - z19\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z18_0_w15, svmfloat8x2_t,\n+\t svwrite_ver_za8_mf8_vg2 (0, w15, z18),\n+\t svwrite_ver_za8_mf8_vg2 (0, w15, z18))\n+\n /*\n ** write_za8_s8_z23_0_w12p14:\n **\tmov\t[^\\n]+\n@@ -71,6 +90,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x2_t,\n \t svwrite_ver_za8_u8_vg2 (0, w12 + 1, z4),\n \t svwrite_ver_za8_u8_vg2 (0, w12 + 1, z4))\n \n+/*\n+** write_za8_mf8_z4_0_w12p1:\n+**\tadd\t(w[0-9]+), w12, #?1\n+**\tmova\tza0v\\.b\\[\\1, 0:1\\], 
{z4\\.b - z5\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_w12p1, svmfloat8x2_t,\n+\t svwrite_ver_za8_mf8_vg2 (0, w12 + 1, z4),\n+\t svwrite_ver_za8_mf8_vg2 (0, w12 + 1, z4))\n+\n /*\n ** write_za8_s8_z28_0_w12p2:\n **\tmova\tza0v\\.b\\[w12, 2:3\\], {z28\\.b - z29\\.b}\n@@ -90,6 +119,16 @@ TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x2_t,\n \t svwrite_ver_za8_u8_vg2 (0, w15 + 3, z0),\n \t svwrite_ver_za8_u8_vg2 (0, w15 + 3, z0))\n \n+/*\n+** write_za8_mf8_z0_0_w15p3:\n+**\tadd\t(w[0-9]+), w15, #?3\n+**\tmova\tza0v\\.b\\[\\1, 0:1\\], {z0\\.b - z1\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z0_0_w15p3, svmfloat8x2_t,\n+\t svwrite_ver_za8_mf8_vg2 (0, w15 + 3, z0),\n+\t svwrite_ver_za8_mf8_vg2 (0, w15 + 3, z0))\n+\n /*\n ** write_za8_u8_z4_0_w15p12:\n **\tmova\tza0v\\.b\\[w15, 12:13\\], {z4\\.b - z5\\.b}\n@@ -99,6 +138,15 @@ TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x2_t,\n \t svwrite_ver_za8_u8_vg2 (0, w15 + 12, z4),\n \t svwrite_ver_za8_u8_vg2 (0, w15 + 12, z4))\n \n+/*\n+** write_za8_mf8_z4_0_w15p12:\n+**\tmova\tza0v\\.b\\[w15, 12:13\\], {z4\\.b - z5\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_w15p12, svmfloat8x2_t,\n+\t svwrite_ver_za8_mf8_vg2 (0, w15 + 12, z4),\n+\t svwrite_ver_za8_mf8_vg2 (0, w15 + 12, z4))\n+\n /*\n ** write_za8_u8_z28_0_w12p15:\n **\tadd\t(w[0-9]+), w12, #?15\n@@ -109,6 +157,16 @@ TEST_ZA_XN (write_za8_u8_z28_0_w12p15, svuint8x2_t,\n \t svwrite_ver_za8_u8_vg2 (0, w12 + 15, z28),\n \t svwrite_ver_za8_u8_vg2 (0, w12 + 15, z28))\n \n+/*\n+** write_za8_mf8_z28_0_w12p15:\n+**\tadd\t(w[0-9]+), w12, #?15\n+**\tmova\tza0v\\.b\\[\\1, 0:1\\], {z28\\.b - z29\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z28_0_w12p15, svmfloat8x2_t,\n+\t svwrite_ver_za8_mf8_vg2 (0, w12 + 15, z28),\n+\t svwrite_ver_za8_mf8_vg2 (0, w12 + 15, z28))\n+\n /*\n ** write_za8_s8_z0_0_w15p16:\n **\tadd\t(w[0-9]+), w15, #?16\n@@ -129,6 +187,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x2_t,\n \t svwrite_ver_za8_u8_vg2 (0, w12 - 1, z4),\n \t 
svwrite_ver_za8_u8_vg2 (0, w12 - 1, z4))\n \n+/*\n+** write_za8_mf8_z4_0_w12m1:\n+**\tsub\t(w[0-9]+), w12, #?1\n+**\tmova\tza0v\\.b\\[\\1, 0:1\\], {z4\\.b - z5\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_w12m1, svmfloat8x2_t,\n+\t svwrite_ver_za8_mf8_vg2 (0, w12 - 1, z4),\n+\t svwrite_ver_za8_mf8_vg2 (0, w12 - 1, z4))\n+\n /*\n ** write_za8_u8_z18_0_w16:\n **\tmov\t(w1[2-5]), w16\n@@ -138,3 +206,13 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x2_t,\n TEST_ZA_XN (write_za8_u8_z18_0_w16, svuint8x2_t,\n \t svwrite_ver_za8_u8_vg2 (0, w16, z18),\n \t svwrite_ver_za8_u8_vg2 (0, w16, z18))\n+\n+/*\n+** write_za8_mf8_z18_0_w16:\n+**\tmov\t(w1[2-5]), w16\n+**\tmova\tza0v\\.b\\[\\1, 0:1\\], {z18\\.b - z19\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z18_0_w16, svmfloat8x2_t,\n+\t svwrite_ver_za8_mf8_vg2 (0, w16, z18),\n+\t svwrite_ver_za8_mf8_vg2 (0, w16, z18))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c\nindex 8972fed59e3..0444f80fa42 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_ver_za8_vg4.c\n@@ -22,6 +22,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_1, svuint8x4_t,\n \t svwrite_ver_za8_u8_vg4 (0, 1, z4),\n \t svwrite_ver_za8_u8_vg4 (0, 1, z4))\n \n+/*\n+** write_za8_mf8_z4_0_1:\n+**\tmov\t(w1[2-5]), #?1\n+**\tmova\tza0v\\.b\\[\\1, 0:3\\], {z4\\.b - z7\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_1, svmfloat8x4_t,\n+\t svwrite_ver_za8_mf8_vg4 (0, 1, z4),\n+\t svwrite_ver_za8_mf8_vg4 (0, 1, z4))\n+\n /*\n ** write_za8_s8_z28_0_w11:\n **\tmov\t(w1[2-5]), w11\n@@ -54,6 +64,19 @@ TEST_ZA_XN (write_za8_u8_z18_0_w15, svuint8x4_t,\n \t svwrite_ver_za8_u8_vg4 (0, w15, z18),\n \t svwrite_ver_za8_u8_vg4 (0, w15, z18))\n \n+/*\n+** write_za8_mf8_z18_0_w15:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmova\tza0v\\.b\\[w15, 0:3\\], 
{[^\\n]+}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z18_0_w15, svmfloat8x4_t,\n+\t svwrite_ver_za8_mf8_vg4 (0, w15, z18),\n+\t svwrite_ver_za8_mf8_vg4 (0, w15, z18))\n+\n /*\n ** write_za8_s8_z23_0_w12p12:\n **\tmov\t[^\\n]+\n@@ -77,6 +100,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12p1, svuint8x4_t,\n \t svwrite_ver_za8_u8_vg4 (0, w12 + 1, z4),\n \t svwrite_ver_za8_u8_vg4 (0, w12 + 1, z4))\n \n+/*\n+** write_za8_mf8_z4_0_w12p1:\n+**\tadd\t(w[0-9]+), w12, #?1\n+**\tmova\tza0v\\.b\\[\\1, 0:3\\], {z4\\.b - z7\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_w12p1, svmfloat8x4_t,\n+\t svwrite_ver_za8_mf8_vg4 (0, w12 + 1, z4),\n+\t svwrite_ver_za8_mf8_vg4 (0, w12 + 1, z4))\n+\n /*\n ** write_za8_s8_z28_0_w12p2:\n **\tadd\t(w[0-9]+), w12, #?2\n@@ -97,6 +130,16 @@ TEST_ZA_XN (write_za8_u8_z0_0_w15p3, svuint8x4_t,\n \t svwrite_ver_za8_u8_vg4 (0, w15 + 3, z0),\n \t svwrite_ver_za8_u8_vg4 (0, w15 + 3, z0))\n \n+/*\n+** write_za8_mf8_z0_0_w15p3:\n+**\tadd\t(w[0-9]+), w15, #?3\n+**\tmova\tza0v\\.b\\[\\1, 0:3\\], {z0\\.b - z3\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z0_0_w15p3, svmfloat8x4_t,\n+\t svwrite_ver_za8_mf8_vg4 (0, w15 + 3, z0),\n+\t svwrite_ver_za8_mf8_vg4 (0, w15 + 3, z0))\n+\n /*\n ** write_za8_u8_z0_0_w12p4:\n **\tmova\tza0v\\.b\\[w12, 4:7\\], {z0\\.b - z3\\.b}\n@@ -106,6 +149,15 @@ TEST_ZA_XN (write_za8_u8_z0_0_w12p4, svuint8x4_t,\n \t svwrite_ver_za8_u8_vg4 (0, w12 + 4, z0),\n \t svwrite_ver_za8_u8_vg4 (0, w12 + 4, z0))\n \n+/*\n+** write_za8_mf8_z0_0_w12p4:\n+**\tmova\tza0v\\.b\\[w12, 4:7\\], {z0\\.b - z3\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z0_0_w12p4, svmfloat8x4_t,\n+\t svwrite_ver_za8_mf8_vg4 (0, w12 + 4, z0),\n+\t svwrite_ver_za8_mf8_vg4 (0, w12 + 4, z0))\n+\n /*\n ** write_za8_u8_z4_0_w15p12:\n **\tmova\tza0v\\.b\\[w15, 12:15\\], {z4\\.b - z7\\.b}\n@@ -115,6 +167,15 @@ TEST_ZA_XN (write_za8_u8_z4_0_w15p12, svuint8x4_t,\n \t svwrite_ver_za8_u8_vg4 (0, w15 + 12, z4),\n \t svwrite_ver_za8_u8_vg4 (0, w15 + 12, z4))\n \n+/*\n+** 
write_za8_mf8_z4_0_w15p12:\n+**\tmova\tza0v\\.b\\[w15, 12:15\\], {z4\\.b - z7\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_w15p12, svmfloat8x4_t,\n+\t svwrite_ver_za8_mf8_vg4 (0, w15 + 12, z4),\n+\t svwrite_ver_za8_mf8_vg4 (0, w15 + 12, z4))\n+\n /*\n ** write_za8_u8_z28_0_w12p14:\n **\tadd\t(w[0-9]+), w12, #?14\n@@ -125,6 +186,16 @@ TEST_ZA_XN (write_za8_u8_z28_0_w12p14, svuint8x4_t,\n \t svwrite_ver_za8_u8_vg4 (0, w12 + 14, z28),\n \t svwrite_ver_za8_u8_vg4 (0, w12 + 14, z28))\n \n+/*\n+** write_za8_mf8_z28_0_w12p14:\n+**\tadd\t(w[0-9]+), w12, #?14\n+**\tmova\tza0v\\.b\\[\\1, 0:3\\], {z28\\.b - z31\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z28_0_w12p14, svmfloat8x4_t,\n+\t svwrite_ver_za8_mf8_vg4 (0, w12 + 14, z28),\n+\t svwrite_ver_za8_mf8_vg4 (0, w12 + 14, z28))\n+\n /*\n ** write_za8_s8_z0_0_w15p16:\n **\tadd\t(w[0-9]+), w15, #?16\n@@ -145,6 +216,16 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x4_t,\n \t svwrite_ver_za8_u8_vg4 (0, w12 - 1, z4),\n \t svwrite_ver_za8_u8_vg4 (0, w12 - 1, z4))\n \n+/*\n+** write_za8_mf8_z4_0_w12m1:\n+**\tsub\t(w[0-9]+), w12, #?1\n+**\tmova\tza0v\\.b\\[\\1, 0:3\\], {z4\\.b - z7\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z4_0_w12m1, svmfloat8x4_t,\n+\t svwrite_ver_za8_mf8_vg4 (0, w12 - 1, z4),\n+\t svwrite_ver_za8_mf8_vg4 (0, w12 - 1, z4))\n+\n /*\n ** write_za8_u8_z28_0_w16:\n **\tmov\t(w1[2-5]), w16\n@@ -154,3 +235,13 @@ TEST_ZA_XN (write_za8_u8_z4_0_w12m1, svuint8x4_t,\n TEST_ZA_XN (write_za8_u8_z28_0_w16, svuint8x4_t,\n \t svwrite_ver_za8_u8_vg4 (0, w16, z28),\n \t svwrite_ver_za8_u8_vg4 (0, w16, z28))\n+\n+/*\n+** write_za8_mf8_z28_0_w16:\n+**\tmov\t(w1[2-5]), w16\n+**\tmova\tza0v\\.b\\[\\1, 0:3\\], {z28\\.b - z31\\.b}\n+**\tret\n+*/\n+TEST_ZA_XN (write_za8_mf8_z28_0_w16, svmfloat8x4_t,\n+\t svwrite_ver_za8_mf8_vg4 (0, w16, z28),\n+\t svwrite_ver_za8_mf8_vg4 (0, w16, z28))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c\nindex 4b83a37edd2..836118b0fa7 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x2.c\n@@ -32,6 +32,16 @@ TEST_ZA_XN (write_w7_z0, svuint8x2_t,\n \t svwrite_za8_u8_vg1x2 (w7, z0),\n \t svwrite_za8_vg1x2 (w7, z0))\n \n+/*\n+** write_mf8_w7_z0:\n+**\tmov\t(w8|w9|w10|w11), w7\n+**\tmova\tza\\.d\\[\\1, 0, vgx2\\], {z0\\.d - z1\\.d}\n+**\tret\n+*/\n+TEST_ZA_XN (write_mf8_w7_z0, svmfloat8x2_t,\n+\t svwrite_za8_mf8_vg1x2 (w7, z0),\n+\t svwrite_za8_vg1x2 (w7, z0))\n+\n /*\n ** write_w8_z0:\n **\tmova\tza\\.d\\[w8, 0, vgx2\\], {z0\\.d - z1\\.d}\n@@ -61,6 +71,16 @@ TEST_ZA_XN (write_w12_z0, svuint8x2_t,\n \t svwrite_za8_u8_vg1x2 (w12, z0),\n \t svwrite_za8_vg1x2 (w12, z0))\n \n+/*\n+** write_mf8_w12_z0:\n+**\tmov\t(w8|w9|w10|w11), w12\n+**\tmova\tza\\.d\\[\\1, 0, vgx2\\], {z0\\.d - z1\\.d}\n+**\tret\n+*/\n+TEST_ZA_XN (write_mf8_w12_z0, svmfloat8x2_t,\n+\t svwrite_za8_mf8_vg1x2 (w12, z0),\n+\t svwrite_za8_vg1x2 (w12, z0))\n+\n /*\n ** write_w8p7_z0:\n **\tmova\tza\\.d\\[w8, 7, vgx2\\], {z0\\.d - z1\\.d}\n@@ -90,6 +110,16 @@ TEST_ZA_XN (write_w8m1_z0, svuint8x2_t,\n \t svwrite_za8_u8_vg1x2 (w8 - 1, z0),\n \t svwrite_za8_vg1x2 (w8 - 1, z0))\n \n+/*\n+** write_mf8_w8m1_z0:\n+**\tsub\t(w8|w9|w10|w11), w8, #?1\n+**\tmova\tza\\.d\\[\\1, 0, vgx2\\], {z0\\.d - z1\\.d}\n+**\tret\n+*/\n+TEST_ZA_XN (write_mf8_w8m1_z0, svmfloat8x2_t,\n+\t svwrite_za8_mf8_vg1x2 (w8 - 1, z0),\n+\t svwrite_za8_vg1x2 (w8 - 1, z0))\n+\n /*\n ** write_w8_z18:\n **\tmova\tza\\.d\\[w8, 0, vgx2\\], {z18\\.d - z19\\.d}\n@@ -99,6 +129,15 @@ TEST_ZA_XN (write_w8_z18, svuint8x2_t,\n \t svwrite_za8_u8_vg1x2 (w8, z18),\n \t svwrite_za8_vg1x2 (w8, z18))\n \n+/*\n+** write_mf8_w8_z18:\n+**\tmova\tza\\.d\\[w8, 0, vgx2\\], {z18\\.d - z19\\.d}\n+**\tret\n+*/\n+TEST_ZA_XN (write_mf8_w8_z18, svmfloat8x2_t,\n+\t svwrite_za8_mf8_vg1x2 (w8, z18),\n+\t 
svwrite_za8_vg1x2 (w8, z18))\n+\n /* Leave the assembler to check for correctness for misaligned registers. */\n \n /*\n@@ -120,3 +159,12 @@ TEST_ZA_XN (write_w8_z23, svint8x2_t,\n TEST_ZA_XN (write_w8_z28, svuint8x2_t,\n \t svwrite_za8_u8_vg1x2 (w8, z28),\n \t svwrite_za8_vg1x2 (w8, z28))\n+\n+/*\n+** write_mf8_w8_z28:\n+**\tmova\tza\\.d\\[w8, 0, vgx2\\], {z28\\.d - z29\\.d}\n+**\tret\n+*/\n+TEST_ZA_XN (write_mf8_w8_z28, svmfloat8x2_t,\n+\t svwrite_za8_mf8_vg1x2 (w8, z28),\n+\t svwrite_za8_vg1x2 (w8, z28))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c\nindex a529bf9fcca..649a5c0ca63 100644\n--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/write_za8_vg1x4.c\n@@ -22,6 +22,16 @@ TEST_ZA_XN (write_w0_z0, svuint8x4_t,\n \t svwrite_za8_u8_vg1x4 (w0, z0),\n \t svwrite_za8_vg1x4 (w0, z0))\n \n+/*\n+** write_mf8_w0_z0:\n+**\tmov\t(w8|w9|w10|w11), w0\n+**\tmova\tza\\.d\\[\\1, 0, vgx4\\], {z0\\.d - z3\\.d}\n+**\tret\n+*/\n+TEST_ZA_XN (write_mf8_w0_z0, svmfloat8x4_t,\n+\t svwrite_za8_mf8_vg1x4 (w0, z0),\n+\t svwrite_za8_vg1x4 (w0, z0))\n+\n /*\n ** write_w7_z0:\n **\tmov\t(w8|w9|w10|w11), w7\n@@ -50,6 +60,14 @@ TEST_ZA_XN (write_w11_z0, svuint8x4_t,\n \t svwrite_za8_u8_vg1x4 (w11, z0),\n \t svwrite_za8_vg1x4 (w11, z0))\n \n+/*\n+** write_mf8_w11_z0:\n+**\tmova\tza\\.d\\[w11, 0, vgx4\\], {z0\\.d - z3\\.d}\n+**\tret\n+*/\n+TEST_ZA_XN (write_mf8_w11_z0, svmfloat8x4_t,\n+\t svwrite_za8_mf8_vg1x4 (w11, z0),\n+\t svwrite_za8_vg1x4 (w11, z0))\n \n /*\n ** write_w12_z0:\n@@ -80,6 +98,16 @@ TEST_ZA_XN (write_w8p8_z0, svuint8x4_t,\n \t svwrite_za8_u8_vg1x4 (w8 + 8, z0),\n \t svwrite_za8_vg1x4 (w8 + 8, z0))\n \n+/*\n+** write_mf8_w8p8_z0:\n+**\tadd\t(w8|w9|w10|w11), w8, #?8\n+**\tmova\tza\\.d\\[\\1, 0, vgx4\\], {z0\\.d - z3\\.d}\n+**\tret\n+*/\n+TEST_ZA_XN (write_mf8_w8p8_z0, svmfloat8x4_t,\n+\t 
svwrite_za8_mf8_vg1x4 (w8 + 8, z0),\n+\t svwrite_za8_vg1x4 (w8 + 8, z0))\n+\n /*\n ** write_w8m1_z0:\n **\tsub\t(w8|w9|w10|w11), w8, #?1\n@@ -114,6 +142,19 @@ TEST_ZA_XN (write_w8_z18, svuint8x4_t,\n \t svwrite_za8_u8_vg1x4 (w8, z18),\n \t svwrite_za8_vg1x4 (w8, z18))\n \n+/*\n+** write_mf8_w8_z18:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmova\tza\\.d\\[w8, 0, vgx4\\], [^\\n]+\n+**\tret\n+*/\n+TEST_ZA_XN (write_mf8_w8_z18, svmfloat8x4_t,\n+\t svwrite_za8_mf8_vg1x4 (w8, z18),\n+\t svwrite_za8_vg1x4 (w8, z18))\n+\n /*\n ** write_w8_z23:\n **\tmov\t[^\\n]+\n@@ -127,6 +168,19 @@ TEST_ZA_XN (write_w8_z23, svuint8x4_t,\n \t svwrite_za8_u8_vg1x4 (w8, z23),\n \t svwrite_za8_vg1x4 (w8, z23))\n \n+/*\n+** write_mf8_w8_z23:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmova\tza\\.d\\[w8, 0, vgx4\\], [^\\n]+\n+**\tret\n+*/\n+TEST_ZA_XN (write_mf8_w8_z23, svmfloat8x4_t,\n+\t svwrite_za8_mf8_vg1x4 (w8, z23),\n+\t svwrite_za8_vg1x4 (w8, z23))\n+\n /*\n ** write_w8_z28:\n **\tmova\tza\\.d\\[w8, 0, vgx4\\], {z28\\.d - z31\\.d}\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x2.c\nnew file mode 100644\nindex 00000000000..834a0e680a8\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x2.c\n@@ -0,0 +1,77 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** zip_z0_z0:\n+**\tzip\t{z0\\.b - z1\\.b}, z0\\.b, z1\\.b\n+**\tret\n+*/\n+TEST_XN (zip_z0_z0, svmfloat8x2_t, z0,\n+\t svzip_mf8_x2 (z0),\n+\t svzip (z0))\n+\n+/*\n+** zip_z0_z4:\n+**\tzip\t{z0\\.b - z1\\.b}, z4\\.b, z5\\.b\n+**\tret\n+*/\n+TEST_XN (zip_z0_z4, svmfloat8x2_t, z0,\n+\t svzip_mf8_x2 (z4),\n+\t svzip (z4))\n+\n+/*\n+** zip_z4_z18:\n+**\tzip\t{z4\\.b - z5\\.b}, z18\\.b, z19\\.b\n+**\tret\n+*/\n+TEST_XN (zip_z4_z18, svmfloat8x2_t, z4,\n+\t svzip_mf8_x2 (z18),\n+\t svzip 
(z18))\n+\n+/*\n+** zip_z18_z23:\n+**\tzip\t{z18\\.b - z19\\.b}, z23\\.b, z24\\.b\n+**\tret\n+*/\n+TEST_XN (zip_z18_z23, svmfloat8x2_t, z18,\n+\t svzip_mf8_x2 (z23),\n+\t svzip (z23))\n+\n+/*\n+** zip_z23_z28:\n+**\tzip\t[^\\n]+, z28\\.b, z29\\.b\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (zip_z23_z28, svmfloat8x2_t, z23,\n+\t svzip_mf8_x2 (z28),\n+\t svzip (z28))\n+\n+/*\n+** zip_z28_z0:\n+**\tzip\t{z28\\.b - z29\\.b}, z0\\.b, z1\\.b\n+**\tret\n+*/\n+TEST_XN (zip_z28_z0, svmfloat8x2_t, z28,\n+\t svzip_mf8_x2 (z0),\n+\t svzip (z0))\n+\n+/*\n+** zip_z28_z0_z23:\t{ xfail aarch64_big_endian }\n+**\tzip\t{z28\\.b - z29\\.b}, z0\\.b, z23\\.b\n+**\tret\n+*/\n+TEST_XN (zip_z28_z0_z23, svmfloat8x2_t, z28,\n+\t svzip_mf8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),\n+\t svzip (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))\n+\n+/*\n+** zip_z28_z5_z19:\n+**\tzip\t{z28\\.b - z29\\.b}, z5\\.b, z19\\.b\n+**\tret\n+*/\n+TEST_XN (zip_z28_z5_z19, svmfloat8x2_t, z28,\n+\t svzip_mf8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),\n+\t svzip (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x4.c\nnew file mode 100644\nindex 00000000000..487e9b2d3fb\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_mf8_x4.c\n@@ -0,0 +1,73 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** zip_z0_z0:\n+**\tzip\t{z0\\.b - z3\\.b}, {z0\\.b - z3\\.b}\n+**\tret\n+*/\n+TEST_XN (zip_z0_z0, svmfloat8x4_t, z0,\n+\t svzip_mf8_x4 (z0),\n+\t svzip (z0))\n+\n+/*\n+** zip_z0_z4:\n+**\tzip\t{z0\\.b - z3\\.b}, {z4\\.b - z7\\.b}\n+**\tret\n+*/\n+TEST_XN (zip_z0_z4, svmfloat8x4_t, z0,\n+\t svzip_mf8_x4 (z4),\n+\t svzip (z4))\n+\n+/*\n+** zip_z4_z18:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tzip\t{z4\\.b - z7\\.b}, 
[^\\n]+\n+**\tret\n+*/\n+TEST_XN (zip_z4_z18, svmfloat8x4_t, z4,\n+\t svzip_mf8_x4 (z18),\n+\t svzip (z18))\n+\n+/*\n+** zip_z18_z23:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tzip\t{z[^\\n]+}, {z[^\\n]+}\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (zip_z18_z23, svmfloat8x4_t, z18,\n+\t svzip_mf8_x4 (z23),\n+\t svzip (z23))\n+\n+/*\n+** zip_z23_z28:\n+**\tzip\t[^\\n]+, {z28\\.b - z31\\.b}\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (zip_z23_z28, svmfloat8x4_t, z23,\n+\t svzip_mf8_x4 (z28),\n+\t svzip (z28))\n+\n+/*\n+** zip_z28_z0:\n+**\tzip\t{z28\\.b - z31\\.b}, {z0\\.b - z3\\.b}\n+**\tret\n+*/\n+TEST_XN (zip_z28_z0, svmfloat8x4_t, z28,\n+\t svzip_mf8_x4 (z0),\n+\t svzip (z0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x2.c\nnew file mode 100644\nindex 00000000000..4dd4753461a\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x2.c\n@@ -0,0 +1,77 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** zipq_z0_z0:\n+**\tzip\t{z0\\.q - z1\\.q}, z0\\.q, z1\\.q\n+**\tret\n+*/\n+TEST_XN (zipq_z0_z0, svmfloat8x2_t, z0,\n+\t svzipq_mf8_x2 (z0),\n+\t svzipq (z0))\n+\n+/*\n+** zipq_z0_z4:\n+**\tzip\t{z0\\.q - z1\\.q}, z4\\.q, z5\\.q\n+**\tret\n+*/\n+TEST_XN (zipq_z0_z4, svmfloat8x2_t, z0,\n+\t svzipq_mf8_x2 (z4),\n+\t svzipq (z4))\n+\n+/*\n+** zipq_z4_z18:\n+**\tzip\t{z4\\.q - z5\\.q}, z18\\.q, z19\\.q\n+**\tret\n+*/\n+TEST_XN (zipq_z4_z18, svmfloat8x2_t, z4,\n+\t svzipq_mf8_x2 (z18),\n+\t svzipq (z18))\n+\n+/*\n+** zipq_z18_z23:\n+**\tzip\t{z18\\.q - z19\\.q}, z23\\.q, z24\\.q\n+**\tret\n+*/\n+TEST_XN (zipq_z18_z23, svmfloat8x2_t, z18,\n+\t svzipq_mf8_x2 (z23),\n+\t svzipq (z23))\n+\n+/*\n+** zipq_z23_z28:\n+**\tzip\t[^\\n]+, z28\\.q, 
z29\\.q\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (zipq_z23_z28, svmfloat8x2_t, z23,\n+\t svzipq_mf8_x2 (z28),\n+\t svzipq (z28))\n+\n+/*\n+** zipq_z28_z0:\n+**\tzip\t{z28\\.q - z29\\.q}, z0\\.q, z1\\.q\n+**\tret\n+*/\n+TEST_XN (zipq_z28_z0, svmfloat8x2_t, z28,\n+\t svzipq_mf8_x2 (z0),\n+\t svzipq (z0))\n+\n+/*\n+** zipq_z28_z0_z23:\t{ xfail aarch64_big_endian }\n+**\tzip\t{z28\\.q - z29\\.q}, z0\\.q, z23\\.q\n+**\tret\n+*/\n+TEST_XN (zipq_z28_z0_z23, svmfloat8x2_t, z28,\n+\t svzipq_mf8_x2 (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))),\n+\t svzipq (svcreate2 (svget2 (z0, 0), svget2 (z23, 0))))\n+\n+/*\n+** zipq_z28_z5_z19:\n+**\tzip\t{z28\\.q - z29\\.q}, z5\\.q, z19\\.q\n+**\tret\n+*/\n+TEST_XN (zipq_z28_z5_z19, svmfloat8x2_t, z28,\n+\t svzipq_mf8_x2 (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))),\n+\t svzipq (svcreate2 (svget2 (z4, 1), svget2 (z18, 1))))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x4.c\nnew file mode 100644\nindex 00000000000..417eb387e4b\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_mf8_x4.c\n@@ -0,0 +1,73 @@\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sme2_acle.h\"\n+\n+/*\n+** zipq_z0_z0:\n+**\tzip\t{z0\\.q - z3\\.q}, {z0\\.q - z3\\.q}\n+**\tret\n+*/\n+TEST_XN (zipq_z0_z0, svmfloat8x4_t, z0,\n+\t svzipq_mf8_x4 (z0),\n+\t svzipq (z0))\n+\n+/*\n+** zipq_z0_z4:\n+**\tzip\t{z0\\.q - z3\\.q}, {z4\\.q - z7\\.q}\n+**\tret\n+*/\n+TEST_XN (zipq_z0_z4, svmfloat8x4_t, z0,\n+\t svzipq_mf8_x4 (z4),\n+\t svzipq (z4))\n+\n+/*\n+** zipq_z4_z18:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tzip\t{z4\\.q - z7\\.q}, [^\\n]+\n+**\tret\n+*/\n+TEST_XN (zipq_z4_z18, svmfloat8x4_t, z4,\n+\t svzipq_mf8_x4 (z18),\n+\t svzipq (z18))\n+\n+/*\n+** 
zipq_z18_z23:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tzip\t{z[^\\n]+}, {z[^\\n]+}\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (zipq_z18_z23, svmfloat8x4_t, z18,\n+\t svzipq_mf8_x4 (z23),\n+\t svzipq (z23))\n+\n+/*\n+** zipq_z23_z28:\n+**\tzip\t[^\\n]+, {z28\\.q - z31\\.q}\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_XN (zipq_z23_z28, svmfloat8x4_t, z23,\n+\t svzipq_mf8_x4 (z28),\n+\t svzipq (z28))\n+\n+/*\n+** zipq_z28_z0:\n+**\tzip\t{z28\\.q - z31\\.q}, {z0\\.q - z3\\.q}\n+**\tret\n+*/\n+TEST_XN (zipq_z28_z0, svmfloat8x4_t, z28,\n+\t svzipq_mf8_x4 (z0),\n+\t svzipq (z0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x2.c\nnew file mode 100644\nindex 00000000000..d4073ab279d\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x2.c\n@@ -0,0 +1,269 @@\n+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */\n+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! ilp32 } } } } */\n+\n+#include \"test_sve_acle.h\"\n+\n+#pragma GCC target \"+sve2p1\"\n+#ifdef STREAMING_COMPATIBLE\n+#pragma GCC target \"+sme2\"\n+#endif\n+\n+/*\n+** ld1_mf8_base:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_base, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0),\n+\t\t z0 = svld1_x2 (pn8, x0))\n+\n+/*\n+** ld1_mf8_index:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_index, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 + x1),\n+\t\t z0 = svld1_x2 (pn8, x0 + x1))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_mf8_1:\n+**\tincb\tx0\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 + svcntb ()),\n+\t\t z0 = svld1_x2 (pn8, x0 + svcntb ()))\n+\n+/*\n+** ld1_mf8_2:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 2),\n+\t\t z0 = svld1_x2 (pn8, x0 + svcntb () * 2))\n+\n+/*\n+** ld1_mf8_14:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 14),\n+\t\t z0 = svld1_x2 (pn8, x0 + svcntb () * 14))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 + svcntb () * 16),\n+\t\t z0 = svld1_x2 (pn8, x0 + svcntb () * 16))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_mf8_m1:\n+**\tdecb\tx0\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 - svcntb ()),\n+\t\t z0 = svld1_x2 (pn8, x0 - svcntb ()))\n+\n+/*\n+** ld1_mf8_m2:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 2),\n+\t\t z0 = svld1_x2 (pn8, x0 - svcntb () * 2))\n+\n+/*\n+** ld1_mf8_m16:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 16),\n+\t\t z0 = svld1_x2 (pn8, x0 - svcntb () * 16))\n+\n+/*\n+** ld1_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[\\1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn8, x0 - svcntb () * 18),\n+\t\t z0 = svld1_x2 (pn8, x0 - svcntb () * 18))\n+\n+/*\n+** ld1_mf8_z17:\n+**\tld1b\t{z[^\\n]+}, pn8/z, \\[x0\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_z17, svmfloat8x2_t, mfloat8_t,\n+\t\t z17 = svld1_mf8_x2 (pn8, x0),\n+\t\t z17 = svld1_x2 (pn8, x0))\n+\n+/*\n+** ld1_mf8_z22:\n+**\tld1b\t{z22\\.b(?: - |, )z23\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_z22, svmfloat8x2_t, mfloat8_t,\n+\t\t z22 = svld1_mf8_x2 (pn8, x0),\n+\t\t z22 = svld1_x2 (pn8, x0))\n+\n+/*\n+** ld1_mf8_z28:\n+**\tld1b\t{z28\\.b(?: - |, )z29\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_z28, svmfloat8x2_t, mfloat8_t,\n+\t\t z28 = svld1_mf8_x2 (pn8, x0),\n+\t\t z28 = svld1_x2 (pn8, x0))\n+\n+/*\n+** ld1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, p0\\.b\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_pn0, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 
(pn0, x0),\n+\t\t z0 = svld1_x2 (pn0, x0))\n+\n+/*\n+** ld1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_pn7, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn7, x0),\n+\t\t z0 = svld1_x2 (pn7, x0))\n+\n+/*\n+** ld1_mf8_pn15:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn15/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_pn15, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x2 (pn15, x0),\n+\t\t z0 = svld1_x2 (pn15, x0))\n+\n+/*\n+** ld1_vnum_mf8_0:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, 0),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, 0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, 1),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, 1))\n+\n+/*\n+** ld1_vnum_mf8_2:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, 2),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, 2))\n+\n+/*\n+** ld1_vnum_mf8_14:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, 14),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, 14))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_vnum_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, 16),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, 16))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, -1),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, -1))\n+\n+/*\n+** ld1_vnum_mf8_m2:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, -2),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, -2))\n+\n+/*\n+** ld1_vnum_mf8_m16:\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, -16),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, -16))\n+\n+/*\n+** ld1_vnum_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[\\1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, -18),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, -18))\n+\n+/*\n+** ld1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tld1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x2 (pn8, x0, x1),\n+\t\t z0 = svld1_vnum_x2 (pn8, x0, x1))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x4.c\nnew file mode 
100644\nindex 00000000000..84d053a4261\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_mf8_x4.c\n@@ -0,0 +1,361 @@\n+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */\n+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! ilp32 } } } } */\n+\n+#include \"test_sve_acle.h\"\n+\n+#pragma GCC target \"+sve2p1\"\n+#ifdef STREAMING_COMPATIBLE\n+#pragma GCC target \"+sme2\"\n+#endif\n+\n+/*\n+** ld1_mf8_base:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_base, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0),\n+\t\t z0 = svld1_x4 (pn8, x0))\n+\n+/*\n+** ld1_mf8_index:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_index, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + x1),\n+\t\t z0 = svld1_x4 (pn8, x0 + x1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_mf8_1:\n+**\tincb\tx0\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + svcntb ()),\n+\t\t z0 = svld1_x4 (pn8, x0 + svcntb ()))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 2),\n+\t\t z0 = svld1_x4 (pn8, x0 + svcntb () * 2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 3),\n+\t\t z0 = svld1_x4 (pn8, x0 + svcntb () * 3))\n+\n+/*\n+** ld1_mf8_4:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 4),\n+\t\t z0 = svld1_x4 (pn8, x0 + svcntb () * 4))\n+\n+/*\n+** ld1_mf8_28:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 28),\n+\t\t z0 = svld1_x4 (pn8, x0 + svcntb () * 28))\n+\n+/*\n+** ld1_mf8_32:\n+**\t[^{]*\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 + svcntb () * 32),\n+\t\t z0 = svld1_x4 (pn8, x0 + svcntb () * 32))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_mf8_m1:\n+**\tdecb\tx0\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 - svcntb ()),\n+\t\t z0 = svld1_x4 (pn8, x0 - svcntb ()))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 2),\n+\t\t z0 = svld1_x4 (pn8, x0 - svcntb () * 2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 3),\n+\t\t z0 = svld1_x4 (pn8, x0 - svcntb () * 3))\n+\n+/*\n+** ld1_mf8_m4:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+ TEST_LOAD_COUNT (ld1_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 4),\n+\t\t z0 = svld1_x4 (pn8, x0 - svcntb () * 4))\n+\n+/*\n+** ld1_mf8_m32:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 32),\n+\t\t z0 = svld1_x4 (pn8, x0 - svcntb () * 32))\n+\n+/*\n+** ld1_mf8_m36:\n+**\t[^{]*\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn8, x0 - svcntb () * 36),\n+\t\t z0 = svld1_x4 (pn8, x0 - svcntb () * 36))\n+\n+/*\n+** ld1_mf8_z17:\n+**\tld1b\t{z[^\\n]+}, pn8/z, \\[x0\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_z17, svmfloat8x4_t, mfloat8_t,\n+\t\t z17 = svld1_mf8_x4 (pn8, x0),\n+\t\t z17 = svld1_x4 (pn8, x0))\n+\n+/*\n+** ld1_mf8_z22:\n+**\tld1b\t{z[^\\n]+}, pn8/z, \\[x0\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_z22, svmfloat8x4_t, mfloat8_t,\n+\t\t z22 = svld1_mf8_x4 (pn8, x0),\n+\t\t z22 = svld1_x4 (pn8, x0))\n+\n+/*\n+** ld1_mf8_z28:\n+**\tld1b\t{z28\\.b(?: - |, )z31\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_z28, svmfloat8x4_t, mfloat8_t,\n+\t\t z28 = svld1_mf8_x4 (pn8, x0),\n+\t\t z28 = svld1_x4 (pn8, x0))\n+\n+/*\n+** ld1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, p0\\.b\n+**\tld1b\t{z0\\.b(?: - |, )z3\\.b}, pn\\1/z, 
\\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_pn0, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn0, x0),\n+\t\t z0 = svld1_x4 (pn0, x0))\n+\n+/*\n+** ld1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tld1b\t{z0\\.b(?: - |, )z3\\.b}, pn\\1/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_pn7, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn7, x0),\n+\t\t z0 = svld1_x4 (pn7, x0))\n+\n+/*\n+** ld1_mf8_pn15:\n+**\tld1b\t{z0\\.b(?: - |, )z3\\.b}, pn15/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_mf8_pn15, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_mf8_x4 (pn15, x0),\n+\t\t z0 = svld1_x4 (pn15, x0))\n+\n+/*\n+** ld1_vnum_mf8_0:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 0),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 1),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_vnum_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 2),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_vnum_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 3),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 3))\n+\n+/*\n+** ld1_vnum_mf8_4:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 4),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 4))\n+\n+/*\n+** ld1_vnum_mf8_28:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 28),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 28))\n+\n+/*\n+** ld1_vnum_mf8_32:\n+**\t[^{]*\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, 32),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, 32))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, -1),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, -1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ld1_vnum_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, -2),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, -2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ld1_vnum_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, -3),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, -3))\n+\n+/*\n+** ld1_vnum_mf8_m4:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, -4),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, -4))\n+\n+/*\n+** ld1_vnum_mf8_m32:\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, -32),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, -32))\n+\n+/*\n+** ld1_vnum_mf8_m36:\n+**\t[^{]*\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, -36),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, -36))\n+\n+/*\n+** ld1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tld1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ld1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svld1_vnum_mf8_x4 (pn8, x0, x1),\n+\t\t z0 = svld1_vnum_x4 (pn8, x0, x1))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x2.c\nnew file mode 100644\nindex 00000000000..60d2caa1568\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x2.c\n@@ -0,0 +1,269 @@\n+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */\n+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! 
ilp32 } } } } */\n+\n+#include \"test_sve_acle.h\"\n+\n+#pragma GCC target \"+sve2p1\"\n+#ifdef STREAMING_COMPATIBLE\n+#pragma GCC target \"+sme2\"\n+#endif\n+\n+/*\n+** ldnt1_mf8_base:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_base, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0),\n+\t\t z0 = svldnt1_x2 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_index:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_index, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 + x1),\n+\t\t z0 = svldnt1_x2 (pn8, x0 + x1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_mf8_1:\n+**\tincb\tx0\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb ()),\n+\t\t z0 = svldnt1_x2 (pn8, x0 + svcntb ()))\n+\n+/*\n+** ldnt1_mf8_2:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 2),\n+\t\t z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2))\n+\n+/*\n+** ldnt1_mf8_14:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 14),\n+\t\t z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 + svcntb () * 16),\n+\t\t z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ldnt1_mf8_m1:\n+**\tdecb\tx0\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb ()),\n+\t\t z0 = svldnt1_x2 (pn8, x0 - svcntb ()))\n+\n+/*\n+** ldnt1_mf8_m2:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 2),\n+\t\t z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2))\n+\n+/*\n+** ldnt1_mf8_m16:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 16),\n+\t\t z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16))\n+\n+/*\n+** ldnt1_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[\\1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn8, x0 - svcntb () * 18),\n+\t\t z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18))\n+\n+/*\n+** ldnt1_mf8_z17:\n+**\tldnt1b\t{z[^\\n]+}, pn8/z, \\[x0\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_z17, svmfloat8x2_t, mfloat8_t,\n+\t\t z17 = svldnt1_mf8_x2 (pn8, x0),\n+\t\t z17 = svldnt1_x2 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_z22:\n+**\tldnt1b\t{z22\\.b(?: - |, )z23\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_z22, svmfloat8x2_t, mfloat8_t,\n+\t\t z22 = svldnt1_mf8_x2 (pn8, x0),\n+\t\t z22 = svldnt1_x2 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_z28:\n+**\tldnt1b\t{z28\\.b(?: - |, )z29\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_z28, svmfloat8x2_t, mfloat8_t,\n+\t\t z28 = svldnt1_mf8_x2 (pn8, x0),\n+\t\t z28 = svldnt1_x2 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, p0\\.b\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1/z, 
\\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_pn0, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn0, x0),\n+\t\t z0 = svldnt1_x2 (pn0, x0))\n+\n+/*\n+** ldnt1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_pn7, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn7, x0),\n+\t\t z0 = svldnt1_x2 (pn7, x0))\n+\n+/*\n+** ldnt1_mf8_pn15:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn15/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_pn15, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x2 (pn15, x0),\n+\t\t z0 = svldnt1_x2 (pn15, x0))\n+\n+/*\n+** ldnt1_vnum_mf8_0:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 0),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, 0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 1),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, 1))\n+\n+/*\n+** ldnt1_vnum_mf8_2:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 2),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, 2))\n+\n+/*\n+** ldnt1_vnum_mf8_14:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 14),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, 14))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ldnt1_vnum_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, 16),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, 16))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -1),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, -1))\n+\n+/*\n+** ldnt1_vnum_mf8_m2:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -2),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, -2))\n+\n+/*\n+** ldnt1_vnum_mf8_m16:\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -16),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, -16))\n+\n+/*\n+** ldnt1_vnum_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[\\1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, -18),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, -18))\n+\n+/*\n+** ldnt1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tldnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8/z, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x2 (pn8, x0, x1),\n+\t\t z0 = svldnt1_vnum_x2 (pn8, x0, x1))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x4.c 
b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x4.c\nnew file mode 100644\nindex 00000000000..976b1e6f61c\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_mf8_x4.c\n@@ -0,0 +1,361 @@\n+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */\n+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! ilp32 } } } } */\n+\n+#include \"test_sve_acle.h\"\n+\n+#pragma GCC target \"+sve2p1\"\n+#ifdef STREAMING_COMPATIBLE\n+#pragma GCC target \"+sme2\"\n+#endif\n+\n+/*\n+** ldnt1_mf8_base:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_base, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0),\n+\t\t z0 = svldnt1_x4 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_index:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_index, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + x1),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + x1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_mf8_1:\n+**\tincb\tx0\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb ()),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + svcntb ()))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 2),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ldnt1_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 3),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3))\n+\n+/*\n+** ldnt1_mf8_4:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 4),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4))\n+\n+/*\n+** ldnt1_mf8_28:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 28),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28))\n+\n+/*\n+** ldnt1_mf8_32:\n+**\t[^{]*\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 + svcntb () * 32),\n+\t\t z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_mf8_m1:\n+**\tdecb\tx0\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb ()),\n+\t\t z0 = svldnt1_x4 (pn8, x0 - svcntb ()))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 2),\n+\t\t z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ldnt1_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 3),\n+\t\t z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3))\n+\n+/*\n+** ldnt1_mf8_m4:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+ TEST_LOAD_COUNT (ldnt1_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 4),\n+\t\t z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4))\n+\n+/*\n+** ldnt1_mf8_m32:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 32),\n+\t\t z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32))\n+\n+/*\n+** ldnt1_mf8_m36:\n+**\t[^{]*\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn8, x0 - svcntb () * 36),\n+\t\t z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36))\n+\n+/*\n+** ldnt1_mf8_z17:\n+**\tldnt1b\t{z[^\\n]+}, pn8/z, \\[x0\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_z17, svmfloat8x4_t, mfloat8_t,\n+\t\t z17 = svldnt1_mf8_x4 (pn8, x0),\n+\t\t z17 = svldnt1_x4 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_z22:\n+**\tldnt1b\t{z[^\\n]+}, pn8/z, \\[x0\\]\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_z22, svmfloat8x4_t, mfloat8_t,\n+\t\t z22 = svldnt1_mf8_x4 (pn8, x0),\n+\t\t z22 = svldnt1_x4 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_z28:\n+**\tldnt1b\t{z28\\.b(?: - |, )z31\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_z28, svmfloat8x4_t, mfloat8_t,\n+\t\t z28 = svldnt1_mf8_x4 (pn8, x0),\n+\t\t z28 = svldnt1_x4 (pn8, x0))\n+\n+/*\n+** ldnt1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, 
p0\\.b\n+**\tldnt1b\t{z0\\.b(?: - |, )z3\\.b}, pn\\1/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_pn0, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn0, x0),\n+\t\t z0 = svldnt1_x4 (pn0, x0))\n+\n+/*\n+** ldnt1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tldnt1b\t{z0\\.b(?: - |, )z3\\.b}, pn\\1/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_pn7, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn7, x0),\n+\t\t z0 = svldnt1_x4 (pn7, x0))\n+\n+/*\n+** ldnt1_mf8_pn15:\n+**\tldnt1b\t{z0\\.b(?: - |, )z3\\.b}, pn15/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_mf8_pn15, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_mf8_x4 (pn15, x0),\n+\t\t z0 = svldnt1_x4 (pn15, x0))\n+\n+/*\n+** ldnt1_vnum_mf8_0:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 0),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 1),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_vnum_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 2),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ldnt1_vnum_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 3),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 3))\n+\n+/*\n+** ldnt1_vnum_mf8_4:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 4),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 4))\n+\n+/*\n+** ldnt1_vnum_mf8_28:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 28),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 28))\n+\n+/*\n+** ldnt1_vnum_mf8_32:\n+**\t[^{]*\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, 32),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, 32))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -1),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, -1))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** ldnt1_vnum_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -2),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, -2))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** ldnt1_vnum_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -3),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, -3))\n+\n+/*\n+** ldnt1_vnum_mf8_m4:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -4),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, -4))\n+\n+/*\n+** ldnt1_vnum_mf8_m32:\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -32),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, -32))\n+\n+/*\n+** ldnt1_vnum_mf8_m36:\n+**\t[^{]*\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, -36),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, -36))\n+\n+/*\n+** ldnt1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tldnt1b\t{z0\\.b - z3\\.b}, pn8/z, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_LOAD_COUNT (ldnt1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,\n+\t\t z0 = svldnt1_vnum_mf8_x4 (pn8, x0, x1),\n+\t\t z0 = svldnt1_vnum_x4 (pn8, x0, x1))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_mf8.c\nnew file mode 100644\nindex 00000000000..64d08509c16\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/revd_mf8.c\n@@ -0,0 +1,80 @@\n+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */\n+/* { dg-do compile { target { ! 
aarch64_asm_sve2p1_ok } } } */\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" } } */\n+\n+#include \"test_sve_acle.h\"\n+\n+#pragma GCC target \"+sve2p1\"\n+\n+/*\n+** revd_mf8_m_tied12:\n+**\trevd\tz0\\.q, p0/m, z0\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_m_tied12, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_m (z0, p0, z0),\n+\t\tz0 = svrevd_m (z0, p0, z0))\n+\n+/*\n+** revd_mf8_m_tied1:\n+**\trevd\tz0\\.q, p0/m, z1\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_m_tied1, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_m (z0, p0, z1),\n+\t\tz0 = svrevd_m (z0, p0, z1))\n+\n+/*\n+** revd_mf8_m_tied2:\n+**\tmov\t(z[0-9]+)\\.d, z0\\.d\n+**\tmovprfx\tz0, z1\n+**\trevd\tz0\\.q, p0/m, \\1\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_m_tied2, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_m (z1, p0, z0),\n+\t\tz0 = svrevd_m (z1, p0, z0))\n+\n+/*\n+** revd_mf8_m_untied:\n+**\tmovprfx\tz0, z2\n+**\trevd\tz0\\.q, p0/m, z1\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_m_untied, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_m (z2, p0, z1),\n+\t\tz0 = svrevd_m (z2, p0, z1))\n+\n+/* Awkward register allocation. Don't require specific output. 
*/\n+TEST_UNIFORM_Z (revd_mf8_z_tied1, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_z (p0, z0),\n+\t\tz0 = svrevd_z (p0, z0))\n+\n+/*\n+** revd_mf8_z_untied:\n+**\tmovi?\t[vdz]0\\.?(?:[0-9]*[bhsd])?, #?0\n+**\trevd\tz0\\.q, p0/m, z1\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_z_untied, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_z (p0, z1),\n+\t\tz0 = svrevd_z (p0, z1))\n+\n+/*\n+** revd_mf8_x_tied1:\n+**\trevd\tz0\\.q, p0/m, z0\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_x_tied1, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_x (p0, z0),\n+\t\tz0 = svrevd_x (p0, z0))\n+\n+/*\n+** revd_mf8_x_untied:\n+**\tmovprfx\tz0, z1\n+**\trevd\tz0\\.q, p0/m, z1\\.q\n+**\tret\n+*/\n+TEST_UNIFORM_Z (revd_mf8_x_untied, svmfloat8_t,\n+\t\tz0 = svrevd_mf8_x (p0, z1),\n+\t\tz0 = svrevd_x (p0, z1))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x2.c\nnew file mode 100644\nindex 00000000000..489e4fff54d\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x2.c\n@@ -0,0 +1,269 @@\n+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */\n+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! ilp32 } } } } */\n+\n+#include \"test_sve_acle.h\"\n+\n+#pragma GCC target \"+sve2p1\"\n+#ifdef STREAMING_COMPATIBLE\n+#pragma GCC target \"+sme2\"\n+#endif\n+\n+/*\n+** stnt1_mf8_base:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_base, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0, z0),\n+\t\t svstnt1 (pn8, x0, z0))\n+\n+/*\n+** stnt1_mf8_index:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_index, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 + x1, z0),\n+\t\t svstnt1 (pn8, x0 + x1, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_mf8_1:\n+**\tincb\tx0\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 + svcntb (), z0),\n+\t\t svstnt1 (pn8, x0 + svcntb (), z0))\n+\n+/*\n+** stnt1_mf8_2:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 + svcntb () * 2, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 2, z0))\n+\n+/*\n+** stnt1_mf8_14:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 + svcntb () * 14, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 14, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 + svcntb () * 16, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 16, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_mf8_m1:\n+**\tdecb\tx0\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 - svcntb (), z0),\n+\t\t svstnt1 (pn8, x0 - svcntb (), z0))\n+\n+/*\n+** stnt1_mf8_m2:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 - svcntb () * 2, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 2, z0))\n+\n+/*\n+** stnt1_mf8_m16:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 - svcntb () * 16, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 16, z0))\n+\n+/*\n+** stnt1_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[\\1\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0 - svcntb () * 18, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 18, z0))\n+\n+/*\n+** stnt1_mf8_z17:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tstnt1b\t{z[^\\n]+}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_z17, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0, z17),\n+\t\t svstnt1 (pn8, x0, z17))\n+\n+/*\n+** stnt1_mf8_z22:\n+**\tstnt1b\t{z22\\.b(?: - |, )z23\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_z22, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0, z22),\n+\t\t svstnt1 (pn8, x0, z22))\n+\n+/*\n+** stnt1_mf8_z28:\n+**\tstnt1b\t{z28\\.b(?: - |, )z29\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_z28, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn8, x0, z28),\n+\t\t svstnt1 (pn8, x0, z28))\n+\n+/*\n+** stnt1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, p0\\.b\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_pn0, svmfloat8x2_t, 
mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn0, x0, z0),\n+\t\t svstnt1 (pn0, x0, z0))\n+\n+/*\n+** stnt1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn\\1, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_pn7, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn7, x0, z0),\n+\t\t svstnt1 (pn7, x0, z0))\n+\n+/*\n+** stnt1_mf8_pn15:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn15, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_pn15, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_mf8_x2 (pn15, x0, z0),\n+\t\t svstnt1 (pn15, x0, z0))\n+\n+/*\n+** stnt1_vnum_mf8_0:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_0, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, 0, z0),\n+\t\t svstnt1_vnum (pn8, x0, 0, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_1, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, 1, z0),\n+\t\t svstnt1_vnum (pn8, x0, 1, z0))\n+\n+/*\n+** stnt1_vnum_mf8_2:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #2, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_2, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, 2, z0),\n+\t\t svstnt1_vnum (pn8, x0, 2, z0))\n+\n+/*\n+** stnt1_vnum_mf8_14:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #14, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_14, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, 14, z0),\n+\t\t svstnt1_vnum (pn8, x0, 14, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_vnum_mf8_16:\n+**\tincb\tx0, all, mul #16\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_16, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, 16, z0),\n+\t\t svstnt1_vnum (pn8, x0, 16, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m1, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, -1, z0),\n+\t\t svstnt1_vnum (pn8, x0, -1, z0))\n+\n+/*\n+** stnt1_vnum_mf8_m2:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #-2, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m2, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, -2, z0),\n+\t\t svstnt1_vnum (pn8, x0, -2, z0))\n+\n+/*\n+** stnt1_vnum_mf8_m16:\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, #-16, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m16, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, -16, z0),\n+\t\t svstnt1_vnum (pn8, x0, -16, z0))\n+\n+/*\n+** stnt1_vnum_mf8_m18:\n+**\taddvl\t(x[0-9]+), x0, #-18\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[\\1\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m18, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, -18, z0),\n+\t\t svstnt1_vnum (pn8, x0, -18, z0))\n+\n+/*\n+** stnt1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tstnt1b\t{z0\\.b(?: - |, )z1\\.b}, pn8, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_x1, svmfloat8x2_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x2 (pn8, x0, x1, z0),\n+\t\t svstnt1_vnum (pn8, x0, x1, z0))\ndiff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x4.c 
b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x4.c\nnew file mode 100644\nindex 00000000000..4be364514ab\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_mf8_x4.c\n@@ -0,0 +1,361 @@\n+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */\n+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */\n+/* { dg-final { check-function-bodies \"**\" \"\" \"-DCHECK_ASM\" { target { ! ilp32 } } } } */\n+\n+#include \"test_sve_acle.h\"\n+\n+#pragma GCC target \"+sve2p1\"\n+#ifdef STREAMING_COMPATIBLE\n+#pragma GCC target \"+sme2\"\n+#endif\n+\n+/*\n+** stnt1_mf8_base:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_base, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0, z0),\n+\t\t svstnt1 (pn8, x0, z0))\n+\n+/*\n+** stnt1_mf8_index:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x1\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_index, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + x1, z0),\n+\t\t svstnt1 (pn8, x0 + x1, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_mf8_1:\n+**\tincb\tx0\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + svcntb (), z0),\n+\t\t svstnt1 (pn8, x0 + svcntb (), z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + svcntb () * 2, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 2, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + svcntb () * 3, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 3, z0))\n+\n+/*\n+** stnt1_mf8_4:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + svcntb () * 4, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 4, z0))\n+\n+/*\n+** stnt1_mf8_28:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + svcntb () * 28, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 28, z0))\n+\n+/*\n+** stnt1_mf8_32:\n+**\t[^{]*\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 + svcntb () * 32, z0),\n+\t\t svstnt1 (pn8, x0 + svcntb () * 32, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_mf8_m1:\n+**\tdecb\tx0\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 - svcntb (), z0),\n+\t\t svstnt1 (pn8, x0 - svcntb (), z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 - svcntb () * 2, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 2, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 - svcntb () * 3, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 3, z0))\n+\n+/*\n+** stnt1_mf8_m4:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 - svcntb () * 4, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 4, z0))\n+\n+/*\n+** stnt1_mf8_m32:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 - svcntb () * 32, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 32, z0))\n+\n+/*\n+** stnt1_mf8_m36:\n+**\t[^{]*\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0 - svcntb () * 36, z0),\n+\t\t svstnt1 (pn8, x0 - svcntb () * 36, z0))\n+\n+/*\n+** stnt1_mf8_z17:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tstnt1b\t{z[^\\n]+}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_z17, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0, z17),\n+\t\t svstnt1 (pn8, x0, z17))\n+\n+/*\n+** stnt1_mf8_z22:\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tmov\t[^\\n]+\n+**\tstnt1b\t{z[^\\n]+}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_z22, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0, z22),\n+\t\t svstnt1 (pn8, x0, z22))\n+\n+/*\n+** stnt1_mf8_z28:\n+**\tstnt1b\t{z28\\.b - z31\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_z28, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn8, x0, z28),\n+\t\t svstnt1 (pn8, x0, z28))\n+\n+/*\n+** stnt1_mf8_pn0:\n+**\tmov\tp([89]|1[0-5])\\.b, p0\\.b\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn\\1, 
\\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_pn0, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn0, x0, z0),\n+\t\t svstnt1 (pn0, x0, z0))\n+\n+/*\n+** stnt1_mf8_pn7:\n+**\tmov\tp([89]|1[0-5])\\.b, p7\\.b\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn\\1, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_pn7, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn7, x0, z0),\n+\t\t svstnt1 (pn7, x0, z0))\n+\n+/*\n+** stnt1_mf8_pn15:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn15, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_mf8_pn15, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_mf8_x4 (pn15, x0, z0),\n+\t\t svstnt1 (pn15, x0, z0))\n+\n+/*\n+** stnt1_vnum_mf8_0:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_0, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 0, z0),\n+\t\t svstnt1_vnum (pn8, x0, 0, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_vnum_mf8_1:\n+**\tincb\tx0\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_1, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 1, z0),\n+\t\t svstnt1_vnum (pn8, x0, 1, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_vnum_mf8_2:\n+**\tincb\tx0, all, mul #2\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_2, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 2, z0),\n+\t\t svstnt1_vnum (pn8, x0, 2, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_vnum_mf8_3:\n+**\tincb\tx0, all, mul #3\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_3, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 3, z0),\n+\t\t svstnt1_vnum (pn8, x0, 3, z0))\n+\n+/*\n+** stnt1_vnum_mf8_4:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #4, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_4, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 4, z0),\n+\t\t svstnt1_vnum (pn8, x0, 4, z0))\n+\n+/*\n+** stnt1_vnum_mf8_28:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #28, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_28, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 28, z0),\n+\t\t svstnt1_vnum (pn8, x0, 28, z0))\n+\n+/*\n+** stnt1_vnum_mf8_32:\n+**\t[^{]*\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_32, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, 32, z0),\n+\t\t svstnt1_vnum (pn8, x0, 32, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_vnum_mf8_m1:\n+**\tdecb\tx0\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m1, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, -1, z0),\n+\t\t svstnt1_vnum (pn8, x0, -1, z0))\n+\n+/* Moving the constant into a register would also be OK. */\n+/*\n+** stnt1_vnum_mf8_m2:\n+**\tdecb\tx0, all, mul #2\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m2, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, -2, z0),\n+\t\t svstnt1_vnum (pn8, x0, -2, z0))\n+\n+/* Moving the constant into a register would also be OK. 
*/\n+/*\n+** stnt1_vnum_mf8_m3:\n+**\tdecb\tx0, all, mul #3\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m3, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, -3, z0),\n+\t\t svstnt1_vnum (pn8, x0, -3, z0))\n+\n+/*\n+** stnt1_vnum_mf8_m4:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #-4, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m4, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, -4, z0),\n+\t\t svstnt1_vnum (pn8, x0, -4, z0))\n+\n+/*\n+** stnt1_vnum_mf8_m32:\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, #-32, mul vl\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m32, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, -32, z0),\n+\t\t svstnt1_vnum (pn8, x0, -32, z0))\n+\n+/*\n+** stnt1_vnum_mf8_m36:\n+**\t[^{]*\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, x[0-9]+\\]\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_m36, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, -36, z0),\n+\t\t svstnt1_vnum (pn8, x0, -36, z0))\n+\n+/*\n+** stnt1_vnum_mf8_x1:\n+**\tcntb\t(x[0-9]+)\n+** (\n+**\tmadd\t(x[0-9]+), (?:x1, \\1|\\1, x1), x0\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[\\2\\]\n+** |\n+**\tmul\t(x[0-9]+), (?:x1, \\1|\\1, x1)\n+**\tstnt1b\t{z0\\.b - z3\\.b}, pn8, \\[x0, \\3\\]\n+** )\n+**\tret\n+*/\n+TEST_STORE_COUNT (stnt1_vnum_mf8_x1, svmfloat8x4_t, mfloat8_t,\n+\t\t svstnt1_vnum_mf8_x4 (pn8, x0, x1, z0),\n+\t\t svstnt1_vnum (pn8, x0, x1, z0))\n", "prefixes": [ "v5", "2/9" ] }