get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/2216765/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 2216765,
    "url": "http://patchwork.ozlabs.org/api/patches/2216765/?format=api",
    "web_url": "http://patchwork.ozlabs.org/project/sparclinux/patch/20260327061704.3707577-20-hch@lst.de/",
    "project": {
        "id": 10,
        "url": "http://patchwork.ozlabs.org/api/projects/10/?format=api",
        "name": "Linux SPARC Development ",
        "link_name": "sparclinux",
        "list_id": "sparclinux.vger.kernel.org",
        "list_email": "sparclinux@vger.kernel.org",
        "web_url": null,
        "scm_url": null,
        "webscm_url": null,
        "list_archive_url": "",
        "list_archive_url_format": "",
        "commit_url_format": ""
    },
    "msgid": "<20260327061704.3707577-20-hch@lst.de>",
    "list_archive_url": null,
    "date": "2026-03-27T06:16:51",
    "name": "[19/28] x86: move the XOR code to lib/raid/",
    "commit_ref": null,
    "pull_url": null,
    "state": "new",
    "archived": false,
    "hash": "3ec2117bb956dea4decf3a7b85f80ce5b88dc343",
    "submitter": {
        "id": 82,
        "url": "http://patchwork.ozlabs.org/api/people/82/?format=api",
        "name": "Christoph Hellwig",
        "email": "hch@lst.de"
    },
    "delegate": null,
    "mbox": "http://patchwork.ozlabs.org/project/sparclinux/patch/20260327061704.3707577-20-hch@lst.de/mbox/",
    "series": [
        {
            "id": 497694,
            "url": "http://patchwork.ozlabs.org/api/series/497694/?format=api",
            "web_url": "http://patchwork.ozlabs.org/project/sparclinux/list/?series=497694",
            "date": "2026-03-27T06:16:33",
            "name": "[01/28] xor: assert that xor_blocks is not call from interrupt context",
            "version": 1,
            "mbox": "http://patchwork.ozlabs.org/series/497694/mbox/"
        }
    ],
    "comments": "http://patchwork.ozlabs.org/api/patches/2216765/comments/",
    "check": "pending",
    "checks": "http://patchwork.ozlabs.org/api/patches/2216765/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "\n <SRS0=rieT=B3=vger.kernel.org=sparclinux+bounces-6594-patchwork-incoming=ozlabs.org@ozlabs.org>",
        "X-Original-To": [
            "incoming@patchwork.ozlabs.org",
            "sparclinux@vger.kernel.org"
        ],
        "Delivered-To": [
            "patchwork-incoming@legolas.ozlabs.org",
            "patchwork-incoming@ozlabs.org"
        ],
        "Authentication-Results": [
            "legolas.ozlabs.org;\n\tdkim=pass (2048-bit key;\n secure) header.d=infradead.org header.i=@infradead.org header.a=rsa-sha256\n header.s=bombadil.20210309 header.b=4XhU/9rC;\n\tdkim-atps=neutral",
            "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=ozlabs.org\n (client-ip=2404:9400:2221:ea00::3; helo=mail.ozlabs.org;\n envelope-from=srs0=riet=b3=vger.kernel.org=sparclinux+bounces-6594-patchwork-incoming=ozlabs.org@ozlabs.org;\n receiver=patchwork.ozlabs.org)",
            "gandalf.ozlabs.org;\n arc=pass smtp.remote-ip=172.105.105.114 arc.chain=subspace.kernel.org",
            "gandalf.ozlabs.org;\n dmarc=fail (p=none dis=none) header.from=lst.de",
            "gandalf.ozlabs.org;\n\tdkim=pass (2048-bit key;\n secure) header.d=infradead.org header.i=@infradead.org header.a=rsa-sha256\n header.s=bombadil.20210309 header.b=4XhU/9rC;\n\tdkim-atps=neutral",
            "gandalf.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org\n (client-ip=172.105.105.114; helo=tor.lore.kernel.org;\n envelope-from=sparclinux+bounces-6594-patchwork-incoming=ozlabs.org@vger.kernel.org;\n receiver=ozlabs.org)",
            "smtp.subspace.kernel.org;\n\tdkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org\n header.b=\"4XhU/9rC\"",
            "smtp.subspace.kernel.org;\n arc=none smtp.client-ip=198.137.202.133",
            "smtp.subspace.kernel.org;\n dmarc=fail (p=none dis=none) header.from=lst.de",
            "smtp.subspace.kernel.org;\n spf=none smtp.mailfrom=bombadil.srs.infradead.org"
        ],
        "Received": [
            "from mail.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fhrSV1k1mz1y1j\n\tfor <incoming@patchwork.ozlabs.org>; Fri, 27 Mar 2026 17:33:30 +1100 (AEDT)",
            "from mail.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3])\n\tby gandalf.ozlabs.org (Postfix) with ESMTP id 4fhrSV15LBz4wSW\n\tfor <incoming@patchwork.ozlabs.org>; Fri, 27 Mar 2026 17:33:30 +1100 (AEDT)",
            "by gandalf.ozlabs.org (Postfix)\n\tid 4fhrSV0wtkz4wHX; Fri, 27 Mar 2026 17:33:30 +1100 (AEDT)",
            "from tor.lore.kernel.org (tor.lore.kernel.org [172.105.105.114])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519)\n\t(No client certificate requested)\n\tby gandalf.ozlabs.org (Postfix) with ESMTPS id 4fhrSQ3S9Jz4wSW\n\tfor <patchwork-incoming@ozlabs.org>; Fri, 27 Mar 2026 17:33:26 +1100 (AEDT)",
            "from smtp.subspace.kernel.org (conduit.subspace.kernel.org\n [100.90.174.1])\n\tby tor.lore.kernel.org (Postfix) with ESMTP id C153D30D6B09\n\tfor <patchwork-incoming@ozlabs.org>; Fri, 27 Mar 2026 06:25:33 +0000 (UTC)",
            "from localhost.localdomain (localhost.localdomain [127.0.0.1])\n\tby smtp.subspace.kernel.org (Postfix) with ESMTP id 9DBED388E53;\n\tFri, 27 Mar 2026 06:22:16 +0000 (UTC)",
            "from bombadil.infradead.org (bombadil.infradead.org\n [198.137.202.133])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby smtp.subspace.kernel.org (Postfix) with ESMTPS id A621F39DBFF;\n\tFri, 27 Mar 2026 06:22:09 +0000 (UTC)",
            "from\n 2a02-8389-2341-5b80-d601-7564-c2e0-491c.cable.dynamic.v6.surfer.at\n ([2a02:8389:2341:5b80:d601:7564:c2e0:491c] helo=localhost)\n\tby bombadil.infradead.org with esmtpsa (Exim 4.98.2 #2 (Red Hat Linux))\n\tid 1w60Zt-00000006n7i-1XIT;\n\tFri, 27 Mar 2026 06:21:53 +0000"
        ],
        "ARC-Seal": [
            "i=2; a=rsa-sha256; d=ozlabs.org; s=201707; t=1774593210; cv=pass;\n\tb=fJTkhbcYhJcdgtJ4qVHQ15c9+e/NDPoJvpsH+4vf5FC+NCiwzVZjHKdTkYyGkj9ySlw+hymTVm2/dSt6Vu/38BwOPbYcVGCPHQ2kWQROPgTzLpeu7KPYalmwhvTmzSWcsob/KU/VJC+qSkzfYrV5k7/lkh4sE8Dw+jFOf2lyF1BTpfivI4Yydwuxuam6B/mOVE6FDKkB+NlcKpN2vlgap3pLPYlSW/4Uomqy18g5488DpKQbgP32J0OvsULkaAuPd+YvqIuNxoy/CKmI6TvaVrhJx6e4fN3CgQuIh7plzike2bvyB9LqssfVwrrzNdSzZlNEB6SFzblex3jwCAZvBA==",
            "i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;\n\tt=1774592535; cv=none;\n b=HnoonwZE4d9msVxgt0FvewoALLq5PdQfza1b0ZCc2+fg3SXSnZVUdIElARA4hJ1FU8MpD2DADbtdzdwlsbG+IFU4qx445gZPVwzkECMTjYEU9kaY90zNAj4V5/v1TTRX4eXjJnQ3AMzTDuJhAyuGD5fWTuhqwpvFLtH662d4U08="
        ],
        "ARC-Message-Signature": [
            "i=2; a=rsa-sha256; d=ozlabs.org; s=201707;\n\tt=1774593210; c=relaxed/relaxed;\n\tbh=L+6ojMJfwCWhLFRB0GPni+uSlsd8H4EJEa7++qGwg4w=;\n\th=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:\n\t MIME-Version;\n b=JIGD1chKhM/jNdP9U9wo2pegDh1f+NyH2vhs7J35fyv9SGoDmygQROcc5Wv2B2ICyQcfMzIsScKt9mSoABWdc798aDbdvwJ5oYsGZ9nqOmOqUMMoNh1kwTpqatbvU1vGk7qpZ93k+H/0zHozfGnYFLW4CLzP+Qh6mJLLU+wdZSlh99pgCaSL31WySOwjtgh6844F3wUKo0BnwXHVMlXDKJou5ZNVEI0GFN0l3Nvfu4DUMq4402GAs+h5j8s1z7+zJE/JWjmsP2j5j0MOIiY0WA1h/bBJVrgubTQSApmYgasbAgEna2/gstBp5A2WwH0QKG6n0mnbV9zMc8vZZSyzLw==",
            "i=1; a=rsa-sha256; d=subspace.kernel.org;\n\ts=arc-20240116; t=1774592535; c=relaxed/simple;\n\tbh=4CQ0E/MDGIIyFwOFRKSq8Gd6DvIjRLvEY3SEoA1gZDQ=;\n\th=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:\n\t MIME-Version;\n b=FQJ2t1R4a+SelSY71tjNWOSl7sABlqf3mbXyvu+VxoNBvr1ozDZV0DJoSE/C01ss/ZmajswKACWuyEQbMucouhPZcx+98I8/BL1XRQbXTj4t7RK6++XZmBla1SKjGo6ZvRHrOy8ndsiD0cXII2PLH9HmQ782JOKVjRGHwn5d69M="
        ],
        "ARC-Authentication-Results": [
            "i=2; gandalf.ozlabs.org;\n dmarc=fail (p=none dis=none) header.from=lst.de; dkim=pass (2048-bit key;\n secure) header.d=infradead.org header.i=@infradead.org header.a=rsa-sha256\n header.s=bombadil.20210309 header.b=4XhU/9rC; dkim-atps=neutral;\n spf=pass (client-ip=172.105.105.114; helo=tor.lore.kernel.org;\n envelope-from=sparclinux+bounces-6594-patchwork-incoming=ozlabs.org@vger.kernel.org;\n receiver=ozlabs.org) smtp.mailfrom=vger.kernel.org",
            "i=1; smtp.subspace.kernel.org;\n dmarc=fail (p=none dis=none) header.from=lst.de;\n spf=none smtp.mailfrom=bombadil.srs.infradead.org;\n dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org\n header.b=4XhU/9rC; arc=none smtp.client-ip=198.137.202.133"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;\n\td=infradead.org; s=bombadil.20210309; h=Content-Transfer-Encoding:\n\tMIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From:Sender\n\t:Reply-To:Content-Type:Content-ID:Content-Description;\n\tbh=L+6ojMJfwCWhLFRB0GPni+uSlsd8H4EJEa7++qGwg4w=; b=4XhU/9rCyLY9fimUyUdKcmGMp5\n\tYF1YVjyokj6W9Q31s7V3S5N6iBkGZjQrkc02H8uylN5H/FLCjfOI7RElSw2i6AZR32AZ3ZCUJ4Xxx\n\tLFIUSTAuC8axWqfUb/mrMbqGpUG4TU5D5F2psW0P+siigIogzc1AmrneGYBf3UFtla3QQuTcSi1Mp\n\tQcRaTgo7zKYkvwiE/qoGqBAypjDJcgjvGxgJZEoP/GK7mQtldSwfdRsLwIwciBl+ozydv2FV8F0dG\n\tgjmUSWsyvnaDEa1ZYV8LfIMmIRQh0NPBwUrZg4UgxORw3L62aoFoZl6KL3Tc5h2f9qRfofWaY/5Ft\n\tN6Tc/fXg==;",
        "From": "Christoph Hellwig <hch@lst.de>",
        "To": "Andrew Morton <akpm@linux-foundation.org>",
        "Cc": "Richard Henderson <richard.henderson@linaro.org>,\n\tMatt Turner <mattst88@gmail.com>,\n\tMagnus Lindholm <linmag7@gmail.com>,\n\tRussell King <linux@armlinux.org.uk>,\n\tCatalin Marinas <catalin.marinas@arm.com>,\n\tWill Deacon <will@kernel.org>,\n\tArd Biesheuvel <ardb@kernel.org>,\n\tHuacai Chen <chenhuacai@kernel.org>,\n\tWANG Xuerui <kernel@xen0n.name>,\n\tMadhavan Srinivasan <maddy@linux.ibm.com>,\n\tMichael Ellerman <mpe@ellerman.id.au>,\n\tNicholas Piggin <npiggin@gmail.com>,\n\t\"Christophe Leroy (CS GROUP)\" <chleroy@kernel.org>,\n\tPaul Walmsley <pjw@kernel.org>,\n\tPalmer Dabbelt <palmer@dabbelt.com>,\n\tAlbert Ou <aou@eecs.berkeley.edu>,\n\tAlexandre Ghiti <alex@ghiti.fr>,\n\tHeiko Carstens <hca@linux.ibm.com>,\n\tVasily Gorbik <gor@linux.ibm.com>,\n\tAlexander Gordeev <agordeev@linux.ibm.com>,\n\tChristian Borntraeger <borntraeger@linux.ibm.com>,\n\tSven Schnelle <svens@linux.ibm.com>,\n\t\"David S. Miller\" <davem@davemloft.net>,\n\tAndreas Larsson <andreas@gaisler.com>,\n\tRichard Weinberger <richard@nod.at>,\n\tAnton Ivanov <anton.ivanov@cambridgegreys.com>,\n\tJohannes Berg <johannes@sipsolutions.net>,\n\tThomas Gleixner <tglx@kernel.org>,\n\tIngo Molnar <mingo@redhat.com>,\n\tBorislav Petkov <bp@alien8.de>,\n\tDave Hansen <dave.hansen@linux.intel.com>,\n\tx86@kernel.org,\n\t\"H. Peter Anvin\" <hpa@zytor.com>,\n\tHerbert Xu <herbert@gondor.apana.org.au>,\n\tDan Williams <dan.j.williams@intel.com>,\n\tChris Mason <clm@fb.com>,\n\tDavid Sterba <dsterba@suse.com>,\n\tArnd Bergmann <arnd@arndb.de>,\n\tSong Liu <song@kernel.org>,\n\tYu Kuai <yukuai@fnnas.com>,\n\tLi Nan <linan122@huawei.com>,\n\t\"Theodore Ts'o\" <tytso@mit.edu>,\n\t\"Jason A. Donenfeld\" <Jason@zx2c4.com>,\n\tlinux-alpha@vger.kernel.org,\n\tlinux-kernel@vger.kernel.org,\n\tlinux-arm-kernel@lists.infradead.org,\n\tloongarch@lists.linux.dev,\n\tlinuxppc-dev@lists.ozlabs.org,\n\tlinux-riscv@lists.infradead.org,\n\tlinux-s390@vger.kernel.org,\n\tsparclinux@vger.kernel.org,\n\tlinux-um@lists.infradead.org,\n\tlinux-crypto@vger.kernel.org,\n\tlinux-btrfs@vger.kernel.org,\n\tlinux-arch@vger.kernel.org,\n\tlinux-raid@vger.kernel.org",
        "Subject": "[PATCH 19/28] x86: move the XOR code to lib/raid/",
        "Date": "Fri, 27 Mar 2026 07:16:51 +0100",
        "Message-ID": "<20260327061704.3707577-20-hch@lst.de>",
        "X-Mailer": "git-send-email 2.47.3",
        "In-Reply-To": "<20260327061704.3707577-1-hch@lst.de>",
        "References": "<20260327061704.3707577-1-hch@lst.de>",
        "Precedence": "bulk",
        "X-Mailing-List": "sparclinux@vger.kernel.org",
        "List-Id": "<sparclinux.vger.kernel.org>",
        "List-Subscribe": "<mailto:sparclinux+subscribe@vger.kernel.org>",
        "List-Unsubscribe": "<mailto:sparclinux+unsubscribe@vger.kernel.org>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-SRS-Rewrite": "SMTP reverse-path rewritten from <hch@infradead.org> by\n bombadil.infradead.org. See http://www.infradead.org/rpr.html",
        "X-Spam-Status": "No, score=-0.2 required=5.0 tests=ARC_SIGNED,ARC_VALID,\n\tDKIM_SIGNED,DKIM_VALID,DMARC_NONE,HEADER_FROM_DIFFERENT_DOMAINS,\n\tMAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS autolearn=disabled\n\tversion=4.0.1",
        "X-Spam-Checker-Version": "SpamAssassin 4.0.1 (2024-03-25) on gandalf.ozlabs.org"
    },
    "content": "Move the optimized XOR code out of line into lib/raid.\n\nSigned-off-by: Christoph Hellwig <hch@lst.de>\n---\n arch/x86/include/asm/xor.h                    | 518 ++----------------\n arch/x86/include/asm/xor_64.h                 |  32 --\n lib/raid/xor/Makefile                         |   2 +\n .../xor_avx.h => lib/raid/xor/x86/xor-avx.c   |  14 +-\n .../xor_32.h => lib/raid/xor/x86/xor-mmx.c    |  60 +-\n lib/raid/xor/x86/xor-sse.c                    | 476 ++++++++++++++++\n 6 files changed, 522 insertions(+), 580 deletions(-)\n delete mode 100644 arch/x86/include/asm/xor_64.h\n rename arch/x86/include/asm/xor_avx.h => lib/raid/xor/x86/xor-avx.c (95%)\n rename arch/x86/include/asm/xor_32.h => lib/raid/xor/x86/xor-mmx.c (90%)\n create mode 100644 lib/raid/xor/x86/xor-sse.c",
    "diff": "diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h\nindex 33f5620d8d69..d1aab8275908 100644\n--- a/arch/x86/include/asm/xor.h\n+++ b/arch/x86/include/asm/xor.h\n@@ -2,498 +2,42 @@\n #ifndef _ASM_X86_XOR_H\n #define _ASM_X86_XOR_H\n \n-/*\n- * Optimized RAID-5 checksumming functions for SSE.\n- */\n-\n-/*\n- * Cache avoiding checksumming functions utilizing KNI instructions\n- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)\n- */\n+#include <asm/cpufeature.h>\n+#include <asm-generic/xor.h>\n \n-/*\n- * Based on\n- * High-speed RAID5 checksumming functions utilizing SSE instructions.\n- * Copyright (C) 1998 Ingo Molnar.\n- */\n+extern struct xor_block_template xor_block_pII_mmx;\n+extern struct xor_block_template xor_block_p5_mmx;\n+extern struct xor_block_template xor_block_sse;\n+extern struct xor_block_template xor_block_sse_pf64;\n+extern struct xor_block_template xor_block_avx;\n \n /*\n- * x86-64 changes / gcc fixes from Andi Kleen.\n- * Copyright 2002 Andi Kleen, SuSE Labs.\n+ * When SSE is available, use it as it can write around L2.  We may also be able\n+ * to load into the L1 only depending on how the cpu deals with a load to a line\n+ * that is being prefetched.\n+ *\n+ * When AVX2 is available, force using it as it is better by all measures.\n  *\n- * This hasn't been optimized for the hammer yet, but there are likely\n- * no advantages to be gotten from x86-64 here anyways.\n+ * 32-bit without MMX can fall back to the generic routines.\n  */\n-\n-#include <asm/fpu/api.h>\n-\n-#ifdef CONFIG_X86_32\n-/* reduce register pressure */\n-# define XOR_CONSTANT_CONSTRAINT \"i\"\n-#else\n-# define XOR_CONSTANT_CONSTRAINT \"re\"\n-#endif\n-\n-#define OFFS(x)\t\t\"16*(\"#x\")\"\n-#define PF_OFFS(x)\t\"256+16*(\"#x\")\"\n-#define PF0(x)\t\t\"\tprefetchnta \"PF_OFFS(x)\"(%[p1])\t\t;\\n\"\n-#define LD(x, y)\t\"\tmovaps \"OFFS(x)\"(%[p1]), %%xmm\"#y\"\t;\\n\"\n-#define ST(x, y)\t\"\tmovaps %%xmm\"#y\", \"OFFS(x)\"(%[p1])\t;\\n\"\n-#define PF1(x)\t\t\"\tprefetchnta \"PF_OFFS(x)\"(%[p2])\t\t;\\n\"\n-#define PF2(x)\t\t\"\tprefetchnta \"PF_OFFS(x)\"(%[p3])\t\t;\\n\"\n-#define PF3(x)\t\t\"\tprefetchnta \"PF_OFFS(x)\"(%[p4])\t\t;\\n\"\n-#define PF4(x)\t\t\"\tprefetchnta \"PF_OFFS(x)\"(%[p5])\t\t;\\n\"\n-#define XO1(x, y)\t\"\txorps \"OFFS(x)\"(%[p2]), %%xmm\"#y\"\t;\\n\"\n-#define XO2(x, y)\t\"\txorps \"OFFS(x)\"(%[p3]), %%xmm\"#y\"\t;\\n\"\n-#define XO3(x, y)\t\"\txorps \"OFFS(x)\"(%[p4]), %%xmm\"#y\"\t;\\n\"\n-#define XO4(x, y)\t\"\txorps \"OFFS(x)\"(%[p5]), %%xmm\"#y\"\t;\\n\"\n-#define NOP(x)\n-\n-#define BLK64(pf, op, i)\t\t\t\t\\\n-\t\tpf(i)\t\t\t\t\t\\\n-\t\top(i, 0)\t\t\t\t\\\n-\t\t\top(i + 1, 1)\t\t\t\\\n-\t\t\t\top(i + 2, 2)\t\t\\\n-\t\t\t\t\top(i + 3, 3)\n-\n-static void\n-xor_sse_2(unsigned long bytes, unsigned long * __restrict p1,\n-\t  const unsigned long * __restrict p2)\n-{\n-\tunsigned long lines = bytes >> 8;\n-\n-\tkernel_fpu_begin();\n-\n-\tasm volatile(\n-#undef BLOCK\n-#define BLOCK(i)\t\t\t\t\t\\\n-\t\tLD(i, 0)\t\t\t\t\\\n-\t\t\tLD(i + 1, 1)\t\t\t\\\n-\t\tPF1(i)\t\t\t\t\t\\\n-\t\t\t\tPF1(i + 2)\t\t\\\n-\t\t\t\tLD(i + 2, 2)\t\t\\\n-\t\t\t\t\tLD(i + 3, 3)\t\\\n-\t\tPF0(i + 4)\t\t\t\t\\\n-\t\t\t\tPF0(i + 6)\t\t\\\n-\t\tXO1(i, 0)\t\t\t\t\\\n-\t\t\tXO1(i + 1, 1)\t\t\t\\\n-\t\t\t\tXO1(i + 2, 2)\t\t\\\n-\t\t\t\t\tXO1(i + 3, 3)\t\\\n-\t\tST(i, 0)\t\t\t\t\\\n-\t\t\tST(i + 1, 1)\t\t\t\\\n-\t\t\t\tST(i + 2, 2)\t\t\\\n-\t\t\t\t\tST(i + 3, 3)\t\\\n-\n-\n-\t\tPF0(0)\n-\t\t\t\tPF0(2)\n-\n-\t\" .align 32\t\t\t;\\n\"\n-\t\" 1:                            ;\\n\"\n-\n-\t\tBLOCK(0)\n-\t\tBLOCK(4)\n-\t\tBLOCK(8)\n-\t\tBLOCK(12)\n-\n-\t\"       add %[inc], %[p1]       ;\\n\"\n-\t\"       add %[inc], %[p2]       ;\\n\"\n-\t\"       dec %[cnt]              ;\\n\"\n-\t\"       jnz 1b                  ;\\n\"\n-\t: [cnt] \"+r\" (lines),\n-\t  [p1] \"+r\" (p1), [p2] \"+r\" (p2)\n-\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n-\t: \"memory\");\n-\n-\tkernel_fpu_end();\n-}\n-\n-static void\n-xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1,\n-\t       const unsigned long * __restrict p2)\n-{\n-\tunsigned long lines = bytes >> 8;\n-\n-\tkernel_fpu_begin();\n-\n-\tasm volatile(\n-#undef BLOCK\n-#define BLOCK(i)\t\t\t\\\n-\t\tBLK64(PF0, LD, i)\t\\\n-\t\tBLK64(PF1, XO1, i)\t\\\n-\t\tBLK64(NOP, ST, i)\t\\\n-\n-\t\" .align 32\t\t\t;\\n\"\n-\t\" 1:                            ;\\n\"\n-\n-\t\tBLOCK(0)\n-\t\tBLOCK(4)\n-\t\tBLOCK(8)\n-\t\tBLOCK(12)\n-\n-\t\"       add %[inc], %[p1]       ;\\n\"\n-\t\"       add %[inc], %[p2]       ;\\n\"\n-\t\"       dec %[cnt]              ;\\n\"\n-\t\"       jnz 1b                  ;\\n\"\n-\t: [cnt] \"+r\" (lines),\n-\t  [p1] \"+r\" (p1), [p2] \"+r\" (p2)\n-\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n-\t: \"memory\");\n-\n-\tkernel_fpu_end();\n-}\n-\n-static void\n-xor_sse_3(unsigned long bytes, unsigned long * __restrict p1,\n-\t  const unsigned long * __restrict p2,\n-\t  const unsigned long * __restrict p3)\n-{\n-\tunsigned long lines = bytes >> 8;\n-\n-\tkernel_fpu_begin();\n-\n-\tasm volatile(\n-#undef BLOCK\n-#define BLOCK(i) \\\n-\t\tPF1(i)\t\t\t\t\t\\\n-\t\t\t\tPF1(i + 2)\t\t\\\n-\t\tLD(i, 0)\t\t\t\t\\\n-\t\t\tLD(i + 1, 1)\t\t\t\\\n-\t\t\t\tLD(i + 2, 2)\t\t\\\n-\t\t\t\t\tLD(i + 3, 3)\t\\\n-\t\tPF2(i)\t\t\t\t\t\\\n-\t\t\t\tPF2(i + 2)\t\t\\\n-\t\tPF0(i + 4)\t\t\t\t\\\n-\t\t\t\tPF0(i + 6)\t\t\\\n-\t\tXO1(i, 0)\t\t\t\t\\\n-\t\t\tXO1(i + 1, 1)\t\t\t\\\n-\t\t\t\tXO1(i + 2, 2)\t\t\\\n-\t\t\t\t\tXO1(i + 3, 3)\t\\\n-\t\tXO2(i, 0)\t\t\t\t\\\n-\t\t\tXO2(i + 1, 1)\t\t\t\\\n-\t\t\t\tXO2(i + 2, 2)\t\t\\\n-\t\t\t\t\tXO2(i + 3, 3)\t\\\n-\t\tST(i, 0)\t\t\t\t\\\n-\t\t\tST(i + 1, 1)\t\t\t\\\n-\t\t\t\tST(i + 2, 2)\t\t\\\n-\t\t\t\t\tST(i + 3, 3)\t\\\n-\n-\n-\t\tPF0(0)\n-\t\t\t\tPF0(2)\n-\n-\t\" .align 32\t\t\t;\\n\"\n-\t\" 1:                            ;\\n\"\n-\n-\t\tBLOCK(0)\n-\t\tBLOCK(4)\n-\t\tBLOCK(8)\n-\t\tBLOCK(12)\n-\n-\t\"       add %[inc], %[p1]       ;\\n\"\n-\t\"       add %[inc], %[p2]       ;\\n\"\n-\t\"       add %[inc], %[p3]       ;\\n\"\n-\t\"       dec %[cnt]              ;\\n\"\n-\t\"       jnz 1b                  ;\\n\"\n-\t: [cnt] \"+r\" (lines),\n-\t  [p1] \"+r\" (p1), [p2] \"+r\" (p2), [p3] \"+r\" (p3)\n-\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n-\t: \"memory\");\n-\n-\tkernel_fpu_end();\n+#define arch_xor_init arch_xor_init\n+static __always_inline void __init arch_xor_init(void)\n+{\n+\tif (boot_cpu_has(X86_FEATURE_AVX) &&\n+\t    boot_cpu_has(X86_FEATURE_OSXSAVE)) {\n+\t\txor_force(&xor_block_avx);\n+\t} else if (IS_ENABLED(CONFIG_X86_64) || boot_cpu_has(X86_FEATURE_XMM)) {\n+\t\txor_register(&xor_block_sse);\n+\t\txor_register(&xor_block_sse_pf64);\n+\t} else if (boot_cpu_has(X86_FEATURE_MMX)) {\n+\t\txor_register(&xor_block_pII_mmx);\n+\t\txor_register(&xor_block_p5_mmx);\n+\t} else {\n+\t\txor_register(&xor_block_8regs);\n+\t\txor_register(&xor_block_8regs_p);\n+\t\txor_register(&xor_block_32regs);\n+\t\txor_register(&xor_block_32regs_p);\n+\t}\n }\n \n-static void\n-xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1,\n-\t       const unsigned long * __restrict p2,\n-\t       const unsigned long * __restrict p3)\n-{\n-\tunsigned long lines = bytes >> 8;\n-\n-\tkernel_fpu_begin();\n-\n-\tasm volatile(\n-#undef BLOCK\n-#define BLOCK(i)\t\t\t\\\n-\t\tBLK64(PF0, LD, i)\t\\\n-\t\tBLK64(PF1, XO1, i)\t\\\n-\t\tBLK64(PF2, XO2, i)\t\\\n-\t\tBLK64(NOP, ST, i)\t\\\n-\n-\t\" .align 32\t\t\t;\\n\"\n-\t\" 1:                            ;\\n\"\n-\n-\t\tBLOCK(0)\n-\t\tBLOCK(4)\n-\t\tBLOCK(8)\n-\t\tBLOCK(12)\n-\n-\t\"       add %[inc], %[p1]       ;\\n\"\n-\t\"       add %[inc], %[p2]       ;\\n\"\n-\t\"       add %[inc], %[p3]       ;\\n\"\n-\t\"       dec %[cnt]              ;\\n\"\n-\t\"       jnz 1b                  ;\\n\"\n-\t: [cnt] \"+r\" (lines),\n-\t  [p1] \"+r\" (p1), [p2] \"+r\" (p2), [p3] \"+r\" (p3)\n-\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n-\t: \"memory\");\n-\n-\tkernel_fpu_end();\n-}\n-\n-static void\n-xor_sse_4(unsigned long bytes, unsigned long * __restrict p1,\n-\t  const unsigned long * __restrict p2,\n-\t  const unsigned long * __restrict p3,\n-\t  const unsigned long * __restrict p4)\n-{\n-\tunsigned long lines = bytes >> 8;\n-\n-\tkernel_fpu_begin();\n-\n-\tasm volatile(\n-#undef BLOCK\n-#define BLOCK(i) \\\n-\t\tPF1(i)\t\t\t\t\t\\\n-\t\t\t\tPF1(i + 2)\t\t\\\n-\t\tLD(i, 0)\t\t\t\t\\\n-\t\t\tLD(i + 1, 1)\t\t\t\\\n-\t\t\t\tLD(i + 2, 2)\t\t\\\n-\t\t\t\t\tLD(i + 3, 3)\t\\\n-\t\tPF2(i)\t\t\t\t\t\\\n-\t\t\t\tPF2(i + 2)\t\t\\\n-\t\tXO1(i, 0)\t\t\t\t\\\n-\t\t\tXO1(i + 1, 1)\t\t\t\\\n-\t\t\t\tXO1(i + 2, 2)\t\t\\\n-\t\t\t\t\tXO1(i + 3, 3)\t\\\n-\t\tPF3(i)\t\t\t\t\t\\\n-\t\t\t\tPF3(i + 2)\t\t\\\n-\t\tPF0(i + 4)\t\t\t\t\\\n-\t\t\t\tPF0(i + 6)\t\t\\\n-\t\tXO2(i, 0)\t\t\t\t\\\n-\t\t\tXO2(i + 1, 1)\t\t\t\\\n-\t\t\t\tXO2(i + 2, 2)\t\t\\\n-\t\t\t\t\tXO2(i + 3, 3)\t\\\n-\t\tXO3(i, 0)\t\t\t\t\\\n-\t\t\tXO3(i + 1, 1)\t\t\t\\\n-\t\t\t\tXO3(i + 2, 2)\t\t\\\n-\t\t\t\t\tXO3(i + 3, 3)\t\\\n-\t\tST(i, 0)\t\t\t\t\\\n-\t\t\tST(i + 1, 1)\t\t\t\\\n-\t\t\t\tST(i + 2, 2)\t\t\\\n-\t\t\t\t\tST(i + 3, 3)\t\\\n-\n-\n-\t\tPF0(0)\n-\t\t\t\tPF0(2)\n-\n-\t\" .align 32\t\t\t;\\n\"\n-\t\" 1:                            ;\\n\"\n-\n-\t\tBLOCK(0)\n-\t\tBLOCK(4)\n-\t\tBLOCK(8)\n-\t\tBLOCK(12)\n-\n-\t\"       add %[inc], %[p1]       ;\\n\"\n-\t\"       add %[inc], %[p2]       ;\\n\"\n-\t\"       add %[inc], %[p3]       ;\\n\"\n-\t\"       add %[inc], %[p4]       ;\\n\"\n-\t\"       dec %[cnt]              ;\\n\"\n-\t\"       jnz 1b                  ;\\n\"\n-\t: [cnt] \"+r\" (lines), [p1] \"+r\" (p1),\n-\t  [p2] \"+r\" (p2), [p3] \"+r\" (p3), [p4] \"+r\" (p4)\n-\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n-\t: \"memory\");\n-\n-\tkernel_fpu_end();\n-}\n-\n-static void\n-xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1,\n-\t       const unsigned long * __restrict p2,\n-\t       const unsigned long * __restrict p3,\n-\t       const unsigned long * __restrict p4)\n-{\n-\tunsigned long lines = bytes >> 8;\n-\n-\tkernel_fpu_begin();\n-\n-\tasm volatile(\n-#undef BLOCK\n-#define BLOCK(i)\t\t\t\\\n-\t\tBLK64(PF0, LD, i)\t\\\n-\t\tBLK64(PF1, XO1, i)\t\\\n-\t\tBLK64(PF2, XO2, i)\t\\\n-\t\tBLK64(PF3, XO3, i)\t\\\n-\t\tBLK64(NOP, ST, i)\t\\\n-\n-\t\" .align 32\t\t\t;\\n\"\n-\t\" 1:                            ;\\n\"\n-\n-\t\tBLOCK(0)\n-\t\tBLOCK(4)\n-\t\tBLOCK(8)\n-\t\tBLOCK(12)\n-\n-\t\"       add %[inc], %[p1]       ;\\n\"\n-\t\"       add %[inc], %[p2]       ;\\n\"\n-\t\"       add %[inc], %[p3]       ;\\n\"\n-\t\"       add %[inc], %[p4]       ;\\n\"\n-\t\"       dec %[cnt]              ;\\n\"\n-\t\"       jnz 1b                  ;\\n\"\n-\t: [cnt] \"+r\" (lines), [p1] \"+r\" (p1),\n-\t  [p2] \"+r\" (p2), [p3] \"+r\" (p3), [p4] \"+r\" (p4)\n-\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n-\t: \"memory\");\n-\n-\tkernel_fpu_end();\n-}\n-\n-static void\n-xor_sse_5(unsigned long bytes, unsigned long * __restrict p1,\n-\t  const unsigned long * __restrict p2,\n-\t  const unsigned long * __restrict p3,\n-\t  const unsigned long * __restrict p4,\n-\t  const unsigned long * __restrict p5)\n-{\n-\tunsigned long lines = bytes >> 8;\n-\n-\tkernel_fpu_begin();\n-\n-\tasm volatile(\n-#undef BLOCK\n-#define BLOCK(i) \\\n-\t\tPF1(i)\t\t\t\t\t\\\n-\t\t\t\tPF1(i + 2)\t\t\\\n-\t\tLD(i, 0)\t\t\t\t\\\n-\t\t\tLD(i + 1, 1)\t\t\t\\\n-\t\t\t\tLD(i + 2, 2)\t\t\\\n-\t\t\t\t\tLD(i + 3, 3)\t\\\n-\t\tPF2(i)\t\t\t\t\t\\\n-\t\t\t\tPF2(i + 2)\t\t\\\n-\t\tXO1(i, 0)\t\t\t\t\\\n-\t\t\tXO1(i + 1, 1)\t\t\t\\\n-\t\t\t\tXO1(i + 2, 2)\t\t\\\n-\t\t\t\t\tXO1(i + 3, 3)\t\\\n-\t\tPF3(i)\t\t\t\t\t\\\n-\t\t\t\tPF3(i + 2)\t\t\\\n-\t\tXO2(i, 0)\t\t\t\t\\\n-\t\t\tXO2(i + 1, 1)\t\t\t\\\n-\t\t\t\tXO2(i + 2, 2)\t\t\\\n-\t\t\t\t\tXO2(i + 3, 3)\t\\\n-\t\tPF4(i)\t\t\t\t\t\\\n-\t\t\t\tPF4(i + 2)\t\t\\\n-\t\tPF0(i + 4)\t\t\t\t\\\n-\t\t\t\tPF0(i + 6)\t\t\\\n-\t\tXO3(i, 0)\t\t\t\t\\\n-\t\t\tXO3(i + 1, 1)\t\t\t\\\n-\t\t\t\tXO3(i + 2, 2)\t\t\\\n-\t\t\t\t\tXO3(i + 3, 3)\t\\\n-\t\tXO4(i, 0)\t\t\t\t\\\n-\t\t\tXO4(i + 1, 1)\t\t\t\\\n-\t\t\t\tXO4(i + 2, 2)\t\t\\\n-\t\t\t\t\tXO4(i + 3, 3)\t\\\n-\t\tST(i, 0)\t\t\t\t\\\n-\t\t\tST(i + 1, 1)\t\t\t\\\n-\t\t\t\tST(i + 2, 2)\t\t\\\n-\t\t\t\t\tST(i + 3, 3)\t\\\n-\n-\n-\t\tPF0(0)\n-\t\t\t\tPF0(2)\n-\n-\t\" .align 32\t\t\t;\\n\"\n-\t\" 1:                            ;\\n\"\n-\n-\t\tBLOCK(0)\n-\t\tBLOCK(4)\n-\t\tBLOCK(8)\n-\t\tBLOCK(12)\n-\n-\t\"       add %[inc], %[p1]       ;\\n\"\n-\t\"       add %[inc], %[p2]       ;\\n\"\n-\t\"       add %[inc], %[p3]       ;\\n\"\n-\t\"       add %[inc], %[p4]       ;\\n\"\n-\t\"       add %[inc], %[p5]       ;\\n\"\n-\t\"       dec %[cnt]              ;\\n\"\n-\t\"       jnz 1b                  ;\\n\"\n-\t: [cnt] \"+r\" (lines), [p1] \"+r\" (p1), [p2] \"+r\" (p2),\n-\t  [p3] \"+r\" (p3), [p4] \"+r\" (p4), [p5] \"+r\" (p5)\n-\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n-\t: \"memory\");\n-\n-\tkernel_fpu_end();\n-}\n-\n-static void\n-xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1,\n-\t       const unsigned long * __restrict p2,\n-\t       const unsigned long * __restrict p3,\n-\t       const unsigned long * __restrict p4,\n-\t       const unsigned long * __restrict p5)\n-{\n-\tunsigned long lines = bytes >> 8;\n-\n-\tkernel_fpu_begin();\n-\n-\tasm volatile(\n-#undef BLOCK\n-#define BLOCK(i)\t\t\t\\\n-\t\tBLK64(PF0, LD, i)\t\\\n-\t\tBLK64(PF1, XO1, i)\t\\\n-\t\tBLK64(PF2, XO2, i)\t\\\n-\t\tBLK64(PF3, XO3, i)\t\\\n-\t\tBLK64(PF4, XO4, i)\t\\\n-\t\tBLK64(NOP, ST, i)\t\\\n-\n-\t\" .align 32\t\t\t;\\n\"\n-\t\" 1:                            ;\\n\"\n-\n-\t\tBLOCK(0)\n-\t\tBLOCK(4)\n-\t\tBLOCK(8)\n-\t\tBLOCK(12)\n-\n-\t\"       add %[inc], %[p1]       ;\\n\"\n-\t\"       add %[inc], %[p2]       ;\\n\"\n-\t\"       add %[inc], %[p3]       ;\\n\"\n-\t\"       add %[inc], %[p4]       ;\\n\"\n-\t\"       add %[inc], %[p5]       ;\\n\"\n-\t\"       dec %[cnt]              ;\\n\"\n-\t\"       jnz 1b                  ;\\n\"\n-\t: [cnt] \"+r\" (lines), [p1] \"+r\" (p1), [p2] \"+r\" (p2),\n-\t  [p3] \"+r\" (p3), [p4] \"+r\" (p4), [p5] \"+r\" (p5)\n-\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n-\t: \"memory\");\n-\n-\tkernel_fpu_end();\n-}\n-\n-static struct xor_block_template xor_block_sse_pf64 = {\n-\t.name = \"prefetch64-sse\",\n-\t.do_2 = xor_sse_2_pf64,\n-\t.do_3 = xor_sse_3_pf64,\n-\t.do_4 = xor_sse_4_pf64,\n-\t.do_5 = xor_sse_5_pf64,\n-};\n-\n-#undef LD\n-#undef XO1\n-#undef XO2\n-#undef XO3\n-#undef XO4\n-#undef ST\n-#undef NOP\n-#undef BLK64\n-#undef BLOCK\n-\n-#undef XOR_CONSTANT_CONSTRAINT\n-\n-#ifdef CONFIG_X86_32\n-# include <asm/xor_32.h>\n-#else\n-# include <asm/xor_64.h>\n-#endif\n-\n #endif /* _ASM_X86_XOR_H */\ndiff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h\ndeleted file mode 100644\nindex 2d2ceb241866..000000000000\n--- a/arch/x86/include/asm/xor_64.h\n+++ /dev/null\n@@ -1,32 +0,0 @@\n-/* SPDX-License-Identifier: GPL-2.0 */\n-#ifndef _ASM_X86_XOR_64_H\n-#define _ASM_X86_XOR_64_H\n-\n-static struct xor_block_template xor_block_sse = {\n-\t.name = \"generic_sse\",\n-\t.do_2 = xor_sse_2,\n-\t.do_3 = xor_sse_3,\n-\t.do_4 = xor_sse_4,\n-\t.do_5 = xor_sse_5,\n-};\n-\n-\n-/* Also try the AVX routines */\n-#include <asm/xor_avx.h>\n-\n-/* We force the use of the SSE xor block because it can write around L2.\n   We may also be able to load into the L1 only depending on how the cpu\n   deals with a load to a line that is being prefetched.  */\n-#define arch_xor_init arch_xor_init\n-static __always_inline void __init arch_xor_init(void)\n-{\n-\tif (boot_cpu_has(X86_FEATURE_AVX) &&\n-\t    boot_cpu_has(X86_FEATURE_OSXSAVE)) {\n-\t\txor_force(&xor_block_avx);\n-\t} else {\n-\t\txor_register(&xor_block_sse_pf64);\n-\t\txor_register(&xor_block_sse);\n-\t}\n-}\n-\n-#endif /* _ASM_X86_XOR_64_H */\ndiff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile\nindex 3db6c2b2f26a..05aca96041b3 100644\n--- a/lib/raid/xor/Makefile\n+++ b/lib/raid/xor/Makefile\n@@ -21,6 +21,8 @@ xor-$(CONFIG_RISCV_ISA_V)\t+= riscv/xor.o riscv/xor-glue.o\n xor-$(CONFIG_SPARC32)\t\t+= sparc/xor-sparc32.o\n xor-$(CONFIG_SPARC64)\t\t+= sparc/xor-sparc64.o sparc/xor-sparc64-glue.o\n xor-$(CONFIG_S390)\t\t+= s390/xor.o\n+xor-$(CONFIG_X86_32)\t\t+= x86/xor-avx.o x86/xor-sse.o x86/xor-mmx.o\n+xor-$(CONFIG_X86_64)\t\t+= x86/xor-avx.o x86/xor-sse.o\n \n \n CFLAGS_arm/xor-neon.o\t\t+= $(CC_FLAGS_FPU)\ndiff --git a/arch/x86/include/asm/xor_avx.h b/lib/raid/xor/x86/xor-avx.c\nsimilarity index 95%\nrename from arch/x86/include/asm/xor_avx.h\nrename to lib/raid/xor/x86/xor-avx.c\nindex c600888436bb..b49cb5199e70 100644\n--- a/arch/x86/include/asm/xor_avx.h\n+++ b/lib/raid/xor/x86/xor-avx.c\n@@ -1,18 +1,16 @@\n-/* SPDX-License-Identifier: GPL-2.0-only */\n-#ifndef _ASM_X86_XOR_AVX_H\n-#define _ASM_X86_XOR_AVX_H\n-\n+// SPDX-License-Identifier: GPL-2.0-only\n /*\n- * Optimized RAID-5 checksumming functions for AVX\n+ * Optimized XOR parity functions for AVX\n  *\n  * Copyright (C) 2012 Intel Corporation\n  * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>\n  *\n  * Based on Ingo Molnar and Zach Brown's respective MMX and SSE 
routines\n  */\n-\n #include <linux/compiler.h>\n+#include <linux/raid/xor_impl.h>\n #include <asm/fpu/api.h>\n+#include <asm/xor.h>\n \n #define BLOCK4(i) \\\n \t\tBLOCK(32 * i, 0) \\\n@@ -158,12 +156,10 @@ do { \\\n \tkernel_fpu_end();\n }\n \n-static struct xor_block_template xor_block_avx = {\n+struct xor_block_template xor_block_avx = {\n \t.name = \"avx\",\n \t.do_2 = xor_avx_2,\n \t.do_3 = xor_avx_3,\n \t.do_4 = xor_avx_4,\n \t.do_5 = xor_avx_5,\n };\n-\n-#endif\ndiff --git a/arch/x86/include/asm/xor_32.h b/lib/raid/xor/x86/xor-mmx.c\nsimilarity index 90%\nrename from arch/x86/include/asm/xor_32.h\nrename to lib/raid/xor/x86/xor-mmx.c\nindex ee32d08c27bc..cf0fafea33b7 100644\n--- a/arch/x86/include/asm/xor_32.h\n+++ b/lib/raid/xor/x86/xor-mmx.c\n@@ -1,15 +1,12 @@\n-/* SPDX-License-Identifier: GPL-2.0-or-later */\n-#ifndef _ASM_X86_XOR_32_H\n-#define _ASM_X86_XOR_32_H\n-\n-/*\n- * Optimized RAID-5 checksumming functions for MMX.\n- */\n-\n+// SPDX-License-Identifier: GPL-2.0-or-later\n /*\n- * High-speed RAID5 checksumming functions utilizing MMX instructions.\n+ * Optimized XOR parity functions for MMX.\n+ *\n  * Copyright (C) 1998 Ingo Molnar.\n  */\n+#include <linux/raid/xor_impl.h>\n+#include <asm/fpu/api.h>\n+#include <asm/xor.h>\n \n #define LD(x, y)\t\"       movq   8*(\"#x\")(%1), %%mm\"#y\"   ;\\n\"\n #define ST(x, y)\t\"       movq %%mm\"#y\",   8*(\"#x\")(%1)   ;\\n\"\n@@ -18,8 +15,6 @@\n #define XO3(x, y)\t\"       pxor   8*(\"#x\")(%4), %%mm\"#y\"   ;\\n\"\n #define XO4(x, y)\t\"       pxor   8*(\"#x\")(%5), %%mm\"#y\"   ;\\n\"\n \n-#include <asm/fpu/api.h>\n-\n static void\n xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1,\n \t      const unsigned long * __restrict p2)\n@@ -519,7 +514,7 @@ xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1,\n \tkernel_fpu_end();\n }\n \n-static struct xor_block_template xor_block_pII_mmx = {\n+struct xor_block_template xor_block_pII_mmx = {\n \t.name = \"pII_mmx\",\n \t.do_2 = 
xor_pII_mmx_2,\n \t.do_3 = xor_pII_mmx_3,\n@@ -527,49 +522,10 @@ static struct xor_block_template xor_block_pII_mmx = {\n \t.do_5 = xor_pII_mmx_5,\n };\n \n-static struct xor_block_template xor_block_p5_mmx = {\n+struct xor_block_template xor_block_p5_mmx = {\n \t.name = \"p5_mmx\",\n \t.do_2 = xor_p5_mmx_2,\n \t.do_3 = xor_p5_mmx_3,\n \t.do_4 = xor_p5_mmx_4,\n \t.do_5 = xor_p5_mmx_5,\n };\n-\n-static struct xor_block_template xor_block_pIII_sse = {\n-\t.name = \"pIII_sse\",\n-\t.do_2 = xor_sse_2,\n-\t.do_3 = xor_sse_3,\n-\t.do_4 = xor_sse_4,\n-\t.do_5 = xor_sse_5,\n-};\n-\n-/* Also try the AVX routines */\n-#include <asm/xor_avx.h>\n-\n-/* Also try the generic routines.  */\n-#include <asm-generic/xor.h>\n-\n-/* We force the use of the SSE xor block because it can write around L2.\n-   We may also be able to load into the L1 only depending on how the cpu\n-   deals with a load to a line that is being prefetched.  */\n-#define arch_xor_init arch_xor_init\n-static __always_inline void __init arch_xor_init(void)\n-{\n-\tif (boot_cpu_has(X86_FEATURE_AVX) &&\n-\t    boot_cpu_has(X86_FEATURE_OSXSAVE)) {\n-\t\txor_force(&xor_block_avx);\n-\t} else if (boot_cpu_has(X86_FEATURE_XMM)) {\n-\t\txor_register(&xor_block_pIII_sse);\n-\t\txor_register(&xor_block_sse_pf64);\n-\t} else if (boot_cpu_has(X86_FEATURE_MMX)) {\n-\t\txor_register(&xor_block_pII_mmx);\n-\t\txor_register(&xor_block_p5_mmx);\n-\t} else {\n-\t\txor_register(&xor_block_8regs);\n-\t\txor_register(&xor_block_8regs_p);\n-\t\txor_register(&xor_block_32regs);\n-\t\txor_register(&xor_block_32regs_p);\n-\t}\n-}\n-\n-#endif /* _ASM_X86_XOR_32_H */\ndiff --git a/lib/raid/xor/x86/xor-sse.c b/lib/raid/xor/x86/xor-sse.c\nnew file mode 100644\nindex 000000000000..0e727ced8b00\n--- /dev/null\n+++ b/lib/raid/xor/x86/xor-sse.c\n@@ -0,0 +1,476 @@\n+// SPDX-License-Identifier: GPL-2.0-or-later\n+/*\n+ * Optimized XOR parity functions for SSE.\n+ *\n+ * Cache avoiding checksumming functions utilizing KNI instructions\n+ * 
Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)\n+ *\n+ * Based on\n+ * High-speed RAID5 checksumming functions utilizing SSE instructions.\n+ * Copyright (C) 1998 Ingo Molnar.\n+ *\n+ * x86-64 changes / gcc fixes from Andi Kleen.\n+ * Copyright 2002 Andi Kleen, SuSE Labs.\n+ */\n+#include <linux/raid/xor_impl.h>\n+#include <asm/fpu/api.h>\n+#include <asm/xor.h>\n+\n+#ifdef CONFIG_X86_32\n+/* reduce register pressure */\n+# define XOR_CONSTANT_CONSTRAINT \"i\"\n+#else\n+# define XOR_CONSTANT_CONSTRAINT \"re\"\n+#endif\n+\n+#define OFFS(x)\t\t\"16*(\"#x\")\"\n+#define PF_OFFS(x)\t\"256+16*(\"#x\")\"\n+#define PF0(x)\t\t\"\tprefetchnta \"PF_OFFS(x)\"(%[p1])\t\t;\\n\"\n+#define LD(x, y)\t\"\tmovaps \"OFFS(x)\"(%[p1]), %%xmm\"#y\"\t;\\n\"\n+#define ST(x, y)\t\"\tmovaps %%xmm\"#y\", \"OFFS(x)\"(%[p1])\t;\\n\"\n+#define PF1(x)\t\t\"\tprefetchnta \"PF_OFFS(x)\"(%[p2])\t\t;\\n\"\n+#define PF2(x)\t\t\"\tprefetchnta \"PF_OFFS(x)\"(%[p3])\t\t;\\n\"\n+#define PF3(x)\t\t\"\tprefetchnta \"PF_OFFS(x)\"(%[p4])\t\t;\\n\"\n+#define PF4(x)\t\t\"\tprefetchnta \"PF_OFFS(x)\"(%[p5])\t\t;\\n\"\n+#define XO1(x, y)\t\"\txorps \"OFFS(x)\"(%[p2]), %%xmm\"#y\"\t;\\n\"\n+#define XO2(x, y)\t\"\txorps \"OFFS(x)\"(%[p3]), %%xmm\"#y\"\t;\\n\"\n+#define XO3(x, y)\t\"\txorps \"OFFS(x)\"(%[p4]), %%xmm\"#y\"\t;\\n\"\n+#define XO4(x, y)\t\"\txorps \"OFFS(x)\"(%[p5]), %%xmm\"#y\"\t;\\n\"\n+#define NOP(x)\n+\n+#define BLK64(pf, op, i)\t\t\t\t\\\n+\t\tpf(i)\t\t\t\t\t\\\n+\t\top(i, 0)\t\t\t\t\\\n+\t\t\top(i + 1, 1)\t\t\t\\\n+\t\t\t\top(i + 2, 2)\t\t\\\n+\t\t\t\t\top(i + 3, 3)\n+\n+static void\n+xor_sse_2(unsigned long bytes, unsigned long * __restrict p1,\n+\t  const unsigned long * __restrict p2)\n+{\n+\tunsigned long lines = bytes >> 8;\n+\n+\tkernel_fpu_begin();\n+\n+\tasm volatile(\n+#undef BLOCK\n+#define BLOCK(i)\t\t\t\t\t\\\n+\t\tLD(i, 0)\t\t\t\t\\\n+\t\t\tLD(i + 1, 1)\t\t\t\\\n+\t\tPF1(i)\t\t\t\t\t\\\n+\t\t\t\tPF1(i + 2)\t\t\\\n+\t\t\t\tLD(i + 2, 2)\t\t\\\n+\t\t\t\t\tLD(i + 3, 
3)\t\\\n+\t\tPF0(i + 4)\t\t\t\t\\\n+\t\t\t\tPF0(i + 6)\t\t\\\n+\t\tXO1(i, 0)\t\t\t\t\\\n+\t\t\tXO1(i + 1, 1)\t\t\t\\\n+\t\t\t\tXO1(i + 2, 2)\t\t\\\n+\t\t\t\t\tXO1(i + 3, 3)\t\\\n+\t\tST(i, 0)\t\t\t\t\\\n+\t\t\tST(i + 1, 1)\t\t\t\\\n+\t\t\t\tST(i + 2, 2)\t\t\\\n+\t\t\t\t\tST(i + 3, 3)\t\\\n+\n+\n+\t\tPF0(0)\n+\t\t\t\tPF0(2)\n+\n+\t\" .align 32\t\t\t;\\n\"\n+\t\" 1:                            ;\\n\"\n+\n+\t\tBLOCK(0)\n+\t\tBLOCK(4)\n+\t\tBLOCK(8)\n+\t\tBLOCK(12)\n+\n+\t\"       add %[inc], %[p1]       ;\\n\"\n+\t\"       add %[inc], %[p2]       ;\\n\"\n+\t\"       dec %[cnt]              ;\\n\"\n+\t\"       jnz 1b                  ;\\n\"\n+\t: [cnt] \"+r\" (lines),\n+\t  [p1] \"+r\" (p1), [p2] \"+r\" (p2)\n+\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n+\t: \"memory\");\n+\n+\tkernel_fpu_end();\n+}\n+\n+static void\n+xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1,\n+\t       const unsigned long * __restrict p2)\n+{\n+\tunsigned long lines = bytes >> 8;\n+\n+\tkernel_fpu_begin();\n+\n+\tasm volatile(\n+#undef BLOCK\n+#define BLOCK(i)\t\t\t\\\n+\t\tBLK64(PF0, LD, i)\t\\\n+\t\tBLK64(PF1, XO1, i)\t\\\n+\t\tBLK64(NOP, ST, i)\t\\\n+\n+\t\" .align 32\t\t\t;\\n\"\n+\t\" 1:                            ;\\n\"\n+\n+\t\tBLOCK(0)\n+\t\tBLOCK(4)\n+\t\tBLOCK(8)\n+\t\tBLOCK(12)\n+\n+\t\"       add %[inc], %[p1]       ;\\n\"\n+\t\"       add %[inc], %[p2]       ;\\n\"\n+\t\"       dec %[cnt]              ;\\n\"\n+\t\"       jnz 1b                  ;\\n\"\n+\t: [cnt] \"+r\" (lines),\n+\t  [p1] \"+r\" (p1), [p2] \"+r\" (p2)\n+\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n+\t: \"memory\");\n+\n+\tkernel_fpu_end();\n+}\n+\n+static void\n+xor_sse_3(unsigned long bytes, unsigned long * __restrict p1,\n+\t  const unsigned long * __restrict p2,\n+\t  const unsigned long * __restrict p3)\n+{\n+\tunsigned long lines = bytes >> 8;\n+\n+\tkernel_fpu_begin();\n+\n+\tasm volatile(\n+#undef BLOCK\n+#define BLOCK(i) \\\n+\t\tPF1(i)\t\t\t\t\t\\\n+\t\t\t\tPF1(i + 2)\t\t\\\n+\t\tLD(i, 
0)\t\t\t\t\\\n+\t\t\tLD(i + 1, 1)\t\t\t\\\n+\t\t\t\tLD(i + 2, 2)\t\t\\\n+\t\t\t\t\tLD(i + 3, 3)\t\\\n+\t\tPF2(i)\t\t\t\t\t\\\n+\t\t\t\tPF2(i + 2)\t\t\\\n+\t\tPF0(i + 4)\t\t\t\t\\\n+\t\t\t\tPF0(i + 6)\t\t\\\n+\t\tXO1(i, 0)\t\t\t\t\\\n+\t\t\tXO1(i + 1, 1)\t\t\t\\\n+\t\t\t\tXO1(i + 2, 2)\t\t\\\n+\t\t\t\t\tXO1(i + 3, 3)\t\\\n+\t\tXO2(i, 0)\t\t\t\t\\\n+\t\t\tXO2(i + 1, 1)\t\t\t\\\n+\t\t\t\tXO2(i + 2, 2)\t\t\\\n+\t\t\t\t\tXO2(i + 3, 3)\t\\\n+\t\tST(i, 0)\t\t\t\t\\\n+\t\t\tST(i + 1, 1)\t\t\t\\\n+\t\t\t\tST(i + 2, 2)\t\t\\\n+\t\t\t\t\tST(i + 3, 3)\t\\\n+\n+\n+\t\tPF0(0)\n+\t\t\t\tPF0(2)\n+\n+\t\" .align 32\t\t\t;\\n\"\n+\t\" 1:                            ;\\n\"\n+\n+\t\tBLOCK(0)\n+\t\tBLOCK(4)\n+\t\tBLOCK(8)\n+\t\tBLOCK(12)\n+\n+\t\"       add %[inc], %[p1]       ;\\n\"\n+\t\"       add %[inc], %[p2]       ;\\n\"\n+\t\"       add %[inc], %[p3]       ;\\n\"\n+\t\"       dec %[cnt]              ;\\n\"\n+\t\"       jnz 1b                  ;\\n\"\n+\t: [cnt] \"+r\" (lines),\n+\t  [p1] \"+r\" (p1), [p2] \"+r\" (p2), [p3] \"+r\" (p3)\n+\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n+\t: \"memory\");\n+\n+\tkernel_fpu_end();\n+}\n+\n+static void\n+xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1,\n+\t       const unsigned long * __restrict p2,\n+\t       const unsigned long * __restrict p3)\n+{\n+\tunsigned long lines = bytes >> 8;\n+\n+\tkernel_fpu_begin();\n+\n+\tasm volatile(\n+#undef BLOCK\n+#define BLOCK(i)\t\t\t\\\n+\t\tBLK64(PF0, LD, i)\t\\\n+\t\tBLK64(PF1, XO1, i)\t\\\n+\t\tBLK64(PF2, XO2, i)\t\\\n+\t\tBLK64(NOP, ST, i)\t\\\n+\n+\t\" .align 32\t\t\t;\\n\"\n+\t\" 1:                            ;\\n\"\n+\n+\t\tBLOCK(0)\n+\t\tBLOCK(4)\n+\t\tBLOCK(8)\n+\t\tBLOCK(12)\n+\n+\t\"       add %[inc], %[p1]       ;\\n\"\n+\t\"       add %[inc], %[p2]       ;\\n\"\n+\t\"       add %[inc], %[p3]       ;\\n\"\n+\t\"       dec %[cnt]              ;\\n\"\n+\t\"       jnz 1b                  ;\\n\"\n+\t: [cnt] \"+r\" (lines),\n+\t  [p1] \"+r\" (p1), [p2] \"+r\" (p2), [p3] 
\"+r\" (p3)\n+\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n+\t: \"memory\");\n+\n+\tkernel_fpu_end();\n+}\n+\n+static void\n+xor_sse_4(unsigned long bytes, unsigned long * __restrict p1,\n+\t  const unsigned long * __restrict p2,\n+\t  const unsigned long * __restrict p3,\n+\t  const unsigned long * __restrict p4)\n+{\n+\tunsigned long lines = bytes >> 8;\n+\n+\tkernel_fpu_begin();\n+\n+\tasm volatile(\n+#undef BLOCK\n+#define BLOCK(i) \\\n+\t\tPF1(i)\t\t\t\t\t\\\n+\t\t\t\tPF1(i + 2)\t\t\\\n+\t\tLD(i, 0)\t\t\t\t\\\n+\t\t\tLD(i + 1, 1)\t\t\t\\\n+\t\t\t\tLD(i + 2, 2)\t\t\\\n+\t\t\t\t\tLD(i + 3, 3)\t\\\n+\t\tPF2(i)\t\t\t\t\t\\\n+\t\t\t\tPF2(i + 2)\t\t\\\n+\t\tXO1(i, 0)\t\t\t\t\\\n+\t\t\tXO1(i + 1, 1)\t\t\t\\\n+\t\t\t\tXO1(i + 2, 2)\t\t\\\n+\t\t\t\t\tXO1(i + 3, 3)\t\\\n+\t\tPF3(i)\t\t\t\t\t\\\n+\t\t\t\tPF3(i + 2)\t\t\\\n+\t\tPF0(i + 4)\t\t\t\t\\\n+\t\t\t\tPF0(i + 6)\t\t\\\n+\t\tXO2(i, 0)\t\t\t\t\\\n+\t\t\tXO2(i + 1, 1)\t\t\t\\\n+\t\t\t\tXO2(i + 2, 2)\t\t\\\n+\t\t\t\t\tXO2(i + 3, 3)\t\\\n+\t\tXO3(i, 0)\t\t\t\t\\\n+\t\t\tXO3(i + 1, 1)\t\t\t\\\n+\t\t\t\tXO3(i + 2, 2)\t\t\\\n+\t\t\t\t\tXO3(i + 3, 3)\t\\\n+\t\tST(i, 0)\t\t\t\t\\\n+\t\t\tST(i + 1, 1)\t\t\t\\\n+\t\t\t\tST(i + 2, 2)\t\t\\\n+\t\t\t\t\tST(i + 3, 3)\t\\\n+\n+\n+\t\tPF0(0)\n+\t\t\t\tPF0(2)\n+\n+\t\" .align 32\t\t\t;\\n\"\n+\t\" 1:                            ;\\n\"\n+\n+\t\tBLOCK(0)\n+\t\tBLOCK(4)\n+\t\tBLOCK(8)\n+\t\tBLOCK(12)\n+\n+\t\"       add %[inc], %[p1]       ;\\n\"\n+\t\"       add %[inc], %[p2]       ;\\n\"\n+\t\"       add %[inc], %[p3]       ;\\n\"\n+\t\"       add %[inc], %[p4]       ;\\n\"\n+\t\"       dec %[cnt]              ;\\n\"\n+\t\"       jnz 1b                  ;\\n\"\n+\t: [cnt] \"+r\" (lines), [p1] \"+r\" (p1),\n+\t  [p2] \"+r\" (p2), [p3] \"+r\" (p3), [p4] \"+r\" (p4)\n+\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n+\t: \"memory\");\n+\n+\tkernel_fpu_end();\n+}\n+\n+static void\n+xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1,\n+\t       const unsigned long * __restrict 
p2,\n+\t       const unsigned long * __restrict p3,\n+\t       const unsigned long * __restrict p4)\n+{\n+\tunsigned long lines = bytes >> 8;\n+\n+\tkernel_fpu_begin();\n+\n+\tasm volatile(\n+#undef BLOCK\n+#define BLOCK(i)\t\t\t\\\n+\t\tBLK64(PF0, LD, i)\t\\\n+\t\tBLK64(PF1, XO1, i)\t\\\n+\t\tBLK64(PF2, XO2, i)\t\\\n+\t\tBLK64(PF3, XO3, i)\t\\\n+\t\tBLK64(NOP, ST, i)\t\\\n+\n+\t\" .align 32\t\t\t;\\n\"\n+\t\" 1:                            ;\\n\"\n+\n+\t\tBLOCK(0)\n+\t\tBLOCK(4)\n+\t\tBLOCK(8)\n+\t\tBLOCK(12)\n+\n+\t\"       add %[inc], %[p1]       ;\\n\"\n+\t\"       add %[inc], %[p2]       ;\\n\"\n+\t\"       add %[inc], %[p3]       ;\\n\"\n+\t\"       add %[inc], %[p4]       ;\\n\"\n+\t\"       dec %[cnt]              ;\\n\"\n+\t\"       jnz 1b                  ;\\n\"\n+\t: [cnt] \"+r\" (lines), [p1] \"+r\" (p1),\n+\t  [p2] \"+r\" (p2), [p3] \"+r\" (p3), [p4] \"+r\" (p4)\n+\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n+\t: \"memory\");\n+\n+\tkernel_fpu_end();\n+}\n+\n+static void\n+xor_sse_5(unsigned long bytes, unsigned long * __restrict p1,\n+\t  const unsigned long * __restrict p2,\n+\t  const unsigned long * __restrict p3,\n+\t  const unsigned long * __restrict p4,\n+\t  const unsigned long * __restrict p5)\n+{\n+\tunsigned long lines = bytes >> 8;\n+\n+\tkernel_fpu_begin();\n+\n+\tasm volatile(\n+#undef BLOCK\n+#define BLOCK(i) \\\n+\t\tPF1(i)\t\t\t\t\t\\\n+\t\t\t\tPF1(i + 2)\t\t\\\n+\t\tLD(i, 0)\t\t\t\t\\\n+\t\t\tLD(i + 1, 1)\t\t\t\\\n+\t\t\t\tLD(i + 2, 2)\t\t\\\n+\t\t\t\t\tLD(i + 3, 3)\t\\\n+\t\tPF2(i)\t\t\t\t\t\\\n+\t\t\t\tPF2(i + 2)\t\t\\\n+\t\tXO1(i, 0)\t\t\t\t\\\n+\t\t\tXO1(i + 1, 1)\t\t\t\\\n+\t\t\t\tXO1(i + 2, 2)\t\t\\\n+\t\t\t\t\tXO1(i + 3, 3)\t\\\n+\t\tPF3(i)\t\t\t\t\t\\\n+\t\t\t\tPF3(i + 2)\t\t\\\n+\t\tXO2(i, 0)\t\t\t\t\\\n+\t\t\tXO2(i + 1, 1)\t\t\t\\\n+\t\t\t\tXO2(i + 2, 2)\t\t\\\n+\t\t\t\t\tXO2(i + 3, 3)\t\\\n+\t\tPF4(i)\t\t\t\t\t\\\n+\t\t\t\tPF4(i + 2)\t\t\\\n+\t\tPF0(i + 4)\t\t\t\t\\\n+\t\t\t\tPF0(i + 6)\t\t\\\n+\t\tXO3(i, 
0)\t\t\t\t\\\n+\t\t\tXO3(i + 1, 1)\t\t\t\\\n+\t\t\t\tXO3(i + 2, 2)\t\t\\\n+\t\t\t\t\tXO3(i + 3, 3)\t\\\n+\t\tXO4(i, 0)\t\t\t\t\\\n+\t\t\tXO4(i + 1, 1)\t\t\t\\\n+\t\t\t\tXO4(i + 2, 2)\t\t\\\n+\t\t\t\t\tXO4(i + 3, 3)\t\\\n+\t\tST(i, 0)\t\t\t\t\\\n+\t\t\tST(i + 1, 1)\t\t\t\\\n+\t\t\t\tST(i + 2, 2)\t\t\\\n+\t\t\t\t\tST(i + 3, 3)\t\\\n+\n+\n+\t\tPF0(0)\n+\t\t\t\tPF0(2)\n+\n+\t\" .align 32\t\t\t;\\n\"\n+\t\" 1:                            ;\\n\"\n+\n+\t\tBLOCK(0)\n+\t\tBLOCK(4)\n+\t\tBLOCK(8)\n+\t\tBLOCK(12)\n+\n+\t\"       add %[inc], %[p1]       ;\\n\"\n+\t\"       add %[inc], %[p2]       ;\\n\"\n+\t\"       add %[inc], %[p3]       ;\\n\"\n+\t\"       add %[inc], %[p4]       ;\\n\"\n+\t\"       add %[inc], %[p5]       ;\\n\"\n+\t\"       dec %[cnt]              ;\\n\"\n+\t\"       jnz 1b                  ;\\n\"\n+\t: [cnt] \"+r\" (lines), [p1] \"+r\" (p1), [p2] \"+r\" (p2),\n+\t  [p3] \"+r\" (p3), [p4] \"+r\" (p4), [p5] \"+r\" (p5)\n+\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n+\t: \"memory\");\n+\n+\tkernel_fpu_end();\n+}\n+\n+static void\n+xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1,\n+\t       const unsigned long * __restrict p2,\n+\t       const unsigned long * __restrict p3,\n+\t       const unsigned long * __restrict p4,\n+\t       const unsigned long * __restrict p5)\n+{\n+\tunsigned long lines = bytes >> 8;\n+\n+\tkernel_fpu_begin();\n+\n+\tasm volatile(\n+#undef BLOCK\n+#define BLOCK(i)\t\t\t\\\n+\t\tBLK64(PF0, LD, i)\t\\\n+\t\tBLK64(PF1, XO1, i)\t\\\n+\t\tBLK64(PF2, XO2, i)\t\\\n+\t\tBLK64(PF3, XO3, i)\t\\\n+\t\tBLK64(PF4, XO4, i)\t\\\n+\t\tBLK64(NOP, ST, i)\t\\\n+\n+\t\" .align 32\t\t\t;\\n\"\n+\t\" 1:                            ;\\n\"\n+\n+\t\tBLOCK(0)\n+\t\tBLOCK(4)\n+\t\tBLOCK(8)\n+\t\tBLOCK(12)\n+\n+\t\"       add %[inc], %[p1]       ;\\n\"\n+\t\"       add %[inc], %[p2]       ;\\n\"\n+\t\"       add %[inc], %[p3]       ;\\n\"\n+\t\"       add %[inc], %[p4]       ;\\n\"\n+\t\"       add %[inc], %[p5]       ;\\n\"\n+\t\"       dec 
%[cnt]              ;\\n\"\n+\t\"       jnz 1b                  ;\\n\"\n+\t: [cnt] \"+r\" (lines), [p1] \"+r\" (p1), [p2] \"+r\" (p2),\n+\t  [p3] \"+r\" (p3), [p4] \"+r\" (p4), [p5] \"+r\" (p5)\n+\t: [inc] XOR_CONSTANT_CONSTRAINT (256UL)\n+\t: \"memory\");\n+\n+\tkernel_fpu_end();\n+}\n+\n+struct xor_block_template xor_block_sse = {\n+\t.name = \"sse\",\n+\t.do_2 = xor_sse_2,\n+\t.do_3 = xor_sse_3,\n+\t.do_4 = xor_sse_4,\n+\t.do_5 = xor_sse_5,\n+};\n+\n+struct xor_block_template xor_block_sse_pf64 = {\n+\t.name = \"prefetch64-sse\",\n+\t.do_2 = xor_sse_2_pf64,\n+\t.do_3 = xor_sse_3_pf64,\n+\t.do_4 = xor_sse_4_pf64,\n+\t.do_5 = xor_sse_5_pf64,\n+};\n",
    "prefixes": [
        "19/28"
    ]
}