Patch Detail
get:
Show a patch.
patch:
Partially update a patch (only the fields provided in the request are changed).
put:
Update a patch.
GET /api/patches/2183191/?format=api
{ "id": 2183191, "url": "http://patchwork.ozlabs.org/api/patches/2183191/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20260112192035.10427-18-ebiggers@kernel.org/", "project": { "id": 2, "url": "http://patchwork.ozlabs.org/api/projects/2/?format=api", "name": "Linux PPC development", "link_name": "linuxppc-dev", "list_id": "linuxppc-dev.lists.ozlabs.org", "list_email": "linuxppc-dev@lists.ozlabs.org", "web_url": "https://github.com/linuxppc/wiki/wiki", "scm_url": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git", "webscm_url": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/", "list_archive_url": "https://lore.kernel.org/linuxppc-dev/", "list_archive_url_format": "https://lore.kernel.org/linuxppc-dev/{}/", "commit_url_format": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?id={}" }, "msgid": "<20260112192035.10427-18-ebiggers@kernel.org>", "list_archive_url": "https://lore.kernel.org/linuxppc-dev/20260112192035.10427-18-ebiggers@kernel.org/", "date": "2026-01-12T19:20:15", "name": "[v2,17/35] lib/crypto: x86/aes: Add AES-NI optimization", "commit_ref": null, "pull_url": null, "state": "handled-elsewhere", "archived": false, "hash": "ec0514f4f1aa556c15253f84215c87995a52c03c", "submitter": { "id": 74690, "url": "http://patchwork.ozlabs.org/api/people/74690/?format=api", "name": "Eric Biggers", "email": "ebiggers@kernel.org" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20260112192035.10427-18-ebiggers@kernel.org/mbox/", "series": [ { "id": 488089, "url": "http://patchwork.ozlabs.org/api/series/488089/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=488089", "date": "2026-01-12T19:19:58", "name": "AES library improvements", "version": 2, "mbox": "http://patchwork.ozlabs.org/series/488089/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2183191/comments/", "check": "pending", 
"checks": "http://patchwork.ozlabs.org/api/patches/2183191/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "\n <linuxppc-dev+bounces-15560-incoming=patchwork.ozlabs.org@lists.ozlabs.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "linuxppc-dev@lists.ozlabs.org" ], "Delivered-To": "patchwork-incoming@legolas.ozlabs.org", "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (2048-bit key;\n unprotected) header.d=kernel.org header.i=@kernel.org header.a=rsa-sha256\n header.s=k20201202 header.b=tMd17uhU;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=lists.ozlabs.org\n (client-ip=112.213.38.117; helo=lists.ozlabs.org;\n envelope-from=linuxppc-dev+bounces-15560-incoming=patchwork.ozlabs.org@lists.ozlabs.org;\n receiver=patchwork.ozlabs.org)", "lists.ozlabs.org;\n arc=none smtp.remote-ip=172.234.252.31", "lists.ozlabs.org;\n dmarc=pass (p=quarantine dis=none) header.from=kernel.org", "lists.ozlabs.org;\n\tdkim=pass (2048-bit key;\n unprotected) header.d=kernel.org header.i=@kernel.org header.a=rsa-sha256\n header.s=k20201202 header.b=tMd17uhU;\n\tdkim-atps=neutral", "lists.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=kernel.org\n (client-ip=172.234.252.31; helo=sea.source.kernel.org;\n envelope-from=ebiggers@kernel.org; receiver=lists.ozlabs.org)" ], "Received": [ "from lists.ozlabs.org (lists.ozlabs.org [112.213.38.117])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4dqj3x0YTYz1xpk\n\tfor <incoming@patchwork.ozlabs.org>; Tue, 13 Jan 2026 06:24:13 +1100 (AEDT)", "from boromir.ozlabs.org (localhost [127.0.0.1])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 4dqj353DzXz2ynh;\n\tTue, 13 Jan 2026 06:23:29 +1100 (AEDT)", "from sea.source.kernel.org (sea.source.kernel.org [172.234.252.31])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 
(256/256 bits)\n\t key-exchange x25519)\n\t(No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 4dqj335TWqz2yft\n\tfor <linuxppc-dev@lists.ozlabs.org>; Tue, 13 Jan 2026 06:23:27 +1100 (AEDT)", "from smtp.kernel.org (transwarp.subspace.kernel.org [100.75.92.58])\n\tby sea.source.kernel.org (Postfix) with ESMTP id 720444430D;\n\tMon, 12 Jan 2026 19:23:27 +0000 (UTC)", "by smtp.kernel.org (Postfix) with ESMTPSA id E15F7C19424;\n\tMon, 12 Jan 2026 19:23:26 +0000 (UTC)" ], "ARC-Seal": "i=1; a=rsa-sha256; d=lists.ozlabs.org; s=201707; t=1768245809;\n\tcv=none;\n b=n6B8qiitZjZ31Rea3FnoQfqSFjAMzw9DFqiolxA+GEEqbiikyDzW38tjVmlxMJG5LW4ZhfYXMfI9ayETcfZwb6k6OSmNuWVyt81mFDpGgdJmEcEjhWUAeWsf5wLTHuPuZId3sI7tO3bDPOlZso9nl9Q/1mCxUp+9maoqx8jh3IbmMfrgCwy/uPsuMXDBQyr9omEZCF7QRoF0D1FRpd/6bIrPv7442ekJwowFoN/42CXDGkxITvjosUJhxtSeY7ZMudihMEO6IDwps5kNmz7LeCN5JZEgzdbTZkX7ufOaR8iggHoNpUyoYpkl9MgN+MrOLuTGBwHUOr+LrNSfzJYCjA==", "ARC-Message-Signature": "i=1; a=rsa-sha256; d=lists.ozlabs.org; s=201707;\n\tt=1768245809; c=relaxed/relaxed;\n\tbh=RWxW1uFuup9ONYA01aeUHR0qAMfLTjJS5PUrW6w5PXk=;\n\th=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:\n\t MIME-Version;\n b=PULtMtYwid7bH7My8HIGeREUgc9fHThWEoBn47fH6XatyjdIkTMr29WqyRdU8cJE1W255dne5YgAaSHDLJCLWXFfh0eSB8FMtNgK/jI8cwRTyGeTSFg4KxCLrucPlNJWsLCI39ZUL5CTKztfPKYnCF7BZ8d10UdhCiRqUKeTCbe12vuLik86gnX0GrqcfTQsCYEMoZtnH1JaF74ntkJCdSZPM3GcS1R0F6gv8OW5SQJCBke8vOYIiC6G+35FoADy5pyGGbX10Nf7yxZYKDSf3ZvyNNrHGq4JVqcv2iKM5bDXGPEN+QGPN8cViG1MNq9kCXH4z269gdslKSRdheoNrA==", "ARC-Authentication-Results": "i=1; lists.ozlabs.org;\n dmarc=pass (p=quarantine dis=none) header.from=kernel.org;\n dkim=pass (2048-bit key;\n unprotected) header.d=kernel.org header.i=@kernel.org header.a=rsa-sha256\n header.s=k20201202 header.b=tMd17uhU; dkim-atps=neutral;\n spf=pass (client-ip=172.234.252.31; helo=sea.source.kernel.org;\n envelope-from=ebiggers@kernel.org;\n receiver=lists.ozlabs.org) smtp.mailfrom=kernel.org", "DKIM-Signature": "v=1; 
a=rsa-sha256; c=relaxed/simple; d=kernel.org;\n\ts=k20201202; t=1768245807;\n\tbh=+Ks/FgLrVIgNh0mVR9LgvhCStXCy1TQy3vUt0BiA4Xw=;\n\th=From:To:Cc:Subject:Date:In-Reply-To:References:From;\n\tb=tMd17uhU6V2tVXPHXhtW3rPuEKtDLWTBQq3CUg8shZ9tcX4/7s+uwYmLr3xdCiUU4\n\t NWxnO8p8r4frohwhvYoDPytgY3v+2LKctzHs67dD+tVCqDlxAXrILzvR2Kk7398VGh\n\t G6vSBWhLUBcRJfvHlxHXEVl4HBZndbrslU1h3gUFaeaxAedZFN2HEjepQZNdUM7xi0\n\t /SJXu6XTfq0uctlyJRu9L/hCrnqynixtUH/sLLl72vZKvQD5pa9G6WvNFy4a/+6k0C\n\t E5cG5ScYUy/u7bOkVsFG0BL/UL4rPc2J3OBSm62jc/9CUGqLPM6byFI5VGUd5CjJM7\n\t lx6+ZPaH3Tv3w==", "From": "Eric Biggers <ebiggers@kernel.org>", "To": "linux-crypto@vger.kernel.org", "Cc": "linux-kernel@vger.kernel.org,\n\tArd Biesheuvel <ardb@kernel.org>,\n\t\"Jason A . Donenfeld\" <Jason@zx2c4.com>,\n\tHerbert Xu <herbert@gondor.apana.org.au>,\n\tlinux-arm-kernel@lists.infradead.org,\n\tlinuxppc-dev@lists.ozlabs.org,\n\tlinux-riscv@lists.infradead.org,\n\tlinux-s390@vger.kernel.org,\n\tsparclinux@vger.kernel.org,\n\tx86@kernel.org,\n\tHolger Dengler <dengler@linux.ibm.com>,\n\tHarald Freudenberger <freude@linux.ibm.com>,\n\tEric Biggers <ebiggers@kernel.org>", "Subject": "[PATCH v2 17/35] lib/crypto: x86/aes: Add AES-NI optimization", "Date": "Mon, 12 Jan 2026 11:20:15 -0800", "Message-ID": "<20260112192035.10427-18-ebiggers@kernel.org>", "X-Mailer": "git-send-email 2.52.0", "In-Reply-To": "<20260112192035.10427-1-ebiggers@kernel.org>", "References": "<20260112192035.10427-1-ebiggers@kernel.org>", "X-Mailing-List": "linuxppc-dev@lists.ozlabs.org", "List-Id": "<linuxppc-dev.lists.ozlabs.org>", "List-Help": "<mailto:linuxppc-dev+help@lists.ozlabs.org>", "List-Owner": "<mailto:linuxppc-dev+owner@lists.ozlabs.org>", "List-Post": "<mailto:linuxppc-dev@lists.ozlabs.org>", "List-Archive": "<https://lore.kernel.org/linuxppc-dev/>,\n <https://lists.ozlabs.org/pipermail/linuxppc-dev/>", "List-Subscribe": "<mailto:linuxppc-dev+subscribe@lists.ozlabs.org>,\n <mailto:linuxppc-dev+subscribe-digest@lists.ozlabs.org>,\n 
<mailto:linuxppc-dev+subscribe-nomail@lists.ozlabs.org>", "List-Unsubscribe": "<mailto:linuxppc-dev+unsubscribe@lists.ozlabs.org>", "Precedence": "list", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "X-Spam-Status": "No, score=-0.2 required=3.0 tests=DKIMWL_WL_HIGH,DKIM_SIGNED,\n\tDKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,SPF_HELO_NONE,SPF_PASS\n\tautolearn=disabled version=4.0.1 OzLabs 8", "X-Spam-Checker-Version": "SpamAssassin 4.0.1 (2024-03-25) on lists.ozlabs.org" }, "content": "Optimize the AES library with x86 AES-NI instructions.\n\nThe relevant existing assembly functions, aesni_set_key(), aesni_enc(),\nand aesni_dec(), are a bit difficult to extract into the library:\n\n- They're coupled to the code for the AES modes.\n- They operate on struct crypto_aes_ctx. The AES library now uses\n different structs.\n- They assume the key is 16-byte aligned. The AES library only\n *prefers* 16-byte alignment; it doesn't require it.\n\nMoreover, they're not all that great in the first place:\n\n- They use unrolled loops, which isn't a great choice on x86.\n- They use the 'aeskeygenassist' instruction, which is unnecessary, is\n slow on Intel CPUs, and forces the loop to be unrolled.\n- They have special code for AES-192 key expansion, despite that being\n kind of useless. AES-128 and AES-256 are the ones used in practice.\n\nThese are small functions anyway.\n\nTherefore, I opted to just write replacements of these functions for the\nlibrary. 
They address all the above issues.\n\nAcked-by: Ard Biesheuvel <ardb@kernel.org>\nSigned-off-by: Eric Biggers <ebiggers@kernel.org>\n---\n lib/crypto/Kconfig | 1 +\n lib/crypto/Makefile | 1 +\n lib/crypto/x86/aes-aesni.S | 261 +++++++++++++++++++++++++++++++++++++\n lib/crypto/x86/aes.h | 85 ++++++++++++\n 4 files changed, 348 insertions(+)\n create mode 100644 lib/crypto/x86/aes-aesni.S\n create mode 100644 lib/crypto/x86/aes.h", "diff": "diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig\nindex 920d96e6b498..032f9755f999 100644\n--- a/lib/crypto/Kconfig\n+++ b/lib/crypto/Kconfig\n@@ -19,10 +19,11 @@ config CRYPTO_LIB_AES_ARCH\n \tdefault y if PPC && (SPE || (PPC64 && VSX))\n \tdefault y if RISCV && 64BIT && TOOLCHAIN_HAS_VECTOR_CRYPTO && \\\n \t\t RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS\n \tdefault y if S390\n \tdefault y if SPARC64\n+\tdefault y if X86\n \n config CRYPTO_LIB_AESCFB\n \ttristate\n \tselect CRYPTO_LIB_AES\n \tselect CRYPTO_LIB_UTILS\ndiff --git a/lib/crypto/Makefile b/lib/crypto/Makefile\nindex 761d52d91f92..725eef05b758 100644\n--- a/lib/crypto/Makefile\n+++ b/lib/crypto/Makefile\n@@ -50,10 +50,11 @@ OBJECT_FILES_NON_STANDARD_powerpc/aesp8-ppc.o := y\n endif # !CONFIG_SPE\n endif # CONFIG_PPC\n \n libaes-$(CONFIG_RISCV) += riscv/aes-riscv64-zvkned.o\n libaes-$(CONFIG_SPARC) += sparc/aes_asm.o\n+libaes-$(CONFIG_X86) += x86/aes-aesni.o\n endif # CONFIG_CRYPTO_LIB_AES_ARCH\n \n ################################################################################\n \n obj-$(CONFIG_CRYPTO_LIB_AESCFB)\t\t\t+= libaescfb.o\ndiff --git a/lib/crypto/x86/aes-aesni.S b/lib/crypto/x86/aes-aesni.S\nnew file mode 100644\nindex 000000000000..b8c3e104a3be\n--- /dev/null\n+++ b/lib/crypto/x86/aes-aesni.S\n@@ -0,0 +1,261 @@\n+/* SPDX-License-Identifier: GPL-2.0-or-later */\n+//\n+// AES block cipher using AES-NI instructions\n+//\n+// Copyright 2026 Google LLC\n+//\n+// The code in this file supports 32-bit and 64-bit CPUs, and it doesn't require\n+// AVX. 
It does use up to SSE4.1, which all CPUs with AES-NI have.\n+#include <linux/linkage.h>\n+\n+.section .rodata\n+#ifdef __x86_64__\n+#define RODATA(label)\tlabel(%rip)\n+#else\n+#define RODATA(label)\tlabel\n+#endif\n+\n+\t// A mask for pshufb that extracts the last dword, rotates it right by 8\n+\t// bits, and copies the result to all four dwords.\n+.p2align 4\n+.Lmask:\n+\t.byte\t13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12\n+\n+\t// The AES round constants, used during key expansion\n+.Lrcon:\n+\t.long\t0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36\n+\n+.text\n+\n+// Transform four dwords [a0, a1, a2, a3] in \\a into\n+// [a0, a0^a1, a0^a1^a2, a0^a1^a2^a3]. \\tmp is a temporary xmm register.\n+//\n+// Note: this could be done in four instructions, shufps + pxor + shufps + pxor,\n+// if the temporary register were zero-initialized ahead of time. We instead do\n+// it in an easier-to-understand way that doesn't require zero-initialization\n+// and avoids the unusual shufps instruction. movdqa is usually \"free\" anyway.\n+.macro\t_prefix_sum\ta, tmp\n+\tmovdqa\t\t\\a, \\tmp\t// [a0, a1, a2, a3]\n+\tpslldq\t\t$4, \\a\t\t// [0, a0, a1, a2]\n+\tpxor\t\t\\tmp, \\a\t// [a0, a0^a1, a1^a2, a2^a3]\n+\tmovdqa\t\t\\a, \\tmp\n+\tpslldq\t\t$8, \\a\t\t// [0, 0, a0, a0^a1]\n+\tpxor\t\t\\tmp, \\a\t// [a0, a0^a1, a0^a1^a2, a0^a1^a2^a3]\n+.endm\n+\n+.macro\t_gen_round_key\ta, b\n+\t// Compute four copies of rcon[i] ^ SubBytes(ror32(w, 8)), where w is\n+\t// the last dword of the previous round key (given in \\b).\n+\t//\n+\t// 'aesenclast src, dst' does dst = src XOR SubBytes(ShiftRows(dst)).\n+\t// It is used here solely for the SubBytes and the XOR. 
The ShiftRows\n+\t// is a no-op because all four columns are the same here.\n+\t//\n+\t// Don't use the 'aeskeygenassist' instruction, since:\n+\t// - On most Intel CPUs it is microcoded, making it have a much higher\n+\t// latency and use more execution ports than 'aesenclast'.\n+\t// - It cannot be used in a loop, since it requires an immediate.\n+\t// - It doesn't do much more than 'aesenclast' in the first place.\n+\tmovdqa\t\t\\b, %xmm2\n+\tpshufb\t\tMASK, %xmm2\n+\taesenclast\tRCON, %xmm2\n+\n+\t// XOR in the prefix sum of the four dwords of \\a, which is the\n+\t// previous round key (AES-128) or the first round key in the previous\n+\t// pair of round keys (AES-256). The result is the next round key.\n+\t_prefix_sum\t\\a, tmp=%xmm3\n+\tpxor\t\t%xmm2, \\a\n+\n+\t// Store the next round key to memory. Also leave it in \\a.\n+\tmovdqu\t\t\\a, (RNDKEYS)\n+.endm\n+\n+.macro\t_aes_expandkey_aesni\tis_aes128\n+#ifdef __x86_64__\n+\t// Arguments\n+\t.set\tRNDKEYS,\t%rdi\n+\t.set\tINV_RNDKEYS,\t%rsi\n+\t.set\tIN_KEY,\t\t%rdx\n+\n+\t// Other local variables\n+\t.set\tRCON_PTR,\t%rcx\n+\t.set\tCOUNTER,\t%eax\n+#else\n+\t// Arguments, assuming -mregparm=3\n+\t.set\tRNDKEYS,\t%eax\n+\t.set\tINV_RNDKEYS,\t%edx\n+\t.set\tIN_KEY,\t\t%ecx\n+\n+\t// Other local variables\n+\t.set\tRCON_PTR,\t%ebx\n+\t.set\tCOUNTER,\t%esi\n+#endif\n+\t.set\tRCON,\t\t%xmm6\n+\t.set\tMASK,\t\t%xmm7\n+\n+#ifdef __i386__\n+\tpush\t\t%ebx\n+\tpush\t\t%esi\n+#endif\n+\n+.if \\is_aes128\n+\t// AES-128: the first round key is simply a copy of the raw key.\n+\tmovdqu\t\t(IN_KEY), %xmm0\n+\tmovdqu\t\t%xmm0, (RNDKEYS)\n+.else\n+\t// AES-256: the first two round keys are simply a copy of the raw key.\n+\tmovdqu\t\t(IN_KEY), %xmm0\n+\tmovdqu\t\t%xmm0, (RNDKEYS)\n+\tmovdqu\t\t16(IN_KEY), %xmm1\n+\tmovdqu\t\t%xmm1, 16(RNDKEYS)\n+\tadd\t\t$32, RNDKEYS\n+.endif\n+\n+\t// Generate the remaining round keys.\n+\tmovdqa\t\tRODATA(.Lmask), MASK\n+.if \\is_aes128\n+\tlea\t\tRODATA(.Lrcon), RCON_PTR\n+\tmov\t\t$10, 
COUNTER\n+.Lgen_next_aes128_round_key:\n+\tadd\t\t$16, RNDKEYS\n+\tmovd\t\t(RCON_PTR), RCON\n+\tpshufd\t\t$0x00, RCON, RCON\n+\tadd\t\t$4, RCON_PTR\n+\t_gen_round_key\t%xmm0, %xmm0\n+\tdec\t\tCOUNTER\n+\tjnz\t\t.Lgen_next_aes128_round_key\n+.else\n+\t// AES-256: only the first 7 round constants are needed, so instead of\n+\t// loading each one from memory, just start by loading [1, 1, 1, 1] and\n+\t// then generate the rest by doubling.\n+\tpshufd\t\t$0x00, RODATA(.Lrcon), RCON\n+\tpxor\t\t%xmm5, %xmm5\t// All-zeroes\n+\tmov\t\t$7, COUNTER\n+.Lgen_next_aes256_round_key_pair:\n+\t// Generate the next AES-256 round key: either the first of a pair of\n+\t// two, or the last one.\n+\t_gen_round_key\t%xmm0, %xmm1\n+\n+\tdec\t\tCOUNTER\n+\tjz\t\t.Lgen_aes256_round_keys_done\n+\n+\t// Generate the second AES-256 round key of the pair. Compared to the\n+\t// first, there's no rotation and no XOR of a round constant.\n+\tpshufd\t\t$0xff, %xmm0, %xmm2\t// Get four copies of last dword\n+\taesenclast\t%xmm5, %xmm2\t\t// Just does SubBytes\n+\t_prefix_sum\t%xmm1, tmp=%xmm3\n+\tpxor\t\t%xmm2, %xmm1\n+\tmovdqu\t\t%xmm1, 16(RNDKEYS)\n+\tadd\t\t$32, RNDKEYS\n+\tpaddd\t\tRCON, RCON\t\t// RCON <<= 1\n+\tjmp\t\t.Lgen_next_aes256_round_key_pair\n+.Lgen_aes256_round_keys_done:\n+.endif\n+\n+\t// If INV_RNDKEYS is non-NULL, write the round keys for the Equivalent\n+\t// Inverse Cipher to it. 
To do that, reverse the standard round keys,\n+\t// and apply aesimc (InvMixColumn) to each except the first and last.\n+\ttest\t\tINV_RNDKEYS, INV_RNDKEYS\n+\tjz\t\t.Ldone\\@\n+\tmovdqu\t\t(RNDKEYS), %xmm0\t// Last standard round key\n+\tmovdqu\t\t%xmm0, (INV_RNDKEYS)\t// => First inverse round key\n+.if \\is_aes128\n+\tmov\t\t$9, COUNTER\n+.else\n+\tmov\t\t$13, COUNTER\n+.endif\n+.Lgen_next_inv_round_key\\@:\n+\tsub\t\t$16, RNDKEYS\n+\tadd\t\t$16, INV_RNDKEYS\n+\tmovdqu\t\t(RNDKEYS), %xmm0\n+\taesimc\t\t%xmm0, %xmm0\n+\tmovdqu\t\t%xmm0, (INV_RNDKEYS)\n+\tdec\t\tCOUNTER\n+\tjnz\t\t.Lgen_next_inv_round_key\\@\n+\tmovdqu\t\t-16(RNDKEYS), %xmm0\t// First standard round key\n+\tmovdqu\t\t%xmm0, 16(INV_RNDKEYS)\t// => Last inverse round key\n+\n+.Ldone\\@:\n+#ifdef __i386__\n+\tpop\t\t%esi\n+\tpop\t\t%ebx\n+#endif\n+\tRET\n+.endm\n+\n+// void aes128_expandkey_aesni(u32 rndkeys[], u32 *inv_rndkeys,\n+//\t\t\t const u8 in_key[AES_KEYSIZE_128]);\n+SYM_FUNC_START(aes128_expandkey_aesni)\n+\t_aes_expandkey_aesni\t1\n+SYM_FUNC_END(aes128_expandkey_aesni)\n+\n+// void aes256_expandkey_aesni(u32 rndkeys[], u32 *inv_rndkeys,\n+//\t\t\t const u8 in_key[AES_KEYSIZE_256]);\n+SYM_FUNC_START(aes256_expandkey_aesni)\n+\t_aes_expandkey_aesni\t0\n+SYM_FUNC_END(aes256_expandkey_aesni)\n+\n+.macro\t_aes_crypt_aesni\tenc\n+#ifdef __x86_64__\n+\t.set\tRNDKEYS,\t%rdi\n+\t.set\tNROUNDS,\t%esi\n+\t.set\tOUT,\t\t%rdx\n+\t.set\tIN,\t\t%rcx\n+#else\n+\t// Assuming -mregparm=3\n+\t.set\tRNDKEYS,\t%eax\n+\t.set\tNROUNDS,\t%edx\n+\t.set\tOUT,\t\t%ecx\n+\t.set\tIN,\t\t%ebx\t// Passed on stack\n+#endif\n+\n+#ifdef __i386__\n+\tpush\t\t%ebx\n+\tmov\t\t8(%esp), %ebx\n+#endif\n+\n+\t// Zero-th round\n+\tmovdqu\t\t(IN), %xmm0\n+\tmovdqu\t\t(RNDKEYS), %xmm1\n+\tpxor\t\t%xmm1, %xmm0\n+\n+\t// Normal rounds\n+\tadd\t\t$16, RNDKEYS\n+\tdec\t\tNROUNDS\n+.Lnext_round\\@:\n+\tmovdqu\t\t(RNDKEYS), %xmm1\n+.if \\enc\n+\taesenc\t\t%xmm1, %xmm0\n+.else\n+\taesdec\t\t%xmm1, %xmm0\n+.endif\n+\tadd\t\t$16, 
RNDKEYS\n+\tdec\t\tNROUNDS\n+\tjne\t\t.Lnext_round\\@\n+\n+\t// Last round\n+\tmovdqu\t\t(RNDKEYS), %xmm1\n+.if \\enc\n+\taesenclast\t%xmm1, %xmm0\n+.else\n+\taesdeclast\t%xmm1, %xmm0\n+.endif\n+\tmovdqu\t\t%xmm0, (OUT)\n+\n+#ifdef __i386__\n+\tpop\t\t%ebx\n+#endif\n+\tRET\n+.endm\n+\n+// void aes_encrypt_aesni(const u32 rndkeys[], int nrounds,\n+//\t\t\t u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);\n+SYM_FUNC_START(aes_encrypt_aesni)\n+\t_aes_crypt_aesni\t1\n+SYM_FUNC_END(aes_encrypt_aesni)\n+\n+// void aes_decrypt_aesni(const u32 inv_rndkeys[], int nrounds,\n+//\t\t\t u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);\n+SYM_FUNC_START(aes_decrypt_aesni)\n+\t_aes_crypt_aesni\t0\n+SYM_FUNC_END(aes_decrypt_aesni)\ndiff --git a/lib/crypto/x86/aes.h b/lib/crypto/x86/aes.h\nnew file mode 100644\nindex 000000000000..b047dee94f57\n--- /dev/null\n+++ b/lib/crypto/x86/aes.h\n@@ -0,0 +1,85 @@\n+/* SPDX-License-Identifier: GPL-2.0-or-later */\n+/*\n+ * AES block cipher using AES-NI instructions\n+ *\n+ * Copyright 2026 Google LLC\n+ */\n+\n+#include <asm/fpu/api.h>\n+\n+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_aes);\n+\n+void aes128_expandkey_aesni(u32 rndkeys[], u32 *inv_rndkeys,\n+\t\t\t const u8 in_key[AES_KEYSIZE_128]);\n+void aes256_expandkey_aesni(u32 rndkeys[], u32 *inv_rndkeys,\n+\t\t\t const u8 in_key[AES_KEYSIZE_256]);\n+void aes_encrypt_aesni(const u32 rndkeys[], int nrounds,\n+\t\t u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);\n+void aes_decrypt_aesni(const u32 inv_rndkeys[], int nrounds,\n+\t\t u8 out[AES_BLOCK_SIZE], const u8 in[AES_BLOCK_SIZE]);\n+\n+/*\n+ * Expand an AES key using AES-NI if supported and usable or generic code\n+ * otherwise. The expanded key format is compatible between the two cases. The\n+ * outputs are @k->rndkeys (required) and @inv_k->inv_rndkeys (optional).\n+ *\n+ * We could just always use the generic key expansion code. AES key expansion\n+ * is usually less performance-critical than AES en/decryption. 
However,\n+ * there's still *some* value in speed here, as well as in non-key-dependent\n+ * execution time which AES-NI provides. So, do use AES-NI to expand AES-128\n+ * and AES-256 keys. (Don't bother with AES-192, as it's almost never used.)\n+ */\n+static void aes_preparekey_arch(union aes_enckey_arch *k,\n+\t\t\t\tunion aes_invkey_arch *inv_k,\n+\t\t\t\tconst u8 *in_key, int key_len, int nrounds)\n+{\n+\tu32 *rndkeys = k->rndkeys;\n+\tu32 *inv_rndkeys = inv_k ? inv_k->inv_rndkeys : NULL;\n+\n+\tif (static_branch_likely(&have_aes) && key_len != AES_KEYSIZE_192 &&\n+\t irq_fpu_usable()) {\n+\t\tkernel_fpu_begin();\n+\t\tif (key_len == AES_KEYSIZE_128)\n+\t\t\taes128_expandkey_aesni(rndkeys, inv_rndkeys, in_key);\n+\t\telse\n+\t\t\taes256_expandkey_aesni(rndkeys, inv_rndkeys, in_key);\n+\t\tkernel_fpu_end();\n+\t} else {\n+\t\taes_expandkey_generic(rndkeys, inv_rndkeys, in_key, key_len);\n+\t}\n+}\n+\n+static void aes_encrypt_arch(const struct aes_enckey *key,\n+\t\t\t u8 out[AES_BLOCK_SIZE],\n+\t\t\t const u8 in[AES_BLOCK_SIZE])\n+{\n+\tif (static_branch_likely(&have_aes) && irq_fpu_usable()) {\n+\t\tkernel_fpu_begin();\n+\t\taes_encrypt_aesni(key->k.rndkeys, key->nrounds, out, in);\n+\t\tkernel_fpu_end();\n+\t} else {\n+\t\taes_encrypt_generic(key->k.rndkeys, key->nrounds, out, in);\n+\t}\n+}\n+\n+static void aes_decrypt_arch(const struct aes_key *key,\n+\t\t\t u8 out[AES_BLOCK_SIZE],\n+\t\t\t const u8 in[AES_BLOCK_SIZE])\n+{\n+\tif (static_branch_likely(&have_aes) && irq_fpu_usable()) {\n+\t\tkernel_fpu_begin();\n+\t\taes_decrypt_aesni(key->inv_k.inv_rndkeys, key->nrounds,\n+\t\t\t\t out, in);\n+\t\tkernel_fpu_end();\n+\t} else {\n+\t\taes_decrypt_generic(key->inv_k.inv_rndkeys, key->nrounds,\n+\t\t\t\t out, in);\n+\t}\n+}\n+\n+#define aes_mod_init_arch aes_mod_init_arch\n+static void aes_mod_init_arch(void)\n+{\n+\tif (boot_cpu_has(X86_FEATURE_AES))\n+\t\tstatic_branch_enable(&have_aes);\n+}\n", "prefixes": [ "v2", "17/35" ] }