Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/2232126/?format=api
{ "id": 2232126, "url": "http://patchwork.ozlabs.org/api/patches/2232126/?format=api", "web_url": "http://patchwork.ozlabs.org/project/gcc/patch/014401dcda87$37100870$a5301950$@nextmovesoftware.com/", "project": { "id": 17, "url": "http://patchwork.ozlabs.org/api/projects/17/?format=api", "name": "GNU Compiler Collection", "link_name": "gcc", "list_id": "gcc-patches.gcc.gnu.org", "list_email": "gcc-patches@gcc.gnu.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<014401dcda87$37100870$a5301950$@nextmovesoftware.com>", "list_archive_url": null, "date": "2026-05-02T22:58:37", "name": "[x86_64] Handle hard registers in STV with inter-unit moves.", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "239d954c4d0220db730544187cf47dbf5e6844d9", "submitter": { "id": 68376, "url": "http://patchwork.ozlabs.org/api/people/68376/?format=api", "name": "Roger Sayle", "email": "roger@nextmovesoftware.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/gcc/patch/014401dcda87$37100870$a5301950$@nextmovesoftware.com/mbox/", "series": [ { "id": 502553, "url": "http://patchwork.ozlabs.org/api/series/502553/?format=api", "web_url": "http://patchwork.ozlabs.org/project/gcc/list/?series=502553", "date": "2026-05-02T22:58:37", "name": "[x86_64] Handle hard registers in STV with inter-unit moves.", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/502553/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2232126/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2232126/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "gcc-patches@gcc.gnu.org" ], "Delivered-To": [ "patchwork-incoming@legolas.ozlabs.org", "gcc-patches@gcc.gnu.org" ], 
"Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n unprotected) header.d=nextmovesoftware.com header.i=@nextmovesoftware.com\n header.a=rsa-sha256 header.s=default header.b=IOrj+U5h;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org\n (client-ip=2620:52:6:3111::32; helo=vm01.sourceware.org;\n envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org;\n receiver=patchwork.ozlabs.org)", "sourceware.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key,\n unprotected) header.d=nextmovesoftware.com header.i=@nextmovesoftware.com\n header.a=rsa-sha256 header.s=default header.b=IOrj+U5h", "sourceware.org; dmarc=pass (p=none dis=none)\n header.from=nextmovesoftware.com", "sourceware.org;\n spf=pass smtp.mailfrom=nextmovesoftware.com", "server2.sourceware.org;\n arc=none smtp.remote-ip=69.48.154.134" ], "Received": [ "from vm01.sourceware.org (vm01.sourceware.org\n [IPv6:2620:52:6:3111::32])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4g7NdN5zvMz1xvV\n\tfor <incoming@patchwork.ozlabs.org>; Sun, 03 May 2026 08:59:20 +1000 (AEST)", "from vm01.sourceware.org (localhost [127.0.0.1])\n\tby sourceware.org (Postfix) with ESMTP id 04C5F4BB5900\n\tfor <incoming@patchwork.ozlabs.org>; Sat, 2 May 2026 22:59:19 +0000 (GMT)", "from server.nextmovesoftware.com (server.nextmovesoftware.com\n [69.48.154.134])\n by sourceware.org (Postfix) with ESMTPS id 1A0354BB3BE1\n for <gcc-patches@gcc.gnu.org>; Sat, 2 May 2026 22:58:40 +0000 (GMT)", "from [168.86.198.7] (port=58705 helo=Dell)\n by server.nextmovesoftware.com with esmtpsa (TLS1.2) tls\n TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.99.1)\n (envelope-from <roger@nextmovesoftware.com>)\n id 
1wJJIF-00000006H8l-16SN; Sat, 02 May 2026 18:58:39 -0400" ], "DKIM-Filter": [ "OpenDKIM Filter v2.11.0 sourceware.org 04C5F4BB5900", "OpenDKIM Filter v2.11.0 sourceware.org 1A0354BB3BE1" ], "DMARC-Filter": "OpenDMARC Filter v1.4.2 sourceware.org 1A0354BB3BE1", "ARC-Filter": "OpenARC Filter v1.0.0 sourceware.org 1A0354BB3BE1", "ARC-Seal": "i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1777762720; cv=none;\n b=GQDHQv7aJB7JE5eU9PVAOj5iO5/9BTbSGC7JrFmuHxSNztbhTzIU6GahNpcloJqbF7G6wS3UhQmppbPcdnHjmmRSqBaWAi0z2o9Sm8S6UOjaJ8WbItGt4XmF9cHn8wDQUFJWYZCK4GKf7LkW5WRYhRYdMXlOUQ78bHl+dUBrUac=", "ARC-Message-Signature": "i=1; a=rsa-sha256; d=sourceware.org; s=key;\n t=1777762720; c=relaxed/simple;\n bh=deJQ98xTbnFw/JdGQaD540z1TYqgB64VC1i6A6bUAKI=;\n h=DKIM-Signature:From:To:Subject:Date:Message-ID:MIME-Version;\n b=Jrw783M0Q0MFl6lKTFrXBhq3KPooIXToms3NsY6Xs+yj90Ee+JZ5DbI8HISRNMuuD6xmaVMOTlUnNVF9BR5+8DD7Q8Mv2ftNYsvx79mRUTuHSxY0kyDCahnJCE+t5s2Rkr5yslPxz3BdlCoqKANRblqVwLmeDLf1SOexf9E+oK0=", "ARC-Authentication-Results": "i=1; server2.sourceware.org", "DKIM-Signature": "v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;\n d=nextmovesoftware.com; s=default; h=Content-Type:MIME-Version:Message-ID:\n Date:Subject:Cc:To:From:Sender:Reply-To:Content-Transfer-Encoding:Content-ID:\n Content-Description:Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc\n :Resent-Message-ID:In-Reply-To:References:List-Id:List-Help:List-Unsubscribe:\n List-Subscribe:List-Post:List-Owner:List-Archive;\n bh=SOjBzLiIYP8hlwZZmvYR64kH8BG1djBYdMBFGCRt7OU=; b=IOrj+U5hVIjWcPq3OsqFOKQ8xg\n BI5htQDXrRqiJN3+MuboASOwKYmLz9m0bYvbnjMZRYwVbn/AB8NHqt8vlNOYqv1oj4py6UvyKnRkf\n CQvCVgWJ4XcwT4r9mPkphHPr4D1DrBqk/80/h5geeokP2Yvqxq+2QeIK4AQ0hNH8IgH47r0NDlLPU\n STBn5QQFLK/hIrZEyYUbTim/NbPZ1Lp+7KlwEFdF27R2yRZIW802siKRQ/1Ie4D64prIbjqTyrrnD\n wNqHRqYM2RALcp/op7A6E3WmgcmGRt+KaLklr9bdp8OmDjh5+J6+hbeInDCGU9QdREMTMPDDK3JJz\n HkWTbu0Q==;", "From": "\"Roger Sayle\" <roger@nextmovesoftware.com>", "To": "\"'GCC Patches'\" 
<gcc-patches@gcc.gnu.org>", "Cc": "\"'Hongtao Liu'\" <crazylht@gmail.com>,\n \"'Liu, Hongtao'\" <hongtao.liu@intel.com>,\n \"'Uros Bizjak'\" <ubizjak@gmail.com>", "Subject": "[x86_64 PATCH] Handle hard registers in STV with inter-unit moves.", "Date": "Sat, 2 May 2026 23:58:37 +0100", "Message-ID": "<014401dcda87$37100870$a5301950$@nextmovesoftware.com>", "MIME-Version": "1.0", "Content-Type": "multipart/mixed;\n boundary=\"----=_NextPart_000_0145_01DCDA8F.98D47070\"", "X-Mailer": "Microsoft Outlook 16.0", "Thread-Index": "AdzahqMVhiN0nDktTBqDmVZ9FgUu6g==", "Content-Language": "en-gb", "X-AntiAbuse": [ "This header was added to track abuse,\n please include it with any abuse report", "Primary Hostname - server.nextmovesoftware.com", "Original Domain - gcc.gnu.org", "Originator/Caller UID/GID - [47 12] / [47 12]", "Sender Address Domain - nextmovesoftware.com" ], "X-Get-Message-Sender-Via": "server.nextmovesoftware.com: authenticated_id:\n roger@nextmovesoftware.com", "X-Authenticated-Sender": "server.nextmovesoftware.com:\n roger@nextmovesoftware.com", "X-Source": "", "X-Source-Args": "", "X-Source-Dir": "", "X-BeenThere": "gcc-patches@gcc.gnu.org", "X-Mailman-Version": "2.1.30", "Precedence": "list", "List-Id": "Gcc-patches mailing list <gcc-patches.gcc.gnu.org>", "List-Unsubscribe": "<https://gcc.gnu.org/mailman/options/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>", "List-Archive": "<https://gcc.gnu.org/pipermail/gcc-patches/>", "List-Post": "<mailto:gcc-patches@gcc.gnu.org>", "List-Help": "<mailto:gcc-patches-request@gcc.gnu.org?subject=help>", "List-Subscribe": "<https://gcc.gnu.org/mailman/listinfo/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>", "Errors-To": "gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org" }, "content": "This patch extends the types of chains that can be converted by x86's\nTImode Scalar-To-Vector (STV) pass, to include chains that originate\nand/or terminate with 
moves from/to hard registers. Currently STV\ncandidate instructions explicitly exclude those that mention hard\nregisters.\n\nAs motivation, consider the four following functions:\n\n__int128 a, b, c, z;\n__int128 fun();\n\nvoid foo_in(__int128 x) { z = (x ^ a ^ b ^ c); }\n\n__int128 foo_out() { return (z ^ a ^ b ^ c); }\n\n__int128 foo_inout(__int128 x) { return (x ^ a ^ b ^ c ^ z); }\n\nvoid foo_fun() { z = (fun() ^ a ^ b ^ c); }\n\nOf these, only the first, foo_in, is currently STV converted to use\nSSE instructions. Its incoming argument is constructed from a concat\nof two DImode registers, and support for this idiom was added in a\nprevious STV patch. The next two functions aren't converted because\nthe chain terminates with a return, which places the TImode result in\na hard register. Likewise, the final foo_fun case isn't converted as\nthe result from fun initiates a chain from a hard register.\n\nThis patch supports STV conversion of TImode register-to-register\nmoves, where either the source or the destination (but not both) is\na hard register, by implementing it as a (relatively expensive)\ninter-unit move.\n\nBefore, with -O2 -mavx:\n\nfoo_out:\n movq z(%rip), %rax\n movq z+8(%rip), %rdx\n xorq a(%rip), %rax\n xorq a+8(%rip), %rdx\n xorq b(%rip), %rax\n xorq b+8(%rip), %rdx\n xorq c(%rip), %rax\n xorq c+8(%rip), %rdx\n ret\n\nAfter, with -O2 -mavx:\n\nfoo_out:\n vmovdqa z(%rip), %xmm0\n vpxor a(%rip), %xmm0, %xmm0\n vpxor b(%rip), %xmm0, %xmm0\n vpxor c(%rip), %xmm0, %xmm0\n vpextrq $1, %xmm0, %rdx\n vmovq %xmm0, %rax\n ret\n\nLikewise for foo_fun, before with -O2 -mavx:\n\nfoo_fun:\n subq $8, %rsp\n call fun\n movq a(%rip), %rsi\n movq a+8(%rip), %rdi\n xorq b(%rip), %rsi\n xorq b+8(%rip), %rdi\n xorq c(%rip), %rsi\n xorq c+8(%rip), %rdi\n xorq %rax, %rsi\n xorq %rdx, %rdi\n movq %rsi, z(%rip)\n movq %rdi, z+8(%rip)\n addq $8, %rsp\n ret\n\nAfter with -O2 -mavx:\n\nfoo_fun:\n subq $8, %rsp\n call fun\n vmovdqa a(%rip), %xmm0\n vpxor b(%rip), %xmm0, 
%xmm0\n vmovq %rax, %xmm2\n vpxor c(%rip), %xmm0, %xmm0\n vpinsrq $1, %rdx, %xmm2, %xmm1\n vpxor %xmm1, %xmm0, %xmm0\n vmovdqa %xmm0, z(%rip)\n addq $8, %rsp\n ret\n\nThe one small subtlety in this patch is in the cost calculation\nfor inter-unit moves, which now correctly uses both sse_to_integer\nand integer_to_sse costs. This patch models the transfer of double\nword transfers between units as interunit_cost + COSTS_N_INSNS(1),\ni.e. that the two transfers are pipelined in parallel, so that the\nhigh latency is accounted for once [rather than 2*interunit_cost\nthat assumes the transfers take place strictly sequentially with\ntwice the single word transfer latency].\n\n\nThis patch has been tested on x86_64-pc-linux-gnu with make bootstrap\nand make -k check, both with and without --target_board=unix{-m32}\nwith no new failures. Ok for mainline?\n\n\n2026-05-02 Roger Sayle <roger@nextmovesoftware.com>\n\ngcc/ChangeLog\n * config/i386/i386-features.cc (scalar_chain): If the chain\n starts with a register-to-register move from a hard register,\n then the hard register's defs don't need to be converted.\n (timode_scalar_chain::compute_convert_gain): Provide costs\n for hard_reg-to-pseudo and pseudo-to-hard_reg moves.\n Tweak speed cost of timode_concatdi_p moves.\n (timode_scalar_chain::convert_insn): Add support for\n hard_reg-to-pseudo and pseudo-to-hard_reg TImode transfers.\n (timode_scalar_to_vector_candidate_p): Likewise.\n\ngcc/testsuite/ChangeLog\n * gcc.target/i386/avx-stv-1.c: New test case.\n * gcc.target/i386/sse2-stv-3.c: Likewise.\n\n\nThanks,\nRoger\n--", "diff": "diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc\nindex 63f9dcc9f93..acbf2de22a8 100644\n--- a/gcc/config/i386/i386-features.cc\n+++ b/gcc/config/i386/i386-features.cc\n@@ -506,6 +506,10 @@ scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,\n if (def_set)\n switch (GET_CODE (SET_SRC (def_set)))\n {\n+ case REG:\n+\tif (HARD_REGISTER_P (SET_SRC 
(def_set)))\n+\t return true;\n+\tbreak;\n case VEC_SELECT:\n \treturn true;\n case ZERO_EXTEND:\n@@ -1641,7 +1645,33 @@ timode_scalar_chain::compute_convert_gain ()\n switch (GET_CODE (src))\n \t{\n \tcase REG:\n-\t if (!speed_p)\n+\t if (HARD_REGISTER_P (src))\n+\t {\n+\t if (TARGET_AVX)\n+\t\t/* vmovq + vpinsrq */\n+\t\tigain = speed_p ? -ix86_cost->integer_to_sse\n+\t\t\t\t - COSTS_N_INSNS (1)\n+\t\t\t\t: -COSTS_N_BYTES (11);\n+\t else\n+\t\t/* movq + movq + punpcklqdq */\n+\t\tigain = speed_p ? -ix86_cost->integer_to_sse\n+\t\t\t\t - COSTS_N_INSNS (2)\n+\t\t\t\t: -COSTS_N_BYTES (14);\n+\t }\n+\t else if (REG_P (dst) && HARD_REGISTER_P (dst))\n+\t {\n+\t if (TARGET_AVX)\n+\t\t/* vpextrq + vmovq */\n+\t\tigain = speed_p ? -ix86_cost->sse_to_integer\n+\t\t\t\t - COSTS_N_INSNS (1)\n+\t\t\t\t: -COSTS_N_BYTES (11);\n+\t else\n+\t\t/* movhlps + movq + movq */\n+\t\tigain = speed_p ? -ix86_cost->sse_to_integer\n+\t\t\t\t - COSTS_N_INSNS (2)\n+\t\t\t\t: -COSTS_N_BYTES (13);\n+\t }\n+\t else if (!speed_p)\n \t igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3);\n \t else\n \t igain = COSTS_N_INSNS (1);\n@@ -1680,7 +1710,7 @@ timode_scalar_chain::compute_convert_gain ()\n \t if (timode_concatdi_p (src))\n \t {\n \t /* vmovq;vpinsrq (11 bytes). */\n-\t igain = speed_p ? -2 * ix86_cost->sse_to_integer\n+\t igain = speed_p ? -ix86_cost->integer_to_sse - COSTS_N_INSNS (1)\n \t\t\t : -COSTS_N_BYTES (11);\n \t break;\n \t }\n@@ -1693,7 +1723,7 @@ timode_scalar_chain::compute_convert_gain ()\n \tcase PLUS:\n \t if (timode_concatdi_p (src))\n \t /* vmovq;vpinsrq (11 bytes). */\n-\t igain = speed_p ? -2 * ix86_cost->sse_to_integer\n+\t igain = speed_p ? 
-ix86_cost->integer_to_sse - COSTS_N_INSNS (1)\n \t\t\t : -COSTS_N_BYTES (11);\n \t break;\n \n@@ -1961,7 +1991,8 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)\n switch (GET_CODE (dst))\n {\n case REG:\n- if (GET_MODE (dst) == TImode)\n+ if (GET_MODE (dst) == TImode\n+\t && !HARD_REGISTER_P (dst))\n \t{\n \t PUT_MODE (dst, V1TImode);\n \t fix_debug_reg_uses (dst);\n@@ -1988,8 +2019,40 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)\n case REG:\n if (GET_MODE (src) == TImode)\n \t{\n-\t PUT_MODE (src, V1TImode);\n-\t fix_debug_reg_uses (src);\n+\t if (HARD_REGISTER_P (src))\n+\t {\n+\t rtx lo = gen_reg_rtx (DImode);\n+\t rtx hi = gen_reg_rtx (DImode);\n+\t emit_insn_before (gen_rtx_SET (lo, gen_lowpart (DImode, src)),\n+\t\t\t\tinsn);\n+\t emit_insn_before (gen_rtx_SET (hi, gen_highpart (DImode, src)),\n+\t\t\t\tinsn);\n+\t src = gen_reg_rtx (V2DImode);\n+\t emit_insn_before (gen_vec_concatv2di (src, lo, hi), insn);\n+\t src = gen_lowpart (V1TImode, src);\n+\t }\n+\t else\n+\t {\n+\t PUT_MODE (src, V1TImode);\n+\t fix_debug_reg_uses (src);\n+\t }\n+\t}\n+ if (REG_P (dst) && HARD_REGISTER_P (dst))\n+\t{\n+\t rtx tmp = gen_reg_rtx (V2DImode);\n+\t src = gen_lowpart (V2DImode, src);\n+\t emit_insn_before (gen_rtx_SET (tmp, src), insn);\n+\t /* Extracting hi before lo helps register allocation. */\n+\t rtx hi = gen_reg_rtx (DImode);\n+\t rtx lo = gen_reg_rtx (DImode);\n+\t emit_insn_before (gen_vec_extractv2didi (hi, tmp, const1_rtx), insn);\n+\t emit_insn_before (gen_vec_extractv2didi (lo, tmp, const0_rtx), insn);\n+\n+\t /* Construct *concatditi3 pattern from lo and hi. 
*/\n+\t hi = gen_rtx_ZERO_EXTEND (TImode, hi);\n+\t hi = gen_rtx_ASHIFT (TImode, hi, GEN_INT (64));\n+\t lo = gen_rtx_ZERO_EXTEND (TImode, lo);\n+\t src = gen_rtx_PLUS (TImode, hi, lo);\n \t}\n break;\n \n@@ -2453,8 +2516,31 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)\n {\n rtx def_set = pseudo_reg_set (insn);\n \n+ /* We allow two exceptions to the pseudo registers only rule.\n+ Setting a hard register from a pseudo, and setting a pseudo\n+ from a hard register. */\n if (!def_set)\n- return false;\n+ {\n+ def_set = single_set (insn);\n+ if (def_set)\n+\t{\n+\t rtx src = SET_SRC (def_set);\n+\t rtx dst = SET_DEST (def_set);\n+\t if (GET_MODE (dst) == TImode\n+\t && REG_P (src) && REG_P (dst))\n+\t {\n+\t if (HARD_REGISTER_P (dst)\n+\t\t && !HARD_REGISTER_P (src)\n+\t\t && single_def_chain_p (src))\n+\t\treturn true;\n+\t if (HARD_REGISTER_P (src)\n+\t\t && !HARD_REGISTER_P (dst)\n+\t\t && single_def_chain_p (dst))\n+\t\treturn true;\n+\t }\n+\t}\n+ return false;\n+ }\n \n rtx src = SET_SRC (def_set);\n rtx dst = SET_DEST (def_set);\ndiff --git a/gcc/testsuite/gcc.target/i386/avx-stv-1.c b/gcc/testsuite/gcc.target/i386/avx-stv-1.c\nnew file mode 100644\nindex 00000000000..e9dea2d0f4a\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/i386/avx-stv-1.c\n@@ -0,0 +1,30 @@\n+/* { dg-do compile { target int128 } } */\n+/* { dg-options \"-O2 -mavx -mno-stackrealign\" } */\n+\n+__int128 a, b, c, z;\n+\n+__int128 fun();\n+\n+void foo_in(__int128 x)\n+{\n+ z = (x ^ a ^ b ^ c);\n+}\n+\n+__int128 foo_out()\n+{\n+ return (z ^ a ^ b ^ c);\n+}\n+\n+__int128 foo_inout(__int128 x)\n+{\n+ return (x ^ a ^ b ^ c ^ z);\n+}\n+\n+void foo_fun()\n+{\n+ z = (fun() ^ a ^ b ^ c);\n+}\n+\n+/* { dg-final { scan-assembler-times \"vpinsrq\" 3 } } */\n+/* { dg-final { scan-assembler-times \"vpextrq\" 2 } } */\n+/* { dg-final { scan-assembler-times \"vpxor\" 13 } } */\ndiff --git a/gcc/testsuite/gcc.target/i386/sse2-stv-3.c b/gcc/testsuite/gcc.target/i386/sse2-stv-3.c\nnew file mode 
100644\nindex 00000000000..0a638013aed\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/i386/sse2-stv-3.c\n@@ -0,0 +1,31 @@\n+/* { dg-do compile { target int128 } } */\n+/* { dg-options \"-O2 -msse2 -mno-sse4 -mno-stackrealign\" } */\n+\n+__int128 a, b, c, z;\n+\n+__int128 fun();\n+\n+void foo_in(__int128 x)\n+{\n+ z = (x ^ a ^ b ^ c);\n+}\n+\n+__int128 foo_out()\n+{\n+ return (z ^ a ^ b ^ c);\n+}\n+\n+__int128 foo_inout(__int128 x)\n+{\n+ return (x ^ a ^ b ^ c ^ z);\n+}\n+\n+void foo_fun()\n+{\n+ z = (fun() ^ a ^ b ^ c);\n+}\n+\n+/* { dg-final { scan-assembler-times \"punpcklqdq\" 2 } } */\n+/* { dg-final { scan-assembler-times \"movhlps\" 1 } } */\n+/* { dg-final { scan-assembler-times \"pxor\" 9 } } */\n+/* { dg-final { scan-assembler-times \"xorq\" 8 } } */\n", "prefixes": [ "x86_64" ] }