get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/2232126/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 2232126,
    "url": "http://patchwork.ozlabs.org/api/patches/2232126/?format=api",
    "web_url": "http://patchwork.ozlabs.org/project/gcc/patch/014401dcda87$37100870$a5301950$@nextmovesoftware.com/",
    "project": {
        "id": 17,
        "url": "http://patchwork.ozlabs.org/api/projects/17/?format=api",
        "name": "GNU Compiler Collection",
        "link_name": "gcc",
        "list_id": "gcc-patches.gcc.gnu.org",
        "list_email": "gcc-patches@gcc.gnu.org",
        "web_url": null,
        "scm_url": null,
        "webscm_url": null,
        "list_archive_url": "",
        "list_archive_url_format": "",
        "commit_url_format": ""
    },
    "msgid": "<014401dcda87$37100870$a5301950$@nextmovesoftware.com>",
    "list_archive_url": null,
    "date": "2026-05-02T22:58:37",
    "name": "[x86_64] Handle hard registers in STV with inter-unit moves.",
    "commit_ref": null,
    "pull_url": null,
    "state": "new",
    "archived": false,
    "hash": "239d954c4d0220db730544187cf47dbf5e6844d9",
    "submitter": {
        "id": 68376,
        "url": "http://patchwork.ozlabs.org/api/people/68376/?format=api",
        "name": "Roger Sayle",
        "email": "roger@nextmovesoftware.com"
    },
    "delegate": null,
    "mbox": "http://patchwork.ozlabs.org/project/gcc/patch/014401dcda87$37100870$a5301950$@nextmovesoftware.com/mbox/",
    "series": [
        {
            "id": 502553,
            "url": "http://patchwork.ozlabs.org/api/series/502553/?format=api",
            "web_url": "http://patchwork.ozlabs.org/project/gcc/list/?series=502553",
            "date": "2026-05-02T22:58:37",
            "name": "[x86_64] Handle hard registers in STV with inter-unit moves.",
            "version": 1,
            "mbox": "http://patchwork.ozlabs.org/series/502553/mbox/"
        }
    ],
    "comments": "http://patchwork.ozlabs.org/api/patches/2232126/comments/",
    "check": "pending",
    "checks": "http://patchwork.ozlabs.org/api/patches/2232126/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org>",
        "X-Original-To": [
            "incoming@patchwork.ozlabs.org",
            "gcc-patches@gcc.gnu.org"
        ],
        "Delivered-To": [
            "patchwork-incoming@legolas.ozlabs.org",
            "gcc-patches@gcc.gnu.org"
        ],
        "Authentication-Results": [
            "legolas.ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n unprotected) header.d=nextmovesoftware.com header.i=@nextmovesoftware.com\n header.a=rsa-sha256 header.s=default header.b=IOrj+U5h;\n\tdkim-atps=neutral",
            "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org\n (client-ip=2620:52:6:3111::32; helo=vm01.sourceware.org;\n envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org;\n receiver=patchwork.ozlabs.org)",
            "sourceware.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key,\n unprotected) header.d=nextmovesoftware.com header.i=@nextmovesoftware.com\n header.a=rsa-sha256 header.s=default header.b=IOrj+U5h",
            "sourceware.org; dmarc=pass (p=none dis=none)\n header.from=nextmovesoftware.com",
            "sourceware.org;\n spf=pass smtp.mailfrom=nextmovesoftware.com",
            "server2.sourceware.org;\n arc=none smtp.remote-ip=69.48.154.134"
        ],
        "Received": [
            "from vm01.sourceware.org (vm01.sourceware.org\n [IPv6:2620:52:6:3111::32])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4g7NdN5zvMz1xvV\n\tfor <incoming@patchwork.ozlabs.org>; Sun, 03 May 2026 08:59:20 +1000 (AEST)",
            "from vm01.sourceware.org (localhost [127.0.0.1])\n\tby sourceware.org (Postfix) with ESMTP id 04C5F4BB5900\n\tfor <incoming@patchwork.ozlabs.org>; Sat,  2 May 2026 22:59:19 +0000 (GMT)",
            "from server.nextmovesoftware.com (server.nextmovesoftware.com\n [69.48.154.134])\n by sourceware.org (Postfix) with ESMTPS id 1A0354BB3BE1\n for <gcc-patches@gcc.gnu.org>; Sat,  2 May 2026 22:58:40 +0000 (GMT)",
            "from [168.86.198.7] (port=58705 helo=Dell)\n by server.nextmovesoftware.com with esmtpsa (TLS1.2) tls\n TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.99.1)\n (envelope-from <roger@nextmovesoftware.com>)\n id 1wJJIF-00000006H8l-16SN; Sat, 02 May 2026 18:58:39 -0400"
        ],
        "DKIM-Filter": [
            "OpenDKIM Filter v2.11.0 sourceware.org 04C5F4BB5900",
            "OpenDKIM Filter v2.11.0 sourceware.org 1A0354BB3BE1"
        ],
        "DMARC-Filter": "OpenDMARC Filter v1.4.2 sourceware.org 1A0354BB3BE1",
        "ARC-Filter": "OpenARC Filter v1.0.0 sourceware.org 1A0354BB3BE1",
        "ARC-Seal": "i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1777762720; cv=none;\n b=GQDHQv7aJB7JE5eU9PVAOj5iO5/9BTbSGC7JrFmuHxSNztbhTzIU6GahNpcloJqbF7G6wS3UhQmppbPcdnHjmmRSqBaWAi0z2o9Sm8S6UOjaJ8WbItGt4XmF9cHn8wDQUFJWYZCK4GKf7LkW5WRYhRYdMXlOUQ78bHl+dUBrUac=",
        "ARC-Message-Signature": "i=1; a=rsa-sha256; d=sourceware.org; s=key;\n t=1777762720; c=relaxed/simple;\n bh=deJQ98xTbnFw/JdGQaD540z1TYqgB64VC1i6A6bUAKI=;\n h=DKIM-Signature:From:To:Subject:Date:Message-ID:MIME-Version;\n b=Jrw783M0Q0MFl6lKTFrXBhq3KPooIXToms3NsY6Xs+yj90Ee+JZ5DbI8HISRNMuuD6xmaVMOTlUnNVF9BR5+8DD7Q8Mv2ftNYsvx79mRUTuHSxY0kyDCahnJCE+t5s2Rkr5yslPxz3BdlCoqKANRblqVwLmeDLf1SOexf9E+oK0=",
        "ARC-Authentication-Results": "i=1; server2.sourceware.org",
        "DKIM-Signature": "v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;\n d=nextmovesoftware.com; s=default; h=Content-Type:MIME-Version:Message-ID:\n Date:Subject:Cc:To:From:Sender:Reply-To:Content-Transfer-Encoding:Content-ID:\n Content-Description:Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc\n :Resent-Message-ID:In-Reply-To:References:List-Id:List-Help:List-Unsubscribe:\n List-Subscribe:List-Post:List-Owner:List-Archive;\n bh=SOjBzLiIYP8hlwZZmvYR64kH8BG1djBYdMBFGCRt7OU=; b=IOrj+U5hVIjWcPq3OsqFOKQ8xg\n BI5htQDXrRqiJN3+MuboASOwKYmLz9m0bYvbnjMZRYwVbn/AB8NHqt8vlNOYqv1oj4py6UvyKnRkf\n CQvCVgWJ4XcwT4r9mPkphHPr4D1DrBqk/80/h5geeokP2Yvqxq+2QeIK4AQ0hNH8IgH47r0NDlLPU\n STBn5QQFLK/hIrZEyYUbTim/NbPZ1Lp+7KlwEFdF27R2yRZIW802siKRQ/1Ie4D64prIbjqTyrrnD\n wNqHRqYM2RALcp/op7A6E3WmgcmGRt+KaLklr9bdp8OmDjh5+J6+hbeInDCGU9QdREMTMPDDK3JJz\n HkWTbu0Q==;",
        "From": "\"Roger Sayle\" <roger@nextmovesoftware.com>",
        "To": "\"'GCC Patches'\" <gcc-patches@gcc.gnu.org>",
        "Cc": "\"'Hongtao Liu'\" <crazylht@gmail.com>,\n \"'Liu, Hongtao'\" <hongtao.liu@intel.com>,\n \"'Uros Bizjak'\" <ubizjak@gmail.com>",
        "Subject": "[x86_64 PATCH] Handle hard registers in STV with inter-unit moves.",
        "Date": "Sat, 2 May 2026 23:58:37 +0100",
        "Message-ID": "<014401dcda87$37100870$a5301950$@nextmovesoftware.com>",
        "MIME-Version": "1.0",
        "Content-Type": "multipart/mixed;\n boundary=\"----=_NextPart_000_0145_01DCDA8F.98D47070\"",
        "X-Mailer": "Microsoft Outlook 16.0",
        "Thread-Index": "AdzahqMVhiN0nDktTBqDmVZ9FgUu6g==",
        "Content-Language": "en-gb",
        "X-AntiAbuse": [
            "This header was added to track abuse,\n please include it with any abuse report",
            "Primary Hostname - server.nextmovesoftware.com",
            "Original Domain - gcc.gnu.org",
            "Originator/Caller UID/GID - [47 12] / [47 12]",
            "Sender Address Domain - nextmovesoftware.com"
        ],
        "X-Get-Message-Sender-Via": "server.nextmovesoftware.com: authenticated_id:\n roger@nextmovesoftware.com",
        "X-Authenticated-Sender": "server.nextmovesoftware.com:\n roger@nextmovesoftware.com",
        "X-Source": "",
        "X-Source-Args": "",
        "X-Source-Dir": "",
        "X-BeenThere": "gcc-patches@gcc.gnu.org",
        "X-Mailman-Version": "2.1.30",
        "Precedence": "list",
        "List-Id": "Gcc-patches mailing list <gcc-patches.gcc.gnu.org>",
        "List-Unsubscribe": "<https://gcc.gnu.org/mailman/options/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>",
        "List-Archive": "<https://gcc.gnu.org/pipermail/gcc-patches/>",
        "List-Post": "<mailto:gcc-patches@gcc.gnu.org>",
        "List-Help": "<mailto:gcc-patches-request@gcc.gnu.org?subject=help>",
        "List-Subscribe": "<https://gcc.gnu.org/mailman/listinfo/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>",
        "Errors-To": "gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org"
    },
    "content": "This patch extends the types of chains that can be converted by x86's\nTImode Scalar-To-Vector (STV) pass, to include chains that originate\nand/or terminate with moves from/to hard registers.  Currently STV\ncandidate instructions explicitly exclude those than mention hard\nregisters.\n\nAs motivation, consider the four following functions:\n\n__int128 a, b, c, z;\n__int128 fun();\n\nvoid foo_in(__int128 x) { z = (x ^ a ^ b ^ c); }\n\n__int128 foo_out() { return (z ^ a ^ b ^ c); }\n\n__int128 foo_inout(__int128 x) { return (x ^ a ^ b ^ c ^ z); }\n\nvoid foo_fun() { z = (fun() ^ a ^ b ^ c); }\n\nOf these, only the first, foo_in, is currently STV converted to use\nSSE instructions.  Its incoming argument is constructed from a concat\nof two DImode registers, and support for this idiom was added in a\nprevious STV patch.  The next two functions aren't converted because\nthe chain terminates with a return, which places the TImode result in\na hard register.  Likewise, the final foo_fun case isn't converted as\nthe result from fun initiates a chain from a hard register.\n\nThis patch supports STV conversion of TImode register-to-register\nmoves, where either the source or the destination (but not both) is\na hard register, by implementing it as a (relatively expensive)\ninter-unit move.\n\nBefore, with -O2 -mavx:\n\nfoo_out:\n        movq    z(%rip), %rax\n        movq    z+8(%rip), %rdx\n        xorq    a(%rip), %rax\n        xorq    a+8(%rip), %rdx\n        xorq    b(%rip), %rax\n        xorq    b+8(%rip), %rdx\n        xorq    c(%rip), %rax\n        xorq    c+8(%rip), %rdx\n        ret\n\nAfter, with -O2 -mavx:\n\nfoo_out:\n        vmovdqa z(%rip), %xmm0\n        vpxor   a(%rip), %xmm0, %xmm0\n        vpxor   b(%rip), %xmm0, %xmm0\n        vpxor   c(%rip), %xmm0, %xmm0\n        vpextrq $1, %xmm0, %rdx\n        vmovq   %xmm0, %rax\n        ret\n\nLikewise for foo_fun, before with -O2 -mavx:\n\nfoo_fun:\n        subq    $8, %rsp\n        call    fun\n  
      movq    a(%rip), %rsi\n        movq    a+8(%rip), %rdi\n        xorq    b(%rip), %rsi\n        xorq    b+8(%rip), %rdi\n        xorq    c(%rip), %rsi\n        xorq    c+8(%rip), %rdi\n        xorq    %rax, %rsi\n        xorq    %rdx, %rdi\n        movq    %rsi, z(%rip)\n        movq    %rdi, z+8(%rip)\n        addq    $8, %rsp\n        ret\n\nAfter with -O2 -mavx:\n\nfoo_fun:\n        subq    $8, %rsp\n        call    fun\n        vmovdqa a(%rip), %xmm0\n        vpxor   b(%rip), %xmm0, %xmm0\n        vmovq   %rax, %xmm2\n        vpxor   c(%rip), %xmm0, %xmm0\n        vpinsrq $1, %rdx, %xmm2, %xmm1\n        vpxor   %xmm1, %xmm0, %xmm0\n        vmovdqa %xmm0, z(%rip)\n        addq    $8, %rsp\n        ret\n\nThe one small subtlety in this patch is in the cost calculation\nfor inter-unit moves, which now correctly uses both sse_to_integer\nand integer_to_sse costs.  This patch models the transfer of double\nword transfers between units as interunit_cost + COSTS_N_INSNS(1),\ni.e. that the two transfers are pipelined in parallel, so that the\nhigh latency is accounted for once [rather than 2*interunit_cost\nthat assumes the transfers take place strictly sequentially with\ntwice the single word transfer latency].\n\n\nThis patch has been tested on x86_64-pc-linux-gnu with make bootstrap\nand make -k check, both with and without --target_board=unix{-m32}\nwith no new failures.  
Ok for mainline?\n\n\n2026-05-02  Roger Sayle  <roger@nextmovesoftware.com>\n\ngcc/ChangeLog\n        * config/i386/i386-features.cc (scalar_chain::add_insn): If the chain\n        starts with a register-to-register move from a hard register,\n        then the hard register's defs don't need to be converted.\n        (timode_scalar_chain::compute_convert_gain): Provide costs\n        for hard_reg-to-pseudo and pseudo-to-hard_reg moves.\n        Tweak speed cost of timode_concatdi_p moves.\n        (timode_scalar_chain::convert_insn): Add support for\n        hard_reg-to-pseudo and pseudo-to-hard_reg TImode transfers.\n        (timode_scalar_to_vector_candidate_p): Likewise.\n\ngcc/testsuite/ChangeLog\n        * gcc.target/i386/avx-stv-1.c: New test case.\n        * gcc.target/i386/sse2-stv-3.c: Likewise.\n\n\nThanks,\nRoger\n--",
    "diff": "diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc\nindex 63f9dcc9f93..acbf2de22a8 100644\n--- a/gcc/config/i386/i386-features.cc\n+++ b/gcc/config/i386/i386-features.cc\n@@ -506,6 +506,10 @@ scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,\n   if (def_set)\n     switch (GET_CODE (SET_SRC (def_set)))\n       {\n+      case REG:\n+\tif (HARD_REGISTER_P (SET_SRC (def_set)))\n+\t  return true;\n+\tbreak;\n       case VEC_SELECT:\n \treturn true;\n       case ZERO_EXTEND:\n@@ -1641,7 +1645,33 @@ timode_scalar_chain::compute_convert_gain ()\n       switch (GET_CODE (src))\n \t{\n \tcase REG:\n-\t  if (!speed_p)\n+\t  if (HARD_REGISTER_P (src))\n+\t    {\n+\t      if (TARGET_AVX)\n+\t\t/* vmovq + vpinsrq */\n+\t\tigain = speed_p ? -ix86_cost->integer_to_sse\n+\t\t\t\t  - COSTS_N_INSNS (1)\n+\t\t\t\t: -COSTS_N_BYTES (11);\n+\t      else\n+\t\t/* movq + movq + punpcklqdq */\n+\t\tigain = speed_p ? -ix86_cost->integer_to_sse\n+\t\t\t\t  - COSTS_N_INSNS (2)\n+\t\t\t\t: -COSTS_N_BYTES (14);\n+\t    }\n+\t  else if (REG_P (dst) && HARD_REGISTER_P (dst))\n+\t    {\n+\t      if (TARGET_AVX)\n+\t\t/* vpextrq + vmovq */\n+\t\tigain = speed_p ? -ix86_cost->sse_to_integer\n+\t\t\t\t  - COSTS_N_INSNS (1)\n+\t\t\t\t: -COSTS_N_BYTES (11);\n+\t      else\n+\t\t/* movhlps + movq + movq */\n+\t\tigain = speed_p ? -ix86_cost->sse_to_integer\n+\t\t\t\t  - COSTS_N_INSNS (2)\n+\t\t\t\t: -COSTS_N_BYTES (13);\n+\t    }\n+\t  else if (!speed_p)\n \t    igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3);\n \t  else\n \t    igain = COSTS_N_INSNS (1);\n@@ -1680,7 +1710,7 @@ timode_scalar_chain::compute_convert_gain ()\n \t  if (timode_concatdi_p (src))\n \t    {\n \t      /* vmovq;vpinsrq (11 bytes).  */\n-\t      igain = speed_p ? -2 * ix86_cost->sse_to_integer\n+\t      igain = speed_p ? 
-ix86_cost->integer_to_sse - COSTS_N_INSNS (1)\n \t\t\t      : -COSTS_N_BYTES (11);\n \t      break;\n \t    }\n@@ -1693,7 +1723,7 @@ timode_scalar_chain::compute_convert_gain ()\n \tcase PLUS:\n \t  if (timode_concatdi_p (src))\n \t    /* vmovq;vpinsrq (11 bytes).  */\n-\t    igain = speed_p ? -2 * ix86_cost->sse_to_integer\n+\t    igain = speed_p ? -ix86_cost->integer_to_sse - COSTS_N_INSNS (1)\n \t\t\t    : -COSTS_N_BYTES (11);\n \t  break;\n \n@@ -1961,7 +1991,8 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)\n   switch (GET_CODE (dst))\n     {\n     case REG:\n-      if (GET_MODE (dst) == TImode)\n+      if (GET_MODE (dst) == TImode\n+\t  && !HARD_REGISTER_P (dst))\n \t{\n \t  PUT_MODE (dst, V1TImode);\n \t  fix_debug_reg_uses (dst);\n@@ -1988,8 +2019,40 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)\n     case REG:\n       if (GET_MODE (src) == TImode)\n \t{\n-\t  PUT_MODE (src, V1TImode);\n-\t  fix_debug_reg_uses (src);\n+\t  if (HARD_REGISTER_P (src))\n+\t    {\n+\t      rtx lo = gen_reg_rtx (DImode);\n+\t      rtx hi = gen_reg_rtx (DImode);\n+\t      emit_insn_before (gen_rtx_SET (lo, gen_lowpart (DImode, src)),\n+\t\t\t\tinsn);\n+\t      emit_insn_before (gen_rtx_SET (hi, gen_highpart (DImode, src)),\n+\t\t\t\tinsn);\n+\t      src = gen_reg_rtx (V2DImode);\n+\t      emit_insn_before (gen_vec_concatv2di (src, lo, hi), insn);\n+\t      src = gen_lowpart (V1TImode, src);\n+\t    }\n+\t  else\n+\t    {\n+\t      PUT_MODE (src, V1TImode);\n+\t      fix_debug_reg_uses (src);\n+\t    }\n+\t}\n+      if (REG_P (dst) && HARD_REGISTER_P (dst))\n+\t{\n+\t  rtx tmp = gen_reg_rtx (V2DImode);\n+\t  src = gen_lowpart (V2DImode, src);\n+\t  emit_insn_before (gen_rtx_SET (tmp, src), insn);\n+\t  /* Extracting hi before lo helps register allocation.  
*/\n+\t  rtx hi = gen_reg_rtx (DImode);\n+\t  rtx lo = gen_reg_rtx (DImode);\n+\t  emit_insn_before (gen_vec_extractv2didi (hi, tmp, const1_rtx), insn);\n+\t  emit_insn_before (gen_vec_extractv2didi (lo, tmp, const0_rtx), insn);\n+\n+\t  /* Construct *concatditi3 pattern from lo and hi.  */\n+\t  hi = gen_rtx_ZERO_EXTEND (TImode, hi);\n+\t  hi = gen_rtx_ASHIFT (TImode, hi, GEN_INT (64));\n+\t  lo = gen_rtx_ZERO_EXTEND (TImode, lo);\n+\t  src = gen_rtx_PLUS (TImode, hi, lo);\n \t}\n       break;\n \n@@ -2453,8 +2516,31 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)\n {\n   rtx def_set = pseudo_reg_set (insn);\n \n+  /* We allow two exceptions to the pseudo registers only rule.\n+     Setting a hard register from a pseudo, and setting a pseudo\n+     from a hard register.  */\n   if (!def_set)\n-    return false;\n+    {\n+      def_set = single_set (insn);\n+      if (def_set)\n+\t{\n+\t  rtx src = SET_SRC (def_set);\n+\t  rtx dst = SET_DEST (def_set);\n+\t  if (GET_MODE (dst) == TImode\n+\t      && REG_P (src) && REG_P (dst))\n+\t    {\n+\t      if (HARD_REGISTER_P (dst)\n+\t\t  && !HARD_REGISTER_P (src)\n+\t\t  && single_def_chain_p (src))\n+\t\treturn true;\n+\t      if (HARD_REGISTER_P (src)\n+\t\t  && !HARD_REGISTER_P (dst)\n+\t\t  && single_def_chain_p (dst))\n+\t\treturn true;\n+\t    }\n+\t}\n+      return false;\n+    }\n \n   rtx src = SET_SRC (def_set);\n   rtx dst = SET_DEST (def_set);\ndiff --git a/gcc/testsuite/gcc.target/i386/avx-stv-1.c b/gcc/testsuite/gcc.target/i386/avx-stv-1.c\nnew file mode 100644\nindex 00000000000..e9dea2d0f4a\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/i386/avx-stv-1.c\n@@ -0,0 +1,30 @@\n+/* { dg-do compile { target int128 } } */\n+/* { dg-options \"-O2 -mavx -mno-stackrealign\" } */\n+\n+__int128 a, b, c, z;\n+\n+__int128 fun();\n+\n+void foo_in(__int128 x)\n+{\n+  z = (x ^ a ^ b ^ c);\n+}\n+\n+__int128 foo_out()\n+{\n+  return (z ^ a ^ b ^ c);\n+}\n+\n+__int128 foo_inout(__int128 x)\n+{\n+  return (x ^ a ^ b ^ c 
^ z);\n+}\n+\n+void foo_fun()\n+{\n+  z = (fun() ^ a ^ b ^ c);\n+}\n+\n+/* { dg-final { scan-assembler-times \"vpinsrq\" 3 } } */\n+/* { dg-final { scan-assembler-times \"vpextrq\" 2 } } */\n+/* { dg-final { scan-assembler-times \"vpxor\" 13 } } */\ndiff --git a/gcc/testsuite/gcc.target/i386/sse2-stv-3.c b/gcc/testsuite/gcc.target/i386/sse2-stv-3.c\nnew file mode 100644\nindex 00000000000..0a638013aed\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/i386/sse2-stv-3.c\n@@ -0,0 +1,31 @@\n+/* { dg-do compile { target int128 } } */\n+/* { dg-options \"-O2 -msse2 -mno-sse4 -mno-stackrealign\" } */\n+\n+__int128 a, b, c, z;\n+\n+__int128 fun();\n+\n+void foo_in(__int128 x)\n+{\n+  z = (x ^ a ^ b ^ c);\n+}\n+\n+__int128 foo_out()\n+{\n+  return (z ^ a ^ b ^ c);\n+}\n+\n+__int128 foo_inout(__int128 x)\n+{\n+  return (x ^ a ^ b ^ c ^ z);\n+}\n+\n+void foo_fun()\n+{\n+  z = (fun() ^ a ^ b ^ c);\n+}\n+\n+/* { dg-final { scan-assembler-times \"punpcklqdq\" 2 } } */\n+/* { dg-final { scan-assembler-times \"movhlps\" 1 } } */\n+/* { dg-final { scan-assembler-times \"pxor\" 9 } } */\n+/* { dg-final { scan-assembler-times \"xorq\" 8 } } */\n",
    "prefixes": [
        "x86_64"
    ]
}