Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/2218120/?format=api
{ "id": 2218120, "url": "http://patchwork.ozlabs.org/api/patches/2218120/?format=api", "web_url": "http://patchwork.ozlabs.org/project/gcc/patch/DHGZH4M7BA62.1EIKZXL4JIZWE@gmail.com/", "project": { "id": 17, "url": "http://patchwork.ozlabs.org/api/projects/17/?format=api", "name": "GNU Compiler Collection", "link_name": "gcc", "list_id": "gcc-patches.gcc.gnu.org", "list_email": "gcc-patches@gcc.gnu.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<DHGZH4M7BA62.1EIKZXL4JIZWE@gmail.com>", "list_archive_url": null, "date": "2026-03-31T12:58:03", "name": "vect: Enable two distinct sources in BIT_FIELD_REFs. [PR105816]", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "ce3e776b4f8b51f995bb4c451beab1a1fbf7e5d6", "submitter": { "id": 86205, "url": "http://patchwork.ozlabs.org/api/people/86205/?format=api", "name": "Robin Dapp", "email": "rdapp.gcc@gmail.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/gcc/patch/DHGZH4M7BA62.1EIKZXL4JIZWE@gmail.com/mbox/", "series": [ { "id": 498192, "url": "http://patchwork.ozlabs.org/api/series/498192/?format=api", "web_url": "http://patchwork.ozlabs.org/project/gcc/list/?series=498192", "date": "2026-03-31T12:58:03", "name": "vect: Enable two distinct sources in BIT_FIELD_REFs. [PR105816]", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/498192/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2218120/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2218120/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "gcc-patches@gcc.gnu.org" ], "Delivered-To": [ "patchwork-incoming@legolas.ozlabs.org", "gcc-patches@gcc.gnu.org" ], "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (2048-bit key;\n unprotected) header.d=gmail.com header.i=@gmail.com header.a=rsa-sha256\n header.s=20251104 header.b=SZmconBb;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org\n (client-ip=2620:52:6:3111::32; helo=vm01.sourceware.org;\n envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org;\n receiver=patchwork.ozlabs.org)", "sourceware.org;\n\tdkim=pass (2048-bit key,\n unprotected) header.d=gmail.com header.i=@gmail.com header.a=rsa-sha256\n header.s=20251104 header.b=SZmconBb", "sourceware.org;\n dmarc=pass (p=none dis=none) header.from=gmail.com", "sourceware.org; spf=pass smtp.mailfrom=gmail.com", "server2.sourceware.org;\n arc=none smtp.remote-ip=209.85.128.51" ], "Received": [ "from vm01.sourceware.org (vm01.sourceware.org\n [IPv6:2620:52:6:3111::32])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4flSq32CtRz1yCp\n\tfor <incoming@patchwork.ozlabs.org>; Tue, 31 Mar 2026 23:58:39 +1100 (AEDT)", "from vm01.sourceware.org (localhost [127.0.0.1])\n\tby sourceware.org (Postfix) with ESMTP id 7072C4BB58EF\n\tfor <incoming@patchwork.ozlabs.org>; Tue, 31 Mar 2026 12:58:37 +0000 (GMT)", "from mail-wm1-f51.google.com (mail-wm1-f51.google.com\n [209.85.128.51])\n by sourceware.org (Postfix) with ESMTPS id DD5734BA540B\n for <gcc-patches@gcc.gnu.org>; Tue, 31 Mar 2026 12:58:06 +0000 (GMT)", "by mail-wm1-f51.google.com with SMTP id\n 5b1f17b1804b1-48557c8ad47so43582945e9.0\n for <gcc-patches@gcc.gnu.org>; Tue, 31 Mar 2026 05:58:06 -0700 (PDT)", "from localhost (ip-149-172-150-237.um42.pools.vodafone-ip.de.\n [149.172.150.237]) by smtp.gmail.com with ESMTPSA id\n 5b1f17b1804b1-4887e80a63esm46909175e9.3.2026.03.31.05.58.04\n (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128);\n Tue, 31 Mar 2026 05:58:04 -0700 (PDT)" ], "DKIM-Filter": [ "OpenDKIM Filter v2.11.0 sourceware.org 7072C4BB58EF", "OpenDKIM Filter v2.11.0 sourceware.org DD5734BA540B" ], "DMARC-Filter": "OpenDMARC Filter v1.4.2 sourceware.org DD5734BA540B", "ARC-Filter": "OpenARC Filter v1.0.0 sourceware.org DD5734BA540B", "ARC-Seal": "i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1774961887; cv=none;\n b=EOr1cg0Q9VSUzxTcZS3aS67T9KQx0+MY4Ke0Jcp9Y/0N/t28GUvKvMOF+sC5cf/UR11Bgo9U6YVSQ4sZc3lFbAi1dpxPQpJKOiKEaYDXW5/sZDee8Y/a+pYOdQRboFiKVYglGjmatbLiu1PoEuNxtCfYVJZPbxacpQGtZKWaxL4=", "ARC-Message-Signature": "i=1; a=rsa-sha256; d=sourceware.org; s=key;\n t=1774961887; c=relaxed/simple;\n bh=spBO/XSfZcRWr5CqIhVr2BDYP/A9nKIbs3XFrbAT9m0=;\n h=DKIM-Signature:Mime-Version:Date:Message-Id:Subject:To:From;\n b=aQtejGakqKphh0tMsdEtoQwRliiYD7yN0kToL/ncsM+iVC8F4y+Dj53IOml5OBEGKtg+nYy2OcdVnf+3GYI4+w9cTvTLp4sJ1a1HtAz44DcOrvr1dlR/jpymWRowX/+B9YivQPzD6iwJGowQW9TmcCQJB/4B0TuTlE0Ipm5SfPI=", "ARC-Authentication-Results": "i=1; server2.sourceware.org", "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=gmail.com; s=20251104; t=1774961885; x=1775566685; darn=gcc.gnu.org;\n h=from:to:cc:subject:message-id:date:content-transfer-encoding\n :mime-version:from:to:cc:subject:date:message-id:reply-to;\n bh=gYIjhR+xtxSOy0d7DzaYghLNPzoMTzPb1Zx6xWFpeVE=;\n b=SZmconBbaxtEjgjvhUWGfkpsrTqnZv3gNHy9UiXgwo6h/qpmS9HWQSb250REURCDUt\n 5yFMCTRbZ93MDzO9ayqKEG1PunP0vt07M8hce/EdYs2Hprpio6szXaF+4g2I6cEBx28p\n XXztHw2dlo8FKOlw4EMAAnLro4gHWgd5+yfVG6G2ZzalHhK+YhQJVgrwTRieOgQSp2oL\n 0lT9z2cX5daeb7aAbs4KbIGiwqY2hxubRh8lz76OyN0hdxcGjaHJU82CjMUb+k9piltO\n eHisqar62UIOUe+KRyKLsiaYi32ga5P/ZgO4xmuDBoqkAg9FsTPh0GedN1mo4o3b4/cz\n A3dA==", "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=1e100.net; s=20251104; t=1774961885; x=1775566685;\n h=from:to:cc:subject:message-id:date:content-transfer-encoding\n :mime-version:x-gm-gg:x-gm-message-state:from:to:cc:subject:date\n :message-id:reply-to;\n bh=gYIjhR+xtxSOy0d7DzaYghLNPzoMTzPb1Zx6xWFpeVE=;\n b=SP52pCt8IDK4NVI2/EF7B5RttJZgEXT3XQpKGUF/eBG+xefWNup4F6tAWLznT4MOAT\n FzbXoTLzE9Wu8O7UlqQ+YaJKPCzq7Z74LWWZ68vjNkf4bbhouWYRMJIZIjy2POzUiTSa\n MsMN7J5/xDS2IZxr66GoMZA7Q/5R8H82RNezdVbLEgO7dTkjygrPZIVPWr8HklHNYHuA\n EIfEmz3O9YJkbOAjBVxC7ETJEu1RpG8YRqKbwwOMHavwjrIszpbAgPIvgWYD4j5+s8xX\n fkwVESk+lrdP9eyPqCa9SFjfBfGj0LjZDGrCSTzWzrvlS3uEpUJT3uB8rY0lS/EConvv\n MOag==", "X-Gm-Message-State": "AOJu0YxGXpS7n5WziaaTO94sirQd3q31FrPH4YwCiMfHAsjIBrRfvtLS\n zhtFA2XBMtDUR2sfHCznl0P+JbXBv5S6aEUWGiD+FmBtcJ6qbwWDu9WlpofoFw==", "X-Gm-Gg": "ATEYQzwBI+sX2f4SXidxkanonKH1IsIHyjJVG/+FsK3D3y5UeU/dU7v99wp0PpoEA7E\n 9ZM3gwwz4LQuH2fOqOOR++FATBDK05PRItJGvjZBRyb7cO5H9UjyWT1NJSAjI4Ok62xdUqmOxYo\n JUX6GEz9fcvnQGSF9WGuSoxDnehx3UZh71eta0rkqTtjueez8H4AB5Y+j1V9sRc1NC8RiFj+tsO\n Rh4mdhc2cC3OyilXSUrNaDX56ayc8H6iBjdxZ34tGwVDG1THnAuFBcLnFkv7Yperi0yuQphwjaf\n TNJwsyQsw5PfhoPrOuiwgVrFm6X/op4TS2vsn3yN2eXa6aNcNGIk+aUoVoWa+LwrLW/V7y0Chul\n BTNRq7bvQnFvllpAdILVrGDgoQKOhz0PYRq3qAE3hF74J0Ut99cCvq/JmRCIru5LFtJ9xBeyDKk\n X3y40UitPwvIl8Vb/UnLZVV7gbeLRsaC4tIYEPmzAvuRztIorHReoljUiuI8QIoKzx00ipgYob", "X-Received": "by 2002:a05:600c:4994:b0:485:9a50:3369 with SMTP id\n 5b1f17b1804b1-4872cb2825amr148343635e9.29.1774961884903;\n Tue, 31 Mar 2026 05:58:04 -0700 (PDT)", "Mime-Version": "1.0", "Content-Transfer-Encoding": "quoted-printable", "Content-Type": "text/plain; charset=UTF-8", "Date": "Tue, 31 Mar 2026 14:58:03 +0200", "Message-Id": "<DHGZH4M7BA62.1EIKZXL4JIZWE@gmail.com>", "Subject": "[PATCH] vect: Enable two distinct sources in BIT_FIELD_REFs.\n [PR105816]", "Cc": "\"Robin Dapp\" <rdapp.gcc@gmail.com>", "To": "\"gcc-patches\" <gcc-patches@gcc.gnu.org>", "From": "\"Robin Dapp\" <rdapp.gcc@gmail.com>", "X-BeenThere": "gcc-patches@gcc.gnu.org", "X-Mailman-Version": "2.1.30", "Precedence": "list", "List-Id": "Gcc-patches mailing list <gcc-patches.gcc.gnu.org>", "List-Unsubscribe": "<https://gcc.gnu.org/mailman/options/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>", "List-Archive": "<https://gcc.gnu.org/pipermail/gcc-patches/>", "List-Post": "<mailto:gcc-patches@gcc.gnu.org>", "List-Help": "<mailto:gcc-patches-request@gcc.gnu.org?subject=help>", "List-Subscribe": "<https://gcc.gnu.org/mailman/listinfo/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>", "Errors-To": "gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org" }, "content": "Hi,\n\n(GCC 17)\n\nCurrently, for a situation like\n\n _1 = BIT_FIELD_REF <src1_9(D), 32, 0>;\n _2 = BIT_FIELD_REF <src1_9(D), 32, 32>;\n _3 = BIT_FIELD_REF <src1_9(D), 32, 64>;\n _4 = BIT_FIELD_REF <src1_9(D), 32, 96>;\n _5 = BIT_FIELD_REF <src2_16(D), 32, 0>;\n _6 = BIT_FIELD_REF <src2_16(D), 32, 32>;\n _7 = BIT_FIELD_REF <src2_16(D), 32, 64>;\n _8 = BIT_FIELD_REF <src2_16(D), 32, 96>;\n _21 = {_1, _2, _3, _4, _5, _6, _7, _8};\n\nwe give up because the vec_init, looking through the BIT_FIELD_REFs, sees\ntwo source vectors but we only expect one.\n\nThis patch adds support for a second one, leading to a permutation being\ncreated. We can do the same optimization in forwprop but that's too\nlate as we have already confused vect costing by the (usually) expensive\nvec_init.\n\nWithout changes, this causes a regression on x86 (pr54400.c):\n\n __m128d i4 (__m128d p, __m128d q)\n {\n __m128d r = { p[1] + p[0], q[1] + q[0] };\n return r;\n }\n\nThe BIT_FIELD_EXPRs are expanded to vec_selects, the constructor to vec_concat. \nWe recognize this in combine as the appropriate insn pattern. I took the \nshotgun approach and just added all now-necessary patterns.\n\nI suppose this kind of fallout is unavoidable, in particular for small vectors \nthat we can \"naturally\" combine into others. In such situations the \ntwo-vector/permute approach is at a disadvantage. For larger vectors, though, \nthe vec_select/vec_concat approach doesn't scale.\n\nBootstrapped and regtested on x86, power10, and aarch64.\nRegtested on riscv64.\n\nRegards\n Robin\n\n\tPR tree-optimization/105816\n\ngcc/ChangeLog:\n\n\t* tree-vect-slp.cc (compatible_bit_field_ref_source): New helper\n\tfunction.\n\t(vect_build_slp_tree_1): Use helper.\n\t(build_perm_source_node): New helper function.\n\t(vect_build_slp_tree_2): Use helper and allow a second source\n\tvector.\n\t* config/i386/sse.md (*sse3_haddv2df3_perm): New pattern to\n\trecognize interleaved/permuted hsub/hadd.\n\t(*sse3_hsubv2df3_perm): Ditto.\n\t(*sse3_haddv2df3_perm_rev): Ditto.\n\t(*sse3_haddv2df3_merge): Ditto.\n\t(*sse3_haddv2df3_merge_rev): Ditto.\n\t(*sse3_hsubv2df3_perm): Ditto.\n\t(*sse3_hsubv2df3_perm_rev): Ditto.\n\ngcc/testsuite/ChangeLog:\n\n\t* gcc.dg/vect/pr105816.c: New test.\n\nSigned-off-by: Robin Dapp <robin.dapp@oss.qualcomm.com>\n---\n gcc/config/i386/sse.md | 141 +++++++++++++++++++++++++++\n gcc/testsuite/gcc.dg/vect/pr105816.c | 23 +++++\n gcc/tree-vect-slp.cc | 139 ++++++++++++++++++--------\n 3 files changed, 263 insertions(+), 40 deletions(-)\n create mode 100644 gcc/testsuite/gcc.dg/vect/pr105816.c", "diff": "diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md\nindex a3f68ad9c1a..b32ea0657de 100644\n--- a/gcc/config/i386/sse.md\n+++ b/gcc/config/i386/sse.md\n@@ -3844,6 +3844,147 @@ (define_insn \"*sse3_hsubv2df3_low\"\n (set_attr \"prefix\" \"orig,vex\")\n (set_attr \"mode\" \"V2DF\")])\n \n+; The following patterns help recognize\n+; (plus/minus interleave_even interleave_odd)\n+; as h(sub/add).\n+(define_insn_and_split \"*sse3_haddv2df3_perm\"\n+ [(set (match_operand:V2DF 0 \"register_operand\")\n+ (plus:V2DF\n+ (vec_select:V2DF\n+ (vec_concat:V4DF\n+ (match_operand:V2DF 1 \"register_operand\")\n+ (match_operand:V2DF 2 \"vector_operand\"))\n+ (parallel [(const_int 0) (const_int 2)]))\n+ (vec_select:V2DF\n+ (vec_concat:V4DF (match_dup 1) (match_dup 2))\n+ (parallel [(const_int 1) (const_int 3)]))))]\n+ \"TARGET_SSE3\"\n+ \"#\"\n+ \"&& 1\"\n+ [(set (match_dup 0)\n+ (vec_concat:V2DF\n+ (plus:DF\n+ (vec_select:DF (match_dup 1) (parallel [(const_int 0)]))\n+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))\n+ (plus:DF\n+ (vec_select:DF (match_dup 2) (parallel [(const_int 0)]))\n+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))])\n+\n+(define_insn_and_split \"*sse3_haddv2df3_perm_rev\"\n+ [(set (match_operand:V2DF 0 \"register_operand\")\n+ (plus:V2DF\n+ (vec_select:V2DF\n+ (vec_concat:V4DF\n+ (match_operand:V2DF 1 \"register_operand\")\n+ (match_operand:V2DF 2 \"vector_operand\"))\n+ (parallel [(const_int 1) (const_int 3)]))\n+ (vec_select:V2DF\n+ (vec_concat:V4DF (match_dup 1) (match_dup 2))\n+ (parallel [(const_int 0) (const_int 2)]))))]\n+ \"TARGET_SSE3\"\n+ \"#\"\n+ \"&& 1\"\n+ [(set (match_dup 0)\n+ (vec_concat:V2DF\n+ (plus:DF\n+ (vec_select:DF (match_dup 1) (parallel [(const_int 0)]))\n+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))\n+ (plus:DF\n+ (vec_select:DF (match_dup 2) (parallel [(const_int 0)]))\n+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))])\n+\n+(define_insn_and_split \"*sse3_haddv2df3_merge\"\n+ [(set (match_operand:V2DF 0 \"register_operand\")\n+ (plus:V2DF\n+ (vec_select:V2DF\n+ (vec_concat:V4DF\n+ (match_operand:V2DF 1 \"register_operand\")\n+ (match_operand:V2DF 2 \"vector_operand\"))\n+ (parallel [(const_int 1) (const_int 2)]))\n+ (vec_merge:V2DF\n+ (match_dup 1)\n+ (match_dup 2)\n+ (const_int 1))))]\n+ \"TARGET_SSE3\"\n+ \"#\"\n+ \"&& 1\"\n+ [(set (match_dup 0)\n+ (vec_concat:V2DF\n+ (plus:DF\n+ (vec_select:DF (match_dup 1) (parallel [(const_int 0)]))\n+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))\n+ (plus:DF\n+ (vec_select:DF (match_dup 2) (parallel [(const_int 0)]))\n+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))])\n+\n+(define_insn_and_split \"*sse3_haddv2df3_merge_rev\"\n+ [(set (match_operand:V2DF 0 \"register_operand\")\n+ (plus:V2DF\n+ (vec_merge:V2DF\n+ (match_operand:V2DF 1 \"register_operand\")\n+ (match_operand:V2DF 2 \"vector_operand\")\n+ (const_int 1))\n+ (vec_select:V2DF\n+ (vec_concat:V4DF (match_dup 1) (match_dup 2))\n+ (parallel [(const_int 1) (const_int 2)]))))]\n+ \"TARGET_SSE3\"\n+ \"#\"\n+ \"&& 1\"\n+ [(set (match_dup 0)\n+ (vec_concat:V2DF\n+ (plus:DF\n+ (vec_select:DF (match_dup 1) (parallel [(const_int 0)]))\n+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))\n+ (plus:DF\n+ (vec_select:DF (match_dup 2) (parallel [(const_int 0)]))\n+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))])\n+\n+(define_insn_and_split \"*sse3_hsubv2df3_perm\"\n+ [(set (match_operand:V2DF 0 \"register_operand\")\n+ (minus:V2DF\n+ (vec_select:V2DF\n+ (vec_concat:V4DF\n+ (match_operand:V2DF 1 \"register_operand\")\n+ (match_operand:V2DF 2 \"vector_operand\"))\n+ (parallel [(const_int 0) (const_int 2)]))\n+ (vec_select:V2DF\n+ (vec_concat:V4DF (match_dup 1) (match_dup 2))\n+ (parallel [(const_int 1) (const_int 3)]))))]\n+ \"TARGET_SSE3\"\n+ \"#\"\n+ \"&& 1\"\n+ [(set (match_dup 0)\n+ (vec_concat:V2DF\n+ (minus:DF\n+ (vec_select:DF (match_dup 1) (parallel [(const_int 0)]))\n+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))\n+ (minus:DF\n+ (vec_select:DF (match_dup 2) (parallel [(const_int 0)]))\n+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))])\n+\n+(define_insn_and_split \"*sse3_hsubv2df3_perm_rev\"\n+ [(set (match_operand:V2DF 0 \"register_operand\")\n+ (minus:V2DF\n+ (vec_select:V2DF\n+ (vec_concat:V4DF\n+ (match_operand:V2DF 1 \"register_operand\")\n+ (match_operand:V2DF 2 \"vector_operand\"))\n+ (parallel [(const_int 1) (const_int 3)]))\n+ (vec_select:V2DF\n+ (vec_concat:V4DF (match_dup 1) (match_dup 2))\n+ (parallel [(const_int 0) (const_int 2)]))))]\n+ \"TARGET_SSE3\"\n+ \"#\"\n+ \"&& 1\"\n+ [(set (match_dup 0)\n+ (vec_concat:V2DF\n+ (minus:DF\n+ (vec_select:DF (match_dup 1) (parallel [(const_int 0)]))\n+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))\n+ (minus:DF\n+ (vec_select:DF (match_dup 2) (parallel [(const_int 0)]))\n+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))])\n+\n (define_insn \"avx_h<insn>v8sf3\"\n [(set (match_operand:V8SF 0 \"register_operand\" \"=x\")\n \t(vec_concat:V8SF\ndiff --git a/gcc/testsuite/gcc.dg/vect/pr105816.c b/gcc/testsuite/gcc.dg/vect/pr105816.c\nnew file mode 100644\nindex 00000000000..9f9a24aea4e\n--- /dev/null\n+++ b/gcc/testsuite/gcc.dg/vect/pr105816.c\n@@ -0,0 +1,23 @@\n+/* { dg-do compile } */\n+/* { dg-require-effective-target vect_int } */\n+/* { dg-additional-options \"-fdump-tree-slp-details\" } */\n+\n+void test_lo (short * __restrict dst, short *src1, short *src2, int n)\n+{\n+ for (int i = 0; i < n; ++i)\n+ {\n+ dst[0] = src1[0];\n+ dst[1] = src1[1];\n+ dst[2] = src1[2];\n+ dst[3] = src1[3];\n+ dst[4] = src2[0];\n+ dst[5] = src2[1];\n+ dst[6] = src2[2];\n+ dst[7] = src2[3];\n+ dst+=8;\n+ src1+=4;\n+ src2+=4;\n+ }\n+}\n+\n+/* { dg-final { scan-tree-dump-not \"different BIT_FIELD_REF arguments\" \"slp1\" } } */\ndiff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc\nindex 8fa6a740c96..e71118bb56f 100644\n--- a/gcc/tree-vect-slp.cc\n+++ b/gcc/tree-vect-slp.cc\n@@ -1088,6 +1088,23 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,\n return true;\n }\n \n+/* Return true if VEC is an SSA name, of compatible vector type and its\n+ type of the same size as VECTYPE. */\n+\n+static bool\n+compatible_bit_field_ref_source (vec_info *vinfo, tree vec, tree vectype)\n+{\n+ return is_a <bb_vec_info> (vinfo)\n+ && TREE_CODE (vec) == SSA_NAME\n+ /* When the element types are not compatible we pun the\n+ source to the target vectype which requires equal size. */\n+ && ((VECTOR_TYPE_P (TREE_TYPE (vec))\n+\t && types_compatible_p (TREE_TYPE (vectype),\n+\t\t\t\tTREE_TYPE (TREE_TYPE (vec))))\n+\t|| operand_equal_p (TYPE_SIZE (vectype),\n+\t\t\t TYPE_SIZE (TREE_TYPE (vec))));\n+}\n+\n /* Verify if the scalar stmts STMTS are isomorphic, require data\n permutation or are of unsupported types of operation. Return\n true if they are, otherwise return false and indicate in *MATCHES\n@@ -1122,6 +1139,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,\n int first_reduc_idx = -1;\n bool maybe_soft_fail = false;\n tree soft_fail_nunits_vectype = NULL_TREE;\n+ tree other_bfref_source = NULL_TREE;\n \n tree vectype, nunits_vectype;\n if (!vect_get_vector_types_for_stmt (vinfo, first_stmt_info, &vectype,\n@@ -1316,15 +1334,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,\n \t\t && rhs_code == BIT_FIELD_REF)\n \t {\n \t tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);\n-\t if (!is_a <bb_vec_info> (vinfo)\n-\t\t || TREE_CODE (vec) != SSA_NAME\n-\t\t /* When the element types are not compatible we pun the\n-\t\t source to the target vectype which requires equal size. */\n-\t\t || ((!VECTOR_TYPE_P (TREE_TYPE (vec))\n-\t\t || !types_compatible_p (TREE_TYPE (vectype),\n-\t\t\t\t\t TREE_TYPE (TREE_TYPE (vec))))\n-\t\t && !operand_equal_p (TYPE_SIZE (vectype),\n-\t\t\t\t\t TYPE_SIZE (TREE_TYPE (vec)))))\n+\t if (!compatible_bit_field_ref_source (vinfo, vec, vectype))\n \t\t{\n \t\t if (dump_enabled_p ())\n \t\t dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,\n@@ -1422,17 +1432,40 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,\n \t continue;\n \t }\n \n+\t /* Check if the number of distinct BIT_FIELD_REF source operands\n+\t is at most 2. */\n \t if (!ldst_p\n \t && first_stmt_code == BIT_FIELD_REF\n \t && (TREE_OPERAND (gimple_assign_rhs1 (first_stmt_info->stmt), 0)\n \t\t != TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0)))\n \t {\n-\t if (dump_enabled_p ())\n-\t\tdump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,\n-\t\t\t\t \"Build SLP failed: different BIT_FIELD_REF \"\n-\t\t\t\t \"arguments in %G\", stmt);\n-\t /* Mismatch. */\n-\t continue;\n+\t tree cur = TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0);\n+\t if (!other_bfref_source)\n+\t\t{\n+\t\t if (!compatible_bit_field_ref_source (vinfo, cur, vectype))\n+\t\t {\n+\t\t if (dump_enabled_p ())\n+\t\t\tdump_printf_loc (MSG_MISSED_OPTIMIZATION,\n+\t\t\t\t\t vect_location,\n+\t\t\t\t\t \"Build SLP failed: different \"\n+\t\t\t\t\t \"BIT_FIELD_REF arguments in \"\n+\t\t\t\t\t \"%G\", stmt);\n+\t\t /* Mismatch. */\n+\t\t continue;\n+\t\t }\n+\t\t other_bfref_source = cur;\n+\t\t}\n+\t else if (cur != other_bfref_source)\n+\t\t{\n+\t\t if (dump_enabled_p ())\n+\t\t dump_printf_loc (MSG_MISSED_OPTIMIZATION,\n+\t\t\t\t vect_location,\n+\t\t\t\t \"Build SLP failed: different \"\n+\t\t\t\t \"BIT_FIELD_REF arguments in \"\n+\t\t\t\t \"%G\", stmt);\n+\t\t /* Mismatch. */\n+\t\t continue;\n+\t\t}\n \t }\n \n \t if (call_stmt\n@@ -1962,6 +1995,40 @@ vect_slp_build_two_operator_nodes (slp_tree perm, tree vectype,\n SLP_TREE_CHILDREN (perm).quick_push (child2);\n }\n \n+/* Helper for creating a permutation source node when converting a\n+ vec[0] = BIT_FIELD_REF (vec, ...)\n+ into a vector permutation.\n+\n+ Initializes an SLP_TREE with the type of VECTYPE, sets its number of units,\n+ and adds VEC to its VEC_DEFs, returning the new node. */\n+\n+static slp_tree\n+build_perm_source_node (tree vec, tree vectype)\n+{\n+ slp_tree vnode = vect_create_new_slp_node (vNULL);\n+ if (operand_equal_p (TYPE_SIZE (vectype), TYPE_SIZE (TREE_TYPE (vec))))\n+ /* ??? We record vectype here but we hide eventually necessary\n+ punning and instead rely on code generation to materialize\n+ VIEW_CONVERT_EXPRs as necessary. We instead should make\n+ this explicit somehow. */\n+ SLP_TREE_VECTYPE (vnode) = vectype;\n+ else\n+ {\n+ /* For different size but compatible elements we can still\n+\t use VEC_PERM_EXPR without punning. */\n+ gcc_assert (VECTOR_TYPE_P (TREE_TYPE (vec))\n+\t\t && types_compatible_p (TREE_TYPE (vectype),\n+\t\t\t\t\t TREE_TYPE (TREE_TYPE (vec))));\n+ SLP_TREE_VECTYPE (vnode) = TREE_TYPE (vec);\n+ }\n+ auto nunits = TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (vnode));\n+ unsigned HOST_WIDE_INT const_nunits;\n+ if (nunits.is_constant (&const_nunits))\n+ SLP_TREE_LANES (vnode) = const_nunits;\n+ SLP_TREE_VEC_DEFS (vnode).safe_push (vec);\n+ return vnode;\n+}\n+\n /* Recursively build an SLP tree starting from NODE.\n Fail (and return a value not equal to zero) if def-stmts are not\n isomorphic, require data permutation or are of unsupported types of\n@@ -2195,15 +2262,19 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,\n \t && !gimple_vuse (stmt_info->stmt)\n \t && gimple_assign_rhs_code (stmt_info->stmt) == BIT_FIELD_REF)\n {\n- /* vect_build_slp_tree_2 determined all BIT_FIELD_REFs reference\n-\t the same SSA name vector of a compatible type to vectype. */\n+ /* vect_build_slp_tree_1 determined all BIT_FIELD_REFs reference\n+\t at most two SSA name vectors of a compatible type to vectype. */\n vec<std::pair<unsigned, unsigned> > lperm = vNULL;\n tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0);\n+ tree other_vec = NULL_TREE;\n stmt_vec_info estmt_info;\n FOR_EACH_VEC_ELT (stmts, i, estmt_info)\n \t{\n \t gassign *estmt = as_a <gassign *> (estmt_info->stmt);\n \t tree bfref = gimple_assign_rhs1 (estmt);\n+\t tree cur_vec = TREE_OPERAND (bfref, 0);\n+\t if (cur_vec != vec)\n+\t other_vec = cur_vec;\n \t HOST_WIDE_INT lane;\n \t if (!known_eq (bit_field_size (bfref),\n \t\t\t tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (vectype))))\n@@ -2214,39 +2285,27 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,\n \t matches[0] = false;\n \t return NULL;\n \t }\n-\t lperm.safe_push (std::make_pair (0, (unsigned)lane));\n+\t lperm.safe_push (std::make_pair (cur_vec == vec ? 0 : 1,\n+\t\t\t\t\t (unsigned) lane));\n \t}\n- slp_tree vnode = vect_create_new_slp_node (vNULL);\n- if (operand_equal_p (TYPE_SIZE (vectype), TYPE_SIZE (TREE_TYPE (vec))))\n-\t/* ??? We record vectype here but we hide eventually necessary\n-\t punning and instead rely on code generation to materialize\n-\t VIEW_CONVERT_EXPRs as necessary. We instead should make\n-\t this explicit somehow. */\n-\tSLP_TREE_VECTYPE (vnode) = vectype;\n- else\n-\t{\n-\t /* For different size but compatible elements we can still\n-\t use VEC_PERM_EXPR without punning. */\n-\t gcc_assert (VECTOR_TYPE_P (TREE_TYPE (vec))\n-\t\t && types_compatible_p (TREE_TYPE (vectype),\n-\t\t\t\t\t TREE_TYPE (TREE_TYPE (vec))));\n-\t SLP_TREE_VECTYPE (vnode) = TREE_TYPE (vec);\n-\t}\n- auto nunits = TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (vnode));\n- unsigned HOST_WIDE_INT const_nunits;\n- if (nunits.is_constant (&const_nunits))\n-\tSLP_TREE_LANES (vnode) = const_nunits;\n- SLP_TREE_VEC_DEFS (vnode).safe_push (vec);\n+ slp_tree vnode = build_perm_source_node (vec, vectype);\n+ slp_tree vnode2 = NULL;\n+ if (other_vec)\n+\tvnode2 = build_perm_source_node (other_vec, vectype);\n+\n /* We are always building a permutation node even if it is an identity\n \t permute to shield the rest of the vectorizer from the odd node\n \t representing an actual vector without any scalar ops.\n \t ??? We could hide it completely with making the permute node\n \t external? */\n- node = vect_create_new_slp_node (node, stmts, 1);\n+ node = vect_create_new_slp_node (node, stmts, !other_vec ? 1 : 2);\n SLP_TREE_CODE (node) = VEC_PERM_EXPR;\n SLP_TREE_LANE_PERMUTATION (node) = lperm;\n SLP_TREE_VECTYPE (node) = vectype;\n SLP_TREE_CHILDREN (node).quick_push (vnode);\n+ if (other_vec)\n+\tSLP_TREE_CHILDREN (node).quick_push (vnode2);\n+\n return node;\n }\n /* When discovery reaches an associatable operation see whether we can\n", "prefixes": [] }