Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/2194191/?format=api
{ "id": 2194191, "url": "http://patchwork.ozlabs.org/api/patches/2194191/?format=api", "web_url": "http://patchwork.ozlabs.org/project/gcc/patch/aYcG8WtB4ZmYbYG3@tucnak/", "project": { "id": 17, "url": "http://patchwork.ozlabs.org/api/projects/17/?format=api", "name": "GNU Compiler Collection", "link_name": "gcc", "list_id": "gcc-patches.gcc.gnu.org", "list_email": "gcc-patches@gcc.gnu.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<aYcG8WtB4ZmYbYG3@tucnak>", "list_archive_url": null, "date": "2026-02-07T09:33:37", "name": "forwprop: Fix up calc_perm_vec_perm_simplify_seqs [PR123672]", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "edd2bd2a7ecb2a3021324c1f4a2de1b58350e6e2", "submitter": { "id": 671, "url": "http://patchwork.ozlabs.org/api/people/671/?format=api", "name": "Jakub Jelinek", "email": "jakub@redhat.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/gcc/patch/aYcG8WtB4ZmYbYG3@tucnak/mbox/", "series": [ { "id": 491366, "url": "http://patchwork.ozlabs.org/api/series/491366/?format=api", "web_url": "http://patchwork.ozlabs.org/project/gcc/list/?series=491366", "date": "2026-02-07T09:33:37", "name": "forwprop: Fix up calc_perm_vec_perm_simplify_seqs [PR123672]", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/491366/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2194191/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2194191/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "gcc-patches@gcc.gnu.org" ], "Delivered-To": [ "patchwork-incoming@legolas.ozlabs.org", "gcc-patches@gcc.gnu.org" ], "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (1024-bit key;\n unprotected) header.d=redhat.com header.i=@redhat.com header.a=rsa-sha256\n header.s=mimecast20190719 header.b=IIZbdTMn;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org\n (client-ip=38.145.34.32; helo=vm01.sourceware.org;\n envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org;\n receiver=patchwork.ozlabs.org)", "sourceware.org;\n\tdkim=pass (1024-bit key,\n unprotected) header.d=redhat.com header.i=@redhat.com header.a=rsa-sha256\n header.s=mimecast20190719 header.b=IIZbdTMn", "sourceware.org; dmarc=pass (p=quarantine dis=none)\n header.from=redhat.com", "sourceware.org; spf=pass smtp.mailfrom=redhat.com", "server2.sourceware.org;\n arc=none smtp.remote-ip=170.10.129.124" ], "Received": [ "from vm01.sourceware.org (vm01.sourceware.org [38.145.34.32])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4f7QlF71phz1xtV\n\tfor <incoming@patchwork.ozlabs.org>; Sat, 07 Feb 2026 20:34:17 +1100 (AEDT)", "from vm01.sourceware.org (localhost [127.0.0.1])\n\tby sourceware.org (Postfix) with ESMTP id 9AB6E4B920EB\n\tfor <incoming@patchwork.ozlabs.org>; Sat, 7 Feb 2026 09:34:15 +0000 (GMT)", "from us-smtp-delivery-124.mimecast.com\n (us-smtp-delivery-124.mimecast.com [170.10.129.124])\n by sourceware.org (Postfix) with ESMTP id 6B0D54B9DB5E\n for <gcc-patches@gcc.gnu.org>; Sat, 7 Feb 2026 09:33:44 +0000 (GMT)", "from mx-prod-mc-05.mail-002.prod.us-west-2.aws.redhat.com\n (ec2-54-186-198-63.us-west-2.compute.amazonaws.com [54.186.198.63]) by\n relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3,\n cipher=TLS_AES_256_GCM_SHA384) id us-mta-359-ucllXpKmOdSrHgXo6eHfYg-1; Sat,\n 07 Feb 2026 04:33:41 -0500", "from mx-prod-int-08.mail-002.prod.us-west-2.aws.redhat.com\n (mx-prod-int-08.mail-002.prod.us-west-2.aws.redhat.com [10.30.177.111])\n (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest\n SHA256)\n (No client certificate requested)\n by mx-prod-mc-05.mail-002.prod.us-west-2.aws.redhat.com (Postfix) with ESMTPS\n id 01668195609E; Sat, 7 Feb 2026 09:33:41 +0000 (UTC)", "from tucnak.zalov.cz (unknown [10.45.226.62])\n by mx-prod-int-08.mail-002.prod.us-west-2.aws.redhat.com (Postfix) with\n ESMTPS\n id 6F26318003F5; Sat, 7 Feb 2026 09:33:40 +0000 (UTC)", "from tucnak.zalov.cz (localhost [127.0.0.1])\n by tucnak.zalov.cz (8.18.1/8.18.1) with ESMTPS id 6179XbTP747167\n (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT);\n Sat, 7 Feb 2026 10:33:37 +0100", "(from jakub@localhost)\n by tucnak.zalov.cz (8.18.1/8.18.1/Submit) id 6179XbLv747166;\n Sat, 7 Feb 2026 10:33:37 +0100" ], "DKIM-Filter": [ "OpenDKIM Filter v2.11.0 sourceware.org 9AB6E4B920EB", "OpenDKIM Filter v2.11.0 sourceware.org 6B0D54B9DB5E" ], "DMARC-Filter": "OpenDMARC Filter v1.4.2 sourceware.org 6B0D54B9DB5E", "ARC-Filter": "OpenARC Filter v1.0.0 sourceware.org 6B0D54B9DB5E", "ARC-Seal": "i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1770456824; cv=none;\n b=otLSdyd6PpV7Gla4LgzK7ZzPejKmp+oHJo9Jo4Erd34gyTkMg09J9OYUuupKsuGE1Ga66kviQ+jQsVfn7ZA0b34+DOcIgiazZ8ulA9sP8xDT8bIi4B4bLwHgH7o1pMpBRM6fW7oXLieZ0ZzjwBDpgMv8ej6aOuc1AhpeFJxymis=", "ARC-Message-Signature": "i=1; a=rsa-sha256; d=sourceware.org; s=key;\n t=1770456824; c=relaxed/simple;\n bh=H0hL6tnCmOEgzH8mgbei7vKOJWLhQgm07/MG+eRll8w=;\n h=DKIM-Signature:Date:From:To:Subject:Message-ID:MIME-Version;\n b=bVmNFtlUrnM3OKLdBwAzL6uXpZ238abcOEeRIqDp2jp2OSBrJeaB++zS6YNRaDjsPXYewJ1r7MknsTmFhr9JF+h6R/nF6GNlU7QR6IF37ugbXHrZ8cHkqQ5rSW2vdlY0J/Hbflb6c7jykT88zDJ5prir2NZ6KO+oJk5ooNKVn5Y=", "ARC-Authentication-Results": "i=1; server2.sourceware.org", "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n s=mimecast20190719; t=1770456823;\n h=from:from:reply-to:reply-to:subject:subject:date:date:\n message-id:message-id:to:to:cc:cc:mime-version:mime-version:\n content-type:content-type; bh=KajtG94QwEaWEplaFz9oqH/MBVDChgn4S5maCg/ocTs=;\n b=IIZbdTMno98Cm3qZe2UUoQp5tl3gYM0z2FEGyeyMjU+nOfezt86zUrz5UMlz3ZoaLtueKo\n 8OFmx+EFpCnmcDBk5LKthf2+N2xfKtTdeQRNU1b2R2CRo2TDx5Vn4M+L7/+scm8Gei+2YR\n 6alBmBpuuhBjf3mozkPpiRiuzA6MF5o=", "X-MC-Unique": "ucllXpKmOdSrHgXo6eHfYg-1", "X-Mimecast-MFC-AGG-ID": "ucllXpKmOdSrHgXo6eHfYg_1770456821", "Date": "Sat, 7 Feb 2026 10:33:37 +0100", "From": "Jakub Jelinek <jakub@redhat.com>", "To": "Richard Biener <rguenther@suse.de>", "Cc": "gcc-patches@gcc.gnu.org", "Subject": "[PATCH] forwprop: Fix up calc_perm_vec_perm_simplify_seqs [PR123672]", "Message-ID": "<aYcG8WtB4ZmYbYG3@tucnak>", "MIME-Version": "1.0", "X-Scanned-By": "MIMEDefang 3.4.1 on 10.30.177.111", "X-Mimecast-Spam-Score": "0", "X-Mimecast-MFC-PROC-ID": "92CAxujZPvpkGTo86FqTJANSeJ55u1sSvMIG7BLTMkA_1770456821", "X-Mimecast-Originator": "redhat.com", "Content-Type": "text/plain; charset=us-ascii", "Content-Disposition": "inline", "X-BeenThere": "gcc-patches@gcc.gnu.org", "X-Mailman-Version": "2.1.30", "Precedence": "list", "List-Id": "Gcc-patches mailing list <gcc-patches.gcc.gnu.org>", "List-Unsubscribe": "<https://gcc.gnu.org/mailman/options/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>", "List-Archive": "<https://gcc.gnu.org/pipermail/gcc-patches/>", "List-Post": "<mailto:gcc-patches@gcc.gnu.org>", "List-Help": "<mailto:gcc-patches-request@gcc.gnu.org?subject=help>", "List-Subscribe": "<https://gcc.gnu.org/mailman/listinfo/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>", "Reply-To": "Jakub Jelinek <jakub@redhat.com>", "Errors-To": "gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org" }, "content": "Hi!\n\nSince r15-5563-g1c4d39ada we have an optimization to try to blend 2\nsequences of 2xVEC_PERM_EXPR + 2x binop + 1x VEC_PERM where the first two\nVEC_PERMs are permuting a single input and the last one permutes result from\nthose 2 binops into 2 VEC_PERM_EXPRs from 2 inputs, 2 binops and 2 final\nVEC_PERMs.\nOn the following testcase, the intended change (i.e. after patch) is\n(including DCE after it which the optimizations relies on):\n a_7 = *x_6(D);\n b_9 = *y_8(D);\n- c_10 = VEC_PERM_EXPR <a_7, a_7, { 0, 2, 0, 2 }>;\n- d_11 = VEC_PERM_EXPR <a_7, a_7, { 1, 3, 1, 3 }>;\n- e_12 = VEC_PERM_EXPR <b_9, b_9, { 0, 2, 0, 2 }>;\n- f_13 = VEC_PERM_EXPR <b_9, b_9, { 1, 3, 1, 3 }>;\n+ c_10 = VEC_PERM_EXPR <a_7, b_9, { 0, 2, 4, 6 }>;\n+ d_11 = VEC_PERM_EXPR <a_7, b_9, { 1, 3, 5, 7 }>;\n _1 = c_10 + d_11;\n _2 = c_10 - d_11;\n g_14 = VEC_PERM_EXPR <_1, _2, { 0, 4, 1, 5 }>;\n- _3 = e_12 + f_13;\n- _4 = e_12 - f_13;\n- h_15 = VEC_PERM_EXPR <_3, _4, { 0, 4, 1, 5 }>;\n+ h_15 = VEC_PERM_EXPR <_1, _2, { 2, 6, 3, 7 }>;\n *x_6(D) = g_14;\n *y_8(D) = h_15;\nThis works by first identifying the two sequences, attempting to use vect\nelem redundancies to only use at most half of the vector elements\n(in this testcase a nop because 0, 4, 1, 5 perms already use only half of\nthe vector elts), remembering details of such sequences and later comparing\nthem if there are at least two (up to 8 I think) and trying to merge them.\nThe optimization is meant to improve SPEC x264.\nAnyway, in r15-6387-geee289131 the optimization was changed to fix some\nregressions but regressed this testcase, instead of the desirable\n{ 0, 2, 4, 6 } and { 1, 3, 5, 7 } first 2 VEC_PERMs 15 branch and trunk\nuses { 0, 2, 4, 4 } and { 1, 3, 5, 5 } and on this testcase that means\ncomputing incorrect result.\nOn this testcase, it identified the two sequences (one ending with g_14\nand one with h_15 with no changes (see above). The first one (it has\nsome code to attempt to swap them if needed, but here the first one remains\ng_14) keeps using the final VEC_PERM_EXPR as is (or with whatever\nsimplification recognise_vec_perm_simplify_seq performed on just that to\nreduce to at most half of nelts) and the second one is modified so that\nit uses the other elts of the two vectors.\nSo, we have { 0, 4, 1, 5 } (i.e. twice first lanes and twice second lanes)\nfrom the first sequence and look up unused lanes (third and fourth) to\ntransform the other { 0, 4, 1, 5 } to, and find that is { 2, 6, 3, 7 }.\nSo far good. But the next operation is to compute the new selectors\nfor the first 2 VEC_PERM_EXPRs, which are changed from single input to\ntwo input ones. For that, the code correctly uses the VECTOR_CST elts\nunmodified for the lanes used by the first sequence (in this\ntestcase first/second lanes), so { 0, 2, X, X } and { 1, 3, X, X }\nand then need to find out what to use for the needs of the second sequence.\nHere is what it does currently:\n for (i = 0; i < nelts; i++)\n {\n bool use_seq1 = lane_assignment[i] != 2;\n unsigned int l1, l2;\n\n if (use_seq1)\n {\n /* Just reuse the selector indices. */\n tree s1 = gimple_assign_rhs3 (seq1->v_1_stmt);\n tree s2 = gimple_assign_rhs3 (seq1->v_2_stmt);\n l1 = TREE_INT_CST_LOW (VECTOR_CST_ELT (s1, i));\n l2 = TREE_INT_CST_LOW (VECTOR_CST_ELT (s2, i));\n }\n else\n {\n /* We moved the lanes for seq2, so we need to adjust for that. */\n tree s1 = gimple_assign_rhs3 (seq2->v_1_stmt);\n tree s2 = gimple_assign_rhs3 (seq2->v_2_stmt);\n\n unsigned int j = 0;\n for (; j < i; j++)\n {\n unsigned int sel_new;\n sel_new = seq2_stmt_sel_perm[j].to_constant ();\n sel_new %= nelts;\n if (sel_new == i)\n break;\n }\n\n /* This should not happen. Test anyway to guarantee correctness. */\n if (j == i)\n return false;\n\n l1 = TREE_INT_CST_LOW (VECTOR_CST_ELT (s1, j));\n l2 = TREE_INT_CST_LOW (VECTOR_CST_ELT (s2, j));\n }\n\n seq1_v_1_stmt_sel_perm.quick_push (l1 + (use_seq1 ? 0 : nelts));\n seq1_v_2_stmt_sel_perm.quick_push (l2 + (use_seq1 ? 0 : nelts));\n }\nseq2_stmt_sel_perm is the newly computed { 2, 6, 3, 7 } selector and\nseq1->v_{1,2}_stmt are def stmts of {c_10,d_11} and seq2->v_{1,2}_stmt\nare def stmts of {e_12,f_13}. For i 0 and 1 it is use_seq1 and\ncorrect, then for i 2 the loop checks first seq2_stmt_sel_perm[0],\nit is 2 % 4, equal to i, so picks up VECTOR_CST_ELTS (s{1,2}, 2),\nwhich happens to be correct in this case, for i 3 the loop loops until\nseq2_stmt_sel_perm[2] which is 3 % 4, stops and picks the wrong\nVECTOR_CST_ELTS (s{1,2}, 2) which has the same value as\nVECTOR_CST_ELTS (s{1,2}, 0), when the correct value would be in this\ncase either 1 or 3 (due to the duplication).\nWhat the loop should do for !use_seq1 is to take the lane transformations\ninto account, we've changed { 0, 4, 1, 5 } to { 2, 6, 3, 7 }, so instead\nof using lanes 0, 0, 1, 1 we now use lanes 2, 2, 3, 3 (x / 4 is about\nwhich input it is picked from, here + or -). So, for 2 which got remapped\nfrom 0 we want to use 0 and for 3 which got remapped from 1 we want to use\n1.\nThe function uses an auto_vec lane_assignment with values 0 (unused lane,\nso far or altogether), 1 (used by first sequence) and 2 (used by second\nsequence). When we store in there 2, we know exactly which lane we are\nremapping to which lane, so instead of computing it again the following\npatch stores there 2 + l_orig, such that value >= 2 means second lane\nand lane_assignment[i] - 2 in that case is the lane that got remapped to i.\nAnd then the last loop doesn't need to recompute anything and can just use\nthe remembered transformation.\nThe rest of the changes (hunks 1-5 and 7) are just random small fixes I've\nnoticed while trying to understand the code. The real fix is\n- lane_assignment[lane] = 2;\n+ lane_assignment[lane] = 2 + l_orig;\nand\n- bool use_seq1 = lane_assignment[i] != 2;\n+ bool use_seq1 = lane_assignment[i] < 2;\nand the rest of the last hunk. Also, the last loop was kind of assuming\nVEC_PERM_EXPR canonicalization happened and for single input perm the\nselector elts are never >= nelts, I've added %= nelts just to be sure.\n\nBootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?\n\n2026-02-07 Jakub Jelinek <jakub@redhat.com>\n\n\tPR tree-optimization/123672\n\t* tree-ssa-forwprop.cc (recognise_vec_perm_simplify_seq): Use std::swap\n\tinstead of fetching gimple_assign_rhs{1,2} again. Change type of lanes\n\tvector from auto_vec<unsigned int> to auto_vec<bool> and store true\n\tinstead of 1 into it. Fix comment typo and formatting fix.\n\t(can_blend_vec_perm_simplify_seqs_p): Put end of comment on the same\n\tline as the last sentence in it.\n\t(calc_perm_vec_perm_simplify_seqs): Change lane_assignment type from\n\tauto_vec<int> to auto_vec<unsigned> and store 2 + l_orig into it\n\tinstead of true. Fix comment typo and formatting fix. Set use_seq1\n\tto line_assignment[i] < 2 instead of line_assignment[i] != 2. Replace\n\tbogus computation of index for !use_seq with using\n\tline_assignment[i] - 2. Set l1 to l1 % nelts and similarly for l2.\n\n\t* gcc.dg/pr123672.c: New test.\n\n\n\tJakub", "diff": "--- gcc/tree-ssa-forwprop.cc.jj\t2026-02-05 11:14:57.189729922 +0100\n+++ gcc/tree-ssa-forwprop.cc\t2026-02-06 19:27:25.408278359 +0100\n@@ -4617,8 +4617,7 @@ recognise_vec_perm_simplify_seq (gassign\n if (commutative_tree_code (gimple_assign_rhs_code (v_x_stmt)))\n \t{\n \t /* Keep v_x_1 the first operand for non-commutative operators. */\n-\t v_x_1 = gimple_assign_rhs2 (v_x_stmt);\n-\t v_x_2 = gimple_assign_rhs1 (v_x_stmt);\n+\t std::swap (v_x_1, v_x_2);\n \t if (v_x_1 != v_y_1 || v_x_2 != v_y_2)\n \t return false;\n \t}\n@@ -4661,7 +4660,7 @@ recognise_vec_perm_simplify_seq (gassign\n \n /* Create the new selector. */\n vec_perm_builder new_sel_perm (nelts, nelts, 1);\n- auto_vec<unsigned int> lanes (nelts);\n+ auto_vec<bool> lanes (nelts);\n lanes.quick_grow_cleared (nelts);\n for (unsigned int i = 0; i < nelts; i++)\n {\n@@ -4687,7 +4686,7 @@ recognise_vec_perm_simplify_seq (gassign\n new_sel_perm.quick_push (l + offs * nelts);\n \n /* Mark lane as used. */\n- lanes[l] = 1;\n+ lanes[l] = true;\n }\n \n /* Count how many lanes are need. */\n@@ -4699,12 +4698,12 @@ recognise_vec_perm_simplify_seq (gassign\n if (cnt > nelts / 2)\n return false;\n \n- /* Check if the resulting permuation is cheap. */\n+ /* Check if the resulting permutation is cheap. */\n vec_perm_indices new_indices (new_sel_perm, 2, nelts);\n tree vectype = TREE_TYPE (gimple_assign_lhs (stmt));\n machine_mode vmode = TYPE_MODE (vectype);\n if (!can_vec_perm_const_p (vmode, vmode, new_indices, false))\n- return false;\n+ return false;\n \n *seq = XNEW (struct _vec_perm_simplify_seq);\n (*seq)->stmt = stmt;\n@@ -4794,8 +4793,7 @@ can_blend_vec_perm_simplify_seqs_p (vec_\n seq1->v_x_stmt and seq1->v_y_stmt are before it.\n \n Note, that we don't need to check the BBs here, because all\n- statements of both sequences have to be in the same BB.\n- */\n+ statements of both sequences have to be in the same BB. */\n \n tree seq2_v_in = gimple_assign_rhs1 (seq2->v_1_stmt);\n if (TREE_CODE (seq2_v_in) != SSA_NAME)\n@@ -4843,7 +4841,7 @@ calc_perm_vec_perm_simplify_seqs (vec_pe\n {\n unsigned int i;\n unsigned int nelts = seq1->nelts;\n- auto_vec<int> lane_assignment;\n+ auto_vec<unsigned int> lane_assignment;\n lane_assignment.create (nelts);\n \n /* Mark all lanes as free. */\n@@ -4855,7 +4853,7 @@ calc_perm_vec_perm_simplify_seqs (vec_pe\n unsigned int l = TREE_INT_CST_LOW (VECTOR_CST_ELT (seq1->new_sel, i));\n l %= nelts;\n lane_assignment[l] = 1;\n-}\n+ }\n \n /* Allocate lanes for seq2 and calculate selector for seq2->stmt. */\n vec_perm_builder seq2_stmt_sel_perm (nelts, nelts, 1);\n@@ -4896,14 +4894,14 @@ calc_perm_vec_perm_simplify_seqs (vec_pe\n \t }\n \n \t /* Allocate lane. */\n-\t lane_assignment[lane] = 2;\n+\t lane_assignment[lane] = 2 + l_orig;\n \t new_sel = lane + offs * nelts;\n \t}\n \n seq2_stmt_sel_perm.quick_push (new_sel);\n }\n \n- /* Check if the resulting permuation is cheap. */\n+ /* Check if the resulting permutation is cheap. */\n seq2_stmt_indices->new_vector (seq2_stmt_sel_perm, 2, nelts);\n tree vectype = TREE_TYPE (gimple_assign_lhs (seq2->stmt));\n machine_mode vmode = TYPE_MODE (vectype);\n@@ -4915,7 +4913,7 @@ calc_perm_vec_perm_simplify_seqs (vec_pe\n vec_perm_builder seq1_v_2_stmt_sel_perm (nelts, nelts, 1);\n for (i = 0; i < nelts; i++)\n {\n- bool use_seq1 = lane_assignment[i] != 2;\n+ bool use_seq1 = lane_assignment[i] < 2;\n unsigned int l1, l2;\n \n if (use_seq1)\n@@ -4931,25 +4929,12 @@ calc_perm_vec_perm_simplify_seqs (vec_pe\n \t /* We moved the lanes for seq2, so we need to adjust for that. */\n \t tree s1 = gimple_assign_rhs3 (seq2->v_1_stmt);\n \t tree s2 = gimple_assign_rhs3 (seq2->v_2_stmt);\n-\n-\t unsigned int j = 0;\n-\t for (; j < i; j++)\n-\t {\n-\t unsigned int sel_new;\n-\t sel_new = seq2_stmt_sel_perm[j].to_constant ();\n-\t sel_new %= nelts;\n-\t if (sel_new == i)\n-\t\tbreak;\n-\t }\n-\n-\t /* This should not happen. Test anyway to guarantee correctness. */\n-\t if (j == i)\n-\t return false;\n-\n-\t l1 = TREE_INT_CST_LOW (VECTOR_CST_ELT (s1, j));\n-\t l2 = TREE_INT_CST_LOW (VECTOR_CST_ELT (s2, j));\n+\t l1 = TREE_INT_CST_LOW (VECTOR_CST_ELT (s1, lane_assignment[i] - 2));\n+\t l2 = TREE_INT_CST_LOW (VECTOR_CST_ELT (s2, lane_assignment[i] - 2));\n \t}\n \n+ l1 %= nelts;\n+ l2 %= nelts;\n seq1_v_1_stmt_sel_perm.quick_push (l1 + (use_seq1 ? 0 : nelts));\n seq1_v_2_stmt_sel_perm.quick_push (l2 + (use_seq1 ? 0 : nelts));\n }\n--- gcc/testsuite/gcc.dg/pr123672.c.jj\t2026-02-06 19:51:07.648379517 +0100\n+++ gcc/testsuite/gcc.dg/pr123672.c\t2026-02-06 19:50:37.082892783 +0100\n@@ -0,0 +1,33 @@\n+/* PR tree-optimization/123672 */\n+/* { dg-do run } */\n+/* { dg-options \"-O2 -fdump-tree-forwprop1-details\" } */\n+/* { dg-additional-options \"-msse2\" { target i?86-*-* x86_64-*-* } } */\n+/* { dg-final { scan-tree-dump \"Vec perm simplify sequences have been blended\" \"forwprop1\" { target { aarch64*-*-* i?86-*-* x86_64-*-* } } } } */\n+\n+typedef int V __attribute__((vector_size (4 * sizeof (int))));\n+\n+[[gnu::noipa]] void\n+foo (V *x, V *y)\n+{\n+ V a = *x;\n+ V b = *y;\n+ V c = __builtin_shufflevector (a, a, 0, 2, 0, 2);\n+ V d = __builtin_shufflevector (a, a, 1, 3, 1, 3);\n+ V e = __builtin_shufflevector (b, b, 0, 2, 0, 2);\n+ V f = __builtin_shufflevector (b, b, 1, 3, 1, 3);\n+ V g = __builtin_shufflevector (c + d, c - d, 0, 4, 1, 5);\n+ V h = __builtin_shufflevector (e + f, e - f, 0, 4, 1, 5);\n+ *x = g;\n+ *y = h;\n+}\n+\n+int\n+main ()\n+{\n+ V a = { 1, 21, 2, 32 };\n+ V b = { 3, 43, 4, 54 };\n+ foo (&a, &b);\n+ if (a[0] != 22 || a[1] != -20 || a[2] != 34 || a[3] != -30\n+ || b[0] != 46 || b[1] != -40 || b[2] != 58 || b[3] != -50)\n+ __builtin_abort ();\n+}\n", "prefixes": [] }