Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/1.0/patches/2198183/?format=api
{ "id": 2198183, "url": "http://patchwork.ozlabs.org/api/1.0/patches/2198183/?format=api", "project": { "id": 17, "url": "http://patchwork.ozlabs.org/api/1.0/projects/17/?format=api", "name": "GNU Compiler Collection", "link_name": "gcc", "list_id": "gcc-patches.gcc.gnu.org", "list_email": "gcc-patches@gcc.gnu.org", "web_url": null, "scm_url": null, "webscm_url": null }, "msgid": "<bmm.hfy4c1w8xq.gcc.gcc-TEST.why.135.2.4@forge-stage.sourceware.org>", "date": "2026-02-19T14:02:05", "name": "[v2,4/7,Vectorizer] : Improve vectorizable_call to support transformation from standard IFN to its conditional version", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "df91b65a1630620731ba2f6cc6576d1fae91d0f7", "submitter": { "id": 92460, "url": "http://patchwork.ozlabs.org/api/1.0/people/92460/?format=api", "name": "Andrei Tirziu via Sourceware Forge", "email": "forge-bot+why@forge-stage.sourceware.org" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/gcc/patch/bmm.hfy4c1w8xq.gcc.gcc-TEST.why.135.2.4@forge-stage.sourceware.org/mbox/", "series": [ { "id": 492684, "url": "http://patchwork.ozlabs.org/api/1.0/series/492684/?format=api", "date": "2026-02-19T14:02:02", "name": "Vectorizer: New SLP Pattern", "version": 2, "mbox": "http://patchwork.ozlabs.org/series/492684/mbox/" } ], "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2198183/checks/", "tags": {}, "headers": { "Return-Path": "<gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "gcc-patches@gcc.gnu.org" ], "Delivered-To": [ "patchwork-incoming@legolas.ozlabs.org", "gcc-patches@gcc.gnu.org" ], "Authentication-Results": [ "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org\n (client-ip=2620:52:6:3111::32; helo=vm01.sourceware.org;\n envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org;\n receiver=patchwork.ozlabs.org)", "sourceware.org; dmarc=none (p=none dis=none)\n header.from=forge-stage.sourceware.org", "sourceware.org;\n spf=pass smtp.mailfrom=forge-stage.sourceware.org", "server2.sourceware.org;\n arc=none smtp.remote-ip=38.145.34.39" ], "Received": [ "from vm01.sourceware.org (vm01.sourceware.org\n [IPv6:2620:52:6:3111::32])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fGwBr66Cbz1xpl\n\tfor <incoming@patchwork.ozlabs.org>; Fri, 20 Feb 2026 01:05:40 +1100 (AEDT)", "from vm01.sourceware.org (localhost [127.0.0.1])\n\tby sourceware.org (Postfix) with ESMTP id DAF314B9DB79\n\tfor <incoming@patchwork.ozlabs.org>; Thu, 19 Feb 2026 14:05:38 +0000 (GMT)", "from forge-stage.sourceware.org (vm08.sourceware.org [38.145.34.39])\n by sourceware.org (Postfix) with ESMTPS id EFDB74B9DB52\n for <gcc-patches@gcc.gnu.org>; Thu, 19 Feb 2026 14:02:36 +0000 (GMT)", "from forge-stage.sourceware.org (localhost [IPv6:::1])\n (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n key-exchange x25519 server-signature ECDSA (prime256v1) server-digest SHA256)\n (No client certificate requested)\n by forge-stage.sourceware.org (Postfix) with ESMTPS id B6C1144232;\n Thu, 19 Feb 2026 14:02:36 +0000 (UTC)" ], "DKIM-Filter": [ "OpenDKIM Filter v2.11.0 sourceware.org DAF314B9DB79", "OpenDKIM Filter v2.11.0 sourceware.org EFDB74B9DB52" ], "DMARC-Filter": "OpenDMARC Filter v1.4.2 sourceware.org EFDB74B9DB52", "ARC-Filter": "OpenARC Filter v1.0.0 sourceware.org EFDB74B9DB52", "ARC-Seal": "i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1771509757; cv=none;\n b=jXtNAx3RpIopmO0R+W96NIjy/sFu1MIn3b5umvYv5dNQ7vnbrUluSzJAaEDlEEh7R01AIVbc6G9ocS3+bOnAYAXJ8qTTLQ9InZRO/zbh+YdGmMlR8lnDFdCEpE9ouP4c03w32NNSkPtoRc62xTMxIiuVoSi6rjVF+cB1B8JQRGc=", "ARC-Message-Signature": "i=1; a=rsa-sha256; d=sourceware.org; s=key;\n t=1771509757; c=relaxed/simple;\n bh=DTkSsCp3/g2bOL029pjxHuHLW+lD/T+WW7CJ0RCsvMw=;\n h=From:Date:Subject:To:Message-ID;\n b=dpXNp26O3seZaYbTuRb9lKDWPTNNtM29nYRucwBqcjVCcHz1coOXlQsqGH9li4PUZn39VTP5U2wOTPq9f0XZsisG2MZYq+oHKAhcNmYdXGK2KZS+5FdGX3TkibOfILt1WT/IeBc5z8bIlwWNXA6cJVjDRkXtwx6kgDofmfU87dc=", "ARC-Authentication-Results": "i=1; server2.sourceware.org", "From": "Andrei Tirziu via Sourceware Forge\n <forge-bot+why@forge-stage.sourceware.org>", "Date": "Thu, 19 Feb 2026 14:02:05 +0000", "Subject": "[PATCH v2 4/7] [Vectorizer]: Improve vectorizable_call to support\n transformation from standard IFN to its conditional version", "To": "gcc-patches mailing list <gcc-patches@gcc.gnu.org>", "Cc": "Tamar Christina <tamar.christina@arm.com>,\n Victor Do Nascimento <victor.donascimento@arm.com>", "Message-ID": "\n <bmm.hfy4c1w8xq.gcc.gcc-TEST.why.135.2.4@forge-stage.sourceware.org>", "X-Mailer": "batrachomyomachia", "X-Pull-Request-Organization": "gcc", "X-Pull-Request-Repository": "gcc-TEST", "X-Pull-Request": "https://forge.sourceware.org/gcc/gcc-TEST/pulls/135", "References": "\n <bmm.hfy4c1w8xq.gcc.gcc-TEST.why.135.2.0@forge-stage.sourceware.org>", "In-Reply-To": "\n <bmm.hfy4c1w8xq.gcc.gcc-TEST.why.135.2.0@forge-stage.sourceware.org>", "X-Patch-URL": "\n https://forge.sourceware.org/why/gcc/commit/05205cea0e04e14566a259676c50e755f12917b9", "X-BeenThere": "gcc-patches@gcc.gnu.org", "X-Mailman-Version": "2.1.30", "Precedence": "list", "List-Id": "Gcc-patches mailing list <gcc-patches.gcc.gnu.org>", "List-Unsubscribe": "<https://gcc.gnu.org/mailman/options/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>", "List-Archive": "<https://gcc.gnu.org/pipermail/gcc-patches/>", "List-Post": "<mailto:gcc-patches@gcc.gnu.org>", "List-Help": "<mailto:gcc-patches-request@gcc.gnu.org?subject=help>", "List-Subscribe": "<https://gcc.gnu.org/mailman/listinfo/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>", "Reply-To": "gcc-patches mailing list <gcc-patches@gcc.gnu.org>,\n Tamar Christina <tamar.christina@arm.com>,\n Victor Do Nascimento <victor.donascimento@arm.com>,\n andreinichita.tirziu@arm.com", "Errors-To": "gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org" }, "content": "From: Andrei Nichita Tirziu <andreinichita.tirziu@arm.com>\n\nThe `vectorizable_call` function is used to analyze and transform a given\n`gcall` GIMPLE statement.\n\nPrior to this commit, if the original `gcall` was:\n - standard IFN => no change; just add its arguments.\n - conditional IFN => if conditional-len IFN is available,\n do a conversion; then add its arguments,\n plus a `len` and `bias`.\n - conditional-len IFN => no change; just add its arguments.\n\nAfter this commit, if the original `gcall` is:\n - standard IFN\n - if conditional-len IFN is available, do a conversion;\n add its arguments, plus a newly created `mask`, `else`,\n `len` and `bias`;\n - if conditional IFN is available, do a conversion;\n add its arguments, plus a newly created `mask`, `else`;\n - otherwise, leave it as it is and add its arguments.\n - conditional IFN => if conditional-len IFN is available,\n do a conversion; then add its arguments,\n plus a `len` and `bias`.\n - conditional-len IFN => no change; just add its arguments.\n\ngcc/ChangeLog:\n\n\t* internal-fn.h: New helper functions.\n\t* tree-vect-stmts.cc: Improve vectorizable_call function.\n---\n gcc/internal-fn.h | 40 ++++\n gcc/tree-vect-stmts.cc | 457 ++++++++++++++++++++++++++++++++++-------\n 2 files changed, 424 insertions(+), 73 deletions(-)", "diff": "diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h\nindex b97c0dc60315..7195eed4a4b0 100644\n--- a/gcc/internal-fn.h\n+++ b/gcc/internal-fn.h\n@@ -183,6 +183,46 @@ vectorizable_internal_fn_p (internal_fn fn)\n return direct_internal_fn_array[fn].vectorizable;\n }\n \n+/**\n+ * Return true if an internal function should be transformed to its conditional\n+ * version by the vectorizer. The transformation might depend on other things\n+ * as well, such as the availability of an optab (or other conditions),\n+ * so this function only indicates that a transformation should be considered,\n+ * not necessarily applied.\n+ */\n+inline bool transform_to_conditional_version (internal_fn fn)\n+{\n+ switch (fn)\n+ {\n+ case IFN_MATCH_ANY_FROM:\n+ case IFN_MATCH_NONE_FROM:\n+ return true;\n+ default:\n+ return false;\n+ }\n+}\n+\n+/**\n+ * Return true if an internal function should be transformed to its\n+ * conditional-len version by the vectorizer. The transformation\n+ * might depend on other things as well, such as the availability of\n+ * an optab (or other conditions), so this function only indicates that\n+ * a transformation should be considered, not necessarily applied.\n+ */\n+inline bool transform_to_conditional_len_version (internal_fn fn)\n+{\n+ switch (fn)\n+ {\n+ case IFN_MATCH_ANY_FROM:\n+ case IFN_MATCH_NONE_FROM:\n+ case IFN_COND_MATCH_ANY_FROM:\n+ case IFN_COND_MATCH_NONE_FROM:\n+ return true;\n+ default:\n+ return false;\n+ }\n+}\n+\n /* Return optab information about internal function FN. Only meaningful\n if direct_internal_fn_p (FN). */\n \ndiff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc\nindex 83983742467c..6b0d243558bd 100644\n--- a/gcc/tree-vect-stmts.cc\n+++ b/gcc/tree-vect-stmts.cc\n@@ -1311,6 +1311,8 @@ vect_finish_stmt_generation (vec_info *vinfo,\n \t\t\t stmt_vec_info stmt_info, gimple *vec_stmt,\n \t\t\t gimple_stmt_iterator *gsi)\n {\n+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);\n+\n gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);\n \n if (!gsi_end_p (*gsi)\n@@ -1342,6 +1344,16 @@ vect_finish_stmt_generation (vec_info *vinfo,\n \t }\n \t}\n }\n+\n+ /* If we could not re-use an existing virtual operand, updating virtual\n+ SSA form will be needed later. */\n+ if (loop_vinfo\n+ && gimple_has_mem_ops (vec_stmt)\n+ && gimple_vuse (vec_stmt) == NULL_TREE)\n+ {\n+ loop_vinfo->any_known_not_updated_vssa = true;\n+ }\n+\n gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);\n vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);\n }\n@@ -1373,8 +1385,12 @@ vectorizable_internal_function (combined_fn cfn, tree fndecl,\n \t exactly the same when vectype_out isn't participating the optab.\n \t While there is no restriction for type size when vectype_out\n \t is part of the optab query. */\n-\t if (type0 != vectype_out && type1 != vectype_out && !same_size_p)\n-\t return IFN_LAST;\n+ if (ifn != IFN_MATCH_ANY_FROM && ifn != IFN_MATCH_NONE_FROM &&\n+\t ifn != IFN_COND_MATCH_ANY_FROM && ifn != IFN_COND_MATCH_NONE_FROM)\n+\t {\n+\t if (type0 != vectype_out && type1 != vectype_out && !same_size_p)\n+\t return IFN_LAST;\n+\t }\n \n \t if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),\n \t\t\t\t\t OPTIMIZE_FOR_SPEED))\n@@ -3570,14 +3586,24 @@ vectorizable_call (vec_info *vinfo,\n return false;\n }\n \n- if (VECTOR_BOOLEAN_TYPE_P (vectype_out)\n- != VECTOR_BOOLEAN_TYPE_P (vectype_in))\n+ /* In the case of IFN_MATCH_ANY_FROM and IFN_MATCH_NONE_FROM, we know that the\n+ input vectype is an integer vector, while the output is a boolean mask.\n+ So we only check if the vectypes match if we don't have an IFN\n+ or if we have an IFN different from IFN_MATCH_ANY_FROM\n+ and IFN_MATCH_NONE_FROM. */\n+ if (!gimple_call_internal_p (stmt) ||\n+\t(gimple_call_internal_fn (stmt) != IFN_MATCH_ANY_FROM &&\n+\t gimple_call_internal_fn (stmt) != IFN_MATCH_NONE_FROM))\n+ {\n+ if (VECTOR_BOOLEAN_TYPE_P (vectype_out) !=\n+\tVECTOR_BOOLEAN_TYPE_P (vectype_in))\n {\n if (dump_enabled_p ())\n \tdump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,\n \t\t\t \"mixed mask and nonmask vector types\\n\");\n return false;\n }\n+ }\n \n if (vect_emulated_vector_p (vectype_in)\n || vect_emulated_vector_p (vectype_out))\n@@ -3696,10 +3722,76 @@ vectorizable_call (vec_info *vinfo,\n }\n \n int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);\n- internal_fn cond_fn = (internal_fn_mask_index (ifn) != -1\n-\t\t\t ? ifn : get_conditional_internal_fn (ifn));\n- internal_fn cond_len_fn = get_len_internal_fn (ifn);\n+\n+ /* If the original IFN was already conditional, keep it.\n+ Otherwise, for a \"standard\" IFN, we only get its conditional version\n+ if the IFN supports being converted to such a form. */\n+ internal_fn cond_fn = IFN_LAST;\n+ if (internal_fn_mask_index (ifn) != -1)\n+ cond_fn = ifn;\n+ else\n+ {\n+ if (transform_to_conditional_version (ifn) || could_trap)\n+ {\n+ cond_fn = get_conditional_internal_fn (ifn);\n+ }\n+ }\n+\n+ /* If the original IFN was conditional-len, keep it.\n+ If the original IFN was conditional, look for its conditional-len version.\n+ Otherwise, for a \"standard\" IFN, we only get its conditional-len version\n+ if the IFN supports being converted to such a form. */\n+ internal_fn cond_len_fn = IFN_LAST;\n+ if (internal_fn_len_index (ifn) != -1)\n+ cond_len_fn = ifn;\n+ else\n+ {\n+ if (internal_fn_mask_index (ifn) != -1 ||\n+ transform_to_conditional_len_version (ifn) ||\n+ could_trap)\n+ {\n+ cond_len_fn = get_len_internal_fn (ifn);\n+ }\n+ }\n+\n+ if (dump_enabled_p ())\n+ dump_printf_loc (MSG_NOTE, vect_location,\n+\t\t \"For given IFN %s, found potential conditional version \"\n+\t\t \"%s and conditional-len version %s\\n\",\n+\t\t internal_fn_name (ifn),\n+\t\t internal_fn_name (cond_fn),\n+\t\t internal_fn_name (cond_len_fn));\n+\n int len_opno = internal_fn_len_index (cond_len_fn);\n+\n+ /* In accordance with the definition of `direct_internal_fn_info`,\n+ get the types of an internal function (these are given by the values\n+ of the `type0` and `type1` fields). */\n+ auto internal_fn_vector_types = [&](internal_fn fn) -> tree_pair {\n+ if (fn == IFN_LAST || !direct_internal_fn_p (fn))\n+ return tree_pair (NULL_TREE, NULL_TREE);\n+\n+ const direct_internal_fn_info &info = direct_internal_fn (fn);\n+\n+ // We pick a certain type using the definition of `direct_internal_fn_info`.\n+ auto pick_type = [&](int idx) -> tree {\n+ if (idx < 0)\n+\treturn vectype_out;\n+\n+ if ((unsigned) idx < nargs && vectypes[idx])\n+\treturn vectypes[idx];\n+\n+ return vectype_in;\n+ };\n+\n+ return tree_pair (pick_type (info.type0), pick_type (info.type1));\n+ };\n+\n+ /* The types of the conditional versions (with and without the length)\n+ of the IFN. */\n+ tree_pair cond_fn_types = internal_fn_vector_types (cond_fn);\n+ tree_pair cond_len_fn_types = internal_fn_vector_types (cond_len_fn);\n+\n vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);\n vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);\n unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);\n@@ -3709,49 +3801,105 @@ vectorizable_call (vec_info *vinfo,\n \tif (!vect_maybe_update_slp_op_vectype (slp_op[i],\n \t\t\t\t\t vectypes[i]\n \t\t\t\t\t ? vectypes[i] : vectype_in))\n-\t {\n-\t if (dump_enabled_p ())\n-\t dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,\n-\t\t\t \"incompatible vector types for invariants\\n\");\n-\t return false;\n-\t }\n+\t{\n+\t if (dump_enabled_p ())\n+\t dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,\n+\t\t\t \"incompatible vector types for invariants\\n\");\n+\t return false;\n+\t}\n+\n SLP_TREE_TYPE (slp_node) = call_vec_info_type;\n DUMP_VECT_SCOPE (\"vectorizable_call\");\n vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);\n \n+ /* Check if we can use the partial vector, and we either had a reduction,\n+\t an IFN that already had a mask when we called `vectorizable_call`\n+\t (`mask_opno >= 0`) or if the current IFN has a conditional version. */\n if (loop_vinfo\n \t && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)\n-\t && (reduc_idx >= 0 || mask_opno >= 0))\n+\t && (reduc_idx >= 0 || mask_opno >= 0 || cond_fn != IFN_LAST\n+\t || cond_len_fn != IFN_LAST))\n+ {\n+\tif (reduc_idx >= 0\n+\t && (cond_fn == IFN_LAST\n+\t\t|| !direct_internal_fn_supported_p (cond_fn, cond_fn_types,\n+\t\t\t\t\t\t OPTIMIZE_FOR_SPEED))\n+\t && (cond_len_fn == IFN_LAST\n+\t\t|| !direct_internal_fn_supported_p (cond_len_fn,\n+\t\t\t\t\t\t cond_len_fn_types,\n+\t\t\t\t\t\t OPTIMIZE_FOR_SPEED)))\n \t{\n-\t if (reduc_idx >= 0\n-\t && (cond_fn == IFN_LAST\n-\t\t || !direct_internal_fn_supported_p (cond_fn, vectype_out,\n-\t\t\t\t\t\t OPTIMIZE_FOR_SPEED))\n-\t && (cond_len_fn == IFN_LAST\n-\t\t || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,\n-\t\t\t\t\t\t OPTIMIZE_FOR_SPEED)))\n-\t {\n-\t if (dump_enabled_p ())\n-\t\tdump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,\n-\t\t\t\t \"can't use a fully-masked loop because no\"\n-\t\t\t\t \" conditional operation is available.\\n\");\n-\t LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;\n-\t }\n+\t if (dump_enabled_p ())\n+\t dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,\n+\t\t\t \"can't use a fully-masked loop because no\"\n+\t\t\t \" conditional operation is available.\\n\");\n+\t LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;\n+\t}\n+\telse if (reduc_idx >= 0 || mask_opno >= 0)\n+\t{\n+\t tree scalar_mask = NULL_TREE;\n+\t if (mask_opno >= 0)\n+\t scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);\n+\t if (cond_len_fn != IFN_LAST\n+\t && direct_internal_fn_supported_p (cond_len_fn, cond_len_fn_types,\n+\t\t\t\t\t\t OPTIMIZE_FOR_SPEED))\n+\t vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out, 1);\n \t else\n-\t {\n-\t tree scalar_mask = NULL_TREE;\n-\t if (mask_opno >= 0)\n-\t\tscalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);\n-\t if (cond_len_fn != IFN_LAST\n-\t\t && direct_internal_fn_supported_p (cond_len_fn, vectype_out,\n-\t\t\t\t\t\t OPTIMIZE_FOR_SPEED))\n-\t\tvect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,\n-\t\t\t\t 1);\n-\t else\n-\t\tvect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,\n-\t\t\t\t scalar_mask);\n-\t }\n+\t vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,\n+\t\t\t\t scalar_mask);\n \t}\n+\telse\n+\t{\n+\t /* In this case, we know that we don't have a reduction, and the\n+\t IFN given to `vectorizable_call` didn't have a mask initially.\n+\t But, the given IFN has at least one conditional version. */\n+\n+\t if (cond_len_fn != IFN_LAST &&\n+\t direct_internal_fn_supported_p (cond_len_fn, cond_len_fn_types,\n+\t\t\t\t\t OPTIMIZE_FOR_SPEED))\n+\t {\n+\t /* We have a conditional-len version and there is a direct optab\n+\t that supports it. */\n+\t vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out, 1);\n+\t LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;\n+\n+\t if (dump_enabled_p ())\n+\t dump_printf_loc (MSG_NOTE, vect_location,\n+\t\t\t \"Choosing conditional-len (%s) version \"\n+\t\t\t \"of original IFN %s\\n\",\n+\t\t\t internal_fn_name (cond_len_fn),\n+\t\t\t internal_fn_name (ifn));\n+\t }\n+\t else if (cond_fn != IFN_LAST &&\n+\t\t direct_internal_fn_supported_p (cond_fn, cond_fn_types,\n+\t\t\t\t\t\t OPTIMIZE_FOR_SPEED))\n+\t {\n+\t /* We have a conditional version and there is a direct optab that\n+\t supports it. */\n+\t vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,\n+\t\t\t\t NULL_TREE);\n+\t LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;\n+\n+\t if (dump_enabled_p ())\n+\t dump_printf_loc (MSG_NOTE, vect_location,\n+\t\t\t \"Choosing conditional (%s) version \"\n+\t\t\t \"of original IFN %s\\n\",\n+\t\t\t internal_fn_name (cond_fn),\n+\t\t\t internal_fn_name (ifn));\n+\t }\n+\t else\n+\t {\n+\t /* Even though we had conditional versions,\n+\t they are not supported by a direct optab. */\n+\n+\t if (dump_enabled_p ())\n+\t dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,\n+\t\t\t \"can't use a fully-masked loop because no\"\n+\t\t\t \" conditional operation is available.\\n\");\n+\t LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;\n+\t }\n+\t}\n+ }\n return true;\n }\n \n@@ -3767,22 +3915,71 @@ vectorizable_call (vec_info *vinfo,\n bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);\n bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);\n unsigned int vect_nargs = nargs;\n+\n+ if (dump_enabled_p ())\n+\t dump_printf_loc (MSG_NOTE, vect_location,\n+\t\t\t \"Transform phase: \"\n+\t\t\t \"masked_loop_p = %d , \"\n+\t\t\t \"len_loop_p = %d\\n\",\n+\t\t\t masked_loop_p, len_loop_p);\n+\n if (len_loop_p)\n {\n+ /* In this case, we are supposed to transform the original IFN with\n+\t its conditional-len version. */\n+\n if (len_opno >= 0)\n+ {\n+\t/* In this case, we'll have a conditional-len function, which might have\n+\t mask + else + len + bias. Unlike `mask_opno` which comes from the\n+\t original IFN, `len_opno` comes from the conditional-len version\n+\t of the original call, so just because we are in this if,\n+\t it doesn't mean that the original IFN had a mask.\n+\t We know from the analysis phase that the conditional-len version\n+\t exists, and that it is supported by an optab. */\n+\n+\tif (mask_opno == -1 && internal_fn_mask_index (cond_len_fn) >= 0)\n+\t{\n+\t /* The original IFN had no mask (this also implies that\n+\t it had no len).\n+\t This means that we have to add 4 arguments (mask, else, len, bias).\n+\t */\n+\t vect_nargs += 4;\n+\t}\n+\telse\n \t{\n-\t ifn = cond_len_fn;\n-\t /* COND_* -> COND_LEN_* takes 2 extra arguments:LEN,BIAS. */\n+\t /* The original IFN either had a mask (in which case `mask_opno >= 0`)\n+\t or the conditional-len version doesn't require a mask.\n+\t This means that we only have to add 2 arguments (len, bias). */\n \t vect_nargs += 2;\n \t}\n+\n+\tifn = cond_len_fn;\n+ }\n else if (reduc_idx >= 0)\n \tgcc_unreachable ();\n }\n- else if (masked_loop_p && mask_opno == -1 && (reduc_idx >= 0 || could_trap))\n+ else if (masked_loop_p && mask_opno == -1 && reduc_idx >= 0)\n+ {\n+ ifn = cond_fn;\n+ vect_nargs += 2;\n+ }\n+ else if (masked_loop_p\n+\t && mask_opno == -1\n+\t && cond_fn != IFN_LAST\n+ && internal_fn_mask_index (cond_fn) >= 0)\n {\n+\n+ /* In this case, we are supposed to transform the orignal,\n+\t non-masked IFN (since `mask_opno == -1`) to its conditonal version.\n+\t We know from the analysis phase that the conditional version exists,\n+\t and that it is supported by an optab.\n+\t We have to add 2 arguments (mask, else). */\n+\n ifn = cond_fn;\n vect_nargs += 2;\n }\n+\n if (clz_ctz_arg1)\n ++vect_nargs;\n \n@@ -3819,39 +4016,143 @@ vectorizable_call (vec_info *vinfo,\n \n \t /* Arguments are ready. Create the new vector stmt. */\n \t FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)\n+\t {\n+\t /* The indices of the mask, else, len. The index of the bias can\n+\t be determined from the index of len. */\n+\t int mask_index = internal_fn_mask_index (ifn);\n+\t int else_index = internal_fn_else_index (ifn);\n+\t int len_index = internal_fn_len_index (ifn);\n+\n+\t // If we generate a new loop mask, keep track of it here.\n+\t tree loop_mask_for_stmt = NULL_TREE;\n+\n+\t if (dump_enabled_p ())\n+\t dump_printf_loc (MSG_NOTE, vect_location,\n+\t\t\t \"Transform phase: For IFN %s, got \"\n+\t\t\t \"mask_index = %d , \"\n+\t\t\t \"else_index = %d , \"\n+\t\t\t \"len_index = %d , \"\n+\t\t\t \"bias_index = %d\\n\",\n+\t\t\t internal_fn_name (ifn),\n+\t\t\t mask_index, else_index,\n+\t\t\t len_index, len_index + 1);\n+\n+\t if (mask_index < 0 && else_index >= 0)\n+\t {\n+\t // An else operand is only meaningful if there is a mask.\n+\t gcc_unreachable ();\n+\t }\n+\n+\t /* If `mask_opno != -1`, we already had a mask for this loop (of\n+\t course, other things such as len, bias might still have\n+\t to be added).\n+\t Otherwise, we need to also create a mask. */\n+\t bool needs_new_loop_mask = ((masked_loop_p || len_loop_p)\n+\t\t\t\t\t&& mask_opno == -1\n+\t\t\t\t\t&& mask_index >= 0);\n+\n+\t /* Helper to identify if a slot is reserved for mask/else/len so\n+\t that we avoid it, and drop the other operands into the\n+\t correct positions. */\n+\t auto is_reserved_slot = [&](int idx) -> bool {\n+\t if (needs_new_loop_mask)\n+\t {\n+\t\t/* We didn't have a mask originally, but we have to\n+\t\t build one now. We need to keep clear the positions\n+\t\t of the mask and else.\n+\t\t If it already had a mask, there's no need to reserve certain\n+\t\t slots, as the mask and else would already be among it's usual\n+\t\t arguments that we copy. */\n+\t\treturn idx == mask_index || (else_index >= 0\n+\t\t\t\t\t && idx == else_index);\n+\t }\n+\n+\t if (len_loop_p)\n+\t {\n+\t\t// We need to keep clear the slots for len and bias.\n+\t\treturn idx == len_index || idx == (len_index + 1);\n+\t }\n+\n+\t return false;\n+\t };\n+\n+\t /* If the initial call had no explicit mask but we need it now,\n+\t synthesize the loop mask into the conditional IFN's mask operand.\n+\t */\n+\t if (needs_new_loop_mask)\n \t {\n-\t int varg = 0;\n-\t /* Add the mask if necessary. */\n-\t if (masked_loop_p && mask_opno == -1\n-\t\t && (reduc_idx >= 0 || could_trap))\n+\t tree new_loop_mask;\n+\t if (masked_loop_p)\n+\t {\n+\t\t// We have a conditional version of the original IFN.\n+\t\tunsigned int vec_num = vec_oprnds0.length ();\n+\t\tnew_loop_mask = vect_get_loop_mask (loop_vinfo, gsi, masks,\n+\t\t\t\t\t\t vec_num, vectype_out, i);\n+\n+\t\t/* We only set this if we have a masked loop (conditional\n+\t\t version). In the case of conditional-len versions, there's\n+\t\t no need to carry the loop mask. */\n+\t\tloop_mask_for_stmt = new_loop_mask;\n+\t }\n+\t else\n+\t {\n+\t\t// We have a conditional-len version of the original IFN.\n+\t\tnew_loop_mask =\n+\t\t\tbuild_minus_one_cst (truth_type_for (vectype_out));\n+\t }\n+\n+\t vargs[mask_index] = new_loop_mask;\n+\n+\t // Check if we also need to add the else operand.\n+\t if (else_index >= 0)\n+\t {\n+\t\ttree else_val = NULL_TREE;\n+\n+\t\tif (reduc_idx >= 0)\n \t\t{\n-\t\t gcc_assert (internal_fn_mask_index (ifn) == varg);\n-\t\t unsigned int vec_num = vec_oprnds0.length ();\n-\t\t vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks,\n-\t\t\t\t\t\t vec_num, vectype_out, i);\n+\t\t // For reductions, use the running reduction value.\n+\t\t else_val = vec_defs[reduc_idx][i];\n \t\t}\n-\t size_t k;\n-\t for (k = 0; k < nargs; k++)\n+\t\telse\n \t\t{\n-\t\t vec<tree> vec_oprndsk = vec_defs[k];\n-\t\t vargs[varg++] = vec_oprndsk[i];\n+\t\t /* Pick the target-preferred inactive value when there is\n+\t\t no reduction seed we can reuse. */\n+\t\t auto_vec<tree> data_ops;\n+\t\t for (size_t k = 0; k < nargs; k++)\n+\t\t data_ops.safe_push (vec_defs[k][i]);\n+\t\t else_val = targetm.preferred_else_value (ifn,\n+\t\t\t\t\t\t\t vectype_out,\n+\t\t\t\t\t\t\t data_ops.length (),\n+\t\t\t\t\t\t\t data_ops.address ());\n \t\t}\n-\t /* Add the else value if necessary. */\n-\t if (masked_loop_p && mask_opno == -1\n-\t\t && (reduc_idx >= 0 || could_trap))\n-\t\t{\n-\t\t gcc_assert (internal_fn_else_index (ifn) == varg);\n-\t\t if (reduc_idx >= 0)\n-\t\t vargs[varg++] = vargs[reduc_idx + 1];\n-\t\t else\n-\t\t {\n-\t\t auto else_value = targetm.preferred_else_value\n-\t\t\t(cond_fn, vectype_out, varg - 1, &vargs[1]);\n-\t\t vargs[varg++] = else_value;\n-\t\t }\n-\t\t}\n-\t if (clz_ctz_arg1)\n-\t\tvargs[varg++] = clz_ctz_arg1;\n+\n+\t\tvargs[else_index] = else_val;\n+\t }\n+\t }\n+\n+\t /* Copy the vector arguments into the non-reserved slots, skipping\n+\t over mask/else/len positions as needed. */\n+\t unsigned int current_varg_slot = 0;\n+\t for (size_t k = 0; k < nargs; k++)\n+\t {\n+\t while (is_reserved_slot (current_varg_slot))\n+\t\tcurrent_varg_slot++;\n+\n+\t vec<tree> vec_oprndsk = vec_defs[k];\n+\t vargs[current_varg_slot] = vec_oprndsk[i];\n+\t current_varg_slot++;\n+\t }\n+\n+\t /* Ensure the clz/ctz extra argument, if present, lands after the\n+\t synthesized mask/else/len operands. */\n+\t if (clz_ctz_arg1)\n+\t {\n+\t while (is_reserved_slot (current_varg_slot))\n+\t\tcurrent_varg_slot++;\n+\n+\t vargs[current_varg_slot] = clz_ctz_arg1;\n+\t current_varg_slot++;\n+\t }\n \n \t gimple *new_stmt;\n \t if (modifier == NARROW)\n@@ -3907,6 +4208,17 @@ vectorizable_call (vec_info *vinfo,\n \t\t gimple_call_set_nothrow (call, true);\n \t\t vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);\n \t\t new_stmt = call;\n+\n+\t\t /* The result of this call is already masked by LOOP_MASK,\n+\t\t so note that to avoid re-applying it later. */\n+\t\t if (loop_mask_for_stmt\n+\t\t && loop_vinfo\n+\t\t && VECTOR_BOOLEAN_TYPE_P (vectype_out))\n+\t\t {\n+\t\t loop_vinfo->vec_cond_masked_set.add ({ new_temp,\n+\t\t\t\t\t\t\t loop_mask_for_stmt\n+\t\t\t\t\t\t\t});\n+\t\t }\n \t\t}\n \t slp_node->push_vec_def (new_stmt);\n \t }\n@@ -14822,4 +15134,3 @@ vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)\n \n return stmts;\n }\n-\n", "prefixes": [ "v2", "4/7", "Vectorizer" ] }