Patch Detail

GET /api/1.1/patches/2233392/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 2233392,
    "url": "http://patchwork.ozlabs.org/api/1.1/patches/2233392/?format=api",
    "web_url": "http://patchwork.ozlabs.org/project/gcc/patch/20260506070415.99154-1-wangyaduo@linux.alibaba.com/",
    "project": {
        "id": 17,
        "url": "http://patchwork.ozlabs.org/api/1.1/projects/17/?format=api",
        "name": "GNU Compiler Collection",
        "link_name": "gcc",
        "list_id": "gcc-patches.gcc.gnu.org",
        "list_email": "gcc-patches@gcc.gnu.org",
        "web_url": null,
        "scm_url": null,
        "webscm_url": null
    },
    "msgid": "<20260506070415.99154-1-wangyaduo@linux.alibaba.com>",
    "date": "2026-05-06T07:04:15",
    "name": "RISC-V: Add per-type reduction costs to the vector cost model",
    "commit_ref": null,
    "pull_url": null,
    "state": "new",
    "archived": false,
    "hash": "251f5d482500d66690a07a1b21d3da3faf86c1b7",
    "submitter": {
        "id": 93342,
        "url": "http://patchwork.ozlabs.org/api/1.1/people/93342/?format=api",
        "name": "Wang Yaduo",
        "email": "wangyaduo@linux.alibaba.com"
    },
    "delegate": null,
    "mbox": "http://patchwork.ozlabs.org/project/gcc/patch/20260506070415.99154-1-wangyaduo@linux.alibaba.com/mbox/",
    "series": [
        {
            "id": 502964,
            "url": "http://patchwork.ozlabs.org/api/1.1/series/502964/?format=api",
            "web_url": "http://patchwork.ozlabs.org/project/gcc/list/?series=502964",
            "date": "2026-05-06T07:04:15",
            "name": "RISC-V: Add per-type reduction costs to the vector cost model",
            "version": 1,
            "mbox": "http://patchwork.ozlabs.org/series/502964/mbox/"
        }
    ],
    "comments": "http://patchwork.ozlabs.org/api/patches/2233392/comments/",
    "check": "pending",
    "checks": "http://patchwork.ozlabs.org/api/patches/2233392/checks/",
    "tags": {},
    "headers": {
        "Return-Path": "<gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org>",
        "X-Original-To": [
            "incoming@patchwork.ozlabs.org",
            "gcc-patches@gcc.gnu.org"
        ],
        "Delivered-To": [
            "patchwork-incoming@legolas.ozlabs.org",
            "gcc-patches@gcc.gnu.org"
        ],
        "Authentication-Results": [
            "legolas.ozlabs.org;\n\tdkim=pass (1024-bit key;\n unprotected) header.d=linux.alibaba.com header.i=@linux.alibaba.com\n header.a=rsa-sha256 header.s=default header.b=da5npsRI;\n\tdkim-atps=neutral",
            "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org\n (client-ip=2620:52:6:3111::32; helo=vm01.sourceware.org;\n envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org;\n receiver=patchwork.ozlabs.org)",
            "sourceware.org;\n\tdkim=pass (1024-bit key,\n unprotected) header.d=linux.alibaba.com header.i=@linux.alibaba.com\n header.a=rsa-sha256 header.s=default header.b=da5npsRI",
            "sourceware.org; dmarc=pass (p=none dis=none)\n header.from=linux.alibaba.com",
            "sourceware.org;\n spf=pass smtp.mailfrom=linux.alibaba.com",
            "sourceware.org; arc=none smtp.remote-ip=115.124.30.100"
        ],
        "Received": [
            "from vm01.sourceware.org (vm01.sourceware.org\n [IPv6:2620:52:6:3111::32])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4g9XLg5ysmz1yJV\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 06 May 2026 20:54:02 +1000 (AEST)",
            "from vm01.sourceware.org (localhost [IPv6:::1])\n\tby sourceware.org (Postfix) with ESMTP id 707A54B9DB5F\n\tfor <incoming@patchwork.ozlabs.org>; Wed,  6 May 2026 10:54:00 +0000 (GMT)",
            "from out30-100.freemail.mail.aliyun.com\n (out30-100.freemail.mail.aliyun.com [115.124.30.100])\n by sourceware.org (Postfix) with ESMTPS id 7C75C4BA2E38\n for <gcc-patches@gcc.gnu.org>; Wed,  6 May 2026 07:04:22 +0000 (GMT)",
            "from localhost.localdomain(mailfrom:wangyaduo@linux.alibaba.com\n fp:SMTPD_---0X2MLDff_1778051057 cluster:ay36) by smtp.aliyun-inc.com;\n Wed, 06 May 2026 15:04:18 +0800"
        ],
        "DKIM-Filter": [
            "OpenDKIM Filter v2.11.0 sourceware.org 707A54B9DB5F",
            "OpenDKIM Filter v2.11.0 sourceware.org 7C75C4BA2E38"
        ],
        "DMARC-Filter": "OpenDMARC Filter v1.4.2 sourceware.org 7C75C4BA2E38",
        "ARC-Filter": "OpenARC Filter v1.0.0 sourceware.org 7C75C4BA2E38",
        "ARC-Seal": "i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1778051064; cv=none;\n b=R4fNR4E3+OQWZIFsi/JJRDlqnmbhCY3o1IWbc3PdF66eK/HFhOtCPdSpGrRmvOIFEZoYlZS6g8xKYufIjNaWxFxK4ZLbcTaajsEN9sPwkWcXST1rP9SjzDNiKBlHu++7XlvN7Vhn+c70rdR+W9hpFkEjaxvhJanF7sl5CNddctY=",
        "ARC-Message-Signature": "i=1; a=rsa-sha256; d=sourceware.org; s=key;\n t=1778051064; c=relaxed/simple;\n bh=plLZEEI04XfoRaDzcEufgMsvdL9HeXKjxTU+5r3V0v0=;\n h=DKIM-Signature:From:To:Subject:Date:Message-ID:MIME-Version;\n b=opOqI6MBAVv4txJy+ja7H46KKQWBPlUkk27CCOpeNcVgDea2v8oBuhsQfpsd6ziwg4QCLGFpMMx4b78ZDfOgkSHvXtHCXTZ7WJ7sUcjDqPBHvP+R4Zr+5xACeGnrVVI4kBvg0+0hma49Yj0jTVKAscvxZZU04EsHbmPITtqcKmE=",
        "ARC-Authentication-Results": "i=1; sourceware.org;\n dkim=pass (1024-bit key, unprotected)\n header.d=linux.alibaba.com header.i=@linux.alibaba.com header.a=rsa-sha256\n header.s=default header.b=da5npsRI",
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=linux.alibaba.com; s=default;\n t=1778051060; h=From:To:Subject:Date:Message-ID:MIME-Version;\n bh=jRqEgO9vLWpPjYX/N2fAY63U1BUs+snC2xwhIuso06c=;\n b=da5npsRIBe/rCu4uIIsORd028/DYnbTEMNDOCyv9zUJwimS/VQ+AklKdfN7t2mJYcOmvvJGnCPZ1MVWykzNn0/PIb+wmd4qFXbkoCGRZKcmNfttDF3hZOIzNQNAoIKQPQ118L9nNt3Yh9bI13RsruPFXlE0t/SY8JuH03cjO3ys=",
        "X-Alimail-AntiSpam": "AC=PASS; BC=-1|-1; BR=01201311R151e4; CH=green;\n DM=||false|;\n DS=||; FP=0|-1|-1|-1|0|-1|-1|-1; HT=maildocker-contentspam033037026112;\n MF=wangyaduo@linux.alibaba.com; NM=1; PH=DS; RN=9; SR=0;\n TI=SMTPD_---0X2MLDff_1778051057;",
        "From": "Wang Yaduo <wangyaduo@linux.alibaba.com>",
        "To": "gcc-patches@gcc.gnu.org",
        "Cc": "rdapp.gcc@gmail.com, kito.cheng@gmail.com, juzhe.zhong@rivai.ai,\n palmer@dabbelt.com, pan2.li@intel.com, jeffreyalaw@gmail.com,\n chenzhongyao.hit@gmail.com, Wang Yaduo <wangyaduo@linux.alibaba.com>",
        "Subject": "[PATCH] RISC-V: Add per-type reduction costs to the vector cost model",
        "Date": "Wed,  6 May 2026 15:04:15 +0800",
        "Message-ID": "<20260506070415.99154-1-wangyaduo@linux.alibaba.com>",
        "X-Mailer": "git-send-email 2.54.0",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "gcc-patches@gcc.gnu.org",
        "X-Mailman-Version": "2.1.30",
        "Precedence": "list",
        "List-Id": "Gcc-patches mailing list <gcc-patches.gcc.gnu.org>",
        "List-Unsubscribe": "<https://gcc.gnu.org/mailman/options/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>",
        "List-Archive": "<https://gcc.gnu.org/pipermail/gcc-patches/>",
        "List-Post": "<mailto:gcc-patches@gcc.gnu.org>",
        "List-Help": "<mailto:gcc-patches-request@gcc.gnu.org?subject=help>",
        "List-Subscribe": "<https://gcc.gnu.org/mailman/listinfo/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>",
        "Errors-To": "gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org"
    },
    "content": "Add per-type reduction costs (i8/i16/i32/i64/f16/f32/f64) to the RISC-V\nvector cost model, distinguishing between ordered (fold-left) and\nunordered (tree) floating-point reductions.  When a reduction is\ndetected, the per-type cost replaces the default vec_to_scalar_cost,\nsimilar to AArch64.  This causes _Float16 n=4 ordered reductions to no\nlonger be vectorized in VLS mode due to the higher cost.\n\ngcc/ChangeLog:\n\n\t* config/riscv/riscv-protos.h (common_vector_cost): Add per-type\n\treduction cost fields: reduc_i8_cost, reduc_i16_cost,\n\treduc_i32_cost, reduc_i64_cost, reduc_f16_cost, reduc_f32_cost,\n\treduc_f64_cost for unordered reductions, and reduc_f16_ordered_cost,\n\treduc_f32_ordered_cost, reduc_f64_ordered_cost for ordered\n\t(fold-left) reductions.\n\t* config/riscv/riscv.cc (rvv_vla_vector_cost): Initialize reduction\n\tcost fields with default values.\n\t(rvv_vls_vector_cost): Likewise.\n\t* config/riscv/riscv-vector-costs.cc (costs::adjust_stmt_cost): Add\n\treduction detection in the vec_to_scalar case.  When a reduction is\n\tdetected, replace the default vec_to_scalar_cost with the\n\tappropriate per-type reduction cost based on element mode and\n\treduction kind (ordered vs unordered).\n\ngcc/testsuite/ChangeLog:\n\n\t* gcc.target/riscv/rvv/autovec/reduc/reduc_cost-1.c: New test for\n\tVLA unordered reduction costs.\n\t* gcc.target/riscv/rvv/autovec/reduc/reduc_cost-2.c: New test for\n\tVLA ordered reduction costs.\n\t* gcc.target/riscv/rvv/autovec/vls/reduc_cost-1.c: New test for\n\tVLS reduction costs.\n\t* gcc.target/riscv/rvv/autovec/vls/reduc-19.c: Update expected\n\tvfredosum count from 9 to 8.\n\t* gcc.target/riscv/rvv/autovec/vls/wred-3.c: Update expected\n\tvfwredosum count from 17 to 16.\n\nSigned-off-by: Wang Yaduo <wangyaduo@linux.alibaba.com>\n---\n gcc/config/riscv/riscv-protos.h               | 20 +++++-\n gcc/config/riscv/riscv-vector-costs.cc        | 68 ++++++++++++++++++-\n gcc/config/riscv/riscv.cc                     | 20 ++++++\n .../riscv/rvv/autovec/reduc/reduc_cost-1.c    | 34 ++++++++++\n .../riscv/rvv/autovec/reduc/reduc_cost-2.c    | 34 ++++++++++\n .../riscv/rvv/autovec/vls/reduc-19.c          |  4 +-\n .../riscv/rvv/autovec/vls/reduc_cost-1.c      | 41 +++++++++++\n .../gcc.target/riscv/rvv/autovec/vls/wred-3.c |  4 +-\n 8 files changed, 219 insertions(+), 6 deletions(-)\n create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_cost-1.c\n create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_cost-2.c\n create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc_cost-1.c",
    "diff": "diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h\nindex dd029c704..5da5a6a21 100644\n--- a/gcc/config/riscv/riscv-protos.h\n+++ b/gcc/config/riscv/riscv-protos.h\n@@ -279,6 +279,24 @@ struct common_vector_cost\n \n   /* Cost of an unaligned vector store.  */\n   const int unalign_store_cost;\n+\n+  /* Cost of vector reduction operations (unordered / tree reduction).\n+     Indexed by element type.  */\n+  const int reduc_i8_cost;\n+  const int reduc_i16_cost;\n+  const int reduc_i32_cost;\n+  const int reduc_i64_cost;\n+  const int reduc_f16_cost;\n+  const int reduc_f32_cost;\n+  const int reduc_f64_cost;\n+\n+  /* Cost of ordered (fold-left / strict) floating-point reductions.\n+     These are significantly more expensive than unordered (tree) reductions\n+     because RVV ordered reduction instructions (e.g. vfredosum) process\n+     elements sequentially.  */\n+  const int reduc_f16_ordered_cost;\n+  const int reduc_f32_ordered_cost;\n+  const int reduc_f64_ordered_cost;\n };\n \n /* scalable vectorization (VLA) specific cost.  */\n@@ -289,7 +307,7 @@ struct scalable_vector_cost : common_vector_cost\n   {}\n \n   /* TODO: We will need more other kinds of vector cost for VLA.\n-     E.g. fold_left reduction cost, lanes load/store cost, ..., etc.  */\n+     E.g. lanes load/store cost, ..., etc.  */\n };\n \n /* Additional costs for register copies.  Cost is for one register.  */\ndiff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc\nindex f582551eb..a837e4879 100644\n--- a/gcc/config/riscv/riscv-vector-costs.cc\n+++ b/gcc/config/riscv/riscv-vector-costs.cc\n@@ -1253,9 +1253,71 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,\n \t+= (FLOAT_TYPE_P (vectype) ? get_fr2vr_cost () : get_gr2vr_cost ());\n       break;\n     case vec_to_scalar:\n-      stmt_cost\n-\t+= (FLOAT_TYPE_P (vectype) ? get_vr2fr_cost () : get_vr2gr_cost ());\n-      break;\n+      {\n+\t/* Detect reduction operations and apply type-specific reduction\n+\t   costs.  The vec_to_scalar cost kind represents the reduction\n+\t   operation itself (e.g. vredsum.vs, vfredosum.vs), so we replace\n+\t   the default vec_to_scalar_cost with a more precise per-type cost.\n+\t   For floating-point reductions, distinguish between ordered\n+\t   (fold-left, e.g. vfredosum) and unordered (tree, e.g. vfredusum)\n+\t   reductions since ordered reductions are significantly more\n+\t   expensive due to sequential processing.  */\n+\tif (stmt_info && vectype && vect_is_reduction (stmt_info))\n+\t  {\n+\t    const common_vector_cost *common_costs\n+\t      = loop && riscv_vla_mode_p (loop->vector_mode)\n+\t\t? costs->vla : costs->vls;\n+\n+\t    bool is_ordered = false;\n+\t    if (FLOAT_TYPE_P (vectype) && loop && node)\n+\t      {\n+\t\tint reduc_type = vect_reduc_type (m_vinfo, node);\n+\t\tis_ordered = (reduc_type == FOLD_LEFT_REDUCTION);\n+\t      }\n+\n+\t    int reduc_cost = 0;\n+\t    switch (GET_MODE_INNER (TYPE_MODE (vectype)))\n+\t      {\n+\t      case E_QImode:\n+\t\treduc_cost = common_costs->reduc_i8_cost;\n+\t\tbreak;\n+\t      case E_HImode:\n+\t\treduc_cost = common_costs->reduc_i16_cost;\n+\t\tbreak;\n+\t      case E_SImode:\n+\t\treduc_cost = common_costs->reduc_i32_cost;\n+\t\tbreak;\n+\t      case E_DImode:\n+\t\treduc_cost = common_costs->reduc_i64_cost;\n+\t\tbreak;\n+\t      case E_HFmode:\n+\t      case E_BFmode:\n+\t\treduc_cost = is_ordered\n+\t\t\t     ? common_costs->reduc_f16_ordered_cost\n+\t\t\t     : common_costs->reduc_f16_cost;\n+\t\tbreak;\n+\t      case E_SFmode:\n+\t\treduc_cost = is_ordered\n+\t\t\t     ? common_costs->reduc_f32_ordered_cost\n+\t\t\t     : common_costs->reduc_f32_cost;\n+\t\tbreak;\n+\t      case E_DFmode:\n+\t\treduc_cost = is_ordered\n+\t\t\t     ? common_costs->reduc_f64_ordered_cost\n+\t\t\t     : common_costs->reduc_f64_cost;\n+\t\tbreak;\n+\t      default:\n+\t\tbreak;\n+\t      }\n+\n+\t    if (reduc_cost)\n+\t      stmt_cost = reduc_cost;\n+\t  }\n+\n+\tstmt_cost\n+\t  += (FLOAT_TYPE_P (vectype) ? get_vr2fr_cost () : get_vr2gr_cost ());\n+\tbreak;\n+      }\n     case vector_load:\n     case vector_store:\n \t{\ndiff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc\nindex 97272b434..50fa9bd96 100644\n--- a/gcc/config/riscv/riscv.cc\n+++ b/gcc/config/riscv/riscv.cc\n@@ -415,6 +415,16 @@ static const common_vector_cost rvv_vls_vector_cost = {\n   1, /* align_store_cost  */\n   2, /* unalign_load_cost  */\n   2, /* unalign_store_cost  */\n+  2, /* reduc_i8_cost  */\n+  2, /* reduc_i16_cost  */\n+  2, /* reduc_i32_cost  */\n+  2, /* reduc_i64_cost  */\n+  2, /* reduc_f16_cost  */\n+  2, /* reduc_f32_cost  */\n+  2, /* reduc_f64_cost  */\n+  6, /* reduc_f16_ordered_cost  */\n+  4, /* reduc_f32_ordered_cost  */\n+  2, /* reduc_f64_ordered_cost  */\n };\n \n /* RVV costs for VLA vector operations.  */\n@@ -438,6 +448,16 @@ static const scalable_vector_cost rvv_vla_vector_cost = {\n     1, /* align_store_cost  */\n     2, /* unalign_load_cost  */\n     2, /* unalign_store_cost  */\n+    2, /* reduc_i8_cost  */\n+    2, /* reduc_i16_cost  */\n+    2, /* reduc_i32_cost  */\n+    2, /* reduc_i64_cost  */\n+    2, /* reduc_f16_cost  */\n+    2, /* reduc_f32_cost  */\n+    2, /* reduc_f64_cost  */\n+    6, /* reduc_f16_ordered_cost  */\n+    4, /* reduc_f32_ordered_cost  */\n+    2, /* reduc_f64_ordered_cost  */\n   },\n };\n \ndiff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_cost-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_cost-1.c\nnew file mode 100644\nindex 000000000..f567e0ce7\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_cost-1.c\n@@ -0,0 +1,34 @@\n+/* Verify that the vector cost model handles unordered (tree) reductions\n+   for all integer and floating-point element types (VLA).  */\n+/* { dg-do compile } */\n+/* { dg-additional-options \"-march=rv32gcv_zvfh -mabi=ilp32d -mrvv-vector-bits=scalable -ffast-math -fdump-tree-vect-details\" } */\n+\n+#include <stdint-gcc.h>\n+\n+#define DEF_REDUC_PLUS(TYPE)\t\t\t\\\n+TYPE __attribute__ ((noinline, noclone))\t\\\n+reduc_plus_##TYPE (TYPE *restrict a, int n)\t\\\n+{\t\t\t\t\t\t\\\n+  TYPE r = 0;\t\t\t\t\t\\\n+  for (int i = 0; i < n; ++i)\t\t\t\\\n+    r += a[i];\t\t\t\t\t\\\n+  return r;\t\t\t\t\t\\\n+}\n+\n+DEF_REDUC_PLUS (int8_t)\n+DEF_REDUC_PLUS (int16_t)\n+DEF_REDUC_PLUS (int32_t)\n+DEF_REDUC_PLUS (int64_t)\n+DEF_REDUC_PLUS (_Float16)\n+DEF_REDUC_PLUS (float)\n+DEF_REDUC_PLUS (double)\n+\n+/* All loops should be vectorized with the cost model enabled.  */\n+/* { dg-final { scan-tree-dump-times \"optimized: loop vectorized\" 7 \"vect\" } } */\n+/* { dg-final { scan-assembler-times {vredsum\\.vs\\s+v[0-9]+,\\s*v[0-9]+,\\s*v[0-9]+} 4 } } */\n+/* { dg-final { scan-assembler-times {vfredusum\\.vs\\s+v[0-9]+,\\s*v[0-9]+,\\s*v[0-9]+} 3 } } */\n+\n+/* Verify the reduction cost is reflected in the cost model dump.\n+   For unordered reductions: reduc_*_cost (2) + vr2gr/vr2fr (2) = 4,\n+   where reduc_*_cost replaces the default vec_to_scalar_cost.  */\n+/* { dg-final { scan-tree-dump \"vec_to_scalar costs 4\" \"vect\" } } */\ndiff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_cost-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_cost-2.c\nnew file mode 100644\nindex 000000000..af9ffbcf5\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_cost-2.c\n@@ -0,0 +1,34 @@\n+/* Verify that the vector cost model handles ordered (fold-left / strict)\n+   floating-point reductions for all FP element types (VLA).  */\n+/* { dg-do compile } */\n+/* { dg-additional-options \"-march=rv32gcv_zvfh -mabi=ilp32d -mrvv-vector-bits=scalable -fdump-tree-vect-details\" } */\n+\n+#include <stdint-gcc.h>\n+\n+#define DEF_REDUC_PLUS(TYPE)\t\t\t\t\\\n+  TYPE __attribute__ ((noinline, noclone))\t\t\\\n+  reduc_plus_##TYPE (TYPE *restrict a, int n)\t\t\\\n+  {\t\t\t\t\t\t\t\\\n+    TYPE r = 0;\t\t\t\t\t\t\\\n+    for (int i = 0; i < n; ++i)\t\t\t\t\\\n+      r += a[i];\t\t\t\t\t\\\n+    return r;\t\t\t\t\t\t\\\n+  }\n+\n+DEF_REDUC_PLUS (_Float16)\n+DEF_REDUC_PLUS (float)\n+DEF_REDUC_PLUS (double)\n+\n+/* Without -ffast-math, FP reductions use ordered (fold-left) mode.\n+   The cost model should still allow vectorization.  */\n+/* { dg-final { scan-assembler {vfredosum\\.vs\\s+v[0-9]+,\\s*v[0-9]+,\\s*v[0-9]+} } } */\n+\n+/* Verify ordered reduction costs are reflected in the cost model dump.\n+   The reduc_f*_ordered_cost replaces the default vec_to_scalar_cost,\n+   plus vr2fr cost (2):\n+   f16: reduc_f16_ordered_cost (6) + vr2fr (2) = 8\n+   f32: reduc_f32_ordered_cost (4) + vr2fr (2) = 6\n+   f64: reduc_f64_ordered_cost (2) + vr2fr (2) = 4  */\n+/* { dg-final { scan-tree-dump \"vec_to_scalar costs 8\" \"vect\" } } */\n+/* { dg-final { scan-tree-dump \"vec_to_scalar costs 6\" \"vect\" } } */\n+/* { dg-final { scan-tree-dump \"vec_to_scalar costs 4\" \"vect\" } } */\ndiff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-19.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-19.c\nindex 5a4df4824..3815bbadd 100644\n--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-19.c\n+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc-19.c\n@@ -14,7 +14,9 @@ DEF_REDUC_PLUS (_Float16, 512)\n DEF_REDUC_PLUS (_Float16, 1024)\n DEF_REDUC_PLUS (_Float16, 2048)\n \n-/* { dg-final { scan-assembler-times {vfredosum\\.vs} 9 } } */\n+/* The _Float16 n=4 case is not vectorized because the ordered reduction\n+   cost (reduc_f16_ordered_cost) makes it unprofitable for small trip counts.  */\n+/* { dg-final { scan-assembler-times {vfredosum\\.vs} 8 } } */\n /* { dg-final { scan-assembler-not {csrr} } } */\n /* { dg-final { scan-tree-dump-not \"1,1\" \"optimized\" } } */\n /* { dg-final { scan-tree-dump-not \"2,2\" \"optimized\" } } */\ndiff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc_cost-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc_cost-1.c\nnew file mode 100644\nindex 000000000..ed62ee230\n--- /dev/null\n+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/reduc_cost-1.c\n@@ -0,0 +1,41 @@\n+/* Verify that the vector cost model handles reductions for all element\n+   types in VLS mode, including both unordered and ordered reductions.  */\n+/* { dg-do compile } */\n+/* { dg-options \"-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fdump-tree-vect-details\" } */\n+\n+#include \"def.h\"\n+\n+/* Integer unordered reductions (VLS).  */\n+DEF_REDUC_PLUS (int8_t, 4)\n+DEF_REDUC_PLUS (int8_t, 8)\n+DEF_REDUC_PLUS (int16_t, 4)\n+DEF_REDUC_PLUS (int16_t, 8)\n+DEF_REDUC_PLUS (int32_t, 4)\n+DEF_REDUC_PLUS (int32_t, 8)\n+DEF_REDUC_PLUS (int64_t, 4)\n+DEF_REDUC_PLUS (int64_t, 8)\n+\n+/* { dg-final { scan-assembler-times {vredsum\\.vs} 8 } } */\n+\n+/* Floating-point ordered (strict) reductions (VLS).\n+   Without -ffast-math, FP reductions default to ordered.  */\n+DEF_REDUC_PLUS (_Float16, 4)\n+DEF_REDUC_PLUS (_Float16, 8)\n+DEF_REDUC_PLUS (float, 4)\n+DEF_REDUC_PLUS (float, 8)\n+DEF_REDUC_PLUS (double, 4)\n+DEF_REDUC_PLUS (double, 8)\n+\n+/* { dg-final { scan-assembler {vfredosum\\.vs} } } */\n+/* { dg-final { scan-assembler-not {csrr} } } */\n+\n+/* Verify reduction costs in the cost model dump.\n+   The reduc_*_cost replaces the default vec_to_scalar_cost,\n+   plus vr2gr/vr2fr cost (2):\n+   Integer unordered: reduc_i*_cost (2) + vr2gr (2) = 4\n+   FP ordered f16: reduc_f16_ordered_cost (6) + vr2fr (2) = 8\n+   FP ordered f32: reduc_f32_ordered_cost (4) + vr2fr (2) = 6\n+   FP ordered f64: reduc_f64_ordered_cost (2) + vr2fr (2) = 4  */\n+/* { dg-final { scan-tree-dump \"vec_to_scalar costs 8\" \"vect\" } } */\n+/* { dg-final { scan-tree-dump \"vec_to_scalar costs 6\" \"vect\" } } */\n+/* { dg-final { scan-tree-dump \"vec_to_scalar costs 4\" \"vect\" } } */\ndiff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wred-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wred-3.c\nindex 6e9456b23..0f08d50a5 100644\n--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wred-3.c\n+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wred-3.c\n@@ -3,7 +3,9 @@\n \n #include \"wred-2.c\"\n \n-/* { dg-final { scan-assembler-times {vfwredosum\\.vs} 17 } } */\n+/* The _Float16->float n=4 case is not vectorized because the ordered\n+   reduction cost makes it unprofitable for small trip counts.  */\n+/* { dg-final { scan-assembler-times {vfwredosum\\.vs} 16 } } */\n /* { dg-final { scan-assembler-not {csrr} } } */\n /* { dg-final { scan-tree-dump-not \"1,1\" \"optimized\" } } */\n /* { dg-final { scan-tree-dump-not \"2,2\" \"optimized\" } } */\n",
    "prefixes": []
}