diff mbox series

[V2] RISC-V: Enable basic VLS auto-vectorization

Message ID 20230731021357.3815294-1-juzhe.zhong@rivai.ai
State New
Headers show
Series [V2] RISC-V: Enable basic VLS auto-vectorization | expand

Commit Message

juzhe.zhong@rivai.ai July 31, 2023, 2:13 a.m. UTC
Consider the following case:
void
foo (int8_t *in, int8_t *out, int8_t x)
{
  for (int i = 0; i < 16; i++)
    in[i] = x;
}

Compile option: --param=riscv-autovec-preference=scalable -fno-builtin

Before this patch:

foo:
        li      a5,16
        csrr    a4,vlenb
        vsetvli a3,zero,e8,m1,ta,ma
        vmv.v.x v1,a2
        bleu    a5,a4,.L2
        mv      a5,a4
.L2:
        vsetvli zero,a5,e8,m1,ta,ma
        vse8.v  v1,0(a0)
        ret

After this patch:

foo:
	vsetivli	zero,16,e8,mf8,ta,ma
	vmv.v.x	v1,a2
	vse8.v	v1,0(a0)
	ret

gcc/ChangeLog:

        * config/riscv/autovec-vls.md (@vec_duplicate<mode>): New pattern.
        * config/riscv/riscv-v.cc (autovectorize_vector_modes): Add VLS autovec support.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/v-1.c: Adapt test.
        * gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: Ditto.
        * gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: Ditto.
        * gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: Ditto.
        * gcc.target/riscv/rvv/autovec/vls/dup-1.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/dup-2.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/dup-3.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/dup-4.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/dup-5.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/dup-6.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/dup-7.c: New test.

---
 gcc/config/riscv/autovec-vls.md               |  19 ++
 gcc/config/riscv/riscv-v.cc                   |  21 ++-
 .../gcc.target/riscv/rvv/autovec/v-1.c        |   2 +-
 .../gcc.target/riscv/rvv/autovec/vls/dup-1.c  | 168 ++++++++++++++++++
 .../gcc.target/riscv/rvv/autovec/vls/dup-2.c  | 153 ++++++++++++++++
 .../gcc.target/riscv/rvv/autovec/vls/dup-3.c  | 153 ++++++++++++++++
 .../gcc.target/riscv/rvv/autovec/vls/dup-4.c  | 137 ++++++++++++++
 .../gcc.target/riscv/rvv/autovec/vls/dup-5.c  | 137 ++++++++++++++
 .../gcc.target/riscv/rvv/autovec/vls/dup-6.c  | 122 +++++++++++++
 .../gcc.target/riscv/rvv/autovec/vls/dup-7.c  | 122 +++++++++++++
 .../riscv/rvv/autovec/zve32f_zvl128b-1.c      |   2 +-
 .../riscv/rvv/autovec/zve64d_zvl128b-1.c      |   2 +-
 .../riscv/rvv/autovec/zve64f_zvl128b-1.c      |   2 +-
 13 files changed, 1034 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c

Comments

Kito Cheng July 31, 2023, 2:42 a.m. UTC | #1
LGTM, thanks :)

On Mon, Jul 31, 2023 at 10:14 AM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
> Consider the following case:
> void
> foo (int8_t *in, int8_t *out, int8_t x)
> {
>   for (int i = 0; i < 16; i++)
>     in[i] = x;
> }
>
> Compile option: --param=riscv-autovec-preference=scalable -fno-builtin
>
> Before this patch:
>
> foo:
>         li      a5,16
>         csrr    a4,vlenb
>         vsetvli a3,zero,e8,m1,ta,ma
>         vmv.v.x v1,a2
>         bleu    a5,a4,.L2
>         mv      a5,a4
> .L2:
>         vsetvli zero,a5,e8,m1,ta,ma
>         vse8.v  v1,0(a0)
>         ret
>
> After this patch:
>
> foo:
>         vsetivli        zero,16,e8,mf8,ta,ma
>         vmv.v.x v1,a2
>         vse8.v  v1,0(a0)
>         ret
>
> gcc/ChangeLog:
>
>         * config/riscv/autovec-vls.md (@vec_duplicate<mode>): New pattern.
>         * config/riscv/riscv-v.cc (autovectorize_vector_modes): Add VLS autovec support.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/autovec/v-1.c: Adapt test.
>         * gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: Ditto.
>         * gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: Ditto.
>         * gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: Ditto.
>         * gcc.target/riscv/rvv/autovec/vls/dup-1.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls/dup-2.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls/dup-3.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls/dup-4.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls/dup-5.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls/dup-6.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls/dup-7.c: New test.
>
> ---
>  gcc/config/riscv/autovec-vls.md               |  19 ++
>  gcc/config/riscv/riscv-v.cc                   |  21 ++-
>  .../gcc.target/riscv/rvv/autovec/v-1.c        |   2 +-
>  .../gcc.target/riscv/rvv/autovec/vls/dup-1.c  | 168 ++++++++++++++++++
>  .../gcc.target/riscv/rvv/autovec/vls/dup-2.c  | 153 ++++++++++++++++
>  .../gcc.target/riscv/rvv/autovec/vls/dup-3.c  | 153 ++++++++++++++++
>  .../gcc.target/riscv/rvv/autovec/vls/dup-4.c  | 137 ++++++++++++++
>  .../gcc.target/riscv/rvv/autovec/vls/dup-5.c  | 137 ++++++++++++++
>  .../gcc.target/riscv/rvv/autovec/vls/dup-6.c  | 122 +++++++++++++
>  .../gcc.target/riscv/rvv/autovec/vls/dup-7.c  | 122 +++++++++++++
>  .../riscv/rvv/autovec/zve32f_zvl128b-1.c      |   2 +-
>  .../riscv/rvv/autovec/zve64d_zvl128b-1.c      |   2 +-
>  .../riscv/rvv/autovec/zve64f_zvl128b-1.c      |   2 +-
>  13 files changed, 1034 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c
>
> diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md
> index 9ece317ca4e..1a64dfdd91e 100644
> --- a/gcc/config/riscv/autovec-vls.md
> +++ b/gcc/config/riscv/autovec-vls.md
> @@ -139,3 +139,22 @@
>    "vmv%m1r.v\t%0,%1"
>    [(set_attr "type" "vmov")
>     (set_attr "mode" "<MODE>")])
> +
> +;; -----------------------------------------------------------------
> +;; ---- Duplicate Operations
> +;; -----------------------------------------------------------------
> +
> +(define_insn_and_split "@vec_duplicate<mode>"
> +  [(set (match_operand:VLS 0 "register_operand")
> +        (vec_duplicate:VLS
> +          (match_operand:<VEL> 1 "reg_or_int_operand")))]
> +  "TARGET_VECTOR && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +  {
> +    riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
> +                                   riscv_vector::RVV_UNOP, operands);
> +    DONE;
> +  }
> +)
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 9e89f970a4c..c10e51b362e 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -2533,7 +2533,6 @@ autovectorize_vector_modes (vector_modes *modes, bool)
>  {
>    if (autovec_use_vlmax_p ())
>      {
> -      /* TODO: We will support RVV VLS auto-vectorization mode in the future. */
>        poly_uint64 full_size
>         = BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul);
>
> @@ -2561,7 +2560,25 @@ autovectorize_vector_modes (vector_modes *modes, bool)
>             modes->safe_push (mode);
>         }
>      }
> -  return 0;
> +  unsigned int flag = 0;
> +  if (TARGET_VECTOR_VLS)
> +    {
> +      /* Enable VECT_COMPARE_COSTS between VLA modes and VLS modes for scalable
> +        auto-vectorization.  */
> +      flag |= VECT_COMPARE_COSTS;
> +      /* Push all VLS modes according to TARGET_MIN_VLEN.  */
> +      unsigned int i = 0;
> +      unsigned int base_size = TARGET_MIN_VLEN * riscv_autovec_lmul / 8;
> +      unsigned int size = base_size;
> +      machine_mode mode;
> +      while (size > 0 && get_vector_mode (QImode, size).exists (&mode))
> +       {
> +         modes->safe_push (mode);
> +         i++;
> +         size = base_size / (1U << i);
> +       }
> +    }
> +  return flag;
>  }
>
>  /* If the given VECTOR_MODE is an RVV mode,  first get the largest number
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
> index e68d05f5f48..ebbe5e210c5 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
> @@ -3,4 +3,4 @@
>
>  #include "template-1.h"
>
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c
> new file mode 100644
> index 00000000000..1f520f2b0a7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c
> @@ -0,0 +1,168 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e8,\s*mf8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e8,\s*mf8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e8,\s*mf8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo9:
> +**  li\s+[a-x0-9]+,1024
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo9 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo10:
> +**  li\s+[a-x0-9]+,4096
> +**  addi\s+[a-x0-9]+,[a-x0-9]+,-2048
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo10 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 2048; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo11:
> +**  li\s+[a-x0-9]+,4096
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo11 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 4096; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c
> new file mode 100644
> index 00000000000..1a930d059c8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c
> @@ -0,0 +1,153 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo9:
> +**  li\s+[a-x0-9]+,1024
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo9 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo10:
> +**  li\s+[a-x0-9]+,4096
> +**  addi\s+[a-x0-9]+,[a-x0-9]+,-2048
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo10 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 2048; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c
> new file mode 100644
> index 00000000000..46fb5a525a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c
> @@ -0,0 +1,153 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo9:
> +**  li\s+[a-x0-9]+,1024
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo9 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo10:
> +**  li\s+[a-x0-9]+,4096
> +**  addi\s+[a-x0-9]+,[a-x0-9]+,-2048
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo10 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 2048; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c
> new file mode 100644
> index 00000000000..7e46dc42526
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c
> @@ -0,0 +1,137 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo9:
> +**  li\s+[a-x0-9]+,1024
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo9 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c
> new file mode 100644
> index 00000000000..9b9327bdd4d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c
> @@ -0,0 +1,137 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo9:
> +**  li\s+[a-x0-9]+,1024
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo9 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c
> new file mode 100644
> index 00000000000..52d5a65b44e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c
> @@ -0,0 +1,122 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c
> new file mode 100644
> index 00000000000..39f27ece2e7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c
> @@ -0,0 +1,122 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
> index ecfda79e19a..345e2f963d5 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
> @@ -3,4 +3,4 @@
>
>  #include "template-1.h"
>
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
> index 6b320ca6f38..e13c27dcdb0 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
> @@ -3,4 +3,4 @@
>
>  #include "template-1.h"
>
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
> index ae3f066477c..e767629ae54 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
> @@ -3,4 +3,4 @@
>
>  #include "template-1.h"
>
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
> --
> 2.36.3
>
Li, Pan2 via Gcc-patches July 31, 2023, 2:49 a.m. UTC | #2
Committed, thanks Kito.

Pan

-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Kito Cheng via Gcc-patches
Sent: Monday, July 31, 2023 10:42 AM
To: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Cc: gcc-patches@gcc.gnu.org; kito.cheng@sifive.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH V2] RISC-V: Enable basic VLS auto-vectorization

LGTM, thanks :)

On Mon, Jul 31, 2023 at 10:14 AM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
> Consider the following case:
> void
> foo (int8_t *in, int8_t *out, int8_t x)
> {
>   for (int i = 0; i < 16; i++)
>     in[i] = x;
> }
>
> Compile option: --param=riscv-autovec-preference=scalable -fno-builtin
>
> Before this patch:
>
> foo:
>         li      a5,16
>         csrr    a4,vlenb
>         vsetvli a3,zero,e8,m1,ta,ma
>         vmv.v.x v1,a2
>         bleu    a5,a4,.L2
>         mv      a5,a4
> .L2:
>         vsetvli zero,a5,e8,m1,ta,ma
>         vse8.v  v1,0(a0)
>         ret
>
> After this patch:
>
> foo:
>         vsetivli        zero,16,e8,mf8,ta,ma
>         vmv.v.x v1,a2
>         vse8.v  v1,0(a0)
>         ret
>
> gcc/ChangeLog:
>
>         * config/riscv/autovec-vls.md (@vec_duplicate<mode>): New pattern.
>         * config/riscv/riscv-v.cc (autovectorize_vector_modes): Add VLS autovec support.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/autovec/v-1.c: Adapt test.
>         * gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: Ditto.
>         * gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: Ditto.
>         * gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: Ditto.
>         * gcc.target/riscv/rvv/autovec/vls/dup-1.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls/dup-2.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls/dup-3.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls/dup-4.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls/dup-5.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls/dup-6.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls/dup-7.c: New test.
>
> ---
>  gcc/config/riscv/autovec-vls.md               |  19 ++
>  gcc/config/riscv/riscv-v.cc                   |  21 ++-
>  .../gcc.target/riscv/rvv/autovec/v-1.c        |   2 +-
>  .../gcc.target/riscv/rvv/autovec/vls/dup-1.c  | 168 ++++++++++++++++++
>  .../gcc.target/riscv/rvv/autovec/vls/dup-2.c  | 153 ++++++++++++++++
>  .../gcc.target/riscv/rvv/autovec/vls/dup-3.c  | 153 ++++++++++++++++
>  .../gcc.target/riscv/rvv/autovec/vls/dup-4.c  | 137 ++++++++++++++
>  .../gcc.target/riscv/rvv/autovec/vls/dup-5.c  | 137 ++++++++++++++
>  .../gcc.target/riscv/rvv/autovec/vls/dup-6.c  | 122 +++++++++++++
>  .../gcc.target/riscv/rvv/autovec/vls/dup-7.c  | 122 +++++++++++++
>  .../riscv/rvv/autovec/zve32f_zvl128b-1.c      |   2 +-
>  .../riscv/rvv/autovec/zve64d_zvl128b-1.c      |   2 +-
>  .../riscv/rvv/autovec/zve64f_zvl128b-1.c      |   2 +-
>  13 files changed, 1034 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c
>
> diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md
> index 9ece317ca4e..1a64dfdd91e 100644
> --- a/gcc/config/riscv/autovec-vls.md
> +++ b/gcc/config/riscv/autovec-vls.md
> @@ -139,3 +139,22 @@
>    "vmv%m1r.v\t%0,%1"
>    [(set_attr "type" "vmov")
>     (set_attr "mode" "<MODE>")])
> +
> +;; -----------------------------------------------------------------
> +;; ---- Duplicate Operations
> +;; -----------------------------------------------------------------
> +
> +(define_insn_and_split "@vec_duplicate<mode>"
> +  [(set (match_operand:VLS 0 "register_operand")
> +        (vec_duplicate:VLS
> +          (match_operand:<VEL> 1 "reg_or_int_operand")))]
> +  "TARGET_VECTOR && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +  {
> +    riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
> +                                   riscv_vector::RVV_UNOP, operands);
> +    DONE;
> +  }
> +)
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 9e89f970a4c..c10e51b362e 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -2533,7 +2533,6 @@ autovectorize_vector_modes (vector_modes *modes, bool)
>  {
>    if (autovec_use_vlmax_p ())
>      {
> -      /* TODO: We will support RVV VLS auto-vectorization mode in the future. */
>        poly_uint64 full_size
>         = BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul);
>
> @@ -2561,7 +2560,25 @@ autovectorize_vector_modes (vector_modes *modes, bool)
>             modes->safe_push (mode);
>         }
>      }
> -  return 0;
> +  unsigned int flag = 0;
> +  if (TARGET_VECTOR_VLS)
> +    {
> +      /* Enable VECT_COMPARE_COSTS between VLA modes and VLS modes for scalable
> +        auto-vectorization.  */
> +      flag |= VECT_COMPARE_COSTS;
> +      /* Push all VLS modes according to TARGET_MIN_VLEN.  */
> +      unsigned int i = 0;
> +      unsigned int base_size = TARGET_MIN_VLEN * riscv_autovec_lmul / 8;
> +      unsigned int size = base_size;
> +      machine_mode mode;
> +      while (size > 0 && get_vector_mode (QImode, size).exists (&mode))
> +       {
> +         modes->safe_push (mode);
> +         i++;
> +         size = base_size / (1U << i);
> +       }
> +    }
> +  return flag;
>  }
>
>  /* If the given VECTOR_MODE is an RVV mode,  first get the largest number
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
> index e68d05f5f48..ebbe5e210c5 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
> @@ -3,4 +3,4 @@
>
>  #include "template-1.h"
>
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c
> new file mode 100644
> index 00000000000..1f520f2b0a7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c
> @@ -0,0 +1,168 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e8,\s*mf8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e8,\s*mf8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e8,\s*mf8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo9:
> +**  li\s+[a-x0-9]+,1024
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo9 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo10:
> +**  li\s+[a-x0-9]+,4096
> +**  addi\s+[a-x0-9]+,[a-x0-9]+,-2048
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo10 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 2048; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo11:
> +**  li\s+[a-x0-9]+,4096
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo11 (int8_t *in, int8_t *out, int8_t x)
> +{
> +  for (int i = 0; i < 4096; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c
> new file mode 100644
> index 00000000000..1a930d059c8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c
> @@ -0,0 +1,153 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo9:
> +**  li\s+[a-x0-9]+,1024
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo9 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo10:
> +**  li\s+[a-x0-9]+,4096
> +**  addi\s+[a-x0-9]+,[a-x0-9]+,-2048
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo10 (int16_t *in, int16_t *out, int16_t x)
> +{
> +  for (int i = 0; i < 2048; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c
> new file mode 100644
> index 00000000000..46fb5a525a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c
> @@ -0,0 +1,153 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo9:
> +**  li\s+[a-x0-9]+,1024
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo9 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo10:
> +**  li\s+[a-x0-9]+,4096
> +**  addi\s+[a-x0-9]+,[a-x0-9]+,-2048
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo10 (_Float16 *in, _Float16 *out, _Float16 x)
> +{
> +  for (int i = 0; i < 2048; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c
> new file mode 100644
> index 00000000000..7e46dc42526
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c
> @@ -0,0 +1,137 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo9:
> +**  li\s+[a-x0-9]+,1024
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo9 (int32_t *in, int32_t *out, int32_t x)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c
> new file mode 100644
> index 00000000000..9b9327bdd4d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c
> @@ -0,0 +1,137 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo9:
> +**  li\s+[a-x0-9]+,1024
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo9 (float *in, float *out, float x)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c
> new file mode 100644
> index 00000000000..52d5a65b44e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c
> @@ -0,0 +1,122 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au]
> +**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (int64_t *in, int64_t *out, int64_t x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c
> new file mode 100644
> index 00000000000..39f27ece2e7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c
> @@ -0,0 +1,122 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "def.h"
> +
> +/*
> +** foo1:
> +**  vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo1 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 4; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo2:
> +**  vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo2 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 8; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo3:
> +**  vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo3 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo4:
> +**  li\s+[a-x0-9]+,32
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo4 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 32; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo5:
> +**  li\s+[a-x0-9]+,64
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo5 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 64; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo6:
> +**  li\s+[a-x0-9]+,128
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo6 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 128; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo7:
> +**  li\s+[a-x0-9]+,256
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo7 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 256; i++)
> +    in[i] = x;
> +}
> +
> +/*
> +** foo8:
> +**  li\s+[a-x0-9]+,512
> +**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au]
> +**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
> +**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
> +**  ret
> +*/
> +void
> +foo8 (double *in, double *out, double x)
> +{
> +  for (int i = 0; i < 512; i++)
> +    in[i] = x;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
> index ecfda79e19a..345e2f963d5 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
> @@ -3,4 +3,4 @@
>
>  #include "template-1.h"
>
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
> index 6b320ca6f38..e13c27dcdb0 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
> @@ -3,4 +3,4 @@
>
>  #include "template-1.h"
>
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
> index ae3f066477c..e767629ae54 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
> @@ -3,4 +3,4 @@
>
>  #include "template-1.h"
>
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
> --
> 2.36.3
>
Robin Dapp July 31, 2023, 6:41 a.m. UTC | #3
> +;; -----------------------------------------------------------------
> +;; ---- Duplicate Operations
> +;; -----------------------------------------------------------------
> +
> +(define_insn_and_split "@vec_duplicate<mode>"
> +  [(set (match_operand:VLS 0 "register_operand")
> +        (vec_duplicate:VLS
> +          (match_operand:<VEL> 1 "reg_or_int_operand")))]
> +  "TARGET_VECTOR && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +  {
> +    riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
> +                                   riscv_vector::RVV_UNOP, operands);
> +    DONE;
> +  }
> +)

Ah, nice.  The same thing helps for vv -> vx (for non-VLS modes) because we
can keep the non-expanded version around longer which allows more fwprop
opportunities.

> +      /* Enable VECT_COMPARE_COSTS between VLA modes VLS modes for scalable
> +	 auto-vectorization.  */
> +      flag |= VECT_COMPARE_COSTS;

I think it's good to activate this here for now.  After a while when
costing is reliable we probably want to have it unconditionally even
for pure scalable/fixed-vlmax.

Regards
 Robin
diff mbox series

Patch

diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md
index 9ece317ca4e..1a64dfdd91e 100644
--- a/gcc/config/riscv/autovec-vls.md
+++ b/gcc/config/riscv/autovec-vls.md
@@ -139,3 +139,22 @@ 
   "vmv%m1r.v\t%0,%1"
   [(set_attr "type" "vmov")
    (set_attr "mode" "<MODE>")])
+
+;; -----------------------------------------------------------------
+;; ---- Duplicate Operations
+;; -----------------------------------------------------------------
+
+(define_insn_and_split "@vec_duplicate<mode>"
+  [(set (match_operand:VLS 0 "register_operand")
+        (vec_duplicate:VLS
+          (match_operand:<VEL> 1 "reg_or_int_operand")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
+                                   riscv_vector::RVV_UNOP, operands);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 9e89f970a4c..c10e51b362e 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2533,7 +2533,6 @@  autovectorize_vector_modes (vector_modes *modes, bool)
 {
   if (autovec_use_vlmax_p ())
     {
-      /* TODO: We will support RVV VLS auto-vectorization mode in the future. */
       poly_uint64 full_size
 	= BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul);
 
@@ -2561,7 +2560,25 @@  autovectorize_vector_modes (vector_modes *modes, bool)
 	    modes->safe_push (mode);
 	}
     }
-  return 0;
+  unsigned int flag = 0;
+  if (TARGET_VECTOR_VLS)
+    {
+      /* Enable VECT_COMPARE_COSTS between VLA modes and VLS modes for
+	 scalable auto-vectorization.  */
+      flag |= VECT_COMPARE_COSTS;
+      /* Push all VLS modes according to TARGET_MIN_VLEN.  */
+      unsigned int i = 0;
+      unsigned int base_size = TARGET_MIN_VLEN * riscv_autovec_lmul / 8;
+      unsigned int size = base_size;
+      machine_mode mode;
+      while (size > 0 && get_vector_mode (QImode, size).exists (&mode))
+	{
+	  modes->safe_push (mode);
+	  i++;
+	  size = base_size / (1U << i);
+	}
+    }
+  return flag;
 }
 
 /* If the given VECTOR_MODE is an RVV mode,  first get the largest number
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
index e68d05f5f48..ebbe5e210c5 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c
@@ -3,4 +3,4 @@ 
 
 #include "template-1.h"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c
new file mode 100644
index 00000000000..1f520f2b0a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c
@@ -0,0 +1,168 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "def.h"
+
+/*
+** foo1:
+**  vsetivli\s+zero,\s*4,\s*e8,\s*mf8,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo1 (int8_t *in, int8_t *out, int8_t x)
+{
+  for (int i = 0; i < 4; i++)
+    in[i] = x;
+}
+
+/*
+** foo2:
+**  vsetivli\s+zero,\s*8,\s*e8,\s*mf8,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo2 (int8_t *in, int8_t *out, int8_t x)
+{
+  for (int i = 0; i < 8; i++)
+    in[i] = x;
+}
+
+/*
+** foo3:
+**  vsetivli\s+zero,\s*16,\s*e8,\s*mf8,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo3 (int8_t *in, int8_t *out, int8_t x)
+{
+  for (int i = 0; i < 16; i++)
+    in[i] = x;
+}
+
+/*
+** foo4:
+**  li\s+[a-x0-9]+,32
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo4 (int8_t *in, int8_t *out, int8_t x)
+{
+  for (int i = 0; i < 32; i++)
+    in[i] = x;
+}
+
+/*
+** foo5:
+**  li\s+[a-x0-9]+,64
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo5 (int8_t *in, int8_t *out, int8_t x)
+{
+  for (int i = 0; i < 64; i++)
+    in[i] = x;
+}
+
+/*
+** foo6:
+**  li\s+[a-x0-9]+,128
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf4,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo6 (int8_t *in, int8_t *out, int8_t x)
+{
+  for (int i = 0; i < 128; i++)
+    in[i] = x;
+}
+
+/*
+** foo7:
+**  li\s+[a-x0-9]+,256
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo7 (int8_t *in, int8_t *out, int8_t x)
+{
+  for (int i = 0; i < 256; i++)
+    in[i] = x;
+}
+
+/*
+** foo8:
+**  li\s+[a-x0-9]+,512
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo8 (int8_t *in, int8_t *out, int8_t x)
+{
+  for (int i = 0; i < 512; i++)
+    in[i] = x;
+}
+
+/*
+** foo9:
+**  li\s+[a-x0-9]+,1024
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo9 (int8_t *in, int8_t *out, int8_t x)
+{
+  for (int i = 0; i < 1024; i++)
+    in[i] = x;
+}
+
+/*
+** foo10:
+**  li\s+[a-x0-9]+,4096
+**  addi\s+[a-x0-9]+,[a-x0-9]+,-2048
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m4,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo10 (int8_t *in, int8_t *out, int8_t x)
+{
+  for (int i = 0; i < 2048; i++)
+    in[i] = x;
+}
+
+/*
+** foo11:
+**  li\s+[a-x0-9]+,4096
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo11 (int8_t *in, int8_t *out, int8_t x)
+{
+  for (int i = 0; i < 4096; i++)
+    in[i] = x;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c
new file mode 100644
index 00000000000..1a930d059c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c
@@ -0,0 +1,153 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "def.h"
+
+/*
+** foo1:
+**  vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo1 (int16_t *in, int16_t *out, int16_t x)
+{
+  for (int i = 0; i < 4; i++)
+    in[i] = x;
+}
+
+/*
+** foo2:
+**  vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo2 (int16_t *in, int16_t *out, int16_t x)
+{
+  for (int i = 0; i < 8; i++)
+    in[i] = x;
+}
+
+/*
+** foo3:
+**  vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo3 (int16_t *in, int16_t *out, int16_t x)
+{
+  for (int i = 0; i < 16; i++)
+    in[i] = x;
+}
+
+/*
+** foo4:
+**  li\s+[a-x0-9]+,32
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo4 (int16_t *in, int16_t *out, int16_t x)
+{
+  for (int i = 0; i < 32; i++)
+    in[i] = x;
+}
+
+/*
+** foo5:
+**  li\s+[a-x0-9]+,64
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo5 (int16_t *in, int16_t *out, int16_t x)
+{
+  for (int i = 0; i < 64; i++)
+    in[i] = x;
+}
+
+/*
+** foo6:
+**  li\s+[a-x0-9]+,128
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo6 (int16_t *in, int16_t *out, int16_t x)
+{
+  for (int i = 0; i < 128; i++)
+    in[i] = x;
+}
+
+/*
+** foo7:
+**  li\s+[a-x0-9]+,256
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo7 (int16_t *in, int16_t *out, int16_t x)
+{
+  for (int i = 0; i < 256; i++)
+    in[i] = x;
+}
+
+/*
+** foo8:
+**  li\s+[a-x0-9]+,512
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo8 (int16_t *in, int16_t *out, int16_t x)
+{
+  for (int i = 0; i < 512; i++)
+    in[i] = x;
+}
+
+/*
+** foo9:
+**  li\s+[a-x0-9]+,1024
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo9 (int16_t *in, int16_t *out, int16_t x)
+{
+  for (int i = 0; i < 1024; i++)
+    in[i] = x;
+}
+
+/*
+** foo10:
+**  li\s+[a-x0-9]+,4096
+**  addi\s+[a-x0-9]+,[a-x0-9]+,-2048
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo10 (int16_t *in, int16_t *out, int16_t x)
+{
+  for (int i = 0; i < 2048; i++)
+    in[i] = x;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c
new file mode 100644
index 00000000000..46fb5a525a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c
@@ -0,0 +1,153 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "def.h"
+
+/*
+** foo1:
+**  vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo1 (_Float16 *in, _Float16 *out, _Float16 x)
+{
+  for (int i = 0; i < 4; i++)
+    in[i] = x;
+}
+
+/*
+** foo2:
+**  vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo2 (_Float16 *in, _Float16 *out, _Float16 x)
+{
+  for (int i = 0; i < 8; i++)
+    in[i] = x;
+}
+
+/*
+** foo3:
+**  vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo3 (_Float16 *in, _Float16 *out, _Float16 x)
+{
+  for (int i = 0; i < 16; i++)
+    in[i] = x;
+}
+
+/*
+** foo4:
+**  li\s+[a-x0-9]+,32
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo4 (_Float16 *in, _Float16 *out, _Float16 x)
+{
+  for (int i = 0; i < 32; i++)
+    in[i] = x;
+}
+
+/*
+** foo5:
+**  li\s+[a-x0-9]+,64
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo5 (_Float16 *in, _Float16 *out, _Float16 x)
+{
+  for (int i = 0; i < 64; i++)
+    in[i] = x;
+}
+
+/*
+** foo6:
+**  li\s+[a-x0-9]+,128
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo6 (_Float16 *in, _Float16 *out, _Float16 x)
+{
+  for (int i = 0; i < 128; i++)
+    in[i] = x;
+}
+
+/*
+** foo7:
+**  li\s+[a-x0-9]+,256
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo7 (_Float16 *in, _Float16 *out, _Float16 x)
+{
+  for (int i = 0; i < 256; i++)
+    in[i] = x;
+}
+
+/*
+** foo8:
+**  li\s+[a-x0-9]+,512
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo8 (_Float16 *in, _Float16 *out, _Float16 x)
+{
+  for (int i = 0; i < 512; i++)
+    in[i] = x;
+}
+
+/*
+** foo9:
+**  li\s+[a-x0-9]+,1024
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo9 (_Float16 *in, _Float16 *out, _Float16 x)
+{
+  for (int i = 0; i < 1024; i++)
+    in[i] = x;
+}
+
+/*
+** foo10:
+**  li\s+[a-x0-9]+,4096
+**  addi\s+[a-x0-9]+,[a-x0-9]+,-2048
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo10 (_Float16 *in, _Float16 *out, _Float16 x)
+{
+  for (int i = 0; i < 2048; i++)
+    in[i] = x;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c
new file mode 100644
index 00000000000..7e46dc42526
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c
@@ -0,0 +1,137 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "def.h"
+
+/*
+** foo1:
+**  vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo1 (int32_t *in, int32_t *out, int32_t x)
+{
+  for (int i = 0; i < 4; i++)
+    in[i] = x;
+}
+
+/*
+** foo2:
+**  vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo2 (int32_t *in, int32_t *out, int32_t x)
+{
+  for (int i = 0; i < 8; i++)
+    in[i] = x;
+}
+
+/*
+** foo3:
+**  vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo3 (int32_t *in, int32_t *out, int32_t x)
+{
+  for (int i = 0; i < 16; i++)
+    in[i] = x;
+}
+
+/*
+** foo4:
+**  li\s+[a-x0-9]+,32
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo4 (int32_t *in, int32_t *out, int32_t x)
+{
+  for (int i = 0; i < 32; i++)
+    in[i] = x;
+}
+
+/*
+** foo5:
+**  li\s+[a-x0-9]+,64
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo5 (int32_t *in, int32_t *out, int32_t x)
+{
+  for (int i = 0; i < 64; i++)
+    in[i] = x;
+}
+
+/*
+** foo6:
+**  li\s+[a-x0-9]+,128
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo6 (int32_t *in, int32_t *out, int32_t x)
+{
+  for (int i = 0; i < 128; i++)
+    in[i] = x;
+}
+
+/*
+** foo7:
+**  li\s+[a-x0-9]+,256
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo7 (int32_t *in, int32_t *out, int32_t x)
+{
+  for (int i = 0; i < 256; i++)
+    in[i] = x;
+}
+
+/*
+** foo8:
+**  li\s+[a-x0-9]+,512
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo8 (int32_t *in, int32_t *out, int32_t x)
+{
+  for (int i = 0; i < 512; i++)
+    in[i] = x;
+}
+
+/*
+** foo9:
+**  li\s+[a-x0-9]+,1024
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo9 (int32_t *in, int32_t *out, int32_t x)
+{
+  for (int i = 0; i < 1024; i++)
+    in[i] = x;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c
new file mode 100644
index 00000000000..9b9327bdd4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c
@@ -0,0 +1,137 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "def.h"
+
+/*
+** foo1:
+**  vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo1 (float *in, float *out, float x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 4; i++) /* 4 elements; matcher above expects vsetivli immediate, e32/mf2.  */
+    in[i] = x; /* Same float X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo2:
+**  vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo2 (float *in, float *out, float x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 8; i++) /* 8 elements; matcher above expects vsetivli immediate, e32/mf2.  */
+    in[i] = x; /* Same float X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo3:
+**  vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo3 (float *in, float *out, float x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 16; i++) /* 16 elements; matcher above expects vsetivli immediate, e32/mf2.  */
+    in[i] = x; /* Same float X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo4:
+**  li\s+[a-x0-9]+,32
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo4 (float *in, float *out, float x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 32; i++) /* 32 elements; matcher above expects li + vsetvli, e32/mf2.  */
+    in[i] = x; /* Same float X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo5:
+**  li\s+[a-x0-9]+,64
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo5 (float *in, float *out, float x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 64; i++) /* 64 elements; matcher above expects li + vsetvli, e32/mf2.  */
+    in[i] = x; /* Same float X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo6:
+**  li\s+[a-x0-9]+,128
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo6 (float *in, float *out, float x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 128; i++) /* 128 elements; matcher above expects li + vsetvli, e32/m1.  */
+    in[i] = x; /* Same float X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo7:
+**  li\s+[a-x0-9]+,256
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo7 (float *in, float *out, float x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 256; i++) /* 256 elements; matcher above expects li + vsetvli, e32/m2.  */
+    in[i] = x; /* Same float X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo8:
+**  li\s+[a-x0-9]+,512
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo8 (float *in, float *out, float x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 512; i++) /* 512 elements; matcher above expects li + vsetvli, e32/m4.  */
+    in[i] = x; /* Same float X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo9:
+**  li\s+[a-x0-9]+,1024
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo9 (float *in, float *out, float x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 1024; i++) /* 1024 elements; matcher above expects li + vsetvli, e32/m8.  */
+    in[i] = x; /* Same float X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c
new file mode 100644
index 00000000000..52d5a65b44e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c
@@ -0,0 +1,122 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "def.h"
+
+/*
+** foo1:
+**  vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo1 (int64_t *in, int64_t *out, int64_t x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 4; i++) /* 4 elements; matcher above expects vsetivli immediate, e64/m1.  */
+    in[i] = x; /* Same scalar X is stored to every IN[i].  */
+}
+
+/*
+** foo2:
+**  vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo2 (int64_t *in, int64_t *out, int64_t x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 8; i++) /* 8 elements; matcher above expects vsetivli immediate, e64/m1.  */
+    in[i] = x; /* Same scalar X is stored to every IN[i].  */
+}
+
+/*
+** foo3:
+**  vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo3 (int64_t *in, int64_t *out, int64_t x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 16; i++) /* 16 elements; matcher above expects vsetivli immediate, e64/m1.  */
+    in[i] = x; /* Same scalar X is stored to every IN[i].  */
+}
+
+/*
+** foo4:
+**  li\s+[a-x0-9]+,32
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo4 (int64_t *in, int64_t *out, int64_t x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 32; i++) /* 32 elements; matcher above expects li + vsetvli, e64/m1.  */
+    in[i] = x; /* Same scalar X is stored to every IN[i].  */
+}
+
+/*
+** foo5:
+**  li\s+[a-x0-9]+,64
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo5 (int64_t *in, int64_t *out, int64_t x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 64; i++) /* 64 elements; matcher above expects li + vsetvli, e64/m1.  */
+    in[i] = x; /* Same scalar X is stored to every IN[i].  */
+}
+
+/*
+** foo6:
+**  li\s+[a-x0-9]+,128
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo6 (int64_t *in, int64_t *out, int64_t x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 128; i++) /* 128 elements; matcher above expects li + vsetvli, e64/m2.  */
+    in[i] = x; /* Same scalar X is stored to every IN[i].  */
+}
+
+/*
+** foo7:
+**  li\s+[a-x0-9]+,256
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo7 (int64_t *in, int64_t *out, int64_t x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 256; i++) /* 256 elements; matcher above expects li + vsetvli, e64/m4.  */
+    in[i] = x; /* Same scalar X is stored to every IN[i].  */
+}
+
+/*
+** foo8:
+**  li\s+[a-x0-9]+,512
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au]
+**  vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo8 (int64_t *in, int64_t *out, int64_t x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 512; i++) /* 512 elements; matcher above expects li + vsetvli, e64/m8.  */
+    in[i] = x; /* Same scalar X is stored to every IN[i].  */
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c
new file mode 100644
index 00000000000..39f27ece2e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c
@@ -0,0 +1,122 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "def.h"
+
+/*
+** foo1:
+**  vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo1 (double *in, double *out, double x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 4; i++) /* 4 elements; matcher above expects vsetivli immediate, e64/m1.  */
+    in[i] = x; /* Same double X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo2:
+**  vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo2 (double *in, double *out, double x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 8; i++) /* 8 elements; matcher above expects vsetivli immediate, e64/m1.  */
+    in[i] = x; /* Same double X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo3:
+**  vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo3 (double *in, double *out, double x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 16; i++) /* 16 elements; matcher above expects vsetivli immediate, e64/m1.  */
+    in[i] = x; /* Same double X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo4:
+**  li\s+[a-x0-9]+,32
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo4 (double *in, double *out, double x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 32; i++) /* 32 elements; matcher above expects li + vsetvli, e64/m1.  */
+    in[i] = x; /* Same double X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo5:
+**  li\s+[a-x0-9]+,64
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo5 (double *in, double *out, double x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 64; i++) /* 64 elements; matcher above expects li + vsetvli, e64/m1.  */
+    in[i] = x; /* Same double X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo6:
+**  li\s+[a-x0-9]+,128
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo6 (double *in, double *out, double x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 128; i++) /* 128 elements; matcher above expects li + vsetvli, e64/m2.  */
+    in[i] = x; /* Same double X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo7:
+**  li\s+[a-x0-9]+,256
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo7 (double *in, double *out, double x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 256; i++) /* 256 elements; matcher above expects li + vsetvli, e64/m4.  */
+    in[i] = x; /* Same double X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
+
+/*
+** foo8:
+**  li\s+[a-x0-9]+,512
+**  vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au]
+**  vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
+**  vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
+**  ret
+*/
+void
+foo8 (double *in, double *out, double x) /* OUT is never referenced.  */
+{
+  for (int i = 0; i < 512; i++) /* 512 elements; matcher above expects li + vsetvli, e64/m8.  */
+    in[i] = x; /* Same double X is stored to every IN[i]; matcher expects vfmv.v.f.  */
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
index ecfda79e19a..345e2f963d5 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c
@@ -3,4 +3,4 @@ 
 
 #include "template-1.h"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
index 6b320ca6f38..e13c27dcdb0 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c
@@ -3,4 +3,4 @@ 
 
 #include "template-1.h"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
index ae3f066477c..e767629ae54 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c
@@ -3,4 +3,4 @@ 
 
 #include "template-1.h"
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */