
RISC-V: Support VSETVL PASS for RVV support

Message ID 20221214073111.124081-1-juzhe.zhong@rivai.ai
State New
Series RISC-V: Support VSETVL PASS for RVV support

Commit Message

juzhe.zhong@rivai.ai Dec. 14, 2022, 7:31 a.m. UTC
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>

This patch adds a VSETVL pass for RVV support.
1. Optimization quality and performance are guaranteed by LCM (lazy code motion).
2. It is based on the RTL_SSA framework to gain better optimization opportunities.
3. We also perform backward propagation of VL/VTYPE demand information across
   blocks, walking the CFG in RTL_SSA reverse order (a small ratio example
   follows this list).
4. It has been thoroughly tested with about 200+ testcases for the VLMAX
   AVL situation (only VLMAX, since we do not yet have intrinsics to
   test non-VLMAX).
5. AVL modeling will be supported in the next patch.
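
A quick illustration of the "ratio" used by the demand model: the SEW/LMUL
ratio determines VLMAX, so two RVV instructions that agree only on this ratio
can still share one vsetvl.  The snippet below is a minimal standalone C++
sketch (not part of the patch; the enum and function names are illustrative)
that mirrors the calculate_ratio helper added to riscv-v.cc by this patch:

    #include <cstdio>

    /* Illustrative SEW/LMUL -> ratio mapping.  LMUL_1..LMUL_8 are whole
       register-group multipliers; LMUL_F8..LMUL_F2 are the fractional
       ones (1/8, 1/4, 1/2).  */
    enum lmul { LMUL_1, LMUL_2, LMUL_4, LMUL_8, LMUL_F8, LMUL_F4, LMUL_F2 };

    static unsigned
    ratio (unsigned sew, lmul m)
    {
      switch (m)
        {
        case LMUL_1:  return sew;      /* SEW=32, LMUL=1   -> 32 */
        case LMUL_2:  return sew / 2;  /* SEW=32, LMUL=2   -> 16 */
        case LMUL_4:  return sew / 4;
        case LMUL_8:  return sew / 8;
        case LMUL_F8: return sew * 8;  /* SEW=8,  LMUL=1/8 -> 64 */
        case LMUL_F4: return sew * 4;
        case LMUL_F2: return sew * 2;
        }
      return 0;
    }

    int
    main ()
    {
      /* SEW=16/LMUL=1/2 and SEW=32/LMUL=1 share ratio 32, so an insn that
         only demands the ratio is compatible with either configuration.  */
      printf ("%u %u\n", ratio (16, LMUL_F2), ratio (32, LMUL_1));
    }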

gcc/ChangeLog:

        * config.gcc: Add riscv-vsetvl.o.
        * config/riscv/riscv-passes.def (INSERT_PASS_BEFORE): Add VSETVL PASS location.
        * config/riscv/riscv-protos.h (make_pass_vsetvl): New function.
        (enum avl_type): New enum.
        (get_ta): New function.
        (get_ma): Ditto.
        (get_avl_type): Ditto.
        (calculate_ratio): Ditto.
        (enum tail_policy): New enum.
        (enum mask_policy): Ditto.
        * config/riscv/riscv-v.cc (calculate_ratio): New function.
        (emit_pred_op): Change the VLMAX mov codegen.
        (get_ta): New function.
        (get_ma): Ditto.
        (enum tail_policy): Change enum.
        (get_prefer_tail_policy): New function.
        (enum mask_policy): Change enum.
        (get_prefer_mask_policy): New function.
        * config/riscv/t-riscv: Add riscv-vsetvl.o.
        * config/riscv/vector.md (): Adjust attribute and pattern for VSETVL PASS.
        (@vlmax_avl<mode>): Ditto.
        (@vsetvl<mode>_no_side_effects): Delete.
        (vsetvl_vtype_change_only): New MD pattern.
        (@vsetvl_discard_result<mode>): Ditto.
        * config/riscv/riscv-vsetvl.cc: New file.
        * config/riscv/riscv-vsetvl.h: New file.

---
 gcc/config.gcc                    |    2 +-
 gcc/config/riscv/riscv-passes.def |    1 +
 gcc/config/riscv/riscv-protos.h   |   15 +
 gcc/config/riscv/riscv-v.cc       |  102 +-
 gcc/config/riscv/riscv-vsetvl.cc  | 2509 +++++++++++++++++++++++++++++
 gcc/config/riscv/riscv-vsetvl.h   |  344 ++++
 gcc/config/riscv/t-riscv          |    8 +
 gcc/config/riscv/vector.md        |  131 +-
 8 files changed, 3076 insertions(+), 36 deletions(-)
 create mode 100644 gcc/config/riscv/riscv-vsetvl.cc
 create mode 100644 gcc/config/riscv/riscv-vsetvl.h

Comments

Kito Cheng Dec. 19, 2022, 3:12 p.m. UTC | #1
LGTM, and thanks for this amazing work. I have actually been reviewing this
for more than a month, so I am going to commit it now.

But feel free to keep reviewing it, give comments, and report bugs to
Ju-Zhe and me :)



<juzhe.zhong@rivai.ai> wrote on Wednesday, December 14, 2022 at 15:32:

> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index b5eda046033..1eb76c6c076 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -518,7 +518,7 @@ pru-*-*)
>         ;;
>  riscv*)
>         cpu_type=riscv
> -       extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o"
> +       extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o riscv-vsetvl.o"
>         extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
>         d_target_objs="riscv-d.o"
>         extra_headers="riscv_vector.h"
> diff --git a/gcc/config/riscv/riscv-passes.def b/gcc/config/riscv/riscv-passes.def
> index 23ef8ac6114..d2d48f231aa 100644
> --- a/gcc/config/riscv/riscv-passes.def
> +++ b/gcc/config/riscv/riscv-passes.def
> @@ -18,3 +18,4 @@
>     <http://www.gnu.org/licenses/>.  */
>
>  INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs);
> +INSERT_PASS_BEFORE (pass_sched2, 1, pass_vsetvl);
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index e17e003f8e2..cfd0f284f91 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -96,6 +96,7 @@ extern void riscv_parse_arch_string (const char *, struct gcc_options *, locatio
>  extern bool riscv_hard_regno_rename_ok (unsigned, unsigned);
>
>  rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
> +rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
>
>  /* Information about one CPU we know about.  */
>  struct riscv_cpu_info {
> @@ -131,6 +132,12 @@ enum vlmul_type
>    LMUL_F4 = 6,
>    LMUL_F2 = 7,
>  };
> +
> +enum avl_type
> +{
> +  NONVLMAX,
> +  VLMAX,
> +};
>  /* Routines implemented in riscv-vector-builtins.cc.  */
>  extern void init_builtins (void);
>  extern const char *mangle_builtin_type (const_tree);
> @@ -145,17 +152,25 @@ extern bool legitimize_move (rtx, rtx, machine_mode);
>  extern void emit_pred_op (unsigned, rtx, rtx, machine_mode);
>  extern enum vlmul_type get_vlmul (machine_mode);
>  extern unsigned int get_ratio (machine_mode);
> +extern int get_ta (rtx);
> +extern int get_ma (rtx);
> +extern int get_avl_type (rtx);
> +extern unsigned int calculate_ratio (unsigned int, enum vlmul_type);
>  enum tail_policy
>  {
>    TAIL_UNDISTURBED = 0,
>    TAIL_AGNOSTIC = 1,
> +  TAIL_ANY = 2,
>  };
>
>  enum mask_policy
>  {
>    MASK_UNDISTURBED = 0,
>    MASK_AGNOSTIC = 1,
> +  MASK_ANY = 2,
>  };
> +enum tail_policy get_prefer_tail_policy ();
> +enum mask_policy get_prefer_mask_policy ();
>  }
>
>  /* We classify builtin types into two classes:
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 13ee33938bb..f02a048f76d 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -72,11 +72,16 @@ public:
>    }
>    void add_policy_operand (enum tail_policy vta, enum mask_policy vma)
>    {
> -    rtx tail_policy_rtx = vta == TAIL_UNDISTURBED ? const0_rtx : const1_rtx;
> -    rtx mask_policy_rtx = vma == MASK_UNDISTURBED ? const0_rtx : const1_rtx;
> +    rtx tail_policy_rtx = gen_int_mode (vta, Pmode);
> +    rtx mask_policy_rtx = gen_int_mode (vma, Pmode);
>      add_input_operand (tail_policy_rtx, Pmode);
>      add_input_operand (mask_policy_rtx, Pmode);
>    }
> +  void add_avl_type_operand ()
> +  {
> +    rtx vlmax_rtx = gen_int_mode (avl_type::VLMAX, Pmode);
> +    add_input_operand (vlmax_rtx, Pmode);
> +  }
>
>    void expand (enum insn_code icode, bool temporary_volatile_p = false)
>    {
> @@ -112,19 +117,58 @@ emit_vlmax_vsetvl (machine_mode vmode)
>    unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL
>                        ? 8
>                        : GET_MODE_BITSIZE (GET_MODE_INNER (vmode));
> +  enum vlmul_type vlmul = get_vlmul (vmode);
> +  unsigned int ratio = calculate_ratio (sew, vlmul);
> +
> +  if (!optimize)
> +    emit_insn (gen_vsetvl (Pmode, vl, RVV_VLMAX, gen_int_mode (sew, Pmode),
> +                          gen_int_mode (get_vlmul (vmode), Pmode), const0_rtx,
> +                          const0_rtx));
> +  else
> +    emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode)));
>
> -  emit_insn (
> -    gen_vsetvl_no_side_effects (Pmode, vl, RVV_VLMAX, gen_int_mode (sew, Pmode),
> -                               gen_int_mode (get_vlmul (vmode), Pmode),
> -                               const1_rtx, const1_rtx));
>    return vl;
>  }
>
> +/* Calculate SEW/LMUL ratio.  */
> +unsigned int
> +calculate_ratio (unsigned int sew, enum vlmul_type vlmul)
> +{
> +  unsigned int ratio;
> +  switch (vlmul)
> +    {
> +    case LMUL_1:
> +      ratio = sew;
> +      break;
> +    case LMUL_2:
> +      ratio = sew / 2;
> +      break;
> +    case LMUL_4:
> +      ratio = sew / 4;
> +      break;
> +    case LMUL_8:
> +      ratio = sew / 8;
> +      break;
> +    case LMUL_F8:
> +      ratio = sew * 8;
> +      break;
> +    case LMUL_F4:
> +      ratio = sew * 4;
> +      break;
> +    case LMUL_F2:
> +      ratio = sew * 2;
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +  return ratio;
> +}
> +
>  /* Emit an RVV unmask && vl mov from SRC to DEST.  */
>  void
>  emit_pred_op (unsigned icode, rtx dest, rtx src, machine_mode mask_mode)
>  {
> -  insn_expander<7> e;
> +  insn_expander<8> e;
>    machine_mode mode = GET_MODE (dest);
>
>    e.add_output_operand (dest, mode);
> @@ -137,7 +181,9 @@ emit_pred_op (unsigned icode, rtx dest, rtx src, machine_mode mask_mode)
>    e.add_input_operand (vlmax, Pmode);
>
>    if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
> -    e.add_policy_operand (TAIL_AGNOSTIC, MASK_AGNOSTIC);
> +    e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
> +
> +  e.add_avl_type_operand ();
>
>    e.expand ((enum insn_code) icode, MEM_P (dest) || MEM_P (src));
>  }
> @@ -256,4 +302,44 @@ get_ratio (machine_mode mode)
>      return mode_vtype_infos.ratio_for_min_vlen64[mode];
>  }
>
> +/* Get ta according to operand[tail_op_idx].  */
> +int
> +get_ta (rtx ta)
> +{
> +  if (INTVAL (ta) == TAIL_ANY)
> +    return INVALID_ATTRIBUTE;
> +  return INTVAL (ta);
> +}
> +
> +/* Get ma according to operand[mask_op_idx].  */
> +int
> +get_ma (rtx ma)
> +{
> +  if (INTVAL (ma) == MASK_ANY)
> +    return INVALID_ATTRIBUTE;
> +  return INTVAL (ma);
> +}
> +
> +/* Get prefer tail policy.  */
> +enum tail_policy
> +get_prefer_tail_policy ()
> +{
> +  /* TODO: By default, we choose to use TAIL_ANY which allows
> +     the compiler to pick either agnostic or undisturbed. Maybe we
> +     will have a compile option like -mprefer=agnostic to set
> +     this value???.  */
> +  return TAIL_ANY;
> +}
> +
> +/* Get prefer mask policy.  */
> +enum mask_policy
> +get_prefer_mask_policy ()
> +{
> +  /* TODO: By default, we choose to use MASK_ANY which allows
> +     the compiler to pick either agnostic or undisturbed. Maybe we
> +     will have a compile option like -mprefer=agnostic to set
> +     this value???.  */
> +  return MASK_ANY;
> +}
> +
>  } // namespace riscv_vector
> diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
> new file mode 100644
> index 00000000000..c602426b542
> --- /dev/null
> +++ b/gcc/config/riscv/riscv-vsetvl.cc
> @@ -0,0 +1,2509 @@
> +/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
> +   Copyright(C) 2022-2022 Free Software Foundation, Inc.
> +   Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify
> +it under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or(at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful,
> +but WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +GNU General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +/*  This pass sets the VL/VTYPE global status for RVV instructions
> +    that depend on VL and VTYPE registers by Lazy code motion (LCM).
> +
> +    Strategy:
> +
> +    -  Backward demanded info fusion within block.
> +
> +    -  Lazy code motion (LCM) based demanded info backward propagation.
> +
> +    -  RTL_SSA framework for def-use, PHI analysis.
> +
> +    -  Lazy code motion (LCM) for global VL/VTYPE optimization.
> +
> +    Assumption:
> +
> +    -  Each avl operand is either an immediate (must be in range 0 ~ 31) or reg.
> +
> +    This pass consists of 5 phases:
> +
> +    -  Phase 1 - compute VL/VTYPE demanded information within each block
> +       by backward data-flow analysis.
> +
> +    -  Phase 2 - Emit vsetvl instructions within each basic block according to
> +       demand, compute and save ANTLOC && AVLOC of each block.
> +
> +    -  Phase 3 - Backward demanded info propagation and fusion across blocks.
> +
> +    -  Phase 4 - Lazy code motion including: compute local properties,
> +       pre_edge_lcm and vsetvl insertion && delete edges for LCM results.
> +
> +    -  Phase 5 - Cleanup AVL operand of RVV instruction since it will not be
> +       used any more and VL operand of VSETVL instruction if it is not used by
> +       any non-debug instructions.
> +
> +    Implementation:
> +
> +    -  The subroutine of optimize == 0 is simple_vsetvl.
> +       This function simply inserts a vsetvl for each RVV
> +       instruction. No optimization.
> +
> +    -  The subroutine of optimize > 0 is lazy_vsetvl.
> +       This function optimizes the vsetvl insertion process by
> +       lazy code motion (LCM) layering on RTL_SSA.  */
> +
> +#define IN_TARGET_CODE 1
> +#define INCLUDE_ALGORITHM
> +#define INCLUDE_FUNCTIONAL
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "tm.h"
> +#include "backend.h"
> +#include "rtl.h"
> +#include "target.h"
> +#include "tree-pass.h"
> +#include "df.h"
> +#include "rtl-ssa.h"
> +#include "cfgcleanup.h"
> +#include "insn-config.h"
> +#include "insn-attr.h"
> +#include "insn-opinit.h"
> +#include "tm-constrs.h"
> +#include "cfgrtl.h"
> +#include "cfganal.h"
> +#include "lcm.h"
> +#include "predict.h"
> +#include "profile-count.h"
> +#include "riscv-vsetvl.h"
> +
> +using namespace rtl_ssa;
> +using namespace riscv_vector;
> +
> +DEBUG_FUNCTION void
> +debug (const vector_insn_info *info)
> +{
> +  info->dump (stderr);
> +}
> +
> +DEBUG_FUNCTION void
> +debug (const vector_infos_manager *info)
> +{
> +  info->dump (stderr);
> +}
> +
> +static bool
> +vlmax_avl_p (rtx x)
> +{
> +  return x && rtx_equal_p (x, RVV_VLMAX);
> +}
> +
> +static bool
> +vlmax_avl_insn_p (rtx_insn *rinsn)
> +{
> +  return INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi
> +        || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi;
> +}
> +
> +static bool
> +loop_basic_block_p (const basic_block cfg_bb)
> +{
> +  return JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb));
> +}
> +
> +/* Return true if it is an RVV instruction that depends on the VTYPE global
> +   status register.  */
> +static bool
> +has_vtype_op (rtx_insn *rinsn)
> +{
> +  return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
> +}
> +
> +/* Return true if it is an RVV instruction that depends on the VL global
> +   status register.  */
> +static bool
> +has_vl_op (rtx_insn *rinsn)
> +{
> +  return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn);
> +}
> +
> +/* Is this a SEW value that can be encoded into the VTYPE format.  */
> +static bool
> +valid_sew_p (size_t sew)
> +{
> +  return exact_log2 (sew) && sew >= 8 && sew <= 64;
> +}
> +
> +/* Return true if it is a vsetvl instruction.  */
> +static bool
> +vector_config_insn_p (rtx_insn *rinsn)
> +{
> +  return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
> +}
> +
> +/* Return true if it is vsetvldi or vsetvlsi.  */
> +static bool
> +vsetvl_insn_p (rtx_insn *rinsn)
> +{
> +  return INSN_CODE (rinsn) == CODE_FOR_vsetvldi
> +        || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi;
> +}
> +
> +/* Return true if INSN1 comes before INSN2 in the same block.  */
> +static bool
> +same_bb_and_before_p (const insn_info *insn1, const insn_info *insn2)
> +{
> +  return (insn1->bb ()->index () == insn2->bb ()->index ())
> +        && (*insn1 < *insn2);
> +}
> +
> +/* Return true if INSN1 comes after or equal INSN2 in the same block.  */
> +static bool
> +same_bb_and_after_or_equal_p (const insn_info *insn1, const insn_info *insn2)
> +{
> +  return (insn1->bb ()->index () == insn2->bb ()->index ())
> +        && (*insn1 >= *insn2);
> +}
> +
> +/* An "anticipatable occurrence" is one that is the first occurrence in
> the
> +   basic block, the operands are not modified in the basic block prior
> +   to the occurrence and the output is not used between the start of
> +   the block and the occurrence.  */
> +static bool
> +anticipatable_occurrence_p (const insn_info *insn, const vector_insn_info dem)
> +{
> +  /* The only possible operand we care of VSETVL is AVL.  */
> +  if (dem.has_avl_reg ())
> +    {
> +      /* The operands should not be modified in the basic block prior
> +        to the occurrence.  */
> +      if (!vlmax_avl_p (dem.get_avl ()))
> +       {
> +         set_info *set
> +           = find_access (insn->uses (), REGNO (dem.get_avl ()))->def ();
> +         /* If it's undefined, it's not anticipatable conservatively.  */
> +         if (!set)
> +           return false;
> +         if (same_bb_and_before_p (set->insn (), insn))
> +           return false;
> +       }
> +    }
> +
> +  /* The output should not be used between the start of the block
> +     and the occurrence.  */
> +  if (vsetvl_insn_p (insn->rtl ()))
> +    {
> +      rtx dest = SET_DEST (XVECEXP (PATTERN (insn->rtl ()), 0, 0));
> +      for (insn_info *i = insn->prev_nondebug_insn (); i != nullptr;
> +          i = i->prev_nondebug_insn ())
> +       if (find_access (i->uses (), REGNO (dest)))
> +         return false;
> +    }
> +
> +  return true;
> +}
> +
> +/* An "available occurrence" is one that is the last occurrence in the
> +   basic block and the operands are not modified by following statements in
> +   the basic block [including this insn].  */
> +static bool
> +available_occurrence_p (const insn_info *insn, const vector_insn_info dem)
> +{
> +  /* The only possible operand we care of VSETVL is AVL.  */
> +  if (dem.has_avl_reg ())
> +    {
> +      /* The operands should not be modified in the basic block prior
> +        to the occurrence.
> +        e.g.
> +           bb:
> +             vsetvl hr3, hr2, ...
> +             ...
> +             vadd ... (vl=hr3)
> +      */
> +      if (!vlmax_avl_p (dem.get_avl ()))
> +       {
> +         set_info *set
> +           = find_access (insn->uses (), REGNO (dem.get_avl ()))->def ();
> +         /* If it's undefined, it's not available conservatively.  */
> +         if (!set)
> +           return false;
> +         if (same_bb_and_after_or_equal_p (set->insn (), insn))
> +           return false;
> +       }
> +    }
> +  return true;
> +}
> +
> +/* Return true if the branch probability is dominant.  */
> +static bool
> +dominate_probability_p (edge e)
> +{
> +  /* TODO: We simply pick dominant probability >= 50%.
> +     However, it isn't always optimal. Consider cases
> +     like this:
> +       bb 0: 80% succs: bb 2, bb 3, bb 4, bb 5.
> +       bb 1: 20%
> +
> +     Assume bb 1, bb 2, bb 3, bb 4, bb 5 are different
> +     one another, and bb 2, bb 3, bb 4, bb 5 are incompatible.
> +
> +     ??? Maybe backward propagate bb 1 is better ???
> +     May need to support an accurate and reliable COST model
> +     in the future.  */
> +  return e->probability >= profile_probability::even ();
> +}
> +
> +/* Return true if the block is worthwhile for backward propagation.  */
> +static bool
> +backward_propagate_worthwhile_p (const basic_block cfg_bb,
> +                                const vector_block_info block_info)
> +{
> +  if (loop_basic_block_p (cfg_bb))
> +    {
> +      if (block_info.local_dem.compatible_p (block_info.reaching_out))
> +       return true;
> +
> +      /* There is an obvious case that is not worthwhile and meaningless
> +        to propagate the demand information:
> +                         local_dem
> +                            __________
> +                        ____|____     |
> +                       |        |     |
> +                       |________|     |
> +                            |_________|
> +                         reaching_out
> +         Header is incompatible with reaching_out and the block is the loop itself;
> +         we don't backward propagate the local_dem since we can't avoid emitting
> +         vsetvl for the local_dem.  */
> +      edge e;
> +      edge_iterator ei;
> +      FOR_EACH_EDGE (e, ei, cfg_bb->succs)
> +       if (e->dest->index == cfg_bb->index)
> +         return false;
> +    }
> +
> +  return true;
> +}
> +
> +/* Helper function to get VL operand.  */
> +static rtx
> +get_vl (rtx_insn *rinsn)
> +{
> +  if (has_vl_op (rinsn))
> +    {
> +      /* We only call get_vl for VLMAX use VTYPE instruction.
> +        It's used to get the VL operand to emit VLMAX VSETVL instruction:
> +        vsetvl a5,zero,e32,m1,ta,ma.  */
> +      gcc_assert (get_attr_avl_type (rinsn) == VLMAX);
> +      extract_insn_cached (rinsn);
> +      return recog_data.operand[get_attr_vl_op_idx (rinsn)];
> +    }
> +  return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
> +}
> +
> +/* Helper function to get AVL operand.  */
> +static rtx
> +get_avl (rtx_insn *rinsn)
> +{
> +  if (vsetvl_insn_p (rinsn))
> +    return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);
> +
> +  if (!has_vl_op (rinsn))
> +    return NULL_RTX;
> +  if (get_attr_avl_type (rinsn) == VLMAX)
> +    return RVV_VLMAX;
> +  extract_insn_cached (rinsn);
> +  return recog_data.operand[get_attr_vl_op_idx (rinsn)];
> +}
> +
> +static bool
> +can_backward_propagate_p (const function_info *ssa, const basic_block cfg_bb,
> +                         const vector_insn_info prop)
> +{
> +  insn_info *insn = prop.get_insn ();
> +
> +  /* TODO: We don't backward propagate the explicit VSETVL here
> +     since we will change vsetvl and vsetvlmax intrinsics into
> +     no side effects which can be optimized into an optimal location
> +     by GCC internal PASSes. We only need to support this backward
> +     propagation if vsetvl intrinsics have side effects.  */
> +  if (vsetvl_insn_p (insn->rtl ()))
> +    return false;
> +
> +  gcc_assert (has_vtype_op (insn->rtl ()));
> +  rtx reg = NULL_RTX;
> +
> +  /* Case 1: Don't need VL. Just let it backward propagate.  */
> +  if (!has_vl_op (insn->rtl ()))
> +    return true;
> +  else
> +    {
> +      /* Case 2: CONST_INT AVL, we don't need to check def.  */
> +      if (prop.has_avl_imm ())
> +       return true;
> +      else
> +       {
> +         /* Case 3: REG AVL, we need to check the distance of def to make
> +            sure we won't backward propagate over the def.  */
> +         gcc_assert (prop.has_avl_reg ());
> +         if (vlmax_avl_p (prop.get_avl ()))
> +           /* Check VL operand for vsetvl vl,zero.  */
> +           reg = get_vl (insn->rtl ());
> +         else
> +           /* Check AVL operand for vsetvl zero,avl.  */
> +           reg = get_avl (insn->rtl ());
> +       }
> +    }
> +
> +  def_info *def = find_access (insn->uses (), REGNO (reg))->def ();
> +
> +  /* If the definition is in the current block, we can't propagate it
> +     across blocks.  */
> +  if (def->bb ()->cfg_bb ()->index == insn->bb ()->cfg_bb ()->index)
> +    {
> +      set_info *set = safe_dyn_cast<set_info *> (def);
> +
> +      /* True if it is a degenerate PHI that can be backward propagated.  */
> +      auto valid_degenerate_phi_p = [&] () {
> +       if (!set)
> +         return false;
> +
> +       phi_info *phi = safe_dyn_cast<phi_info *> (set);
> +       if (!phi)
> +         return false;
> +
> +       basic_block iter_bb;
> +       set_info *ultimate_def = look_through_degenerate_phi (set);
> +       const basic_block ultimate_bb = ultimate_def->bb ()->cfg_bb ();
> +       FOR_BB_BETWEEN (iter_bb, ultimate_bb, def->bb ()->cfg_bb (), next_bb)
> +         if (iter_bb->index == cfg_bb->index)
> +           return true;
> +
> +       return false;
> +      };
> +
> +      if (valid_degenerate_phi_p ())
> +       return true;
> +
> +      /* TODO: Support non-degenerate PHI backward propagation later.  */
> +      return false;
> +    }
> +
> +  /* If the definition block is the current block that we iterate, we
> +     can backward propagate it since we will insert or change VL/VTYPE
> +     info at the end of the current block we iterate.  */
> +  if (def->bb ()->cfg_bb ()->index == cfg_bb->index)
> +    return true;
> +
> +  /* Make sure we don't backward propagate the VL/VTYPE info over the
> +     definition blocks.  */
> +  bool visited_p = false;
> +  for (const bb_info *bb : ssa->reverse_bbs ())
> +    {
> +      if (bb->cfg_bb ()->index == cfg_bb->index && visited_p)
> +       return false;
> +      if (bb->cfg_bb ()->index == def->bb ()->cfg_bb ()->index)
> +       visited_p = true;
> +    }
> +
> +  return true;
> +}
> +
> +/* Helper function to get SEW operand. We always have SEW value for
> +   all RVV instructions that have VTYPE OP.  */
> +static uint8_t
> +get_sew (rtx_insn *rinsn)
> +{
> +  return get_attr_sew (rinsn);
> +}
> +
> +/* Helper function to get VLMUL operand. We always have VLMUL value for
> +   all RVV instructions that have VTYPE OP. */
> +static enum vlmul_type
> +get_vlmul (rtx_insn *rinsn)
> +{
> +  return (enum vlmul_type) get_attr_vlmul (rinsn);
> +}
> +
> +/* Get default tail policy.  */
> +static bool
> +get_default_ta ()
> +{
> +  /* For the instruction that doesn't require TA, we still need a default value
> +     to emit vsetvl. We pick up the default value according to prefer policy. */
> +  return (bool) (get_prefer_tail_policy () & 0x1
> +                || (get_prefer_tail_policy () >> 1 & 0x1));
> +}
> +
> +/* Get default mask policy.  */
> +static bool
> +get_default_ma ()
> +{
> +  /* For the instruction that doesn't require MA, we still need a default value
> +     to emit vsetvl. We pick up the default value according to prefer policy. */
> +  return (bool) (get_prefer_mask_policy () & 0x1
> +                || (get_prefer_mask_policy () >> 1 & 0x1));
> +}
> +
> +/* Helper function to get TA operand.  */
> +static bool
> +tail_agnostic_p (rtx_insn *rinsn)
> +{
> +  /* If it doesn't have TA, we return agnostic by default.  */
> +  extract_insn_cached (rinsn);
> +  int ta = get_attr_ta (rinsn);
> +  return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta);
> +}
> +
> +/* Helper function to get MA operand.  */
> +static bool
> +mask_agnostic_p (rtx_insn *rinsn)
> +{
> +  /* If it doesn't have MA, we return agnostic by default.  */
> +  extract_insn_cached (rinsn);
> +  int ma = get_attr_ma (rinsn);
> +  return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
> +}
> +
> +/* Return true if FN has a vector instruction that use VL/VTYPE.  */
> +static bool
> +has_vector_insn (function *fn)
> +{
> +  basic_block cfg_bb;
> +  rtx_insn *rinsn;
> +  FOR_ALL_BB_FN (cfg_bb, fn)
> +    FOR_BB_INSNS (cfg_bb, rinsn)
> +      if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
> +       return true;
> +  return false;
> +}
> +
> +/* Emit vsetvl instruction.  */
> +static rtx
> +gen_vsetvl_pat (enum vsetvl_type insn_type, vl_vtype_info info, rtx vl)
> +{
> +  rtx avl = info.get_avl ();
> +  rtx sew = gen_int_mode (info.get_sew (), Pmode);
> +  rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode);
> +  rtx ta = gen_int_mode (info.get_ta (), Pmode);
> +  rtx ma = gen_int_mode (info.get_ma (), Pmode);
> +
> +  if (insn_type == VSETVL_NORMAL)
> +    {
> +      gcc_assert (vl != NULL_RTX);
> +      return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma);
> +    }
> +  else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY)
> +    return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
> +  else
> +    return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
> +}
> +
> +static rtx
> +gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info info)
> +{
> +  rtx new_pat;
> +  if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ()))
> +    {
> +      rtx dest = get_vl (rinsn);
> +      new_pat = gen_vsetvl_pat (VSETVL_NORMAL, info, dest);
> +    }
> +  else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only)
> +    new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
> +  else
> +    new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX);
> +  return new_pat;
> +}
> +
> +static void
> +emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type,
> +                 vl_vtype_info info, rtx vl, rtx_insn *rinsn)
> +{
> +  rtx pat = gen_vsetvl_pat (insn_type, info, vl);
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n");
> +      print_rtl_single (dump_file, pat);
> +    }
> +
> +  if (emit_type == EMIT_DIRECT)
> +    emit_insn (pat);
> +  else if (emit_type == EMIT_BEFORE)
> +    emit_insn_before (pat, rinsn);
> +  else
> +    emit_insn_after (pat, rinsn);
> +}
> +
> +static void
> +eliminate_insn (rtx_insn *rinsn)
> +{
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn));
> +      print_rtl_single (dump_file, rinsn);
> +    }
> +  if (in_sequence_p ())
> +    remove_insn (rinsn);
> +  else
> +    delete_insn (rinsn);
> +}
> +
> +static void
> +insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn,
> +              const vector_insn_info &info, const vector_insn_info &prev_info)
> +{
> +  /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
> +     VLMAX.  */
> +  if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
> +      && info.same_avl_p (prev_info) && info.same_vlmax_p (prev_info))
> +    {
> +      emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
> +                       rinsn);
> +      return;
> +    }
> +
> +  if (info.has_avl_imm ())
> +    {
> +      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX,
> +                       rinsn);
> +      return;
> +    }
> +
> +  if (info.has_avl_no_reg ())
> +    {
> +      /* We can only use x0, x0 if there's no chance of the vtype change causing
> +        the previous vl to become invalid.  */
> +      if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
> +         && info.same_vlmax_p (prev_info))
> +       {
> +         emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
> +                           rinsn);
> +         return;
> +       }
> +      /* Otherwise use an AVL of 0 to avoid depending on previous vl.  */
> +      vl_vtype_info new_info = info;
> +      new_info.set_avl_info (avl_info (const0_rtx, nullptr));
> +      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX,
> +                       rinsn);
> +      return;
> +    }
> +
> +  /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the
> +     opcode if the AVLReg is X0 as they have different register classes for
> +     the AVL operand.  */
> +  if (vlmax_avl_p (info.get_avl ()))
> +    {
> +      gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn));
> +      rtx vl_op = get_vl (rinsn);
> +      gcc_assert (!vlmax_avl_p (vl_op));
> +      emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn);
> +      return;
> +    }
> +
> +  emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn);
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "Update VL/VTYPE info, previous info=");
> +      prev_info.dump (dump_file);
> +    }
> +}
> +
> +/* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
> +   to INSN.  If such notes are added to an insn which references a
> +   CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
> +   that note, because the following loop optimization pass requires
> +   them.  */
> +
> +/* ??? If there was a jump optimization pass after gcse and before loop,
> +   then we would not need to do this here, because jump would add the
> +   necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */
> +
> +static void
> +add_label_notes (rtx x, rtx_insn *insn)
> +{
> +  enum rtx_code code = GET_CODE (x);
> +  int i, j;
> +  const char *fmt;
> +
> +  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
> +    {
> +      /* This code used to ignore labels that referred to dispatch tables to
> +        avoid flow generating (slightly) worse code.
> +
> +        We no longer ignore such label references (see LABEL_REF handling in
> +        mark_jump_label for additional information).  */
> +
> +      /* There's no reason for current users to emit jump-insns with
> +        such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
> +        notes.  */
> +      gcc_assert (!JUMP_P (insn));
> +      add_reg_note (insn, REG_LABEL_OPERAND, label_ref_label (x));
> +
> +      if (LABEL_P (label_ref_label (x)))
> +       LABEL_NUSES (label_ref_label (x))++;
> +
> +      return;
> +    }
> +
> +  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
> +    {
> +      if (fmt[i] == 'e')
> +       add_label_notes (XEXP (x, i), insn);
> +      else if (fmt[i] == 'E')
> +       for (j = XVECLEN (x, i) - 1; j >= 0; j--)
> +         add_label_notes (XVECEXP (x, i, j), insn);
> +    }
> +}
> +
> +/* Add EXPR to the end of basic block BB.
> +
> +   This is used by both the PRE and code hoisting.  */
> +
> +static void
> +insert_insn_end_basic_block (rtx_insn *rinsn, basic_block cfg_bb)
> +{
> +  rtx_insn *end_rinsn = BB_END (cfg_bb);
> +  rtx_insn *new_insn;
> +  rtx_insn *pat, *pat_end;
> +
> +  pat = rinsn;
> +  gcc_assert (pat && INSN_P (pat));
> +
> +  pat_end = pat;
> +  while (NEXT_INSN (pat_end) != NULL_RTX)
> +    pat_end = NEXT_INSN (pat_end);
> +
> +  /* If the last end_rinsn is a jump, insert EXPR in front.  Similarly we need
> +     to take care of trapping instructions in presence of non-call
> +     exceptions.  */
> +
> +  if (JUMP_P (end_rinsn)
> +      || (NONJUMP_INSN_P (end_rinsn)
> +         && (!single_succ_p (cfg_bb)
> +             || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL)))
> +    {
> +      /* FIXME: What if something in jump uses value set in new end_rinsn?  */
> +      new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
> +    }
> +
> +  /* Likewise if the last end_rinsn is a call, as will happen in the presence
> +     of exception handling.  */
> +  else if (CALL_P (end_rinsn)
> +          && (!single_succ_p (cfg_bb)
> +              || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL))
> +    {
> +      /* Keeping in mind targets with small register classes and parameters
> +        in registers, we search backward and place the instructions before
> +        the first parameter is loaded.  Do this for everyone for consistency
> +        and a presumption that we'll get better code elsewhere as well.  */
> +
> +      /* Since different machines initialize their parameter registers
> +        in different orders, assume nothing.  Collect the set of all
> +        parameter registers.  */
> +      end_rinsn = find_first_parameter_load (end_rinsn, BB_HEAD (cfg_bb));
> +
> +      /* If we found all the parameter loads, then we want to insert
> +        before the first parameter load.
> +
> +        If we did not find all the parameter loads, then we might have
> +        stopped on the head of the block, which could be a CODE_LABEL.
> +        If we inserted before the CODE_LABEL, then we would be putting
> +        the end_rinsn in the wrong basic block.  In that case, put the
> +        end_rinsn after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
> +      while (LABEL_P (end_rinsn) || NOTE_INSN_BASIC_BLOCK_P (end_rinsn))
> +       end_rinsn = NEXT_INSN (end_rinsn);
> +
> +      new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
> +    }
> +  else
> +    new_insn = emit_insn_after_noloc (pat, end_rinsn, cfg_bb);
> +
> +  while (1)
> +    {
> +      if (INSN_P (pat))
> +       add_label_notes (PATTERN (pat), new_insn);
> +      if (pat == pat_end)
> +       break;
> +      pat = NEXT_INSN (pat);
> +    }
> +}
> +
> +/* Get VL/VTYPE information for INSN.  */
> +static vl_vtype_info
> +get_vl_vtype_info (const insn_info *insn)
> +{
> +  if (vector_config_insn_p (insn->rtl ()))
> +    gcc_assert (vsetvl_insn_p (insn->rtl ())
> +               && "Can't handle X0, rs1 vsetvli yet");
> +
> +  set_info *set = nullptr;
> +  rtx avl = ::get_avl (insn->rtl ());
> +  if (avl && REG_P (avl) && !vlmax_avl_p (avl))
> +    set = find_access (insn->uses (), REGNO (avl))->def ();
> +
> +  uint8_t sew = get_sew (insn->rtl ());
> +  enum vlmul_type vlmul = get_vlmul (insn->rtl ());
> +  uint8_t ratio = get_attr_ratio (insn->rtl ());
> +  /* when get_attr_ratio is invalid, this kind of instructions
> +     doesn't care about ratio. However, we still need this value
> +     in demand info backward analysis.  */
> +  if (ratio == INVALID_ATTRIBUTE)
> +    ratio = calculate_ratio (sew, vlmul);
> +  bool ta = tail_agnostic_p (insn->rtl ());
> +  bool ma = mask_agnostic_p (insn->rtl ());
> +
> +  /* If merge operand is undef value, we prefer agnostic.  */
> +  int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
> +  if (merge_op_idx != INVALID_ATTRIBUTE
> +      && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
> +    {
> +      ta = true;
> +      ma = true;
> +    }
> +
> +  vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma);
> +  return info;
> +}
> +
> +static void
> +change_insn (rtx_insn *rinsn, rtx new_pat)
> +{
> +  /* We don't apply the change on RTL_SSA here since it's possibly a
> +     new INSN we added earlier in the pass which doesn't have RTL_SSA
> +     info yet.  */
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
> +              INSN_UID (rinsn));
> +      print_rtl_single (dump_file, PATTERN (rinsn));
> +    }
> +
> +  validate_change (rinsn, &PATTERN (rinsn), new_pat, true);
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "\nto:\n");
> +      print_rtl_single (dump_file, PATTERN (rinsn));
> +    }
> +}
> +
> +static bool
> +change_insn (function_info *ssa, insn_change change, insn_info *insn,
> +            rtx new_pat)
> +{
> +  rtx_insn *rinsn = insn->rtl ();
> +  auto attempt = ssa->new_change_attempt ();
> +  if (!restrict_movement (change))
> +    return false;
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
> +              INSN_UID (rinsn));
> +      print_rtl_single (dump_file, PATTERN (rinsn));
> +      if (dump_flags & TDF_DETAILS)
> +       {
> +         fprintf (dump_file, "RTL_SSA info:\n");
> +         pretty_printer pp;
> +         pp.buffer->stream = dump_file;
> +         insn->print_full (&pp);
> +         pp_printf (&pp, "\n");
> +         pp_flush (&pp);
> +       }
> +    }
> +
> +  insn_change_watermark watermark;
> +  validate_change (rinsn, &PATTERN (rinsn), new_pat, true);
> +
> +  /* These routines report failures themselves.  */
> +  if (!recog (attempt, change) || !change_is_worthwhile (change, false))
> +    return false;
> +  confirm_change_group ();
> +  ssa->change_insn (change);
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "\nto:\n");
> +      print_rtl_single (dump_file, PATTERN (rinsn));
> +      if (dump_flags & TDF_DETAILS)
> +       {
> +         fprintf (dump_file, "RTL_SSA info:\n");
> +         pretty_printer pp;
> +         pp.buffer->stream = dump_file;
> +         insn->print_full (&pp);
> +         pp_printf (&pp, "\n");
> +         pp_flush (&pp);
> +       }
> +    }
> +  return true;
> +}
> +
> +avl_info::avl_info (rtx value_in, set_info *source_in)
> +  : m_value (value_in), m_source (source_in)
> +{}
> +
> +avl_info &
> +avl_info::operator= (const avl_info &other)
> +{
> +  m_value = other.get_value ();
> +  m_source = other.get_source ();
> +  return *this;
> +}
> +
> +bool
> +avl_info::operator== (const avl_info &other) const
> +{
> +  if (!m_value)
> +    return !other.get_value ();
> +  if (!other.get_value ())
> +    return false;
> +
> +  /* It's safe to consider they are equal if their RTX value are
> +     strictly the same.  */
> +  if (m_value == other.get_value ())
> +    return true;
> +
> +  if (GET_CODE (m_value) != GET_CODE (other.get_value ()))
> +    return false;
> +
> +  /* Handle CONST_INT AVL.  */
> +  if (CONST_INT_P (m_value))
> +    return INTVAL (m_value) == INTVAL (other.get_value ());
> +
> +  /* Handle VLMAX AVL.  */
> +  if (vlmax_avl_p (m_value))
> +    return vlmax_avl_p (other.get_value ());
> +
> +  /* TODO: So far we only support VLMAX (AVL=zero) comparison,
> +     we will support non-VLMAX AVL in the future.  */
> +  return false;
> +}
> +
> +bool
> +avl_info::operator!= (const avl_info &other) const
> +{
> +  return !(*this == other);
> +}
> +
> +/* Initialize VL/VTYPE information.  */
> +vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in,
> +                             enum vlmul_type vlmul_in, uint8_t ratio_in,
> +                             bool ta_in, bool ma_in)
> +  : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in),
> +    m_ta (ta_in), m_ma (ma_in)
> +{
> +  gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW");
> +}
> +
> +bool
> +vl_vtype_info::operator== (const vl_vtype_info &other) const
> +{
> +  return m_avl == other.get_avl_info () && m_sew == other.get_sew ()
> +        && m_vlmul == other.get_vlmul () && m_ta == other.get_ta ()
> +        && m_ma == other.get_ma () && m_ratio == other.get_ratio ();
> +}
> +
> +bool
> +vl_vtype_info::operator!= (const vl_vtype_info &other) const
> +{
> +  return !(*this == other);
> +}
> +
> +bool
> +vl_vtype_info::has_non_zero_avl () const
> +{
> +  if (has_avl_imm ())
> +    return INTVAL (get_avl ()) > 0;
> +  if (has_avl_reg ())
> +    return vlmax_avl_p (get_avl ());
> +  return false;
> +}
> +
> +bool
> +vl_vtype_info::same_avl_p (const vl_vtype_info &other) const
> +{
> +  return get_avl_info () == other.get_avl_info ();
> +}
> +
> +bool
> +vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const
> +{
> +  return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ()
> +        && get_ta () == other.get_ta () && get_ma () == other.get_ma ();
> +}
> +
> +bool
> +vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const
> +{
> +  return get_ratio () == other.get_ratio ();
> +}
> +
> +/* Compare the compatibility between Dem1 and Dem2.
> +   If Dem1 > Dem2, Dem1 has bigger compatibility than Dem2,
> +   meaning Dem1 is more easily compatible with others than Dem2,
> +   or in other words Dem2 is stricter than Dem1.
> +   For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO).  */
> +bool
> +vector_insn_info::operator> (const vector_insn_info &other) const
> +{
> +  if (other.compatible_p (static_cast<const vl_vtype_info &> (*this))
> +      && !this->compatible_p (static_cast<const vl_vtype_info &> (other)))
> +    return true;
> +  return false;
> +}
> +
> +bool
> +vector_insn_info::operator>= (const vector_insn_info &other) const
> +{
> +  if (*this > other)
> +    return true;
> +
> +  if (*this == other)
> +    return true;
> +
> +  if (!compatible_p (other))
> +    return false;
> +
> +  if (!demand_p (DEMAND_AVL) && other.demand_p (DEMAND_AVL))
> +    return false;
> +
> +  if (same_vlmax_p (other))
> +    {
> +      if (demand_p (DEMAND_RATIO) && !other.demand_p (DEMAND_RATIO)
> +         && (get_sew () != other.get_sew ()
> +             || get_vlmul () != other.get_vlmul ()))
> +       return false;
> +
> +      if (get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ())
> +       {
> +         if (demand_p (DEMAND_RATIO) && !other.demand_p (DEMAND_RATIO))
> +           return false;
> +       }
> +    }
> +
> +  if (demand_p (DEMAND_TAIL_POLICY) && !other.demand_p (DEMAND_TAIL_POLICY)
> +      && get_ta () != other.get_ta ())
> +    return false;
> +
> +  if (demand_p (DEMAND_MASK_POLICY) && !other.demand_p (DEMAND_MASK_POLICY)
> +      && get_ma () != other.get_ma ())
> +    return false;
> +
> +  return true;
> +}
> +
> +bool
> +vector_insn_info::operator== (const vector_insn_info &other) const
> +{
> +  gcc_assert (!uninit_p () && !other.uninit_p ()
> +             && "Uninitialization should not happen");
> +
> +  /* Empty is only equal to another Empty.  */
> +  if (empty_p ())
> +    return other.empty_p ();
> +  if (other.empty_p ())
> +    return empty_p ();
> +
> +  /* Unknown is only equal to another Unknown.  */
> +  if (unknown_p ())
> +    return other.unknown_p ();
> +  if (other.unknown_p ())
> +    return unknown_p ();
> +
> +  for (size_t i = 0; i < NUM_DEMAND; i++)
> +    if (m_demands[i] != other.demand_p ((enum demand_type) i))
> +      return false;
> +
> +  if (m_insn != other.get_insn ())
> +    return false;
> +  if (m_dirty_pat != other.get_dirty_pat ())
> +    return false;
> +
> +  if (!same_avl_p (other))
> +    return false;
> +
> +  /* If the full VTYPE is valid, check that it is the same.  */
> +  return same_vtype_p (other);
> +}
> +
> +void
> +vector_insn_info::parse_insn (rtx_insn *rinsn)
> +{
> +  *this = vector_insn_info ();
> +  if (!NONDEBUG_INSN_P (rinsn))
> +    return;
> +  if (!has_vtype_op (rinsn))
> +    return;
> +  m_state = VALID;
> +  extract_insn_cached (rinsn);
> +  const rtx avl = recog_data.operand[get_attr_vl_op_idx (rinsn)];
> +  m_avl = avl_info (avl, nullptr);
> +  m_sew = ::get_sew (rinsn);
> +  m_vlmul = ::get_vlmul (rinsn);
> +  m_ta = tail_agnostic_p (rinsn);
> +  m_ma = mask_agnostic_p (rinsn);
> +}
> +
> +void
> +vector_insn_info::parse_insn (insn_info *insn)
> +{
> +  *this = vector_insn_info ();
> +
> +  /* Return if it is a debug insn, for consistency with optimize == 0.  */
> +  if (insn->is_debug_insn ())
> +    return;
> +
> +  /* We set it as unknown since we don't know what will happen in CALL or ASM.  */
> +  if (insn->is_call () || insn->is_asm ())
> +    {
> +      set_unknown ();
> +      return;
> +    }
> +
> +  /* If this is something that updates VL/VTYPE that we don't know about, set
> +     the state to unknown.  */
> +  if (!vector_config_insn_p (insn->rtl ())
> +      && (find_access (insn->defs (), VL_REGNUM)
> +         || find_access (insn->defs (), VTYPE_REGNUM)))
> +    {
> +      set_unknown ();
> +      return;
> +    }
> +
> +  if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
> +    return;
> +
> +  /* Warning: This function has to work on both the lowered (i.e. post
> +     emit_local_forward_vsetvls) and pre-lowering forms.  The main implication
> +     of this is that it can't use the value of a SEW, VL, or Policy operand as
> +     they might be stale after lowering.  */
> +  vl_vtype_info::operator= (get_vl_vtype_info (insn));
> +  m_insn = insn;
> +  m_state = VALID;
> +  if (vector_config_insn_p (insn->rtl ()))
> +    {
> +      m_demands[DEMAND_AVL] = true;
> +      m_demands[DEMAND_RATIO] = true;
> +      return;
> +    }
> +
> +  if (has_vl_op (insn->rtl ()))
> +    m_demands[DEMAND_AVL] = true;
> +
> +  if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
> +    m_demands[DEMAND_RATIO] = true;
> +  else
> +    {
> +      /* TODO: By default, if it doesn't demand RATIO, we set it
> +        demand SEW && LMUL both. Some instructions may demand SEW
> +        only and ignore LMUL, will fix it later.  */
> +      m_demands[DEMAND_SEW] = true;
> +      m_demands[DEMAND_LMUL] = true;
> +    }
> +
> +  if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE)
> +    m_demands[DEMAND_TAIL_POLICY] = true;
> +  if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE)
> +    m_demands[DEMAND_MASK_POLICY] = true;
> +}
> +
> +void
> +vector_insn_info::demand_vl_vtype ()
> +{
> +  m_state = VALID;
> +  m_demands[DEMAND_AVL] = true;
> +  m_demands[DEMAND_SEW] = true;
> +  m_demands[DEMAND_LMUL] = true;
> +  m_demands[DEMAND_TAIL_POLICY] = true;
> +  m_demands[DEMAND_MASK_POLICY] = true;
> +}
> +
> +bool
> +vector_insn_info::compatible_p (const vector_insn_info &other) const
> +{
> +  gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
> +             && "Can't compare invalid demanded infos");
> +
> +  /* Check SEW.  */
> +  if (demand_p (DEMAND_SEW) && other.demand_p (DEMAND_SEW)
> +      && get_sew () != other.get_sew ())
> +    return false;
> +
> +  /* Check LMUL.  */
> +  if (demand_p (DEMAND_LMUL) && other.demand_p (DEMAND_LMUL)
> +      && get_vlmul () != other.get_vlmul ())
> +    return false;
> +
> +  /* Check RATIO.  */
> +  if (demand_p (DEMAND_RATIO) && other.demand_p (DEMAND_RATIO)
> +      && get_ratio () != other.get_ratio ())
> +    return false;
> +  if (demand_p (DEMAND_RATIO) && (other.get_sew () || other.get_vlmul ())
> +      && get_ratio () != other.get_ratio ())
> +    return false;
> +  if (other.demand_p (DEMAND_RATIO) && (get_sew () || get_vlmul ())
> +      && get_ratio () != other.get_ratio ())
> +    return false;
> +
> +  if (demand_p (DEMAND_TAIL_POLICY) && other.demand_p (DEMAND_TAIL_POLICY)
> +      && get_ta () != other.get_ta ())
> +    return false;
> +  if (demand_p (DEMAND_MASK_POLICY) && other.demand_p (DEMAND_MASK_POLICY)
> +      && get_ma () != other.get_ma ())
> +    return false;
> +
> +  if (demand_p (DEMAND_AVL) && other.demand_p (DEMAND_AVL))
> +    return m_avl == other.get_avl_info ();
> +
> +  return true;
> +}
> +
> +bool
> +vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const
> +{
> +  gcc_assert (valid_or_dirty_p () && "Can't compare invalid
> vl_vtype_info");
> +  gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
> +  if (!demand_p (DEMAND_AVL))
> +    return true;
> +  return get_avl_info () == other.get_avl_info ();
> +}
> +
> +bool
> +vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const
> +{
> +  gcc_assert (valid_or_dirty_p () && "Can't compare invalid
> vl_vtype_info");
> +  gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state");
> +  if (demand_p (DEMAND_SEW) && m_sew != other.get_sew ())
> +    return false;
> +  if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ())
> +    return false;
> +  if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ())
> +    return false;
> +  if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ())
> +    return false;
> +  if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ())
> +    return false;
> +  return true;
> +}
> +
> +/* Determine whether the vector instruction's requirements represented by
> +   Require are compatible with the previous vsetvli instruction represented
> +   by this.  INSN is the instruction whose requirements we're considering.  */
> +bool
> +vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const
> +{
> +  gcc_assert (!uninit_p () && "Can't handle uninitialized info");
> +  if (empty_p ())
> +    return false;
> +
> +  /* Nothing is compatible with Unknown.  */
> +  if (unknown_p ())
> +    return false;
> +
> +  /* If the instruction doesn't need an AVLReg and the SEW matches, consider
> +     it compatible.  */
> +  if (!demand_p (DEMAND_AVL))
> +    if (m_sew == curr_info.get_sew ())
> +      return true;
> +
> +  return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info);
> +}
> +
> +vector_insn_info
> +vector_insn_info::merge (const vector_insn_info &merge_info,
> +                        bool across_bb_p = false) const
> +{
> +  gcc_assert (this->compatible_p (merge_info)
> +             && "Can't merge incompatible demanded infos");
> +
> +  vector_insn_info new_info;
> +  new_info.demand_vl_vtype ();
> +
> +  if (dirty_p ())
> +    {
> +      gcc_assert (across_bb_p);
> +      if (demand_p (DEMAND_AVL))
> +       new_info.set_insn (get_insn ());
> +      else
> +       new_info.set_insn (merge_info.get_insn ());
> +    }
> +  else
> +    {
> +      if (across_bb_p)
> +       new_info.set_insn (get_insn ());
> +      else
> +       new_info.set_insn (merge_info.get_insn ());
> +    }
> +
> +  new_info.set_dirty_pat (merge_info.get_dirty_pat ());
> +
> +  if (!demand_p (DEMAND_AVL) && !merge_info.demand_p (DEMAND_AVL))
> +    new_info.undemand (DEMAND_AVL);
> +  if (!demand_p (DEMAND_SEW) && !merge_info.demand_p (DEMAND_SEW))
> +    new_info.undemand (DEMAND_SEW);
> +  if (!demand_p (DEMAND_LMUL) && !merge_info.demand_p (DEMAND_LMUL))
> +    new_info.undemand (DEMAND_LMUL);
> +
> +  if (!demand_p (DEMAND_TAIL_POLICY)
> +      && !merge_info.demand_p (DEMAND_TAIL_POLICY))
> +    new_info.undemand (DEMAND_TAIL_POLICY);
> +  if (!demand_p (DEMAND_MASK_POLICY)
> +      && !merge_info.demand_p (DEMAND_MASK_POLICY))
> +    new_info.undemand (DEMAND_MASK_POLICY);
> +
> +  if (merge_info.demand_p (DEMAND_AVL))
> +    new_info.set_avl_info (merge_info.get_avl_info ());
> +  else if (demand_p (DEMAND_AVL))
> +    new_info.set_avl_info (get_avl_info ());
> +
> +  if (merge_info.demand_p (DEMAND_SEW))
> +    new_info.set_sew (merge_info.get_sew ());
> +  else if (demand_p (DEMAND_SEW))
> +    new_info.set_sew (get_sew ());
> +
> +  if (merge_info.demand_p (DEMAND_LMUL))
> +    new_info.set_vlmul (merge_info.get_vlmul ());
> +  else if (demand_p (DEMAND_LMUL))
> +    new_info.set_vlmul (get_vlmul ());
> +
> +  if (!new_info.demand_p (DEMAND_SEW) && !new_info.demand_p (DEMAND_LMUL))
> +    {
> +      if (demand_p (DEMAND_RATIO) || merge_info.demand_p (DEMAND_RATIO))
> +       new_info.demand (DEMAND_RATIO);
> +      /* Even though we don't demand_p SEW && VLMUL in this case, we still
> +       * need them.  */
> +      if (merge_info.demand_p (DEMAND_RATIO))
> +       {
> +         new_info.set_sew (merge_info.get_sew ());
> +         new_info.set_vlmul (merge_info.get_vlmul ());
> +         new_info.set_ratio (merge_info.get_ratio ());
> +       }
> +      else if (demand_p (DEMAND_RATIO))
> +       {
> +         new_info.set_sew (get_sew ());
> +         new_info.set_vlmul (get_vlmul ());
> +         new_info.set_ratio (get_ratio ());
> +       }
> +    }
> +  else
> +    {
> +      /* when get_attr_ratio is invalid, this kind of instructions
> +        doesn't care about ratio. However, we still need this value
> +        in demand_p info backward analysis.  */
> +      new_info.set_ratio (
> +       calculate_ratio (new_info.get_sew (), new_info.get_vlmul ()));
> +    }
> +
> +  if (merge_info.demand_p (DEMAND_TAIL_POLICY))
> +    new_info.set_ta (merge_info.get_ta ());
> +  else if (demand_p (DEMAND_TAIL_POLICY))
> +    new_info.set_ta (get_ta ());
> +  else
> +    new_info.set_ta (get_default_ta ());
> +
> +  if (merge_info.demand_p (DEMAND_MASK_POLICY))
> +    new_info.set_ma (merge_info.get_ma ());
> +  else if (demand_p (DEMAND_MASK_POLICY))
> +    new_info.set_ma (get_ma ());
> +  else
> +    new_info.set_ma (get_default_ma ());
> +
> +  return new_info;
> +}
> +
> +void
> +vector_insn_info::dump (FILE *file) const
> +{
> +  fprintf (file, "[");
> +  if (uninit_p ())
> +    fprintf (file, "UNINITIALIZED,");
> +  else if (valid_p ())
> +    fprintf (file, "VALID,");
> +  else if (unknown_p ())
> +    fprintf (file, "UNKNOWN,");
> +  else if (empty_p ())
> +    fprintf (file, "EMPTY,");
> +  else
> +    fprintf (file, "DIRTY,");
> +
> +  fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL));
> +  fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW));
> +  fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL));
> +  fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO));
> +  fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY));
> +  fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY));
> +
> +  fprintf (file, "AVL=");
> +  print_rtl_single (file, get_avl ());
> +  fprintf (file, "SEW=%d,", get_sew ());
> +  fprintf (file, "VLMUL=%d,", get_vlmul ());
> +  fprintf (file, "RATIO=%d,", get_ratio ());
> +  fprintf (file, "TAIL_POLICY=%d,", get_ta ());
> +  fprintf (file, "MASK_POLICY=%d", get_ma ());
> +  fprintf (file, "]\n");
> +
> +  if (valid_p ())
> +    {
> +      if (get_insn ())
> +       {
> +         fprintf (file, "RTL_SSA insn_info=");
> +         pretty_printer pp;
> +         pp.buffer->stream = file;
> +         get_insn ()->print_full (&pp);
> +         pp_printf (&pp, "\n");
> +         pp_flush (&pp);
> +       }
> +      if (get_dirty_pat ())
> +       {
> +         fprintf (file, "Dirty RTL Pattern=");
> +         print_rtl_single (file, get_dirty_pat ());
> +       }
> +    }
> +}
> +
> +vector_infos_manager::vector_infos_manager ()
> +{
> +  vector_edge_list = nullptr;
> +  vector_kill = nullptr;
> +  vector_del = nullptr;
> +  vector_insert = nullptr;
> +  vector_antic = nullptr;
> +  vector_transp = nullptr;
> +  vector_comp = nullptr;
> +  vector_avin = nullptr;
> +  vector_avout = nullptr;
> +  vector_insn_infos.safe_grow (get_max_uid ());
> +  vector_block_infos.safe_grow (last_basic_block_for_fn (cfun));
> +  if (!optimize)
> +    {
> +      basic_block cfg_bb;
> +      rtx_insn *rinsn;
> +      FOR_ALL_BB_FN (cfg_bb, cfun)
> +       {
> +         vector_block_infos[cfg_bb->index].local_dem = vector_insn_info ();
> +         vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info ();
> +         FOR_BB_INSNS (cfg_bb, rinsn)
> +           vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn);
> +       }
> +    }
> +  else
> +    {
> +      for (const bb_info *bb : crtl->ssa->bbs ())
> +       {
> +         vector_block_infos[bb->index ()].local_dem = vector_insn_info ();
> +         vector_block_infos[bb->index ()].reaching_out = vector_insn_info ();
> +         for (insn_info *insn : bb->real_insns ())
> +           vector_insn_infos[insn->uid ()].parse_insn (insn);
> +       }
> +    }
> +}
> +
> +void
> +vector_infos_manager::create_expr (vector_insn_info &info)
> +{
> +  for (size_t i = 0; i < vector_exprs.length (); i++)
> +    if (*vector_exprs[i] == info)
> +      return;
> +  vector_exprs.safe_push (&info);
> +}
> +
> +size_t
> +vector_infos_manager::get_expr_id (const vector_insn_info &info) const
> +{
> +  for (size_t i = 0; i < vector_exprs.length (); i++)
> +    if (*vector_exprs[i] == info)
> +      return i;
> +  gcc_unreachable ();
> +}
> +
> +auto_vec<size_t>
> +vector_infos_manager::get_all_available_exprs (
> +  const vector_insn_info &info) const
> +{
> +  auto_vec<size_t> available_list;
> +  for (size_t i = 0; i < vector_exprs.length (); i++)
> +    if (info >= *vector_exprs[i])
> +      available_list.safe_push (i);
> +  return available_list;
> +}
> +
> +bool
> +vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const
> +{
> +  if (bitmap_empty_p (bitdata))
> +    return false;
> +
> +  int ratio = -1;
> +  unsigned int bb_index;
> +  sbitmap_iterator sbi;
> +
> +  EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
> +  {
> +    if (ratio == -1)
> +      ratio = vector_exprs[bb_index]->get_ratio ();
> +    else if (vector_exprs[bb_index]->get_ratio () != ratio)
> +      return false;
> +  }
> +  return true;
> +}
> +
> +size_t
> +vector_infos_manager::expr_set_num (sbitmap bitdata) const
> +{
> +  size_t count = 0;
> +  for (size_t i = 0; i < vector_exprs.length (); i++)
> +    if (bitmap_bit_p (bitdata, i))
> +      count++;
> +  return count;
> +}
> +
> +void
> +vector_infos_manager::release (void)
> +{
> +  if (!vector_insn_infos.is_empty ())
> +    vector_insn_infos.release ();
> +  if (!vector_block_infos.is_empty ())
> +    vector_block_infos.release ();
> +  if (!vector_exprs.is_empty ())
> +    vector_exprs.release ();
> +
> +  if (optimize > 0)
> +    {
> +      /* Finished. Free up all the things we've allocated.  */
> +      free_edge_list (vector_edge_list);
> +      sbitmap_vector_free (vector_del);
> +      sbitmap_vector_free (vector_insert);
> +      sbitmap_vector_free (vector_kill);
> +      sbitmap_vector_free (vector_antic);
> +      sbitmap_vector_free (vector_transp);
> +      sbitmap_vector_free (vector_comp);
> +      sbitmap_vector_free (vector_avin);
> +      sbitmap_vector_free (vector_avout);
> +    }
> +}
> +
> +void
> +vector_infos_manager::dump (FILE *file) const
> +{
> +  basic_block cfg_bb;
> +  rtx_insn *rinsn;
> +
> +  fprintf (file, "\n");
> +  FOR_ALL_BB_FN (cfg_bb, cfun)
> +    {
> +      fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index);
> +      fprintf (file, "<HEADER>=");
> +      vector_block_infos[cfg_bb->index].local_dem.dump (file);
> +      FOR_BB_INSNS (cfg_bb, rinsn)
> +       {
> +         if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn))
> +           continue;
> +         fprintf (file, "<insn %d>=", INSN_UID (rinsn));
> +         const auto &info = vector_insn_infos[INSN_UID (rinsn)];
> +         info.dump (file);
> +       }
> +      fprintf (file, "<FOOTER>=");
> +      vector_block_infos[cfg_bb->index].reaching_out.dump (file);
> +      fprintf (file, "\n\n");
> +    }
> +
> +  fprintf (file, "\n");
> +  FOR_ALL_BB_FN (cfg_bb, cfun)
> +    {
> +      fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index);
> +
> +      fprintf (file, "<ANTLOC>=");
> +      if (vector_antic == nullptr)
> +       fprintf (file, "(nil)\n");
> +      else
> +       dump_bitmap_file (file, vector_antic[cfg_bb->index]);
> +
> +      fprintf (file, "<AVLOC>=");
> +      if (vector_comp == nullptr)
> +       fprintf (file, "(nil)\n");
> +      else
> +       dump_bitmap_file (file, vector_comp[cfg_bb->index]);
> +
> +      fprintf (file, "<TRANSP>=");
> +      if (vector_transp == nullptr)
> +       fprintf (file, "(nil)\n");
> +      else
> +       dump_bitmap_file (file, vector_transp[cfg_bb->index]);
> +
> +      fprintf (file, "<KILL>=");
> +      if (vector_kill == nullptr)
> +       fprintf (file, "(nil)\n");
> +      else
> +       dump_bitmap_file (file, vector_kill[cfg_bb->index]);
> +    }
> +
> +  fprintf (file, "\n");
> +  FOR_ALL_BB_FN (cfg_bb, cfun)
> +    {
> +      fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n",
> +              cfg_bb->index);
> +
> +      fprintf (file, "<AVIN>=");
> +      if (vector_avin == nullptr)
> +       fprintf (file, "(nil)\n");
> +      else
> +       dump_bitmap_file (file, vector_avin[cfg_bb->index]);
> +
> +      fprintf (file, "<AVOUT>=");
> +      if (vector_avout == nullptr)
> +       fprintf (file, "(nil)\n");
> +      else
> +       dump_bitmap_file (file, vector_avout[cfg_bb->index]);
> +
> +      fprintf (file, "<DELETE>=");
> +      if (vector_del == nullptr)
> +       fprintf (file, "(nil)\n");
> +      else
> +       dump_bitmap_file (file, vector_del[cfg_bb->index]);
> +    }
> +
> +  fprintf (file, "\nGlobal LCM (Lazy code motion) INSERT info:\n");
> +  for (size_t i = 0; i < vector_exprs.length (); i++)
> +    {
> +      for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++)
> +       {
> +         edge eg = INDEX_EDGE (vector_edge_list, ed);
> +         if (bitmap_bit_p (vector_insert[ed], i))
> +           fprintf (dump_file,
> +                    "INSERT edge %d from bb %d to bb %d for VSETVL "
> +                    "expr[%ld]\n",
> +                    ed, eg->src->index, eg->dest->index, i);
> +       }
> +    }
> +}
> +
> +const pass_data pass_data_vsetvl = {
> +  RTL_PASS,     /* type */
> +  "vsetvl",     /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_NONE,      /* tv_id */
> +  0,            /* properties_required */
> +  0,            /* properties_provided */
> +  0,            /* properties_destroyed */
> +  0,            /* todo_flags_start */
> +  0,            /* todo_flags_finish */
> +};
> +
> +class pass_vsetvl : public rtl_opt_pass
> +{
> +private:
> +  class vector_infos_manager *m_vector_manager;
> +
> +  void simple_vsetvl (void) const;
> +  void lazy_vsetvl (void);
> +
> +  /* Phase 1.  */
> +  void compute_local_backward_infos (const bb_info *);
> +
> +  /* Phase 2.  */
> +  bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const;
> +  void transfer_before (vector_insn_info &, insn_info *) const;
> +  void transfer_after (vector_insn_info &, insn_info *) const;
> +  void emit_local_forward_vsetvls (const bb_info *);
> +
> +  /* Phase 3.  */
> +  void merge_successors (const basic_block, const basic_block);
> +  void compute_global_backward_infos (void);
> +
> +  /* Phase 4.  */
> +  void prune_expressions (void);
> +  void compute_local_properties (void);
> +  bool can_refine_vsetvl_p (const basic_block, uint8_t) const;
> +  void refine_vsetvls (void) const;
> +  void cleanup_vsetvls (void);
> +  bool commit_vsetvls (void);
> +  void pre_vsetvl (void);
> +
> +  /* Phase 5.  */
> +  void cleanup_insns (void) const;
> +
> +  void init (void);
> +  void done (void);
> +
> +public:
> +  pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}
> +
> +  /* opt_pass methods: */
> +  virtual bool gate (function *) final override { return TARGET_VECTOR; }
> +  virtual unsigned int execute (function *) final override;
> +}; // class pass_vsetvl
> +
> +/* Simple vsetvl insertion for optimize == 0.  */
> +void
> +pass_vsetvl::simple_vsetvl (void) const
> +{
> +  if (dump_file)
> +    fprintf (dump_file,
> +            "\nEntering Simple VSETVL PASS and Handling %d basic blocks for "
> +            "function:%s\n",
> +            n_basic_blocks_for_fn (cfun), function_name (cfun));
> +
> +  basic_block cfg_bb;
> +  rtx_insn *rinsn;
> +  FOR_ALL_BB_FN (cfg_bb, cfun)
> +    {
> +      FOR_BB_INSNS (cfg_bb, rinsn)
> +       {
> +         if (!NONDEBUG_INSN_P (rinsn))
> +           continue;
> +         if (has_vtype_op (rinsn))
> +           {
> +             const auto info
> +               = m_vector_manager->vector_insn_infos[INSN_UID (rinsn)];
> +             emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info,
> +                               NULL_RTX, rinsn);
> +           }
> +       }
> +    }
> +}
> +
> +/* Compute demanded information by backward data-flow analysis.  */
> +void
> +pass_vsetvl::compute_local_backward_infos (const bb_info *bb)
> +{
> +  vector_insn_info change;
> +  change.set_empty ();
> +
> +  auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
> +  block_info.reaching_out = change;
> +
> +  for (insn_info *insn : bb->reverse_real_nondebug_insns ())
> +    {
> +      auto &info = m_vector_manager->vector_insn_infos[insn->uid ()];
> +
> +      if (info.uninit_p ())
> +       /* If it is uninitialized, propagate it directly.  */
> +       info = change;
> +      else if (info.unknown_p ())
> +       change = info;
> +      else
> +       {
> +         gcc_assert (info.valid_p () && "Unexpected Invalid demanded info");
> +         if (change.valid_p () && change.compatible_p (info))
> +           info = change.merge (info);
> +         change = info;
> +       }
> +    }
> +
> +  block_info.local_dem = change;
> +  if (block_info.local_dem.empty_p ())
> +    block_info.reaching_out = block_info.local_dem;
> +}
> +
> +/* Return true if a vsetvl is required to transition from curr_info to
> +   require before INSN.  */
> +bool
> +pass_vsetvl::need_vsetvl (const vector_insn_info &require,
> +                         const vector_insn_info &curr_info) const
> +{
> +  if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ())
> +    return true;
> +
> +  if (require.compatible_p (curr_info))
> +    return false;
> +
> +  return true;
> +}
> +
> +/* Given an incoming state reaching INSN, modifies that state so that it is
> +   minimally compatible with INSN.  The resulting state is guaranteed to be
> +   semantically legal for INSN, but may not be the state requested by INSN.  */
> +void
> +pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const
> +{
> +  if (!has_vtype_op (insn->rtl ()))
> +    return;
> +
> +  const vector_insn_info require
> +    = m_vector_manager->vector_insn_infos[insn->uid ()];
> +  if (info.valid_p () && !need_vsetvl (require, info))
> +    return;
> +  info = require;
> +}
> +
> +/* Given a state with which we evaluated insn (see transfer_before above for why
> +   this might be different than the state insn requested), modify the state to
> +   reflect the changes insn might make.  */
> +void
> +pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const
> +{
> +  if (vector_config_insn_p (insn->rtl ()))
> +    {
> +      info = m_vector_manager->vector_insn_infos[insn->uid ()];
> +      return;
> +    }
> +
> +  /* TODO: Support fault first load info update VL in the future.  */
> +
> +  /* If this is something that updates VL/VTYPE that we don't know about, set
> +     the state to unknown.  */
> +  if (insn->is_call () || insn->is_asm ()
> +      || find_access (insn->defs (), VL_REGNUM)
> +      || find_access (insn->defs (), VTYPE_REGNUM))
> +    info = vector_insn_info::get_unknown ();
> +}
> +
> +/* Emit vsetvl within each block by forward data-flow analysis.  */
> +void
> +pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb)
> +{
> +  auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
> +  if (block_info.local_dem.empty_p ())
> +    return;
> +
> +  vector_insn_info curr_info;
> +  for (insn_info *insn : bb->real_nondebug_insns ())
> +    {
> +      const vector_insn_info prev_info = curr_info;
> +      transfer_before (curr_info, insn);
> +
> +      if (has_vtype_op (insn->rtl ()))
> +       {
> +         if (static_cast<const vl_vtype_info &> (prev_info)
> +             != static_cast<const vl_vtype_info &> (curr_info))
> +           {
> +             const auto require
> +               = m_vector_manager->vector_insn_infos[insn->uid ()];
> +             if (!require.compatible_p (
> +                   static_cast<const vl_vtype_info &> (prev_info)))
> +               insert_vsetvl (EMIT_BEFORE, insn->rtl (), require, prev_info);
> +           }
> +       }
> +
> +      transfer_after (curr_info, insn);
> +    }
> +
> +  block_info.reaching_out = curr_info;
> +}
> +
> +/* Merge all successors of Father except child node.  */
> +void
> +pass_vsetvl::merge_successors (const basic_block father,
> +                              const basic_block child)
> +{
> +  edge e;
> +  edge_iterator ei;
> +  auto &father_info = m_vector_manager->vector_block_infos[father->index];
> +  gcc_assert (father_info.local_dem.dirty_p ()
> +             || father_info.local_dem.empty_p ());
> +  gcc_assert (father_info.reaching_out.dirty_p ()
> +             || father_info.reaching_out.empty_p ());
> +
> +  FOR_EACH_EDGE (e, ei, father->succs)
> +    {
> +      const basic_block succ = e->dest;
> +      if (succ->index == child->index)
> +       continue;
> +
> +      const auto succ_info
> +       = m_vector_manager->vector_block_infos[succ->index].local_dem;
> +
> +      if (!succ_info.valid_p ())
> +       continue;
> +
> +      vector_insn_info new_info;
> +      if (father_info.reaching_out.dirty_p ())
> +       {
> +         if (!father_info.reaching_out.compatible_p (succ_info))
> +           continue;
> +
> +         new_info = succ_info.merge (father_info.reaching_out, true);
> +       }
> +      else
> +       new_info = succ_info;
> +
> +      new_info.set_dirty ();
> +      rtx new_pat = gen_vsetvl_pat (new_info.get_insn ()->rtl (), new_info);
> +      new_info.set_dirty_pat (new_pat);
> +
> +      father_info.local_dem = new_info;
> +      father_info.reaching_out = new_info;
> +    }
> +}
> +
> +/* Compute global backward demanded info.  */
> +void
> +pass_vsetvl::compute_global_backward_infos (void)
> +{
> +  /* We compute global infos by backward propagation.
> +     We want to have better performance in these following cases:
> +
> +       1. for (size_t i = 0; i < n; i++) {
> +            if (i != cond) {
> +              vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
> +              *(vint8mf8_t*)(out + i + 100) = v;
> +            } else {
> +              vbool1_t v = *(vbool1_t*)(in + i + 400);
> +              *(vbool1_t*)(out + i + 400) = v;
> +            }
> +          }
> +
> +          Since we don't have any RVV instruction in the BEFORE blocks,
> +          LCM fails to optimize such case. We want to backward propagate
> +          them into empty blocks so that we could have better performance
> +          in LCM.
> +
> +       2. bb 0:
> +            vsetvl e8,mf8 (demand RATIO)
> +          bb 1:
> +            vsetvl e32,mf2 (demand SEW and LMUL)
> +          We backward propagate the first VSETVL into e32,mf2 so that
> +          LCM can eliminate the second VSETVL.  */
> +
> +  for (const bb_info *bb : crtl->ssa->reverse_bbs ())
> +    {
> +      basic_block cfg_bb = bb->cfg_bb ();
> +      const auto &prop
> +       = m_vector_manager->vector_block_infos[cfg_bb->index].local_dem;
> +
> +      /* If there is nothing to propagate, just skip it.  */
> +      if (!prop.valid_or_dirty_p ())
> +       continue;
> +
> +      if (!backward_propagate_worthwhile_p (
> +           cfg_bb, m_vector_manager->vector_block_infos[cfg_bb->index]))
> +       continue;
> +
> +      edge e;
> +      edge_iterator ei;
> +      /* Backward propagate to each predecessor.  */
> +      FOR_EACH_EDGE (e, ei, cfg_bb->preds)
> +       {
> +         rtx new_pat;
> +         auto &block_info
> +           = m_vector_manager->vector_block_infos[e->src->index];
> +
> +         /* We don't propagate through critical edges.  */
> +         if (e->flags & EDGE_COMPLEX)
> +           continue;
> +         if (e->src->index == ENTRY_BLOCK_PTR_FOR_FN (cfun)->index)
> +           continue;
> +
> +         if (block_info.reaching_out.unknown_p ())
> +           continue;
> +         else if (block_info.reaching_out.empty_p ())
> +           {
> +             if (!can_backward_propagate_p (crtl->ssa, e->src, prop))
> +               continue;
> +
> +             if (dominate_probability_p (e))
> +               {
> +                 rtx new_pat = gen_vsetvl_pat (prop.get_insn ()->rtl (), prop);
> +
> +                 block_info.reaching_out = prop;
> +                 block_info.reaching_out.set_dirty ();
> +                 block_info.reaching_out.set_dirty_pat (new_pat);
> +                 block_info.local_dem = block_info.reaching_out;
> +               }
> +
> +             merge_successors (e->src, cfg_bb);
> +           }
> +         else if (block_info.reaching_out.dirty_p ())
> +           {
> +             /* DIRTY -> DIRTY or VALID -> DIRTY.  */
> +             vector_insn_info new_info;
> +
> +             if (block_info.reaching_out.compatible_p (prop))
> +               {
> +                 if (block_info.reaching_out >= prop)
> +                   continue;
> +                 new_info = block_info.reaching_out.merge (prop, true);
> +               }
> +             else
> +               {
> +                 if (dominate_probability_p (e))
> +                   new_info = prop;
> +                 else
> +                   continue;
> +               }
> +
> +             rtx new_pat
> +               = gen_vsetvl_pat (new_info.get_insn ()->rtl (), new_info);
> +             new_info.set_dirty ();
> +             new_info.set_dirty_pat (new_pat);
> +             block_info.local_dem = new_info;
> +             block_info.reaching_out = new_info;
> +           }
> +         else
> +           {
> +             /* We not only change the info during backward propagation,
> +                but also change the VSETVL instruction.  */
> +             gcc_assert (block_info.reaching_out.valid_p ());
> +             if (!block_info.reaching_out.compatible_p (prop))
> +               continue;
> +             if (block_info.reaching_out >= prop)
> +               continue;
> +
> +             vector_insn_info be_merged = block_info.reaching_out;
> +             if (block_info.local_dem == block_info.reaching_out)
> +               be_merged = block_info.local_dem;
> +             vector_insn_info new_info = be_merged.merge (prop, true);
> +
> +             rtx_insn *rinsn;
> +             if (vector_config_insn_p (new_info.get_insn ()->rtl ()))
> +               {
> +                 rinsn = new_info.get_insn ()->rtl ();
> +                 gcc_assert (vsetvl_insn_p (rinsn)
> +                             && "Can't handle X0, rs1 vsetvli yet");
> +               }
> +             else
> +               {
> +                 gcc_assert (has_vtype_op (new_info.get_insn ()->rtl ()));
> +                 rinsn = PREV_INSN (new_info.get_insn ()->rtl ());
> +                 gcc_assert (vector_config_insn_p (rinsn));
> +               }
> +             new_pat = gen_vsetvl_pat (rinsn, new_info);
> +             change_insn (rinsn, new_pat);
> +             if (block_info.local_dem == block_info.reaching_out)
> +               block_info.local_dem = new_info;
> +             block_info.reaching_out = new_info;
> +           }
> +       }
> +    }
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "\n\nDirty blocks list: ");
> +      for (size_t i = 0; i < m_vector_manager->vector_block_infos.length ();
> +          i++)
> +       {
> +         if (m_vector_manager->vector_block_infos[i].reaching_out.dirty_p ())
> +           fprintf (dump_file, "%ld ", i);
> +       }
> +      fprintf (dump_file, "\n\n");
> +    }
> +}
> +
> +/* Assemble the candidate expressions for LCM.  */
> +void
> +pass_vsetvl::prune_expressions (void)
> +{
> +  for (size_t i = 0; i < m_vector_manager->vector_block_infos.length (); i++)
> +    {
> +      if (m_vector_manager->vector_block_infos[i].local_dem.valid_or_dirty_p ())
> +       m_vector_manager->create_expr (
> +         m_vector_manager->vector_block_infos[i].local_dem);
> +      if (m_vector_manager->vector_block_infos[i]
> +           .reaching_out.valid_or_dirty_p ())
> +       m_vector_manager->create_expr (
> +         m_vector_manager->vector_block_infos[i].reaching_out);
> +    }
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "\nThe total VSETVL expression num = %d\n",
> +              m_vector_manager->vector_exprs.length ());
> +      fprintf (dump_file, "Expression List:\n");
> +      for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
> +       {
> +         fprintf (dump_file, "Expr[%ld]:\n", i);
> +         m_vector_manager->vector_exprs[i]->dump (dump_file);
> +         fprintf (dump_file, "\n");
> +       }
> +    }
> +}
> +
> +void
> +pass_vsetvl::compute_local_properties (void)
> +{
> +  /* -  If T is locally available at the end of a block, then T' must be
> +       available at the end of the same block. Since some optimization has
> +       occurred earlier, T' might not be locally available, however, it must
> +       have been previously computed on all paths. As a formula, T at AVLOC(B)
> +       implies that T' at AVOUT(B).
> +       An "available occurrence" is one that is the last occurrence in the
> +       basic block and the operands are not modified by following statements in
> +       the basic block [including this insn].
> +
> +     -  If T is locally anticipated at the beginning of a block, then either
> +       T' is locally anticipated or it is already available from previous
> +       blocks. As a formula, this means that T at ANTLOC(B) implies that T' at
> +       ANTLOC(B) at AVIN(B).
> +       An "anticipatable occurrence" is one that is the first occurrence in the
> +       basic block, the operands are not modified in the basic block prior
> +       to the occurrence and the output is not used between the start of
> +       the block and the occurrence.  */
> +
> +  basic_block cfg_bb;
> +  FOR_EACH_BB_FN (cfg_bb, cfun)
> +    {
> +      int curr_bb_idx = cfg_bb->index;
> +      const auto local_dem
> +       = m_vector_manager->vector_block_infos[curr_bb_idx].local_dem;
> +      const auto reaching_out
> +       = m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out;
> +
> +      if (!local_dem.empty_p ())
> +       {
> +         for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
> +           bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i);
> +       }
> +
> +      if (local_dem.valid_or_dirty_p ())
> +       {
> +         const insn_info *header_insn = local_dem.get_insn ();
> +         size_t header_index = m_vector_manager->get_expr_id (local_dem);
> +         if (anticipatable_occurrence_p (header_insn, local_dem))
> +           bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx],
> +                           header_index);
> +       }
> +
> +      if (reaching_out.valid_or_dirty_p ())
> +       {
> +         const insn_info *footer_insn = reaching_out.get_insn ();
> +         size_t footer_index = m_vector_manager->get_expr_id (reaching_out);
> +         if (available_occurrence_p (footer_insn, reaching_out))
> +           bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx],
> +                           footer_index);
> +         auto_vec<size_t> available_list
> +           = m_vector_manager->get_all_available_exprs (reaching_out);
> +         for (size_t i = 0; i < available_list.length (); i++)
> +           bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx],
> +                           available_list[i]);
> +       }
> +    }
> +
> +  /* Compute kill for each basic block using:
> +
> +     ~(TRANSP | COMP)
> +  */
> +
> +  FOR_EACH_BB_FN (cfg_bb, cfun)
> +    {
> +      bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index],
> +                 m_vector_manager->vector_transp[cfg_bb->index],
> +                 m_vector_manager->vector_comp[cfg_bb->index]);
> +      bitmap_not (m_vector_manager->vector_kill[cfg_bb->index],
> +                 m_vector_manager->vector_kill[cfg_bb->index]);
> +    }
> +
> +  FOR_EACH_BB_FN (cfg_bb, cfun)
> +    {
> +      edge e;
> +      edge_iterator ei;
> +
> +      /* If the current block is the destination of an abnormal edge, we
> +        kill all trapping (for PRE) and memory (for hoist) expressions
> +        because we won't be able to properly place the instruction on
> +        the edge.  So make them neither anticipatable nor transparent.
> +        This is fairly conservative.
> +
> +        ??? For hoisting it may be necessary to check for set-and-jump
> +        instructions here, not just for abnormal edges.  The general problem
> +        is that when an expression cannot be placed right at the end of
> +        a basic block we should account for any side-effects of subsequent
> +        jump instructions that could clobber the expression.  It would
> +        be best to implement this check along the lines of
> +        should_hoist_expr_to_dom where the target block is already known
> +        and, hence, there's no need to conservatively prune expressions on
> +        "intermediate" set-and-jump instructions.  */
> +      FOR_EACH_EDGE (e, ei, cfg_bb->preds)
> +       if (e->flags & EDGE_COMPLEX)
> +         {
> +           bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]);
> +           bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]);
> +         }
> +    }
> +}
> +
> +/* Return true if VSETVL in the block can be refined as vsetvl zero,zero.  */
> +bool
> +pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb, uint8_t ratio) const
> +{
> +  if (!m_vector_manager->all_same_ratio_p (
> +       m_vector_manager->vector_avin[cfg_bb->index]))
> +    return false;
> +
> +  size_t expr_id
> +    = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]);
> +  if (m_vector_manager->vector_exprs[expr_id]->get_ratio () != ratio)
> +    return false;
> +
> +  edge e;
> +  edge_iterator ei;
> +  bool all_valid_p = true;
> +  FOR_EACH_EDGE (e, ei, cfg_bb->preds)
> +    {
> +      if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index]))
> +       {
> +         all_valid_p = false;
> +         break;
> +       }
> +    }
> +
> +  if (!all_valid_p)
> +    return false;
> +  return true;
> +}
> +
> +/* Optimize the case like this:
> +
> +      bb 0:
> +       vsetvl 0 a5,zero,e8,mf8
> +       insn 0 (demand SEW + LMUL)
> +      bb 1:
> +       vsetvl 1 a5,zero,e16,mf4
> +       insn 1 (demand SEW + LMUL)
> +
> +   In this case, we should be able to refine
> +   vsetvl 1 into vsetvl zero, zero according to AVIN.  */
> +void
> +pass_vsetvl::refine_vsetvls (void) const
> +{
> +  basic_block cfg_bb;
> +  FOR_EACH_BB_FN (cfg_bb, cfun)
> +    {
> +      auto info = m_vector_manager->vector_block_infos[cfg_bb->index].local_dem;
> +      insn_info *insn = info.get_insn ();
> +      if (!info.valid_p ())
> +       continue;
> +
> +      rtx_insn *rinsn = insn->rtl ();
> +      if (!can_refine_vsetvl_p (cfg_bb, info.get_ratio ()))
> +       continue;
> +
> +      if (!vector_config_insn_p (rinsn))
> +       rinsn = PREV_INSN (rinsn);
> +      rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
> +      change_insn (rinsn, new_pat);
> +    }
> +}
> +
> +void
> +pass_vsetvl::cleanup_vsetvls ()
> +{
> +  basic_block cfg_bb;
> +  FOR_EACH_BB_FN (cfg_bb, cfun)
> +    {
> +      auto &info
> +       = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
> +      gcc_assert (m_vector_manager->expr_set_num (
> +                   m_vector_manager->vector_del[cfg_bb->index])
> +                 <= 1);
> +      for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
> +       {
> +         if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i))
> +           {
> +             if (info.dirty_p ())
> +               info.set_unknown ();
> +             else
> +               {
> +                 insn_info *insn
> +                   = m_vector_manager->vector_exprs[i]->get_insn ();
> +                 gcc_assert (insn && insn->rtl ());
> +                 rtx_insn *rinsn;
> +                 if (vector_config_insn_p (insn->rtl ()))
> +                   rinsn = insn->rtl ();
> +                 else
> +                   {
> +                     gcc_assert (has_vtype_op (insn->rtl ()));
> +                     rinsn = PREV_INSN (insn->rtl ());
> +                     gcc_assert (
> +                       vector_config_insn_p (PREV_INSN (insn->rtl ())));
> +                   }
> +                 eliminate_insn (rinsn);
> +               }
> +           }
> +       }
> +    }
> +}
> +
> +bool
> +pass_vsetvl::commit_vsetvls (void)
> +{
> +  bool need_commit = false;
> +
> +  for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
> +    {
> +      for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
> +       {
> +         edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
> +         if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i))
> +           {
> +             const vector_insn_info *require
> +               = m_vector_manager->vector_exprs[i];
> +             gcc_assert (require->valid_or_dirty_p ());
> +             rtl_profile_for_edge (eg);
> +             start_sequence ();
> +
> +             insn_info *insn = require->get_insn ();
> +             vector_insn_info prev_info = vector_insn_info ();
> +             if (m_vector_manager->all_same_ratio_p (
> +                   m_vector_manager->vector_avout[eg->src->index]))
> +               {
> +                 size_t first = bitmap_first_set_bit (
> +                   m_vector_manager->vector_avout[eg->src->index]);
> +                 prev_info = *m_vector_manager->vector_exprs[first];
> +               }
> +
> +             insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info);
> +             rtx_insn *rinsn = get_insns ();
> +             end_sequence ();
> +             default_rtl_profile ();
> +
> +             /* We should not get an abnormal edge here.  */
> +             gcc_assert (!(eg->flags & EDGE_ABNORMAL));
> +             need_commit = true;
> +             insert_insn_on_edge (rinsn, eg);
> +           }
> +       }
> +    }
> +
> +  basic_block cfg_bb;
> +  FOR_EACH_BB_FN (cfg_bb, cfun)
> +    {
> +      const auto reaching_out
> +       = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
> +      if (!reaching_out.dirty_p ())
> +       continue;
> +
> +      rtx new_pat = reaching_out.get_dirty_pat ();
> +      if (can_refine_vsetvl_p (cfg_bb, reaching_out.get_ratio ()))
> +       new_pat
> +         = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX);
> +
> +      start_sequence ();
> +      emit_insn (new_pat);
> +      rtx_insn *rinsn = get_insns ();
> +      end_sequence ();
> +      insert_insn_end_basic_block (rinsn, cfg_bb);
> +      if (dump_file)
> +       {
> +         fprintf (dump_file,
> +                  "\nInsert vsetvl insn %d at the end of <bb %d>:\n",
> +                  INSN_UID (rinsn), cfg_bb->index);
> +         print_rtl_single (dump_file, rinsn);
> +       }
> +    }
> +
> +  return need_commit;
> +}
> +
> +void
> +pass_vsetvl::pre_vsetvl (void)
> +{
> +  /* Compute entity list.  */
> +  prune_expressions ();
> +
> +  /* Create the bitmap vectors.  */
> +  m_vector_manager->vector_antic
> +    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
> +                           m_vector_manager->vector_exprs.length ());
> +  m_vector_manager->vector_transp
> +    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
> +                           m_vector_manager->vector_exprs.length ());
> +  m_vector_manager->vector_comp
> +    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
> +                           m_vector_manager->vector_exprs.length ());
> +  m_vector_manager->vector_avin
> +    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
> +                           m_vector_manager->vector_exprs.length ());
> +  m_vector_manager->vector_avout
> +    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
> +                           m_vector_manager->vector_exprs.length ());
> +  m_vector_manager->vector_kill
> +    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
> +                           m_vector_manager->vector_exprs.length ());
> +
> +  bitmap_vector_ones (m_vector_manager->vector_transp,
> +                     last_basic_block_for_fn (cfun));
> +  bitmap_vector_clear (m_vector_manager->vector_antic,
> +                      last_basic_block_for_fn (cfun));
> +  bitmap_vector_clear (m_vector_manager->vector_comp,
> +                      last_basic_block_for_fn (cfun));
> +  compute_local_properties ();
> +  m_vector_manager->vector_edge_list = pre_edge_lcm_avs (
> +    m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp,
> +    m_vector_manager->vector_comp, m_vector_manager->vector_antic,
> +    m_vector_manager->vector_kill, m_vector_manager->vector_avin,
> +    m_vector_manager->vector_avout, &m_vector_manager->vector_insert,
> +    &m_vector_manager->vector_del);
> +
> +  /* We should dump the information before CFG is changed. Otherwise it will
> +     produce ICE (internal compiler error).  */
> +  if (dump_file)
> +    m_vector_manager->dump (dump_file);
> +
> +  refine_vsetvls ();
> +  cleanup_vsetvls ();
> +  bool need_commit = commit_vsetvls ();
> +  if (need_commit)
> +    commit_edge_insertions ();
> +}
> +
> +void
> +pass_vsetvl::cleanup_insns (void) const
> +{
> +  for (const bb_info *bb : crtl->ssa->bbs ())
> +    {
> +      for (insn_info *insn : bb->real_nondebug_insns ())
> +       {
> +         rtx_insn *rinsn = insn->rtl ();
> +
> +         if (vlmax_avl_insn_p (rinsn))
> +           {
> +             eliminate_insn (rinsn);
> +             continue;
> +           }
> +
> +         /* Erase the AVL operand from the instruction.  */
> +         if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn)))
> +           continue;
> +         rtx avl = get_vl (rinsn);
> +         if (count_occurrences (PATTERN (rinsn), avl, true) == 1)
> +           {
> +             /* Get the list of uses for the new instruction.  */
> +             auto attempt = crtl->ssa->new_change_attempt ();
> +             insn_change change (insn);
> +             /* Remove the use of the substituted value.  */
> +             access_array_builder uses_builder (attempt);
> +             uses_builder.reserve (insn->num_uses () - 1);
> +             for (use_info *use : insn->uses ())
> +               if (use != find_access (insn->uses (), REGNO (avl)))
> +                 uses_builder.quick_push (use);
> +             use_array new_uses = use_array (uses_builder.finish ());
> +             change.new_uses = new_uses;
> +             change.move_range = insn->ebb ()->insn_range ();
> +             rtx pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
> +             gcc_assert (change_insn (crtl->ssa, change, insn, pat));
> +           }
> +       }
> +    }
> +}
> +
> +void
> +pass_vsetvl::init (void)
> +{
> +  if (optimize > 0)
> +    {
> +      /* Initialization of RTL_SSA.  */
> +      calculate_dominance_info (CDI_DOMINATORS);
> +      df_analyze ();
> +      crtl->ssa = new function_info (cfun);
> +
Andreas Schwab Dec. 23, 2022, 10:53 a.m. UTC | #2
How has this been tested?

In file included from ../../gcc/config/riscv/riscv-vsetvl.cc:89:
../../gcc/config/riscv/riscv-vsetvl.h: In member function 'riscv_vector::avl_info riscv_vector::vl_vtype_info::get_avl_info() const':
../../gcc/config/riscv/riscv-vsetvl.h:175:43: error: implicitly-declared 'constexpr riscv_vector::avl_info::avl_info(const riscv_vector::avl_info&)' is deprecated [-Werror=deprecated-copy]
  175 |   avl_info get_avl_info () const { return m_avl; }
      |                                           ^~~~~
../../gcc/config/riscv/riscv-vsetvl.h:131:13: note: because 'riscv_vector::avl_info' has user-provided 'riscv_vector::avl_info& riscv_vector::avl_info::operator=(const riscv_vector::avl_info&)'
  131 |   avl_info &operator= (const avl_info &);
      |             ^~~~~~~~
../../gcc/config/riscv/riscv-vsetvl.cc: In function 'bool change_insn(rtl_ssa::function_info*, rtl_ssa::insn_change, rtl_ssa::insn_info*, rtx)':
../../gcc/config/riscv/riscv-vsetvl.cc:823:27: error: unquoted whitespace character '\x0a' in format [-Werror=format-diag]
  823 |           pp_printf (&pp, "\n");
      |                           ^~~~
../../gcc/config/riscv/riscv-vsetvl.cc:847:27: error: unquoted whitespace character '\x0a' in format [-Werror=format-diag]
  847 |           pp_printf (&pp, "\n");
      |                           ^~~~
../../gcc/config/riscv/riscv-vsetvl.cc: In constructor 'riscv_vector::vl_vtype_info::vl_vtype_info(riscv_vector::avl_info, uint8_t, riscv_vector::vlmul_type, uint8_t, bool, bool)':
../../gcc/config/riscv/riscv-vsetvl.cc:905:5: error: implicitly-declared 'constexpr riscv_vector::avl_info::avl_info(const riscv_vector::avl_info&)' is deprecated [-Werror=deprecated-copy]
  905 |   : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in),
      |     ^~~~~~~~~~~~~~
../../gcc/config/riscv/riscv-vsetvl.cc:859:1: note: because 'riscv_vector::avl_info' has user-provided 'riscv_vector::avl_info& riscv_vector::avl_info::operator=(const riscv_vector::avl_info&)'
  859 | avl_info::operator= (const avl_info &other)
      | ^~~~~~~~
../../gcc/config/riscv/riscv-vsetvl.cc: In member function 'void riscv_vector::vector_insn_info::dump(FILE*) const':
../../gcc/config/riscv/riscv-vsetvl.cc:1366:27: error: unquoted whitespace character '\x0a' in format [-Werror=format-diag]
 1366 |           pp_printf (&pp, "\n");
      |                           ^~~~
cc1plus: all warnings being treated as errors
make[3]: *** [../../gcc/config/riscv/t-riscv:59: riscv-vsetvl.o] Error 1
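
For reference, these warnings only turn into hard errors because a bootstrap
builds its later stages with -Werror by default; a non-bootstrap cross build
would normally just warn.  Below is a minimal, purely illustrative sketch of
the kind of change that usually silences these two warning classes.  It
assumes GCC's pretty-print.h helpers, uses a made-up dump_newline helper for
illustration, and is not the follow-up patch linked later in the thread:

    /* -Wdeprecated-copy: once a class has a user-provided copy-assignment
       operator, relying on the implicitly-declared copy constructor is
       deprecated, so spell the copy constructor out.  */
    class avl_info
    {
    public:
      avl_info () = default;
      avl_info (const avl_info &) = default;  /* explicit copy constructor */
      avl_info &operator= (const avl_info &);
      /* ...  */
    };

    /* -Wformat-diag: pp_printf checks its format string like a diagnostic
       and rejects a bare "\n", so emit the newline via the dedicated
       pretty-printer helper instead.  */
    static void
    dump_newline (pretty_printer *pp)
    {
      pp_newline (pp);  /* instead of pp_printf (pp, "\n") */
      pp_flush (pp);
    }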
juzhe.zhong@rivai.ai Dec. 23, 2022, 12:19 p.m. UTC | #3
Would you mind telling me how you reproduce these errors?
I failed to reproduce this. Thanks



juzhe.zhong@rivai.ai
 
From: Andreas Schwab
Date: 2022-12-23 18:53
To: juzhe.zhong
CC: gcc-patches; kito.cheng; palmer
Subject: Re: [PATCH] RISC-V: Support VSETVL PASS for RVV support
How has this been tested?
 
juzhe.zhong@rivai.ai Dec. 23, 2022, 12:45 p.m. UTC | #4
Hi, Andreas. Thank you for reporting this.
Even though I didn't reproduce this error, I have an idea to fix it:
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/609045.html 
Would you mind testing this patch for me before merging it?
Thanks.


juzhe.zhong@rivai.ai
 
From: Andreas Schwab
Date: 2022-12-23 18:53
To: juzhe.zhong
CC: gcc-patches; kito.cheng; palmer
Subject: Re: [PATCH] RISC-V: Support VSETVL PASS for RVV support
How has this been tested?
 
Andreas Schwab Dec. 23, 2022, 2:54 p.m. UTC | #5
On Dec 23 2022, 钟居哲 wrote:

> Would you mind telling me how you reproduce these errors?

make bootstrap
juzhe.zhong@rivai.ai Dec. 23, 2022, 3 p.m. UTC | #6
Thank you. Would you mind testing this patch:
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/609045.html 
to see whether the issue is fixed?
Thanks



juzhe.zhong@rivai.ai
 
From: Andreas Schwab
Date: 2022-12-23 22:54
To: 钟居哲
CC: gcc-patches; kito.cheng; palmer
Subject: Re: [PATCH] RISC-V: Support VSETVL PASS for RVV support
On Dec 23 2022, 钟居哲 wrote:

> Would you mind telling me how you reproduce these errors?
 
make bootstrap

Patch

diff --git a/gcc/config.gcc b/gcc/config.gcc
index b5eda046033..1eb76c6c076 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -518,7 +518,7 @@  pru-*-*)
 	;;
 riscv*)
 	cpu_type=riscv
-	extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o"
+	extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o riscv-vsetvl.o"
 	extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
 	d_target_objs="riscv-d.o"
 	extra_headers="riscv_vector.h"
diff --git a/gcc/config/riscv/riscv-passes.def b/gcc/config/riscv/riscv-passes.def
index 23ef8ac6114..d2d48f231aa 100644
--- a/gcc/config/riscv/riscv-passes.def
+++ b/gcc/config/riscv/riscv-passes.def
@@ -18,3 +18,4 @@ 
    <http://www.gnu.org/licenses/>.  */
 
 INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs);
+INSERT_PASS_BEFORE (pass_sched2, 1, pass_vsetvl);
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index e17e003f8e2..cfd0f284f91 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -96,6 +96,7 @@  extern void riscv_parse_arch_string (const char *, struct gcc_options *, locatio
 extern bool riscv_hard_regno_rename_ok (unsigned, unsigned);
 
 rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
+rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
@@ -131,6 +132,12 @@  enum vlmul_type
   LMUL_F4 = 6,
   LMUL_F2 = 7,
 };
+
+enum avl_type
+{
+  NONVLMAX,
+  VLMAX,
+};
 /* Routines implemented in riscv-vector-builtins.cc.  */
 extern void init_builtins (void);
 extern const char *mangle_builtin_type (const_tree);
@@ -145,17 +152,25 @@  extern bool legitimize_move (rtx, rtx, machine_mode);
 extern void emit_pred_op (unsigned, rtx, rtx, machine_mode);
 extern enum vlmul_type get_vlmul (machine_mode);
 extern unsigned int get_ratio (machine_mode);
+extern int get_ta (rtx);
+extern int get_ma (rtx);
+extern int get_avl_type (rtx);
+extern unsigned int calculate_ratio (unsigned int, enum vlmul_type);
 enum tail_policy
 {
   TAIL_UNDISTURBED = 0,
   TAIL_AGNOSTIC = 1,
+  TAIL_ANY = 2,
 };
 
 enum mask_policy
 {
   MASK_UNDISTURBED = 0,
   MASK_AGNOSTIC = 1,
+  MASK_ANY = 2,
 };
+enum tail_policy get_prefer_tail_policy ();
+enum mask_policy get_prefer_mask_policy ();
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 13ee33938bb..f02a048f76d 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -72,11 +72,16 @@  public:
   }
   void add_policy_operand (enum tail_policy vta, enum mask_policy vma)
   {
-    rtx tail_policy_rtx = vta == TAIL_UNDISTURBED ? const0_rtx : const1_rtx;
-    rtx mask_policy_rtx = vma == MASK_UNDISTURBED ? const0_rtx : const1_rtx;
+    rtx tail_policy_rtx = gen_int_mode (vta, Pmode);
+    rtx mask_policy_rtx = gen_int_mode (vma, Pmode);
     add_input_operand (tail_policy_rtx, Pmode);
     add_input_operand (mask_policy_rtx, Pmode);
   }
+  void add_avl_type_operand ()
+  {
+    rtx vlmax_rtx = gen_int_mode (avl_type::VLMAX, Pmode);
+    add_input_operand (vlmax_rtx, Pmode);
+  }
 
   void expand (enum insn_code icode, bool temporary_volatile_p = false)
   {
@@ -112,19 +117,58 @@  emit_vlmax_vsetvl (machine_mode vmode)
   unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL
 		       ? 8
 		       : GET_MODE_BITSIZE (GET_MODE_INNER (vmode));
+  enum vlmul_type vlmul = get_vlmul (vmode);
+  unsigned int ratio = calculate_ratio (sew, vlmul);
+
+  if (!optimize)
+    emit_insn (gen_vsetvl (Pmode, vl, RVV_VLMAX, gen_int_mode (sew, Pmode),
+			   gen_int_mode (get_vlmul (vmode), Pmode), const0_rtx,
+			   const0_rtx));
+  else
+    emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode)));
 
-  emit_insn (
-    gen_vsetvl_no_side_effects (Pmode, vl, RVV_VLMAX, gen_int_mode (sew, Pmode),
-				gen_int_mode (get_vlmul (vmode), Pmode),
-				const1_rtx, const1_rtx));
   return vl;
 }
 
+/* Calculate SEW/LMUL ratio.  */
+unsigned int
+calculate_ratio (unsigned int sew, enum vlmul_type vlmul)
+{
+  unsigned int ratio;
+  switch (vlmul)
+    {
+    case LMUL_1:
+      ratio = sew;
+      break;
+    case LMUL_2:
+      ratio = sew / 2;
+      break;
+    case LMUL_4:
+      ratio = sew / 4;
+      break;
+    case LMUL_8:
+      ratio = sew / 8;
+      break;
+    case LMUL_F8:
+      ratio = sew * 8;
+      break;
+    case LMUL_F4:
+      ratio = sew * 4;
+      break;
+    case LMUL_F2:
+      ratio = sew * 2;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  return ratio;
+}
+
 /* Emit an RVV unmask && vl mov from SRC to DEST.  */
 void
 emit_pred_op (unsigned icode, rtx dest, rtx src, machine_mode mask_mode)
 {
-  insn_expander<7> e;
+  insn_expander<8> e;
   machine_mode mode = GET_MODE (dest);
 
   e.add_output_operand (dest, mode);
@@ -137,7 +181,9 @@  emit_pred_op (unsigned icode, rtx dest, rtx src, machine_mode mask_mode)
   e.add_input_operand (vlmax, Pmode);
 
   if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
-    e.add_policy_operand (TAIL_AGNOSTIC, MASK_AGNOSTIC);
+    e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
+  
+  e.add_avl_type_operand ();
 
   e.expand ((enum insn_code) icode, MEM_P (dest) || MEM_P (src));
 }
@@ -256,4 +302,44 @@  get_ratio (machine_mode mode)
     return mode_vtype_infos.ratio_for_min_vlen64[mode];
 }
 
+/* Get ta according to operand[tail_op_idx].  */
+int
+get_ta (rtx ta)
+{
+  if (INTVAL (ta) == TAIL_ANY)
+    return INVALID_ATTRIBUTE;
+  return INTVAL (ta);
+}
+
+/* Get ma according to operand[mask_op_idx].  */
+int
+get_ma (rtx ma)
+{
+  if (INTVAL (ma) == MASK_ANY)
+    return INVALID_ATTRIBUTE;
+  return INTVAL (ma);
+}
+
+/* Get prefer tail policy.  */
+enum tail_policy
+get_prefer_tail_policy ()
+{
+  /* TODO: By default, we choose TAIL_ANY, which allows the
+     compiler to pick either agnostic or undisturbed. Maybe we
+     will add a compile option like -mprefer=agnostic to set
+     this value in the future.  */
+  return TAIL_ANY;
+}
+
+/* Get prefer mask policy.  */
+enum mask_policy
+get_prefer_mask_policy ()
+{
+  /* TODO: By default, we choose MASK_ANY, which allows the
+     compiler to pick either agnostic or undisturbed. Maybe we
+     will add a compile option like -mprefer=agnostic to set
+     this value in the future.  */
+  return MASK_ANY;
+}
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
new file mode 100644
index 00000000000..c602426b542
--- /dev/null
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -0,0 +1,2509 @@ 
+/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
+   Copyright(C) 2022-2022 Free Software Foundation, Inc.
+   Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or(at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/*  This pass sets the VL/VTYPE global status for RVV instructions
+    that depend on the VL and VTYPE registers, using lazy code motion (LCM).
+
+    Strategy:
+
+    -  Backward demanded info fusion within block.
+
+    -  Lazy code motion (LCM) based demanded info backward propagation.
+
+    -  RTL_SSA framework for def-use, PHI analysis.
+
+    -  Lazy code motion (LCM) for global VL/VTYPE optimization.
+
+    Assumption:
+
+    -  Each AVL operand is either an immediate (must be in the range 0 ~ 31) or a register.
+
+    This pass consists of 5 phases:
+
+    -  Phase 1 - compute VL/VTYPE demanded information within each block
+       by backward data-flow analysis.
+
+    -  Phase 2 - Emit vsetvl instructions within each basic block according to
+       demand, compute and save ANTLOC && AVLOC of each block.
+
+    -  Phase 3 - Backward demanded info propagation and fusion across blocks.
+
+    -  Phase 4 - Lazy code motion: compute local properties, run pre_edge_lcm,
+       and insert/delete vsetvls on edges according to the LCM results.
+
+    -  Phase 5 - Clean up the AVL operand of each RVV instruction, since it will
+       not be used any more, and the VL operand of each VSETVL instruction if it
+       is not used by any non-debug instruction.
+
+    Implementation:
+
+    -  The subroutine for optimize == 0 is simple_vsetvl.
+       This function simply inserts a vsetvl for each RVV
+       instruction. No optimization.
+
+    -  The subroutine for optimize > 0 is lazy_vsetvl.
+       This function optimizes the vsetvl insertion process by
+       lazy code motion (LCM) layered on RTL_SSA.  */
+
+#define IN_TARGET_CODE 1
+#define INCLUDE_ALGORITHM
+#define INCLUDE_FUNCTIONAL
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "backend.h"
+#include "rtl.h"
+#include "target.h"
+#include "tree-pass.h"
+#include "df.h"
+#include "rtl-ssa.h"
+#include "cfgcleanup.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "insn-opinit.h"
+#include "tm-constrs.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "predict.h"
+#include "profile-count.h"
+#include "riscv-vsetvl.h"
+
+using namespace rtl_ssa;
+using namespace riscv_vector;
+
+DEBUG_FUNCTION void
+debug (const vector_insn_info *info)
+{
+  info->dump (stderr);
+}
+
+DEBUG_FUNCTION void
+debug (const vector_infos_manager *info)
+{
+  info->dump (stderr);
+}
+
+static bool
+vlmax_avl_p (rtx x)
+{
+  return x && rtx_equal_p (x, RVV_VLMAX);
+}
+
+static bool
+vlmax_avl_insn_p (rtx_insn *rinsn)
+{
+  return INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi
+	 || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi;
+}
+
+static bool
+loop_basic_block_p (const basic_block cfg_bb)
+{
+  return JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb));
+}
+
+/* Return true if it is an RVV instruction that depends on the VTYPE global
+   status register.  */
+static bool
+has_vtype_op (rtx_insn *rinsn)
+{
+  return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
+}
+
+/* Return true if it is an RVV instruction that depends on the VL global
+   status register.  */
+static bool
+has_vl_op (rtx_insn *rinsn)
+{
+  return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn);
+}
+
+/* Return true if this is a SEW value that can be encoded into the VTYPE
+   format.  */
+static bool
+valid_sew_p (size_t sew)
+{
+  return exact_log2 (sew) && sew >= 8 && sew <= 64;
+}
+
+/* Return true if it is a vsetvl instruction.  */
+static bool
+vector_config_insn_p (rtx_insn *rinsn)
+{
+  return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
+}
+
+/* Return true if it is vsetvldi or vsetvlsi.  */
+static bool
+vsetvl_insn_p (rtx_insn *rinsn)
+{
+  return INSN_CODE (rinsn) == CODE_FOR_vsetvldi
+	 || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi;
+}
+
+/* Return true if INSN1 comes before INSN2 in the same block.  */
+static bool
+same_bb_and_before_p (const insn_info *insn1, const insn_info *insn2)
+{
+  return (insn1->bb ()->index () == insn2->bb ()->index ())
+	 && (*insn1 < *insn2);
+}
+
+/* Return true if INSN1 comes after or is equal to INSN2 in the same block.  */
+static bool
+same_bb_and_after_or_equal_p (const insn_info *insn1, const insn_info *insn2)
+{
+  return (insn1->bb ()->index () == insn2->bb ()->index ())
+	 && (*insn1 >= *insn2);
+}
+
+/* An "anticipatable occurrence" is one that is the first occurrence in the
+   basic block, the operands are not modified in the basic block prior
+   to the occurrence and the output is not used between the start of
+   the block and the occurrence.  */
+static bool
+anticipatable_occurrence_p (const insn_info *insn, const vector_insn_info dem)
+{
+  /* The only possible operand we care of VSETVL is AVL.  */
+  if (dem.has_avl_reg ())
+    {
+      /* The operands should not be modified in the basic block prior
+	 to the occurrence.  */
+      if (!vlmax_avl_p (dem.get_avl ()))
+	{
+	  set_info *set
+	    = find_access (insn->uses (), REGNO (dem.get_avl ()))->def ();
+	  /* If it's undefined, conservatively treat it as not anticipatable.  */
+	  if (!set)
+	    return false;
+	  if (same_bb_and_before_p (set->insn (), insn))
+	    return false;
+	}
+    }
+
+  /* The output should not be used between the start of the block
+     and the occurrence.  */
+  if (vsetvl_insn_p (insn->rtl ()))
+    {
+      rtx dest = SET_DEST (XVECEXP (PATTERN (insn->rtl ()), 0, 0));
+      for (insn_info *i = insn->prev_nondebug_insn (); i != nullptr;
+	   i = i->prev_nondebug_insn ())
+	if (find_access (i->uses (), REGNO (dest)))
+	  return false;
+    }
+
+  return true;
+}
+
+/* An "available occurrence" is one that is the last occurrence in the
+   basic block and the operands are not modified by following statements in
+   the basic block [including this insn].  */
+static bool
+available_occurrence_p (const insn_info *insn, const vector_insn_info dem)
+{
+  /* The only possible operand we care of VSETVL is AVL.  */
+  if (dem.has_avl_reg ())
+    {
+      /* The operands should not be modified in the basic block prior
+	 to the occurrence.
+	 e.g.
+	    bb:
+	      vsetvl hr3, hr2, ...
+	      ...
+	      vadd ... (vl=hr3)
+      */
+      if (!vlmax_avl_p (dem.get_avl ()))
+	{
+	  set_info *set
+	    = find_access (insn->uses (), REGNO (dem.get_avl ()))->def ();
+	  /* If it's undefined, conservatively treat it as not available.  */
+	  if (!set)
+	    return false;
+	  if (same_bb_and_after_or_equal_p (set->insn (), insn))
+	    return false;
+	}
+    }
+  return true;
+}
+
+/* Return true if the edge's branch probability is dominant (>= 50%).  */
+static bool
+dominate_probability_p (edge e)
+{
+  /* TODO: We simply pick a dominant probability >= 50%.
+     However, it isn't always optimal. Consider cases
+     like this:
+       bb 0: 80% succs: bb 2, bb 3, bb 4, bb 5.
+       bb 1: 20%
+
+     Assume bb 1, bb 2, bb 3, bb 4, bb 5 are different
+     from one another, and bb 2, bb 3, bb 4, bb 5 are incompatible.
+
+     ??? Maybe backward propagate bb 1 is better ???
+     May need to support an accurate and reliable COST model
+     in the future.  */
+  return e->probability >= profile_probability::even ();
+}
+
+/* Return true if backward propagation into the block is worthwhile.  */
+static bool
+backward_propagate_worthwhile_p (const basic_block cfg_bb,
+				 const vector_block_info block_info)
+{
+  if (loop_basic_block_p (cfg_bb))
+    {
+      if (block_info.local_dem.compatible_p (block_info.reaching_out))
+	return true;
+
+      /* There is an obvious case where propagating the demand information
+	 is not worthwhile and meaningless:
+			  local_dem
+			     __________
+			 ____|____     |
+			|        |     |
+			|________|     |
+			     |_________|
+			  reaching_out
+	  The header is incompatible with reaching_out and the block is a loop
+	  itself, so we don't backward propagate the local_dem since we can't
+	  avoid emitting a vsetvl for the local_dem.  */
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, cfg_bb->succs)
+	if (e->dest->index == cfg_bb->index)
+	  return false;
+    }
+
+  return true;
+}
+
+/* Helper function to get VL operand.  */
+static rtx
+get_vl (rtx_insn *rinsn)
+{
+  if (has_vl_op (rinsn))
+    {
+      /* We only call get_vl for an instruction that uses VTYPE with a VLMAX
+	 AVL.  It's used to get the VL operand for emitting the VLMAX VSETVL
+	 instruction: vsetvli a5,zero,e32,m1,ta,ma.  */
+      gcc_assert (get_attr_avl_type (rinsn) == VLMAX);
+      extract_insn_cached (rinsn);
+      return recog_data.operand[get_attr_vl_op_idx (rinsn)];
+    }
+  return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
+}
+
+/* Helper function to get AVL operand.  */
+static rtx
+get_avl (rtx_insn *rinsn)
+{
+  if (vsetvl_insn_p (rinsn))
+    return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);
+
+  if (!has_vl_op (rinsn))
+    return NULL_RTX;
+  if (get_attr_avl_type (rinsn) == VLMAX)
+    return RVV_VLMAX;
+  extract_insn_cached (rinsn);
+  return recog_data.operand[get_attr_vl_op_idx (rinsn)];
+}
+
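+/* Return true if the demanded info PROP can be backward propagated into
+   block CFG_BB.  A rough summary (the body below is authoritative): an
+   explicit vsetvl is never propagated; an instruction without a VL operand
+   or with a constant AVL always can be; for a register AVL/VL we must not
+   propagate the demand over the register's definition.  */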
+static bool
+can_backward_propagate_p (const function_info *ssa, const basic_block cfg_bb,
+			  const vector_insn_info prop)
+{
+  insn_info *insn = prop.get_insn ();
+
+  /* TODO: We don't backward propagate the explicit VSETVL here
+     since we will change the vsetvl and vsetvlmax intrinsics into
+     forms without side effects, which can be moved to the optimal
+     location by GCC's internal passes.  We only need to support this
+     backward propagation if the vsetvl intrinsics have side effects.  */
+  if (vsetvl_insn_p (insn->rtl ()))
+    return false;
+
+  gcc_assert (has_vtype_op (insn->rtl ()));
+  rtx reg = NULL_RTX;
+
+  /* Case 1: Don't need VL. Just let it backward propagate.  */
+  if (!has_vl_op (insn->rtl ()))
+    return true;
+  else
+    {
+      /* Case 2: CONST_INT AVL, we don't need to check def.  */
+      if (prop.has_avl_imm ())
+	return true;
+      else
+	{
+	  /* Case 3: REG AVL, we need to check the distance of def to make
+	     sure we won't backward propagate over the def.  */
+	  gcc_assert (prop.has_avl_reg ());
+	  if (vlmax_avl_p (prop.get_avl ()))
+	    /* Check VL operand for vsetvl vl,zero.  */
+	    reg = get_vl (insn->rtl ());
+	  else
+	    /* Check AVL operand for vsetvl zero,avl.  */
+	    reg = get_avl (insn->rtl ());
+	}
+    }
+
+  def_info *def = find_access (insn->uses (), REGNO (reg))->def ();
+
+  /* If the definition is in the current block, we can't propagate it
+     across blocks.  */
+  if (def->bb ()->cfg_bb ()->index == insn->bb ()->cfg_bb ()->index)
+    {
+      set_info *set = safe_dyn_cast<set_info *> (def);
+
+      /* True if it is a degenerate PHI that can be backward propagated.  */
+      auto valid_degenerate_phi_p = [&] () {
+	if (!set)
+	  return false;
+
+	phi_info *phi = safe_dyn_cast<phi_info *> (set);
+	if (!phi)
+	  return false;
+
+	basic_block iter_bb;
+	set_info *ultimate_def = look_through_degenerate_phi (set);
+	const basic_block ultimate_bb = ultimate_def->bb ()->cfg_bb ();
+	FOR_BB_BETWEEN (iter_bb, ultimate_bb, def->bb ()->cfg_bb (), next_bb)
+	  if (iter_bb->index == cfg_bb->index)
+	    return true;
+
+	return false;
+      };
+
+      if (valid_degenerate_phi_p ())
+	return true;
+
+      /* TODO: Support non-degenerate PHI backward propagation later.  */
+      return false;
+    }
+
+  /* If the definition block is the current block that we iterate, we
+     can backward propagate it since we will insert or change VL/VTYPE
+     info at the end of the current block we iterate.  */
+  if (def->bb ()->cfg_bb ()->index == cfg_bb->index)
+    return true;
+
+  /* Make sure we don't backward propagate the VL/VTYPE info over the
+     definition blocks.  */
+  bool visited_p = false;
+  for (const bb_info *bb : ssa->reverse_bbs ())
+    {
+      if (bb->cfg_bb ()->index == cfg_bb->index && visited_p)
+	return false;
+      if (bb->cfg_bb ()->index == def->bb ()->cfg_bb ()->index)
+	visited_p = true;
+    }
+
+  return true;
+}
+
+/* Helper function to get SEW operand. We always have SEW value for
+   all RVV instructions that have VTYPE OP.  */
+static uint8_t
+get_sew (rtx_insn *rinsn)
+{
+  return get_attr_sew (rinsn);
+}
+
+/* Helper function to get VLMUL operand. We always have VLMUL value for
+   all RVV instructions that have VTYPE OP. */
+static enum vlmul_type
+get_vlmul (rtx_insn *rinsn)
+{
+  return (enum vlmul_type) get_attr_vlmul (rinsn);
+}
+
+/* Get default tail policy.  */
+static bool
+get_default_ta ()
+{
+  /* For an instruction that doesn't require TA, we still need a default value
+     to emit a vsetvl.  We pick the default value according to the preferred
+     policy.  */
+  return (bool) (get_prefer_tail_policy () & 0x1
+		 || (get_prefer_tail_policy () >> 1 & 0x1));
+}
+
+/* Get default mask policy.  */
+static bool
+get_default_ma ()
+{
+  /* For an instruction that doesn't require MA, we still need a default value
+     to emit a vsetvl.  We pick the default value according to the preferred
+     policy.  */
+  return (bool) (get_prefer_mask_policy () & 0x1
+		 || (get_prefer_mask_policy () >> 1 & 0x1));
+}
+
+/* Helper function to get TA operand.  */
+static bool
+tail_agnostic_p (rtx_insn *rinsn)
+{
+  /* If it doesn't have TA, we return agnostic by default.  */
+  extract_insn_cached (rinsn);
+  int ta = get_attr_ta (rinsn);
+  return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta);
+}
+
+/* Helper function to get MA operand.  */
+static bool
+mask_agnostic_p (rtx_insn *rinsn)
+{
+  /* If it doesn't have MA, we return agnostic by default.  */
+  extract_insn_cached (rinsn);
+  int ma = get_attr_ma (rinsn);
+  return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
+}
+
+/* Return true if FN has a vector instruction that use VL/VTYPE.  */
+static bool
+has_vector_insn (function *fn)
+{
+  basic_block cfg_bb;
+  rtx_insn *rinsn;
+  FOR_ALL_BB_FN (cfg_bb, fn)
+    FOR_BB_INSNS (cfg_bb, rinsn)
+      if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
+	return true;
+  return false;
+}
+
+/* Emit vsetvl instruction.  */
+static rtx
+gen_vsetvl_pat (enum vsetvl_type insn_type, vl_vtype_info info, rtx vl)
+{
+  rtx avl = info.get_avl ();
+  rtx sew = gen_int_mode (info.get_sew (), Pmode);
+  rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode);
+  rtx ta = gen_int_mode (info.get_ta (), Pmode);
+  rtx ma = gen_int_mode (info.get_ma (), Pmode);
+
+  if (insn_type == VSETVL_NORMAL)
+    {
+      gcc_assert (vl != NULL_RTX);
+      return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma);
+    }
+  else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY)
+    return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
+  else
+    return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
+}
+
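+/* Generate a vsetvl pattern for RINSN that realizes INFO: keep RINSN's VL
+   destination when RINSN is itself a vsetvl or INFO uses the VLMAX AVL,
+   otherwise discard the VL result (or only change VTYPE).  */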
+static rtx
+gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info info)
+{
+  rtx new_pat;
+  if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ()))
+    {
+      rtx dest = get_vl (rinsn);
+      new_pat = gen_vsetvl_pat (VSETVL_NORMAL, info, dest);
+    }
+  else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only)
+    new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
+  else
+    new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX);
+  return new_pat;
+}
+
+static void
+emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type,
+		  vl_vtype_info info, rtx vl, rtx_insn *rinsn)
+{
+  rtx pat = gen_vsetvl_pat (insn_type, info, vl);
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n");
+      print_rtl_single (dump_file, pat);
+    }
+
+  if (emit_type == EMIT_DIRECT)
+    emit_insn (pat);
+  else if (emit_type == EMIT_BEFORE)
+    emit_insn_before (pat, rinsn);
+  else
+    emit_insn_after (pat, rinsn);
+}
+
+static void
+eliminate_insn (rtx_insn *rinsn)
+{
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn));
+      print_rtl_single (dump_file, rinsn);
+    }
+  if (in_sequence_p ())
+    remove_insn (rinsn);
+  else
+    delete_insn (rinsn);
+}
+
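+/* Insert a vsetvl satisfying INFO before or after RINSN (per EMIT_TYPE),
+   given the previous state PREV_INFO.  The emitted forms follow the usual
+   RVV conventions (illustrative; the selection logic below is authoritative):
+
+     vsetvli rd, rs1, vtypei    VSETVL_NORMAL: also produce the new VL in rd.
+     vsetvli x0, rs1, vtypei    VSETVL_DISCARD_RESULT: the VL result is unused.
+     vsetvli x0, x0, vtypei     VSETVL_VTYPE_CHANGE_ONLY: keep the current VL
+				and only change VTYPE (same VLMAX required).  */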
+static void
+insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn,
+	       const vector_insn_info &info, const vector_insn_info &prev_info)
+{
+  /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
+     VLMAX.  */
+  if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
+      && info.same_avl_p (prev_info) && info.same_vlmax_p (prev_info))
+    {
+      emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
+			rinsn);
+      return;
+    }
+
+  if (info.has_avl_imm ())
+    {
+      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX,
+			rinsn);
+      return;
+    }
+
+  if (info.has_avl_no_reg ())
+    {
+      /* We can only use x0, x0 if there's no chance of the vtype change causing
+	 the previous vl to become invalid.  */
+      if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
+	  && info.same_vlmax_p (prev_info))
+	{
+	  emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
+			    rinsn);
+	  return;
+	}
+      /* Otherwise use an AVL of 0 to avoid depending on previous vl.  */
+      vl_vtype_info new_info = info;
+      new_info.set_avl_info (avl_info (const0_rtx, nullptr));
+      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX,
+			rinsn);
+      return;
+    }
+
+  /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the
+     opcode if the AVLReg is X0 as they have different register classes for
+     the AVL operand.  */
+  if (vlmax_avl_p (info.get_avl ()))
+    {
+      gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn));
+      rtx vl_op = get_vl (rinsn);
+      gcc_assert (!vlmax_avl_p (vl_op));
+      emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn);
+      return;
+    }
+
+  emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "Update VL/VTYPE info, previous info=");
+      prev_info.dump (dump_file);
+    }
+}
+
+/* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
+   to INSN.  If such notes are added to an insn which references a
+   CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
+   that note, because the following loop optimization pass requires
+   them.  */
+
+/* ??? If there was a jump optimization pass after gcse and before loop,
+   then we would not need to do this here, because jump would add the
+   necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */
+
+static void
+add_label_notes (rtx x, rtx_insn *insn)
+{
+  enum rtx_code code = GET_CODE (x);
+  int i, j;
+  const char *fmt;
+
+  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
+    {
+      /* This code used to ignore labels that referred to dispatch tables to
+	 avoid flow generating (slightly) worse code.
+
+	 We no longer ignore such label references (see LABEL_REF handling in
+	 mark_jump_label for additional information).  */
+
+      /* There's no reason for current users to emit jump-insns with
+	 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
+	 notes.  */
+      gcc_assert (!JUMP_P (insn));
+      add_reg_note (insn, REG_LABEL_OPERAND, label_ref_label (x));
+
+      if (LABEL_P (label_ref_label (x)))
+	LABEL_NUSES (label_ref_label (x))++;
+
+      return;
+    }
+
+  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
+    {
+      if (fmt[i] == 'e')
+	add_label_notes (XEXP (x, i), insn);
+      else if (fmt[i] == 'E')
+	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+	  add_label_notes (XVECEXP (x, i, j), insn);
+    }
+}
+
+/* Add EXPR to the end of basic block BB.
+
+   This is used by both the PRE and code hoisting.  */
+
+static void
+insert_insn_end_basic_block (rtx_insn *rinsn, basic_block cfg_bb)
+{
+  rtx_insn *end_rinsn = BB_END (cfg_bb);
+  rtx_insn *new_insn;
+  rtx_insn *pat, *pat_end;
+
+  pat = rinsn;
+  gcc_assert (pat && INSN_P (pat));
+
+  pat_end = pat;
+  while (NEXT_INSN (pat_end) != NULL_RTX)
+    pat_end = NEXT_INSN (pat_end);
+
+  /* If the last end_rinsn is a jump, insert EXPR in front.  Similarly we need
+     to take care of trapping instructions in presence of non-call exceptions.
+   */
+
+  if (JUMP_P (end_rinsn)
+      || (NONJUMP_INSN_P (end_rinsn)
+	  && (!single_succ_p (cfg_bb)
+	      || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL)))
+    {
+      /* FIXME: What if something in jump uses value set in new end_rinsn?  */
+      new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
+    }
+
+  /* Likewise if the last end_rinsn is a call, as will happen in the presence
+     of exception handling.  */
+  else if (CALL_P (end_rinsn)
+	   && (!single_succ_p (cfg_bb)
+	       || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL))
+    {
+      /* Keeping in mind targets with small register classes and parameters
+	 in registers, we search backward and place the instructions before
+	 the first parameter is loaded.  Do this for everyone for consistency
+	 and a presumption that we'll get better code elsewhere as well.  */
+
+      /* Since different machines initialize their parameter registers
+	 in different orders, assume nothing.  Collect the set of all
+	 parameter registers.  */
+      end_rinsn = find_first_parameter_load (end_rinsn, BB_HEAD (cfg_bb));
+
+      /* If we found all the parameter loads, then we want to insert
+	 before the first parameter load.
+
+	 If we did not find all the parameter loads, then we might have
+	 stopped on the head of the block, which could be a CODE_LABEL.
+	 If we inserted before the CODE_LABEL, then we would be putting
+	 the end_rinsn in the wrong basic block.  In that case, put the
+	 end_rinsn after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.
+       */
+      while (LABEL_P (end_rinsn) || NOTE_INSN_BASIC_BLOCK_P (end_rinsn))
+	end_rinsn = NEXT_INSN (end_rinsn);
+
+      new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
+    }
+  else
+    new_insn = emit_insn_after_noloc (pat, end_rinsn, cfg_bb);
+
+  while (1)
+    {
+      if (INSN_P (pat))
+	add_label_notes (PATTERN (pat), new_insn);
+      if (pat == pat_end)
+	break;
+      pat = NEXT_INSN (pat);
+    }
+}
+
+/* Get VL/VTYPE information for INSN.  */
+static vl_vtype_info
+get_vl_vtype_info (const insn_info *insn)
+{
+  if (vector_config_insn_p (insn->rtl ()))
+    gcc_assert (vsetvl_insn_p (insn->rtl ())
+		&& "Can't handle X0, rs1 vsetvli yet");
+
+  set_info *set = nullptr;
+  rtx avl = ::get_avl (insn->rtl ());
+  if (avl && REG_P (avl) && !vlmax_avl_p (avl))
+    set = find_access (insn->uses (), REGNO (avl))->def ();
+
+  uint8_t sew = get_sew (insn->rtl ());
+  enum vlmul_type vlmul = get_vlmul (insn->rtl ());
+  uint8_t ratio = get_attr_ratio (insn->rtl ());
+  /* When get_attr_ratio is invalid, this kind of instruction
+     doesn't care about the ratio.  However, we still need this value
+     in the demand info backward analysis.  */
+  if (ratio == INVALID_ATTRIBUTE)
+    ratio = calculate_ratio (sew, vlmul);
+  bool ta = tail_agnostic_p (insn->rtl ());
+  bool ma = mask_agnostic_p (insn->rtl ());
+
+  /* If merge operand is undef value, we prefer agnostic.  */
+  int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
+  if (merge_op_idx != INVALID_ATTRIBUTE
+      && satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
+    {
+      ta = true;
+      ma = true;
+    }
+
+  vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma);
+  return info;
+}
+
+static void
+change_insn (rtx_insn *rinsn, rtx new_pat)
+{
+  /* We don't apply the change via RTL_SSA here since the insn may be
+     one we newly added earlier in this pass, which doesn't have RTL_SSA
+     info yet.  */
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
+	       INSN_UID (rinsn));
+      print_rtl_single (dump_file, PATTERN (rinsn));
+    }
+
+  validate_change (rinsn, &PATTERN (rinsn), new_pat, true);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nto:\n");
+      print_rtl_single (dump_file, PATTERN (rinsn));
+    }
+}
+
+static bool
+change_insn (function_info *ssa, insn_change change, insn_info *insn,
+	     rtx new_pat)
+{
+  rtx_insn *rinsn = insn->rtl ();
+  auto attempt = ssa->new_change_attempt ();
+  if (!restrict_movement (change))
+    return false;
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
+	       INSN_UID (rinsn));
+      print_rtl_single (dump_file, PATTERN (rinsn));
+      if (dump_flags & TDF_DETAILS)
+	{
+	  fprintf (dump_file, "RTL_SSA info:\n");
+	  pretty_printer pp;
+	  pp.buffer->stream = dump_file;
+	  insn->print_full (&pp);
+	  pp_printf (&pp, "\n");
+	  pp_flush (&pp);
+	}
+    }
+
+  insn_change_watermark watermark;
+  validate_change (rinsn, &PATTERN (rinsn), new_pat, true);
+
+  /* These routines report failures themselves.  */
+  if (!recog (attempt, change) || !change_is_worthwhile (change, false))
+    return false;
+  confirm_change_group ();
+  ssa->change_insn (change);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nto:\n");
+      print_rtl_single (dump_file, PATTERN (rinsn));
+      if (dump_flags & TDF_DETAILS)
+	{
+	  fprintf (dump_file, "RTL_SSA info:\n");
+	  pretty_printer pp;
+	  pp.buffer->stream = dump_file;
+	  insn->print_full (&pp);
+	  pp_printf (&pp, "\n");
+	  pp_flush (&pp);
+	}
+    }
+  return true;
+}
+
+avl_info::avl_info (rtx value_in, set_info *source_in)
+  : m_value (value_in), m_source (source_in)
+{}
+
+avl_info &
+avl_info::operator= (const avl_info &other)
+{
+  m_value = other.get_value ();
+  m_source = other.get_source ();
+  return *this;
+}
+
+bool
+avl_info::operator== (const avl_info &other) const
+{
+  if (!m_value)
+    return !other.get_value ();
+  if (!other.get_value ())
+    return false;
+
+  /* It's safe to consider them equal if they are literally the same
+     RTX.  */
+  if (m_value == other.get_value ())
+    return true;
+
+  if (GET_CODE (m_value) != GET_CODE (other.get_value ()))
+    return false;
+
+  /* Handle CONST_INT AVL.  */
+  if (CONST_INT_P (m_value))
+    return INTVAL (m_value) == INTVAL (other.get_value ());
+
+  /* Handle VLMAX AVL.  */
+  if (vlmax_avl_p (m_value))
+    return vlmax_avl_p (other.get_value ());
+
+  /* TODO: So far we only support VLMAX (AVL=zero) comparison,
+     we will support non-VLMAX AVL in the future.  */
+  return false;
+}
+
+bool
+avl_info::operator!= (const avl_info &other) const
+{
+  return !(*this == other);
+}
+
+/* Initialize VL/VTYPE information.  */
+vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in,
+			      enum vlmul_type vlmul_in, uint8_t ratio_in,
+			      bool ta_in, bool ma_in)
+  : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in),
+    m_ta (ta_in), m_ma (ma_in)
+{
+  gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW");
+}
+
+bool
+vl_vtype_info::operator== (const vl_vtype_info &other) const
+{
+  return m_avl == other.get_avl_info () && m_sew == other.get_sew ()
+	 && m_vlmul == other.get_vlmul () && m_ta == other.get_ta ()
+	 && m_ma == other.get_ma () && m_ratio == other.get_ratio ();
+}
+
+bool
+vl_vtype_info::operator!= (const vl_vtype_info &other) const
+{
+  return !(*this == other);
+}
+
+bool
+vl_vtype_info::has_non_zero_avl () const
+{
+  if (has_avl_imm ())
+    return INTVAL (get_avl ()) > 0;
+  if (has_avl_reg ())
+    return vlmax_avl_p (get_avl ());
+  return false;
+}
+
+bool
+vl_vtype_info::same_avl_p (const vl_vtype_info &other) const
+{
+  return get_avl_info () == other.get_avl_info ();
+}
+
+bool
+vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const
+{
+  return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ()
+	 && get_ta () == other.get_ta () && get_ma () == other.get_ma ();
+}
+
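+/* Return true if the two settings give the same VLMAX, i.e. have the same
+   SEW/LMUL ratio, since VLMAX = VLEN * LMUL / SEW.  For example
+   (illustrative), e8,mf8 and e32,mf2 both have ratio 64 and therefore the
+   same VLMAX.  */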
+bool
+vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const
+{
+  return get_ratio () == other.get_ratio ();
+}
+
+/* Compare the compatibility between Dem1 and Dem2.
+   If Dem1 > Dem2, Dem1's VL/VTYPE setting satisfies Dem2's demand but
+   Dem2's setting does not satisfy Dem1's, i.e. Dem1 is the stricter
+   (more demanding) of the two.
+   For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO).  */
+bool
+vector_insn_info::operator> (const vector_insn_info &other) const
+{
+  if (other.compatible_p (static_cast<const vl_vtype_info &> (*this))
+      && !this->compatible_p (static_cast<const vl_vtype_info &> (other)))
+    return true;
+  return false;
+}
+
+bool
+vector_insn_info::operator>= (const vector_insn_info &other) const
+{
+  if (*this > other)
+    return true;
+
+  if (*this == other)
+    return true;
+
+  if (!compatible_p (other))
+    return false;
+
+  if (!demand_p (DEMAND_AVL) && other.demand_p (DEMAND_AVL))
+    return false;
+
+  if (same_vlmax_p (other))
+    {
+      if (demand_p (DEMAND_RATIO) && !other.demand_p (DEMAND_RATIO)
+	  && (get_sew () != other.get_sew ()
+	      || get_vlmul () != other.get_vlmul ()))
+	return false;
+
+      if (get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ())
+	{
+	  if (demand_p (DEMAND_RATIO) && !other.demand_p (DEMAND_RATIO))
+	    return false;
+	}
+    }
+
+  if (demand_p (DEMAND_TAIL_POLICY) && !other.demand_p (DEMAND_TAIL_POLICY)
+      && get_ta () != other.get_ta ())
+    return false;
+
+  if (demand_p (DEMAND_MASK_POLICY) && !other.demand_p (DEMAND_MASK_POLICY)
+      && get_ma () != other.get_ma ())
+    return false;
+
+  return true;
+}
+
+bool
+vector_insn_info::operator== (const vector_insn_info &other) const
+{
+  gcc_assert (!uninit_p () && !other.uninit_p ()
+	      && "Uninitialization should not happen");
+
+  /* Empty is only equal to another Empty.  */
+  if (empty_p ())
+    return other.empty_p ();
+  if (other.empty_p ())
+    return empty_p ();
+
+  /* Unknown is only equal to another Unknown.  */
+  if (unknown_p ())
+    return other.unknown_p ();
+  if (other.unknown_p ())
+    return unknown_p ();
+
+  for (size_t i = 0; i < NUM_DEMAND; i++)
+    if (m_demands[i] != other.demand_p ((enum demand_type) i))
+      return false;
+
+  if (m_insn != other.get_insn ())
+    return false;
+  if (m_dirty_pat != other.get_dirty_pat ())
+    return false;
+
+  if (!same_avl_p (other))
+    return false;
+
+  /* If the full VTYPE is valid, check that it is the same.  */
+  return same_vtype_p (other);
+}
+
+void
+vector_insn_info::parse_insn (rtx_insn *rinsn)
+{
+  *this = vector_insn_info ();
+  if (!NONDEBUG_INSN_P (rinsn))
+    return;
+  if (!has_vtype_op (rinsn))
+    return;
+  m_state = VALID;
+  extract_insn_cached (rinsn);
+  const rtx avl = recog_data.operand[get_attr_vl_op_idx (rinsn)];
+  m_avl = avl_info (avl, nullptr);
+  m_sew = ::get_sew (rinsn);
+  m_vlmul = ::get_vlmul (rinsn);
+  m_ta = tail_agnostic_p (rinsn);
+  m_ma = mask_agnostic_p (rinsn);
+}
+
+void
+vector_insn_info::parse_insn (insn_info *insn)
+{
+  *this = vector_insn_info ();
+
+  /* Return if it is a debug insn, for consistency with optimize == 0.  */
+  if (insn->is_debug_insn ())
+    return;
+
+  /* We set it to unknown since we don't know what will happen in a CALL
+     or ASM.  */
+  if (insn->is_call () || insn->is_asm ())
+    {
+      set_unknown ();
+      return;
+    }
+
+  /* If this is something that updates VL/VTYPE that we don't know about, set
+     the state to unknown.  */
+  if (!vector_config_insn_p (insn->rtl ())
+      && (find_access (insn->defs (), VL_REGNUM)
+	  || find_access (insn->defs (), VTYPE_REGNUM)))
+    {
+      set_unknown ();
+      return;
+    }
+
+  if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
+    return;
+
+  /* Warning: This function has to work on both the lowered (i.e. post
+     emit_local_forward_vsetvls) and pre-lowering forms.  The main implication
+     of this is that it can't use the value of a SEW, VL, or Policy operand as
+     they might be stale after lowering.  */
+  vl_vtype_info::operator= (get_vl_vtype_info (insn));
+  m_insn = insn;
+  m_state = VALID;
+  if (vector_config_insn_p (insn->rtl ()))
+    {
+      m_demands[DEMAND_AVL] = true;
+      m_demands[DEMAND_RATIO] = true;
+      return;
+    }
+
+  if (has_vl_op (insn->rtl ()))
+    m_demands[DEMAND_AVL] = true;
+
+  if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
+    m_demands[DEMAND_RATIO] = true;
+  else
+    {
+      /* TODO: By default, if it doesn't demand RATIO, we set it to
+	 demand both SEW && LMUL.  Some instructions may demand SEW
+	 only and ignore LMUL; we will fix that later.  */
+      m_demands[DEMAND_SEW] = true;
+      m_demands[DEMAND_LMUL] = true;
+    }
+
+  if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE)
+    m_demands[DEMAND_TAIL_POLICY] = true;
+  if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE)
+    m_demands[DEMAND_MASK_POLICY] = true;
+}
+
+void
+vector_insn_info::demand_vl_vtype ()
+{
+  m_state = VALID;
+  m_demands[DEMAND_AVL] = true;
+  m_demands[DEMAND_SEW] = true;
+  m_demands[DEMAND_LMUL] = true;
+  m_demands[DEMAND_TAIL_POLICY] = true;
+  m_demands[DEMAND_MASK_POLICY] = true;
+}
+
+bool
+vector_insn_info::compatible_p (const vector_insn_info &other) const
+{
+  gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
+	      && "Can't compare invalid demanded infos");
+
+  /* Check SEW.  */
+  if (demand_p (DEMAND_SEW) && other.demand_p (DEMAND_SEW)
+      && get_sew () != other.get_sew ())
+    return false;
+
+  /* Check LMUL.  */
+  if (demand_p (DEMAND_LMUL) && other.demand_p (DEMAND_LMUL)
+      && get_vlmul () != other.get_vlmul ())
+    return false;
+
+  /* Check RATIO.  */
+  if (demand_p (DEMAND_RATIO) && other.demand_p (DEMAND_RATIO)
+      && get_ratio () != other.get_ratio ())
+    return false;
+  if (demand_p (DEMAND_RATIO) && (other.get_sew () || other.get_vlmul ())
+      && get_ratio () != other.get_ratio ())
+    return false;
+  if (other.demand_p (DEMAND_RATIO) && (get_sew () || get_vlmul ())
+      && get_ratio () != other.get_ratio ())
+    return false;
+
+  if (demand_p (DEMAND_TAIL_POLICY) && other.demand_p (DEMAND_TAIL_POLICY)
+      && get_ta () != other.get_ta ())
+    return false;
+  if (demand_p (DEMAND_MASK_POLICY) && other.demand_p (DEMAND_MASK_POLICY)
+      && get_ma () != other.get_ma ())
+    return false;
+
+  if (demand_p (DEMAND_AVL) && other.demand_p (DEMAND_AVL))
+    return m_avl == other.get_avl_info ();
+
+  return true;
+}
+
+bool
+vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const
+{
+  gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
+  gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
+  if (!demand_p (DEMAND_AVL))
+    return true;
+  return get_avl_info () == other.get_avl_info ();
+}
+
+bool
+vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const
+{
+  gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
+  gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state");
+  if (demand_p (DEMAND_SEW) && m_sew != other.get_sew ())
+    return false;
+  if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ())
+    return false;
+  if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ())
+    return false;
+  if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ())
+    return false;
+  if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ())
+    return false;
+  return true;
+}
+
+/* Determine whether the requirements of this vector instruction are
+   compatible with the previous VL/VTYPE state represented by CURR_INFO.  */
+bool
+vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const
+{
+  gcc_assert (!uninit_p () && "Can't handle uninitialized info");
+  if (empty_p ())
+    return false;
+
+  /* Nothing is compatible with Unknown.  */
+  if (unknown_p ())
+    return false;
+
+  /* If the instruction doesn't need an AVLReg and the SEW matches, consider
+     it compatible.  */
+  if (!demand_p (DEMAND_AVL))
+    if (m_sew == curr_info.get_sew ())
+      return true;
+
+  return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info);
+}
+
+vector_insn_info
+vector_insn_info::merge (const vector_insn_info &merge_info,
+			 bool across_bb_p = false) const
+{
+  gcc_assert (this->compatible_p (merge_info)
+	      && "Can't merge incompatible demanded infos");
+
+  vector_insn_info new_info;
+  new_info.demand_vl_vtype ();
+
+  if (dirty_p ())
+    {
+      gcc_assert (across_bb_p);
+      if (demand_p (DEMAND_AVL))
+	new_info.set_insn (get_insn ());
+      else
+	new_info.set_insn (merge_info.get_insn ());
+    }
+  else
+    {
+      if (across_bb_p)
+	new_info.set_insn (get_insn ());
+      else
+	new_info.set_insn (merge_info.get_insn ());
+    }
+
+  new_info.set_dirty_pat (merge_info.get_dirty_pat ());
+
+  if (!demand_p (DEMAND_AVL) && !merge_info.demand_p (DEMAND_AVL))
+    new_info.undemand (DEMAND_AVL);
+  if (!demand_p (DEMAND_SEW) && !merge_info.demand_p (DEMAND_SEW))
+    new_info.undemand (DEMAND_SEW);
+  if (!demand_p (DEMAND_LMUL) && !merge_info.demand_p (DEMAND_LMUL))
+    new_info.undemand (DEMAND_LMUL);
+
+  if (!demand_p (DEMAND_TAIL_POLICY)
+      && !merge_info.demand_p (DEMAND_TAIL_POLICY))
+    new_info.undemand (DEMAND_TAIL_POLICY);
+  if (!demand_p (DEMAND_MASK_POLICY)
+      && !merge_info.demand_p (DEMAND_MASK_POLICY))
+    new_info.undemand (DEMAND_MASK_POLICY);
+
+  if (merge_info.demand_p (DEMAND_AVL))
+    new_info.set_avl_info (merge_info.get_avl_info ());
+  else if (demand_p (DEMAND_AVL))
+    new_info.set_avl_info (get_avl_info ());
+
+  if (merge_info.demand_p (DEMAND_SEW))
+    new_info.set_sew (merge_info.get_sew ());
+  else if (demand_p (DEMAND_SEW))
+    new_info.set_sew (get_sew ());
+
+  if (merge_info.demand_p (DEMAND_LMUL))
+    new_info.set_vlmul (merge_info.get_vlmul ());
+  else if (demand_p (DEMAND_LMUL))
+    new_info.set_vlmul (get_vlmul ());
+
+  if (!new_info.demand_p (DEMAND_SEW) && !new_info.demand_p (DEMAND_LMUL))
+    {
+      if (demand_p (DEMAND_RATIO) || merge_info.demand_p (DEMAND_RATIO))
+	new_info.demand (DEMAND_RATIO);
+      /* Even though we don't demand SEW && VLMUL in this case, we still
+	 need their values.  */
+      if (merge_info.demand_p (DEMAND_RATIO))
+	{
+	  new_info.set_sew (merge_info.get_sew ());
+	  new_info.set_vlmul (merge_info.get_vlmul ());
+	  new_info.set_ratio (merge_info.get_ratio ());
+	}
+      else if (demand_p (DEMAND_RATIO))
+	{
+	  new_info.set_sew (get_sew ());
+	  new_info.set_vlmul (get_vlmul ());
+	  new_info.set_ratio (get_ratio ());
+	}
+    }
+  else
+    {
+      /* When get_attr_ratio is invalid, this kind of instruction
+	 doesn't care about the ratio.  However, we still need this value
+	 in the demand info backward analysis.  */
+      new_info.set_ratio (
+	calculate_ratio (new_info.get_sew (), new_info.get_vlmul ()));
+    }
+
+  if (merge_info.demand_p (DEMAND_TAIL_POLICY))
+    new_info.set_ta (merge_info.get_ta ());
+  else if (demand_p (DEMAND_TAIL_POLICY))
+    new_info.set_ta (get_ta ());
+  else
+    new_info.set_ta (get_default_ta ());
+
+  if (merge_info.demand_p (DEMAND_MASK_POLICY))
+    new_info.set_ma (merge_info.get_ma ());
+  else if (demand_p (DEMAND_MASK_POLICY))
+    new_info.set_ma (get_ma ());
+  else
+    new_info.set_ma (get_default_ma ());
+
+  return new_info;
+}
+
+void
+vector_insn_info::dump (FILE *file) const
+{
+  fprintf (file, "[");
+  if (uninit_p ())
+    fprintf (file, "UNINITIALIZED,");
+  else if (valid_p ())
+    fprintf (file, "VALID,");
+  else if (unknown_p ())
+    fprintf (file, "UNKNOWN,");
+  else if (empty_p ())
+    fprintf (file, "EMPTY,");
+  else
+    fprintf (file, "DIRTY,");
+
+  fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL));
+  fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW));
+  fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL));
+  fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO));
+  fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY));
+  fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY));
+
+  fprintf (file, "AVL=");
+  print_rtl_single (file, get_avl ());
+  fprintf (file, "SEW=%d,", get_sew ());
+  fprintf (file, "VLMUL=%d,", get_vlmul ());
+  fprintf (file, "RATIO=%d,", get_ratio ());
+  fprintf (file, "TAIL_POLICY=%d,", get_ta ());
+  fprintf (file, "MASK_POLICY=%d", get_ma ());
+  fprintf (file, "]\n");
+
+  if (valid_p ())
+    {
+      if (get_insn ())
+	{
+	  fprintf (file, "RTL_SSA insn_info=");
+	  pretty_printer pp;
+	  pp.buffer->stream = file;
+	  get_insn ()->print_full (&pp);
+	  pp_printf (&pp, "\n");
+	  pp_flush (&pp);
+	}
+      if (get_dirty_pat ())
+	{
+	  fprintf (file, "Dirty RTL Pattern=");
+	  print_rtl_single (file, get_dirty_pat ());
+	}
+    }
+}
+
+vector_infos_manager::vector_infos_manager ()
+{
+  vector_edge_list = nullptr;
+  vector_kill = nullptr;
+  vector_del = nullptr;
+  vector_insert = nullptr;
+  vector_antic = nullptr;
+  vector_transp = nullptr;
+  vector_comp = nullptr;
+  vector_avin = nullptr;
+  vector_avout = nullptr;
+  vector_insn_infos.safe_grow (get_max_uid ());
+  vector_block_infos.safe_grow (last_basic_block_for_fn (cfun));
+  if (!optimize)
+    {
+      basic_block cfg_bb;
+      rtx_insn *rinsn;
+      FOR_ALL_BB_FN (cfg_bb, cfun)
+	{
+	  vector_block_infos[cfg_bb->index].local_dem = vector_insn_info ();
+	  vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info ();
+	  FOR_BB_INSNS (cfg_bb, rinsn)
+	    vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn);
+	}
+    }
+  else
+    {
+      for (const bb_info *bb : crtl->ssa->bbs ())
+	{
+	  vector_block_infos[bb->index ()].local_dem = vector_insn_info ();
+	  vector_block_infos[bb->index ()].reaching_out = vector_insn_info ();
+	  for (insn_info *insn : bb->real_insns ())
+	    vector_insn_infos[insn->uid ()].parse_insn (insn);
+	}
+    }
+}
+
+void
+vector_infos_manager::create_expr (vector_insn_info &info)
+{
+  for (size_t i = 0; i < vector_exprs.length (); i++)
+    if (*vector_exprs[i] == info)
+      return;
+  vector_exprs.safe_push (&info);
+}
+
+size_t
+vector_infos_manager::get_expr_id (const vector_insn_info &info) const
+{
+  for (size_t i = 0; i < vector_exprs.length (); i++)
+    if (*vector_exprs[i] == info)
+      return i;
+  gcc_unreachable ();
+}
+
+auto_vec<size_t>
+vector_infos_manager::get_all_available_exprs (
+  const vector_insn_info &info) const
+{
+  auto_vec<size_t> available_list;
+  for (size_t i = 0; i < vector_exprs.length (); i++)
+    if (info >= *vector_exprs[i])
+      available_list.safe_push (i);
+  return available_list;
+}
+
+bool
+vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const
+{
+  if (bitmap_empty_p (bitdata))
+    return false;
+
+  int ratio = -1;
+  unsigned int bb_index;
+  sbitmap_iterator sbi;
+
+  EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
+  {
+    if (ratio == -1)
+      ratio = vector_exprs[bb_index]->get_ratio ();
+    else if (vector_exprs[bb_index]->get_ratio () != ratio)
+      return false;
+  }
+  return true;
+}
+
+size_t
+vector_infos_manager::expr_set_num (sbitmap bitdata) const
+{
+  size_t count = 0;
+  for (size_t i = 0; i < vector_exprs.length (); i++)
+    if (bitmap_bit_p (bitdata, i))
+      count++;
+  return count;
+}
+
+void
+vector_infos_manager::release (void)
+{
+  if (!vector_insn_infos.is_empty ())
+    vector_insn_infos.release ();
+  if (!vector_block_infos.is_empty ())
+    vector_block_infos.release ();
+  if (!vector_exprs.is_empty ())
+    vector_exprs.release ();
+
+  if (optimize > 0)
+    {
+      /* Finished. Free up all the things we've allocated.  */
+      free_edge_list (vector_edge_list);
+      sbitmap_vector_free (vector_del);
+      sbitmap_vector_free (vector_insert);
+      sbitmap_vector_free (vector_kill);
+      sbitmap_vector_free (vector_antic);
+      sbitmap_vector_free (vector_transp);
+      sbitmap_vector_free (vector_comp);
+      sbitmap_vector_free (vector_avin);
+      sbitmap_vector_free (vector_avout);
+    }
+}
+
+void
+vector_infos_manager::dump (FILE *file) const
+{
+  basic_block cfg_bb;
+  rtx_insn *rinsn;
+
+  fprintf (file, "\n");
+  FOR_ALL_BB_FN (cfg_bb, cfun)
+    {
+      fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index);
+      fprintf (file, "<HEADER>=");
+      vector_block_infos[cfg_bb->index].local_dem.dump (file);
+      FOR_BB_INSNS (cfg_bb, rinsn)
+	{
+	  if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn))
+	    continue;
+	  fprintf (file, "<insn %d>=", INSN_UID (rinsn));
+	  const auto &info = vector_insn_infos[INSN_UID (rinsn)];
+	  info.dump (file);
+	}
+      fprintf (file, "<FOOTER>=");
+      vector_block_infos[cfg_bb->index].reaching_out.dump (file);
+      fprintf (file, "\n\n");
+    }
+
+  fprintf (file, "\n");
+  FOR_ALL_BB_FN (cfg_bb, cfun)
+    {
+      fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index);
+
+      fprintf (file, "<ANTLOC>=");
+      if (vector_antic == nullptr)
+	fprintf (file, "(nil)\n");
+      else
+	dump_bitmap_file (file, vector_antic[cfg_bb->index]);
+
+      fprintf (file, "<AVLOC>=");
+      if (vector_comp == nullptr)
+	fprintf (file, "(nil)\n");
+      else
+	dump_bitmap_file (file, vector_comp[cfg_bb->index]);
+
+      fprintf (file, "<TRANSP>=");
+      if (vector_transp == nullptr)
+	fprintf (file, "(nil)\n");
+      else
+	dump_bitmap_file (file, vector_transp[cfg_bb->index]);
+
+      fprintf (file, "<KILL>=");
+      if (vector_kill == nullptr)
+	fprintf (file, "(nil)\n");
+      else
+	dump_bitmap_file (file, vector_kill[cfg_bb->index]);
+    }
+
+  fprintf (file, "\n");
+  FOR_ALL_BB_FN (cfg_bb, cfun)
+    {
+      fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n",
+	       cfg_bb->index);
+
+      fprintf (file, "<AVIN>=");
+      if (vector_avin == nullptr)
+	fprintf (file, "(nil)\n");
+      else
+	dump_bitmap_file (file, vector_avin[cfg_bb->index]);
+
+      fprintf (file, "<AVOUT>=");
+      if (vector_avout == nullptr)
+	fprintf (file, "(nil)\n");
+      else
+	dump_bitmap_file (file, vector_avout[cfg_bb->index]);
+
+      fprintf (file, "<DELETE>=");
+      if (vector_del == nullptr)
+	fprintf (file, "(nil)\n");
+      else
+	dump_bitmap_file (file, vector_del[cfg_bb->index]);
+    }
+
+  fprintf (file, "\nGlobal LCM (Lazy code motion) INSERT info:\n");
+  for (size_t i = 0; i < vector_exprs.length (); i++)
+    {
+      for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++)
+	{
+	  edge eg = INDEX_EDGE (vector_edge_list, ed);
+	  if (bitmap_bit_p (vector_insert[ed], i))
+	    fprintf (dump_file,
+		     "INSERT edge %d from bb %d to bb %d for VSETVL "
+		     "expr[%ld]\n",
+		     ed, eg->src->index, eg->dest->index, i);
+	}
+    }
+}
+
+const pass_data pass_data_vsetvl = {
+  RTL_PASS,	 /* type */
+  "vsetvl",	 /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE,	 /* tv_id */
+  0,		 /* properties_required */
+  0,		 /* properties_provided */
+  0,		 /* properties_destroyed */
+  0,		 /* todo_flags_start */
+  0,		 /* todo_flags_finish */
+};
+
+class pass_vsetvl : public rtl_opt_pass
+{
+private:
+  class vector_infos_manager *m_vector_manager;
+
+  void simple_vsetvl (void) const;
+  void lazy_vsetvl (void);
+
+  /* Phase 1.  */
+  void compute_local_backward_infos (const bb_info *);
+
+  /* Phase 2.  */
+  bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const;
+  void transfer_before (vector_insn_info &, insn_info *) const;
+  void transfer_after (vector_insn_info &, insn_info *) const;
+  void emit_local_forward_vsetvls (const bb_info *);
+
+  /* Phase 3.  */
+  void merge_successors (const basic_block, const basic_block);
+  void compute_global_backward_infos (void);
+
+  /* Phase 4.  */
+  void prune_expressions (void);
+  void compute_local_properties (void);
+  bool can_refine_vsetvl_p (const basic_block, uint8_t) const;
+  void refine_vsetvls (void) const;
+  void cleanup_vsetvls (void);
+  bool commit_vsetvls (void);
+  void pre_vsetvl (void);
+
+  /* Phase 5.  */
+  void cleanup_insns (void) const;
+
+  void init (void);
+  void done (void);
+
+public:
+  pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *) final override { return TARGET_VECTOR; }
+  virtual unsigned int execute (function *) final override;
+}; // class pass_vsetvl
+
+/* Simple vsetvl insertion for optimize == 0.  */
+void
+pass_vsetvl::simple_vsetvl (void) const
+{
+  if (dump_file)
+    fprintf (dump_file,
+	     "\nEntering Simple VSETVL PASS and Handling %d basic blocks for "
+	     "function:%s\n",
+	     n_basic_blocks_for_fn (cfun), function_name (cfun));
+
+  basic_block cfg_bb;
+  rtx_insn *rinsn;
+  FOR_ALL_BB_FN (cfg_bb, cfun)
+    {
+      FOR_BB_INSNS (cfg_bb, rinsn)
+	{
+	  if (!NONDEBUG_INSN_P (rinsn))
+	    continue;
+	  if (has_vtype_op (rinsn))
+	    {
+	      const auto info
+		= m_vector_manager->vector_insn_infos[INSN_UID (rinsn)];
+	      emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info,
+				NULL_RTX, rinsn);
+	    }
+	}
+    }
+}
+
+/* Compute demanded information by backward data-flow analysis.  */
+void
+pass_vsetvl::compute_local_backward_infos (const bb_info *bb)
+{
+  vector_insn_info change;
+  change.set_empty ();
+
+  auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
+  block_info.reaching_out = change;
+
+  for (insn_info *insn : bb->reverse_real_nondebug_insns ())
+    {
+      auto &info = m_vector_manager->vector_insn_infos[insn->uid ()];
+
+      if (info.uninit_p ())
+	/* If it is uninitialized, propagate it directly.  */
+	info = change;
+      else if (info.unknown_p ())
+	change = info;
+      else
+	{
+	  gcc_assert (info.valid_p () && "Unexpected Invalid demanded info");
+	  if (change.valid_p () && change.compatible_p (info))
+	    info = change.merge (info);
+	  change = info;
+	}
+    }
+
+  block_info.local_dem = change;
+  if (block_info.local_dem.empty_p ())
+    block_info.reaching_out = block_info.local_dem;
+}
+
+/* Return true if a vsetvl is required to transition from CURR_INFO to
+   REQUIRE.  */
+bool
+pass_vsetvl::need_vsetvl (const vector_insn_info &require,
+			  const vector_insn_info &curr_info) const
+{
+  if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ())
+    return true;
+
+  if (require.compatible_p (curr_info))
+    return false;
+
+  return true;
+}
+
+/* Given an incoming state reaching INSN, modifies that state so that it is
+   minimally compatible with INSN.  The resulting state is guaranteed to be
+   semantically legal for INSN, but may not be the state requested by INSN.  */
+void
+pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const
+{
+  if (!has_vtype_op (insn->rtl ()))
+    return;
+
+  const vector_insn_info require
+    = m_vector_manager->vector_insn_infos[insn->uid ()];
+  if (info.valid_p () && !need_vsetvl (require, info))
+    return;
+  info = require;
+}
+
+/* Given a state with which we evaluated insn (see transfer_before above for why
+   this might be different from the state insn requested), modify the state to
+   reflect the changes insn might make.  */
+void
+pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const
+{
+  if (vector_config_insn_p (insn->rtl ()))
+    {
+      info = m_vector_manager->vector_insn_infos[insn->uid ()];
+      return;
+    }
+
+  /* TODO: Support fault-only-first loads, which may update VL, in the
+     future.  */
+
+  /* If this is something that updates VL/VTYPE that we don't know about, set
+     the state to unknown.  */
+  if (insn->is_call () || insn->is_asm ()
+      || find_access (insn->defs (), VL_REGNUM)
+      || find_access (insn->defs (), VTYPE_REGNUM))
+    info = vector_insn_info::get_unknown ();
+}
+
+/* Emit vsetvl within each block by forward data-flow analysis.  */
+void
+pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb)
+{
+  auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
+  if (block_info.local_dem.empty_p ())
+    return;
+
+  vector_insn_info curr_info;
+  for (insn_info *insn : bb->real_nondebug_insns ())
+    {
+      const vector_insn_info prev_info = curr_info;
+      transfer_before (curr_info, insn);
+
+      if (has_vtype_op (insn->rtl ()))
+	{
+	  if (static_cast<const vl_vtype_info &> (prev_info)
+	      != static_cast<const vl_vtype_info &> (curr_info))
+	    {
+	      const auto require
+		= m_vector_manager->vector_insn_infos[insn->uid ()];
+	      if (!require.compatible_p (
+		    static_cast<const vl_vtype_info &> (prev_info)))
+		insert_vsetvl (EMIT_BEFORE, insn->rtl (), require, prev_info);
+	    }
+	}
+
+      transfer_after (curr_info, insn);
+    }
+
+  block_info.reaching_out = curr_info;
+}
+
+/* Merge the demand info of all successors of FATHER except the CHILD node.  */
+void
+pass_vsetvl::merge_successors (const basic_block father,
+			       const basic_block child)
+{
+  edge e;
+  edge_iterator ei;
+  auto &father_info = m_vector_manager->vector_block_infos[father->index];
+  gcc_assert (father_info.local_dem.dirty_p ()
+	      || father_info.local_dem.empty_p ());
+  gcc_assert (father_info.reaching_out.dirty_p ()
+	      || father_info.reaching_out.empty_p ());
+
+  FOR_EACH_EDGE (e, ei, father->succs)
+    {
+      const basic_block succ = e->dest;
+      if (succ->index == child->index)
+	continue;
+
+      const auto succ_info
+	= m_vector_manager->vector_block_infos[succ->index].local_dem;
+
+      if (!succ_info.valid_p ())
+	continue;
+
+      vector_insn_info new_info;
+      if (father_info.reaching_out.dirty_p ())
+	{
+	  if (!father_info.reaching_out.compatible_p (succ_info))
+	    continue;
+
+	  new_info = succ_info.merge (father_info.reaching_out, true);
+	}
+      else
+	new_info = succ_info;
+
+      new_info.set_dirty ();
+      rtx new_pat = gen_vsetvl_pat (new_info.get_insn ()->rtl (), new_info);
+      new_info.set_dirty_pat (new_pat);
+
+      father_info.local_dem = new_info;
+      father_info.reaching_out = new_info;
+    }
+}
+
+/* Compute global backward demanded info.  */
+void
+pass_vsetvl::compute_global_backward_infos (void)
+{
+  /* We compute the global info by backward propagation.
+     We want to have better performance in the following cases:
+
+	1. for (size_t i = 0; i < n; i++) {
+	     if (i != cond) {
+	       vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
+	       *(vint8mf8_t*)(out + i + 100) = v;
+	     } else {
+	       vbool1_t v = *(vbool1_t*)(in + i + 400);
+	       *(vbool1_t*)(out + i + 400) = v;
+	     }
+	   }
+
+	   Since we don't have any RVV instructions in the preceding blocks,
+	   LCM fails to optimize such a case.  We want to backward propagate
+	   the demand info into the empty blocks so that LCM can do a better
+	   job.
+
+	2. bb 0:
+	     vsetvl e8,mf8 (demand RATIO)
+	   bb 1:
+	     vsetvl e32,mf2 (demand SEW and LMUL)
+	   We backward propagate the first VSETVL into e32,mf2 so that we
+	   are able to eliminate the second VSETVL with LCM.  */
+
+  for (const bb_info *bb : crtl->ssa->reverse_bbs ())
+    {
+      basic_block cfg_bb = bb->cfg_bb ();
+      const auto &prop
+	= m_vector_manager->vector_block_infos[cfg_bb->index].local_dem;
+
+      /* If there is nothing to propagate, just skip it.  */
+      if (!prop.valid_or_dirty_p ())
+	continue;
+
+      if (!backward_propagate_worthwhile_p (
+	    cfg_bb, m_vector_manager->vector_block_infos[cfg_bb->index]))
+	continue;
+
+      edge e;
+      edge_iterator ei;
+      /* Backward propagate to each predecessor.  */
+      FOR_EACH_EDGE (e, ei, cfg_bb->preds)
+	{
+	  rtx new_pat;
+	  auto &block_info
+	    = m_vector_manager->vector_block_infos[e->src->index];
+
+	  /* We don't propagate across complex (abnormal) edges.  */
+	  if (e->flags & EDGE_COMPLEX)
+	    continue;
+	  if (e->src->index == ENTRY_BLOCK_PTR_FOR_FN (cfun)->index)
+	    continue;
+
+	  if (block_info.reaching_out.unknown_p ())
+	    continue;
+	  else if (block_info.reaching_out.empty_p ())
+	    {
+	      if (!can_backward_propagate_p (crtl->ssa, e->src, prop))
+		continue;
+
+	      if (dominate_probability_p (e))
+		{
+		  rtx new_pat = gen_vsetvl_pat (prop.get_insn ()->rtl (), prop);
+
+		  block_info.reaching_out = prop;
+		  block_info.reaching_out.set_dirty ();
+		  block_info.reaching_out.set_dirty_pat (new_pat);
+		  block_info.local_dem = block_info.reaching_out;
+		}
+
+	      merge_successors (e->src, cfg_bb);
+	    }
+	  else if (block_info.reaching_out.dirty_p ())
+	    {
+	      /* DIRTY -> DIRTY or VALID -> DIRTY.  */
+	      vector_insn_info new_info;
+
+	      if (block_info.reaching_out.compatible_p (prop))
+		{
+		  if (block_info.reaching_out >= prop)
+		    continue;
+		  new_info = block_info.reaching_out.merge (prop, true);
+		}
+	      else
+		{
+		  if (dominate_probability_p (e))
+		    new_info = prop;
+		  else
+		    continue;
+		}
+
+	      rtx new_pat
+		= gen_vsetvl_pat (new_info.get_insn ()->rtl (), new_info);
+	      new_info.set_dirty ();
+	      new_info.set_dirty_pat (new_pat);
+	      block_info.local_dem = new_info;
+	      block_info.reaching_out = new_info;
+	    }
+	  else
+	    {
+	      /* We not only change the info during backward propagation,
+		 but also change the VSETVL instruction.  */
+	      gcc_assert (block_info.reaching_out.valid_p ());
+	      if (!block_info.reaching_out.compatible_p (prop))
+		continue;
+	      if (block_info.reaching_out >= prop)
+		continue;
+
+	      vector_insn_info be_merged = block_info.reaching_out;
+	      if (block_info.local_dem == block_info.reaching_out)
+		be_merged = block_info.local_dem;
+	      vector_insn_info new_info = be_merged.merge (prop, true);
+
+	      rtx_insn *rinsn;
+	      if (vector_config_insn_p (new_info.get_insn ()->rtl ()))
+		{
+		  rinsn = new_info.get_insn ()->rtl ();
+		  gcc_assert (vsetvl_insn_p (rinsn)
+			      && "Can't handle X0, rs1 vsetvli yet");
+		}
+	      else
+		{
+		  gcc_assert (has_vtype_op (new_info.get_insn ()->rtl ()));
+		  rinsn = PREV_INSN (new_info.get_insn ()->rtl ());
+		  gcc_assert (vector_config_insn_p (rinsn));
+		}
+	      new_pat = gen_vsetvl_pat (rinsn, new_info);
+	      change_insn (rinsn, new_pat);
+	      if (block_info.local_dem == block_info.reaching_out)
+		block_info.local_dem = new_info;
+	      block_info.reaching_out = new_info;
+	    }
+	}
+    }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "\n\nDirty blocks list: ");
+      for (size_t i = 0; i < m_vector_manager->vector_block_infos.length ();
+	   i++)
+	{
+	  if (m_vector_manager->vector_block_infos[i].reaching_out.dirty_p ())
+	    fprintf (dump_file, "%ld ", i);
+	}
+      fprintf (dump_file, "\n\n");
+    }
+}
+
+/* Assemble the candidate expressions for LCM.  */
+void
+pass_vsetvl::prune_expressions (void)
+{
+  for (size_t i = 0; i < m_vector_manager->vector_block_infos.length (); i++)
+    {
+      if (m_vector_manager->vector_block_infos[i].local_dem.valid_or_dirty_p ())
+	m_vector_manager->create_expr (
+	  m_vector_manager->vector_block_infos[i].local_dem);
+      if (m_vector_manager->vector_block_infos[i]
+	    .reaching_out.valid_or_dirty_p ())
+	m_vector_manager->create_expr (
+	  m_vector_manager->vector_block_infos[i].reaching_out);
+    }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nThe total VSETVL expression num = %d\n",
+	       m_vector_manager->vector_exprs.length ());
+      fprintf (dump_file, "Expression List:\n");
+      for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
+	{
+	  fprintf (dump_file, "Expr[%ld]:\n", i);
+	  m_vector_manager->vector_exprs[i]->dump (dump_file);
+	  fprintf (dump_file, "\n");
+	}
+    }
+}
+
+void
+pass_vsetvl::compute_local_properties (void)
+{
+  /* -  If T is locally available at the end of a block, then T' must be
+	available at the end of the same block. Since some optimization has
+	occurred earlier, T' might not be locally available; however, it must
+	have been previously computed on all paths. As a formula, T in AVLOC(B)
+	implies that T' is in AVOUT(B).
+	An "available occurrence" is one that is the last occurrence in the
+	basic block and the operands are not modified by following statements in
+	the basic block [including this insn].
+
+     -  If T is locally anticipated at the beginning of a block, then either
+	T' is locally anticipated or it is already available from previous
+	blocks. As a formula, this means that T in ANTLOC(B) implies that T' is
+	in ANTLOC(B) or in AVIN(B).
+	An "anticipatable occurrence" is one that is the first occurrence in the
+	basic block, the operands are not modified in the basic block prior
+	to the occurrence and the output is not used between the start of
+	the block and the occurrence.  */
+
+  basic_block cfg_bb;
+  FOR_EACH_BB_FN (cfg_bb, cfun)
+    {
+      int curr_bb_idx = cfg_bb->index;
+      const auto local_dem
+	= m_vector_manager->vector_block_infos[curr_bb_idx].local_dem;
+      const auto reaching_out
+	= m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out;
+
+      if (!local_dem.empty_p ())
+	{
+	  for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
+	    bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i);
+	}
+
+      if (local_dem.valid_or_dirty_p ())
+	{
+	  const insn_info *header_insn = local_dem.get_insn ();
+	  size_t header_index = m_vector_manager->get_expr_id (local_dem);
+	  if (anticipatable_occurrence_p (header_insn, local_dem))
+	    bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx],
+			    header_index);
+	}
+
+      if (reaching_out.valid_or_dirty_p ())
+	{
+	  const insn_info *footer_insn = reaching_out.get_insn ();
+	  size_t footer_index = m_vector_manager->get_expr_id (reaching_out);
+	  if (available_occurrence_p (footer_insn, reaching_out))
+	    bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx],
+			    footer_index);
+	  auto_vec<size_t> available_list
+	    = m_vector_manager->get_all_available_exprs (reaching_out);
+	  for (size_t i = 0; i < available_list.length (); i++)
+	    bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx],
+			    available_list[i]);
+	}
+    }
+
+  /* Compute kill for each basic block using:
+
+     ~(TRANSP | COMP)
+  */
+
+  FOR_EACH_BB_FN (cfg_bb, cfun)
+    {
+      bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index],
+		  m_vector_manager->vector_transp[cfg_bb->index],
+		  m_vector_manager->vector_comp[cfg_bb->index]);
+      bitmap_not (m_vector_manager->vector_kill[cfg_bb->index],
+		  m_vector_manager->vector_kill[cfg_bb->index]);
+    }
+
+  FOR_EACH_BB_FN (cfg_bb, cfun)
+    {
+      edge e;
+      edge_iterator ei;
+
+      /* If the current block is the destination of an abnormal edge, we
+	 kill all trapping (for PRE) and memory (for hoist) expressions
+	 because we won't be able to properly place the instruction on
+	 the edge.  So make them neither anticipatable nor transparent.
+	 This is fairly conservative.
+
+	 ??? For hoisting it may be necessary to check for set-and-jump
+	 instructions here, not just for abnormal edges.  The general problem
+	 is that when an expression cannot be placed right at the end of
+	 a basic block we should account for any side-effects of a subsequent
+	 jump instructions that could clobber the expression.  It would
+	 be best to implement this check along the lines of
+	 should_hoist_expr_to_dom where the target block is already known
+	 and, hence, there's no need to conservatively prune expressions on
+	 "intermediate" set-and-jump instructions.  */
+      FOR_EACH_EDGE (e, ei, cfg_bb->preds)
+	if (e->flags & EDGE_COMPLEX)
+	  {
+	    bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]);
+	    bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]);
+	  }
+    }
+}
+
+/* Return true if VSETVL in the block can be refined as vsetvl zero,zero.  */
+bool
+pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb, uint8_t ratio) const
+{
+  if (!m_vector_manager->all_same_ratio_p (
+	m_vector_manager->vector_avin[cfg_bb->index]))
+    return false;
+
+  size_t expr_id
+    = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]);
+  if (m_vector_manager->vector_exprs[expr_id]->get_ratio () != ratio)
+    return false;
+
+  edge e;
+  edge_iterator ei;
+  bool all_valid_p = true;
+  FOR_EACH_EDGE (e, ei, cfg_bb->preds)
+    {
+      if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index]))
+	{
+	  all_valid_p = false;
+	  break;
+	}
+    }
+
+  if (!all_valid_p)
+    return false;
+  return true;
+}
+
+/* Optimize the case like this:
+
+      bb 0:
+	vsetvl 0 a5,zero,e8,mf8
+	insn 0 (demand SEW + LMUL)
+      bb 1:
+	vsetvl 1 a5,zero,e16,mf4
+	insn 1 (demand SEW + LMUL)
+
+   In this case, we should be able to refine
+   vsetvl 1 into vsetvl zero, zero according to AVIN.  */
+void
+pass_vsetvl::refine_vsetvls (void) const
+{
+  basic_block cfg_bb;
+  FOR_EACH_BB_FN (cfg_bb, cfun)
+    {
+      auto info = m_vector_manager->vector_block_infos[cfg_bb->index].local_dem;
+      insn_info *insn = info.get_insn ();
+      if (!info.valid_p ())
+	continue;
+
+      rtx_insn *rinsn = insn->rtl ();
+      if (!can_refine_vsetvl_p (cfg_bb, info.get_ratio ()))
+	continue;
+
+      if (!vector_config_insn_p (rinsn))
+	rinsn = PREV_INSN (rinsn);
+      rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
+      change_insn (rinsn, new_pat);
+    }
+}
+
+void
+pass_vsetvl::cleanup_vsetvls ()
+{
+  basic_block cfg_bb;
+  FOR_EACH_BB_FN (cfg_bb, cfun)
+    {
+      auto &info
+	= m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
+      gcc_assert (m_vector_manager->expr_set_num (
+		    m_vector_manager->vector_del[cfg_bb->index])
+		  <= 1);
+      for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
+	{
+	  if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i))
+	    {
+	      if (info.dirty_p ())
+		info.set_unknown ();
+	      else
+		{
+		  insn_info *insn
+		    = m_vector_manager->vector_exprs[i]->get_insn ();
+		  gcc_assert (insn && insn->rtl ());
+		  rtx_insn *rinsn;
+		  if (vector_config_insn_p (insn->rtl ()))
+		    rinsn = insn->rtl ();
+		  else
+		    {
+		      gcc_assert (has_vtype_op (insn->rtl ()));
+		      rinsn = PREV_INSN (insn->rtl ());
+		      gcc_assert (
+			vector_config_insn_p (PREV_INSN (insn->rtl ())));
+		    }
+		  eliminate_insn (rinsn);
+		}
+	    }
+	}
+    }
+}
+
+bool
+pass_vsetvl::commit_vsetvls (void)
+{
+  bool need_commit = false;
+
+  for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
+    {
+      for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
+	{
+	  edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
+	  if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i))
+	    {
+	      const vector_insn_info *require
+		= m_vector_manager->vector_exprs[i];
+	      gcc_assert (require->valid_or_dirty_p ());
+	      rtl_profile_for_edge (eg);
+	      start_sequence ();
+
+	      insn_info *insn = require->get_insn ();
+	      vector_insn_info prev_info = vector_insn_info ();
+	      if (m_vector_manager->all_same_ratio_p (
+		    m_vector_manager->vector_avout[eg->src->index]))
+		{
+		  size_t first = bitmap_first_set_bit (
+		    m_vector_manager->vector_avout[eg->src->index]);
+		  prev_info = *m_vector_manager->vector_exprs[first];
+		}
+
+	      insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info);
+	      rtx_insn *rinsn = get_insns ();
+	      end_sequence ();
+	      default_rtl_profile ();
+
+	      /* We should not get an abnormal edge here.  */
+	      gcc_assert (!(eg->flags & EDGE_ABNORMAL));
+	      need_commit = true;
+	      insert_insn_on_edge (rinsn, eg);
+	    }
+	}
+    }
+
+  basic_block cfg_bb;
+  FOR_EACH_BB_FN (cfg_bb, cfun)
+    {
+      const auto reaching_out
+	= m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
+      if (!reaching_out.dirty_p ())
+	continue;
+
+      rtx new_pat = reaching_out.get_dirty_pat ();
+      if (can_refine_vsetvl_p (cfg_bb, reaching_out.get_ratio ()))
+	new_pat
+	  = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX);
+
+      start_sequence ();
+      emit_insn (new_pat);
+      rtx_insn *rinsn = get_insns ();
+      end_sequence ();
+      insert_insn_end_basic_block (rinsn, cfg_bb);
+      if (dump_file)
+	{
+	  fprintf (dump_file,
+		   "\nInsert vsetvl insn %d at the end of <bb %d>:\n",
+		   INSN_UID (rinsn), cfg_bb->index);
+	  print_rtl_single (dump_file, rinsn);
+	}
+    }
+
+  return need_commit;
+}
+
+void
+pass_vsetvl::pre_vsetvl (void)
+{
+  /* Compute entity list.  */
+  prune_expressions ();
+
+  /* Create the bitmap vectors.  */
+  m_vector_manager->vector_antic
+    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
+			    m_vector_manager->vector_exprs.length ());
+  m_vector_manager->vector_transp
+    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
+			    m_vector_manager->vector_exprs.length ());
+  m_vector_manager->vector_comp
+    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
+			    m_vector_manager->vector_exprs.length ());
+  m_vector_manager->vector_avin
+    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
+			    m_vector_manager->vector_exprs.length ());
+  m_vector_manager->vector_avout
+    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
+			    m_vector_manager->vector_exprs.length ());
+  m_vector_manager->vector_kill
+    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
+			    m_vector_manager->vector_exprs.length ());
+
+  bitmap_vector_ones (m_vector_manager->vector_transp,
+		      last_basic_block_for_fn (cfun));
+  bitmap_vector_clear (m_vector_manager->vector_antic,
+		       last_basic_block_for_fn (cfun));
+  bitmap_vector_clear (m_vector_manager->vector_comp,
+		       last_basic_block_for_fn (cfun));
+  compute_local_properties ();
+  m_vector_manager->vector_edge_list = pre_edge_lcm_avs (
+    m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp,
+    m_vector_manager->vector_comp, m_vector_manager->vector_antic,
+    m_vector_manager->vector_kill, m_vector_manager->vector_avin,
+    m_vector_manager->vector_avout, &m_vector_manager->vector_insert,
+    &m_vector_manager->vector_del);
+
+  /* We should dump the information before the CFG is changed. Otherwise it
+     will produce an ICE (internal compiler error).  */
+  if (dump_file)
+    m_vector_manager->dump (dump_file);
+
+  refine_vsetvls ();
+  cleanup_vsetvls ();
+  bool need_commit = commit_vsetvls ();
+  if (need_commit)
+    commit_edge_insertions ();
+}
+
+void
+pass_vsetvl::cleanup_insns (void) const
+{
+  for (const bb_info *bb : crtl->ssa->bbs ())
+    {
+      for (insn_info *insn : bb->real_nondebug_insns ())
+	{
+	  rtx_insn *rinsn = insn->rtl ();
+
+	  if (vlmax_avl_insn_p (rinsn))
+	    {
+	      eliminate_insn (rinsn);
+	      continue;
+	    }
+
+	  /* Erase the AVL operand from the instruction.  */
+	  if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn)))
+	    continue;
+	  rtx avl = get_vl (rinsn);
+	  if (count_occurrences (PATTERN (rinsn), avl, true) == 1)
+	    {
+	      /* Get the list of uses for the new instruction.  */
+	      auto attempt = crtl->ssa->new_change_attempt ();
+	      insn_change change (insn);
+	      /* Remove the use of the substituted value.  */
+	      access_array_builder uses_builder (attempt);
+	      uses_builder.reserve (insn->num_uses () - 1);
+	      for (use_info *use : insn->uses ())
+		if (use != find_access (insn->uses (), REGNO (avl)))
+		  uses_builder.quick_push (use);
+	      use_array new_uses = use_array (uses_builder.finish ());
+	      change.new_uses = new_uses;
+	      change.move_range = insn->ebb ()->insn_range ();
+	      rtx pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
+	      gcc_assert (change_insn (crtl->ssa, change, insn, pat));
+	    }
+	}
+    }
+}
+
+void
+pass_vsetvl::init (void)
+{
+  if (optimize > 0)
+    {
+      /* Initialization of RTL_SSA.  */
+      calculate_dominance_info (CDI_DOMINATORS);
+      df_analyze ();
+      crtl->ssa = new function_info (cfun);
+    }
+
+  m_vector_manager = new vector_infos_manager ();
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nPrologue: Initialize vector infos\n");
+      m_vector_manager->dump (dump_file);
+    }
+}
+
+void
+pass_vsetvl::done (void)
+{
+  if (optimize > 0)
+    {
+      /* Finalization of RTL_SSA.  */
+      free_dominance_info (CDI_DOMINATORS);
+      if (crtl->ssa->perform_pending_updates ())
+	cleanup_cfg (0);
+      delete crtl->ssa;
+      crtl->ssa = nullptr;
+    }
+  m_vector_manager->release ();
+  delete m_vector_manager;
+  m_vector_manager = nullptr;
+}
+
+/* Lazy vsetvl insertion for optimize > 0. */
+void
+pass_vsetvl::lazy_vsetvl (void)
+{
+  if (dump_file)
+    fprintf (dump_file,
+	     "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for "
+	     "function:%s\n",
+	     n_basic_blocks_for_fn (cfun), function_name (cfun));
+
+  /* Phase 1 - Compute the local dems within each block.
+     The data-flow analysis within each block is backward analysis.  */
+  if (dump_file)
+    fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n");
+  for (const bb_info *bb : crtl->ssa->bbs ())
+    compute_local_backward_infos (bb);
+  if (dump_file)
+    m_vector_manager->dump (dump_file);
+
+  /* Phase 2 - Emit vsetvl instructions within each basic block according to
+     demand, compute and save ANTLOC && AVLOC of each block.  */
+  if (dump_file)
+    fprintf (dump_file,
+	     "\nPhase 2: Emit vsetvl instruction within each block\n");
+  for (const bb_info *bb : crtl->ssa->bbs ())
+    emit_local_forward_vsetvls (bb);
+  if (dump_file)
+    m_vector_manager->dump (dump_file);
+
+  /* Phase 3 - Propagate demanded info across blocks.  */
+  if (dump_file)
+    fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n");
+  compute_global_backward_infos ();
+  if (dump_file)
+    m_vector_manager->dump (dump_file);
+
+  /* Phase 4 - Lazy code motion.  */
+  if (dump_file)
+    fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n");
+  pre_vsetvl ();
+
+  /* Phase 5 - Cleanup AVL && VL operand of RVV instruction.  */
+  if (dump_file)
+    fprintf (dump_file, "\nPhase 5: Cleanup AVL and VL operands\n");
+  cleanup_insns ();
+}
+
+/* Main entry point for this pass.  */
+unsigned int
+pass_vsetvl::execute (function *)
+{
+  if (n_basic_blocks_for_fn (cfun) <= 0)
+    return 0;
+
+  /* The reason we have this is that instruction splitting has not finished
+     yet when optimize == 0. In this case, we conservatively split all
+     instructions here to make sure we don't miss any RVV instruction.  */
+  if (!optimize)
+    split_all_insns ();
+
+  /* Return early if there are no vector instructions.  */
+  if (!has_vector_insn (cfun))
+    return 0;
+
+  init ();
+
+  if (!optimize)
+    simple_vsetvl ();
+  else
+    lazy_vsetvl ();
+
+  done ();
+  return 0;
+}
+
+rtl_opt_pass *
+make_pass_vsetvl (gcc::context *ctxt)
+{
+  return new pass_vsetvl (ctxt);
+}
diff --git a/gcc/config/riscv/riscv-vsetvl.h b/gcc/config/riscv/riscv-vsetvl.h
new file mode 100644
index 00000000000..ad9bb27cebf
--- /dev/null
+++ b/gcc/config/riscv/riscv-vsetvl.h
@@ -0,0 +1,344 @@ 
+/* VSETVL pass header for RISC-V 'V' Extension for GNU compiler.
+   Copyright (C) 2022-2022 Free Software Foundation, Inc.
+   Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_RISCV_VSETVL_H
+#define GCC_RISCV_VSETVL_H
+
+#define IS_AGNOSTIC(VALUE) (bool) (VALUE & 0x1 || (VALUE >> 1 & 0x1))
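+
+/* A small worked example for the macro above (exposition only, not used by
+   the pass): only the low two bits of VALUE are inspected, so
+   IS_AGNOSTIC (0) == false, while IS_AGNOSTIC (1), IS_AGNOSTIC (2) and
+   IS_AGNOSTIC (3) all evaluate to true.  */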
+
+namespace riscv_vector {
+
+/* Classification of vsetvl instruction.  */
+enum vsetvl_type
+{
+  VSETVL_NORMAL,
+  VSETVL_VTYPE_CHANGE_ONLY,
+  VSETVL_DISCARD_RESULT,
+};
+
+enum emit_type
+{
+  /* emit_insn directly.  */
+  EMIT_DIRECT,
+  EMIT_BEFORE,
+  EMIT_AFTER,
+};
+
+enum demand_type
+{
+  DEMAND_AVL,
+  DEMAND_SEW,
+  DEMAND_LMUL,
+  DEMAND_RATIO,
+  DEMAND_TAIL_POLICY,
+  DEMAND_MASK_POLICY,
+  NUM_DEMAND
+};
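+
+/* A hedged note on how these demands are meant to combine (an illustrative
+   reading of the enum, not an extra guarantee of this patch): an instruction
+   that only cares about the SEW/LMUL ratio can demand DEMAND_RATIO instead of
+   both DEMAND_SEW and DEMAND_LMUL, which leaves the compatibility and merge
+   logic more freedom when fusing infos across instructions.  */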
+
+/* AVL info for an RVV instruction. Most RVV instructions carry their AVL
+   operand as an implicit dependency. The AVL comparison between 2 RVV
+   instructions is very important since it affects our decision whether we
+   should insert a vsetvl instruction in this situation. The AVL operand of
+   an RVV instruction can only be either a const_int value < 32 or a reg
+   value which can be defined by either a real RTL instruction or a PHI
+   instruction. So we need a standalone method to define AVL comparison; we
+   can not simply use operator "==" to compare 2 RTX values since it is too
+   strict and would make us miss a lot of optimization opportunities. This
+   method handles the following cases:
+
+     -  Background:
+	  The insert-vsetvl PASS runs after RA.
+
+     -  Terminology:
+	  - pr: Pseudo-register.
+	  - hr: Hardware-register.
+
+     -  Case 1:
+
+	Before RA:
+	  li pr138,13
+	  insn1 (implicit depend on pr138).
+	  li pr139,14
+	  insn2 (implicit depend on pr139).
+
+	After RA:
+	  li hr5,13
+	  insn1 (implicit depend on hr5).
+	  li hr5,14
+	  insn2 (implicit depend on hr5).
+
+	Correct IR after vsetvl PASS:
+	  li hr5,13
+	  vsetvl1 zero,hr5....
+	  insn1 (implicit depend on hr5).
+	  li hr5,14
+	  vsetvl2 zero,hr5....
+	  insn2 (implicit depend on hr5).
+
+     In this case, both insn1 and insn2 are using hr5 as the same AVL.
+     If we use "rtx_equal_p" or "REGNO (AVL1) == REGNO (AVL2)", we will end
+     up missing the vsetvl2 instruction, which produces a wrong result.
+
+     Note: Using the "==" operator to compare 2 AVL RTXes strictly can fix this
+     issue. However, it is too strict a comparison since not all member
+     variables in the RTX data structure need to be the same. It would
+     make us miss a lot of optimization opportunities.
+
+     -  Case 2:
+
+	After RA:
+	bb 0:
+	  li hr5,13
+	bb 1:
+	  li hr5,14
+	bb2:
+	  insn1 (implicit depend on hr5).
+	  insn2 (implicit depend on hr5).
+
+     In this case, we may end up with different AVL RTXes and produce a
+     redundant vsetvl instruction.
+
+     VALUE is the implicit dependency in each RVV instruction.
+     SOURCE is the source definition information of the AVL operand.  */
+class avl_info
+{
+private:
+  rtx m_value;
+  rtl_ssa::set_info *m_source;
+
+public:
+  avl_info () : m_value (NULL_RTX), m_source (nullptr) {}
+  avl_info (rtx, rtl_ssa::set_info *);
+  rtx get_value () const { return m_value; }
+  rtl_ssa::set_info *get_source () const { return m_source; }
+  avl_info &operator= (const avl_info &);
+  bool operator== (const avl_info &) const;
+  bool operator!= (const avl_info &) const;
+};
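+
+/* A hedged sketch of the intended comparison semantics, restating the cases
+   above (illustrative only; the real logic lives in avl_info::operator== in
+   riscv-vsetvl.cc): two avl_infos naming the same hard register but coming
+   from different definitions (Case 1) must compare unequal, while demanding
+   full RTX identity would be stricter than necessary and lose the
+   optimization opportunity of Case 2.  */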
+
+/* Basic structure to save VL/VTYPE information.  */
+struct vl_vtype_info
+{
+protected:
+  /* AVL can be either register or const_int.  */
+  avl_info m_avl;
+  /* Fields from VTYPE. Which of these fields are actually checked
+     depends on the demand flags of the instruction.  */
+  uint8_t m_sew;
+  riscv_vector::vlmul_type m_vlmul;
+  uint8_t m_ratio;
+  bool m_ta;
+  bool m_ma;
+
+public:
+  void set_sew (uint8_t sew) { m_sew = sew; }
+  void set_vlmul (riscv_vector::vlmul_type vlmul) { m_vlmul = vlmul; }
+  void set_ratio (uint8_t ratio) { m_ratio = ratio; }
+  void set_ta (bool ta) { m_ta = ta; }
+  void set_ma (bool ma) { m_ma = ma; }
+
+  vl_vtype_info ()
+    : m_avl (avl_info ()), m_sew (0), m_vlmul (riscv_vector::LMUL_RESERVED),
+      m_ratio (0), m_ta (0), m_ma (0)
+  {}
+  vl_vtype_info (const vl_vtype_info &) = default;
+  vl_vtype_info &operator= (const vl_vtype_info &) = default;
+  vl_vtype_info (avl_info, uint8_t, riscv_vector::vlmul_type, uint8_t, bool,
+		 bool);
+
+  bool operator== (const vl_vtype_info &) const;
+  bool operator!= (const vl_vtype_info &) const;
+
+  bool has_avl_imm () const { return get_avl () && CONST_INT_P (get_avl ()); }
+  bool has_avl_reg () const { return get_avl () && REG_P (get_avl ()); }
+  bool has_avl_no_reg () const { return !get_avl (); }
+  bool has_non_zero_avl () const;
+
+  rtx get_avl () const { return m_avl.get_value (); }
+  avl_info get_avl_info () const { return m_avl; }
+  void set_avl_info (const avl_info &avl) { m_avl = avl; }
+  uint8_t get_sew () const { return m_sew; }
+  riscv_vector::vlmul_type get_vlmul () const { return m_vlmul; }
+  uint8_t get_ratio () const { return m_ratio; }
+  bool get_ta () const { return m_ta; }
+  bool get_ma () const { return m_ma; }
+
+  bool same_avl_p (const vl_vtype_info &) const;
+  bool same_vtype_p (const vl_vtype_info &) const;
+  bool same_vlmax_p (const vl_vtype_info &) const;
+};
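+
+/* A worked example for the m_ratio field (assumed SEW/LMUL arithmetic,
+   consistent with the refine_vsetvls example in riscv-vsetvl.cc): e8,mf8
+   gives 8 / (1/8) = 64 and e16,mf4 gives 16 / (1/4) = 64, so both
+   configurations share the same VLMAX and a vtype-only change suffices.  */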
+
+class vector_insn_info : public vl_vtype_info
+{
+private:
+  enum state_type
+  {
+    UNINITIALIZED,
+    VALID,
+    UNKNOWN,
+    EMPTY,
+
+    /* The block is marked as containing a VSETVL instruction during demand
+       backward propagation in order to gain better LCM optimization, even
+       though such a VSETVL instruction has not actually been emitted yet.  */
+    DIRTY
+  };
+
+  enum state_type m_state;
+
+  bool m_demands[NUM_DEMAND];
+
+  /* TODO: Assume INSN1 = the INSN holding the definition of the AVL.
+	   INSN2 = the INSN before which a vsetvl insn is inserted.
+     We may need to add a new member to save the INSN holding the AVL.
+     m_insn holds the INSN before which a vsetvl insn is inserted in
+     Phase 2. Ideally, most of the time INSN1 == INSN2. However, consider
+     such a case:
+
+	vmv.x.s (INSN2)
+	vle8.v (INSN1)
+
+     If these 2 instructions are compatible, we should only issue a vsetvl INSN
+     (with AVL included) before vmv.x.s, but vmv.x.s is not the INSN holding the
+     definition of AVL.  */
+  rtl_ssa::insn_info *m_insn;
+  /* Save the instruction pattern for a dirty block.
+     An empty block may be marked as a dirty block during demand backward
+     propagation (phase 3), which pretends to LCM that a VSETVL instruction
+     exists there in order to gain better LCM optimization. Such an
+     instruction is not emitted yet; we save its pattern here and then emit
+     it in the 4th phase if necessary.  */
+  rtx m_dirty_pat;
+
+  /* Parse the instruction to get VL/VTYPE information and demand
+     information.  */
+  /* This is only called by the simple_vsetvl subroutine when optimize == 0.
+     Since RTL_SSA can not be enabled when optimize == 0, we don't initialize
+     the m_insn.  */
+  void parse_insn (rtx_insn *);
+  /* This is only called by the lazy_vsetvl subroutine when optimize > 0.
+     We use the RTL_SSA framework to initialize the insn_info.  */
+  void parse_insn (rtl_ssa::insn_info *);
+
+  friend class vector_infos_manager;
+
+public:
+  vector_insn_info ()
+    : vl_vtype_info (), m_state (UNINITIALIZED), m_demands{false},
+      m_insn (nullptr), m_dirty_pat (NULL_RTX)
+  {}
+
+  bool operator> (const vector_insn_info &) const;
+  bool operator>= (const vector_insn_info &) const;
+  bool operator== (const vector_insn_info &) const;
+
+  bool uninit_p () const { return m_state == UNINITIALIZED; }
+  bool valid_p () const { return m_state == VALID; }
+  bool unknown_p () const { return m_state == UNKNOWN; }
+  bool empty_p () const { return m_state == EMPTY; }
+  bool dirty_p () const { return m_state == DIRTY; }
+  bool valid_or_dirty_p () const
+  {
+    return m_state == VALID || m_state == DIRTY;
+  }
+
+  static vector_insn_info get_unknown ()
+  {
+    vector_insn_info info;
+    info.set_unknown ();
+    return info;
+  }
+
+  void set_valid () { m_state = VALID; }
+  void set_unknown () { m_state = UNKNOWN; }
+  void set_empty () { m_state = EMPTY; }
+  void set_dirty () { m_state = DIRTY; }
+  void set_dirty_pat (rtx pat) { m_dirty_pat = pat; }
+  void set_insn (rtl_ssa::insn_info *insn) { m_insn = insn; }
+
+  bool demand_p (enum demand_type type) const { return m_demands[type]; }
+  void demand (enum demand_type type) { m_demands[type] = true; }
+  void demand_vl_vtype ();
+  void undemand (enum demand_type type) { m_demands[type] = false; }
+
+  bool compatible_p (const vector_insn_info &) const;
+  bool compatible_avl_p (const vl_vtype_info &) const;
+  bool compatible_vtype_p (const vl_vtype_info &) const;
+  bool compatible_p (const vl_vtype_info &) const;
+  vector_insn_info merge (const vector_insn_info &, bool) const;
+
+  rtl_ssa::insn_info *get_insn () const { return m_insn; }
+  rtx get_dirty_pat () const { return m_dirty_pat; }
+
+  void dump (FILE *) const;
+};
+
+struct vector_block_info
+{
+  /* The local_dem vector insn_info of the block.  */
+  vector_insn_info local_dem;
+
+  /* The reaching_out vector insn_info of the block.  */
+  vector_insn_info reaching_out;
+
+  vector_block_info () = default;
+};
+
+class vector_infos_manager
+{
+public:
+  auto_vec<vector_insn_info> vector_insn_infos;
+  auto_vec<vector_block_info> vector_block_infos;
+  auto_vec<vector_insn_info *> vector_exprs;
+
+  struct edge_list *vector_edge_list;
+  sbitmap *vector_kill;
+  sbitmap *vector_del;
+  sbitmap *vector_insert;
+  sbitmap *vector_antic;
+  sbitmap *vector_transp;
+  sbitmap *vector_comp;
+  sbitmap *vector_avin;
+  sbitmap *vector_avout;
+
+  vector_infos_manager ();
+
+  /* Create a new expr in the expr list if it does not already exist.  */
+  void create_expr (vector_insn_info &);
+
+  /* Get the expr id of the given expr.  */
+  size_t get_expr_id (const vector_insn_info &) const;
+
+  /* Return the number of exprs that are set in the bitmap.  */
+  size_t expr_set_num (sbitmap) const;
+
+  /* Get the ids of all available expressions for the given vector info.  */
+  auto_vec<size_t> get_all_available_exprs (const vector_insn_info &) const;
+
+  /* Return true if all expressions set in the bitmap have the same ratio.  */
+  bool all_same_ratio_p (sbitmap) const;
+
+  void release (void);
+
+  void dump (FILE *) const;
+};
+
+} // namespace riscv_vector
+#endif
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index 7997db3d898..7af9f5402ec 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -51,6 +51,14 @@  riscv-c.o: $(srcdir)/config/riscv/riscv-c.cc $(CONFIG_H) $(SYSTEM_H) \
 	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
 		$(srcdir)/config/riscv/riscv-c.cc
 
+riscv-vsetvl.o: $(srcdir)/config/riscv/riscv-vsetvl.cc \
+  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \
+  $(TARGET_H) tree-pass.h df.h rtl-ssa.h cfgcleanup.h insn-config.h \
+  insn-attr.h insn-opinit.h tm-constrs.h cfgrtl.h cfganal.h lcm.h \
+  predict.h profile-count.h $(srcdir)/config/riscv/riscv-vsetvl.h
+	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+		$(srcdir)/config/riscv/riscv-vsetvl.cc
+
 riscv-d.o: $(srcdir)/config/riscv/riscv-d.cc
 	$(COMPILE) $<
 	$(POSTCOMPILE)
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 38da2f7f095..3bfda652318 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -32,6 +32,7 @@ 
   UNSPEC_VSETVL
   UNSPEC_VUNDEF
   UNSPEC_VPREDICATE
+  UNSPEC_VLMAX
 ])
 
 (define_constants [
@@ -94,7 +95,13 @@ 
 	 (const_int 32)
 	 (eq_attr "mode" "VNx1DI,VNx2DI,VNx4DI,VNx8DI,\
 			  VNx1DF,VNx2DF,VNx4DF,VNx8DF")
-	 (const_int 64)]
+	 (const_int 64)
+
+	 (eq_attr "type" "vsetvl")
+	 (if_then_else (match_test "INSN_CODE (curr_insn) == CODE_FOR_vsetvldi
+				    || INSN_CODE (curr_insn) == CODE_FOR_vsetvlsi")
+		       (symbol_ref "INTVAL (operands[2])")
+		       (const_int INVALID_ATTRIBUTE))]
 	(const_int INVALID_ATTRIBUTE)))
 
 ;; Ditto to LMUL.
@@ -142,7 +149,12 @@ 
 	 (eq_attr "mode" "VNx4DI,VNx4DF")
 	   (symbol_ref "riscv_vector::get_vlmul(E_VNx4DImode)")
 	 (eq_attr "mode" "VNx8DI,VNx8DF")
-	   (symbol_ref "riscv_vector::get_vlmul(E_VNx8DImode)")]
+	   (symbol_ref "riscv_vector::get_vlmul(E_VNx8DImode)")
+	 (eq_attr "type" "vsetvl")
+	 (if_then_else (match_test "INSN_CODE (curr_insn) == CODE_FOR_vsetvldi
+				    || INSN_CODE (curr_insn) == CODE_FOR_vsetvlsi")
+		       (symbol_ref "INTVAL (operands[3])")
+		       (const_int INVALID_ATTRIBUTE))]
 	(const_int INVALID_ATTRIBUTE)))
 
 ;; It is valid for instruction that require sew/lmul ratio.
@@ -219,6 +231,34 @@ 
 	 (const_int 6)]
 	(const_int INVALID_ATTRIBUTE)))
 
+;; The index of operand[] to get the avl type op.
+(define_attr "avl_type_op_idx" ""
+  (cond [(eq_attr "type" "vlde,vlde,vste,vimov,vimov,vimov,vfmov,vlds,vlds")
+	 (const_int 7)
+	 (eq_attr "type" "vldm,vstm,vimov,vmalu,vmalu")
+	 (const_int 5)]
+	(const_int INVALID_ATTRIBUTE)))
+
+;; The tail policy op value.
+(define_attr "ta" ""
+  (cond [(eq_attr "type" "vlde,vste,vimov,vfmov,vlds")
+	   (symbol_ref "riscv_vector::get_ta(operands[5])")]
+	(const_int INVALID_ATTRIBUTE)))
+
+;; The mask policy op value.
+(define_attr "ma" ""
+  (cond [(eq_attr "type" "vlde,vlds")
+	   (symbol_ref "riscv_vector::get_ma(operands[6])")]
+	(const_int INVALID_ATTRIBUTE)))
+
+;; The avl type value.
+(define_attr "avl_type" ""
+  (cond [(eq_attr "type" "vlde,vlde,vste,vimov,vimov,vimov,vfmov,vlds,vlds")
+	   (symbol_ref "INTVAL (operands[7])")
+	 (eq_attr "type" "vldm,vstm,vimov,vmalu,vmalu")
+	   (symbol_ref "INTVAL (operands[5])")]
+	(const_int INVALID_ATTRIBUTE)))
+
 ;; -----------------------------------------------------------------
 ;; ---- Miscellaneous Operations
 ;; -----------------------------------------------------------------
@@ -229,6 +269,37 @@ 
   "TARGET_VECTOR"
   "")
 
+;; This pattern is used to hold the AVL operand for
+;; RVV instructions that implicitly use VLMAX AVL.
+;; An RVV instruction that implicitly uses a GPR ultimately
+;; defined by this pattern is safe for the VSETVL pass to emit
+;; a vsetvl instruction that modifies this register after RA.
+;; Case 1:
+;;   vlmax_avl a5
+;;   ... (across many blocks)
+;;   vadd (implicit use a5)  ====> emit: vsetvl a5,zero
+;; Case 2:
+;;   vlmax_avl a5
+;;   ... (across many blocks)
+;;   mv a6,a5
+;;   ... (across many blocks)
+;;   vadd (implicit use a6)  ====> emit: vsetvl a6,zero
+;; Case 3:
+;;   vlmax_avl a5
+;;   ... (across many blocks)
+;;   store mem,a5 (spill)
+;;   ... (across many blocks)
+;;   load a7,mem (spill)
+;;   ... (across many blocks)
+;;   vadd (implicit use a7)  ====> emit: vsetvl a7,zero
+;; Such cases are all safe for the VSETVL PASS to emit a vsetvl
+;; instruction that modifies the AVL operand.
+(define_insn "@vlmax_avl<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+	(unspec:P [(match_operand:P 1 "const_int_operand" "i")] UNSPEC_VLMAX))]
+  "TARGET_VECTOR"
+  "")
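+
+;; A hedged note on the life cycle of this placeholder (a summary of the
+;; behaviour of cleanup_insns in riscv-vsetvl.cc, not an extra guarantee):
+;; the pattern itself emits no assembly, and once the VSETVL pass has emitted
+;; the real "vsetvl a5,zero,..." instructions it deletes the vlmax_avl insns
+;; in its cleanup phase, so nothing of this placeholder reaches final code.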
+
 ;; -----------------------------------------------------------------
 ;; ---- Moves Operations
 ;; -----------------------------------------------------------------
@@ -482,32 +553,35 @@ 
   [(set_attr "type" "vsetvl")
    (set_attr "mode" "<MODE>")])
 
-;; We keep it as no side effects before reload_completed.
-;; In this case, we can gain benefits from different GCC
-;; internal PASS such as cprop, fwprop, combine,...etc.
-
-;; Then recover it for "insert-vsetvl" and "sched2" PASS
-;; in order to get correct codegen.
-(define_insn_and_split "@vsetvl<mode>_no_side_effects"
-  [(set (match_operand:P 0 "register_operand" "=r")
-	(unspec:P [(match_operand:P 1 "csr_operand" "rK")
-		   (match_operand 2 "const_int_operand" "i")
-		   (match_operand 3 "const_int_operand" "i")
-		   (match_operand 4 "const_int_operand" "i")
-		   (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))]
+;; vsetvl zero,zero,vtype instruction.
+;; This pattern has no side effects and does not set the X0 register.
+(define_insn "vsetvl_vtype_change_only"
+  [(set (reg:SI VTYPE_REGNUM)
+	(unspec:SI
+	  [(match_operand 0 "const_int_operand" "i")
+	   (match_operand 1 "const_int_operand" "i")
+	   (match_operand 2 "const_int_operand" "i")
+	   (match_operand 3 "const_int_operand" "i")] UNSPEC_VSETVL))]
   "TARGET_VECTOR"
-  "#"
-  "&& reload_completed"
-  [(parallel
-    [(set (match_dup 0)
-	  (unspec:P [(match_dup 1) (match_dup 2) (match_dup 3)
-		     (match_dup 4) (match_dup 5)] UNSPEC_VSETVL))
-     (set (reg:SI VL_REGNUM)
-	  (unspec:SI [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_VSETVL))
-     (set (reg:SI VTYPE_REGNUM)
-	  (unspec:SI [(match_dup 2) (match_dup 3) (match_dup 4)
-		      (match_dup 5)] UNSPEC_VSETVL))])]
-  ""
+  "vsetvli\tzero,zero,e%0,%m1,t%p2,m%p3"
+  [(set_attr "type" "vsetvl")
+   (set_attr "mode" "SI")])
+
+;; vsetvl zero,rs1,vtype instruction.
+;; The reason we need this pattern is that we want to avoid setting the X0
+;; register in the vsetvl instruction pattern.
+(define_insn "@vsetvl_discard_result<mode>"
+  [(set (reg:SI VL_REGNUM)
+	(unspec:SI [(match_operand:P 0 "csr_operand" "rK")
+		    (match_operand 1 "const_int_operand" "i")
+		    (match_operand 2 "const_int_operand" "i")] UNSPEC_VSETVL))
+   (set (reg:SI VTYPE_REGNUM)
+	(unspec:SI [(match_dup 1)
+		    (match_dup 2)
+		    (match_operand 3 "const_int_operand" "i")
+		    (match_operand 4 "const_int_operand" "i")] UNSPEC_VSETVL))]
+  "TARGET_VECTOR"
+  "vsetvli\tzero,%0,e%1,%m2,t%p3,m%p4"
   [(set_attr "type" "vsetvl")
    (set_attr "mode" "<MODE>")])
 
@@ -563,6 +637,7 @@ 
 	     (match_operand 4 "vector_length_operand"    " rK,  rK,    rK,    rK,    rK")
 	     (match_operand 5 "const_int_operand"        "  i,   i,     i,     i,     i")
 	     (match_operand 6 "const_int_operand"        "  i,   i,     i,     i,     i")
+	     (match_operand 7 "const_int_operand"        "  i,   i,     i,     i,     i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operand:V 3 "vector_move_operand"       "  m,   m,    vr,    vr, viWc0")
@@ -593,6 +668,7 @@ 
 	  (unspec:VB
 	    [(match_operand:VB 1 "vector_mask_operand"   "Wc1, Wc1, Wc1, Wc1, Wc1")
 	     (match_operand 4 "vector_length_operand"    " rK,  rK,  rK,  rK,  rK")
+	     (match_operand 5 "const_int_operand"        "  i,   i,   i,   i,   i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (match_operand:VB 3 "vector_move_operand"      "  m,  vr,  vr, Wc0, Wc1")
@@ -628,6 +704,7 @@ 
 	     (match_operand 4 "vector_length_operand"         " rK,  rK,  rK,  rK")
 	     (match_operand 5 "const_int_operand"             "  i,   i,   i,   i")
 	     (match_operand 6 "const_int_operand"             "  i,   i,   i,   i")
+	     (match_operand 7 "const_int_operand"             "  i,   i,   i,   i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (vec_duplicate:V