[v2] lib: sbi_misaligned_ldst: Add handling of vector load/store

Message ID 20241206032149.1828583-1-nylon.chen@sifive.com
State Accepted
Series [v2] lib: sbi_misaligned_ldst: Add handling of vector load/store

Commit Message

Nylon Chen Dec. 6, 2024, 3:21 a.m. UTC
Add exception handling of vector load/store instructions from
the vector extension to the sbi_misaligned_ldst library.

This implementation references the misaligned_vec_ldst
implementation in the riscv-pk project.
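
For illustration, a minimal bare-metal sketch of the kind of access this
emulation targets (the buffer layout, names, and the e32/VL choice are made
up; it assumes the toolchain accepts ".option arch, +v"):

    #include <stdint.h>

    /* Source buffer deliberately offset by one byte so that 32-bit
     * element accesses are not naturally aligned. */
    static uint8_t src_buf[68];

    void copy4_misaligned(uint32_t *dst)
    {
            const uint8_t *src = &src_buf[1];       /* misaligned for e32 */

            asm volatile (
                    "       .option push\n\t"
                    "       .option arch, +v\n\t"
                    "       vsetivli x0, 4, e32, m1, ta, ma\n\t"
                    "       vle32.v  v8, (%0)\n\t"  /* may trap: misaligned load */
                    "       vse32.v  v8, (%1)\n\t"
                    "       .option pop\n\t"
                    :: "r" (src), "r" (dst) : "memory");    /* clobbers v8, vl, vtype */
    }

On harts without hardware support for misaligned vector accesses, the
vle32.v above traps and is now emulated element by element by
sbi_misaligned_v_ld_emulator() instead of the trap being redirected to
the supervisor.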

Co-developed-by: Zong Li <zong.li@sifive.com>
Signed-off-by: Zong Li <zong.li@sifive.com>
Signed-off-by: Nylon Chen <nylon.chen@sifive.com>
Reviewed-by: Andy Chiu <andy.chiu@sifive.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
---
 Makefile                     |  11 +-
 include/sbi/riscv_encoding.h | 385 ++++++++++++++++++++++++++++++++++-
 include/sbi/sbi_trap_ldst.h  |   9 +
 lib/sbi/objects.mk           |   1 +
 lib/sbi/sbi_trap_ldst.c      |  23 ++-
 lib/sbi/sbi_trap_v_ldst.c    | 341 +++++++++++++++++++++++++++++++
 6 files changed, 758 insertions(+), 12 deletions(-)
 create mode 100644 lib/sbi/sbi_trap_v_ldst.c

Comments

Anup Patel Dec. 6, 2024, 12:18 p.m. UTC | #1
On Fri, Dec 6, 2024 at 8:40 AM Nylon Chen <nylon.chen@sifive.com> wrote:
>
> Add exception handling vector instructions from
>
> the vector extension to the sbi_misaligned_ldst library.
>
> This implementation references the misaligned_vec_ldst
> implementation in the riscv-pk project.
>
> Co-developed-by: Zong Li <zong.li@sifive.com>
> Signed-off-by: Zong Li <zong.li@sifive.com>
> Signed-off-by: Nylon Chen <nylon.chen@sifive.com>
> Reviewed-by: Andy Chiu <andy.chiu@sifive.com>
> Reviewed-by: Anup Patel <anup@brainfault.org>

I updated the commit description and replaced spaces with tabs
for alignment in a few places when merging this patch.

Applied this patch to the riscv/opensbi repo.

Thanks,
Anup

> ---
>  Makefile                     |  11 +-
>  include/sbi/riscv_encoding.h | 385 ++++++++++++++++++++++++++++++++++-
>  include/sbi/sbi_trap_ldst.h  |   9 +
>  lib/sbi/objects.mk           |   1 +
>  lib/sbi/sbi_trap_ldst.c      |  23 ++-
>  lib/sbi/sbi_trap_v_ldst.c    | 341 +++++++++++++++++++++++++++++++
>  6 files changed, 758 insertions(+), 12 deletions(-)
>  create mode 100644 lib/sbi/sbi_trap_v_ldst.c
>
> diff --git a/Makefile b/Makefile
> index d9cee49..5ac95a0 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -189,6 +189,9 @@ CC_SUPPORT_STRICT_ALIGN := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib
>  # Check whether the assembler and the compiler support the Zicsr and Zifencei extensions
>  CC_SUPPORT_ZICSR_ZIFENCEI := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib -march=rv$(OPENSBI_CC_XLEN)imafd_zicsr_zifencei -x c /dev/null -o /dev/null 2>&1 | grep -e "zicsr" -e "zifencei" > /dev/null && echo n || echo y)
>
> +# Check whether the assembler and the compiler support the Vector extension
> +CC_SUPPORT_VECT := $(shell echo | $(CC) -dM -E -march=rv$(OPENSBI_CC_XLEN)gv - | grep -q riscv.*vector && echo y || echo n)
> +
>  ifneq ($(OPENSBI_LD_PIE),y)
>  $(error Your linker does not support creating PIEs, opensbi requires this.)
>  endif
> @@ -294,10 +297,12 @@ ifndef PLATFORM_RISCV_ABI
>  endif
>  ifndef PLATFORM_RISCV_ISA
>    ifneq ($(PLATFORM_RISCV_TOOLCHAIN_DEFAULT), 1)
> +    PLATFORM_RISCV_ISA := rv$(PLATFORM_RISCV_XLEN)imafdc
> +    ifeq ($(CC_SUPPORT_VECT), y)
> +      PLATFORM_RISCV_ISA := $(PLATFORM_RISCV_ISA)v
> +    endif
>      ifeq ($(CC_SUPPORT_ZICSR_ZIFENCEI), y)
> -      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc_zicsr_zifencei
> -    else
> -      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc
> +      PLATFORM_RISCV_ISA := $(PLATFORM_RISCV_ISA)_zicsr_zifencei
>      endif
>    else
>      PLATFORM_RISCV_ISA = $(OPENSBI_CC_ISA)
> diff --git a/include/sbi/riscv_encoding.h b/include/sbi/riscv_encoding.h
> index 38997ef..8b724b8 100644
> --- a/include/sbi/riscv_encoding.h
> +++ b/include/sbi/riscv_encoding.h
> @@ -763,6 +763,12 @@
>  #define CSR_MVIPH                      0x319
>  #define CSR_MIPH                       0x354
>
> +/* Vector extension registers */
> +#define CSR_VSTART                     0x8
> +#define CSR_VL                         0xc20
> +#define CSR_VTYPE                      0xc21
> +#define CSR_VLENB                      0xc22
> +
>  /* ===== Trap/Exception Causes ===== */
>
>  #define CAUSE_MISALIGNED_FETCH         0x0
> @@ -891,11 +897,364 @@
>  #define INSN_MASK_FENCE_TSO            0xffffffff
>  #define INSN_MATCH_FENCE_TSO           0x8330000f
>
> +#define INSN_MASK_VECTOR_UNIT_STRIDE           0xfdf0707f
> +#define INSN_MASK_VECTOR_FAULT_ONLY_FIRST      0xfdf0707f
> +#define INSN_MASK_VECTOR_STRIDE                0xfc00707f
> +#define INSN_MASK_VECTOR_WHOLE_REG             0xfff0707f
> +#define INSN_MASK_VECTOR_INDEXED               0xfc00707f
> +
> +#define INSN_MATCH_VLUXSEG(n, bits) ((((n) - 1) << 29) | 0x04000007 | \
> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> +#define INSN_MATCH_VSUXSEG(n, bits) ((((n) - 1) << 29) | 0x04000027 | \
> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> +#define INSN_MATCH_VLOXSEG(n, bits) ((((n) - 1) << 29) | 0x0c000007 | \
> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> +#define INSN_MATCH_VSOXSEG(n, bits) ((((n) - 1) << 29) | 0x0c000027 | \
> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> +#define INSN_MATCH_VLSSEG(n, bits)  ((((n) - 1) << 29) | 0x08000007 | \
> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> +#define INSN_MATCH_VSSSEG(n, bits)  ((((n) - 1) << 29) | 0x08000027 | \
> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> +#define INSN_MATCH_VSSEG(n, bits)   ((((n) - 1) << 29) | 0x00004027 | \
> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> +#define INSN_MATCH_VLSEG(n, bits)   ((((n) - 1) << 29) | 0x00004007 | \
> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> +#define INSN_MATCH_VLSEGFF(n, bits) ((((n) - 1) << 29) | 0x1000007 | \
> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> +
> +#define INSN_MATCH_VLE16V              0x00005007
> +#define INSN_MATCH_VLE32V              0x00006007
> +#define INSN_MATCH_VLE64V              0x00007007
> +#define INSN_MATCH_VSE16V              0x00005027
> +#define INSN_MATCH_VSE32V              0x00006027
> +#define INSN_MATCH_VSE64V              0x00007027
> +#define INSN_MATCH_VLSE16V             0x08005007
> +#define INSN_MATCH_VLSE32V             0x08006007
> +#define INSN_MATCH_VLSE64V             0x08007007
> +#define INSN_MATCH_VSSE16V             0x08005027
> +#define INSN_MATCH_VSSE32V             0x08006027
> +#define INSN_MATCH_VSSE64V             0x08007027
> +#define INSN_MATCH_VLOXEI16V           0x0c005007
> +#define INSN_MATCH_VLOXEI32V           0x0c006007
> +#define INSN_MATCH_VLOXEI64V           0x0c007007
> +#define INSN_MATCH_VSOXEI16V           0x0c005027
> +#define INSN_MATCH_VSOXEI32V           0x0c006027
> +#define INSN_MATCH_VSOXEI64V           0x0c007027
> +#define INSN_MATCH_VLUXEI16V           0x04005007
> +#define INSN_MATCH_VLUXEI32V           0x04006007
> +#define INSN_MATCH_VLUXEI64V           0x04007007
> +#define INSN_MATCH_VSUXEI16V           0x04005027
> +#define INSN_MATCH_VSUXEI32V           0x04006027
> +#define INSN_MATCH_VSUXEI64V           0x04007027
> +#define INSN_MATCH_VLE16FFV            0x01005007
> +#define INSN_MATCH_VLE32FFV            0x01006007
> +#define INSN_MATCH_VLE64FFV            0x01007007
> +#define INSN_MATCH_VL1RE8V             0x02800007
> +#define INSN_MATCH_VL1RE16V            0x02805007
> +#define INSN_MATCH_VL1RE32V            0x02806007
> +#define INSN_MATCH_VL1RE64V            0x02807007
> +#define INSN_MATCH_VL2RE8V             0x22800007
> +#define INSN_MATCH_VL2RE16V            0x22805007
> +#define INSN_MATCH_VL2RE32V            0x22806007
> +#define INSN_MATCH_VL2RE64V            0x22807007
> +#define INSN_MATCH_VL4RE8V             0x62800007
> +#define INSN_MATCH_VL4RE16V            0x62805007
> +#define INSN_MATCH_VL4RE32V            0x62806007
> +#define INSN_MATCH_VL4RE64V            0x62807007
> +#define INSN_MATCH_VL8RE8V             0xe2800007
> +#define INSN_MATCH_VL8RE16V            0xe2805007
> +#define INSN_MATCH_VL8RE32V            0xe2806007
> +#define INSN_MATCH_VL8RE64V            0xe2807007
> +#define INSN_MATCH_VS1RV               0x02800027
> +#define INSN_MATCH_VS2RV               0x22800027
> +#define INSN_MATCH_VS4RV               0x62800027
> +#define INSN_MATCH_VS8RV               0xe2800027
> +
> +#define INSN_MASK_VECTOR_LOAD_STORE    0x7f
> +#define INSN_MATCH_VECTOR_LOAD         0x07
> +#define INSN_MATCH_VECTOR_STORE                0x27
> +
> +#define IS_VECTOR_LOAD_STORE(insn) \
> +       ((((insn) & INSN_MASK_VECTOR_LOAD_STORE) == INSN_MATCH_VECTOR_LOAD) || \
> +       (((insn) & INSN_MASK_VECTOR_LOAD_STORE) == INSN_MATCH_VECTOR_STORE))
> +
> +#define IS_VECTOR_INSN_MATCH(insn, match, mask) \
> +       (((insn) & (mask)) == ((match) & (mask)))
> +
> +#define IS_UNIT_STRIDE_MATCH(insn, match) \
> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_UNIT_STRIDE)
> +
> +#define IS_STRIDE_MATCH(insn, match) \
> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_STRIDE)
> +
> +#define IS_INDEXED_MATCH(insn, match) \
> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_INDEXED)
> +
> +#define IS_FAULT_ONLY_FIRST_MATCH(insn, match) \
> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_FAULT_ONLY_FIRST)
> +
> +#define IS_WHOLE_REG_MATCH(insn, match) \
> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_WHOLE_REG)
> +
> +#define IS_UNIT_STRIDE_LOAD(insn) ( \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE16V) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE32V) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE64V) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 64)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 64)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 64)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 64)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 64)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 64)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 64)))
> +
> +#define IS_UNIT_STRIDE_STORE(insn) ( \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE16V) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE32V) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE64V) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 16)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 32)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 64)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 64)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 64)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 64)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 64)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 64)) || \
> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 64)))
> +
> +#define IS_STRIDE_LOAD(insn) ( \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE16V) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE32V) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE64V) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 64)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 64)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 64)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 64)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 64)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 64)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 64)))
> +
> +#define IS_STRIDE_STORE(insn) ( \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE16V) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE32V) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE64V) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 16)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 32)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 64)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 64)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 64)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 64)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 64)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 64)) || \
> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 64)))
> +
> +#define IS_INDEXED_LOAD(insn) ( \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI16V) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI32V) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI64V) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI16V) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI32V) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI64V) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 64)))
> +
> +#define IS_INDEXED_STORE(insn) ( \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI16V) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI32V) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI64V) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI16V) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI32V) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI64V) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 16)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 32)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 64)) || \
> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 64)))
> +
> +#define IS_FAULT_ONLY_FIRST_LOAD(insn) ( \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE16FFV) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE32FFV) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE64FFV) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 16)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 16)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 16)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 16)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 16)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 16)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 16)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 32)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 32)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 32)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 32)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 32)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 32)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 32)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 64)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 64)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 64)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 64)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 64)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 64)) || \
> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 64)))
> +
> +       #define IS_WHOLE_REG_LOAD(insn) ( \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE8V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE16V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE32V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE64V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE8V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE16V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE32V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE64V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE8V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE16V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE32V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE64V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE8V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE16V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE32V) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE64V))
> +
> +#define IS_WHOLE_REG_STORE(insn) ( \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS1RV) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS2RV) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS4RV) || \
> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS8RV))
> +
> +
>  #if __riscv_xlen == 64
>
>  /* 64-bit read for VS-stage address translation (RV64) */
>  #define INSN_PSEUDO_VS_LOAD            0x00003000
> -
>  /* 64-bit write for VS-stage address translation (RV64) */
>  #define INSN_PSEUDO_VS_STORE   0x00003020
>
> @@ -911,6 +1270,12 @@
>  #error "Unexpected __riscv_xlen"
>  #endif
>
> +#define VM_MASK                                0x1
> +#define VIEW_MASK                              0x3
> +#define VSEW_MASK                              0x3
> +#define VLMUL_MASK                             0x7
> +#define VD_MASK                                0x1f
> +#define VS2_MASK                               0x1f
>  #define INSN_16BIT_MASK                        0x3
>  #define INSN_32BIT_MASK                        0x1c
>
> @@ -929,6 +1294,12 @@
>  #endif
>  #define REGBYTES                       (1 << LOG_REGBYTES)
>
> +#define SH_VSEW                        3
> +#define SH_VIEW                        12
> +#define SH_VD                          7
> +#define SH_VS2                         20
> +#define SH_VM                          25
> +#define SH_MEW                         28
>  #define SH_RD                          7
>  #define SH_RS1                         15
>  #define SH_RS2                         20
> @@ -982,6 +1353,18 @@
>  #define IMM_S(insn)                    (((s32)(insn) >> 25 << 5) | \
>                                          (s32)(((insn) >> 7) & 0x1f))
>
> +#define IS_MASKED(insn)                (((insn >> SH_VM) & VM_MASK) == 0)
> +#define GET_VD(insn)                   ((insn >> SH_VD) & VD_MASK)
> +#define GET_VS2(insn)                  ((insn >> SH_VS2) & VS2_MASK)
> +#define GET_VIEW(insn)                 (((insn) >> SH_VIEW) & VIEW_MASK)
> +#define GET_MEW(insn)                  (((insn) >> SH_MEW) & 1)
> +#define GET_VSEW(vtype)                (((vtype) >> SH_VSEW) & VSEW_MASK)
> +#define GET_VLMUL(vtype)               ((vtype) & VLMUL_MASK)
> +#define GET_LEN(view)                  (1UL << (view))
> +#define GET_NF(insn)                   (1 + ((insn >> 29) & 7))
> +#define GET_VEMUL(vlmul, view, vsew)   ((vlmul + view - vsew) & 7)
> +#define GET_EMUL(vemul)                (1UL << ((vemul) >= 4 ? 0 : (vemul)))
> +
>  #define MASK_FUNCT3                    0x7000
>  #define MASK_RS1                       0xf8000
>  #define MASK_CSR                       0xfff00000
> diff --git a/include/sbi/sbi_trap_ldst.h b/include/sbi/sbi_trap_ldst.h
> index 4c5cc37..34877cc 100644
> --- a/include/sbi/sbi_trap_ldst.h
> +++ b/include/sbi/sbi_trap_ldst.h
> @@ -30,4 +30,13 @@ int sbi_store_access_handler(struct sbi_trap_context *tcntx);
>
>  int sbi_double_trap_handler(struct sbi_trap_context *tcntx);
>
> +ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
> +                                ulong addr_offset);
> +
> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
> +                                struct sbi_trap_context *tcntx);
> +
> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
> +                                struct sbi_trap_context *tcntx);
> +
>  #endif
> diff --git a/lib/sbi/objects.mk b/lib/sbi/objects.mk
> index a6f7c5f..47a0866 100644
> --- a/lib/sbi/objects.mk
> +++ b/lib/sbi/objects.mk
> @@ -91,6 +91,7 @@ libsbi-objs-y += sbi_timer.o
>  libsbi-objs-y += sbi_tlb.o
>  libsbi-objs-y += sbi_trap.o
>  libsbi-objs-y += sbi_trap_ldst.o
> +libsbi-objs-y += sbi_trap_v_ldst.o
>  libsbi-objs-y += sbi_unpriv.o
>  libsbi-objs-y += sbi_expected_trap.o
>  libsbi-objs-y += sbi_cppc.o
> diff --git a/lib/sbi/sbi_trap_ldst.c b/lib/sbi/sbi_trap_ldst.c
> index ebc4a92..448406b 100644
> --- a/lib/sbi/sbi_trap_ldst.c
> +++ b/lib/sbi/sbi_trap_ldst.c
> @@ -32,7 +32,7 @@ typedef int (*sbi_trap_ld_emulator)(int rlen, union sbi_ldst_data *out_val,
>  typedef int (*sbi_trap_st_emulator)(int wlen, union sbi_ldst_data in_val,
>                                     struct sbi_trap_context *tcntx);
>
> -static ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
> +ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
>                                         ulong addr_offset)
>  {
>         if (new_tinst == INSN_PSEUDO_VS_LOAD ||
> @@ -52,7 +52,7 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
>         ulong insn, insn_len;
>         union sbi_ldst_data val = { 0 };
>         struct sbi_trap_info uptrap;
> -       int rc, fp = 0, shift = 0, len = 0;
> +       int rc, fp = 0, shift = 0, len = 0, vector = 0;
>
>         if (orig_trap->tinst & 0x1) {
>                 /*
> @@ -144,6 +144,9 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
>                 len = 2;
>                 shift = 8 * (sizeof(ulong) - len);
>                 insn = RVC_RS2S(insn) << SH_RD;
> +       } else if (IS_VECTOR_LOAD_STORE(insn)) {
> +               vector = 1;
> +               emu = sbi_misaligned_v_ld_emulator;
>         } else {
>                 return sbi_trap_redirect(regs, orig_trap);
>         }
> @@ -152,14 +155,16 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
>         if (rc <= 0)
>                 return rc;
>
> -       if (!fp)
> -               SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
> +       if (!vector) {
> +               if (!fp)
> +                       SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
>  #ifdef __riscv_flen
> -       else if (len == 8)
> -               SET_F64_RD(insn, regs, val.data_u64);
> -       else
> -               SET_F32_RD(insn, regs, val.data_ulong);
> +               else if (len == 8)
> +                       SET_F64_RD(insn, regs, val.data_u64);
> +               else
> +                       SET_F32_RD(insn, regs, val.data_ulong);
>  #endif
> +       }
>
>         regs->mepc += insn_len;
>
> @@ -248,6 +253,8 @@ static int sbi_trap_emulate_store(struct sbi_trap_context *tcntx,
>         } else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) {
>                 len             = 2;
>                 val.data_ulong = GET_RS2S(insn, regs);
> +       } else if (IS_VECTOR_LOAD_STORE(insn)) {
> +               emu = sbi_misaligned_v_st_emulator;
>         } else {
>                 return sbi_trap_redirect(regs, orig_trap);
>         }
> diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
> new file mode 100644
> index 0000000..72b2309
> --- /dev/null
> +++ b/lib/sbi/sbi_trap_v_ldst.c
> @@ -0,0 +1,341 @@
> +/*
> + * SPDX-License-Identifier: BSD-2-Clause
> + *
> + * Copyright (c) 2024 SiFive Inc.
> + *
> + * Authors:
> + *   Andrew Waterman <andrew@sifive.com>
> + *   Nylon Chen <nylon.chen@sifive.com>
> + *   Zong Li <nylon.chen@sifive.com>
> + */
> +
> +#include <sbi/riscv_asm.h>
> +#include <sbi/riscv_encoding.h>
> +#include <sbi/sbi_error.h>
> +#include <sbi/sbi_trap_ldst.h>
> +#include <sbi/sbi_trap.h>
> +#include <sbi/sbi_unpriv.h>
> +#include <sbi/sbi_trap.h>
> +
> +#ifdef __riscv_vector
> +#define VLEN_MAX 65536
> +
> +static inline void set_vreg(ulong vlenb, ulong which, ulong pos, ulong size, const uint8_t *bytes)
> +{
> +       pos += (which % 8) * vlenb;
> +       bytes -= pos;
> +
> +       asm volatile (
> +               "       .option push\n\t"
> +               "       .option arch, +v\n\t"
> +               "       vsetvli x0, %0, e8, m8, tu, ma\n\t"
> +               "       .option pop\n\t"
> +               :: "r" (pos + size));
> +
> +       csr_write(CSR_VSTART, pos);
> +
> +       switch (which / 8) {
> +       case 0:
> +               asm volatile (
> +                       "       .option push\n\t"
> +                       "       .option arch, +v\n\t"
> +                       "       vle8.v v0,  (%0)\n\t"
> +                       "       .option pop\n\t"
> +                       :: "r" (bytes) : "memory");
> +               break;
> +       case 1:
> +               asm volatile (
> +                       "       .option push\n\t"
> +                       "       .option arch, +v\n\t"
> +                       "       vle8.v v8,  (%0)\n\t"
> +                       "       .option pop\n\t"
> +                       :: "r" (bytes) : "memory");
> +               break;
> +       case 2:
> +               asm volatile (
> +                       "       .option push\n\t"
> +                       "       .option arch, +v\n\t"
> +                       "       vle8.v v16,  (%0)\n\t"
> +                       "       .option pop\n\t"
> +                       :: "r" (bytes) : "memory");
> +               break;
> +       case 3:
> +               asm volatile (
> +                       "       .option push\n\t"
> +                       "       .option arch, +v\n\t"
> +                       "       vle8.v v24,  (%0)\n\t"
> +                       "       .option pop\n\t"
> +                       :: "r" (bytes) : "memory");
> +               break;
> +       default:
> +               break;
> +       }
> +}
> +
> +static inline void get_vreg(ulong vlenb, ulong which, ulong pos, ulong size, uint8_t *bytes)
> +{
> +       pos += (which % 8) * vlenb;
> +       bytes -= pos;
> +
> +       asm volatile (
> +               "       .option push\n\t"
> +               "       .option arch, +v\n\t"
> +               "       vsetvli x0, %0, e8, m8, tu, ma\n\t"
> +               "       .option pop\n\t"
> +               :: "r" (pos + size));
> +
> +       csr_write(CSR_VSTART, pos);
> +
> +       switch (which / 8) {
> +       case 0:
> +               asm volatile (
> +                       "       .option push\n\t"
> +                       "       .option arch, +v\n\t"
> +                       "       vse8.v v0,  (%0)\n\t"
> +                       "       .option pop\n\t"
> +                       :: "r" (bytes) : "memory");
> +               break;
> +       case 1:
> +               asm volatile (
> +                       "       .option push\n\t"
> +                       "       .option arch, +v\n\t"
> +                       "       vse8.v v8,  (%0)\n\t"
> +                       "       .option pop\n\t"
> +                       :: "r" (bytes) : "memory");
> +               break;
> +       case 2:
> +               asm volatile (
> +                       "       .option push\n\t"
> +                       "       .option arch, +v\n\t"
> +                       "       vse8.v v16, (%0)\n\t"
> +                       "       .option pop\n\t"
> +                       :: "r" (bytes) : "memory");
> +               break;
> +       case 3:
> +               asm volatile (
> +                               ".option push\n\t"
> +                               ".option arch, +v\n\t"
> +                               "vse8.v v24, (%0)\n\t"
> +                               ".option pop\n\t"
> +                               :: "r" (bytes) : "memory");
> +               break;
> +       default:
> +               break;
> +       }
> +}
> +
> +static inline void vsetvl(ulong vl, ulong vtype)
> +{
> +       asm volatile (
> +               "       .option push\n\t"
> +               "       .option arch, +v\n\t"
> +               "       vsetvl x0, %0, %1\n\t"
> +               "       .option pop\n\t"
> +                       :: "r" (vl), "r" (vtype));
> +}
> +
> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
> +                                struct sbi_trap_context *tcntx)
> +{
> +       const struct sbi_trap_info *orig_trap = &tcntx->trap;
> +       struct sbi_trap_regs *regs = &tcntx->regs;
> +       struct sbi_trap_info uptrap;
> +       ulong insn = sbi_get_insn(regs->mepc, &uptrap);
> +       ulong vl = csr_read(CSR_VL);
> +       ulong vtype = csr_read(CSR_VTYPE);
> +       ulong vlenb = csr_read(CSR_VLENB);
> +       ulong vstart = csr_read(CSR_VSTART);
> +       ulong base = GET_RS1(insn, regs);
> +       ulong stride = GET_RS2(insn, regs);
> +       ulong vd = GET_VD(insn);
> +       ulong vs2 = GET_VS2(insn);
> +       ulong view = GET_VIEW(insn);
> +       ulong vsew = GET_VSEW(vtype);
> +       ulong vlmul = GET_VLMUL(vtype);
> +       bool illegal = GET_MEW(insn);
> +       bool masked = IS_MASKED(insn);
> +       uint8_t mask[VLEN_MAX / 8];
> +       uint8_t bytes[8 * sizeof(uint64_t)];
> +       ulong len = GET_LEN(view);
> +       ulong nf = GET_NF(insn);
> +       ulong vemul = GET_VEMUL(vlmul, view, vsew);
> +       ulong emul = GET_EMUL(vemul);
> +
> +       if (IS_UNIT_STRIDE_LOAD(insn) || IS_FAULT_ONLY_FIRST_LOAD(insn)) {
> +               stride = nf * len;
> +       } else if (IS_WHOLE_REG_LOAD(insn)) {
> +               vl = (nf * vlenb) >> view;
> +               nf = 1;
> +               vemul = 0;
> +               emul = 1;
> +               stride = nf * len;
> +       } else if (IS_INDEXED_LOAD(insn)) {
> +               len = 1 << vsew;
> +               vemul = (vlmul + vsew - vsew) & 7;
> +               emul = 1 << ((vemul & 4) ? 0 : vemul);
> +               stride = nf * len;
> +       }
> +
> +       if (illegal || vlenb > VLEN_MAX / 8) {
> +               struct sbi_trap_info trap = {
> +                       uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION,
> +                       uptrap.tval = insn,
> +               };
> +               return sbi_trap_redirect(regs, &trap);
> +       }
> +
> +       if (masked)
> +               get_vreg(vlenb, 0, 0, vlenb, mask);
> +
> +       do {
> +               if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
> +                       // compute element address
> +                       ulong addr = base + vstart * stride;
> +
> +                       if (IS_INDEXED_LOAD(insn)) {
> +                               ulong offset = 0;
> +
> +                               get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
> +                               addr = base + offset;
> +                       }
> +
> +                       csr_write(CSR_VSTART, vstart);
> +
> +                       // obtain load data from memory
> +                       for (ulong seg = 0; seg < nf; seg++) {
> +                               for (ulong i = 0; i < len; i++) {
> +                                       bytes[seg * len + i] =
> +                                               sbi_load_u8((void *)(addr + seg * len + i),
> +                                                           &uptrap);
> +
> +                                       if (uptrap.cause) {
> +                                               if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
> +                                                       vl = vstart;
> +                                                       break;
> +                                               }
> +                                               vsetvl(vl, vtype);
> +                                               uptrap.tinst = sbi_misaligned_tinst_fixup(
> +                                                       orig_trap->tinst, uptrap.tinst, i);
> +                                               return sbi_trap_redirect(regs, &uptrap);
> +                                       }
> +                               }
> +                       }
> +
> +                       // write load data to regfile
> +                       for (ulong seg = 0; seg < nf; seg++)
> +                               set_vreg(vlenb, vd + seg * emul, vstart * len,
> +                                        len, &bytes[seg * len]);
> +               }
> +       } while (++vstart < vl);
> +
> +       // restore clobbered vl/vtype
> +       vsetvl(vl, vtype);
> +
> +       return vl;
> +}
> +
> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
> +                                struct sbi_trap_context *tcntx)
> +{
> +       const struct sbi_trap_info *orig_trap = &tcntx->trap;
> +       struct sbi_trap_regs *regs = &tcntx->regs;
> +       struct sbi_trap_info uptrap;
> +       ulong insn = sbi_get_insn(regs->mepc, &uptrap);
> +       ulong vl = csr_read(CSR_VL);
> +       ulong vtype = csr_read(CSR_VTYPE);
> +       ulong vlenb = csr_read(CSR_VLENB);
> +       ulong vstart = csr_read(CSR_VSTART);
> +       ulong base = GET_RS1(insn, regs);
> +       ulong stride = GET_RS2(insn, regs);
> +       ulong vd = GET_VD(insn);
> +       ulong vs2 = GET_VS2(insn);
> +       ulong view = GET_VIEW(insn);
> +       ulong vsew = GET_VSEW(vtype);
> +       ulong vlmul = GET_VLMUL(vtype);
> +       bool illegal = GET_MEW(insn);
> +       bool masked = IS_MASKED(insn);
> +       uint8_t mask[VLEN_MAX / 8];
> +       uint8_t bytes[8 * sizeof(uint64_t)];
> +       ulong len = GET_LEN(view);
> +       ulong nf = GET_NF(insn);
> +       ulong vemul = GET_VEMUL(vlmul, view, vsew);
> +       ulong emul = GET_EMUL(vemul);
> +
> +       if (IS_UNIT_STRIDE_STORE(insn)) {
> +               stride = nf * len;
> +       } else if (IS_WHOLE_REG_STORE(insn)) {
> +               vl = (nf * vlenb) >> view;
> +               nf = 1;
> +               vemul = 0;
> +               emul = 1;
> +               stride = nf * len;
> +       } else if (IS_INDEXED_STORE(insn)) {
> +               len = 1 << vsew;
> +               vemul = (vlmul + vsew - vsew) & 7;
> +               emul = 1 << ((vemul & 4) ? 0 : vemul);
> +               stride = nf * len;
> +       }
> +
> +       if (illegal || vlenb > VLEN_MAX / 8) {
> +               struct sbi_trap_info trap = {
> +                       uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION,
> +                       uptrap.tval = insn,
> +               };
> +               return sbi_trap_redirect(regs, &trap);
> +       }
> +
> +       if (masked)
> +               get_vreg(vlenb, 0, 0, vlenb, mask);
> +
> +       do {
> +               if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
> +                       // compute element address
> +                       ulong addr = base + vstart * stride;
> +
> +                       if (IS_INDEXED_STORE(insn)) {
> +                               ulong offset = 0;
> +
> +                               get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
> +                               addr = base + offset;
> +                       }
> +
> +                       // obtain store data from regfile
> +                       for (ulong seg = 0; seg < nf; seg++)
> +                               get_vreg(vlenb, vd + seg * emul, vstart * len,
> +                                        len, &bytes[seg * len]);
> +
> +                       csr_write(CSR_VSTART, vstart);
> +
> +                       // write store data to memory
> +                       for (ulong seg = 0; seg < nf; seg++) {
> +                               for (ulong i = 0; i < len; i++) {
> +                                       sbi_store_u8((void *)(addr + seg * len + i),
> +                                                    bytes[seg * len + i], &uptrap);
> +                                       if (uptrap.cause) {
> +                                               vsetvl(vl, vtype);
> +                                               uptrap.tinst = sbi_misaligned_tinst_fixup(
> +                                                       orig_trap->tinst, uptrap.tinst, i);
> +                                               return sbi_trap_redirect(regs, &uptrap);
> +                                       }
> +                               }
> +                       }
> +               }
> +       } while (++vstart < vl);
> +
> +       // restore clobbered vl/vtype
> +       vsetvl(vl, vtype);
> +
> +       return vl;
> +}
> +#else
> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
> +                                struct sbi_trap_context *tcntx)
> +{
> +       return 0;
> +}
> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
> +                                struct sbi_trap_context *tcntx)
> +{
> +       return 0;
> +}
> +#endif /* __riscv_vector  */
> --
> 2.34.1
>
Clément Léger Dec. 6, 2024, 8:57 p.m. UTC | #2
Hi Anup & Nylon,

This commit breaks OpenSBI boot on my setup. A quick analysis of the
problem shows that the changes to objects.mk now allow GCC to generate
vector assembly in OpenSBI. It now crashes in bitmap_zero() really early
(used in fw_platform_coldboot_harts_init()) due to vector instructions
being used:

   0x0000000080007800 <+474>:	mv	a0,s2
   0x0000000080007802 <+476>:	sw	a5,88(s4)
   0x0000000080007806 <+480>:	jal	0x8001adbe <fdt_check_imsic_mlevel>
=> 0x000000008000780a <+484>:	vsetivli	zero,2,e64,m1,ta,ma
   0x000000008000780e <+488>:	vmv.v.i	v1,0
   0x0000000080007812 <+492>:	mv	a5,a0


And then it raises an illegal instruction, probably because V isn't
enabled. But I'm guessing we do not expect to use vector code in OpenSBI
at all? I'm using a fairly new qemu if that is of any help
(248f9209edfd289e7d97fb323e5075ccd55cc157).
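
For reference, a sketch of the kind of loop involved (the function name is
made up; this is roughly what bitmap_zero() reduces to). Once "v" is part of
PLATFORM_RISCV_ISA, GCC is free to auto-vectorize it at -O2:

    #include <stddef.h>

    /* Roughly what bitmap_zero() boils down to. */
    void zero_words(unsigned long *p, size_t nwords)
    {
            for (size_t i = 0; i < nwords; i++)
                    p[i] = 0;
    }

With -march=rv64gcv this can turn into the vsetivli/vmv.v.i sequence shown
in the disassembly above, which then faults in M-mode, presumably because
mstatus.VS is left Off.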

Thanks,

Clément

On 06/12/2024 13:18, Anup Patel wrote:
> On Fri, Dec 6, 2024 at 8:40 AM Nylon Chen <nylon.chen@sifive.com> wrote:
>>
>> Add exception handling vector instructions from
>>
>> the vector extension to the sbi_misaligned_ldst library.
>>
>> This implementation references the misaligned_vec_ldst
>> implementation in the riscv-pk project.
>>
>> Co-developed-by: Zong Li <zong.li@sifive.com>
>> Signed-off-by: Zong Li <zong.li@sifive.com>
>> Signed-off-by: Nylon Chen <nylon.chen@sifive.com>
>> Reviewed-by: Andy Chiu <andy.chiu@sifive.com>
>> Reviewed-by: Anup Patel <anup@brainfault.org>
> 
> I updated the commit description and replaced spaces with tabs
> for alignment at a few places at the time of merging this patch.
> 
> Applied this patch to the riscv/opensbi repo.
> 
> Thanks,
> Anup
> 
>> ---
>>  Makefile                     |  11 +-
>>  include/sbi/riscv_encoding.h | 385 ++++++++++++++++++++++++++++++++++-
>>  include/sbi/sbi_trap_ldst.h  |   9 +
>>  lib/sbi/objects.mk           |   1 +
>>  lib/sbi/sbi_trap_ldst.c      |  23 ++-
>>  lib/sbi/sbi_trap_v_ldst.c    | 341 +++++++++++++++++++++++++++++++
>>  6 files changed, 758 insertions(+), 12 deletions(-)
>>  create mode 100644 lib/sbi/sbi_trap_v_ldst.c
>>
>> diff --git a/Makefile b/Makefile
>> index d9cee49..5ac95a0 100644
>> --- a/Makefile
>> +++ b/Makefile
>> @@ -189,6 +189,9 @@ CC_SUPPORT_STRICT_ALIGN := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib
>>  # Check whether the assembler and the compiler support the Zicsr and Zifencei extensions
>>  CC_SUPPORT_ZICSR_ZIFENCEI := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib -march=rv$(OPENSBI_CC_XLEN)imafd_zicsr_zifencei -x c /dev/null -o /dev/null 2>&1 | grep -e "zicsr" -e "zifencei" > /dev/null && echo n || echo y)
>>
>> +# Check whether the assembler and the compiler support the Vector extension
>> +CC_SUPPORT_VECT := $(shell echo | $(CC) -dM -E -march=rv$(OPENSBI_CC_XLEN)gv - | grep -q riscv.*vector && echo y || echo n)
>> +
>>  ifneq ($(OPENSBI_LD_PIE),y)
>>  $(error Your linker does not support creating PIEs, opensbi requires this.)
>>  endif
>> @@ -294,10 +297,12 @@ ifndef PLATFORM_RISCV_ABI
>>  endif
>>  ifndef PLATFORM_RISCV_ISA
>>    ifneq ($(PLATFORM_RISCV_TOOLCHAIN_DEFAULT), 1)
>> +    PLATFORM_RISCV_ISA := rv$(PLATFORM_RISCV_XLEN)imafdc
>> +    ifeq ($(CC_SUPPORT_VECT), y)
>> +      PLATFORM_RISCV_ISA := $(PLATFORM_RISCV_ISA)v
>> +    endif
>>      ifeq ($(CC_SUPPORT_ZICSR_ZIFENCEI), y)
>> -      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc_zicsr_zifencei
>> -    else
>> -      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc
>> +      PLATFORM_RISCV_ISA := $(PLATFORM_RISCV_ISA)_zicsr_zifencei
>>      endif
>>    else
>>      PLATFORM_RISCV_ISA = $(OPENSBI_CC_ISA)
>> diff --git a/include/sbi/riscv_encoding.h b/include/sbi/riscv_encoding.h
>> index 38997ef..8b724b8 100644
>> --- a/include/sbi/riscv_encoding.h
>> +++ b/include/sbi/riscv_encoding.h
>> @@ -763,6 +763,12 @@
>>  #define CSR_MVIPH                      0x319
>>  #define CSR_MIPH                       0x354
>>
>> +/* Vector extension registers */
>> +#define CSR_VSTART                     0x8
>> +#define CSR_VL                         0xc20
>> +#define CSR_VTYPE                      0xc21
>> +#define CSR_VLENB                      0xc22
>> +
>>  /* ===== Trap/Exception Causes ===== */
>>
>>  #define CAUSE_MISALIGNED_FETCH         0x0
>> @@ -891,11 +897,364 @@
>>  #define INSN_MASK_FENCE_TSO            0xffffffff
>>  #define INSN_MATCH_FENCE_TSO           0x8330000f
>>
>> +#define INSN_MASK_VECTOR_UNIT_STRIDE           0xfdf0707f
>> +#define INSN_MASK_VECTOR_FAULT_ONLY_FIRST      0xfdf0707f
>> +#define INSN_MASK_VECTOR_STRIDE                0xfc00707f
>> +#define INSN_MASK_VECTOR_WHOLE_REG             0xfff0707f
>> +#define INSN_MASK_VECTOR_INDEXED               0xfc00707f
>> +
>> +#define INSN_MATCH_VLUXSEG(n, bits) ((((n) - 1) << 29) | 0x04000007 | \
>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>> +#define INSN_MATCH_VSUXSEG(n, bits) ((((n) - 1) << 29) | 0x04000027 | \
>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>> +#define INSN_MATCH_VLOXSEG(n, bits) ((((n) - 1) << 29) | 0x0c000007 | \
>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>> +#define INSN_MATCH_VSOXSEG(n, bits) ((((n) - 1) << 29) | 0x0c000027 | \
>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>> +#define INSN_MATCH_VLSSEG(n, bits)  ((((n) - 1) << 29) | 0x08000007 | \
>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>> +#define INSN_MATCH_VSSSEG(n, bits)  ((((n) - 1) << 29) | 0x08000027 | \
>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>> +#define INSN_MATCH_VSSEG(n, bits)   ((((n) - 1) << 29) | 0x00004027 | \
>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>> +#define INSN_MATCH_VLSEG(n, bits)   ((((n) - 1) << 29) | 0x00004007 | \
>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>> +#define INSN_MATCH_VLSEGFF(n, bits) ((((n) - 1) << 29) | 0x1000007 | \
>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>> +
>> +#define INSN_MATCH_VLE16V              0x00005007
>> +#define INSN_MATCH_VLE32V              0x00006007
>> +#define INSN_MATCH_VLE64V              0x00007007
>> +#define INSN_MATCH_VSE16V              0x00005027
>> +#define INSN_MATCH_VSE32V              0x00006027
>> +#define INSN_MATCH_VSE64V              0x00007027
>> +#define INSN_MATCH_VLSE16V             0x08005007
>> +#define INSN_MATCH_VLSE32V             0x08006007
>> +#define INSN_MATCH_VLSE64V             0x08007007
>> +#define INSN_MATCH_VSSE16V             0x08005027
>> +#define INSN_MATCH_VSSE32V             0x08006027
>> +#define INSN_MATCH_VSSE64V             0x08007027
>> +#define INSN_MATCH_VLOXEI16V           0x0c005007
>> +#define INSN_MATCH_VLOXEI32V           0x0c006007
>> +#define INSN_MATCH_VLOXEI64V           0x0c007007
>> +#define INSN_MATCH_VSOXEI16V           0x0c005027
>> +#define INSN_MATCH_VSOXEI32V           0x0c006027
>> +#define INSN_MATCH_VSOXEI64V           0x0c007027
>> +#define INSN_MATCH_VLUXEI16V           0x04005007
>> +#define INSN_MATCH_VLUXEI32V           0x04006007
>> +#define INSN_MATCH_VLUXEI64V           0x04007007
>> +#define INSN_MATCH_VSUXEI16V           0x04005027
>> +#define INSN_MATCH_VSUXEI32V           0x04006027
>> +#define INSN_MATCH_VSUXEI64V           0x04007027
>> +#define INSN_MATCH_VLE16FFV            0x01005007
>> +#define INSN_MATCH_VLE32FFV            0x01006007
>> +#define INSN_MATCH_VLE64FFV            0x01007007
>> +#define INSN_MATCH_VL1RE8V             0x02800007
>> +#define INSN_MATCH_VL1RE16V            0x02805007
>> +#define INSN_MATCH_VL1RE32V            0x02806007
>> +#define INSN_MATCH_VL1RE64V            0x02807007
>> +#define INSN_MATCH_VL2RE8V             0x22800007
>> +#define INSN_MATCH_VL2RE16V            0x22805007
>> +#define INSN_MATCH_VL2RE32V            0x22806007
>> +#define INSN_MATCH_VL2RE64V            0x22807007
>> +#define INSN_MATCH_VL4RE8V             0x62800007
>> +#define INSN_MATCH_VL4RE16V            0x62805007
>> +#define INSN_MATCH_VL4RE32V            0x62806007
>> +#define INSN_MATCH_VL4RE64V            0x62807007
>> +#define INSN_MATCH_VL8RE8V             0xe2800007
>> +#define INSN_MATCH_VL8RE16V            0xe2805007
>> +#define INSN_MATCH_VL8RE32V            0xe2806007
>> +#define INSN_MATCH_VL8RE64V            0xe2807007
>> +#define INSN_MATCH_VS1RV               0x02800027
>> +#define INSN_MATCH_VS2RV               0x22800027
>> +#define INSN_MATCH_VS4RV               0x62800027
>> +#define INSN_MATCH_VS8RV               0xe2800027
>> +
>> +#define INSN_MASK_VECTOR_LOAD_STORE    0x7f
>> +#define INSN_MATCH_VECTOR_LOAD         0x07
>> +#define INSN_MATCH_VECTOR_STORE                0x27
>> +
>> +#define IS_VECTOR_LOAD_STORE(insn) \
>> +       ((((insn) & INSN_MASK_VECTOR_LOAD_STORE) == INSN_MATCH_VECTOR_LOAD) || \
>> +       (((insn) & INSN_MASK_VECTOR_LOAD_STORE) == INSN_MATCH_VECTOR_STORE))
>> +
>> +#define IS_VECTOR_INSN_MATCH(insn, match, mask) \
>> +       (((insn) & (mask)) == ((match) & (mask)))
>> +
>> +#define IS_UNIT_STRIDE_MATCH(insn, match) \
>> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_UNIT_STRIDE)
>> +
>> +#define IS_STRIDE_MATCH(insn, match) \
>> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_STRIDE)
>> +
>> +#define IS_INDEXED_MATCH(insn, match) \
>> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_INDEXED)
>> +
>> +#define IS_FAULT_ONLY_FIRST_MATCH(insn, match) \
>> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_FAULT_ONLY_FIRST)
>> +
>> +#define IS_WHOLE_REG_MATCH(insn, match) \
>> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_WHOLE_REG)
>> +
>> +#define IS_UNIT_STRIDE_LOAD(insn) ( \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE16V) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE32V) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE64V) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 64)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 64)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 64)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 64)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 64)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 64)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 64)))
>> +
>> +#define IS_UNIT_STRIDE_STORE(insn) ( \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE16V) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE32V) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE64V) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 16)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 32)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 64)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 64)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 64)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 64)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 64)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 64)) || \
>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 64)))
>> +
>> +#define IS_STRIDE_LOAD(insn) ( \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE16V) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE32V) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE64V) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 64)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 64)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 64)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 64)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 64)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 64)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 64)))
>> +
>> +#define IS_STRIDE_STORE(insn) ( \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE16V) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE32V) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE64V) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 16)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 32)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 64)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 64)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 64)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 64)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 64)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 64)) || \
>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 64)))
>> +
>> +#define IS_INDEXED_LOAD(insn) ( \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI16V) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI32V) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI64V) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI16V) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI32V) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI64V) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 64)))
>> +
>> +#define IS_INDEXED_STORE(insn) ( \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI16V) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI32V) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI64V) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI16V) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI32V) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI64V) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 16)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 32)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 64)) || \
>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 64)))
>> +
>> +#define IS_FAULT_ONLY_FIRST_LOAD(insn) ( \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE16FFV) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE32FFV) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE64FFV) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 16)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 16)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 16)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 16)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 16)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 16)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 16)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 32)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 32)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 32)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 32)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 32)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 32)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 32)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 64)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 64)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 64)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 64)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 64)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 64)) || \
>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 64)))
>> +
>> +#define IS_WHOLE_REG_LOAD(insn) ( \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE8V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE16V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE32V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE64V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE8V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE16V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE32V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE64V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE8V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE16V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE32V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE64V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE8V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE16V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE32V) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE64V))
>> +
>> +#define IS_WHOLE_REG_STORE(insn) ( \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS1RV) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS2RV) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS4RV) || \
>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS8RV))
>> +
>> +
>>  #if __riscv_xlen == 64
>>
>>  /* 64-bit read for VS-stage address translation (RV64) */
>>  #define INSN_PSEUDO_VS_LOAD            0x00003000
>> -
>>  /* 64-bit write for VS-stage address translation (RV64) */
>>  #define INSN_PSEUDO_VS_STORE   0x00003020
>>
>> @@ -911,6 +1270,12 @@
>>  #error "Unexpected __riscv_xlen"
>>  #endif
>>
>> +#define VM_MASK                                0x1
>> +#define VIEW_MASK                              0x3
>> +#define VSEW_MASK                              0x3
>> +#define VLMUL_MASK                             0x7
>> +#define VD_MASK                                0x1f
>> +#define VS2_MASK                               0x1f
>>  #define INSN_16BIT_MASK                        0x3
>>  #define INSN_32BIT_MASK                        0x1c
>>
>> @@ -929,6 +1294,12 @@
>>  #endif
>>  #define REGBYTES                       (1 << LOG_REGBYTES)
>>
>> +#define SH_VSEW                        3
>> +#define SH_VIEW                        12
>> +#define SH_VD                          7
>> +#define SH_VS2                         20
>> +#define SH_VM                          25
>> +#define SH_MEW                         28
>>  #define SH_RD                          7
>>  #define SH_RS1                         15
>>  #define SH_RS2                         20
>> @@ -982,6 +1353,18 @@
>>  #define IMM_S(insn)                    (((s32)(insn) >> 25 << 5) | \
>>                                          (s32)(((insn) >> 7) & 0x1f))
>>
>> +#define IS_MASKED(insn)                (((insn >> SH_VM) & VM_MASK) == 0)
>> +#define GET_VD(insn)                   ((insn >> SH_VD) & VD_MASK)
>> +#define GET_VS2(insn)                  ((insn >> SH_VS2) & VS2_MASK)
>> +#define GET_VIEW(insn)                 (((insn) >> SH_VIEW) & VIEW_MASK)
>> +#define GET_MEW(insn)                  (((insn) >> SH_MEW) & 1)
>> +#define GET_VSEW(vtype)                (((vtype) >> SH_VSEW) & VSEW_MASK)
>> +#define GET_VLMUL(vtype)               ((vtype) & VLMUL_MASK)
>> +#define GET_LEN(view)                  (1UL << (view))
>> +#define GET_NF(insn)                   (1 + ((insn >> 29) & 7))
>> +#define GET_VEMUL(vlmul, view, vsew)   ((vlmul + view - vsew) & 7)
>> +#define GET_EMUL(vemul)                (1UL << ((vemul) >= 4 ? 0 : (vemul)))
>> +
>>  #define MASK_FUNCT3                    0x7000
>>  #define MASK_RS1                       0xf8000
>>  #define MASK_CSR                       0xfff00000
>> diff --git a/include/sbi/sbi_trap_ldst.h b/include/sbi/sbi_trap_ldst.h
>> index 4c5cc37..34877cc 100644
>> --- a/include/sbi/sbi_trap_ldst.h
>> +++ b/include/sbi/sbi_trap_ldst.h
>> @@ -30,4 +30,13 @@ int sbi_store_access_handler(struct sbi_trap_context *tcntx);
>>
>>  int sbi_double_trap_handler(struct sbi_trap_context *tcntx);
>>
>> +ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
>> +                                ulong addr_offset);
>> +
>> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
>> +                                struct sbi_trap_context *tcntx);
>> +
>> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
>> +                                struct sbi_trap_context *tcntx);
>> +
>>  #endif
>> diff --git a/lib/sbi/objects.mk b/lib/sbi/objects.mk
>> index a6f7c5f..47a0866 100644
>> --- a/lib/sbi/objects.mk
>> +++ b/lib/sbi/objects.mk
>> @@ -91,6 +91,7 @@ libsbi-objs-y += sbi_timer.o
>>  libsbi-objs-y += sbi_tlb.o
>>  libsbi-objs-y += sbi_trap.o
>>  libsbi-objs-y += sbi_trap_ldst.o
>> +libsbi-objs-y += sbi_trap_v_ldst.o
>>  libsbi-objs-y += sbi_unpriv.o
>>  libsbi-objs-y += sbi_expected_trap.o
>>  libsbi-objs-y += sbi_cppc.o
>> diff --git a/lib/sbi/sbi_trap_ldst.c b/lib/sbi/sbi_trap_ldst.c
>> index ebc4a92..448406b 100644
>> --- a/lib/sbi/sbi_trap_ldst.c
>> +++ b/lib/sbi/sbi_trap_ldst.c
>> @@ -32,7 +32,7 @@ typedef int (*sbi_trap_ld_emulator)(int rlen, union sbi_ldst_data *out_val,
>>  typedef int (*sbi_trap_st_emulator)(int wlen, union sbi_ldst_data in_val,
>>                                     struct sbi_trap_context *tcntx);
>>
>> -static ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
>> +ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
>>                                         ulong addr_offset)
>>  {
>>         if (new_tinst == INSN_PSEUDO_VS_LOAD ||
>> @@ -52,7 +52,7 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
>>         ulong insn, insn_len;
>>         union sbi_ldst_data val = { 0 };
>>         struct sbi_trap_info uptrap;
>> -       int rc, fp = 0, shift = 0, len = 0;
>> +       int rc, fp = 0, shift = 0, len = 0, vector = 0;
>>
>>         if (orig_trap->tinst & 0x1) {
>>                 /*
>> @@ -144,6 +144,9 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
>>                 len = 2;
>>                 shift = 8 * (sizeof(ulong) - len);
>>                 insn = RVC_RS2S(insn) << SH_RD;
>> +       } else if (IS_VECTOR_LOAD_STORE(insn)) {
>> +               vector = 1;
>> +               emu = sbi_misaligned_v_ld_emulator;
>>         } else {
>>                 return sbi_trap_redirect(regs, orig_trap);
>>         }
>> @@ -152,14 +155,16 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
>>         if (rc <= 0)
>>                 return rc;
>>
>> -       if (!fp)
>> -               SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
>> +       if (!vector) {
>> +               if (!fp)
>> +                       SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
>>  #ifdef __riscv_flen
>> -       else if (len == 8)
>> -               SET_F64_RD(insn, regs, val.data_u64);
>> -       else
>> -               SET_F32_RD(insn, regs, val.data_ulong);
>> +               else if (len == 8)
>> +                       SET_F64_RD(insn, regs, val.data_u64);
>> +               else
>> +                       SET_F32_RD(insn, regs, val.data_ulong);
>>  #endif
>> +       }
>>
>>         regs->mepc += insn_len;
>>
>> @@ -248,6 +253,8 @@ static int sbi_trap_emulate_store(struct sbi_trap_context *tcntx,
>>         } else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) {
>>                 len             = 2;
>>                 val.data_ulong = GET_RS2S(insn, regs);
>> +       } else if (IS_VECTOR_LOAD_STORE(insn)) {
>> +               emu = sbi_misaligned_v_st_emulator;
>>         } else {
>>                 return sbi_trap_redirect(regs, orig_trap);
>>         }
>> diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
>> new file mode 100644
>> index 0000000..72b2309
>> --- /dev/null
>> +++ b/lib/sbi/sbi_trap_v_ldst.c
>> @@ -0,0 +1,341 @@
>> +/*
>> + * SPDX-License-Identifier: BSD-2-Clause
>> + *
>> + * Copyright (c) 2024 SiFive Inc.
>> + *
>> + * Authors:
>> + *   Andrew Waterman <andrew@sifive.com>
>> + *   Nylon Chen <nylon.chen@sifive.com>
>> + *   Zong Li <zong.li@sifive.com>
>> + */
>> +
>> +#include <sbi/riscv_asm.h>
>> +#include <sbi/riscv_encoding.h>
>> +#include <sbi/sbi_error.h>
>> +#include <sbi/sbi_trap_ldst.h>
>> +#include <sbi/sbi_trap.h>
>> +#include <sbi/sbi_unpriv.h>
>> +
>> +#ifdef __riscv_vector
>> +#define VLEN_MAX 65536
>> +
>> +static inline void set_vreg(ulong vlenb, ulong which, ulong pos, ulong size, const uint8_t *bytes)
>> +{
>> +       pos += (which % 8) * vlenb;
>> +       bytes -= pos;
>> +
>> +       asm volatile (
>> +               "       .option push\n\t"
>> +               "       .option arch, +v\n\t"
>> +               "       vsetvli x0, %0, e8, m8, tu, ma\n\t"
>> +               "       .option pop\n\t"
>> +               :: "r" (pos + size));
>> +
>> +       csr_write(CSR_VSTART, pos);
>> +
>> +       switch (which / 8) {
>> +       case 0:
>> +               asm volatile (
>> +                       "       .option push\n\t"
>> +                       "       .option arch, +v\n\t"
>> +                       "       vle8.v v0,  (%0)\n\t"
>> +                       "       .option pop\n\t"
>> +                       :: "r" (bytes) : "memory");
>> +               break;
>> +       case 1:
>> +               asm volatile (
>> +                       "       .option push\n\t"
>> +                       "       .option arch, +v\n\t"
>> +                       "       vle8.v v8,  (%0)\n\t"
>> +                       "       .option pop\n\t"
>> +                       :: "r" (bytes) : "memory");
>> +               break;
>> +       case 2:
>> +               asm volatile (
>> +                       "       .option push\n\t"
>> +                       "       .option arch, +v\n\t"
>> +                       "       vle8.v v16,  (%0)\n\t"
>> +                       "       .option pop\n\t"
>> +                       :: "r" (bytes) : "memory");
>> +               break;
>> +       case 3:
>> +               asm volatile (
>> +                       "       .option push\n\t"
>> +                       "       .option arch, +v\n\t"
>> +                       "       vle8.v v24,  (%0)\n\t"
>> +                       "       .option pop\n\t"
>> +                       :: "r" (bytes) : "memory");
>> +               break;
>> +       default:
>> +               break;
>> +       }
>> +}
>> +
>> +static inline void get_vreg(ulong vlenb, ulong which, ulong pos, ulong size, uint8_t *bytes)
>> +{
>> +       pos += (which % 8) * vlenb;
>> +       bytes -= pos;
>> +
>> +       asm volatile (
>> +               "       .option push\n\t"
>> +               "       .option arch, +v\n\t"
>> +               "       vsetvli x0, %0, e8, m8, tu, ma\n\t"
>> +               "       .option pop\n\t"
>> +               :: "r" (pos + size));
>> +
>> +       csr_write(CSR_VSTART, pos);
>> +
>> +       switch (which / 8) {
>> +       case 0:
>> +               asm volatile (
>> +                       "       .option push\n\t"
>> +                       "       .option arch, +v\n\t"
>> +                       "       vse8.v v0,  (%0)\n\t"
>> +                       "       .option pop\n\t"
>> +                       :: "r" (bytes) : "memory");
>> +               break;
>> +       case 1:
>> +               asm volatile (
>> +                       "       .option push\n\t"
>> +                       "       .option arch, +v\n\t"
>> +                       "       vse8.v v8,  (%0)\n\t"
>> +                       "       .option pop\n\t"
>> +                       :: "r" (bytes) : "memory");
>> +               break;
>> +       case 2:
>> +               asm volatile (
>> +                       "       .option push\n\t"
>> +                       "       .option arch, +v\n\t"
>> +                       "       vse8.v v16, (%0)\n\t"
>> +                       "       .option pop\n\t"
>> +                       :: "r" (bytes) : "memory");
>> +               break;
>> +       case 3:
>> +               asm volatile (
>> +                       "       .option push\n\t"
>> +                       "       .option arch, +v\n\t"
>> +                       "       vse8.v v24, (%0)\n\t"
>> +                       "       .option pop\n\t"
>> +                       :: "r" (bytes) : "memory");
>> +               break;
>> +       default:
>> +               break;
>> +       }
>> +}
>> +
>> +static inline void vsetvl(ulong vl, ulong vtype)
>> +{
>> +       asm volatile (
>> +               "       .option push\n\t"
>> +               "       .option arch, +v\n\t"
>> +               "       vsetvl x0, %0, %1\n\t"
>> +               "       .option pop\n\t"
>> +                       :: "r" (vl), "r" (vtype));
>> +}
>> +
>> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
>> +                                struct sbi_trap_context *tcntx)
>> +{
>> +       const struct sbi_trap_info *orig_trap = &tcntx->trap;
>> +       struct sbi_trap_regs *regs = &tcntx->regs;
>> +       struct sbi_trap_info uptrap;
>> +       ulong insn = sbi_get_insn(regs->mepc, &uptrap);
>> +       ulong vl = csr_read(CSR_VL);
>> +       ulong vtype = csr_read(CSR_VTYPE);
>> +       ulong vlenb = csr_read(CSR_VLENB);
>> +       ulong vstart = csr_read(CSR_VSTART);
>> +       ulong base = GET_RS1(insn, regs);
>> +       ulong stride = GET_RS2(insn, regs);
>> +       ulong vd = GET_VD(insn);
>> +       ulong vs2 = GET_VS2(insn);
>> +       ulong view = GET_VIEW(insn);
>> +       ulong vsew = GET_VSEW(vtype);
>> +       ulong vlmul = GET_VLMUL(vtype);
>> +       bool illegal = GET_MEW(insn);
>> +       bool masked = IS_MASKED(insn);
>> +       uint8_t mask[VLEN_MAX / 8];
>> +       uint8_t bytes[8 * sizeof(uint64_t)];
>> +       ulong len = GET_LEN(view);
>> +       ulong nf = GET_NF(insn);
>> +       ulong vemul = GET_VEMUL(vlmul, view, vsew);
>> +       ulong emul = GET_EMUL(vemul);
>> +
>> +       if (IS_UNIT_STRIDE_LOAD(insn) || IS_FAULT_ONLY_FIRST_LOAD(insn)) {
>> +               stride = nf * len;
>> +       } else if (IS_WHOLE_REG_LOAD(insn)) {
>> +               vl = (nf * vlenb) >> view;
>> +               nf = 1;
>> +               vemul = 0;
>> +               emul = 1;
>> +               stride = nf * len;
>> +       } else if (IS_INDEXED_LOAD(insn)) {
>> +               len = 1 << vsew;
>> +               vemul = (vlmul + vsew - vsew) & 7;
>> +               emul = 1 << ((vemul & 4) ? 0 : vemul);
>> +               stride = nf * len;
>> +       }
>> +
>> +       if (illegal || vlenb > VLEN_MAX / 8) {
>> +               struct sbi_trap_info trap = {
>> +                       .cause = CAUSE_ILLEGAL_INSTRUCTION,
>> +                       .tval = insn,
>> +               };
>> +               return sbi_trap_redirect(regs, &trap);
>> +       }
>> +
>> +       if (masked)
>> +               get_vreg(vlenb, 0, 0, vlenb, mask);
>> +
>> +       do {
>> +               if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
>> +                       // compute element address
>> +                       ulong addr = base + vstart * stride;
>> +
>> +                       if (IS_INDEXED_LOAD(insn)) {
>> +                               ulong offset = 0;
>> +
>> +                               get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
>> +                               addr = base + offset;
>> +                       }
>> +
>> +                       csr_write(CSR_VSTART, vstart);
>> +
>> +                       // obtain load data from memory
>> +                       for (ulong seg = 0; seg < nf; seg++) {
>> +                               for (ulong i = 0; i < len; i++) {
>> +                                       bytes[seg * len + i] =
>> +                                               sbi_load_u8((void *)(addr + seg * len + i),
>> +                                                           &uptrap);
>> +
>> +                                       if (uptrap.cause) {
>> +                                               if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
>> +                                                       vl = vstart;
>> +                                                       break;
>> +                                               }
>> +                                               vsetvl(vl, vtype);
>> +                                               uptrap.tinst = sbi_misaligned_tinst_fixup(
>> +                                                       orig_trap->tinst, uptrap.tinst, i);
>> +                                               return sbi_trap_redirect(regs, &uptrap);
>> +                                       }
>> +                               }
>> +                       }
>> +
>> +                       // write load data to regfile
>> +                       for (ulong seg = 0; seg < nf; seg++)
>> +                               set_vreg(vlenb, vd + seg * emul, vstart * len,
>> +                                        len, &bytes[seg * len]);
>> +               }
>> +       } while (++vstart < vl);
>> +
>> +       // restore clobbered vl/vtype
>> +       vsetvl(vl, vtype);
>> +
>> +       return vl;
>> +}
>> +
>> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
>> +                                struct sbi_trap_context *tcntx)
>> +{
>> +       const struct sbi_trap_info *orig_trap = &tcntx->trap;
>> +       struct sbi_trap_regs *regs = &tcntx->regs;
>> +       struct sbi_trap_info uptrap;
>> +       ulong insn = sbi_get_insn(regs->mepc, &uptrap);
>> +       ulong vl = csr_read(CSR_VL);
>> +       ulong vtype = csr_read(CSR_VTYPE);
>> +       ulong vlenb = csr_read(CSR_VLENB);
>> +       ulong vstart = csr_read(CSR_VSTART);
>> +       ulong base = GET_RS1(insn, regs);
>> +       ulong stride = GET_RS2(insn, regs);
>> +       ulong vd = GET_VD(insn);
>> +       ulong vs2 = GET_VS2(insn);
>> +       ulong view = GET_VIEW(insn);
>> +       ulong vsew = GET_VSEW(vtype);
>> +       ulong vlmul = GET_VLMUL(vtype);
>> +       bool illegal = GET_MEW(insn);
>> +       bool masked = IS_MASKED(insn);
>> +       uint8_t mask[VLEN_MAX / 8];
>> +       uint8_t bytes[8 * sizeof(uint64_t)];
>> +       ulong len = GET_LEN(view);
>> +       ulong nf = GET_NF(insn);
>> +       ulong vemul = GET_VEMUL(vlmul, view, vsew);
>> +       ulong emul = GET_EMUL(vemul);
>> +
>> +       if (IS_UNIT_STRIDE_STORE(insn)) {
>> +               stride = nf * len;
>> +       } else if (IS_WHOLE_REG_STORE(insn)) {
>> +               vl = (nf * vlenb) >> view;
>> +               nf = 1;
>> +               vemul = 0;
>> +               emul = 1;
>> +               stride = nf * len;
>> +       } else if (IS_INDEXED_STORE(insn)) {
>> +               len = 1 << vsew;
>> +               vemul = (vlmul + vsew - vsew) & 7;
>> +               emul = 1 << ((vemul & 4) ? 0 : vemul);
>> +               stride = nf * len;
>> +       }
>> +
>> +       if (illegal || vlenb > VLEN_MAX / 8) {
>> +               struct sbi_trap_info trap = {
>> +                       .cause = CAUSE_ILLEGAL_INSTRUCTION,
>> +                       .tval = insn,
>> +               };
>> +               return sbi_trap_redirect(regs, &trap);
>> +       }
>> +
>> +       if (masked)
>> +               get_vreg(vlenb, 0, 0, vlenb, mask);
>> +
>> +       do {
>> +               if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
>> +                       // compute element address
>> +                       ulong addr = base + vstart * stride;
>> +
>> +                       if (IS_INDEXED_STORE(insn)) {
>> +                               ulong offset = 0;
>> +
>> +                               get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
>> +                               addr = base + offset;
>> +                       }
>> +
>> +                       // obtain store data from regfile
>> +                       for (ulong seg = 0; seg < nf; seg++)
>> +                               get_vreg(vlenb, vd + seg * emul, vstart * len,
>> +                                        len, &bytes[seg * len]);
>> +
>> +                       csr_write(CSR_VSTART, vstart);
>> +
>> +                       // write store data to memory
>> +                       for (ulong seg = 0; seg < nf; seg++) {
>> +                               for (ulong i = 0; i < len; i++) {
>> +                                       sbi_store_u8((void *)(addr + seg * len + i),
>> +                                                    bytes[seg * len + i], &uptrap);
>> +                                       if (uptrap.cause) {
>> +                                               vsetvl(vl, vtype);
>> +                                               uptrap.tinst = sbi_misaligned_tinst_fixup(
>> +                                                       orig_trap->tinst, uptrap.tinst, i);
>> +                                               return sbi_trap_redirect(regs, &uptrap);
>> +                                       }
>> +                               }
>> +                       }
>> +               }
>> +       } while (++vstart < vl);
>> +
>> +       // restore clobbered vl/vtype
>> +       vsetvl(vl, vtype);
>> +
>> +       return vl;
>> +}
>> +#else
>> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
>> +                                struct sbi_trap_context *tcntx)
>> +{
>> +       return 0;
>> +}
>> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
>> +                                struct sbi_trap_context *tcntx)
>> +{
>> +       return 0;
>> +}
>> +#endif /* __riscv_vector  */
>> --
>> 2.34.1
>>
Anup Patel Dec. 7, 2024, 2:31 p.m. UTC | #3
On Sat, Dec 7, 2024 at 2:27 AM Clément Léger <cleger@rivosinc.com> wrote:
>
> Hi Anup & Nylon,
>
> This commit breaks OpenSBI boot on my setup. A quick analysis of the
> problem shows that the changes to objects.mk now allow GCC to generate
> vector assembly in OpenSBI. It now crashes in bitmap_zero() really early
> (used in fw_platform_coldboot_harts_init()) due to vector instructions
> being used:
>
>    0x0000000080007800 <+474>:   mv      a0,s2
>    0x0000000080007802 <+476>:   sw      a5,88(s4)
>    0x0000000080007806 <+480>:   jal     0x8001adbe <fdt_check_imsic_mlevel>
> => 0x000000008000780a <+484>:   vsetivli        zero,2,e64,m1,ta,ma
>    0x000000008000780e <+488>:   vmv.v.i v1,0
>    0x0000000080007812 <+492>:   mv      a5,a0
>
>
> And then it raises an illegal instruction exception, probably because V
> isn't enabled. But I'm guessing we do not expect to use vector code in
> OpenSBI at all? I'm using a fairly new QEMU, if that is of any help
> (248f9209edfd289e7d97fb323e5075ccd55cc157).

Ahh, it seems auto-vectorization is enabled by default.

We certainly don't want to use vector code in OpenSBI
at the moment.

How about using the compiler command-line option to
force-disable auto-vectorization?

Regards,
Anup
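
For reference, compiler auto-vectorization can usually be suppressed with
plain compiler flags while still keeping V in the ISA string: GCC understands
-fno-tree-vectorize and -fno-tree-slp-vectorize, and Clang understands
-fno-vectorize and -fno-slp-vectorize. A minimal sketch of such a change is
below; the CC_IS_CLANG conditional and the CFLAGS variable are illustrative
placeholders rather than the exact OpenSBI Makefile symbols:

  # Keep "v" in PLATFORM_RISCV_ISA so __riscv_vector stays defined and
  # the new emulator is still built, but stop the compiler from
  # auto-vectorizing C code on its own.
  ifeq ($(CC_IS_CLANG),y)
    CFLAGS += -fno-vectorize -fno-slp-vectorize
  else
    CFLAGS += -fno-tree-vectorize -fno-tree-slp-vectorize
  endif

Whether that is preferable to dropping "v" from PLATFORM_RISCV_ISA again is a
separate trade-off, since the new emulator is only compiled under
#ifdef __riscv_vector.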

>
> Thanks,
>
> Clément
>
> On 06/12/2024 13:18, Anup Patel wrote:
> > On Fri, Dec 6, 2024 at 8:40 AM Nylon Chen <nylon.chen@sifive.com> wrote:
> >>
> >> Add exception handling vector instructions from
> >>
> >> the vector extension to the sbi_misaligned_ldst library.
> >>
> >> This implementation references the misaligned_vec_ldst
> >> implementation in the riscv-pk project.
> >>
> >> Co-developed-by: Zong Li <zong.li@sifive.com>
> >> Signed-off-by: Zong Li <zong.li@sifive.com>
> >> Signed-off-by: Nylon Chen <nylon.chen@sifive.com>
> >> Reviewed-by: Andy Chiu <andy.chiu@sifive.com>
> >> Reviewed-by: Anup Patel <anup@brainfault.org>
> >
> > I updated the commit description and replaced spaces with tabs
> > for alignment at a few places at the time of merging this patch.
> >
> > Applied this patch to the riscv/opensbi repo.
> >
> > Thanks,
> > Anup
> >
> >> ---
> >>  Makefile                     |  11 +-
> >>  include/sbi/riscv_encoding.h | 385 ++++++++++++++++++++++++++++++++++-
> >>  include/sbi/sbi_trap_ldst.h  |   9 +
> >>  lib/sbi/objects.mk           |   1 +
> >>  lib/sbi/sbi_trap_ldst.c      |  23 ++-
> >>  lib/sbi/sbi_trap_v_ldst.c    | 341 +++++++++++++++++++++++++++++++
> >>  6 files changed, 758 insertions(+), 12 deletions(-)
> >>  create mode 100644 lib/sbi/sbi_trap_v_ldst.c
> >>
> >> diff --git a/Makefile b/Makefile
> >> index d9cee49..5ac95a0 100644
> >> --- a/Makefile
> >> +++ b/Makefile
> >> @@ -189,6 +189,9 @@ CC_SUPPORT_STRICT_ALIGN := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib
> >>  # Check whether the assembler and the compiler support the Zicsr and Zifencei extensions
> >>  CC_SUPPORT_ZICSR_ZIFENCEI := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib -march=rv$(OPENSBI_CC_XLEN)imafd_zicsr_zifencei -x c /dev/null -o /dev/null 2>&1 | grep -e "zicsr" -e "zifencei" > /dev/null && echo n || echo y)
> >>
> >> +# Check whether the assembler and the compiler support the Vector extension
> >> +CC_SUPPORT_VECT := $(shell echo | $(CC) -dM -E -march=rv$(OPENSBI_CC_XLEN)gv - | grep -q riscv.*vector && echo y || echo n)
> >> +
> >>  ifneq ($(OPENSBI_LD_PIE),y)
> >>  $(error Your linker does not support creating PIEs, opensbi requires this.)
> >>  endif
> >> @@ -294,10 +297,12 @@ ifndef PLATFORM_RISCV_ABI
> >>  endif
> >>  ifndef PLATFORM_RISCV_ISA
> >>    ifneq ($(PLATFORM_RISCV_TOOLCHAIN_DEFAULT), 1)
> >> +    PLATFORM_RISCV_ISA := rv$(PLATFORM_RISCV_XLEN)imafdc
> >> +    ifeq ($(CC_SUPPORT_VECT), y)
> >> +      PLATFORM_RISCV_ISA := $(PLATFORM_RISCV_ISA)v
> >> +    endif
> >>      ifeq ($(CC_SUPPORT_ZICSR_ZIFENCEI), y)
> >> -      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc_zicsr_zifencei
> >> -    else
> >> -      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc
> >> +      PLATFORM_RISCV_ISA := $(PLATFORM_RISCV_ISA)_zicsr_zifencei
> >>      endif
> >>    else
> >>      PLATFORM_RISCV_ISA = $(OPENSBI_CC_ISA)
> >> diff --git a/include/sbi/riscv_encoding.h b/include/sbi/riscv_encoding.h
> >> index 38997ef..8b724b8 100644
> >> --- a/include/sbi/riscv_encoding.h
> >> +++ b/include/sbi/riscv_encoding.h
> >> @@ -763,6 +763,12 @@
> >>  #define CSR_MVIPH                      0x319
> >>  #define CSR_MIPH                       0x354
> >>
> >> +/* Vector extension registers */
> >> +#define CSR_VSTART                     0x8
> >> +#define CSR_VL                         0xc20
> >> +#define CSR_VTYPE                      0xc21
> >> +#define CSR_VLENB                      0xc22
> >> +
> >>  /* ===== Trap/Exception Causes ===== */
> >>
> >>  #define CAUSE_MISALIGNED_FETCH         0x0
> >> @@ -891,11 +897,364 @@
> >>  #define INSN_MASK_FENCE_TSO            0xffffffff
> >>  #define INSN_MATCH_FENCE_TSO           0x8330000f
> >>
> >> +#define INSN_MASK_VECTOR_UNIT_STRIDE           0xfdf0707f
> >> +#define INSN_MASK_VECTOR_FAULT_ONLY_FIRST      0xfdf0707f
> >> +#define INSN_MASK_VECTOR_STRIDE                0xfc00707f
> >> +#define INSN_MASK_VECTOR_WHOLE_REG             0xfff0707f
> >> +#define INSN_MASK_VECTOR_INDEXED               0xfc00707f
> >> +
> >> +#define INSN_MATCH_VLUXSEG(n, bits) ((((n) - 1) << 29) | 0x04000007 | \
> >> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> >> +#define INSN_MATCH_VSUXSEG(n, bits) ((((n) - 1) << 29) | 0x04000027 | \
> >> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> >> +#define INSN_MATCH_VLOXSEG(n, bits) ((((n) - 1) << 29) | 0x0c000007 | \
> >> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> >> +#define INSN_MATCH_VSOXSEG(n, bits) ((((n) - 1) << 29) | 0x0c000027 | \
> >> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> >> +#define INSN_MATCH_VLSSEG(n, bits)  ((((n) - 1) << 29) | 0x08000007 | \
> >> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> >> +#define INSN_MATCH_VSSSEG(n, bits)  ((((n) - 1) << 29) | 0x08000027 | \
> >> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> >> +#define INSN_MATCH_VSSEG(n, bits)   ((((n) - 1) << 29) | 0x00004027 | \
> >> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> >> +#define INSN_MATCH_VLSEG(n, bits)   ((((n) - 1) << 29) | 0x00004007 | \
> >> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> >> +#define INSN_MATCH_VLSEGFF(n, bits) ((((n) - 1) << 29) | 0x1000007 | \
> >> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
> >> +
> >> +#define INSN_MATCH_VLE16V              0x00005007
> >> +#define INSN_MATCH_VLE32V              0x00006007
> >> +#define INSN_MATCH_VLE64V              0x00007007
> >> +#define INSN_MATCH_VSE16V              0x00005027
> >> +#define INSN_MATCH_VSE32V              0x00006027
> >> +#define INSN_MATCH_VSE64V              0x00007027
> >> +#define INSN_MATCH_VLSE16V             0x08005007
> >> +#define INSN_MATCH_VLSE32V             0x08006007
> >> +#define INSN_MATCH_VLSE64V             0x08007007
> >> +#define INSN_MATCH_VSSE16V             0x08005027
> >> +#define INSN_MATCH_VSSE32V             0x08006027
> >> +#define INSN_MATCH_VSSE64V             0x08007027
> >> +#define INSN_MATCH_VLOXEI16V           0x0c005007
> >> +#define INSN_MATCH_VLOXEI32V           0x0c006007
> >> +#define INSN_MATCH_VLOXEI64V           0x0c007007
> >> +#define INSN_MATCH_VSOXEI16V           0x0c005027
> >> +#define INSN_MATCH_VSOXEI32V           0x0c006027
> >> +#define INSN_MATCH_VSOXEI64V           0x0c007027
> >> +#define INSN_MATCH_VLUXEI16V           0x04005007
> >> +#define INSN_MATCH_VLUXEI32V           0x04006007
> >> +#define INSN_MATCH_VLUXEI64V           0x04007007
> >> +#define INSN_MATCH_VSUXEI16V           0x04005027
> >> +#define INSN_MATCH_VSUXEI32V           0x04006027
> >> +#define INSN_MATCH_VSUXEI64V           0x04007027
> >> +#define INSN_MATCH_VLE16FFV            0x01005007
> >> +#define INSN_MATCH_VLE32FFV            0x01006007
> >> +#define INSN_MATCH_VLE64FFV            0x01007007
> >> +#define INSN_MATCH_VL1RE8V             0x02800007
> >> +#define INSN_MATCH_VL1RE16V            0x02805007
> >> +#define INSN_MATCH_VL1RE32V            0x02806007
> >> +#define INSN_MATCH_VL1RE64V            0x02807007
> >> +#define INSN_MATCH_VL2RE8V             0x22800007
> >> +#define INSN_MATCH_VL2RE16V            0x22805007
> >> +#define INSN_MATCH_VL2RE32V            0x22806007
> >> +#define INSN_MATCH_VL2RE64V            0x22807007
> >> +#define INSN_MATCH_VL4RE8V             0x62800007
> >> +#define INSN_MATCH_VL4RE16V            0x62805007
> >> +#define INSN_MATCH_VL4RE32V            0x62806007
> >> +#define INSN_MATCH_VL4RE64V            0x62807007
> >> +#define INSN_MATCH_VL8RE8V             0xe2800007
> >> +#define INSN_MATCH_VL8RE16V            0xe2805007
> >> +#define INSN_MATCH_VL8RE32V            0xe2806007
> >> +#define INSN_MATCH_VL8RE64V            0xe2807007
> >> +#define INSN_MATCH_VS1RV               0x02800027
> >> +#define INSN_MATCH_VS2RV               0x22800027
> >> +#define INSN_MATCH_VS4RV               0x62800027
> >> +#define INSN_MATCH_VS8RV               0xe2800027
> >> +
> >> +#define INSN_MASK_VECTOR_LOAD_STORE    0x7f
> >> +#define INSN_MATCH_VECTOR_LOAD         0x07
> >> +#define INSN_MATCH_VECTOR_STORE                0x27
> >> +
> >> +#define IS_VECTOR_LOAD_STORE(insn) \
> >> +       ((((insn) & INSN_MASK_VECTOR_LOAD_STORE) == INSN_MATCH_VECTOR_LOAD) || \
> >> +       (((insn) & INSN_MASK_VECTOR_LOAD_STORE) == INSN_MATCH_VECTOR_STORE))
> >> +
> >> +#define IS_VECTOR_INSN_MATCH(insn, match, mask) \
> >> +       (((insn) & (mask)) == ((match) & (mask)))
> >> +
> >> +#define IS_UNIT_STRIDE_MATCH(insn, match) \
> >> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_UNIT_STRIDE)
> >> +
> >> +#define IS_STRIDE_MATCH(insn, match) \
> >> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_STRIDE)
> >> +
> >> +#define IS_INDEXED_MATCH(insn, match) \
> >> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_INDEXED)
> >> +
> >> +#define IS_FAULT_ONLY_FIRST_MATCH(insn, match) \
> >> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_FAULT_ONLY_FIRST)
> >> +
> >> +#define IS_WHOLE_REG_MATCH(insn, match) \
> >> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_WHOLE_REG)
> >> +
> >> +#define IS_UNIT_STRIDE_LOAD(insn) ( \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE16V) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE32V) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE64V) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 64)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 64)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 64)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 64)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 64)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 64)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 64)))
> >> +
> >> +#define IS_UNIT_STRIDE_STORE(insn) ( \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE16V) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE32V) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE64V) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 16)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 32)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 64)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 64)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 64)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 64)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 64)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 64)) || \
> >> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 64)))
> >> +
> >> +#define IS_STRIDE_LOAD(insn) ( \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE16V) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE32V) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE64V) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 64)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 64)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 64)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 64)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 64)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 64)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 64)))
> >> +
> >> +#define IS_STRIDE_STORE(insn) ( \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE16V) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE32V) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE64V) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 16)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 32)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 64)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 64)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 64)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 64)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 64)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 64)) || \
> >> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 64)))
> >> +
> >> +#define IS_INDEXED_LOAD(insn) ( \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI16V) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI32V) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI64V) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI16V) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI32V) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI64V) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 64)))
> >> +
> >> +#define IS_INDEXED_STORE(insn) ( \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI16V) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI32V) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI64V) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI16V) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI32V) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI64V) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 16)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 32)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 64)) || \
> >> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 64)))
> >> +
> >> +#define IS_FAULT_ONLY_FIRST_LOAD(insn) ( \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE16FFV) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE32FFV) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE64FFV) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 16)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 16)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 16)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 16)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 16)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 16)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 16)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 32)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 32)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 32)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 32)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 32)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 32)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 32)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 64)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 64)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 64)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 64)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 64)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 64)) || \
> >> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 64)))
> >> +
> >> +#define IS_WHOLE_REG_LOAD(insn) ( \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE8V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE16V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE32V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE64V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE8V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE16V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE32V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE64V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE8V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE16V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE32V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE64V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE8V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE16V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE32V) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE64V))
> >> +
> >> +#define IS_WHOLE_REG_STORE(insn) ( \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS1RV) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS2RV) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS4RV) || \
> >> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS8RV))
> >> +
> >> +
> >>  #if __riscv_xlen == 64
> >>
> >>  /* 64-bit read for VS-stage address translation (RV64) */
> >>  #define INSN_PSEUDO_VS_LOAD            0x00003000
> >> -
> >>  /* 64-bit write for VS-stage address translation (RV64) */
> >>  #define INSN_PSEUDO_VS_STORE   0x00003020
> >>
> >> @@ -911,6 +1270,12 @@
> >>  #error "Unexpected __riscv_xlen"
> >>  #endif
> >>
> >> +#define VM_MASK                                0x1
> >> +#define VIEW_MASK                              0x3
> >> +#define VSEW_MASK                              0x3
> >> +#define VLMUL_MASK                             0x7
> >> +#define VD_MASK                                0x1f
> >> +#define VS2_MASK                               0x1f
> >>  #define INSN_16BIT_MASK                        0x3
> >>  #define INSN_32BIT_MASK                        0x1c
> >>
> >> @@ -929,6 +1294,12 @@
> >>  #endif
> >>  #define REGBYTES                       (1 << LOG_REGBYTES)
> >>
> >> +#define SH_VSEW                        3
> >> +#define SH_VIEW                        12
> >> +#define SH_VD                          7
> >> +#define SH_VS2                         20
> >> +#define SH_VM                          25
> >> +#define SH_MEW                         28
> >>  #define SH_RD                          7
> >>  #define SH_RS1                         15
> >>  #define SH_RS2                         20
> >> @@ -982,6 +1353,18 @@
> >>  #define IMM_S(insn)                    (((s32)(insn) >> 25 << 5) | \
> >>                                          (s32)(((insn) >> 7) & 0x1f))
> >>
> >> +#define IS_MASKED(insn)                (((insn >> SH_VM) & VM_MASK) == 0)
> >> +#define GET_VD(insn)                   ((insn >> SH_VD) & VD_MASK)
> >> +#define GET_VS2(insn)                  ((insn >> SH_VS2) & VS2_MASK)
> >> +#define GET_VIEW(insn)                 (((insn) >> SH_VIEW) & VIEW_MASK)
> >> +#define GET_MEW(insn)                  (((insn) >> SH_MEW) & 1)
> >> +#define GET_VSEW(vtype)                (((vtype) >> SH_VSEW) & VSEW_MASK)
> >> +#define GET_VLMUL(vtype)               ((vtype) & VLMUL_MASK)
> >> +#define GET_LEN(view)                  (1UL << (view))
> >> +#define GET_NF(insn)                   (1 + ((insn >> 29) & 7))
> >> +#define GET_VEMUL(vlmul, view, vsew)   ((vlmul + view - vsew) & 7)
> >> +#define GET_EMUL(vemul)                (1UL << ((vemul) >= 4 ? 0 : (vemul)))
> >> +
> >>  #define MASK_FUNCT3                    0x7000
> >>  #define MASK_RS1                       0xf8000
> >>  #define MASK_CSR                       0xfff00000
> >> diff --git a/include/sbi/sbi_trap_ldst.h b/include/sbi/sbi_trap_ldst.h
> >> index 4c5cc37..34877cc 100644
> >> --- a/include/sbi/sbi_trap_ldst.h
> >> +++ b/include/sbi/sbi_trap_ldst.h
> >> @@ -30,4 +30,13 @@ int sbi_store_access_handler(struct sbi_trap_context *tcntx);
> >>
> >>  int sbi_double_trap_handler(struct sbi_trap_context *tcntx);
> >>
> >> +ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
> >> +                                ulong addr_offset);
> >> +
> >> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
> >> +                                struct sbi_trap_context *tcntx);
> >> +
> >> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
> >> +                                struct sbi_trap_context *tcntx);
> >> +
> >>  #endif
> >> diff --git a/lib/sbi/objects.mk b/lib/sbi/objects.mk
> >> index a6f7c5f..47a0866 100644
> >> --- a/lib/sbi/objects.mk
> >> +++ b/lib/sbi/objects.mk
> >> @@ -91,6 +91,7 @@ libsbi-objs-y += sbi_timer.o
> >>  libsbi-objs-y += sbi_tlb.o
> >>  libsbi-objs-y += sbi_trap.o
> >>  libsbi-objs-y += sbi_trap_ldst.o
> >> +libsbi-objs-y += sbi_trap_v_ldst.o
> >>  libsbi-objs-y += sbi_unpriv.o
> >>  libsbi-objs-y += sbi_expected_trap.o
> >>  libsbi-objs-y += sbi_cppc.o
> >> diff --git a/lib/sbi/sbi_trap_ldst.c b/lib/sbi/sbi_trap_ldst.c
> >> index ebc4a92..448406b 100644
> >> --- a/lib/sbi/sbi_trap_ldst.c
> >> +++ b/lib/sbi/sbi_trap_ldst.c
> >> @@ -32,7 +32,7 @@ typedef int (*sbi_trap_ld_emulator)(int rlen, union sbi_ldst_data *out_val,
> >>  typedef int (*sbi_trap_st_emulator)(int wlen, union sbi_ldst_data in_val,
> >>                                     struct sbi_trap_context *tcntx);
> >>
> >> -static ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
> >> +ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
> >>                                         ulong addr_offset)
> >>  {
> >>         if (new_tinst == INSN_PSEUDO_VS_LOAD ||
> >> @@ -52,7 +52,7 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
> >>         ulong insn, insn_len;
> >>         union sbi_ldst_data val = { 0 };
> >>         struct sbi_trap_info uptrap;
> >> -       int rc, fp = 0, shift = 0, len = 0;
> >> +       int rc, fp = 0, shift = 0, len = 0, vector = 0;
> >>
> >>         if (orig_trap->tinst & 0x1) {
> >>                 /*
> >> @@ -144,6 +144,9 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
> >>                 len = 2;
> >>                 shift = 8 * (sizeof(ulong) - len);
> >>                 insn = RVC_RS2S(insn) << SH_RD;
> >> +       } else if (IS_VECTOR_LOAD_STORE(insn)) {
> >> +               vector = 1;
> >> +               emu = sbi_misaligned_v_ld_emulator;
> >>         } else {
> >>                 return sbi_trap_redirect(regs, orig_trap);
> >>         }
> >> @@ -152,14 +155,16 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
> >>         if (rc <= 0)
> >>                 return rc;
> >>
> >> -       if (!fp)
> >> -               SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
> >> +       if (!vector) {
> >> +               if (!fp)
> >> +                       SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
> >>  #ifdef __riscv_flen
> >> -       else if (len == 8)
> >> -               SET_F64_RD(insn, regs, val.data_u64);
> >> -       else
> >> -               SET_F32_RD(insn, regs, val.data_ulong);
> >> +               else if (len == 8)
> >> +                       SET_F64_RD(insn, regs, val.data_u64);
> >> +               else
> >> +                       SET_F32_RD(insn, regs, val.data_ulong);
> >>  #endif
> >> +       }
> >>
> >>         regs->mepc += insn_len;
> >>
> >> @@ -248,6 +253,8 @@ static int sbi_trap_emulate_store(struct sbi_trap_context *tcntx,
> >>         } else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) {
> >>                 len             = 2;
> >>                 val.data_ulong = GET_RS2S(insn, regs);
> >> +       } else if (IS_VECTOR_LOAD_STORE(insn)) {
> >> +               emu = sbi_misaligned_v_st_emulator;
> >>         } else {
> >>                 return sbi_trap_redirect(regs, orig_trap);
> >>         }
> >> diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
> >> new file mode 100644
> >> index 0000000..72b2309
> >> --- /dev/null
> >> +++ b/lib/sbi/sbi_trap_v_ldst.c
> >> @@ -0,0 +1,341 @@
> >> +/*
> >> + * SPDX-License-Identifier: BSD-2-Clause
> >> + *
> >> + * Copyright (c) 2024 SiFive Inc.
> >> + *
> >> + * Authors:
> >> + *   Andrew Waterman <andrew@sifive.com>
> >> + *   Nylon Chen <nylon.chen@sifive.com>
> >> + *   Zong Li <zong.li@sifive.com>
> >> + */
> >> +
> >> +#include <sbi/riscv_asm.h>
> >> +#include <sbi/riscv_encoding.h>
> >> +#include <sbi/sbi_error.h>
> >> +#include <sbi/sbi_trap_ldst.h>
> >> +#include <sbi/sbi_trap.h>
> >> +#include <sbi/sbi_unpriv.h>
> >> +#include <sbi/sbi_trap.h>
> >> +
> >> +#ifdef __riscv_vector
> >> +#define VLEN_MAX 65536
> >> +
> >> +static inline void set_vreg(ulong vlenb, ulong which, ulong pos, ulong size, const uint8_t *bytes)
> >> +{
> >> +       pos += (which % 8) * vlenb;
> >> +       bytes -= pos;
> >> +
> >> +       asm volatile (
> >> +               "       .option push\n\t"
> >> +               "       .option arch, +v\n\t"
> >> +               "       vsetvli x0, %0, e8, m8, tu, ma\n\t"
> >> +               "       .option pop\n\t"
> >> +               :: "r" (pos + size));
> >> +
> >> +       csr_write(CSR_VSTART, pos);
> >> +
> >> +       switch (which / 8) {
> >> +       case 0:
> >> +               asm volatile (
> >> +                       "       .option push\n\t"
> >> +                       "       .option arch, +v\n\t"
> >> +                       "       vle8.v v0,  (%0)\n\t"
> >> +                       "       .option pop\n\t"
> >> +                       :: "r" (bytes) : "memory");
> >> +               break;
> >> +       case 1:
> >> +               asm volatile (
> >> +                       "       .option push\n\t"
> >> +                       "       .option arch, +v\n\t"
> >> +                       "       vle8.v v8,  (%0)\n\t"
> >> +                       "       .option pop\n\t"
> >> +                       :: "r" (bytes) : "memory");
> >> +               break;
> >> +       case 2:
> >> +               asm volatile (
> >> +                       "       .option push\n\t"
> >> +                       "       .option arch, +v\n\t"
> >> +                       "       vle8.v v16,  (%0)\n\t"
> >> +                       "       .option pop\n\t"
> >> +                       :: "r" (bytes) : "memory");
> >> +               break;
> >> +       case 3:
> >> +               asm volatile (
> >> +                       "       .option push\n\t"
> >> +                       "       .option arch, +v\n\t"
> >> +                       "       vle8.v v24,  (%0)\n\t"
> >> +                       "       .option pop\n\t"
> >> +                       :: "r" (bytes) : "memory");
> >> +               break;
> >> +       default:
> >> +               break;
> >> +       }
> >> +}
> >> +
> >> +static inline void get_vreg(ulong vlenb, ulong which, ulong pos, ulong size, uint8_t *bytes)
> >> +{
> >> +       pos += (which % 8) * vlenb;
> >> +       bytes -= pos;
> >> +
> >> +       asm volatile (
> >> +               "       .option push\n\t"
> >> +               "       .option arch, +v\n\t"
> >> +               "       vsetvli x0, %0, e8, m8, tu, ma\n\t"
> >> +               "       .option pop\n\t"
> >> +               :: "r" (pos + size));
> >> +
> >> +       csr_write(CSR_VSTART, pos);
> >> +
> >> +       switch (which / 8) {
> >> +       case 0:
> >> +               asm volatile (
> >> +                       "       .option push\n\t"
> >> +                       "       .option arch, +v\n\t"
> >> +                       "       vse8.v v0,  (%0)\n\t"
> >> +                       "       .option pop\n\t"
> >> +                       :: "r" (bytes) : "memory");
> >> +               break;
> >> +       case 1:
> >> +               asm volatile (
> >> +                       "       .option push\n\t"
> >> +                       "       .option arch, +v\n\t"
> >> +                       "       vse8.v v8,  (%0)\n\t"
> >> +                       "       .option pop\n\t"
> >> +                       :: "r" (bytes) : "memory");
> >> +               break;
> >> +       case 2:
> >> +               asm volatile (
> >> +                       "       .option push\n\t"
> >> +                       "       .option arch, +v\n\t"
> >> +                       "       vse8.v v16, (%0)\n\t"
> >> +                       "       .option pop\n\t"
> >> +                       :: "r" (bytes) : "memory");
> >> +               break;
> >> +       case 3:
> >> +               asm volatile (
> >> +                       "       .option push\n\t"
> >> +                       "       .option arch, +v\n\t"
> >> +                       "       vse8.v v24, (%0)\n\t"
> >> +                       "       .option pop\n\t"
> >> +                       :: "r" (bytes) : "memory");
> >> +               break;
> >> +       default:
> >> +               break;
> >> +       }
> >> +}
> >> +
> >> +static inline void vsetvl(ulong vl, ulong vtype)
> >> +{
> >> +       asm volatile (
> >> +               "       .option push\n\t"
> >> +               "       .option arch, +v\n\t"
> >> +               "       vsetvl x0, %0, %1\n\t"
> >> +               "       .option pop\n\t"
> >> +                       :: "r" (vl), "r" (vtype));
> >> +}
> >> +
> >> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
> >> +                                struct sbi_trap_context *tcntx)
> >> +{
> >> +       const struct sbi_trap_info *orig_trap = &tcntx->trap;
> >> +       struct sbi_trap_regs *regs = &tcntx->regs;
> >> +       struct sbi_trap_info uptrap;
> >> +       ulong insn = sbi_get_insn(regs->mepc, &uptrap);
> >> +       ulong vl = csr_read(CSR_VL);
> >> +       ulong vtype = csr_read(CSR_VTYPE);
> >> +       ulong vlenb = csr_read(CSR_VLENB);
> >> +       ulong vstart = csr_read(CSR_VSTART);
> >> +       ulong base = GET_RS1(insn, regs);
> >> +       ulong stride = GET_RS2(insn, regs);
> >> +       ulong vd = GET_VD(insn);
> >> +       ulong vs2 = GET_VS2(insn);
> >> +       ulong view = GET_VIEW(insn);
> >> +       ulong vsew = GET_VSEW(vtype);
> >> +       ulong vlmul = GET_VLMUL(vtype);
> >> +       bool illegal = GET_MEW(insn);
> >> +       bool masked = IS_MASKED(insn);
> >> +       uint8_t mask[VLEN_MAX / 8];
> >> +       uint8_t bytes[8 * sizeof(uint64_t)];
> >> +       ulong len = GET_LEN(view);
> >> +       ulong nf = GET_NF(insn);
> >> +       ulong vemul = GET_VEMUL(vlmul, view, vsew);
> >> +       ulong emul = GET_EMUL(vemul);
> >> +
> >> +       if (IS_UNIT_STRIDE_LOAD(insn) || IS_FAULT_ONLY_FIRST_LOAD(insn)) {
> >> +               stride = nf * len;
> >> +       } else if (IS_WHOLE_REG_LOAD(insn)) {
> >> +               vl = (nf * vlenb) >> view;
> >> +               nf = 1;
> >> +               vemul = 0;
> >> +               emul = 1;
> >> +               stride = nf * len;
> >> +       } else if (IS_INDEXED_LOAD(insn)) {
> >> +               len = 1 << vsew;
> >> +               vemul = (vlmul + vsew - vsew) & 7;
> >> +               emul = 1 << ((vemul & 4) ? 0 : vemul);
> >> +               stride = nf * len;
> >> +       }
> >> +
> >> +       if (illegal || vlenb > VLEN_MAX / 8) {
> >> +               struct sbi_trap_info trap = {
> >> +                       .cause = CAUSE_ILLEGAL_INSTRUCTION,
> >> +                       .tval = insn,
> >> +               };
> >> +               return sbi_trap_redirect(regs, &trap);
> >> +       }
> >> +
> >> +       if (masked)
> >> +               get_vreg(vlenb, 0, 0, vlenb, mask);
> >> +
> >> +       do {
> >> +               if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
> >> +                       // compute element address
> >> +                       ulong addr = base + vstart * stride;
> >> +
> >> +                       if (IS_INDEXED_LOAD(insn)) {
> >> +                               ulong offset = 0;
> >> +
> >> +                               get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
> >> +                               addr = base + offset;
> >> +                       }
> >> +
> >> +                       csr_write(CSR_VSTART, vstart);
> >> +
> >> +                       // obtain load data from memory
> >> +                       for (ulong seg = 0; seg < nf; seg++) {
> >> +                               for (ulong i = 0; i < len; i++) {
> >> +                                       bytes[seg * len + i] =
> >> +                                               sbi_load_u8((void *)(addr + seg * len + i),
> >> +                                                           &uptrap);
> >> +
> >> +                                       if (uptrap.cause) {
> >> +                                               if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
> >> +                                                       vl = vstart;
> >> +                                                       break;
> >> +                                               }
> >> +                                               vsetvl(vl, vtype);
> >> +                                               uptrap.tinst = sbi_misaligned_tinst_fixup(
> >> +                                                       orig_trap->tinst, uptrap.tinst, i);
> >> +                                               return sbi_trap_redirect(regs, &uptrap);
> >> +                                       }
> >> +                               }
> >> +                       }
> >> +
> >> +                       // write load data to regfile
> >> +                       for (ulong seg = 0; seg < nf; seg++)
> >> +                               set_vreg(vlenb, vd + seg * emul, vstart * len,
> >> +                                        len, &bytes[seg * len]);
> >> +               }
> >> +       } while (++vstart < vl);
> >> +
> >> +       // restore clobbered vl/vtype
> >> +       vsetvl(vl, vtype);
> >> +
> >> +       return vl;
> >> +}
> >> +
> >> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
> >> +                                struct sbi_trap_context *tcntx)
> >> +{
> >> +       const struct sbi_trap_info *orig_trap = &tcntx->trap;
> >> +       struct sbi_trap_regs *regs = &tcntx->regs;
> >> +       struct sbi_trap_info uptrap;
> >> +       ulong insn = sbi_get_insn(regs->mepc, &uptrap);
> >> +       ulong vl = csr_read(CSR_VL);
> >> +       ulong vtype = csr_read(CSR_VTYPE);
> >> +       ulong vlenb = csr_read(CSR_VLENB);
> >> +       ulong vstart = csr_read(CSR_VSTART);
> >> +       ulong base = GET_RS1(insn, regs);
> >> +       ulong stride = GET_RS2(insn, regs);
> >> +       ulong vd = GET_VD(insn);
> >> +       ulong vs2 = GET_VS2(insn);
> >> +       ulong view = GET_VIEW(insn);
> >> +       ulong vsew = GET_VSEW(vtype);
> >> +       ulong vlmul = GET_VLMUL(vtype);
> >> +       bool illegal = GET_MEW(insn);
> >> +       bool masked = IS_MASKED(insn);
> >> +       uint8_t mask[VLEN_MAX / 8];
> >> +       uint8_t bytes[8 * sizeof(uint64_t)];
> >> +       ulong len = GET_LEN(view);
> >> +       ulong nf = GET_NF(insn);
> >> +       ulong vemul = GET_VEMUL(vlmul, view, vsew);
> >> +       ulong emul = GET_EMUL(vemul);
> >> +
> >> +       if (IS_UNIT_STRIDE_STORE(insn)) {
> >> +               stride = nf * len;
> >> +       } else if (IS_WHOLE_REG_STORE(insn)) {
> >> +               vl = (nf * vlenb) >> view;
> >> +               nf = 1;
> >> +               vemul = 0;
> >> +               emul = 1;
> >> +               stride = nf * len;
> >> +       } else if (IS_INDEXED_STORE(insn)) {
> >> +               len = 1 << vsew;
> >> +               vemul = (vlmul + vsew - vsew) & 7;
> >> +               emul = 1 << ((vemul & 4) ? 0 : vemul);
> >> +               stride = nf * len;
> >> +       }
> >> +
> >> +       if (illegal || vlenb > VLEN_MAX / 8) {
> >> +               struct sbi_trap_info trap = {
> >> +                       .cause = CAUSE_ILLEGAL_INSTRUCTION,
> >> +                       .tval = insn,
> >> +               };
> >> +               return sbi_trap_redirect(regs, &trap);
> >> +       }
> >> +
> >> +       if (masked)
> >> +               get_vreg(vlenb, 0, 0, vlenb, mask);
> >> +
> >> +       do {
> >> +               if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
> >> +                       // compute element address
> >> +                       ulong addr = base + vstart * stride;
> >> +
> >> +                       if (IS_INDEXED_STORE(insn)) {
> >> +                               ulong offset = 0;
> >> +
> >> +                               get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
> >> +                               addr = base + offset;
> >> +                       }
> >> +
> >> +                       // obtain store data from regfile
> >> +                       for (ulong seg = 0; seg < nf; seg++)
> >> +                               get_vreg(vlenb, vd + seg * emul, vstart * len,
> >> +                                        len, &bytes[seg * len]);
> >> +
> >> +                       csr_write(CSR_VSTART, vstart);
> >> +
> >> +                       // write store data to memory
> >> +                       for (ulong seg = 0; seg < nf; seg++) {
> >> +                               for (ulong i = 0; i < len; i++) {
> >> +                                       sbi_store_u8((void *)(addr + seg * len + i),
> >> +                                                    bytes[seg * len + i], &uptrap);
> >> +                                       if (uptrap.cause) {
> >> +                                               vsetvl(vl, vtype);
> >> +                                               uptrap.tinst = sbi_misaligned_tinst_fixup(
> >> +                                                       orig_trap->tinst, uptrap.tinst, i);
> >> +                                               return sbi_trap_redirect(regs, &uptrap);
> >> +                                       }
> >> +                               }
> >> +                       }
> >> +               }
> >> +       } while (++vstart < vl);
> >> +
> >> +       // restore clobbered vl/vtype
> >> +       vsetvl(vl, vtype);
> >> +
> >> +       return vl;
> >> +}
> >> +#else
> >> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
> >> +                                struct sbi_trap_context *tcntx)
> >> +{
> >> +       return 0;
> >> +}
> >> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
> >> +                                struct sbi_trap_context *tcntx)
> >> +{
> >> +       return 0;
> >> +}
> >> +#endif /* __riscv_vector  */
> >> --
> >> 2.34.1
> >>
>
>
> --
> opensbi mailing list
> opensbi@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/opensbi
Jessica Clarke Dec. 8, 2024, 8:26 p.m. UTC | #4
On 7 Dec 2024, at 14:31, Anup Patel <apatel@ventanamicro.com> wrote:
> 
> On Sat, Dec 7, 2024 at 2:27 AM Clément Léger <cleger@rivosinc.com> wrote:
>> 
>> Hi Anup & Nylon,
>> 
>> This commit breaks OpenSBI boot on my setup. A quick analysis of the
>> problem shows that the Makefile change (adding "v" to
>> PLATFORM_RISCV_ISA) now allows GCC to generate vector assembly in
>> OpenSBI. It now crashes in bitmap_zero() really early (used in
>> fw_platform_coldboot_harts_init()) due to vector instructions
>> being used:
>> 
>>   0x0000000080007800 <+474>:   mv      a0,s2
>>   0x0000000080007802 <+476>:   sw      a5,88(s4)
>>   0x0000000080007806 <+480>:   jal     0x8001adbe <fdt_check_imsic_mlevel>
>> => 0x000000008000780a <+484>:   vsetivli        zero,2,e64,m1,ta,ma
>>   0x000000008000780e <+488>:   vmv.v.i v1,0
>>   0x0000000080007812 <+492>:   mv      a5,a0
>> 
>> 
>> And then it raises an illegal instruction, probably because V isn't
>> enabled. But I'm guessing we do not expect to use vector code in OpenSBI
>> at all? I'm using a fairly new QEMU if that is of any help
>> (248f9209edfd289e7d97fb323e5075ccd55cc157).
> 
> Ahh, it seems auto-vectorization is enabled by default.
> 
> We certainly don't want to use vector code in OpenSBI
> at the moment.
> 
> How about using a compiler command-line option to
> force-disable auto-vectorization?
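
(For reference, a minimal sketch of that idea, assuming GCC's generic
vectorizer switches; "CFLAGS" here merely stands in for wherever the
build actually assembles the C compiler flags, and clang spells the
equivalent options differently:

    # Hypothetical, untested: keep the auto-vectorizer off for all C code
    CFLAGS += -fno-tree-loop-vectorize -fno-tree-slp-vectorize

That would still leave "v" in -march for every translation unit, though.)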

Just don’t put V in the ISA string? The assembly already does

    .option arch, +v

AFAICT there’s no reason for any C to be compiled with V.
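
In Makefile terms that would roughly mean reverting the ISA-string part
of the change, along the lines of the sketch below (illustrative only,
not necessarily the exact fix that went in), so that "v" is never passed
to the C compiler while the emulator's inline assembly still enables the
extension locally via ".option arch, +v":

    # Sketch: default ISA string for compiling C stays without "v"
    ifeq ($(CC_SUPPORT_ZICSR_ZIFENCEI), y)
      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc_zicsr_zifencei
    else
      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc
    endif

The assembler only has to accept the V mnemonics, which the
".option arch, +v" directives in sbi_trap_v_ldst.c already handle as
long as binutils itself knows the extension.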

Jess

> Regards,
> Anup
> 
>> 
>> Thanks,
>> 
>> Clément
>> 
>> On 06/12/2024 13:18, Anup Patel wrote:
>>> On Fri, Dec 6, 2024 at 8:40 AM Nylon Chen <nylon.chen@sifive.com> wrote:
>>>> 
>>>> Add exception handling vector instructions from
>>>> 
>>>> the vector extension to the sbi_misaligned_ldst library.
>>>> 
>>>> This implementation references the misaligned_vec_ldst
>>>> implementation in the riscv-pk project.
>>>> 
>>>> Co-developed-by: Zong Li <zong.li@sifive.com>
>>>> Signed-off-by: Zong Li <zong.li@sifive.com>
>>>> Signed-off-by: Nylon Chen <nylon.chen@sifive.com>
>>>> Reviewed-by: Andy Chiu <andy.chiu@sifive.com>
>>>> Reviewed-by: Anup Patel <anup@brainfault.org>
>>> 
>>> I updated the commit description and replaced spaces with tabs
>>> for alignment at a few places at the time of merging this patch.
>>> 
>>> Applied this patch to the riscv/opensbi repo.
>>> 
>>> Thanks,
>>> Anup
>>> 
>>>> ---
>>>> Makefile                     |  11 +-
>>>> include/sbi/riscv_encoding.h | 385 ++++++++++++++++++++++++++++++++++-
>>>> include/sbi/sbi_trap_ldst.h  |   9 +
>>>> lib/sbi/objects.mk           |   1 +
>>>> lib/sbi/sbi_trap_ldst.c      |  23 ++-
>>>> lib/sbi/sbi_trap_v_ldst.c    | 341 +++++++++++++++++++++++++++++++
>>>> 6 files changed, 758 insertions(+), 12 deletions(-)
>>>> create mode 100644 lib/sbi/sbi_trap_v_ldst.c
>>>> 
>>>> diff --git a/Makefile b/Makefile
>>>> index d9cee49..5ac95a0 100644
>>>> --- a/Makefile
>>>> +++ b/Makefile
>>>> @@ -189,6 +189,9 @@ CC_SUPPORT_STRICT_ALIGN := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib
>>>> # Check whether the assembler and the compiler support the Zicsr and Zifencei extensions
>>>> CC_SUPPORT_ZICSR_ZIFENCEI := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib -march=rv$(OPENSBI_CC_XLEN)imafd_zicsr_zifencei -x c /dev/null -o /dev/null 2>&1 | grep -e "zicsr" -e "zifencei" > /dev/null && echo n || echo y)
>>>> 
>>>> +# Check whether the assembler and the compiler support the Vector extension
>>>> +CC_SUPPORT_VECT := $(shell echo | $(CC) -dM -E -march=rv$(OPENSBI_CC_XLEN)gv - | grep -q riscv.*vector && echo y || echo n)
>>>> +
>>>> ifneq ($(OPENSBI_LD_PIE),y)
>>>> $(error Your linker does not support creating PIEs, opensbi requires this.)
>>>> endif
>>>> @@ -294,10 +297,12 @@ ifndef PLATFORM_RISCV_ABI
>>>> endif
>>>> ifndef PLATFORM_RISCV_ISA
>>>>   ifneq ($(PLATFORM_RISCV_TOOLCHAIN_DEFAULT), 1)
>>>> +    PLATFORM_RISCV_ISA := rv$(PLATFORM_RISCV_XLEN)imafdc
>>>> +    ifeq ($(CC_SUPPORT_VECT), y)
>>>> +      PLATFORM_RISCV_ISA := $(PLATFORM_RISCV_ISA)v
>>>> +    endif
>>>>     ifeq ($(CC_SUPPORT_ZICSR_ZIFENCEI), y)
>>>> -      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc_zicsr_zifencei
>>>> -    else
>>>> -      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc
>>>> +      PLATFORM_RISCV_ISA := $(PLATFORM_RISCV_ISA)_zicsr_zifencei
>>>>     endif
>>>>   else
>>>>     PLATFORM_RISCV_ISA = $(OPENSBI_CC_ISA)
>>>> diff --git a/include/sbi/riscv_encoding.h b/include/sbi/riscv_encoding.h
>>>> index 38997ef..8b724b8 100644
>>>> --- a/include/sbi/riscv_encoding.h
>>>> +++ b/include/sbi/riscv_encoding.h
>>>> @@ -763,6 +763,12 @@
>>>> #define CSR_MVIPH                      0x319
>>>> #define CSR_MIPH                       0x354
>>>> 
>>>> +/* Vector extension registers */
>>>> +#define CSR_VSTART                     0x8
>>>> +#define CSR_VL                         0xc20
>>>> +#define CSR_VTYPE                      0xc21
>>>> +#define CSR_VLENB                      0xc22
>>>> +
>>>> /* ===== Trap/Exception Causes ===== */
>>>> 
>>>> #define CAUSE_MISALIGNED_FETCH         0x0
>>>> @@ -891,11 +897,364 @@
>>>> #define INSN_MASK_FENCE_TSO            0xffffffff
>>>> #define INSN_MATCH_FENCE_TSO           0x8330000f
>>>> 
>>>> +#define INSN_MASK_VECTOR_UNIT_STRIDE           0xfdf0707f
>>>> +#define INSN_MASK_VECTOR_FAULT_ONLY_FIRST      0xfdf0707f
>>>> +#define INSN_MASK_VECTOR_STRIDE                0xfc00707f
>>>> +#define INSN_MASK_VECTOR_WHOLE_REG             0xfff0707f
>>>> +#define INSN_MASK_VECTOR_INDEXED               0xfc00707f
>>>> +
>>>> +#define INSN_MATCH_VLUXSEG(n, bits) ((((n) - 1) << 29) | 0x04000007 | \
>>>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>> +#define INSN_MATCH_VSUXSEG(n, bits) ((((n) - 1) << 29) | 0x04000027 | \
>>>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>> +#define INSN_MATCH_VLOXSEG(n, bits) ((((n) - 1) << 29) | 0x0c000007 | \
>>>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>> +#define INSN_MATCH_VSOXSEG(n, bits) ((((n) - 1) << 29) | 0x0c000027 | \
>>>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>> +#define INSN_MATCH_VLSSEG(n, bits)  ((((n) - 1) << 29) | 0x08000007 | \
>>>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>> +#define INSN_MATCH_VSSSEG(n, bits)  ((((n) - 1) << 29) | 0x08000027 | \
>>>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>> +#define INSN_MATCH_VSSEG(n, bits)   ((((n) - 1) << 29) | 0x00004027 | \
>>>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>> +#define INSN_MATCH_VLSEG(n, bits)   ((((n) - 1) << 29) | 0x00004007 | \
>>>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>> +#define INSN_MATCH_VLSEGFF(n, bits) ((((n) - 1) << 29) | 0x1000007 | \
>>>> +               ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>> +
>>>> +#define INSN_MATCH_VLE16V              0x00005007
>>>> +#define INSN_MATCH_VLE32V              0x00006007
>>>> +#define INSN_MATCH_VLE64V              0x00007007
>>>> +#define INSN_MATCH_VSE16V              0x00005027
>>>> +#define INSN_MATCH_VSE32V              0x00006027
>>>> +#define INSN_MATCH_VSE64V              0x00007027
>>>> +#define INSN_MATCH_VLSE16V             0x08005007
>>>> +#define INSN_MATCH_VLSE32V             0x08006007
>>>> +#define INSN_MATCH_VLSE64V             0x08007007
>>>> +#define INSN_MATCH_VSSE16V             0x08005027
>>>> +#define INSN_MATCH_VSSE32V             0x08006027
>>>> +#define INSN_MATCH_VSSE64V             0x08007027
>>>> +#define INSN_MATCH_VLOXEI16V           0x0c005007
>>>> +#define INSN_MATCH_VLOXEI32V           0x0c006007
>>>> +#define INSN_MATCH_VLOXEI64V           0x0c007007
>>>> +#define INSN_MATCH_VSOXEI16V           0x0c005027
>>>> +#define INSN_MATCH_VSOXEI32V           0x0c006027
>>>> +#define INSN_MATCH_VSOXEI64V           0x0c007027
>>>> +#define INSN_MATCH_VLUXEI16V           0x04005007
>>>> +#define INSN_MATCH_VLUXEI32V           0x04006007
>>>> +#define INSN_MATCH_VLUXEI64V           0x04007007
>>>> +#define INSN_MATCH_VSUXEI16V           0x04005027
>>>> +#define INSN_MATCH_VSUXEI32V           0x04006027
>>>> +#define INSN_MATCH_VSUXEI64V           0x04007027
>>>> +#define INSN_MATCH_VLE16FFV            0x01005007
>>>> +#define INSN_MATCH_VLE32FFV            0x01006007
>>>> +#define INSN_MATCH_VLE64FFV            0x01007007
>>>> +#define INSN_MATCH_VL1RE8V             0x02800007
>>>> +#define INSN_MATCH_VL1RE16V            0x02805007
>>>> +#define INSN_MATCH_VL1RE32V            0x02806007
>>>> +#define INSN_MATCH_VL1RE64V            0x02807007
>>>> +#define INSN_MATCH_VL2RE8V             0x22800007
>>>> +#define INSN_MATCH_VL2RE16V            0x22805007
>>>> +#define INSN_MATCH_VL2RE32V            0x22806007
>>>> +#define INSN_MATCH_VL2RE64V            0x22807007
>>>> +#define INSN_MATCH_VL4RE8V             0x62800007
>>>> +#define INSN_MATCH_VL4RE16V            0x62805007
>>>> +#define INSN_MATCH_VL4RE32V            0x62806007
>>>> +#define INSN_MATCH_VL4RE64V            0x62807007
>>>> +#define INSN_MATCH_VL8RE8V             0xe2800007
>>>> +#define INSN_MATCH_VL8RE16V            0xe2805007
>>>> +#define INSN_MATCH_VL8RE32V            0xe2806007
>>>> +#define INSN_MATCH_VL8RE64V            0xe2807007
>>>> +#define INSN_MATCH_VS1RV               0x02800027
>>>> +#define INSN_MATCH_VS2RV               0x22800027
>>>> +#define INSN_MATCH_VS4RV               0x62800027
>>>> +#define INSN_MATCH_VS8RV               0xe2800027
>>>> +
>>>> +#define INSN_MASK_VECTOR_LOAD_STORE    0x7f
>>>> +#define INSN_MATCH_VECTOR_LOAD         0x07
>>>> +#define INSN_MATCH_VECTOR_STORE                0x27
>>>> +
>>>> +#define IS_VECTOR_LOAD_STORE(insn) \
>>>> +       ((((insn) & INSN_MASK_VECTOR_LOAD_STORE) == INSN_MATCH_VECTOR_LOAD) || \
>>>> +       (((insn) & INSN_MASK_VECTOR_LOAD_STORE) == INSN_MATCH_VECTOR_STORE))
>>>> +
>>>> +#define IS_VECTOR_INSN_MATCH(insn, match, mask) \
>>>> +       (((insn) & (mask)) == ((match) & (mask)))
>>>> +
>>>> +#define IS_UNIT_STRIDE_MATCH(insn, match) \
>>>> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_UNIT_STRIDE)
>>>> +
>>>> +#define IS_STRIDE_MATCH(insn, match) \
>>>> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_STRIDE)
>>>> +
>>>> +#define IS_INDEXED_MATCH(insn, match) \
>>>> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_INDEXED)
>>>> +
>>>> +#define IS_FAULT_ONLY_FIRST_MATCH(insn, match) \
>>>> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_FAULT_ONLY_FIRST)
>>>> +
>>>> +#define IS_WHOLE_REG_MATCH(insn, match) \
>>>> +       IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_WHOLE_REG)
>>>> +
>>>> +#define IS_UNIT_STRIDE_LOAD(insn) ( \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE16V) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE32V) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE64V) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 64)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 64)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 64)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 64)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 64)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 64)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 64)))
>>>> +
>>>> +#define IS_UNIT_STRIDE_STORE(insn) ( \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE16V) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE32V) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE64V) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 16)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 32)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 64)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 64)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 64)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 64)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 64)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 64)) || \
>>>> +       IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 64)))
>>>> +
>>>> +#define IS_STRIDE_LOAD(insn) ( \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE16V) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE32V) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE64V) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 64)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 64)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 64)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 64)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 64)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 64)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 64)))
>>>> +
>>>> +#define IS_STRIDE_STORE(insn) ( \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE16V) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE32V) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE64V) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 16)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 32)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 64)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 64)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 64)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 64)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 64)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 64)) || \
>>>> +       IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 64)))
>>>> +
>>>> +#define IS_INDEXED_LOAD(insn) ( \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI16V) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI32V) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI64V) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI16V) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI32V) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI64V) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 64)))
>>>> +
>>>> +#define IS_INDEXED_STORE(insn) ( \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI16V) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI32V) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI64V) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI16V) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI32V) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI64V) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 16)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 32)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 64)) || \
>>>> +       IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 64)))
>>>> +
>>>> +#define IS_FAULT_ONLY_FIRST_LOAD(insn) ( \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE16FFV) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE32FFV) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE64FFV) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 16)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 16)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 16)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 16)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 16)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 16)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 16)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 32)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 32)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 32)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 32)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 32)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 32)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 32)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 64)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 64)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 64)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 64)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 64)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 64)) || \
>>>> +       IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 64)))
>>>> +
>>>> +#define IS_WHOLE_REG_LOAD(insn) ( \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE8V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE16V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE32V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE64V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE8V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE16V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE32V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE64V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE8V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE16V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE32V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE64V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE8V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE16V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE32V) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE64V))
>>>> +
>>>> +#define IS_WHOLE_REG_STORE(insn) ( \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS1RV) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS2RV) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS4RV) || \
>>>> +       IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS8RV))
>>>> +
>>>> +
>>>> #if __riscv_xlen == 64
>>>> 
>>>> /* 64-bit read for VS-stage address translation (RV64) */
>>>> #define INSN_PSEUDO_VS_LOAD            0x00003000
>>>> -
>>>> /* 64-bit write for VS-stage address translation (RV64) */
>>>> #define INSN_PSEUDO_VS_STORE   0x00003020
>>>> 
>>>> @@ -911,6 +1270,12 @@
>>>> #error "Unexpected __riscv_xlen"
>>>> #endif
>>>> 
>>>> +#define VM_MASK                                0x1
>>>> +#define VIEW_MASK                              0x3
>>>> +#define VSEW_MASK                              0x3
>>>> +#define VLMUL_MASK                             0x7
>>>> +#define VD_MASK                                0x1f
>>>> +#define VS2_MASK                               0x1f
>>>> #define INSN_16BIT_MASK                        0x3
>>>> #define INSN_32BIT_MASK                        0x1c
>>>> 
>>>> @@ -929,6 +1294,12 @@
>>>> #endif
>>>> #define REGBYTES                       (1 << LOG_REGBYTES)
>>>> 
>>>> +#define SH_VSEW                        3
>>>> +#define SH_VIEW                        12
>>>> +#define SH_VD                          7
>>>> +#define SH_VS2                         20
>>>> +#define SH_VM                          25
>>>> +#define SH_MEW                         28
>>>> #define SH_RD                          7
>>>> #define SH_RS1                         15
>>>> #define SH_RS2                         20
>>>> @@ -982,6 +1353,18 @@
>>>> #define IMM_S(insn)                    (((s32)(insn) >> 25 << 5) | \
>>>>                                         (s32)(((insn) >> 7) & 0x1f))
>>>> 
>>>> +#define IS_MASKED(insn)                (((insn >> SH_VM) & VM_MASK) == 0)
>>>> +#define GET_VD(insn)                   ((insn >> SH_VD) & VD_MASK)
>>>> +#define GET_VS2(insn)                  ((insn >> SH_VS2) & VS2_MASK)
>>>> +#define GET_VIEW(insn)                 (((insn) >> SH_VIEW) & VIEW_MASK)
>>>> +#define GET_MEW(insn)                  (((insn) >> SH_MEW) & 1)
>>>> +#define GET_VSEW(vtype)                (((vtype) >> SH_VSEW) & VSEW_MASK)
>>>> +#define GET_VLMUL(vtype)               ((vtype) & VLMUL_MASK)
>>>> +#define GET_LEN(view)                  (1UL << (view))
>>>> +#define GET_NF(insn)                   (1 + ((insn >> 29) & 7))
>>>> +#define GET_VEMUL(vlmul, view, vsew)   ((vlmul + view - vsew) & 7)
>>>> +#define GET_EMUL(vemul)                (1UL << ((vemul) >= 4 ? 0 : (vemul)))
>>>> +
>>>> #define MASK_FUNCT3                    0x7000
>>>> #define MASK_RS1                       0xf8000
>>>> #define MASK_CSR                       0xfff00000
>>>> diff --git a/include/sbi/sbi_trap_ldst.h b/include/sbi/sbi_trap_ldst.h
>>>> index 4c5cc37..34877cc 100644
>>>> --- a/include/sbi/sbi_trap_ldst.h
>>>> +++ b/include/sbi/sbi_trap_ldst.h
>>>> @@ -30,4 +30,13 @@ int sbi_store_access_handler(struct sbi_trap_context *tcntx);
>>>> 
>>>> int sbi_double_trap_handler(struct sbi_trap_context *tcntx);
>>>> 
>>>> +ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
>>>> +                                ulong addr_offset);
>>>> +
>>>> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
>>>> +                                struct sbi_trap_context *tcntx);
>>>> +
>>>> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
>>>> +                                struct sbi_trap_context *tcntx);
>>>> +
>>>> #endif
>>>> diff --git a/lib/sbi/objects.mk b/lib/sbi/objects.mk
>>>> index a6f7c5f..47a0866 100644
>>>> --- a/lib/sbi/objects.mk
>>>> +++ b/lib/sbi/objects.mk
>>>> @@ -91,6 +91,7 @@ libsbi-objs-y += sbi_timer.o
>>>> libsbi-objs-y += sbi_tlb.o
>>>> libsbi-objs-y += sbi_trap.o
>>>> libsbi-objs-y += sbi_trap_ldst.o
>>>> +libsbi-objs-y += sbi_trap_v_ldst.o
>>>> libsbi-objs-y += sbi_unpriv.o
>>>> libsbi-objs-y += sbi_expected_trap.o
>>>> libsbi-objs-y += sbi_cppc.o
>>>> diff --git a/lib/sbi/sbi_trap_ldst.c b/lib/sbi/sbi_trap_ldst.c
>>>> index ebc4a92..448406b 100644
>>>> --- a/lib/sbi/sbi_trap_ldst.c
>>>> +++ b/lib/sbi/sbi_trap_ldst.c
>>>> @@ -32,7 +32,7 @@ typedef int (*sbi_trap_ld_emulator)(int rlen, union sbi_ldst_data *out_val,
>>>> typedef int (*sbi_trap_st_emulator)(int wlen, union sbi_ldst_data in_val,
>>>>                                    struct sbi_trap_context *tcntx);
>>>> 
>>>> -static ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
>>>> +ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
>>>>                                        ulong addr_offset)
>>>> {
>>>>        if (new_tinst == INSN_PSEUDO_VS_LOAD ||
>>>> @@ -52,7 +52,7 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
>>>>        ulong insn, insn_len;
>>>>        union sbi_ldst_data val = { 0 };
>>>>        struct sbi_trap_info uptrap;
>>>> -       int rc, fp = 0, shift = 0, len = 0;
>>>> +       int rc, fp = 0, shift = 0, len = 0, vector = 0;
>>>> 
>>>>        if (orig_trap->tinst & 0x1) {
>>>>                /*
>>>> @@ -144,6 +144,9 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
>>>>                len = 2;
>>>>                shift = 8 * (sizeof(ulong) - len);
>>>>                insn = RVC_RS2S(insn) << SH_RD;
>>>> +       } else if (IS_VECTOR_LOAD_STORE(insn)) {
>>>> +               vector = 1;
>>>> +               emu = sbi_misaligned_v_ld_emulator;
>>>>        } else {
>>>>                return sbi_trap_redirect(regs, orig_trap);
>>>>        }
>>>> @@ -152,14 +155,16 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
>>>>        if (rc <= 0)
>>>>                return rc;
>>>> 
>>>> -       if (!fp)
>>>> -               SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
>>>> +       if (!vector) {
>>>> +               if (!fp)
>>>> +                       SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
>>>> #ifdef __riscv_flen
>>>> -       else if (len == 8)
>>>> -               SET_F64_RD(insn, regs, val.data_u64);
>>>> -       else
>>>> -               SET_F32_RD(insn, regs, val.data_ulong);
>>>> +               else if (len == 8)
>>>> +                       SET_F64_RD(insn, regs, val.data_u64);
>>>> +               else
>>>> +                       SET_F32_RD(insn, regs, val.data_ulong);
>>>> #endif
>>>> +       }
>>>> 
>>>>        regs->mepc += insn_len;
>>>> 
>>>> @@ -248,6 +253,8 @@ static int sbi_trap_emulate_store(struct sbi_trap_context *tcntx,
>>>>        } else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) {
>>>>                len             = 2;
>>>>                val.data_ulong = GET_RS2S(insn, regs);
>>>> +       } else if (IS_VECTOR_LOAD_STORE(insn)) {
>>>> +               emu = sbi_misaligned_v_st_emulator;
>>>>        } else {
>>>>                return sbi_trap_redirect(regs, orig_trap);
>>>>        }
>>>> diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
>>>> new file mode 100644
>>>> index 0000000..72b2309
>>>> --- /dev/null
>>>> +++ b/lib/sbi/sbi_trap_v_ldst.c
>>>> @@ -0,0 +1,341 @@
>>>> +/*
>>>> + * SPDX-License-Identifier: BSD-2-Clause
>>>> + *
>>>> + * Copyright (c) 2024 SiFive Inc.
>>>> + *
>>>> + * Authors:
>>>> + *   Andrew Waterman <andrew@sifive.com>
>>>> + *   Nylon Chen <nylon.chen@sifive.com>
>>>> + *   Zong Li <zong.li@sifive.com>
>>>> + */
>>>> +
>>>> +#include <sbi/riscv_asm.h>
>>>> +#include <sbi/riscv_encoding.h>
>>>> +#include <sbi/sbi_error.h>
>>>> +#include <sbi/sbi_trap_ldst.h>
>>>> +#include <sbi/sbi_trap.h>
>>>> +#include <sbi/sbi_unpriv.h>
>>>> +#include <sbi/sbi_trap.h>
>>>> +
>>>> +#ifdef __riscv_vector
>>>> +#define VLEN_MAX 65536
>>>> +
>>>> +static inline void set_vreg(ulong vlenb, ulong which, ulong pos, ulong size, const uint8_t *bytes)
>>>> +{
>>>> +       pos += (which % 8) * vlenb;
>>>> +       bytes -= pos;
>>>> +
>>>> +       asm volatile (
>>>> +               "       .option push\n\t"
>>>> +               "       .option arch, +v\n\t"
>>>> +               "       vsetvli x0, %0, e8, m8, tu, ma\n\t"
>>>> +               "       .option pop\n\t"
>>>> +               :: "r" (pos + size));
>>>> +
>>>> +       csr_write(CSR_VSTART, pos);
>>>> +
>>>> +       switch (which / 8) {
>>>> +       case 0:
>>>> +               asm volatile (
>>>> +                       "       .option push\n\t"
>>>> +                       "       .option arch, +v\n\t"
>>>> +                       "       vle8.v v0,  (%0)\n\t"
>>>> +                       "       .option pop\n\t"
>>>> +                       :: "r" (bytes) : "memory");
>>>> +               break;
>>>> +       case 1:
>>>> +               asm volatile (
>>>> +                       "       .option push\n\t"
>>>> +                       "       .option arch, +v\n\t"
>>>> +                       "       vle8.v v8,  (%0)\n\t"
>>>> +                       "       .option pop\n\t"
>>>> +                       :: "r" (bytes) : "memory");
>>>> +               break;
>>>> +       case 2:
>>>> +               asm volatile (
>>>> +                       "       .option push\n\t"
>>>> +                       "       .option arch, +v\n\t"
>>>> +                       "       vle8.v v16,  (%0)\n\t"
>>>> +                       "       .option pop\n\t"
>>>> +                       :: "r" (bytes) : "memory");
>>>> +               break;
>>>> +       case 3:
>>>> +               asm volatile (
>>>> +                       "       .option push\n\t"
>>>> +                       "       .option arch, +v\n\t"
>>>> +                       "       vle8.v v24,  (%0)\n\t"
>>>> +                       "       .option pop\n\t"
>>>> +                       :: "r" (bytes) : "memory");
>>>> +               break;
>>>> +       default:
>>>> +               break;
>>>> +       }
>>>> +}
>>>> +
>>>> +static inline void get_vreg(ulong vlenb, ulong which, ulong pos, ulong size, uint8_t *bytes)
>>>> +{
>>>> +       pos += (which % 8) * vlenb;
>>>> +       bytes -= pos;
>>>> +
>>>> +       asm volatile (
>>>> +               "       .option push\n\t"
>>>> +               "       .option arch, +v\n\t"
>>>> +               "       vsetvli x0, %0, e8, m8, tu, ma\n\t"
>>>> +               "       .option pop\n\t"
>>>> +               :: "r" (pos + size));
>>>> +
>>>> +       csr_write(CSR_VSTART, pos);
>>>> +
>>>> +       switch (which / 8) {
>>>> +       case 0:
>>>> +               asm volatile (
>>>> +                       "       .option push\n\t"
>>>> +                       "       .option arch, +v\n\t"
>>>> +                       "       vse8.v v0,  (%0)\n\t"
>>>> +                       "       .option pop\n\t"
>>>> +                       :: "r" (bytes) : "memory");
>>>> +               break;
>>>> +       case 1:
>>>> +               asm volatile (
>>>> +                       "       .option push\n\t"
>>>> +                       "       .option arch, +v\n\t"
>>>> +                       "       vse8.v v8,  (%0)\n\t"
>>>> +                       "       .option pop\n\t"
>>>> +                       :: "r" (bytes) : "memory");
>>>> +               break;
>>>> +       case 2:
>>>> +               asm volatile (
>>>> +                       "       .option push\n\t"
>>>> +                       "       .option arch, +v\n\t"
>>>> +                       "       vse8.v v16, (%0)\n\t"
>>>> +                       "       .option pop\n\t"
>>>> +                       :: "r" (bytes) : "memory");
>>>> +               break;
>>>> +       case 3:
>>>> +               asm volatile (
>>>> +                               ".option push\n\t"
>>>> +                               ".option arch, +v\n\t"
>>>> +                               "vse8.v v24, (%0)\n\t"
>>>> +                               ".option pop\n\t"
>>>> +                               :: "r" (bytes) : "memory");
>>>> +               break;
>>>> +       default:
>>>> +               break;
>>>> +       }
>>>> +}
>>>> +
>>>> +static inline void vsetvl(ulong vl, ulong vtype)
>>>> +{
>>>> +       asm volatile (
>>>> +               "       .option push\n\t"
>>>> +               "       .option arch, +v\n\t"
>>>> +               "       vsetvl x0, %0, %1\n\t"
>>>> +               "       .option pop\n\t"
>>>> +                       :: "r" (vl), "r" (vtype));
>>>> +}
>>>> +
>>>> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
>>>> +                                struct sbi_trap_context *tcntx)
>>>> +{
>>>> +       const struct sbi_trap_info *orig_trap = &tcntx->trap;
>>>> +       struct sbi_trap_regs *regs = &tcntx->regs;
>>>> +       struct sbi_trap_info uptrap;
>>>> +       ulong insn = sbi_get_insn(regs->mepc, &uptrap);
>>>> +       ulong vl = csr_read(CSR_VL);
>>>> +       ulong vtype = csr_read(CSR_VTYPE);
>>>> +       ulong vlenb = csr_read(CSR_VLENB);
>>>> +       ulong vstart = csr_read(CSR_VSTART);
>>>> +       ulong base = GET_RS1(insn, regs);
>>>> +       ulong stride = GET_RS2(insn, regs);
>>>> +       ulong vd = GET_VD(insn);
>>>> +       ulong vs2 = GET_VS2(insn);
>>>> +       ulong view = GET_VIEW(insn);
>>>> +       ulong vsew = GET_VSEW(vtype);
>>>> +       ulong vlmul = GET_VLMUL(vtype);
>>>> +       bool illegal = GET_MEW(insn);
>>>> +       bool masked = IS_MASKED(insn);
>>>> +       uint8_t mask[VLEN_MAX / 8];
>>>> +       uint8_t bytes[8 * sizeof(uint64_t)];
>>>> +       ulong len = GET_LEN(view);
>>>> +       ulong nf = GET_NF(insn);
>>>> +       ulong vemul = GET_VEMUL(vlmul, view, vsew);
>>>> +       ulong emul = GET_EMUL(vemul);
>>>> +
>>>> +       if (IS_UNIT_STRIDE_LOAD(insn) || IS_FAULT_ONLY_FIRST_LOAD(insn)) {
>>>> +               stride = nf * len;
>>>> +       } else if (IS_WHOLE_REG_LOAD(insn)) {
>>>> +               vl = (nf * vlenb) >> view;
>>>> +               nf = 1;
>>>> +               vemul = 0;
>>>> +               emul = 1;
>>>> +               stride = nf * len;
>>>> +       } else if (IS_INDEXED_LOAD(insn)) {
>>>> +               len = 1 << vsew;
>>>> +               vemul = (vlmul + vsew - vsew) & 7;
>>>> +               emul = 1 << ((vemul & 4) ? 0 : vemul);
>>>> +               stride = nf * len;
>>>> +       }
>>>> +
>>>> +       if (illegal || vlenb > VLEN_MAX / 8) {
>>>> +               struct sbi_trap_info trap = {
>>>> +                       uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION,
>>>> +                       uptrap.tval = insn,
>>>> +               };
>>>> +               return sbi_trap_redirect(regs, &trap);
>>>> +       }
>>>> +
>>>> +       if (masked)
>>>> +               get_vreg(vlenb, 0, 0, vlenb, mask);
>>>> +
>>>> +       do {
>>>> +               if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
>>>> +                       // compute element address
>>>> +                       ulong addr = base + vstart * stride;
>>>> +
>>>> +                       if (IS_INDEXED_LOAD(insn)) {
>>>> +                               ulong offset = 0;
>>>> +
>>>> +                               get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
>>>> +                               addr = base + offset;
>>>> +                       }
>>>> +
>>>> +                       csr_write(CSR_VSTART, vstart);
>>>> +
>>>> +                       // obtain load data from memory
>>>> +                       for (ulong seg = 0; seg < nf; seg++) {
>>>> +                               for (ulong i = 0; i < len; i++) {
>>>> +                                       bytes[seg * len + i] =
>>>> +                                               sbi_load_u8((void *)(addr + seg * len + i),
>>>> +                                                           &uptrap);
>>>> +
>>>> +                                       if (uptrap.cause) {
>>>> +                                               if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
>>>> +                                                       vl = vstart;
>>>> +                                                       break;
>>>> +                                               }
>>>> +                                               vsetvl(vl, vtype);
>>>> +                                               uptrap.tinst = sbi_misaligned_tinst_fixup(
>>>> +                                                       orig_trap->tinst, uptrap.tinst, i);
>>>> +                                               return sbi_trap_redirect(regs, &uptrap);
>>>> +                                       }
>>>> +                               }
>>>> +                       }
>>>> +
>>>> +                       // write load data to regfile
>>>> +                       for (ulong seg = 0; seg < nf; seg++)
>>>> +                               set_vreg(vlenb, vd + seg * emul, vstart * len,
>>>> +                                        len, &bytes[seg * len]);
>>>> +               }
>>>> +       } while (++vstart < vl);
>>>> +
>>>> +       // restore clobbered vl/vtype
>>>> +       vsetvl(vl, vtype);
>>>> +
>>>> +       return vl;
>>>> +}
>>>> +
>>>> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
>>>> +                                struct sbi_trap_context *tcntx)
>>>> +{
>>>> +       const struct sbi_trap_info *orig_trap = &tcntx->trap;
>>>> +       struct sbi_trap_regs *regs = &tcntx->regs;
>>>> +       struct sbi_trap_info uptrap;
>>>> +       ulong insn = sbi_get_insn(regs->mepc, &uptrap);
>>>> +       ulong vl = csr_read(CSR_VL);
>>>> +       ulong vtype = csr_read(CSR_VTYPE);
>>>> +       ulong vlenb = csr_read(CSR_VLENB);
>>>> +       ulong vstart = csr_read(CSR_VSTART);
>>>> +       ulong base = GET_RS1(insn, regs);
>>>> +       ulong stride = GET_RS2(insn, regs);
>>>> +       ulong vd = GET_VD(insn);
>>>> +       ulong vs2 = GET_VS2(insn);
>>>> +       ulong view = GET_VIEW(insn);
>>>> +       ulong vsew = GET_VSEW(vtype);
>>>> +       ulong vlmul = GET_VLMUL(vtype);
>>>> +       bool illegal = GET_MEW(insn);
>>>> +       bool masked = IS_MASKED(insn);
>>>> +       uint8_t mask[VLEN_MAX / 8];
>>>> +       uint8_t bytes[8 * sizeof(uint64_t)];
>>>> +       ulong len = GET_LEN(view);
>>>> +       ulong nf = GET_NF(insn);
>>>> +       ulong vemul = GET_VEMUL(vlmul, view, vsew);
>>>> +       ulong emul = GET_EMUL(vemul);
>>>> +
>>>> +       if (IS_UNIT_STRIDE_STORE(insn)) {
>>>> +               stride = nf * len;
>>>> +       } else if (IS_WHOLE_REG_STORE(insn)) {
>>>> +               vl = (nf * vlenb) >> view;
>>>> +               nf = 1;
>>>> +               vemul = 0;
>>>> +               emul = 1;
>>>> +               stride = nf * len;
>>>> +       } else if (IS_INDEXED_STORE(insn)) {
>>>> +               len = 1 << vsew;
>>>> +               vemul = (vlmul + vsew - vsew) & 7;
>>>> +               emul = 1 << ((vemul & 4) ? 0 : vemul);
>>>> +               stride = nf * len;
>>>> +       }
>>>> +
>>>> +       if (illegal || vlenb > VLEN_MAX / 8) {
>>>> +               struct sbi_trap_info trap = {
>>>> +                       uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION,
>>>> +                       uptrap.tval = insn,
>>>> +               };
>>>> +               return sbi_trap_redirect(regs, &trap);
>>>> +       }
>>>> +
>>>> +       if (masked)
>>>> +               get_vreg(vlenb, 0, 0, vlenb, mask);
>>>> +
>>>> +       do {
>>>> +               if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
>>>> +                       // compute element address
>>>> +                       ulong addr = base + vstart * stride;
>>>> +
>>>> +                       if (IS_INDEXED_STORE(insn)) {
>>>> +                               ulong offset = 0;
>>>> +
>>>> +                               get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
>>>> +                               addr = base + offset;
>>>> +                       }
>>>> +
>>>> +                       // obtain store data from regfile
>>>> +                       for (ulong seg = 0; seg < nf; seg++)
>>>> +                               get_vreg(vlenb, vd + seg * emul, vstart * len,
>>>> +                                        len, &bytes[seg * len]);
>>>> +
>>>> +                       csr_write(CSR_VSTART, vstart);
>>>> +
>>>> +                       // write store data to memory
>>>> +                       for (ulong seg = 0; seg < nf; seg++) {
>>>> +                               for (ulong i = 0; i < len; i++) {
>>>> +                                       sbi_store_u8((void *)(addr + seg * len + i),
>>>> +                                                    bytes[seg * len + i], &uptrap);
>>>> +                                       if (uptrap.cause) {
>>>> +                                               vsetvl(vl, vtype);
>>>> +                                               uptrap.tinst = sbi_misaligned_tinst_fixup(
>>>> +                                                       orig_trap->tinst, uptrap.tinst, i);
>>>> +                                               return sbi_trap_redirect(regs, &uptrap);
>>>> +                                       }
>>>> +                               }
>>>> +                       }
>>>> +               }
>>>> +       } while (++vstart < vl);
>>>> +
>>>> +       // restore clobbered vl/vtype
>>>> +       vsetvl(vl, vtype);
>>>> +
>>>> +       return vl;
>>>> +}
>>>> +#else
>>>> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
>>>> +                                struct sbi_trap_context *tcntx)
>>>> +{
>>>> +       return 0;
>>>> +}
>>>> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
>>>> +                                struct sbi_trap_context *tcntx)
>>>> +{
>>>> +       return 0;
>>>> +}
>>>> +#endif /* __riscv_vector  */
>>>> --
>>>> 2.34.1
>>>> 
Jessica Clarke Dec. 8, 2024, 8:28 p.m. UTC | #5
On 6 Dec 2024, at 12:18, Anup Patel <anup@brainfault.org> wrote:
> 
> On Fri, Dec 6, 2024 at 8:40 AM Nylon Chen <nylon.chen@sifive.com> wrote:
>> 
>> Add exception handling vector instructions from
>> 
>> the vector extension to the sbi_misaligned_ldst library.
>> 
>> This implementation references the misaligned_vec_ldst
>> implementation in the riscv-pk project.
>> 
>> Co-developed-by: Zong Li <zong.li@sifive.com>
>> Signed-off-by: Zong Li <zong.li@sifive.com>
>> Signed-off-by: Nylon Chen <nylon.chen@sifive.com>
>> Reviewed-by: Andy Chiu <andy.chiu@sifive.com>
>> Reviewed-by: Anup Patel <anup@brainfault.org>
> 
> I updated the commit description and replaced spaces with tabs
> for alignment at a few places at the time of merging this patch.
> 
> Applied this patch to the riscv/opensbi repo.
> 
> Thanks,
> Anup
> 
>> ---
>> Makefile                     |  11 +-
>> include/sbi/riscv_encoding.h | 385 ++++++++++++++++++++++++++++++++++-
>> include/sbi/sbi_trap_ldst.h  |   9 +
>> lib/sbi/objects.mk           |   1 +
>> lib/sbi/sbi_trap_ldst.c      |  23 ++-
>> lib/sbi/sbi_trap_v_ldst.c    | 341 +++++++++++++++++++++++++++++++
>> 6 files changed, 758 insertions(+), 12 deletions(-)
>> create mode 100644 lib/sbi/sbi_trap_v_ldst.c
>> 
>> diff --git a/Makefile b/Makefile
>> index d9cee49..5ac95a0 100644
>> --- a/Makefile
>> +++ b/Makefile
>> @@ -189,6 +189,9 @@ CC_SUPPORT_STRICT_ALIGN := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib
>> # Check whether the assembler and the compiler support the Zicsr and Zifencei extensions
>> CC_SUPPORT_ZICSR_ZIFENCEI := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib -march=rv$(OPENSBI_CC_XLEN)imafd_zicsr_zifencei -x c /dev/null -o /dev/null 2>&1 | grep -e "zicsr" -e "zifencei" > /dev/null && echo n || echo y)
>> 
>> +# Check whether the assembler and the compiler support the Vector extension
>> +CC_SUPPORT_VECT := $(shell echo | $(CC) -dM -E -march=rv$(OPENSBI_CC_XLEN)gv - | grep -q riscv.*vector && echo y || echo n)

No 2>/dev/null on that CC invocation?

Jess
Anup Patel Dec. 9, 2024, 3:56 a.m. UTC | #6
On Mon, Dec 9, 2024 at 1:57 AM Jessica Clarke <jrtc27@jrtc27.com> wrote:
>
> On 7 Dec 2024, at 14:31, Anup Patel <apatel@ventanamicro.com> wrote:
> >
> > On Sat, Dec 7, 2024 at 2:27 AM Clément Léger <cleger@rivosinc.com> wrote:
> >>
> >> Hi Anup & Nylon,
> >>
> >> This commits breaks openSBI boot on my setup. A quick analysis of the
> >> problem shows that the changes to objects.mk now allows GCC to generate
> >> vector assembly in OpenSBI. It now crashes in bitmap_zero() really early
> >> (Used in fw_platform_coldboot_harts_init()) due to vector instructions
> >> being used:
> >>
> >>   0x0000000080007800 <+474>:   mv      a0,s2
> >>   0x0000000080007802 <+476>:   sw      a5,88(s4)
> >>   0x0000000080007806 <+480>:   jal     0x8001adbe <fdt_check_imsic_mlevel>
> >> => 0x000000008000780a <+484>:   vsetivli        zero,2,e64,m1,ta,ma
> >>   0x000000008000780e <+488>:   vmv.v.i v1,0
> >>   0x0000000080007812 <+492>:   mv      a5,a0
> >>
> >>
> >> And then it raises an illegal instruction probably because V isn't
> >> enabled. But I'm guessing we do not expect to use vector code in OpenSBI
> >> at all ? I'm using a fairly new qemu if that is of any help
> >> (248f9209edfd289e7d97fb323e5075ccd55cc157).
> >
> > Ahh, it seems auto-vectorization is enabled by default.
> >
> > We certainly don't want to use vector code in OpenSBI
> > at the moment.
> >
> > How about using the compiler command-line option to
> > force disable auto-vectorization ?
>
> Just don’t put V in the ISA string? The assembly already does
>
>     .option arch, +v
>
> AFAICT there’s no reason for any C to be compiled with V.

This is certainly another way, but my worry is that compilation would
fail for older toolchains without vector support, hence my suggestion
to disable auto-vectorization.

Regards,
Anup
Anup Patel Dec. 9, 2024, 3:57 a.m. UTC | #7
On Mon, Dec 9, 2024 at 1:58 AM Jessica Clarke <jrtc27@jrtc27.com> wrote:
>
> On 6 Dec 2024, at 12:18, Anup Patel <anup@brainfault.org> wrote:
> >
> > On Fri, Dec 6, 2024 at 8:40 AM Nylon Chen <nylon.chen@sifive.com> wrote:
> >>
> >> Add exception handling vector instructions from
> >>
> >> the vector extension to the sbi_misaligned_ldst library.
> >>
> >> This implementation references the misaligned_vec_ldst
> >> implementation in the riscv-pk project.
> >>
> >> Co-developed-by: Zong Li <zong.li@sifive.com>
> >> Signed-off-by: Zong Li <zong.li@sifive.com>
> >> Signed-off-by: Nylon Chen <nylon.chen@sifive.com>
> >> Reviewed-by: Andy Chiu <andy.chiu@sifive.com>
> >> Reviewed-by: Anup Patel <anup@brainfault.org>
> >
> > I updated the commit description and replaced spaces with tabs
> > for alignment at a few places at the time of merging this patch.
> >
> > Applied this patch to the riscv/opensbi repo.
> >
> > Thanks,
> > Anup
> >
> >> ---
> >> Makefile                     |  11 +-
> >> include/sbi/riscv_encoding.h | 385 ++++++++++++++++++++++++++++++++++-
> >> include/sbi/sbi_trap_ldst.h  |   9 +
> >> lib/sbi/objects.mk           |   1 +
> >> lib/sbi/sbi_trap_ldst.c      |  23 ++-
> >> lib/sbi/sbi_trap_v_ldst.c    | 341 +++++++++++++++++++++++++++++++
> >> 6 files changed, 758 insertions(+), 12 deletions(-)
> >> create mode 100644 lib/sbi/sbi_trap_v_ldst.c
> >>
> >> diff --git a/Makefile b/Makefile
> >> index d9cee49..5ac95a0 100644
> >> --- a/Makefile
> >> +++ b/Makefile
> >> @@ -189,6 +189,9 @@ CC_SUPPORT_STRICT_ALIGN := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib
> >> # Check whether the assembler and the compiler support the Zicsr and Zifencei extensions
> >> CC_SUPPORT_ZICSR_ZIFENCEI := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib -march=rv$(OPENSBI_CC_XLEN)imafd_zicsr_zifencei -x c /dev/null -o /dev/null 2>&1 | grep -e "zicsr" -e "zifencei" > /dev/null && echo n || echo y)
> >>
> >> +# Check whether the assembler and the compiler support the Vector extension
> >> +CC_SUPPORT_VECT := $(shell echo | $(CC) -dM -E -march=rv$(OPENSBI_CC_XLEN)gv - | grep -q riscv.*vector && echo y || echo n)
>
> No 2>/dev/null on that CC invocation?
>

Ahh, yes. This needs to be fixed.

Regards,
Anup
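For reference, a minimal sketch of that fix, assuming the probe keeps
its current grep-based shape (the exact form in the merged tree may
differ):

# Sketch only: same probe as above, with compiler stderr discarded
CC_SUPPORT_VECT := $(shell echo | $(CC) -dM -E -march=rv$(OPENSBI_CC_XLEN)gv - 2>/dev/null | grep -q riscv.*vector && echo y || echo n)

The only change is the 2>/dev/null on the compiler invocation, so a
toolchain that rejects "v" fails the probe quietly instead of leaking
its diagnostics into the make output.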
Jessica Clarke Dec. 9, 2024, 4:48 a.m. UTC | #8
On 9 Dec 2024, at 03:56, Anup Patel <anup@brainfault.org> wrote:
> 
> On Mon, Dec 9, 2024 at 1:57 AM Jessica Clarke <jrtc27@jrtc27.com> wrote:
>> 
>> On 7 Dec 2024, at 14:31, Anup Patel <apatel@ventanamicro.com> wrote:
>>> 
>>> On Sat, Dec 7, 2024 at 2:27 AM Clément Léger <cleger@rivosinc.com> wrote:
>>>> 
>>>> Hi Anup & Nylon,
>>>> 
>>>> This commits breaks openSBI boot on my setup. A quick analysis of the
>>>> problem shows that the changes to objects.mk now allows GCC to generate
>>>> vector assembly in OpenSBI. It now crashes in bitmap_zero() really early
>>>> (Used in fw_platform_coldboot_harts_init()) due to vector instructions
>>>> being used:
>>>> 
>>>>  0x0000000080007800 <+474>:   mv      a0,s2
>>>>  0x0000000080007802 <+476>:   sw      a5,88(s4)
>>>>  0x0000000080007806 <+480>:   jal     0x8001adbe <fdt_check_imsic_mlevel>
>>>> => 0x000000008000780a <+484>:   vsetivli        zero,2,e64,m1,ta,ma
>>>>  0x000000008000780e <+488>:   vmv.v.i v1,0
>>>>  0x0000000080007812 <+492>:   mv      a5,a0
>>>> 
>>>> 
>>>> And then it raises an illegal instruction probably because V isn't
>>>> enabled. But I'm guessing we do not expect to use vector code in OpenSBI
>>>> at all ? I'm using a fairly new qemu if that is of any help
>>>> (248f9209edfd289e7d97fb323e5075ccd55cc157).
>>> 
>>> Ahh, it seems auto-vectorization is enabled by default.
>>> 
>>> We certainly don't want to use vector code in OpenSBI
>>> at the moment.
>>> 
>>> How about using the compiler command-line option to
>>> force disable auto-vectorization ?
>> 
>> Just don’t put V in the ISA string? The assembly already does
>> 
>>    .option arch, +v
>> 
>> AFAICT there’s no reason for any C to be compiled with V.
> 
> This is certainly another way but my worry is compilation would
> fail for older toolchains without vector support hence my suggestion
> to disable auto-vectorization.

You still check it’s supported as an option. You just don’t actually
enable it globally, only on the assembly you need it for.

Jess
Anup Patel Dec. 9, 2024, 12:26 p.m. UTC | #9
On Mon, Dec 9, 2024 at 10:18 AM Jessica Clarke <jrtc27@jrtc27.com> wrote:
>
> On 9 Dec 2024, at 03:56, Anup Patel <anup@brainfault.org> wrote:
> >
> > On Mon, Dec 9, 2024 at 1:57 AM Jessica Clarke <jrtc27@jrtc27.com> wrote:
> >>
> >> On 7 Dec 2024, at 14:31, Anup Patel <apatel@ventanamicro.com> wrote:
> >>>
> >>> On Sat, Dec 7, 2024 at 2:27 AM Clément Léger <cleger@rivosinc.com> wrote:
> >>>>
> >>>> Hi Anup & Nylon,
> >>>>
> >>>> This commits breaks openSBI boot on my setup. A quick analysis of the
> >>>> problem shows that the changes to objects.mk now allows GCC to generate
> >>>> vector assembly in OpenSBI. It now crashes in bitmap_zero() really early
> >>>> (Used in fw_platform_coldboot_harts_init()) due to vector instructions
> >>>> being used:
> >>>>
> >>>>  0x0000000080007800 <+474>:   mv      a0,s2
> >>>>  0x0000000080007802 <+476>:   sw      a5,88(s4)
> >>>>  0x0000000080007806 <+480>:   jal     0x8001adbe <fdt_check_imsic_mlevel>
> >>>> => 0x000000008000780a <+484>:   vsetivli        zero,2,e64,m1,ta,ma
> >>>>  0x000000008000780e <+488>:   vmv.v.i v1,0
> >>>>  0x0000000080007812 <+492>:   mv      a5,a0
> >>>>
> >>>>
> >>>> And then it raises an illegal instruction probably because V isn't
> >>>> enabled. But I'm guessing we do not expect to use vector code in OpenSBI
> >>>> at all ? I'm using a fairly new qemu if that is of any help
> >>>> (248f9209edfd289e7d97fb323e5075ccd55cc157).
> >>>
> >>> Ahh, it seems auto-vectorization is enabled by default.
> >>>
> >>> We certainly don't want to use vector code in OpenSBI
> >>> at the moment.
> >>>
> >>> How about using the compiler command-line option to
> >>> force disable auto-vectorization ?
> >>
> >> Just don’t put V in the ISA string? The assembly already does
> >>
> >>    .option arch, +v
> >>
> >> AFAICT there’s no reason for any C to be compiled with V.
> >
> > This is certainly another way but my worry is compilation would
> > fail for older toolchains without vector support hence my suggestion
> > to disable auto-vectorization.
>
> You still check it’s supported as an option. You just don’t actually
> enable it globally, only on the assembly you need it for.
>

Currently, we depend on the built-in __riscv_vector define, which
is available when "v" is enabled via "-march".

Now, if we don't enable "v" in "-march", then we will need some
other OpenSBI-specific define as well as ".option arch, +v" in
the inline assembly.

Regards,
Anup
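As a rough sketch of that second option (the define name below is made
up, not from the patch): the Makefile would keep "v" out of
PLATFORM_RISCV_ISA but pass something like -DOPENSBI_CC_SUPPORT_VECTOR
whenever CC_SUPPORT_VECT is y, and sbi_trap_v_ldst.c would gate on that
instead of __riscv_vector, e.g.:

/*
 * Sketch only, not the merged code.  OPENSBI_CC_SUPPORT_VECTOR is a
 * hypothetical define supplied by the build system; the vector
 * instructions stay confined to ".option arch, +v" inline assembly,
 * exactly as in the file above, so no C is ever compiled with "v"
 * in -march.
 */
#ifdef OPENSBI_CC_SUPPORT_VECTOR
static inline void vsetvl(unsigned long vl, unsigned long vtype)
{
	asm volatile (
		"	.option push\n\t"
		"	.option arch, +v\n\t"
		"	vsetvl x0, %0, %1\n\t"
		"	.option pop\n\t"
		:: "r" (vl), "r" (vtype));
}
#endif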
Jessica Clarke Dec. 9, 2024, 3:43 p.m. UTC | #10
On 9 Dec 2024, at 12:26, Anup Patel <anup@brainfault.org> wrote:
> 
> On Mon, Dec 9, 2024 at 10:18 AM Jessica Clarke <jrtc27@jrtc27.com> wrote:
>> 
>> On 9 Dec 2024, at 03:56, Anup Patel <anup@brainfault.org> wrote:
>>> 
>>> On Mon, Dec 9, 2024 at 1:57 AM Jessica Clarke <jrtc27@jrtc27.com> wrote:
>>>> 
>>>> On 7 Dec 2024, at 14:31, Anup Patel <apatel@ventanamicro.com> wrote:
>>>>> 
>>>>> On Sat, Dec 7, 2024 at 2:27 AM Clément Léger <cleger@rivosinc.com> wrote:
>>>>>> 
>>>>>> Hi Anup & Nylon,
>>>>>> 
>>>>>> This commits breaks openSBI boot on my setup. A quick analysis of the
>>>>>> problem shows that the changes to objects.mk now allows GCC to generate
>>>>>> vector assembly in OpenSBI. It now crashes in bitmap_zero() really early
>>>>>> (Used in fw_platform_coldboot_harts_init()) due to vector instructions
>>>>>> being used:
>>>>>> 
>>>>>> 0x0000000080007800 <+474>:   mv      a0,s2
>>>>>> 0x0000000080007802 <+476>:   sw      a5,88(s4)
>>>>>> 0x0000000080007806 <+480>:   jal     0x8001adbe <fdt_check_imsic_mlevel>
>>>>>> => 0x000000008000780a <+484>:   vsetivli        zero,2,e64,m1,ta,ma
>>>>>> 0x000000008000780e <+488>:   vmv.v.i v1,0
>>>>>> 0x0000000080007812 <+492>:   mv      a5,a0
>>>>>> 
>>>>>> 
>>>>>> And then it raises an illegal instruction probably because V isn't
>>>>>> enabled. But I'm guessing we do not expect to use vector code in OpenSBI
>>>>>> at all ? I'm using a fairly new qemu if that is of any help
>>>>>> (248f9209edfd289e7d97fb323e5075ccd55cc157).
>>>>> 
>>>>> Ahh, it seems auto-vectorization is enabled by default.
>>>>> 
>>>>> We certainly don't want to use vector code in OpenSBI
>>>>> at the moment.
>>>>> 
>>>>> How about using the compiler command-line option to
>>>>> force disable auto-vectorization ?
>>>> 
>>>> Just don’t put V in the ISA string? The assembly already does
>>>> 
>>>>   .option arch, +v
>>>> 
>>>> AFAICT there’s no reason for any C to be compiled with V.
>>> 
>>> This is certainly another way but my worry is compilation would
>>> fail for older toolchains without vector support hence my suggestion
>>> to disable auto-vectorization.
>> 
>> You still check it’s supported as an option. You just don’t actually
>> enable it globally, only on the assembly you need it for.
>> 
> 
> Currently, we depend on the built-in __riscv_vector define which
> is available when "v" enabled via "-march".
> 
> Now, if we don't enable "v" in "-march" then we will need some
> other OpenSBI specific define

So make one up? Or just don’t build the file in the first place.

> as well as ".option arch, +v" in
> the inline assembly.

That’s already there.

Jess
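The "don't build the file" variant would be a small conditional in
lib/sbi/objects.mk, sketched here against the CC_SUPPORT_VECT flag this
patch already adds; the callers in sbi_trap_ldst.c would then need a
matching guard or stubs, so treat this as an illustration rather than a
complete fix:

# Sketch only: build the vector emulator only when the toolchain can
# assemble it; CC_SUPPORT_VECT comes from the top-level Makefile probe.
ifeq ($(CC_SUPPORT_VECT),y)
libsbi-objs-y += sbi_trap_v_ldst.o
endif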

Patch

diff --git a/Makefile b/Makefile
index d9cee49..5ac95a0 100644
--- a/Makefile
+++ b/Makefile
@@ -189,6 +189,9 @@  CC_SUPPORT_STRICT_ALIGN := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib
 # Check whether the assembler and the compiler support the Zicsr and Zifencei extensions
 CC_SUPPORT_ZICSR_ZIFENCEI := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib -march=rv$(OPENSBI_CC_XLEN)imafd_zicsr_zifencei -x c /dev/null -o /dev/null 2>&1 | grep -e "zicsr" -e "zifencei" > /dev/null && echo n || echo y)
 
+# Check whether the assembler and the compiler support the Vector extension
+CC_SUPPORT_VECT := $(shell echo | $(CC) -dM -E -march=rv$(OPENSBI_CC_XLEN)gv - | grep -q riscv.*vector && echo y || echo n)
+
 ifneq ($(OPENSBI_LD_PIE),y)
 $(error Your linker does not support creating PIEs, opensbi requires this.)
 endif
@@ -294,10 +297,12 @@  ifndef PLATFORM_RISCV_ABI
 endif
 ifndef PLATFORM_RISCV_ISA
   ifneq ($(PLATFORM_RISCV_TOOLCHAIN_DEFAULT), 1)
+    PLATFORM_RISCV_ISA := rv$(PLATFORM_RISCV_XLEN)imafdc
+    ifeq ($(CC_SUPPORT_VECT), y)
+      PLATFORM_RISCV_ISA := $(PLATFORM_RISCV_ISA)v
+    endif
     ifeq ($(CC_SUPPORT_ZICSR_ZIFENCEI), y)
-      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc_zicsr_zifencei
-    else
-      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc
+      PLATFORM_RISCV_ISA := $(PLATFORM_RISCV_ISA)_zicsr_zifencei
     endif
   else
     PLATFORM_RISCV_ISA = $(OPENSBI_CC_ISA)
diff --git a/include/sbi/riscv_encoding.h b/include/sbi/riscv_encoding.h
index 38997ef..8b724b8 100644
--- a/include/sbi/riscv_encoding.h
+++ b/include/sbi/riscv_encoding.h
@@ -763,6 +763,12 @@ 
 #define CSR_MVIPH			0x319
 #define CSR_MIPH			0x354
 
+/* Vector extension registers */
+#define CSR_VSTART                     0x8
+#define CSR_VL                         0xc20
+#define CSR_VTYPE                      0xc21
+#define CSR_VLENB                      0xc22
+
 /* ===== Trap/Exception Causes ===== */
 
 #define CAUSE_MISALIGNED_FETCH		0x0
@@ -891,11 +897,364 @@ 
 #define INSN_MASK_FENCE_TSO		0xffffffff
 #define INSN_MATCH_FENCE_TSO		0x8330000f
 
+#define INSN_MASK_VECTOR_UNIT_STRIDE		0xfdf0707f
+#define INSN_MASK_VECTOR_FAULT_ONLY_FIRST	0xfdf0707f
+#define INSN_MASK_VECTOR_STRIDE		0xfc00707f
+#define INSN_MASK_VECTOR_WHOLE_REG		0xfff0707f
+#define INSN_MASK_VECTOR_INDEXED		0xfc00707f
+
+#define INSN_MATCH_VLUXSEG(n, bits) ((((n) - 1) << 29) | 0x04000007 | \
+		((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
+#define INSN_MATCH_VSUXSEG(n, bits) ((((n) - 1) << 29) | 0x04000027 | \
+		((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
+#define INSN_MATCH_VLOXSEG(n, bits) ((((n) - 1) << 29) | 0x0c000007 | \
+		((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
+#define INSN_MATCH_VSOXSEG(n, bits) ((((n) - 1) << 29) | 0x0c000027 | \
+		((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
+#define INSN_MATCH_VLSSEG(n, bits)  ((((n) - 1) << 29) | 0x08000007 | \
+		((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
+#define INSN_MATCH_VSSSEG(n, bits)  ((((n) - 1) << 29) | 0x08000027 | \
+		((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
+#define INSN_MATCH_VSSEG(n, bits)   ((((n) - 1) << 29) | 0x00004027 | \
+		((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
+#define INSN_MATCH_VLSEG(n, bits)   ((((n) - 1) << 29) | 0x00004007 | \
+		((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
+#define INSN_MATCH_VLSEGFF(n, bits) ((((n) - 1) << 29) | 0x1000007 | \
+		((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
+
+#define INSN_MATCH_VLE16V              0x00005007
+#define INSN_MATCH_VLE32V              0x00006007
+#define INSN_MATCH_VLE64V              0x00007007
+#define INSN_MATCH_VSE16V              0x00005027
+#define INSN_MATCH_VSE32V              0x00006027
+#define INSN_MATCH_VSE64V              0x00007027
+#define INSN_MATCH_VLSE16V             0x08005007
+#define INSN_MATCH_VLSE32V             0x08006007
+#define INSN_MATCH_VLSE64V             0x08007007
+#define INSN_MATCH_VSSE16V             0x08005027
+#define INSN_MATCH_VSSE32V             0x08006027
+#define INSN_MATCH_VSSE64V             0x08007027
+#define INSN_MATCH_VLOXEI16V           0x0c005007
+#define INSN_MATCH_VLOXEI32V           0x0c006007
+#define INSN_MATCH_VLOXEI64V           0x0c007007
+#define INSN_MATCH_VSOXEI16V           0x0c005027
+#define INSN_MATCH_VSOXEI32V           0x0c006027
+#define INSN_MATCH_VSOXEI64V           0x0c007027
+#define INSN_MATCH_VLUXEI16V           0x04005007
+#define INSN_MATCH_VLUXEI32V           0x04006007
+#define INSN_MATCH_VLUXEI64V           0x04007007
+#define INSN_MATCH_VSUXEI16V           0x04005027
+#define INSN_MATCH_VSUXEI32V           0x04006027
+#define INSN_MATCH_VSUXEI64V           0x04007027
+#define INSN_MATCH_VLE16FFV            0x01005007
+#define INSN_MATCH_VLE32FFV            0x01006007
+#define INSN_MATCH_VLE64FFV            0x01007007
+#define INSN_MATCH_VL1RE8V             0x02800007
+#define INSN_MATCH_VL1RE16V            0x02805007
+#define INSN_MATCH_VL1RE32V            0x02806007
+#define INSN_MATCH_VL1RE64V            0x02807007
+#define INSN_MATCH_VL2RE8V             0x22800007
+#define INSN_MATCH_VL2RE16V            0x22805007
+#define INSN_MATCH_VL2RE32V            0x22806007
+#define INSN_MATCH_VL2RE64V            0x22807007
+#define INSN_MATCH_VL4RE8V             0x62800007
+#define INSN_MATCH_VL4RE16V            0x62805007
+#define INSN_MATCH_VL4RE32V            0x62806007
+#define INSN_MATCH_VL4RE64V            0x62807007
+#define INSN_MATCH_VL8RE8V             0xe2800007
+#define INSN_MATCH_VL8RE16V            0xe2805007
+#define INSN_MATCH_VL8RE32V            0xe2806007
+#define INSN_MATCH_VL8RE64V            0xe2807007
+#define INSN_MATCH_VS1RV               0x02800027
+#define INSN_MATCH_VS2RV               0x22800027
+#define INSN_MATCH_VS4RV               0x62800027
+#define INSN_MATCH_VS8RV               0xe2800027
+
+#define INSN_MASK_VECTOR_LOAD_STORE	0x7f
+#define INSN_MATCH_VECTOR_LOAD		0x07
+#define INSN_MATCH_VECTOR_STORE		0x27
+
+#define IS_VECTOR_LOAD_STORE(insn) \
+	((((insn) & INSN_MASK_VECTOR_LOAD_STORE) == INSN_MATCH_VECTOR_LOAD) || \
+	(((insn) & INSN_MASK_VECTOR_LOAD_STORE) == INSN_MATCH_VECTOR_STORE))
+
+#define IS_VECTOR_INSN_MATCH(insn, match, mask) \
+	(((insn) & (mask)) == ((match) & (mask)))
+
+#define IS_UNIT_STRIDE_MATCH(insn, match) \
+	IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_UNIT_STRIDE)
+
+#define IS_STRIDE_MATCH(insn, match) \
+	IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_STRIDE)
+
+#define IS_INDEXED_MATCH(insn, match) \
+	IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_INDEXED)
+
+#define IS_FAULT_ONLY_FIRST_MATCH(insn, match) \
+	IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_FAULT_ONLY_FIRST)
+
+#define IS_WHOLE_REG_MATCH(insn, match) \
+	IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_WHOLE_REG)
+
+#define IS_UNIT_STRIDE_LOAD(insn) ( \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE16V) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE32V) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE64V) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 64)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 64)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 64)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 64)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 64)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 64)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 64)))
+
+#define IS_UNIT_STRIDE_STORE(insn) ( \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE16V) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE32V) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE64V) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 16)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 32)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 64)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 64)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 64)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 64)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 64)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 64)) || \
+	IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 64)))
+
+#define IS_STRIDE_LOAD(insn) ( \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE16V) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE32V) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE64V) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 64)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 64)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 64)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 64)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 64)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 64)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 64)))
+
+#define IS_STRIDE_STORE(insn) ( \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE16V) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE32V) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE64V) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 16)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 32)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 64)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 64)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 64)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 64)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 64)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 64)) || \
+	IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 64)))
+
+#define IS_INDEXED_LOAD(insn) ( \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI16V) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI32V) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI64V) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI16V) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI32V) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI64V) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 64)))
+
+#define IS_INDEXED_STORE(insn) ( \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI16V) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI32V) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI64V) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI16V) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI32V) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI64V) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 16)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 32)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 64)) || \
+	IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 64)))
+
+#define IS_FAULT_ONLY_FIRST_LOAD(insn) ( \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE16FFV) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE32FFV) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE64FFV) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 16)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 16)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 16)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 16)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 16)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 16)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 16)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 32)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 32)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 32)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 32)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 32)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 32)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 32)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 64)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 64)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 64)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 64)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 64)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 64)) || \
+	IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 64)))
+
+#define IS_WHOLE_REG_LOAD(insn) ( \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE8V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE16V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE32V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE64V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE8V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE16V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE32V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE64V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE8V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE16V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE32V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE64V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE8V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE16V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE32V) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE64V))
+
+#define IS_WHOLE_REG_STORE(insn) ( \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS1RV) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS2RV) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS4RV) || \
+	IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS8RV))
+
+
 #if __riscv_xlen == 64
 
 /* 64-bit read for VS-stage address translation (RV64) */
 #define INSN_PSEUDO_VS_LOAD		0x00003000
-
 /* 64-bit write for VS-stage address translation (RV64) */
 #define INSN_PSEUDO_VS_STORE	0x00003020
 
@@ -911,6 +1270,12 @@ 
 #error "Unexpected __riscv_xlen"
 #endif
 
+#define VM_MASK				0x1
+#define VIEW_MASK				0x3
+#define VSEW_MASK				0x3
+#define VLMUL_MASK				0x7
+#define VD_MASK				0x1f
+#define VS2_MASK				0x1f
 #define INSN_16BIT_MASK			0x3
 #define INSN_32BIT_MASK			0x1c
 
@@ -929,6 +1294,12 @@ 
 #endif
 #define REGBYTES			(1 << LOG_REGBYTES)
 
+#define SH_VSEW			3
+#define SH_VIEW			12
+#define SH_VD				7
+#define SH_VS2				20
+#define SH_VM				25
+#define SH_MEW				28
 #define SH_RD				7
 #define SH_RS1				15
 #define SH_RS2				20
@@ -982,6 +1353,18 @@ 
 #define IMM_S(insn)			(((s32)(insn) >> 25 << 5) | \
 					 (s32)(((insn) >> 7) & 0x1f))
 
+#define IS_MASKED(insn)		(((insn >> SH_VM) & VM_MASK) == 0)
+#define GET_VD(insn)			((insn >> SH_VD) & VD_MASK)
+#define GET_VS2(insn)			((insn >> SH_VS2) & VS2_MASK)
+#define GET_VIEW(insn)			(((insn) >> SH_VIEW) & VIEW_MASK)
+#define GET_MEW(insn)			(((insn) >> SH_MEW) & 1)
+#define GET_VSEW(vtype)		(((vtype) >> SH_VSEW) & VSEW_MASK)
+#define GET_VLMUL(vtype)		((vtype) & VLMUL_MASK)
+#define GET_LEN(view)			(1UL << (view))
+#define GET_NF(insn)			(1 + ((insn >> 29) & 7))
+#define GET_VEMUL(vlmul, view, vsew)	((vlmul + view - vsew) & 7)
+#define GET_EMUL(vemul)		(1UL << ((vemul) >= 4 ? 0 : (vemul)))
+
 #define MASK_FUNCT3			0x7000
 #define MASK_RS1			0xf8000
 #define MASK_CSR			0xfff00000
diff --git a/include/sbi/sbi_trap_ldst.h b/include/sbi/sbi_trap_ldst.h
index 4c5cc37..34877cc 100644
--- a/include/sbi/sbi_trap_ldst.h
+++ b/include/sbi/sbi_trap_ldst.h
@@ -30,4 +30,13 @@  int sbi_store_access_handler(struct sbi_trap_context *tcntx);
 
 int sbi_double_trap_handler(struct sbi_trap_context *tcntx);
 
+ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
+				 ulong addr_offset);
+
+int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
+				 struct sbi_trap_context *tcntx);
+
+int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
+				 struct sbi_trap_context *tcntx);
+
 #endif
diff --git a/lib/sbi/objects.mk b/lib/sbi/objects.mk
index a6f7c5f..47a0866 100644
--- a/lib/sbi/objects.mk
+++ b/lib/sbi/objects.mk
@@ -91,6 +91,7 @@  libsbi-objs-y += sbi_timer.o
 libsbi-objs-y += sbi_tlb.o
 libsbi-objs-y += sbi_trap.o
 libsbi-objs-y += sbi_trap_ldst.o
+libsbi-objs-y += sbi_trap_v_ldst.o
 libsbi-objs-y += sbi_unpriv.o
 libsbi-objs-y += sbi_expected_trap.o
 libsbi-objs-y += sbi_cppc.o
diff --git a/lib/sbi/sbi_trap_ldst.c b/lib/sbi/sbi_trap_ldst.c
index ebc4a92..448406b 100644
--- a/lib/sbi/sbi_trap_ldst.c
+++ b/lib/sbi/sbi_trap_ldst.c
@@ -32,7 +32,7 @@  typedef int (*sbi_trap_ld_emulator)(int rlen, union sbi_ldst_data *out_val,
 typedef int (*sbi_trap_st_emulator)(int wlen, union sbi_ldst_data in_val,
 				    struct sbi_trap_context *tcntx);
 
-static ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
+ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
 					ulong addr_offset)
 {
 	if (new_tinst == INSN_PSEUDO_VS_LOAD ||
@@ -52,7 +52,7 @@  static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
 	ulong insn, insn_len;
 	union sbi_ldst_data val = { 0 };
 	struct sbi_trap_info uptrap;
-	int rc, fp = 0, shift = 0, len = 0;
+	int rc, fp = 0, shift = 0, len = 0, vector = 0;
 
 	if (orig_trap->tinst & 0x1) {
 		/*
@@ -144,6 +144,9 @@  static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
 		len = 2;
 		shift = 8 * (sizeof(ulong) - len);
 		insn = RVC_RS2S(insn) << SH_RD;
+	} else if (IS_VECTOR_LOAD_STORE(insn)) {
+		vector = 1;
+		emu = sbi_misaligned_v_ld_emulator;
 	} else {
 		return sbi_trap_redirect(regs, orig_trap);
 	}
@@ -152,14 +155,16 @@  static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
 	if (rc <= 0)
 		return rc;
 
-	if (!fp)
-		SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
+	if (!vector) {
+		if (!fp)
+			SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
 #ifdef __riscv_flen
-	else if (len == 8)
-		SET_F64_RD(insn, regs, val.data_u64);
-	else
-		SET_F32_RD(insn, regs, val.data_ulong);
+		else if (len == 8)
+			SET_F64_RD(insn, regs, val.data_u64);
+		else
+			SET_F32_RD(insn, regs, val.data_ulong);
 #endif
+	}
 
 	regs->mepc += insn_len;
 
@@ -248,6 +253,8 @@  static int sbi_trap_emulate_store(struct sbi_trap_context *tcntx,
 	} else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) {
 		len		= 2;
 		val.data_ulong = GET_RS2S(insn, regs);
+	} else if (IS_VECTOR_LOAD_STORE(insn)) {
+		emu = sbi_misaligned_v_st_emulator;
 	} else {
 		return sbi_trap_redirect(regs, orig_trap);
 	}
diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
new file mode 100644
index 0000000..72b2309
--- /dev/null
+++ b/lib/sbi/sbi_trap_v_ldst.c
@@ -0,0 +1,341 @@ 
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 SiFive Inc.
+ *
+ * Authors:
+ *   Andrew Waterman <andrew@sifive.com>
+ *   Nylon Chen <nylon.chen@sifive.com>
+ *   Zong Li <zong.li@sifive.com>
+ */
+
+#include <sbi/riscv_asm.h>
+#include <sbi/riscv_encoding.h>
+#include <sbi/sbi_error.h>
+#include <sbi/sbi_trap_ldst.h>
+#include <sbi/sbi_trap.h>
+#include <sbi/sbi_unpriv.h>
+
+#ifdef __riscv_vector
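+/* Upper bound on VLEN (in bits) handled here; the on-stack mask buffer is VLEN_MAX / 8 bytes. */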
+#define VLEN_MAX 65536
+
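+/*
+ * Copy @size bytes from @bytes into vector register @which at byte offset
+ * @pos. The transfer uses a unit-stride vle8.v under LMUL=8, with vstart
+ * set to skip the bytes below @pos within the v0/v8/v16/v24 group.
+ */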
+static inline void set_vreg(ulong vlenb, ulong which, ulong pos, ulong size, const uint8_t *bytes)
+{
+	pos += (which % 8) * vlenb;
+	bytes -= pos;
+
+	asm volatile (
+		"	.option push\n\t"
+		"	.option arch, +v\n\t"
+		"	vsetvli x0, %0, e8, m8, tu, ma\n\t"
+		"	.option pop\n\t"
+		:: "r" (pos + size));
+
+	csr_write(CSR_VSTART, pos);
+
+	switch (which / 8) {
+	case 0:
+		asm volatile (
+			"	.option push\n\t"
+			"	.option arch, +v\n\t"
+			"	vle8.v v0,  (%0)\n\t"
+			"	.option pop\n\t"
+			:: "r" (bytes) : "memory");
+		break;
+	case 1:
+		asm volatile (
+			"	.option push\n\t"
+			"	.option arch, +v\n\t"
+			"	vle8.v v8,  (%0)\n\t"
+			"	.option pop\n\t"
+			:: "r" (bytes) : "memory");
+		break;
+	case 2:
+		asm volatile (
+			"	.option push\n\t"
+			"	.option arch, +v\n\t"
+			"	vle8.v v16,  (%0)\n\t"
+			"	.option pop\n\t"
+			:: "r" (bytes) : "memory");
+		break;
+	case 3:
+		asm volatile (
+			"	.option push\n\t"
+			"	.option arch, +v\n\t"
+			"	vle8.v v24,  (%0)\n\t"
+			"	.option pop\n\t"
+			:: "r" (bytes) : "memory");
+		break;
+	default:
+		break;
+	}
+}
+
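+/*
+ * Read @size bytes from vector register @which at byte offset @pos into
+ * @bytes, mirroring set_vreg() but using vse8.v to store the register
+ * contents out to memory.
+ */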
+static inline void get_vreg(ulong vlenb, ulong which, ulong pos, ulong size, uint8_t *bytes)
+{
+	pos += (which % 8) * vlenb;
+	bytes -= pos;
+
+	asm volatile (
+		"	.option push\n\t"
+		"	.option arch, +v\n\t"
+		"	vsetvli x0, %0, e8, m8, tu, ma\n\t"
+		"	.option pop\n\t"
+		:: "r" (pos + size));
+
+	csr_write(CSR_VSTART, pos);
+
+	switch (which / 8) {
+	case 0:
+		asm volatile (
+			"	.option push\n\t"
+			"	.option arch, +v\n\t"
+			"	vse8.v v0,  (%0)\n\t"
+			"	.option pop\n\t"
+			:: "r" (bytes) : "memory");
+		break;
+	case 1:
+		asm volatile (
+			"	.option push\n\t"
+			"	.option arch, +v\n\t"
+			"	vse8.v v8,  (%0)\n\t"
+			"	.option pop\n\t"
+			:: "r" (bytes) : "memory");
+		break;
+	case 2:
+		asm volatile (
+			"	.option push\n\t"
+			"	.option arch, +v\n\t"
+			"	vse8.v v16, (%0)\n\t"
+			"	.option pop\n\t"
+			:: "r" (bytes) : "memory");
+		break;
+	case 3:
+		asm volatile (
+			"	.option push\n\t"
+			"	.option arch, +v\n\t"
+			"	vse8.v v24, (%0)\n\t"
+			"	.option pop\n\t"
+			:: "r" (bytes) : "memory");
+		break;
+	default:
+		break;
+	}
+}
+
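+/* Reprogram vl/vtype; used to restore the guest CSRs clobbered by set_vreg()/get_vreg(). */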
+static inline void vsetvl(ulong vl, ulong vtype)
+{
+	asm volatile (
+		"	.option push\n\t"
+		"	.option arch, +v\n\t"
+		"	vsetvl x0, %0, %1\n\t"
+		"	.option pop\n\t"
+			:: "r" (vl), "r" (vtype));
+}
+
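+/*
+ * Emulate a misaligned vector load element by element: each active element
+ * is fetched with byte-sized unprivileged loads and then written into the
+ * destination register group, resuming from the trapping element via vstart.
+ */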
+int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
+				 struct sbi_trap_context *tcntx)
+{
+	const struct sbi_trap_info *orig_trap = &tcntx->trap;
+	struct sbi_trap_regs *regs = &tcntx->regs;
+	struct sbi_trap_info uptrap;
+	ulong insn = sbi_get_insn(regs->mepc, &uptrap);
+	ulong vl = csr_read(CSR_VL);
+	ulong vtype = csr_read(CSR_VTYPE);
+	ulong vlenb = csr_read(CSR_VLENB);
+	ulong vstart = csr_read(CSR_VSTART);
+	ulong base = GET_RS1(insn, regs);
+	ulong stride = GET_RS2(insn, regs);
+	ulong vd = GET_VD(insn);
+	ulong vs2 = GET_VS2(insn);
+	ulong view = GET_VIEW(insn);
+	ulong vsew = GET_VSEW(vtype);
+	ulong vlmul = GET_VLMUL(vtype);
+	bool illegal = GET_MEW(insn);
+	bool masked = IS_MASKED(insn);
+	uint8_t mask[VLEN_MAX / 8];
+	uint8_t bytes[8 * sizeof(uint64_t)];
+	ulong len = GET_LEN(view);
+	ulong nf = GET_NF(insn);
+	ulong vemul = GET_VEMUL(vlmul, view, vsew);
+	ulong emul = GET_EMUL(vemul);
+
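+	/*
+	 * Derive the effective element size, EMUL and stride for the
+	 * addressing mode; whole-register and indexed accesses override
+	 * the defaults computed from the instruction's EEW field.
+	 */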
+	if (IS_UNIT_STRIDE_LOAD(insn) || IS_FAULT_ONLY_FIRST_LOAD(insn)) {
+		stride = nf * len;
+	} else if (IS_WHOLE_REG_LOAD(insn)) {
+		vl = (nf * vlenb) >> view;
+		nf = 1;
+		vemul = 0;
+		emul = 1;
+		stride = nf * len;
+	} else if (IS_INDEXED_LOAD(insn)) {
+		len = 1 << vsew;
+		vemul = (vlmul + vsew - vsew) & 7;
+		emul = 1 << ((vemul & 4) ? 0 : vemul);
+		stride = nf * len;
+	}
+
+	if (illegal || vlenb > VLEN_MAX / 8) {
+		struct sbi_trap_info trap = {
+			.cause = CAUSE_ILLEGAL_INSTRUCTION,
+			.tval = insn,
+		};
+		return sbi_trap_redirect(regs, &trap);
+	}
+
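+	/* For masked accesses, fetch the mask register (v0) once up front. */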
+	if (masked)
+		get_vreg(vlenb, 0, 0, vlenb, mask);
+
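+	/*
+	 * Walk the elements from the current vstart up to vl, skipping
+	 * inactive elements when the access is masked.
+	 */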
+	do {
+		if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
+			// compute element address
+			ulong addr = base + vstart * stride;
+
+			if (IS_INDEXED_LOAD(insn)) {
+				ulong offset = 0;
+
+				get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
+				addr = base + offset;
+			}
+
+			csr_write(CSR_VSTART, vstart);
+
+			// obtain load data from memory
+			for (ulong seg = 0; seg < nf; seg++) {
+				for (ulong i = 0; i < len; i++) {
+					bytes[seg * len + i] =
+						sbi_load_u8((void *)(addr + seg * len + i),
+							    &uptrap);
+
+					if (uptrap.cause) {
+						if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
+							vl = vstart;
+							break;
+						}
+						vsetvl(vl, vtype);
+						uptrap.tinst = sbi_misaligned_tinst_fixup(
+							orig_trap->tinst, uptrap.tinst, i);
+						return sbi_trap_redirect(regs, &uptrap);
+					}
+				}
+			}
+
+			// write load data to regfile
+			for (ulong seg = 0; seg < nf; seg++)
+				set_vreg(vlenb, vd + seg * emul, vstart * len,
+					 len, &bytes[seg * len]);
+		}
+	} while (++vstart < vl);
+
+	// restore clobbered vl/vtype
+	vsetvl(vl, vtype);
+
+	return vl;
+}
+
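+/*
+ * Emulate a misaligned vector store: store data is read from the source
+ * register group with get_vreg() and written out with byte-sized
+ * unprivileged stores.
+ */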
+int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
+				 struct sbi_trap_context *tcntx)
+{
+	const struct sbi_trap_info *orig_trap = &tcntx->trap;
+	struct sbi_trap_regs *regs = &tcntx->regs;
+	struct sbi_trap_info uptrap;
+	ulong insn = sbi_get_insn(regs->mepc, &uptrap);
+	ulong vl = csr_read(CSR_VL);
+	ulong vtype = csr_read(CSR_VTYPE);
+	ulong vlenb = csr_read(CSR_VLENB);
+	ulong vstart = csr_read(CSR_VSTART);
+	ulong base = GET_RS1(insn, regs);
+	ulong stride = GET_RS2(insn, regs);
+	ulong vd = GET_VD(insn);
+	ulong vs2 = GET_VS2(insn);
+	ulong view = GET_VIEW(insn);
+	ulong vsew = GET_VSEW(vtype);
+	ulong vlmul = GET_VLMUL(vtype);
+	bool illegal = GET_MEW(insn);
+	bool masked = IS_MASKED(insn);
+	uint8_t mask[VLEN_MAX / 8];
+	uint8_t bytes[8 * sizeof(uint64_t)];
+	ulong len = GET_LEN(view);
+	ulong nf = GET_NF(insn);
+	ulong vemul = GET_VEMUL(vlmul, view, vsew);
+	ulong emul = GET_EMUL(vemul);
+
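+	/* Same addressing-mode handling as the load path, minus fault-only-first. */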
+	if (IS_UNIT_STRIDE_STORE(insn)) {
+		stride = nf * len;
+	} else if (IS_WHOLE_REG_STORE(insn)) {
+		vl = (nf * vlenb) >> view;
+		nf = 1;
+		vemul = 0;
+		emul = 1;
+		stride = nf * len;
+	} else if (IS_INDEXED_STORE(insn)) {
+		len = 1 << vsew;
+		vemul = (vlmul + vsew - vsew) & 7;
+		emul = 1 << ((vemul & 4) ? 0 : vemul);
+		stride = nf * len;
+	}
+
+	if (illegal || vlenb > VLEN_MAX / 8) {
+		struct sbi_trap_info trap = {
+			.cause = CAUSE_ILLEGAL_INSTRUCTION,
+			.tval = insn,
+		};
+		return sbi_trap_redirect(regs, &trap);
+	}
+
+	if (masked)
+		get_vreg(vlenb, 0, 0, vlenb, mask);
+
+	do {
+		if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
+			// compute element address
+			ulong addr = base + vstart * stride;
+
+			if (IS_INDEXED_STORE(insn)) {
+				ulong offset = 0;
+
+				get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
+				addr = base + offset;
+			}
+
+			// obtain store data from regfile
+			for (ulong seg = 0; seg < nf; seg++)
+				get_vreg(vlenb, vd + seg * emul, vstart * len,
+					 len, &bytes[seg * len]);
+
+			csr_write(CSR_VSTART, vstart);
+
+			// write store data to memory
+			for (ulong seg = 0; seg < nf; seg++) {
+				for (ulong i = 0; i < len; i++) {
+					sbi_store_u8((void *)(addr + seg * len + i),
+						     bytes[seg * len + i], &uptrap);
+					if (uptrap.cause) {
+						vsetvl(vl, vtype);
+						uptrap.tinst = sbi_misaligned_tinst_fixup(
+							orig_trap->tinst, uptrap.tinst, i);
+						return sbi_trap_redirect(regs, &uptrap);
+					}
+				}
+			}
+		}
+	} while (++vstart < vl);
+
+	// restore clobbered vl/vtype
+	vsetvl(vl, vtype);
+
+	return vl;
+}
+#else
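+/* Stub emulators for toolchains that cannot assemble vector instructions. */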
+int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
+				 struct sbi_trap_context *tcntx)
+{
+	return 0;
+}
+int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
+				 struct sbi_trap_context *tcntx)
+{
+	return 0;
+}
+#endif /* __riscv_vector */