diff mbox series

[v2,06/17] RISC-V: add vector extension fault-only-first implementation

Message ID 1568183141-67641-7-git-send-email-zhiwei_liu@c-sky.com
State New
Headers show
Series RISC-V: support vector extension | expand

Commit Message

LIU Zhiwei Sept. 11, 2019, 6:25 a.m. UTC
From: LIU Zhiwei <zhiwei_liu@c-sky.com>

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 linux-user/riscv/cpu_loop.c             |   7 +
 target/riscv/cpu_helper.c               |   7 +
 target/riscv/helper.h                   |   7 +
 target/riscv/insn32.decode              |   7 +
 target/riscv/insn_trans/trans_rvv.inc.c |   7 +
 target/riscv/vector_helper.c            | 567 ++++++++++++++++++++++++++++++++
 6 files changed, 602 insertions(+)

Comments

Richard Henderson Sept. 12, 2019, 2:32 p.m. UTC | #1
On 9/11/19 2:25 AM, liuzhiwei wrote:
> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
> index 12aa3c0..d673fa5 100644
> --- a/linux-user/riscv/cpu_loop.c
> +++ b/linux-user/riscv/cpu_loop.c
> @@ -41,6 +41,13 @@ void cpu_loop(CPURISCVState *env)
>          sigcode = 0;
>          sigaddr = 0;
>  
> +        if (env->foflag) {
> +            if (env->vfp.vl != 0) {
> +                env->foflag = false;
> +                env->pc += 4;
> +                continue;
> +            }
> +        }
>          switch (trapnr) {
>          case EXCP_INTERRUPT:
>              /* just indicate that signals should be handled asap */
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index e32b612..405caf6 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
>          [PRV_H] = RISCV_EXCP_H_ECALL,
>          [PRV_M] = RISCV_EXCP_M_ECALL
>      };
> +    if (env->foflag) {
> +        if (env->vfp.vl != 0) {
> +            env->foflag = false;
> +            env->pc += 4;
> +            return;
> +        }
> +    }

I renew my objection to this FOFLAG mechanism.  I believe, but have no proof,
that this will race between different types of interrupts.  Once again I
present the ARM SVE first-fault helpers as proof that there is another way.

Otherwise, all of the same comments from the normal loads apply.


r~
diff mbox series

Patch

diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
index 12aa3c0..d673fa5 100644
--- a/linux-user/riscv/cpu_loop.c
+++ b/linux-user/riscv/cpu_loop.c
@@ -41,6 +41,13 @@  void cpu_loop(CPURISCVState *env)
         sigcode = 0;
         sigaddr = 0;
 
+        if (env->foflag) {
+            if (env->vfp.vl != 0) {
+                env->foflag = false;
+                env->pc += 4;
+                continue;
+            }
+        }
         switch (trapnr) {
         case EXCP_INTERRUPT:
             /* just indicate that signals should be handled asap */
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index e32b612..405caf6 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -521,6 +521,13 @@  void riscv_cpu_do_interrupt(CPUState *cs)
         [PRV_H] = RISCV_EXCP_H_ECALL,
         [PRV_M] = RISCV_EXCP_M_ECALL
     };
+    if (env->foflag) {
+        if (env->vfp.vl != 0) {
+            env->foflag = false;
+            env->pc += 4;
+            return;
+        }
+    }
 
     if (!async) {
         /* set tval to badaddr for traps with address information */
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index f77c392..973342f 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -84,6 +84,13 @@  DEF_HELPER_5(vector_vle_v, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(vector_vlbu_v, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(vector_vlhu_v, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(vector_vlwu_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlbff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlhff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlwff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vleff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlbuff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlhuff_v, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(vector_vlwuff_v, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(vector_vsb_v, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(vector_vsh_v, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(vector_vsw_v, void, env, i32, i32, i32, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index b8a3d8a..b286997 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -218,6 +218,13 @@  vle_v      ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm
 vlbu_v     ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm
 vlhu_v     ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm
 vlwu_v     ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm
+vlbff_v    ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm
+vlhff_v    ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm
+vlwff_v    ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm
+vleff_v    ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm
+vlbuff_v   ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm
+vlhuff_v   ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm
+vlwuff_v   ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm
 vsb_v      ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm
 vsh_v      ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm
 vsw_v      ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
index 16b1f90..bd83885 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -80,6 +80,13 @@  GEN_VECTOR_R2_NFVM(vle_v)
 GEN_VECTOR_R2_NFVM(vlbu_v)
 GEN_VECTOR_R2_NFVM(vlhu_v)
 GEN_VECTOR_R2_NFVM(vlwu_v)
+GEN_VECTOR_R2_NFVM(vlbff_v)
+GEN_VECTOR_R2_NFVM(vlhff_v)
+GEN_VECTOR_R2_NFVM(vlwff_v)
+GEN_VECTOR_R2_NFVM(vleff_v)
+GEN_VECTOR_R2_NFVM(vlbuff_v)
+GEN_VECTOR_R2_NFVM(vlhuff_v)
+GEN_VECTOR_R2_NFVM(vlwuff_v)
 GEN_VECTOR_R2_NFVM(vsb_v)
 GEN_VECTOR_R2_NFVM(vsh_v)
 GEN_VECTOR_R2_NFVM(vsw_v)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 62e4d2e..0ac8c74 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -2762,3 +2762,570 @@  void VECTOR_HELPER(vsuxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
     env->vfp.vstart = 0;
 }
 
+void VECTOR_HELPER(vlbuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+    uint32_t rs1, uint32_t rd)
+{
+    int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+    vl = env->vfp.vl;
+
+    lmul   = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    if (lmul * (nf + 1) > 32) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    env->foflag = true;
+    env->vfp.vl = 0;
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        k = nf;
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = i * (nf + 1)  + k;
+                        env->vfp.vreg[dest + k * lmul].u8[j] =
+                            cpu_ldub_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = i * (nf + 1)  + k;
+                        env->vfp.vreg[dest + k * lmul].u16[j] =
+                            cpu_ldub_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = i * (nf + 1)  + k;
+                        env->vfp.vreg[dest + k * lmul].u32[j] =
+                            cpu_ldub_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = i * (nf + 1)  + k;
+                        env->vfp.vreg[dest + k * lmul].u64[j] =
+                            cpu_ldub_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_segment(env, dest, j, width, k, lmul);
+        }
+    }
+    env->foflag = false;
+    env->vfp.vl = vl;
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlbff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+    uint32_t rs1, uint32_t rd)
+{
+    int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+    vl = env->vfp.vl;
+
+    lmul   = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    if (lmul * (nf + 1) > 32) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rd, false);
+    env->foflag = true;
+    env->vfp.vl = 0;
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        k = nf;
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = i * (nf + 1)  + k;
+                        env->vfp.vreg[dest + k * lmul].s8[j] =
+                            cpu_ldsb_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = i * (nf + 1)  + k;
+                        env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend(
+                            cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = i * (nf + 1)  + k;
+                        env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
+                            cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = i * (nf + 1)  + k;
+                        env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+                            cpu_ldsb_data(env, env->gpr[rs1] + read), 8);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_segment(env, dest, j, width, k, lmul);
+        }
+    }
+    env->foflag = false;
+    env->vfp.vl = vl;
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlhuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+    uint32_t rs1, uint32_t rd)
+{
+    int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+    vl = env->vfp.vl;
+
+    lmul   = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    if (lmul * (nf + 1) > 32) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rd, false);
+    env->foflag = true;
+    env->vfp.vl = 0;
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        k = nf;
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 2;
+                        env->vfp.vreg[dest + k * lmul].u16[j] =
+                            cpu_lduw_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 2;
+                        env->vfp.vreg[dest + k * lmul].u32[j] =
+                            cpu_lduw_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 2;
+                        env->vfp.vreg[dest + k * lmul].u64[j] =
+                            cpu_lduw_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_segment(env, dest, j, width, k, lmul);
+        }
+    }
+    env->foflag = false;
+    env->vfp.vl = vl;
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlhff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+    uint32_t rs1, uint32_t rd)
+{
+    int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+    vl = env->vfp.vl;
+
+    lmul   = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    if (lmul * (nf + 1) > 32) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rd, false);
+    env->foflag = true;
+    env->vfp.vl = 0;
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        k = nf;
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 2;
+                        env->vfp.vreg[dest + k * lmul].s16[j] =
+                            cpu_ldsw_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 2;
+                        env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend(
+                            cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 2;
+                        env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+                            cpu_ldsw_data(env, env->gpr[rs1] + read), 16);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_segment(env, dest, j, width, k, lmul);
+        }
+    }
+    env->vfp.vl = vl;
+    env->foflag = false;
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlwuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+    uint32_t rs1, uint32_t rd)
+{
+    int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+    vl = env->vfp.vl;
+
+    lmul   = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    if (lmul * (nf + 1) > 32) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rd, false);
+    env->foflag = true;
+    env->vfp.vl = 0;
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        k = nf;
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 4;
+                        env->vfp.vreg[dest + k * lmul].u32[j] =
+                            cpu_ldl_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 4;
+                        env->vfp.vreg[dest + k * lmul].u64[j] =
+                            cpu_ldl_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_segment(env, dest, j, width, k, lmul);
+        }
+    }
+    env->foflag = false;
+    env->vfp.vl = vl;
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vlwff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+    uint32_t rs1, uint32_t rd)
+{
+    int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+    vl = env->vfp.vl;
+
+    lmul   = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    if (lmul * (nf + 1) > 32) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rd, false);
+    env->foflag = true;
+    env->vfp.vl = 0;
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        k = nf;
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 4;
+                        env->vfp.vreg[dest + k * lmul].s32[j] =
+                            cpu_ldl_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 4;
+                        env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend(
+                            cpu_ldl_data(env, env->gpr[rs1] + read), 32);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_segment(env, dest, j, width, k, lmul);
+        }
+    }
+    env->foflag = false;
+    env->vfp.vl = vl;
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vleff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
+    uint32_t rs1, uint32_t rd)
+{
+    int i, j, k, vl, vlmax, lmul, width, dest, read;
+
+    vl = env->vfp.vl;
+    lmul   = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    if (lmul * (nf + 1) > 32) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rd, false);
+    env->vfp.vl = 0;
+    env->foflag = true;
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        k = nf;
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = i * (nf + 1)  + k;
+                        env->vfp.vreg[dest + k * lmul].u8[j] =
+                            cpu_ldub_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 2;
+                        env->vfp.vreg[dest + k * lmul].u16[j] =
+                            cpu_lduw_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 4;
+                        env->vfp.vreg[dest + k * lmul].u32[j] =
+                            cpu_ldl_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    while (k >= 0) {
+                        read = (i * (nf + 1)  + k) * 8;
+                        env->vfp.vreg[dest + k * lmul].u64[j] =
+                            cpu_ldq_data(env, env->gpr[rs1] + read);
+                        k--;
+                    }
+                    env->vfp.vstart++;
+                }
+                env->vfp.vl++;
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_segment(env, dest, j, width, k, lmul);
+        }
+    }
+    env->foflag = false;
+    env->vfp.vl = vl;
+    env->vfp.vstart = 0;
+}