Message ID: 1566959818-38369-1-git-send-email-zhiwei_liu@c-sky.com
State: New
Series: RISCV: support riscv vector extension 0.7.1
liuzhiwei <zhiwei_liu@c-sky.com> writes:

> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
> ---
> fpu/softfloat.c | 119 +
> include/fpu/softfloat.h | 4 +

Changes to softfloat should be in a separate patch, but see below.

> linux-user/riscv/cpu_loop.c | 8 +-
> target/riscv/Makefile.objs | 2 +-
> target/riscv/cpu.h | 30 +
> target/riscv/cpu_bits.h | 15 +
> target/riscv/cpu_helper.c | 7 +
> target/riscv/csr.c | 65 +-
> target/riscv/helper.h | 354 +
> target/riscv/insn32.decode | 374 +-
> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
> target/riscv/translate.c | 1 +
> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++

This is likely too big to be reviewed. Is it possible to split the
patch up into more discrete chunks, for example support pieces and then
maybe a class at a time?

> 13 files changed, 28017 insertions(+), 9 deletions(-)
> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
> create mode 100644 target/riscv/vector_helper.c
>
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index 2ba36ec..da155ea 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns the sign bit of the half-precision floating-point value `a'.
> +*----------------------------------------------------------------------------*/
> +
> +static inline flag extractFloat16Sign(float16 a)
> +{
> + return float16_val(a) >> 0xf;
> +}
> +

We are trying to avoid this sort of bit fiddling for new code when we
already have generic decompose functions that can extract all the parts
into a common format.

> +
> +/*----------------------------------------------------------------------------
> | Returns the fraction bits of the single-precision floating-point value `a'.
> *----------------------------------------------------------------------------*/
>
> @@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b, float_status *status)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns 1 if the half-precision floating-point value `a' is less than
> +| or equal to the corresponding value `b', and 0 otherwise. The invalid
> +| exception is raised if either operand is a NaN. The comparison is performed
> +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> +int float16_le(float16 a, float16 b, float_status *status)
> +{
> + flag aSign, bSign;
> + uint16_t av, bv;
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
> + ) {
> + float_raise(float_flag_invalid, status);
> + return 0;
> + }
> + aSign = extractFloat16Sign( a );
> + bSign = extractFloat16Sign( b );
> + av = float16_val(a);
> + bv = float16_val(b);
> + if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av | bv )<<1 ) == 0 );
> + return ( av == bv ) || ( aSign ^ ( av < bv ) );
> +
> +}

What does this provide that:

    float16_compare(a, b, status) == float_relation_less;

doesn't?
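For illustration, all four predicates the patch adds reduce to the
existing compare primitives. A minimal sketch (not code from the patch
or from the tree):

    /* Sketch: the four new predicates in terms of the existing
     * float16_compare{,_quiet}().  The signaling compare raises the
     * invalid flag for any NaN operand; the quiet compare only for
     * signaling NaNs -- matching the open-coded versions above. */
    static inline int float16_le(float16 a, float16 b, float_status *s)
    {
        int r = float16_compare(a, b, s);
        return r == float_relation_less || r == float_relation_equal;
    }

    static inline int float16_lt(float16 a, float16 b, float_status *s)
    {
        return float16_compare(a, b, s) == float_relation_less;
    }

    static inline int float16_eq_quiet(float16 a, float16 b, float_status *s)
    {
        return float16_compare_quiet(a, b, s) == float_relation_equal;
    }

    static inline int float16_unordered_quiet(float16 a, float16 b,
                                              float_status *s)
    {
        return float16_compare_quiet(a, b, s) == float_relation_unordered;
    }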
> +
> +/*----------------------------------------------------------------------------
> | Returns 1 if the single-precision floating-point value `a' is less than
> | or equal to the corresponding value `b', and 0 otherwise. The invalid
> | exception is raised if either operand is a NaN. The comparison is performed
> @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, float_status *status)
> | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
> *----------------------------------------------------------------------------*/
>
> +int float16_lt(float16 a, float16 b, float_status *status)
> +{
> + flag aSign, bSign;
> + uint16_t av, bv;
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
> + ) {
> + float_raise(float_flag_invalid, status);
> + return 0;
> + }
> + aSign = extractFloat16Sign( a );
> + bSign = extractFloat16Sign( b );
> + av = float16_val(a);
> + bv = float16_val(b);
> + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | bv )<<1 ) != 0 );
> + return ( av != bv ) && ( aSign ^ ( av < bv ) );
> +
> +}
> +
> +/*----------------------------------------------------------------------------
> +| Returns 1 if the single-precision floating-point value `a' is less than
> +| the corresponding value `b', and 0 otherwise. The invalid exception is
> +| raised if either operand is a NaN. The comparison is performed according
> +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> int float32_lt(float32 a, float32 b, float_status *status)
> {
> flag aSign, bSign;
> @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, float_status *status)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns 1 if the half-precision floating-point value `a' is equal to
> +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
> +| exception. The comparison is performed according to the IEC/IEEE Standard
> +| for Binary Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> +int float16_eq_quiet(float16 a, float16 b, float_status *status)
> +{
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
> + ) {
> + if (float16_is_signaling_nan(a, status)
> + || float16_is_signaling_nan(b, status)) {
> + float_raise(float_flag_invalid, status);
> + }
> + return 0;
> + }
> + return ( float16_val(a) == float16_val(b) ) ||
> + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 ) == 0 );
> +}
> +

See also float16_compare_quiet

> +
> +/*----------------------------------------------------------------------------
> | Returns 1 if the single-precision floating-point value `a' is equal to
> | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
> | exception. The comparison is performed according to the IEC/IEEE Standard
> @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, float_status *status)
> }
>
> /*----------------------------------------------------------------------------
> +| Returns 1 if the half-precision floating-point values `a' and `b' cannot
> +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
> +| comparison is performed according to the IEC/IEEE Standard for Binary
> +| Floating-Point Arithmetic.
> +*----------------------------------------------------------------------------*/
> +
> +int float16_unordered_quiet(float16 a, float16 b, float_status *status)
> +{
> + a = float16_squash_input_denormal(a, status);
> + b = float16_squash_input_denormal(b, status);
> +
> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) )
> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) )
> + ) {
> + if (float16_is_signaling_nan(a, status)
> + || float16_is_signaling_nan(b, status)) {
> + float_raise(float_flag_invalid, status);
> + }
> + return 1;
> + }
> + return 0;
> +}
> +
> +
> +/*----------------------------------------------------------------------------
> | Returns 1 if the single-precision floating-point values `a' and `b' cannot
> | be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
> | comparison is performed according to the IEC/IEEE Standard for Binary
> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
> index 3ff3fa5..3b0754c 100644
> --- a/include/fpu/softfloat.h
> +++ b/include/fpu/softfloat.h
> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status);
> float16 float16_sqrt(float16, float_status *status);
> int float16_compare(float16, float16, float_status *status);
> int float16_compare_quiet(float16, float16, float_status *status);
> +int float16_unordered_quiet(float16, float16, float_status *status);
> +int float16_le(float16, float16, float_status *status);
> +int float16_lt(float16, float16, float_status *status);
> +int float16_eq_quiet(float16, float16, float_status *status);
>
> int float16_is_quiet_nan(float16, float_status *status);
> int float16_is_signaling_nan(float16, float_status *status);
> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
> index 12aa3c0..b01548a 100644
> --- a/linux-user/riscv/cpu_loop.c
> +++ b/linux-user/riscv/cpu_loop.c
> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env)
> signum = 0;
> sigcode = 0;
> sigaddr = 0;
> -
> + if (env->foflag) {
> + if (env->vfp.vl != 0) {
> + env->foflag = false;
> + env->pc += 4;
> + continue;
> + }
> + }

What is this trying to do?
> switch (trapnr) {
> case EXCP_INTERRUPT:
> /* just indicate that signals should be handled asap */
> diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
> index b1c79bc..d577cef 100644
> --- a/target/riscv/Makefile.objs
> +++ b/target/riscv/Makefile.objs
> @@ -1,4 +1,4 @@
> -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o pmp.o
> +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o pmp.o
>
> DECODETREE = $(SRC_PATH)/scripts/decodetree.py
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 0adb307..5a93aa2 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -67,6 +67,7 @@
> #define RVC RV('C')
> #define RVS RV('S')
> #define RVU RV('U')
> +#define RVV RV('V')
>
> /* S extension denotes that Supervisor mode exists, however it is possible
> to have a core that support S mode but does not have an MMU and there
> @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState;
>
> #include "pmp.h"
>
> +#define VLEN 128
> +#define VUNIT(x) (VLEN / x)
> +

If you want to do vectors I suggest you look at the TCGvec types for
passing pointers to vector registers to helpers. In this case you will
want to ensure your vector registers are properly aligned.

> struct CPURISCVState {
> target_ulong gpr[32];
> uint64_t fpr[32]; /* assume both F and D extensions */
> +
> + /* vector coprocessor state. */
> + struct {
> + union VECTOR {
> + float64 f64[VUNIT(64)];
> + float32 f32[VUNIT(32)];
> + float16 f16[VUNIT(16)];
> + target_ulong ul[VUNIT(sizeof(target_ulong))];
> + uint64_t u64[VUNIT(64)];
> + int64_t s64[VUNIT(64)];
> + uint32_t u32[VUNIT(32)];
> + int32_t s32[VUNIT(32)];
> + uint16_t u16[VUNIT(16)];
> + int16_t s16[VUNIT(16)];
> + uint8_t u8[VUNIT(8)];
> + int8_t s8[VUNIT(8)];
> + } vreg[32];
> + target_ulong vxrm;
> + target_ulong vxsat;
> + target_ulong vl;
> + target_ulong vstart;
> + target_ulong vtype;
> + float_status fp_status;
> + } vfp;
> +
> + bool foflag;

Again I have no idea what foflag is here.
> target_ulong pc;
> target_ulong load_res;
> target_ulong load_val;
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index 11f971a..9eb43ec 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -29,6 +29,14 @@
> #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT)
> #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
>
> +/* Vector Fixed-Point round model */
> +#define FSR_VXRM_SHIFT 9
> +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT)
> +
> +/* Vector Fixed-Point saturation flag */
> +#define FSR_VXSAT_SHIFT 8
> +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT)
> +
> /* Control and Status Registers */
>
> /* User Trap Setup */
> @@ -48,6 +56,13 @@
> #define CSR_FRM 0x002
> #define CSR_FCSR 0x003
>
> +/* User Vector CSRs */
> +#define CSR_VSTART 0x008
> +#define CSR_VXSAT 0x009
> +#define CSR_VXRM 0x00a
> +#define CSR_VL 0xc20
> +#define CSR_VTYPE 0xc21
> +
> /* User Timers and Counters */
> #define CSR_CYCLE 0xc00
> #define CSR_TIME 0xc01
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index e32b612..405caf6 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
> [PRV_H] = RISCV_EXCP_H_ECALL,
> [PRV_M] = RISCV_EXCP_M_ECALL
> };
> + if (env->foflag) {
> + if (env->vfp.vl != 0) {
> + env->foflag = false;
> + env->pc += 4;
> + return;
> + }
> + }
>
> if (!async) {
> /* set tval to badaddr for traps with address information */
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index e0d4586..a6131ff 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno)
> return 0;
> }
>
> -#if !defined(CONFIG_USER_ONLY)
> static int any(CPURISCVState *env, int csrno)
> {
> return 0;
> }
>
> +#if !defined(CONFIG_USER_ONLY)
> static int smode(CPURISCVState *env, int csrno)
> {
> return -!riscv_has_ext(env, RVS);
> @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val)
> return -1;
> }
> #endif
> - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
> - | (env->frm << FSR_RD_SHIFT);
> + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT)
> + | (env->vfp.vxsat << FSR_VXSAT_SHIFT)
> + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT)
> + | (env->frm << FSR_RD_SHIFT);
> return 0;
> }
>
> @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val)
> env->mstatus |= MSTATUS_FS;
> #endif
> env->frm = (val & FSR_RD) >> FSR_RD_SHIFT;
> + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT;
> + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT;
> riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT);
> return 0;
> }
>
> +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vtype;
> + return 0;
> +}
> +
> +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vl;
> + return 0;
> +}
> +
> +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vxrm;
> + return 0;
> +}
> +
> +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vxsat;
> + return 0;
> +}
> +
> +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val)
> +{
> + *val = env->vfp.vstart;
> + return 0;
> +}
> +
> +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val)
> +{
> + env->vfp.vxrm = val;
> + return 0;
> +}
> +
> +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val)
> +{
> + env->vfp.vxsat = val;
> + return 0;
> +}
> +
> +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val)
> +{
> + env->vfp.vstart = val;
> + return 0;
> +}

A fixed return value makes me think these should be void functions.

> +
> /* User Timers and Counters */
> static int read_instret(CPURISCVState *env, int csrno, target_ulong *val)
> {
> @@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
> [CSR_FFLAGS] = { fs, read_fflags, write_fflags },
> [CSR_FRM] = { fs, read_frm, write_frm },
> [CSR_FCSR] = { fs, read_fcsr, write_fcsr },
> -
> + /* Vector CSRs */
> + [CSR_VSTART] = { any, read_vstart, write_vstart },
> + [CSR_VXSAT] = { any, read_vxsat, write_vxsat },
> + [CSR_VXRM] = { any, read_vxrm, write_vxrm },
> + [CSR_VL] = { any, read_vl },
> + [CSR_VTYPE] = { any, read_vtype },
> /* User Timers and Counters */
> [CSR_CYCLE] = { ctr, read_instret },
> [CSR_INSTRET] = { ctr, read_instret },
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index debb22a..fee02c0 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl)
> DEF_HELPER_1(wfi, void, env)
> DEF_HELPER_1(tlb_flush, void, env)
> #endif
> +/* Vector functions */

Think about how you could split this patch up to introduce a group of
instructions at a time. This will make it a lot easier to review.

I'm going to leave review of the specifics to the RISCV maintainers but
I suspect they will want to wait until a v2 of the series. However it
looks like a good first pass at implementing vectors.

--
Alex Bennée
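On the alignment and pointer-passing points above, a rough sketch of
what that could look like (editor's illustration, not code from the
thread; the field and function names are invented, while QEMU_ALIGNED()
and tcg_gen_addi_ptr() are existing QEMU APIs):

    /* In cpu.h: keep the vector register file 16-byte aligned so the
     * host can use vector loads/stores on it (invented field name). */
    uint64_t velt[32 * VLEN / 64] QEMU_ALIGNED(16);

    /* In the translator: hand helpers the address of v[n] rather than
     * a register number. */
    static TCGv_ptr vreg_ptr(int regno)
    {
        TCGv_ptr p = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(p, cpu_env,
                         offsetof(CPURISCVState, velt) + regno * (VLEN / 8));
        return p;
    }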
On 8/28/19 2:08 AM, Alex Bennée wrote:
> If you want to do vectors I suggest you look at the TCGvec types for
> passing pointers to vector registers to helpers. In this case you will
> want to ensure your vector registers are properly aligned.

The risc-v vector extension is very different from any other existing
vector extension. In particular, the locations of the vector elements
vary dynamically. Except for certain special cases I doubt that risc-v
can make direct use of the generic TCG vector support.

r~
On 8/27/19 7:36 PM, liuzhiwei wrote:
> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
> ---
> fpu/softfloat.c | 119 +
> include/fpu/softfloat.h | 4 +
> linux-user/riscv/cpu_loop.c | 8 +-
> target/riscv/Makefile.objs | 2 +-
> target/riscv/cpu.h | 30 +
> target/riscv/cpu_bits.h | 15 +
> target/riscv/cpu_helper.c | 7 +
> target/riscv/csr.c | 65 +-
> target/riscv/helper.h | 354 +
> target/riscv/insn32.decode | 374 +-
> target/riscv/insn_trans/trans_rvv.inc.c | 484 +
> target/riscv/translate.c | 1 +
> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++
> 13 files changed, 28017 insertions(+), 9 deletions(-)

As Alex mentioned, this is *far* too big to be presented as a single
patch.

> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
> index 3ff3fa5..3b0754c 100644
> --- a/include/fpu/softfloat.h
> +++ b/include/fpu/softfloat.h
> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status);
> float16 float16_sqrt(float16, float_status *status);
> int float16_compare(float16, float16, float_status *status);
> int float16_compare_quiet(float16, float16, float_status *status);
> +int float16_unordered_quiet(float16, float16, float_status *status);
> +int float16_le(float16, float16, float_status *status);
> +int float16_lt(float16, float16, float_status *status);
> +int float16_eq_quiet(float16, float16, float_status *status);

As Alex mentioned, none of these changes are required, as all
functionality is provided by float16_compare{,_quiet}.

> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c
> index 12aa3c0..b01548a 100644
> --- a/linux-user/riscv/cpu_loop.c
> +++ b/linux-user/riscv/cpu_loop.c
> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env)
> signum = 0;
> sigcode = 0;
> sigaddr = 0;
> -
> + if (env->foflag) {
> + if (env->vfp.vl != 0) {
> + env->foflag = false;
> + env->pc += 4;
> + continue;
> + }

This is most definitely not the correct way to implement first-fault.
You need to have a look at target/arm/sve_helper.c, e.g. sve_ldff1_r,
where we test pages for validity with tlb_vaddr_to_host.

> + /* vector coprocessor state. */
> + struct {
> + union VECTOR {
> + float64 f64[VUNIT(64)];
> + float32 f32[VUNIT(32)];
> + float16 f16[VUNIT(16)];
> + target_ulong ul[VUNIT(sizeof(target_ulong))];
> + uint64_t u64[VUNIT(64)];
> + int64_t s64[VUNIT(64)];
> + uint32_t u32[VUNIT(32)];
> + int32_t s32[VUNIT(32)];
> + uint16_t u16[VUNIT(16)];
> + int16_t s16[VUNIT(16)];
> + uint8_t u8[VUNIT(8)];
> + int8_t s8[VUNIT(8)];
> + } vreg[32];
> + target_ulong vxrm;
> + target_ulong vxsat;
> + target_ulong vl;
> + target_ulong vstart;
> + target_ulong vtype;
> + float_status fp_status;
> + } vfp;

You've obviously copied "vfp" from target/arm. Drop that. It makes no
sense in the context of risc-v.

I'm not sure that vreg[].element[] really makes the most sense in the
context of how risc-v rearranges its elements. It will almost certainly
fail clang validators, if enabled, since you'll be indexing beyond the
end of vreg[n] into vreg[n+1].

It might be best to have a single array:

    union {
        uint64_t u64[32 * VLEN / 64];
        ...
        uint8_t u8[32 * VLEN / 8];
    } velt;

This is clearer to the compiler that this is a single block of memory
that we can index as we please.

Note that float64/float32/float16 are legacy. They will always be
equivalent to the unsigned integer types of the same size.

Is there really any vector operation at all that is dependent on XLEN?
If not, then there is no reason to confuse things by including
target_ulong.

> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index e32b612..405caf6 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
> [PRV_H] = RISCV_EXCP_H_ECALL,
> [PRV_M] = RISCV_EXCP_M_ECALL
> };
> + if (env->foflag) {
> + if (env->vfp.vl != 0) {
> + env->foflag = false;
> + env->pc += 4;
> + return;
> + }
> + }

Again, not the way to implement first-fault. In particular, you haven't
even verified that do_interrupt has been called on behalf of a
RISCV_EXCP_LOAD_PAGE_FAULT. This could be a timer tick.

> +#define MAX_U8 ((uint8_t)0xff)
> +#define MIN_U8 ((uint8_t)0x0)
> +#define MAX_S8 ((int8_t)0x7f)
> +#define MIN_S8 ((int8_t)0x80)
> +#define SIGNBIT16 (1 << 15)
> +#define MAX_U16 ((uint16_t)0xffff)
> +#define MIN_U16 ((uint16_t)0x0)
> +#define MAX_S16 ((int16_t)0x7fff)
> +#define MIN_S16 ((int16_t)0x8000)
> +#define SIGNBIT32 (1 << 31)
> +#define MAX_U32 ((uint32_t)0xffffffff)
> +#define MIN_U32 ((uint32_t)0x0)
> +#define MAX_S32 ((int32_t)0x7fffffff)
> +#define MIN_S32 ((int32_t)0x80000000)
> +#define SIGNBIT64 ((uint64_t)1 << 63)
> +#define MAX_U64 ((uint64_t)0xffffffffffffffff)
> +#define MIN_U64 ((uint64_t)0x0)
> +#define MAX_S64 ((int64_t)0x7fffffffffffffff)
> +#define MIN_S64 ((int64_t)0x8000000000000000)

Why are you replicating INT8_MIN et al?

> +static target_ulong vector_get_index(CPURISCVState *env, int rs1, int rs2,
> + int index, int mem, int width, int nf)
> +{
> + target_ulong abs_off, base = env->gpr[rs1];
> + target_long offset;
> + switch (width) {
> + case 8:
> + offset = sign_extend(env->vfp.vreg[rs2].s8[index], 8) + nf * mem;
> + break;
> + case 16:
> + offset = sign_extend(env->vfp.vreg[rs2].s16[index], 16) + nf * mem;
> + break;
> + case 32:
> + offset = sign_extend(env->vfp.vreg[rs2].s32[index], 32) + nf * mem;
> + break;
> + case 64:
> + offset = env->vfp.vreg[rs2].s64[index] + nf * mem;
> + break;
> + default:
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());

This is broken. You cannot use GETPC() anywhere except in the outermost
HELPER(). Otherwise you're not computing the return address back into
the code_gen_buffer, which is what is required to properly unwind the
guest state.

> +static inline bool vector_vtype_ill(CPURISCVState *env)
> +{
> + if ((env->vfp.vtype >> (sizeof(target_ulong) - 1)) & 0x1) {
> + return true;
> + }
> + return false;
> +}
> +
> +static inline void vector_vtype_set_ill(CPURISCVState *env)
> +{
> + env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) - 1);
> + return;
> +}
> +
> +static inline int vector_vtype_get_sew(CPURISCVState *env)
> +{
> + return (env->vfp.vtype >> 2) & 0x7;
> +}
> +
> +static inline int vector_get_width(CPURISCVState *env)
> +{
> + return 8 * (1 << vector_vtype_get_sew(env));
> +}
> +
> +static inline int vector_get_lmul(CPURISCVState *env)
> +{
> + return 1 << (env->vfp.vtype & 0x3);
> +}
> +
> +static inline int vector_get_vlmax(CPURISCVState *env)
> +{
> + return vector_get_lmul(env) * VLEN / vector_get_width(env);
> +}
> +
> +static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int width,
> + int lmul, int index)
> +{
> + int mlen = width / lmul;
> + int idx = (index * mlen) / 8;
> + int pos = (index * mlen) % 8;
> +
> + return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1);
> +}

I would strongly encourage you to place the components of vtype within
tb_flags via cpu_get_tb_cpu_state.
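As a rough illustration of that packing (an editor's sketch, not code
from the thread: the bit positions are invented, while cpu_mmu_index()
is an existing API and vector_get_vlmax() is the helper from the
patch):

    static inline void cpu_get_tb_cpu_state(CPURISCVState *env,
                                            target_ulong *pc,
                                            target_ulong *cs_base,
                                            uint32_t *flags)
    {
        uint32_t vill = (env->vfp.vtype >> (TARGET_LONG_BITS - 1)) & 0x1;
        uint32_t lmul = env->vfp.vtype & 0x3;         /* vtype[1:0] */
        uint32_t sew = (env->vfp.vtype >> 2) & 0x7;   /* vtype[4:2] */
        uint32_t vl_eq_vlmax = (env->vfp.vstart == 0 &&
                                env->vfp.vl == vector_get_vlmax(env));

        *pc = env->pc;
        *cs_base = 0;
        *flags = cpu_mmu_index(env, false)            /* low bits: mmu index */
                 | (vill << 4)                        /* invented positions */
                 | (lmul << 5)
                 | (sew << 7)
                 | (vl_eq_vlmax << 10);
    }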
This would allow you to move quite a few checks from run-time to
translation-time. Recall that translation happens once (per
configuration), whereas execution happens many times. Obviously, the
more configurations that we create, the more translation that must
happen. But the vtypei argument to vsetvli is a good choice, because it
is constant, relates directly to the compiled code, and is unrelated to
the length of the data being processed.

With that, you can verify at translation:

(1) vill
(2) v[n], for (n % lmul) != 0
(3) v[n] overlapping v[0] for masked/carry operations, with lmul > 1

and (4) you can arrange the helpers so that instead of 1 helper that
has to handle all SEW, you have N helpers, each handling a different
SEW.

And with all of this done, I believe you no longer need to pass the
register number to the helper. You can pass the address of v[n], which
is much more like how the tcg generic vector support works.

Whether or not to include VL in tb_flags is a harder choice. Certainly
not the exact value of VL, as that would lead to different translations
for every loop tail. But it might be reasonable to include
(VSTART == 0 && VL == VLMAX) as a single bit. Knowing that this
condition is true would allow some use of the tcg generic vector
support. E.g. vadd.vv could be

    if (masked) {
        switch (SEW) {
        case MO_8:
            gen_helper_vadd8_mask(...);
            break;
        ...
        }
    } else if (vl_eq_vlmax) {
        tcg_gen_gvec_add(SEW, vreg_ofs(vd), vreg_ofs(vs2),
                         vreg_ofs(vs1), VLEN * LMUL, VLEN * LMUL);
    } else {
        switch (SEW) {
        case MO_8:
            gen_helper_vadd8(...);
            break;
        ...
        }
    }

Or, equivalently, pack pointers to the actual generator functions into
a structure so that this code structure can be shared between many
instructions.

Bear in mind that all tcg gvec operations operate strictly upon lanes.
I.e.

    vd[x] = vs1[x] op vs2[x]

thus the actual arrangement of the elements in storage is irrelevant
and SLEN need not be considered here.

r~
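To make the sve_ldff1_r pattern referred to above concrete, here is a
rough sketch of a first-fault unit-stride byte load (an editor's
illustration: the helper name and signature are invented, vfp.vl and
vfp.vreg are the patch's fields, and tlb_vaddr_to_host() and
cpu_ldsb_data_ra() are existing QEMU APIs):

    /* First-fault byte load, simplified: no masking, one element per
     * iteration.  Only element 0 may trap; later faults shrink vl. */
    void helper_vlbff_v_sketch(CPURISCVState *env, uint32_t rd, uint32_t rs1)
    {
        target_ulong addr = env->gpr[rs1];
        int mmu_idx = cpu_mmu_index(env, false);
        target_ulong i, vl = env->vfp.vl;

        /* The first element must fault just like a scalar load.  */
        env->vfp.vreg[rd].s8[0] = cpu_ldsb_data_ra(env, addr, GETPC());

        for (i = 1; i < vl; i++) {
            /* Probe instead of loading: a NULL result means the access
             * would fault (or can't be checked cheaply), so truncate vl
             * and stop rather than raise an exception.  */
            int8_t *host = tlb_vaddr_to_host(env, addr + i,
                                             MMU_DATA_LOAD, mmu_idx);
            if (host == NULL) {
                env->vfp.vl = i;
                return;
            }
            env->vfp.vreg[rd].s8[i] = *host;
        }
    }

A real implementation would also honour the mask and batch the probing
per page rather than per element, but the key point is that no
env->foflag-style state survives past the instruction.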
> On Wed, Aug 28, 2019 at 9:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote: > Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25 > Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com> > --- > Such large patch and "Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25" is its entire commit message?? Horrible. Aleksandar > fpu/softfloat.c | 119 + > include/fpu/softfloat.h | 4 + > linux-user/riscv/cpu_loop.c | 8 +- > target/riscv/Makefile.objs | 2 +- > target/riscv/cpu.h | 30 + > target/riscv/cpu_bits.h | 15 + > target/riscv/cpu_helper.c | 7 + > target/riscv/csr.c | 65 +- > target/riscv/helper.h | 354 + > target/riscv/insn32.decode | 374 +- > target/riscv/insn_trans/trans_rvv.inc.c | 484 + > target/riscv/translate.c | 1 + > target/riscv/vector_helper.c | 26563 > ++++++++++++++++++++++++++++++ > 13 files changed, 28017 insertions(+), 9 deletions(-) > create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c > create mode 100644 target/riscv/vector_helper.c > > diff --git a/fpu/softfloat.c b/fpu/softfloat.c > index 2ba36ec..da155ea 100644 > --- a/fpu/softfloat.c > +++ b/fpu/softfloat.c > @@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a) > } > > > /*---------------------------------------------------------------------------- > +| Returns the sign bit of the half-precision floating-point value `a'. > > +*----------------------------------------------------------------------------*/ > + > +static inline flag extractFloat16Sign(float16 a) > +{ > + return float16_val(a) >> 0xf; > +} > + > + > > +/*---------------------------------------------------------------------------- > | Returns the fraction bits of the single-precision floating-point value > `a'. > > *----------------------------------------------------------------------------*/ > > @@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b, float_status > *status) > } > > > /*---------------------------------------------------------------------------- > +| Returns 1 if the half-precision floating-point value `a' is less than > +| or equal to the corresponding value `b', and 0 otherwise. The invalid > +| exception is raised if either operand is a NaN. The comparison is > performed > +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. > > +*----------------------------------------------------------------------------*/ > + > +int float16_le(float16 a, float16 b, float_status *status) > +{ > + flag aSign, bSign; > + uint16_t av, bv; > + a = float16_squash_input_denormal(a, status); > + b = float16_squash_input_denormal(b, status); > + > + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a > ) ) > + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b > ) ) > + ) { > + float_raise(float_flag_invalid, status); > + return 0; > + } > + aSign = extractFloat16Sign( a ); > + bSign = extractFloat16Sign( b ); > + av = float16_val(a); > + bv = float16_val(b); > + if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av | bv )<<1 ) > == 0 ); > + return ( av == bv ) || ( aSign ^ ( av < bv ) ); > + > +} > + > > +/*---------------------------------------------------------------------------- > | Returns 1 if the single-precision floating-point value `a' is less than > | or equal to the corresponding value `b', and 0 otherwise. The invalid > | exception is raised if either operand is a NaN. The comparison is > performed > @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, float_status > *status) > | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
> > *----------------------------------------------------------------------------*/ > > +int float16_lt(float16 a, float16 b, float_status *status) > +{ > + flag aSign, bSign; > + uint16_t av, bv; > + a = float16_squash_input_denormal(a, status); > + b = float16_squash_input_denormal(b, status); > + > + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a > ) ) > + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b > ) ) > + ) { > + float_raise(float_flag_invalid, status); > + return 0; > + } > + aSign = extractFloat16Sign( a ); > + bSign = extractFloat16Sign( b ); > + av = float16_val(a); > + bv = float16_val(b); > + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | bv )<<1 ) > != 0 ); > + return ( av != bv ) && ( aSign ^ ( av < bv ) ); > + > +} > + > > +/*---------------------------------------------------------------------------- > +| Returns 1 if the single-precision floating-point value `a' is less than > +| the corresponding value `b', and 0 otherwise. The invalid exception is > +| raised if either operand is a NaN. The comparison is performed > according > +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. > > +*----------------------------------------------------------------------------*/ > + > int float32_lt(float32 a, float32 b, float_status *status) > { > flag aSign, bSign; > @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, > float_status *status) > } > > > /*---------------------------------------------------------------------------- > +| Returns 1 if the half-precision floating-point value `a' is equal to > +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause > an > +| exception. The comparison is performed according to the IEC/IEEE > Standard > +| for Binary Floating-Point Arithmetic. > > +*----------------------------------------------------------------------------*/ > + > +int float16_eq_quiet(float16 a, float16 b, float_status *status) > +{ > + a = float16_squash_input_denormal(a, status); > + b = float16_squash_input_denormal(b, status); > + > + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a > ) ) > + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b > ) ) > + ) { > + if (float16_is_signaling_nan(a, status) > + || float16_is_signaling_nan(b, status)) { > + float_raise(float_flag_invalid, status); > + } > + return 0; > + } > + return ( float16_val(a) == float16_val(b) ) || > + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 ) == 0 > ); > +} > + > + > > +/*---------------------------------------------------------------------------- > | Returns 1 if the single-precision floating-point value `a' is equal to > | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause > an > | exception. The comparison is performed according to the IEC/IEEE > Standard > @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, > float_status *status) > } > > > /*---------------------------------------------------------------------------- > +| Returns 1 if the half-precision floating-point values `a' and `b' cannot > +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. > The > +| comparison is performed according to the IEC/IEEE Standard for Binary > +| Floating-Point Arithmetic. 
> > +*----------------------------------------------------------------------------*/ > + > +int float16_unordered_quiet(float16 a, float16 b, float_status *status) > +{ > + a = float16_squash_input_denormal(a, status); > + b = float16_squash_input_denormal(b, status); > + > + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a > ) ) > + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b > ) ) > + ) { > + if (float16_is_signaling_nan(a, status) > + || float16_is_signaling_nan(b, status)) { > + float_raise(float_flag_invalid, status); > + } > + return 1; > + } > + return 0; > +} > + > + > > +/*---------------------------------------------------------------------------- > | Returns 1 if the single-precision floating-point values `a' and `b' > cannot > | be compared, and 0 otherwise. Quiet NaNs do not cause an exception. > The > | comparison is performed according to the IEC/IEEE Standard for Binary > diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h > index 3ff3fa5..3b0754c 100644 > --- a/include/fpu/softfloat.h > +++ b/include/fpu/softfloat.h > @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, > float_status *status); > float16 float16_sqrt(float16, float_status *status); > int float16_compare(float16, float16, float_status *status); > int float16_compare_quiet(float16, float16, float_status *status); > +int float16_unordered_quiet(float16, float16, float_status *status); > +int float16_le(float16, float16, float_status *status); > +int float16_lt(float16, float16, float_status *status); > +int float16_eq_quiet(float16, float16, float_status *status); > > int float16_is_quiet_nan(float16, float_status *status); > int float16_is_signaling_nan(float16, float_status *status); > diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c > index 12aa3c0..b01548a 100644 > --- a/linux-user/riscv/cpu_loop.c > +++ b/linux-user/riscv/cpu_loop.c > @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env) > signum = 0; > sigcode = 0; > sigaddr = 0; > - > + if (env->foflag) { > + if (env->vfp.vl != 0) { > + env->foflag = false; > + env->pc += 4; > + continue; > + } > + } > switch (trapnr) { > case EXCP_INTERRUPT: > /* just indicate that signals should be handled asap */ > diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs > index b1c79bc..d577cef 100644 > --- a/target/riscv/Makefile.objs > +++ b/target/riscv/Makefile.objs > @@ -1,4 +1,4 @@ > -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o > gdbstub.o pmp.o > +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o > vector_helper.o gdbstub.o pmp.o > > DECODETREE = $(SRC_PATH)/scripts/decodetree.py > > diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h > index 0adb307..5a93aa2 100644 > --- a/target/riscv/cpu.h > +++ b/target/riscv/cpu.h > @@ -67,6 +67,7 @@ > #define RVC RV('C') > #define RVS RV('S') > #define RVU RV('U') > +#define RVV RV('V') > > /* S extension denotes that Supervisor mode exists, however it is possible > to have a core that support S mode but does not have an MMU and there > @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState; > > #include "pmp.h" > > +#define VLEN 128 > +#define VUNIT(x) (VLEN / x) > + > struct CPURISCVState { > target_ulong gpr[32]; > uint64_t fpr[32]; /* assume both F and D extensions */ > + > + /* vector coprocessor state. 
*/ > + struct { > + union VECTOR { > + float64 f64[VUNIT(64)]; > + float32 f32[VUNIT(32)]; > + float16 f16[VUNIT(16)]; > + target_ulong ul[VUNIT(sizeof(target_ulong))]; > + uint64_t u64[VUNIT(64)]; > + int64_t s64[VUNIT(64)]; > + uint32_t u32[VUNIT(32)]; > + int32_t s32[VUNIT(32)]; > + uint16_t u16[VUNIT(16)]; > + int16_t s16[VUNIT(16)]; > + uint8_t u8[VUNIT(8)]; > + int8_t s8[VUNIT(8)]; > + } vreg[32]; > + target_ulong vxrm; > + target_ulong vxsat; > + target_ulong vl; > + target_ulong vstart; > + target_ulong vtype; > + float_status fp_status; > + } vfp; > + > + bool foflag; > target_ulong pc; > target_ulong load_res; > target_ulong load_val; > diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h > index 11f971a..9eb43ec 100644 > --- a/target/riscv/cpu_bits.h > +++ b/target/riscv/cpu_bits.h > @@ -29,6 +29,14 @@ > #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) > #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | > FSR_NXA) > > +/* Vector Fixed-Point round model */ > +#define FSR_VXRM_SHIFT 9 > +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT) > + > +/* Vector Fixed-Point saturation flag */ > +#define FSR_VXSAT_SHIFT 8 > +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT) > + > /* Control and Status Registers */ > > /* User Trap Setup */ > @@ -48,6 +56,13 @@ > #define CSR_FRM 0x002 > #define CSR_FCSR 0x003 > > +/* User Vector CSRs */ > +#define CSR_VSTART 0x008 > +#define CSR_VXSAT 0x009 > +#define CSR_VXRM 0x00a > +#define CSR_VL 0xc20 > +#define CSR_VTYPE 0xc21 > + > /* User Timers and Counters */ > #define CSR_CYCLE 0xc00 > #define CSR_TIME 0xc01 > diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c > index e32b612..405caf6 100644 > --- a/target/riscv/cpu_helper.c > +++ b/target/riscv/cpu_helper.c > @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs) > [PRV_H] = RISCV_EXCP_H_ECALL, > [PRV_M] = RISCV_EXCP_M_ECALL > }; > + if (env->foflag) { > + if (env->vfp.vl != 0) { > + env->foflag = false; > + env->pc += 4; > + return; > + } > + } > > if (!async) { > /* set tval to badaddr for traps with address information */ > diff --git a/target/riscv/csr.c b/target/riscv/csr.c > index e0d4586..a6131ff 100644 > --- a/target/riscv/csr.c > +++ b/target/riscv/csr.c > @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno) > return 0; > } > > -#if !defined(CONFIG_USER_ONLY) > static int any(CPURISCVState *env, int csrno) > { > return 0; > } > > +#if !defined(CONFIG_USER_ONLY) > static int smode(CPURISCVState *env, int csrno) > { > return -!riscv_has_ext(env, RVS); > @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, > target_ulong *val) > return -1; > } > #endif > - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) > - | (env->frm << FSR_RD_SHIFT); > + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT) > + | (env->vfp.vxsat << FSR_VXSAT_SHIFT) > + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) > + | (env->frm << FSR_RD_SHIFT); > return 0; > } > > @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, int csrno, > target_ulong val) > env->mstatus |= MSTATUS_FS; > #endif > env->frm = (val & FSR_RD) >> FSR_RD_SHIFT; > + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT; > + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT; > riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT); > return 0; > } > > +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val) > +{ > + *val = env->vfp.vtype; > + return 0; > +} > + > +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val) > +{ > + *val = env->vfp.vl; > + 
return 0; > +} > + > +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val) > +{ > + *val = env->vfp.vxrm; > + return 0; > +} > + > +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val) > +{ > + *val = env->vfp.vxsat; > + return 0; > +} > + > +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val) > +{ > + *val = env->vfp.vstart; > + return 0; > +} > + > +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val) > +{ > + env->vfp.vxrm = val; > + return 0; > +} > + > +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val) > +{ > + env->vfp.vxsat = val; > + return 0; > +} > + > +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val) > +{ > + env->vfp.vstart = val; > + return 0; > +} > + > /* User Timers and Counters */ > static int read_instret(CPURISCVState *env, int csrno, target_ulong *val) > { > @@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = > { > [CSR_FFLAGS] = { fs, read_fflags, write_fflags > }, > [CSR_FRM] = { fs, read_frm, write_frm > }, > [CSR_FCSR] = { fs, read_fcsr, write_fcsr > }, > - > + /* Vector CSRs */ > + [CSR_VSTART] = { any, read_vstart, write_vstart > }, > + [CSR_VXSAT] = { any, read_vxsat, write_vxsat > }, > + [CSR_VXRM] = { any, read_vxrm, write_vxrm > }, > + [CSR_VL] = { any, read_vl > }, > + [CSR_VTYPE] = { any, read_vtype > }, > /* User Timers and Counters */ > [CSR_CYCLE] = { ctr, read_instret > }, > [CSR_INSTRET] = { ctr, read_instret > }, > diff --git a/target/riscv/helper.h b/target/riscv/helper.h > index debb22a..fee02c0 100644 > --- a/target/riscv/helper.h > +++ b/target/riscv/helper.h > @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl) > DEF_HELPER_1(wfi, void, env) > DEF_HELPER_1(tlb_flush, void, env) > #endif > +/* Vector functions */ > +DEF_HELPER_5(vector_vlb_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlh_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlw_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vle_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlbu_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlhu_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlwu_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlbff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlhff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlwff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vleff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlbuff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlhuff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlwuff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsb_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsh_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsw_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vse_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlsb_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlsh_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlsw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlse_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlsbu_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlshu_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlswu_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vssb_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vssh_v, void, env, i32, i32, i32, i32, i32) > 
+DEF_HELPER_6(vector_vssw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsse_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxb_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxh_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxe_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxbu_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxhu_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxwu_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsxb_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsxh_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsxw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsxe_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsuxb_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsuxh_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsuxw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsuxe_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoswapw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoswapd_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoaddw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoaddd_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoxorw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoxord_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoandw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoandd_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoorw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoord_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamominw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamomind_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamomaxw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamomaxd_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamominuw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamominud_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamomaxuw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamomaxud_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_4(vector_vext_x_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfmv_f_s, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmv_s_x, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfmv_s_f, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vadc_vvm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vadc_vxm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vadc_vim, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmadc_vvm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmadc_vxm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmadc_vim, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vsbc_vvm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vsbc_vxm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmsbc_vvm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmsbc_vxm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmpopc_m, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmfirst_m, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vcompress_vm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmandnot_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmand_mm, void, env, i32, i32, 
i32) > +DEF_HELPER_4(vector_vmor_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmxor_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmornot_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmnand_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmnor_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmxnor_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmsbf_m, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmsof_m, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmsif_m, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_viota_m, void, env, i32, i32, i32) > +DEF_HELPER_3(vector_vid_v, void, env, i32, i32) > +DEF_HELPER_4(vector_vfcvt_xu_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfcvt_x_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfcvt_f_xu_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfcvt_f_x_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfwcvt_xu_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfwcvt_x_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfwcvt_f_xu_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfwcvt_f_x_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfwcvt_f_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfncvt_xu_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfncvt_x_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfncvt_f_xu_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfncvt_f_x_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfncvt_f_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfsqrt_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfclass_v, void, env, i32, i32, i32) > +DEF_HELPER_5(vector_vadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vadd_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vadd_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredsum_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfadd_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredand_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfredsum_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsub_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredor_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsub_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrsub_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrsub_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredxor_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfredosum_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vminu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vminu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredminu_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmin_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmin_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmin_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmin_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredmin_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfredmin_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmaxu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmaxu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredmaxu_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmax_vv, 
void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmax_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmax_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmax_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredmax_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfredmax_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsgnj_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsgnj_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vand_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vand_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vand_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsgnjn_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsgnjn_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vor_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vor_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vor_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsgnjx_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsgnjx_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vxor_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vxor_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vxor_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrgather_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrgather_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrgather_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vslideup_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vslideup_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vslide1up_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vslidedown_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vslidedown_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vslide1down_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmerge_vvm, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmerge_vxm, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmerge_vim, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmerge_vfm, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmseq_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmseq_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmseq_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfeq_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfeq_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsne_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsne_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsne_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfle_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfle_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsltu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsltu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmford_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmford_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmslt_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmslt_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmflt_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmflt_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsleu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsleu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsleu_vi, void, env, i32, i32, i32, i32) > 
+DEF_HELPER_5(vector_vmfne_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfne_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsle_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsle_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsle_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfgt_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsgtu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsgtu_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsgt_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsgt_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfge_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsaddu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsaddu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsaddu_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vdivu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vdivu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfdiv_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfdiv_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsadd_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsadd_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vdiv_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vdiv_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfrdiv_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssubu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssubu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vremu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vremu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssub_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrem_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrem_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vaadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vaadd_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vaadd_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmulhu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmulhu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmul_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmul_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsll_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsll_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsll_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmul_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmul_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vasub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vasub_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmulhsu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmulhsu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsmul_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsmul_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmulh_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmulh_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfrsub_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsrl_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsrl_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsrl_vi, 
void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmadd_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsra_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsra_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsra_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmadd_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmadd_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssrl_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssrl_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssrl_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmsub_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssra_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssra_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssra_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnmsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnmsub_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmsub_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnsrl_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnsrl_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnsrl_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmacc_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnsra_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnsra_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnsra_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmacc_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmacc_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnclipu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnclipu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnclipu_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmsac_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmsac_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnclip_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnclip_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnclip_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnmsac_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnmsac_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmsac_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmsac_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwredsumu_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwaddu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwaddu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwadd_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwredsum_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwadd_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwredsum_vs, void, env, i32, i32, i32, i32) > 
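All of these helpers take every operand as an immediate i32 and do the
whole operation out of line. For the simple unmasked integer ops
(vadd/vsub/vand/vor/vxor and friends) you could expand inline with the
generic tcg_gen_gvec_* expanders instead, as other SIMD frontends do.
A minimal sketch only, assuming SEW is encoded in the TB flags so the
element width is known at translate time, and assuming a vreg_ofs()
helper for the env offsets of the vector registers (neither exists in
this patch):

    static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a)
    {
        if (a->vm) {
            /* unmasked case; vece derived from SEW in the TB flags,
             * which this patch does not do yet */
            tcg_gen_gvec_add(ctx->vece, vreg_ofs(a->rd), vreg_ofs(a->rs2),
                             vreg_ofs(a->rs1), 16, 16); /* VLEN=128 -> 16 bytes */
            return true;
        }
        /* masked forms and vl/tail handling still go via a helper */
        return trans_vadd_vv_slow(ctx, a); /* hypothetical fallback */
    }

That keeps the common case inline and would also take a sizeable chunk
out of the new helper file.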
+DEF_HELPER_5(vector_vwsubu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsubu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwsub_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsub_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwredosum_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwaddu_wv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwaddu_wx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwadd_wv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwadd_wf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwadd_wv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwadd_wx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsubu_wv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsubu_wx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwsub_wv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwsub_wf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsub_wv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsub_wx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmulu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmulu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwmul_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwmul_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmulsu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmulsu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmul_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmul_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmaccu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmaccu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmaccu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmaccu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwmacc_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmacc_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmacc_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwnmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwnmacc_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmaccsu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmaccsu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmaccsu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmaccsu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwmsac_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwmsac_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmaccus_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmaccus_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwnmsac_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwnmsac_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32) > diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode > index 77f794e..d125ff9 100644 > --- a/target/riscv/insn32.decode > +++ b/target/riscv/insn32.decode > @@ -25,7 +25,7 @@ > %sh10 20:10 > %csr 20:12 > 
%rm 12:3 > - > +%nf 29:3 > # immediates: > %imm_i 20:s12 > %imm_s 25:s7 7:5 > @@ -43,7 +43,6 @@ > &u imm rd > &shift shamt rs1 rd > &atomic aq rl rs2 rs1 rd > - > # Formats 32: > @r ....... ..... ..... ... ..... ....... &r %rs2 > %rs1 %rd > @i ............ ..... ... ..... ....... &i imm=%imm_i > %rs1 %rd > @@ -62,11 +61,17 @@ > @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd > @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd > @r2 ....... ..... ..... ... ..... ....... %rs1 %rd > +@r_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd > +@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd > +@r_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd > +@r2_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs1 %rd > +@r2_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rd > +@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd > +@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd > > @sfence_vma ....... ..... ..... ... ..... ....... %rs2 %rs1 > @sfence_vm ....... ..... ..... ... ..... ....... %rs1 > > - > # *** Privileged Instructions *** > ecall 000000000000 00000 000 00000 1110011 > ebreak 000000000001 00000 000 00000 1110011 > @@ -203,3 +208,366 @@ fcvt_w_d 1100001 00000 ..... ... ..... 1010011 > @r2_rm > fcvt_wu_d 1100001 00001 ..... ... ..... 1010011 @r2_rm > fcvt_d_w 1101001 00000 ..... ... ..... 1010011 @r2_rm > fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm > + > +# *** RV32V Standard Extension *** > + > +# *** Vector loads and stores are encoded within LOADFP/STORE-FP *** > +vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm > +vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm > +vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm > +vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm > +vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm > +vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm > +vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm > +vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm > +vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm > +vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm > +vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm > +vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm > +vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm > +vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm > +vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm > +vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm > +vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm > +vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm > + > +vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm > +vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm > +vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm > +vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm > +vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm > +vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm > +vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm > +vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm > +vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm > +vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm > +vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm > + > +vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm > +vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm > +vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm > +vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm > +vlxbu_v ... 011 . ..... ..... 
000 ..... 0000111 @r_nfvm > +vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm > +vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm > +vsxb_v ... 011 . ..... ..... 000 ..... 0100111 @r_nfvm > +vsxh_v ... 011 . ..... ..... 101 ..... 0100111 @r_nfvm > +vsxw_v ... 011 . ..... ..... 110 ..... 0100111 @r_nfvm > +vsxe_v ... 011 . ..... ..... 111 ..... 0100111 @r_nfvm > +vsuxb_v ... 111 . ..... ..... 000 ..... 0100111 @r_nfvm > +vsuxh_v ... 111 . ..... ..... 101 ..... 0100111 @r_nfvm > +vsuxw_v ... 111 . ..... ..... 110 ..... 0100111 @r_nfvm > +vsuxe_v ... 111 . ..... ..... 111 ..... 0100111 @r_nfvm > + > +#*** Vector AMO operations are encoded under the standard AMO major > opcode.*** > +vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm > + > +#*** new major opcode OP-V *** > +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm > +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm > +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm > +vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm > +vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm > +vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm > +vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm > +vfredsum_vs 000001 . ..... ..... 001 ..... 1010111 @r_vm > +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm > +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm > +vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm > +vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm > +vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm > +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm > +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm > +vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm > +vfredosum_vs 000011 . ..... ..... 001 ..... 1010111 @r_vm > +vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm > +vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm > +vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm > +vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm > +vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm > +vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm > +vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm > +vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm > +vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm > +vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm > +vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm > +vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm > +vfmax_vv 000110 . 
..... ..... 001 ..... 1010111 @r_vm > +vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm > +vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm > +vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm > +vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm > +vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm > +vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm > +vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm > +vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm > +vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm > +vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm > +vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm > +vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm > +vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm > +vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm > +vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm > +vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm > +vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm > +vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm > +vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm > +vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm > +vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm > +vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm > +vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm > +vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r > +vfmv_f_s 001100 1 ..... ..... 001 ..... 1010111 @r > +vmv_s_x 001101 1 ..... ..... 110 ..... 1010111 @r > +vfmv_s_f 001101 1 ..... ..... 101 ..... 1010111 @r > +vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm > +vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm > +vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm > +vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm > +vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm > +vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm > +vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r > +vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r > +vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r > +vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r > +vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r > +vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r > +vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r > +vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r > +vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r > +vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r > +vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm > +vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm > +vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm > +vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm > +vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm > +viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm > +vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm > +vmerge_vvm 010111 . ..... ..... 000 ..... 1010111 @r_vm > +vmerge_vxm 010111 . ..... ..... 100 ..... 1010111 @r_vm > +vmerge_vim 010111 . ..... ..... 011 ..... 1010111 @r_vm > +vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r > +vfmerge_vfm 010111 . ..... ..... 101 ..... 1010111 @r_vm > +vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm > +vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm > +vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm > +vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r > +vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm > +vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm > +vmsne_vv 011001 . ..... ..... 
000 ..... 1010111 @r_vm > +vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm > +vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm > +vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r > +vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm > +vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm > +vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm > +vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm > +vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r > +vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm > +vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm > +vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm > +vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm > +vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r > +vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm > +vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm > +vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm > +vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm > +vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm > +vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r > +vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm > +vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm > +vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm > +vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm > +vmsle_vi 011101 . ..... ..... 011 ..... 1010111 @r_vm > +vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r > +vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm > +vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm > +vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm > +vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r > +vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm > +vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm > +vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r > +vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm > +vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm > +vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm > +vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm > +vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm > +vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm > +vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm > +vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm > +vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm > +vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm > +vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm > +vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm > +vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm > +vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm > +vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm > +vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm > +vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm > +vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm > +vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm > +vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm > +vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm > +vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm > +vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm > +vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm > +vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm > +vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm > +vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm > +vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm > +vfncvt_x_f_v 100010 . ..... 10001 001 ..... 
1010111 @r2_vm > +vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm > +vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm > +vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm > +vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm > +vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm > +vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm > +vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm > +vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm > +vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm > +vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm > +vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm > +vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm > +vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm > +vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm > +vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm > +vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm > +vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm > +vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm > +vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm > +vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm > +vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm > +vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm > +vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm > +vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm > +vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm > +vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm > +vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm > +vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm > +vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm > +vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm > +vsrl_vv 101000 . ..... ..... 000 ..... 1010111 @r_vm > +vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm > +vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm > +vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm > +vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm > +vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm > +vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm > +vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm > +vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm > +vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm > +vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm > +vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm > +vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm > +vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm > +vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm > +vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm > +vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm > +vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm > +vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm > +vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm > +vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm > +vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm > +vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm > +vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm > +vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm > +vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm > +vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm > +vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm > +vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm > +vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm > +vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm > +vnsra_vi 101101 . ..... ..... 
011 ..... 1010111 @r_vm > +vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm > +vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm > +vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm > +vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm > +vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm > +vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm > +vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm > +vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm > +vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm > +vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm > +vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm > +vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm > +vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm > +vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm > +vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm > +vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm > +vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm > +vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm > +vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm > +vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm > +vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm > +vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm > +vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm > +vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm > +vfwredsum_vs 110001 . ..... ..... 001 ..... 1010111 @r_vm > +vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm > +vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm > +vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm > +vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm > +vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm > +vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm > +vfwredosum_vs 110011 . ..... ..... 001 ..... 1010111 @r_vm > +vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm > +vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm > +vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm > +vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm > +vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm > +vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm > +vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm > +vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm > +vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm > +vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm > +vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm > +vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm > +vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm > +vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm > +vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm > +vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm > +vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm > +vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm > +vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm > +vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm > +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm > +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm > +vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm > +vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm > +vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm > +vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm > +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm > +vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm > +vwmacc_vv 111101 . ..... 
..... 010 ..... 1010111 @r_vm > +vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm > +vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm > +vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm > +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm > +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm > +vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm > +vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm > +vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm > +vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm > +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm > +vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm > +vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm > +vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm > +vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm > +vsetvl 1000000 ..... ..... 111 ..... 1010111 @r > diff --git a/target/riscv/insn_trans/trans_rvv.inc.c > b/target/riscv/insn_trans/trans_rvv.inc.c > new file mode 100644 > index 0000000..dc8e6ce > --- /dev/null > +++ b/target/riscv/insn_trans/trans_rvv.inc.c > @@ -0,0 +1,484 @@ > +/* > + * RISC-V translation routines for the RVV Standard Extension. > + * > + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2 or later, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > for > + * more details. > + * > + * You should have received a copy of the GNU General Public License > along with > + * this program. If not, see <http://www.gnu.org/licenses/>. 
> + */ > + > +#define GEN_VECTOR_R2_NFVM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 nf = tcg_const_i32(a->nf); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, d); \ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(nf); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > +#define GEN_VECTOR_R_NFVM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 nf = tcg_const_i32(a->nf); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, s2, d);\ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(nf); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > + > +#define GEN_VECTOR_R_WDVM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 wd = tcg_const_i32(a->wd); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, wd, vm, s1, s2, d);\ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(wd); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > +#define GEN_VECTOR_R(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + gen_helper_vector_##INSN(cpu_env, s1, s2, d); \ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + return true; \ > +} > +#define GEN_VECTOR_R2_VM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, vm, s2, d); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > + > +#define GEN_VECTOR_R1_VM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, vm, d); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > +#define GEN_VECTOR_R_VM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, vm, s1, s2, d); \ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > +#define GEN_VECTOR_R2_ZIMM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 zimm = tcg_const_i32(a->zimm); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + gen_helper_vector_##INSN(cpu_env, s1, zimm, d); \ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(zimm); \ > + tcg_temp_free_i32(d); \ > + 
return true; \ > +} > + > +GEN_VECTOR_R2_NFVM(vlb_v) > +GEN_VECTOR_R2_NFVM(vlh_v) > +GEN_VECTOR_R2_NFVM(vlw_v) > +GEN_VECTOR_R2_NFVM(vle_v) > +GEN_VECTOR_R2_NFVM(vlbu_v) > +GEN_VECTOR_R2_NFVM(vlhu_v) > +GEN_VECTOR_R2_NFVM(vlwu_v) > +GEN_VECTOR_R2_NFVM(vlbff_v) > +GEN_VECTOR_R2_NFVM(vlhff_v) > +GEN_VECTOR_R2_NFVM(vlwff_v) > +GEN_VECTOR_R2_NFVM(vleff_v) > +GEN_VECTOR_R2_NFVM(vlbuff_v) > +GEN_VECTOR_R2_NFVM(vlhuff_v) > +GEN_VECTOR_R2_NFVM(vlwuff_v) > +GEN_VECTOR_R2_NFVM(vsb_v) > +GEN_VECTOR_R2_NFVM(vsh_v) > +GEN_VECTOR_R2_NFVM(vsw_v) > +GEN_VECTOR_R2_NFVM(vse_v) > + > +GEN_VECTOR_R_NFVM(vlsb_v) > +GEN_VECTOR_R_NFVM(vlsh_v) > +GEN_VECTOR_R_NFVM(vlsw_v) > +GEN_VECTOR_R_NFVM(vlse_v) > +GEN_VECTOR_R_NFVM(vlsbu_v) > +GEN_VECTOR_R_NFVM(vlshu_v) > +GEN_VECTOR_R_NFVM(vlswu_v) > +GEN_VECTOR_R_NFVM(vssb_v) > +GEN_VECTOR_R_NFVM(vssh_v) > +GEN_VECTOR_R_NFVM(vssw_v) > +GEN_VECTOR_R_NFVM(vsse_v) > +GEN_VECTOR_R_NFVM(vlxb_v) > +GEN_VECTOR_R_NFVM(vlxh_v) > +GEN_VECTOR_R_NFVM(vlxw_v) > +GEN_VECTOR_R_NFVM(vlxe_v) > +GEN_VECTOR_R_NFVM(vlxbu_v) > +GEN_VECTOR_R_NFVM(vlxhu_v) > +GEN_VECTOR_R_NFVM(vlxwu_v) > +GEN_VECTOR_R_NFVM(vsxb_v) > +GEN_VECTOR_R_NFVM(vsxh_v) > +GEN_VECTOR_R_NFVM(vsxw_v) > +GEN_VECTOR_R_NFVM(vsxe_v) > +GEN_VECTOR_R_NFVM(vsuxb_v) > +GEN_VECTOR_R_NFVM(vsuxh_v) > +GEN_VECTOR_R_NFVM(vsuxw_v) > +GEN_VECTOR_R_NFVM(vsuxe_v) > + > +GEN_VECTOR_R_WDVM(vamoswapw_v) > +GEN_VECTOR_R_WDVM(vamoswapd_v) > +GEN_VECTOR_R_WDVM(vamoaddw_v) > +GEN_VECTOR_R_WDVM(vamoaddd_v) > +GEN_VECTOR_R_WDVM(vamoxorw_v) > +GEN_VECTOR_R_WDVM(vamoxord_v) > +GEN_VECTOR_R_WDVM(vamoandw_v) > +GEN_VECTOR_R_WDVM(vamoandd_v) > +GEN_VECTOR_R_WDVM(vamoorw_v) > +GEN_VECTOR_R_WDVM(vamoord_v) > +GEN_VECTOR_R_WDVM(vamominw_v) > +GEN_VECTOR_R_WDVM(vamomind_v) > +GEN_VECTOR_R_WDVM(vamomaxw_v) > +GEN_VECTOR_R_WDVM(vamomaxd_v) > +GEN_VECTOR_R_WDVM(vamominuw_v) > +GEN_VECTOR_R_WDVM(vamominud_v) > +GEN_VECTOR_R_WDVM(vamomaxuw_v) > +GEN_VECTOR_R_WDVM(vamomaxud_v) > + > +GEN_VECTOR_R(vext_x_v) > +GEN_VECTOR_R(vfmv_f_s) > +GEN_VECTOR_R(vmv_s_x) > +GEN_VECTOR_R(vfmv_s_f) > +GEN_VECTOR_R(vadc_vvm) > +GEN_VECTOR_R(vadc_vxm) > +GEN_VECTOR_R(vadc_vim) > +GEN_VECTOR_R(vmadc_vvm) > +GEN_VECTOR_R(vmadc_vxm) > +GEN_VECTOR_R(vmadc_vim) > +GEN_VECTOR_R(vsbc_vvm) > +GEN_VECTOR_R(vsbc_vxm) > +GEN_VECTOR_R(vmsbc_vvm) > +GEN_VECTOR_R(vmsbc_vxm) > +GEN_VECTOR_R2_VM(vmpopc_m) > +GEN_VECTOR_R2_VM(vmfirst_m) > +GEN_VECTOR_R(vcompress_vm) > +GEN_VECTOR_R(vmandnot_mm) > +GEN_VECTOR_R(vmand_mm) > +GEN_VECTOR_R(vmor_mm) > +GEN_VECTOR_R(vmxor_mm) > +GEN_VECTOR_R(vmornot_mm) > +GEN_VECTOR_R(vmnand_mm) > +GEN_VECTOR_R(vmnor_mm) > +GEN_VECTOR_R(vmxnor_mm) > +GEN_VECTOR_R2_VM(vmsbf_m) > +GEN_VECTOR_R2_VM(vmsof_m) > +GEN_VECTOR_R2_VM(vmsif_m) > +GEN_VECTOR_R2_VM(viota_m) > +GEN_VECTOR_R1_VM(vid_v) > +GEN_VECTOR_R2_VM(vfcvt_xu_f_v) > +GEN_VECTOR_R2_VM(vfcvt_x_f_v) > +GEN_VECTOR_R2_VM(vfcvt_f_xu_v) > +GEN_VECTOR_R2_VM(vfcvt_f_x_v) > +GEN_VECTOR_R2_VM(vfwcvt_xu_f_v) > +GEN_VECTOR_R2_VM(vfwcvt_x_f_v) > +GEN_VECTOR_R2_VM(vfwcvt_f_xu_v) > +GEN_VECTOR_R2_VM(vfwcvt_f_x_v) > +GEN_VECTOR_R2_VM(vfwcvt_f_f_v) > +GEN_VECTOR_R2_VM(vfncvt_xu_f_v) > +GEN_VECTOR_R2_VM(vfncvt_x_f_v) > +GEN_VECTOR_R2_VM(vfncvt_f_xu_v) > +GEN_VECTOR_R2_VM(vfncvt_f_x_v) > +GEN_VECTOR_R2_VM(vfncvt_f_f_v) > +GEN_VECTOR_R2_VM(vfsqrt_v) > +GEN_VECTOR_R2_VM(vfclass_v) > + > +GEN_VECTOR_R_VM(vadd_vv) > +GEN_VECTOR_R_VM(vadd_vx) > +GEN_VECTOR_R_VM(vadd_vi) > +GEN_VECTOR_R_VM(vredsum_vs) > +GEN_VECTOR_R_VM(vfadd_vv) > +GEN_VECTOR_R_VM(vfadd_vf) > +GEN_VECTOR_R_VM(vredand_vs) > +GEN_VECTOR_R_VM(vfredsum_vs) > 
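Unless the checks are buried in the helpers (I may have missed them in
a file this size), none of these generated trans functions verify that
the vector extension is actually enabled: each one unconditionally
returns true, so a CPU without RVV would execute vector instructions
instead of raising an illegal instruction exception. The other optional
extensions gate their trans functions, e.g. REQUIRE_EXT(ctx, RVD) in
trans_rvd.inc.c. Assuming an RVV misa bit is defined somewhere in the
series (I don't see one in this hunk), a minimal sketch along the
existing lines:

    #define REQUIRE_RVV do {        \
        if (!has_ext(ctx, RVV)) {   \
            return false;           \
        }                           \
    } while (0)

with REQUIRE_RVV at the top of each generated trans function; returning
false lets the decoder fall through to the illegal-instruction path.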
+GEN_VECTOR_R_VM(vsub_vv) > +GEN_VECTOR_R_VM(vsub_vx) > +GEN_VECTOR_R_VM(vredor_vs) > +GEN_VECTOR_R_VM(vfsub_vv) > +GEN_VECTOR_R_VM(vfsub_vf) > +GEN_VECTOR_R_VM(vrsub_vx) > +GEN_VECTOR_R_VM(vrsub_vi) > +GEN_VECTOR_R_VM(vredxor_vs) > +GEN_VECTOR_R_VM(vfredosum_vs) > +GEN_VECTOR_R_VM(vminu_vv) > +GEN_VECTOR_R_VM(vminu_vx) > +GEN_VECTOR_R_VM(vredminu_vs) > +GEN_VECTOR_R_VM(vfmin_vv) > +GEN_VECTOR_R_VM(vfmin_vf) > +GEN_VECTOR_R_VM(vmin_vv) > +GEN_VECTOR_R_VM(vmin_vx) > +GEN_VECTOR_R_VM(vredmin_vs) > +GEN_VECTOR_R_VM(vfredmin_vs) > +GEN_VECTOR_R_VM(vmaxu_vv) > +GEN_VECTOR_R_VM(vmaxu_vx) > +GEN_VECTOR_R_VM(vredmaxu_vs) > +GEN_VECTOR_R_VM(vfmax_vv) > +GEN_VECTOR_R_VM(vfmax_vf) > +GEN_VECTOR_R_VM(vmax_vv) > +GEN_VECTOR_R_VM(vmax_vx) > +GEN_VECTOR_R_VM(vredmax_vs) > +GEN_VECTOR_R_VM(vfredmax_vs) > +GEN_VECTOR_R_VM(vfsgnj_vv) > +GEN_VECTOR_R_VM(vfsgnj_vf) > +GEN_VECTOR_R_VM(vand_vv) > +GEN_VECTOR_R_VM(vand_vx) > +GEN_VECTOR_R_VM(vand_vi) > +GEN_VECTOR_R_VM(vfsgnjn_vv) > +GEN_VECTOR_R_VM(vfsgnjn_vf) > +GEN_VECTOR_R_VM(vor_vv) > +GEN_VECTOR_R_VM(vor_vx) > +GEN_VECTOR_R_VM(vor_vi) > +GEN_VECTOR_R_VM(vfsgnjx_vv) > +GEN_VECTOR_R_VM(vfsgnjx_vf) > +GEN_VECTOR_R_VM(vxor_vv) > +GEN_VECTOR_R_VM(vxor_vx) > +GEN_VECTOR_R_VM(vxor_vi) > +GEN_VECTOR_R_VM(vrgather_vv) > +GEN_VECTOR_R_VM(vrgather_vx) > +GEN_VECTOR_R_VM(vrgather_vi) > +GEN_VECTOR_R_VM(vslideup_vx) > +GEN_VECTOR_R_VM(vslideup_vi) > +GEN_VECTOR_R_VM(vslide1up_vx) > +GEN_VECTOR_R_VM(vslidedown_vx) > +GEN_VECTOR_R_VM(vslidedown_vi) > +GEN_VECTOR_R_VM(vslide1down_vx) > +GEN_VECTOR_R_VM(vmerge_vvm) > +GEN_VECTOR_R_VM(vmerge_vxm) > +GEN_VECTOR_R_VM(vmerge_vim) > +GEN_VECTOR_R_VM(vfmerge_vfm) > +GEN_VECTOR_R_VM(vmseq_vv) > +GEN_VECTOR_R_VM(vmseq_vx) > +GEN_VECTOR_R_VM(vmseq_vi) > +GEN_VECTOR_R_VM(vmfeq_vv) > +GEN_VECTOR_R_VM(vmfeq_vf) > +GEN_VECTOR_R_VM(vmsne_vv) > +GEN_VECTOR_R_VM(vmsne_vx) > +GEN_VECTOR_R_VM(vmsne_vi) > +GEN_VECTOR_R_VM(vmfle_vv) > +GEN_VECTOR_R_VM(vmfle_vf) > +GEN_VECTOR_R_VM(vmsltu_vv) > +GEN_VECTOR_R_VM(vmsltu_vx) > +GEN_VECTOR_R_VM(vmford_vv) > +GEN_VECTOR_R_VM(vmford_vf) > +GEN_VECTOR_R_VM(vmslt_vv) > +GEN_VECTOR_R_VM(vmslt_vx) > +GEN_VECTOR_R_VM(vmflt_vv) > +GEN_VECTOR_R_VM(vmflt_vf) > +GEN_VECTOR_R_VM(vmsleu_vv) > +GEN_VECTOR_R_VM(vmsleu_vx) > +GEN_VECTOR_R_VM(vmsleu_vi) > +GEN_VECTOR_R_VM(vmfne_vv) > +GEN_VECTOR_R_VM(vmfne_vf) > +GEN_VECTOR_R_VM(vmsle_vv) > +GEN_VECTOR_R_VM(vmsle_vx) > +GEN_VECTOR_R_VM(vmsle_vi) > +GEN_VECTOR_R_VM(vmfgt_vf) > +GEN_VECTOR_R_VM(vmsgtu_vx) > +GEN_VECTOR_R_VM(vmsgtu_vi) > +GEN_VECTOR_R_VM(vmsgt_vx) > +GEN_VECTOR_R_VM(vmsgt_vi) > +GEN_VECTOR_R_VM(vmfge_vf) > +GEN_VECTOR_R_VM(vsaddu_vv) > +GEN_VECTOR_R_VM(vsaddu_vx) > +GEN_VECTOR_R_VM(vsaddu_vi) > +GEN_VECTOR_R_VM(vdivu_vv) > +GEN_VECTOR_R_VM(vdivu_vx) > +GEN_VECTOR_R_VM(vfdiv_vv) > +GEN_VECTOR_R_VM(vfdiv_vf) > +GEN_VECTOR_R_VM(vsadd_vv) > +GEN_VECTOR_R_VM(vsadd_vx) > +GEN_VECTOR_R_VM(vsadd_vi) > +GEN_VECTOR_R_VM(vdiv_vv) > +GEN_VECTOR_R_VM(vdiv_vx) > +GEN_VECTOR_R_VM(vfrdiv_vf) > +GEN_VECTOR_R_VM(vssubu_vv) > +GEN_VECTOR_R_VM(vssubu_vx) > +GEN_VECTOR_R_VM(vremu_vv) > +GEN_VECTOR_R_VM(vremu_vx) > +GEN_VECTOR_R_VM(vssub_vv) > +GEN_VECTOR_R_VM(vssub_vx) > +GEN_VECTOR_R_VM(vrem_vv) > +GEN_VECTOR_R_VM(vrem_vx) > +GEN_VECTOR_R_VM(vaadd_vv) > +GEN_VECTOR_R_VM(vaadd_vx) > +GEN_VECTOR_R_VM(vaadd_vi) > +GEN_VECTOR_R_VM(vmulhu_vv) > +GEN_VECTOR_R_VM(vmulhu_vx) > +GEN_VECTOR_R_VM(vfmul_vv) > +GEN_VECTOR_R_VM(vfmul_vf) > +GEN_VECTOR_R_VM(vsll_vv) > +GEN_VECTOR_R_VM(vsll_vx) > +GEN_VECTOR_R_VM(vsll_vi) > +GEN_VECTOR_R_VM(vmul_vv) > +GEN_VECTOR_R_VM(vmul_vx) > 
+GEN_VECTOR_R_VM(vasub_vv) > +GEN_VECTOR_R_VM(vasub_vx) > +GEN_VECTOR_R_VM(vmulhsu_vv) > +GEN_VECTOR_R_VM(vmulhsu_vx) > +GEN_VECTOR_R_VM(vsmul_vv) > +GEN_VECTOR_R_VM(vsmul_vx) > +GEN_VECTOR_R_VM(vmulh_vv) > +GEN_VECTOR_R_VM(vmulh_vx) > +GEN_VECTOR_R_VM(vfrsub_vf) > +GEN_VECTOR_R_VM(vsrl_vv) > +GEN_VECTOR_R_VM(vsrl_vx) > +GEN_VECTOR_R_VM(vsrl_vi) > +GEN_VECTOR_R_VM(vfmadd_vv) > +GEN_VECTOR_R_VM(vfmadd_vf) > +GEN_VECTOR_R_VM(vsra_vv) > +GEN_VECTOR_R_VM(vsra_vx) > +GEN_VECTOR_R_VM(vsra_vi) > +GEN_VECTOR_R_VM(vmadd_vv) > +GEN_VECTOR_R_VM(vmadd_vx) > +GEN_VECTOR_R_VM(vfnmadd_vv) > +GEN_VECTOR_R_VM(vfnmadd_vf) > +GEN_VECTOR_R_VM(vssrl_vv) > +GEN_VECTOR_R_VM(vssrl_vx) > +GEN_VECTOR_R_VM(vssrl_vi) > +GEN_VECTOR_R_VM(vfmsub_vv) > +GEN_VECTOR_R_VM(vfmsub_vf) > +GEN_VECTOR_R_VM(vssra_vv) > +GEN_VECTOR_R_VM(vssra_vx) > +GEN_VECTOR_R_VM(vssra_vi) > +GEN_VECTOR_R_VM(vnmsub_vv) > +GEN_VECTOR_R_VM(vnmsub_vx) > +GEN_VECTOR_R_VM(vfnmsub_vv) > +GEN_VECTOR_R_VM(vfnmsub_vf) > +GEN_VECTOR_R_VM(vnsrl_vv) > +GEN_VECTOR_R_VM(vnsrl_vx) > +GEN_VECTOR_R_VM(vnsrl_vi) > +GEN_VECTOR_R_VM(vfmacc_vv) > +GEN_VECTOR_R_VM(vfmacc_vf) > +GEN_VECTOR_R_VM(vnsra_vv) > +GEN_VECTOR_R_VM(vnsra_vx) > +GEN_VECTOR_R_VM(vnsra_vi) > +GEN_VECTOR_R_VM(vmacc_vv) > +GEN_VECTOR_R_VM(vmacc_vx) > +GEN_VECTOR_R_VM(vfnmacc_vv) > +GEN_VECTOR_R_VM(vfnmacc_vf) > +GEN_VECTOR_R_VM(vnclipu_vv) > +GEN_VECTOR_R_VM(vnclipu_vx) > +GEN_VECTOR_R_VM(vnclipu_vi) > +GEN_VECTOR_R_VM(vfmsac_vv) > +GEN_VECTOR_R_VM(vfmsac_vf) > +GEN_VECTOR_R_VM(vnclip_vv) > +GEN_VECTOR_R_VM(vnclip_vx) > +GEN_VECTOR_R_VM(vnclip_vi) > +GEN_VECTOR_R_VM(vnmsac_vv) > +GEN_VECTOR_R_VM(vnmsac_vx) > +GEN_VECTOR_R_VM(vfnmsac_vv) > +GEN_VECTOR_R_VM(vfnmsac_vf) > +GEN_VECTOR_R_VM(vwredsumu_vs) > +GEN_VECTOR_R_VM(vwaddu_vv) > +GEN_VECTOR_R_VM(vwaddu_vx) > +GEN_VECTOR_R_VM(vfwadd_vv) > +GEN_VECTOR_R_VM(vfwadd_vf) > +GEN_VECTOR_R_VM(vwredsum_vs) > +GEN_VECTOR_R_VM(vwadd_vv) > +GEN_VECTOR_R_VM(vwadd_vx) > +GEN_VECTOR_R_VM(vfwredsum_vs) > +GEN_VECTOR_R_VM(vwsubu_vv) > +GEN_VECTOR_R_VM(vwsubu_vx) > +GEN_VECTOR_R_VM(vfwsub_vv) > +GEN_VECTOR_R_VM(vfwsub_vf) > +GEN_VECTOR_R_VM(vwsub_vv) > +GEN_VECTOR_R_VM(vwsub_vx) > +GEN_VECTOR_R_VM(vfwredosum_vs) > +GEN_VECTOR_R_VM(vwaddu_wv) > +GEN_VECTOR_R_VM(vwaddu_wx) > +GEN_VECTOR_R_VM(vfwadd_wv) > +GEN_VECTOR_R_VM(vfwadd_wf) > +GEN_VECTOR_R_VM(vwadd_wv) > +GEN_VECTOR_R_VM(vwadd_wx) > +GEN_VECTOR_R_VM(vwsubu_wv) > +GEN_VECTOR_R_VM(vwsubu_wx) > +GEN_VECTOR_R_VM(vfwsub_wv) > +GEN_VECTOR_R_VM(vfwsub_wf) > +GEN_VECTOR_R_VM(vwsub_wv) > +GEN_VECTOR_R_VM(vwsub_wx) > +GEN_VECTOR_R_VM(vwmulu_vv) > +GEN_VECTOR_R_VM(vwmulu_vx) > +GEN_VECTOR_R_VM(vfwmul_vv) > +GEN_VECTOR_R_VM(vfwmul_vf) > +GEN_VECTOR_R_VM(vwmulsu_vv) > +GEN_VECTOR_R_VM(vwmulsu_vx) > +GEN_VECTOR_R_VM(vwmul_vv) > +GEN_VECTOR_R_VM(vwmul_vx) > +GEN_VECTOR_R_VM(vwsmaccu_vv) > +GEN_VECTOR_R_VM(vwsmaccu_vx) > +GEN_VECTOR_R_VM(vwmaccu_vv) > +GEN_VECTOR_R_VM(vwmaccu_vx) > +GEN_VECTOR_R_VM(vfwmacc_vv) > +GEN_VECTOR_R_VM(vfwmacc_vf) > +GEN_VECTOR_R_VM(vwsmacc_vv) > +GEN_VECTOR_R_VM(vwsmacc_vx) > +GEN_VECTOR_R_VM(vwmacc_vv) > +GEN_VECTOR_R_VM(vwmacc_vx) > +GEN_VECTOR_R_VM(vfwnmacc_vv) > +GEN_VECTOR_R_VM(vfwnmacc_vf) > +GEN_VECTOR_R_VM(vwsmaccsu_vv) > +GEN_VECTOR_R_VM(vwsmaccsu_vx) > +GEN_VECTOR_R_VM(vwmaccsu_vv) > +GEN_VECTOR_R_VM(vwmaccsu_vx) > +GEN_VECTOR_R_VM(vfwmsac_vv) > +GEN_VECTOR_R_VM(vfwmsac_vf) > +GEN_VECTOR_R_VM(vwsmaccus_vx) > +GEN_VECTOR_R_VM(vwmaccus_vx) > +GEN_VECTOR_R_VM(vfwnmsac_vv) > +GEN_VECTOR_R_VM(vfwnmsac_vf) > +GEN_VECTOR_R2_ZIMM(vsetvli) > +GEN_VECTOR_R(vsetvl) > diff --git 
a/target/riscv/translate.c b/target/riscv/translate.c > index 8d6ab73..587c23e 100644 > --- a/target/riscv/translate.c > +++ b/target/riscv/translate.c > @@ -706,6 +706,7 @@ static bool gen_shift(DisasContext *ctx, arg_r *a, > #include "insn_trans/trans_rva.inc.c" > #include "insn_trans/trans_rvf.inc.c" > #include "insn_trans/trans_rvd.inc.c" > +#include "insn_trans/trans_rvv.inc.c" > #include "insn_trans/trans_privileged.inc.c" > > /* > diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c > new file mode 100644 > index 0000000..1f8f1ec > --- /dev/null > +++ b/target/riscv/vector_helper.c > @@ -0,0 +1,26563 @@ > +/* > + * RISC-V Vector Extension Helpers for QEMU. > + * > + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2 or later, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. > + * > + * You should have received a copy of the GNU General Public License along with > + * this program. If not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include "qemu/osdep.h" > +#include "qemu/log.h" > +#include "cpu.h" > +#include "qemu/main-loop.h" > +#include "exec/exec-all.h" > +#include "exec/helper-proto.h" > +#include "exec/translator.h" > +#include "exec/cpu_ldst.h" > +#include <math.h> > +#include "instmap.h" > + > +#define VECTOR_HELPER(name) HELPER(glue(vector_, name)) > +#define SIGNBIT8 (1 << 7) > +#define MAX_U8 ((uint8_t)0xff) > +#define MIN_U8 ((uint8_t)0x0) > +#define MAX_S8 ((int8_t)0x7f) > +#define MIN_S8 ((int8_t)0x80) > +#define SIGNBIT16 (1 << 15) > +#define MAX_U16 ((uint16_t)0xffff) > +#define MIN_U16 ((uint16_t)0x0) > +#define MAX_S16 ((int16_t)0x7fff) > +#define MIN_S16 ((int16_t)0x8000) > +#define SIGNBIT32 (1 << 31) > +#define MAX_U32 ((uint32_t)0xffffffff) > +#define MIN_U32 ((uint32_t)0x0) > +#define MAX_S32 ((int32_t)0x7fffffff) > +#define MIN_S32 ((int32_t)0x80000000) > +#define SIGNBIT64 ((uint64_t)1 << 63) > +#define MAX_U64 ((uint64_t)0xffffffffffffffff) > +#define MIN_U64 ((uint64_t)0x0) > +#define MAX_S64 ((int64_t)0x7fffffffffffffff) > +#define MIN_S64 ((int64_t)0x8000000000000000) > + > +static int64_t sign_extend(int64_t a, int8_t width) > +{ > + return a << (64 - width) >> (64 - width); > +} > + > +static int64_t extend_gpr(target_ulong reg) > +{ > + return sign_extend(reg, sizeof(target_ulong) * 8); > +} > + > +static target_ulong vector_get_index(CPURISCVState *env, int rs1, int rs2, > + int index, int mem, int width, int nf) > +{ > + target_ulong abs_off, base = env->gpr[rs1]; > + target_long offset; > + switch (width) { > + case 8: > + offset = sign_extend(env->vfp.vreg[rs2].s8[index], 8) + nf * mem; > + break; > + case 16: > + offset = sign_extend(env->vfp.vreg[rs2].s16[index], 16) + nf * mem; > + break; > + case 32: > + offset = sign_extend(env->vfp.vreg[rs2].s32[index], 32) + nf * mem; > + break; > + case 64: > + offset = env->vfp.vreg[rs2].s64[index] + nf * mem; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return 0; > + } > + if (offset < 0) { > + abs_off = ~offset + 1; > + if (base >= abs_off) { > + return base - abs_off; > + } > + } else { > 
+ if ((target_ulong)((target_ulong)offset + base) >= base) { > + return (target_ulong)offset + base; > + } > + } > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return 0; > +} > + > + > + > +/* ADD/SUB/COMPARE instructions. */ > +static inline uint8_t sat_add_u8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint8_t res = a + b; > + if (res < a) { > + res = MAX_U8; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint16_t sat_add_u16(CPURISCVState *env, uint16_t a, > uint16_t b) > +{ > + uint16_t res = a + b; > + if (res < a) { > + res = MAX_U16; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint32_t sat_add_u32(CPURISCVState *env, uint32_t a, > uint32_t b) > +{ > + uint32_t res = a + b; > + if (res < a) { > + res = MAX_U32; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint64_t sat_add_u64(CPURISCVState *env, uint64_t a, > uint64_t b) > +{ > + uint64_t res = a + b; > + if (res < a) { > + res = MAX_U64; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint8_t sat_add_s8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint8_t res = a + b; > + if (((res ^ a) & SIGNBIT8) && !((a ^ b) & SIGNBIT8)) { > + res = ~(((int8_t)a >> 7) ^ SIGNBIT8); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint16_t sat_add_s16(CPURISCVState *env, uint16_t a, > uint16_t b) > +{ > + uint16_t res = a + b; > + if (((res ^ a) & SIGNBIT16) && !((a ^ b) & SIGNBIT16)) { > + res = ~(((int16_t)a >> 15) ^ SIGNBIT16); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint32_t sat_add_s32(CPURISCVState *env, uint32_t a, > uint32_t b) > +{ > + uint32_t res = a + b; > + if (((res ^ a) & SIGNBIT32) && !((a ^ b) & SIGNBIT32)) { > + res = ~(((int32_t)a >> 31) ^ SIGNBIT32); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint64_t sat_add_s64(CPURISCVState *env, uint64_t a, > uint64_t b) > +{ > + uint64_t res = a + b; > + if (((res ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) { > + res = ~(((int64_t)a >> 63) ^ SIGNBIT64); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint8_t sat_sub_u8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint8_t res = a - b; > + if (res > a) { > + res = 0; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint16_t sat_sub_u16(CPURISCVState *env, uint16_t a, > uint16_t b) > +{ > + uint16_t res = a - b; > + if (res > a) { > + res = 0; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint32_t sat_sub_u32(CPURISCVState *env, uint32_t a, > uint32_t b) > +{ > + uint32_t res = a - b; > + if (res > a) { > + res = 0; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint64_t sat_sub_u64(CPURISCVState *env, uint64_t a, > uint64_t b) > +{ > + uint64_t res = a - b; > + if (res > a) { > + res = 0; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint8_t sat_sub_s8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint8_t res = a - b; > + if (((res ^ a) & SIGNBIT8) && ((a ^ b) & SIGNBIT8)) { > + res = ~(((int8_t)a >> 7) ^ SIGNBIT8); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint16_t sat_sub_s16(CPURISCVState *env, uint16_t a, > uint16_t b) > +{ > + uint16_t res = a - b; > + if (((res ^ a) & SIGNBIT16) && ((a ^ b) & SIGNBIT16)) { > + res = ~(((int16_t)a >> 15) ^ SIGNBIT16); > + env->vfp.vxsat = 0x1; > 
+ > + } > + return res; > +} > + > +static inline uint32_t sat_sub_s32(CPURISCVState *env, uint32_t a, > uint32_t b) > +{ > + uint32_t res = a - b; > + if (((res ^ a) & SIGNBIT32) && ((a ^ b) & SIGNBIT32)) { > + res = ~(((int32_t)a >> 31) ^ SIGNBIT32); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint64_t sat_sub_s64(CPURISCVState *env, uint64_t a, > uint64_t b) > +{ > + uint64_t res = a - b; > + if (((res ^ a) & SIGNBIT64) && ((a ^ b) & SIGNBIT64)) { > + res = ~(((int64_t)a >> 63) ^ SIGNBIT64); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static uint64_t fix_data_round(CPURISCVState *env, uint64_t result, > + uint8_t shift) > +{ > + uint64_t lsb_1 = (uint64_t)1 << shift; > + int mod = env->vfp.vxrm; > + int mask = ((uint64_t)1 << shift) - 1; > + > + if (mod == 0x0) { /* rnu */ > + return lsb_1 >> 1; > + } else if (mod == 0x1) { /* rne */ > + if ((result & mask) > (lsb_1 >> 1) || > + (((result & mask) == (lsb_1 >> 1)) && > + (((result >> shift) & 0x1)) == 1)) { > + return lsb_1 >> 1; > + } > + } else if (mod == 0x3) { /* rod */ > + if (((result & mask) >= 0x1) && (((result >> shift) & 0x1) == 0)) > { > + return lsb_1; > + } > + } > + return 0; > +} > + > +static int8_t saturate_s8(CPURISCVState *env, int16_t res) > +{ > + if (res > MAX_S8) { > + env->vfp.vxsat = 0x1; > + return MAX_S8; > + } else if (res < MIN_S8) { > + env->vfp.vxsat = 0x1; > + return MIN_S8; > + } else { > + return res; > + } > +} > + > +static uint8_t saturate_u8(CPURISCVState *env, uint16_t res) > +{ > + if (res > MAX_U8) { > + env->vfp.vxsat = 0x1; > + return MAX_U8; > + } else { > + return res; > + } > +} > + > +static uint16_t saturate_u16(CPURISCVState *env, uint32_t res) > +{ > + if (res > MAX_U16) { > + env->vfp.vxsat = 0x1; > + return MAX_U16; > + } else { > + return res; > + } > +} > + > +static uint32_t saturate_u32(CPURISCVState *env, uint64_t res) > +{ > + if (res > MAX_U32) { > + env->vfp.vxsat = 0x1; > + return MAX_U32; > + } else { > + return res; > + } > +} > + > +static int16_t saturate_s16(CPURISCVState *env, int32_t res) > +{ > + if (res > MAX_S16) { > + env->vfp.vxsat = 0x1; > + return MAX_S16; > + } else if (res < MIN_S16) { > + env->vfp.vxsat = 0x1; > + return MIN_S16; > + } else { > + return res; > + } > +} > + > +static int32_t saturate_s32(CPURISCVState *env, int64_t res) > +{ > + if (res > MAX_S32) { > + env->vfp.vxsat = 0x1; > + return MAX_S32; > + } else if (res < MIN_S32) { > + env->vfp.vxsat = 0x1; > + return MIN_S32; > + } else { > + return res; > + } > +} > +static uint16_t vwsmaccu_8(CPURISCVState *env, uint8_t a, uint8_t b, > + uint16_t c) > +{ > + uint16_t round, res; > + uint16_t product = (uint16_t)a * (uint16_t)b; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)product, 4); > + res = (round + product) >> 4; > + return sat_add_u16(env, c, res); > +} > + > +static uint32_t vwsmaccu_16(CPURISCVState *env, uint16_t a, uint16_t b, > + uint32_t c) > +{ > + uint32_t round, res; > + uint32_t product = (uint32_t)a * (uint32_t)b; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)product, 8); > + res = (round + product) >> 8; > + return sat_add_u32(env, c, res); > +} > + > +static uint64_t vwsmaccu_32(CPURISCVState *env, uint32_t a, uint32_t b, > + uint64_t c) > +{ > + uint64_t round, res; > + uint64_t product = (uint64_t)a * (uint64_t)b; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)product, 16); > + res = (round + product) >> 16; > + return sat_add_u64(env, c, res); > +} > + > +static int16_t vwsmacc_8(CPURISCVState 
*env, int8_t a, int8_t b, > + int16_t c) > +{ > + int16_t round, res; > + int16_t product = (int16_t)a * (int16_t)b; > + > + round = (int16_t)fix_data_round(env, (uint64_t)product, 4); > + res = (int16_t)(round + product) >> 4; > + return sat_add_s16(env, c, res); > +} > + > +static int32_t vwsmacc_16(CPURISCVState *env, int16_t a, int16_t b, > + int32_t c) > +{ > + int32_t round, res; > + int32_t product = (int32_t)a * (int32_t)b; > + > + round = (int32_t)fix_data_round(env, (uint64_t)product, 8); > + res = (int32_t)(round + product) >> 8; > + return sat_add_s32(env, c, res); > +} > + > +static int64_t vwsmacc_32(CPURISCVState *env, int32_t a, int32_t b, > + int64_t c) > +{ > + int64_t round, res; > + int64_t product = (int64_t)a * (int64_t)b; > + > + round = (int64_t)fix_data_round(env, (uint64_t)product, 16); > + res = (int64_t)(round + product) >> 16; > + return sat_add_s64(env, c, res); > +} > + > +static int16_t vwsmaccsu_8(CPURISCVState *env, uint8_t a, int8_t b, > + int16_t c) > +{ > + int16_t round, res; > + int16_t product = (uint16_t)a * (int16_t)b; > + > + round = (int16_t)fix_data_round(env, (uint64_t)product, 4); > + res = (round + product) >> 4; > + return sat_sub_s16(env, c, res); > +} > + > +static int32_t vwsmaccsu_16(CPURISCVState *env, uint16_t a, int16_t b, > + uint32_t c) > +{ > + int32_t round, res; > + int32_t product = (uint32_t)a * (int32_t)b; > + > + round = (int32_t)fix_data_round(env, (uint64_t)product, 8); > + res = (round + product) >> 8; > + return sat_sub_s32(env, c, res); > +} > + > +static int64_t vwsmaccsu_32(CPURISCVState *env, uint32_t a, int32_t b, > + int64_t c) > +{ > + int64_t round, res; > + int64_t product = (uint64_t)a * (int64_t)b; > + > + round = (int64_t)fix_data_round(env, (uint64_t)product, 16); > + res = (round + product) >> 16; > + return sat_sub_s64(env, c, res); > +} > + > +static int16_t vwsmaccus_8(CPURISCVState *env, int8_t a, uint8_t b, > + int16_t c) > +{ > + int16_t round, res; > + int16_t product = (int16_t)a * (uint16_t)b; > + > + round = (int16_t)fix_data_round(env, (uint64_t)product, 4); > + res = (round + product) >> 4; > + return sat_sub_s16(env, c, res); > +} > + > +static int32_t vwsmaccus_16(CPURISCVState *env, int16_t a, uint16_t b, > + int32_t c) > +{ > + int32_t round, res; > + int32_t product = (int32_t)a * (uint32_t)b; > + > + round = (int32_t)fix_data_round(env, (uint64_t)product, 8); > + res = (round + product) >> 8; > + return sat_sub_s32(env, c, res); > +} > + > +static uint64_t vwsmaccus_32(CPURISCVState *env, int32_t a, uint32_t b, > + int64_t c) > +{ > + int64_t round, res; > + int64_t product = (int64_t)a * (uint64_t)b; > + > + round = (int64_t)fix_data_round(env, (uint64_t)product, 16); > + res = (round + product) >> 16; > + return sat_sub_s64(env, c, res); > +} > + > +static int8_t vssra_8(CPURISCVState *env, int8_t a, uint8_t b) > +{ > + int16_t round, res; > + uint8_t shift = b & 0x7; > + > + round = (int16_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return res; > +} > + > +static int16_t vssra_16(CPURISCVState *env, int16_t a, uint16_t b) > +{ > + int32_t round, res; > + uint8_t shift = b & 0xf; > + > + round = (int32_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > +static int32_t vssra_32(CPURISCVState *env, int32_t a, uint32_t b) > +{ > + int64_t round, res; > + uint8_t shift = b & 0x1f; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > 
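The fixed-point arithmetic looks right to me, but fix_data_round()
would be easier to follow if the 0x0/0x1/0x3 cases carried the spec
names (rnu/rne/rdn/rod) instead of bare magic numbers, especially since
the round-then-shift pattern repeats in every caller. For reference,
the rnu case reduced to a standalone model (just the arithmetic, not
QEMU code):

    #include <assert.h>
    #include <stdint.h>

    /* v >> d, rounding to nearest with ties going up (vxrm == 0, rnu) */
    static uint64_t rnu_shift(uint64_t v, unsigned d)
    {
        uint64_t inc = d ? (uint64_t)1 << (d - 1) : 0;
        return (v + inc) >> d;
    }

    int main(void)
    {
        assert(rnu_shift(5, 2) == 1); /* 1.25 -> 1 */
        assert(rnu_shift(6, 2) == 2); /* 1.50 -> 2, tie rounds up */
        assert(rnu_shift(7, 2) == 2); /* 1.75 -> 2 */
        return 0;
    }

Two questions on the block above:

  * vwsmaccsu_* and vwsmaccus_* use sat_sub_* where the other widening
    multiply-accumulates use sat_add_*. Is that intentional, or should
    the (possibly negative) rounded product be added here as well?

  * vwsmaccus_32 is declared to return uint64_t while its siblings
    return the signed type - is that deliberate?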
+static int64_t vssra_64(CPURISCVState *env, int64_t a, uint64_t b) > +{ > + int64_t round, res; > + uint8_t shift = b & 0x3f; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a >> (shift - 1)) + (round >> (shift - 1)); > + return res >> 1; > +} > + > +static int8_t vssrai_8(CPURISCVState *env, int8_t a, uint8_t b) > +{ > + int16_t round, res; > + > + round = (int16_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static int16_t vssrai_16(CPURISCVState *env, int16_t a, uint8_t b) > +{ > + int32_t round, res; > + > + round = (int32_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static int32_t vssrai_32(CPURISCVState *env, int32_t a, uint8_t b) > +{ > + int64_t round, res; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static int64_t vssrai_64(CPURISCVState *env, int64_t a, uint8_t b) > +{ > + int64_t round, res; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a >> (b - 1)) + (round >> (b - 1)); > + return res >> 1; > +} > + > +static int8_t vnclip_16(CPURISCVState *env, int16_t a, uint8_t b) > +{ > + int16_t round, res; > + uint8_t shift = b & 0xf; > + > + round = (int16_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_s8(env, res); > +} > + > +static int16_t vnclip_32(CPURISCVState *env, int32_t a, uint16_t b) > +{ > + int32_t round, res; > + uint8_t shift = b & 0x1f; > + > + round = (int32_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return saturate_s16(env, res); > +} > + > +static int32_t vnclip_64(CPURISCVState *env, int64_t a, uint32_t b) > +{ > + int64_t round, res; > + uint8_t shift = b & 0x3f; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_s32(env, res); > +} > + > +static int8_t vnclipi_16(CPURISCVState *env, int16_t a, uint8_t b) > +{ > + int16_t round, res; > + > + round = (int16_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_s8(env, res); > +} > + > +static int16_t vnclipi_32(CPURISCVState *env, int32_t a, uint8_t b) > +{ > + int32_t round, res; > + > + round = (int32_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_s16(env, res); > +} > + > +static int32_t vnclipi_64(CPURISCVState *env, int64_t a, uint8_t b) > +{ > + int32_t round, res; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_s32(env, res); > +} > + > +static uint8_t vnclipu_16(CPURISCVState *env, uint16_t a, uint8_t b) > +{ > + uint16_t round, res; > + uint8_t shift = b & 0xf; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_u8(env, res); > +} > + > +static uint16_t vnclipu_32(CPURISCVState *env, uint32_t a, uint16_t b) > +{ > + uint32_t round, res; > + uint8_t shift = b & 0x1f; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_u16(env, res); > +} > + > +static uint32_t vnclipu_64(CPURISCVState *env, uint64_t a, uint32_t b) > +{ > + uint64_t round, res; > + uint8_t shift = b & 0x3f; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_u32(env, res); > +} > + > +static uint8_t 
vnclipui_16(CPURISCVState *env, uint16_t a, uint8_t b) > +{ > + uint16_t round, res; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_u8(env, res); > +} > + > +static uint16_t vnclipui_32(CPURISCVState *env, uint32_t a, uint8_t b) > +{ > + uint32_t round, res; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_u16(env, res); > +} > + > +static uint32_t vnclipui_64(CPURISCVState *env, uint64_t a, uint8_t b) > +{ > + uint64_t round, res; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_u32(env, res); > +} > + > +static uint8_t vssrl_8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint16_t round, res; > + uint8_t shift = b & 0x7; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > +static uint16_t vssrl_16(CPURISCVState *env, uint16_t a, uint16_t b) > +{ > + uint32_t round, res; > + uint8_t shift = b & 0xf; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > +static uint32_t vssrl_32(CPURISCVState *env, uint32_t a, uint32_t b) > +{ > + uint64_t round, res; > + uint8_t shift = b & 0x1f; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > +static uint64_t vssrl_64(CPURISCVState *env, uint64_t a, uint64_t b) > +{ > + uint64_t round, res; > + uint8_t shift = b & 0x3f; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a >> (shift - 1)) + (round >> (shift - 1)); > + return res >> 1; > +} > + > +static uint8_t vssrli_8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint16_t round, res; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static uint16_t vssrli_16(CPURISCVState *env, uint16_t a, uint8_t b) > +{ > + uint32_t round, res; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static uint32_t vssrli_32(CPURISCVState *env, uint32_t a, uint8_t b) > +{ > + uint64_t round, res; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static uint64_t vssrli_64(CPURISCVState *env, uint64_t a, uint8_t b) > +{ > + uint64_t round, res; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a >> (b - 1)) + (round >> (b - 1)); > + return res >> 1; > +} > + > +static int8_t vsmul_8(CPURISCVState *env, int8_t a, int8_t b) > +{ > + int16_t round; > + int8_t res; > + int16_t product = (int16_t)a * (int16_t)b; > + > + if (a == MIN_S8 && b == MIN_S8) { > + env->vfp.vxsat = 1; > + > + return MAX_S8; > + } > + > + round = (int16_t)fix_data_round(env, (uint64_t)product, 7); > + res = sat_add_s16(env, product, round) >> 7; > + return res; > +} > + > + > +static int16_t vsmul_16(CPURISCVState *env, int16_t a, int16_t b) > +{ > + int32_t round; > + int16_t res; > + int32_t product = (int32_t)a * (int32_t)b; > + > + if (a == MIN_S16 && b == MIN_S16) { > + env->vfp.vxsat = 1; > + > + return MAX_S16; > + } > + > + round = (int32_t)fix_data_round(env, (uint64_t)product, 15); > + res = sat_add_s32(env, product, round) >> 15; > + return res; > +} > + > +static int32_t vsmul_32(CPURISCVState *env, int32_t a, int32_t b) > +{ > + int64_t round; > + int32_t res; > + 
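Going back to vssra_64/vssrai_64 and vssrl_64/vssrli_64 above: the rounding is done as (a >> (shift - 1)) + (round >> (shift - 1)) followed by >> 1, but when the shift amount is 0 that evaluates shifts by -1, which is undefined behaviour in C. I think these need an early out before the two-step trick, e.g. for vssra_64:

  if (shift == 0) {
      return a;
  }

(assuming a zero shift is meant to return the operand unchanged, which is what the narrower variants compute).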
int64_t product = (int64_t)a * (int64_t)b; > + > + if (a == MIN_S32 && b == MIN_S32) { > + env->vfp.vxsat = 1; > + > + return MAX_S32; > + } > + > + round = (int64_t)fix_data_round(env, (uint64_t)product, 31); > + res = sat_add_s64(env, product, round) >> 31; > + return res; > +} > + > + > +static int64_t vsmul_64(CPURISCVState *env, int64_t a, int64_t b) > +{ > + int64_t res; > + uint64_t abs_a = a, abs_b = b; > + uint64_t lo_64, hi_64, carry, round; > + > + if (a == MIN_S64 && b == MIN_S64) { > + env->vfp.vxsat = 1; > + > + return MAX_S64; > + } > + > + if (a < 0) { > + abs_a = ~a + 1; > + } > + if (b < 0) { > + abs_b = ~b + 1; > + } > + > + /* first get the whole product in {hi_64, lo_64} */ > + uint64_t a_hi = abs_a >> 32; > + uint64_t a_lo = (uint32_t)abs_a; > + uint64_t b_hi = abs_b >> 32; > + uint64_t b_lo = (uint32_t)abs_b; > + > + /* > + * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) > + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + > + * (a_lo * b_hi) << 32 + a_lo * b_lo > + * = {hi_64, lo_64} > + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * > b_lo)) >> 64 > + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry > + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) > >> 32 > + */ > + > + lo_64 = abs_a * abs_b; > + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + (uint64_t)(uint32_t)(a_lo * b_hi) + > + ((a_lo * b_lo) >> 32)) >> 32; > + > + hi_64 = a_hi * b_hi + > + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + > + carry; > + > + if ((a ^ b) & SIGNBIT64) { > + lo_64 = ~lo_64; > + hi_64 = ~hi_64; > + if (lo_64 == MAX_U64) { > + lo_64 = 0; > + hi_64 += 1; > + } else { > + lo_64 += 1; > + } > + } > + > + /* set rem and res */ > + round = fix_data_round(env, lo_64, 63); > + if ((lo_64 + round) < lo_64) { > + hi_64 += 1; > + res = (hi_64 << 1); > + } else { > + res = (hi_64 << 1) | ((lo_64 + round) >> 63); > + } > + > + return res; > +} > +static inline int8_t avg_round_s8(CPURISCVState *env, int8_t a, int8_t b) > +{ > + int16_t round; > + int8_t res; > + int16_t sum = a + b; > + > + round = (int16_t)fix_data_round(env, (uint64_t)sum, 1); > + res = (sum + round) >> 1; > + > + return res; > +} > + > +static inline int16_t avg_round_s16(CPURISCVState *env, int16_t a, > int16_t b) > +{ > + int32_t round; > + int16_t res; > + int32_t sum = a + b; > + > + round = (int32_t)fix_data_round(env, (uint64_t)sum, 1); > + res = (sum + round) >> 1; > + > + return res; > +} > + > +static inline int32_t avg_round_s32(CPURISCVState *env, int32_t a, > int32_t b) > +{ > + int64_t round; > + int32_t res; > + int64_t sum = a + b; > + > + round = (int64_t)fix_data_round(env, (uint64_t)sum, 1); > + res = (sum + round) >> 1; > + > + return res; > +} > + > +static inline int64_t avg_round_s64(CPURISCVState *env, int64_t a, > int64_t b) > +{ > + int64_t rem = (a & 0x1) + (b & 0x1); > + int64_t res = (a >> 1) + (b >> 1) + (rem >> 1); > + int mod = env->vfp.vxrm; > + > + if (mod == 0x0) { /* rnu */ > + if (rem == 0x1) { > + return res + 1; > + } > + } else if (mod == 0x1) { /* rne */ > + if ((rem & 0x1) == 1 && ((res & 0x1) == 1)) { > + return res + 1; > + } > + } else if (mod == 0x3) { /* rod */ > + if (((rem & 0x1) >= 0x1) && (res & 0x1) == 0) { > + return res + 1; > + } > + } > + return res; > +} > + > +static target_ulong helper_fclass_h(uint64_t frs1) > +{ > + float16 f = frs1; > + bool sign = float16_is_neg(f); > + > + if (float16_is_infinity(f)) { > + return sign ? 
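vsmul_64 above open-codes a 64x64->128 multiply. QEMU already has mulu64()/muls64() in include/qemu/host-utils.h for exactly this, and I believe they use __int128 where the host supports it. What does the hand-rolled decomposition give you that

  uint64_t lo_64, hi_64;
  muls64(&lo_64, &hi_64, a, b);

doesn't? The same comment-heavy block is repeated several more times below.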
1 << 0 : 1 << 7;
> +    } else if (float16_is_zero(f)) {
> +        return sign ? 1 << 3 : 1 << 4;
> +    } else if (float16_is_zero_or_denormal(f)) {
> +        return sign ? 1 << 2 : 1 << 5;
> +    } else if (float16_is_any_nan(f)) {
> +        float_status s = { }; /* for snan_bit_is_one */
> +        return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
> +    } else {
> +        return sign ? 1 << 1 : 1 << 6;
> +    }
> +}
> +
> +static inline bool vector_vtype_ill(CPURISCVState *env)
> +{
> +    if ((env->vfp.vtype >> (sizeof(target_ulong) * 8 - 1)) & 0x1) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static inline void vector_vtype_set_ill(CPURISCVState *env)
> +{
> +    env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) * 8 - 1);
> +    return;
> +}
> +
> +static inline int vector_vtype_get_sew(CPURISCVState *env)
> +{
> +    return (env->vfp.vtype >> 2) & 0x7;
> +}
> +
> +static inline int vector_get_width(CPURISCVState *env)
> +{
> +    return 8 * (1 << vector_vtype_get_sew(env));
> +}
> +
> +static inline int vector_get_lmul(CPURISCVState *env)
> +{
> +    return 1 << (env->vfp.vtype & 0x3);
> +}
> +
> +static inline int vector_get_vlmax(CPURISCVState *env)
> +{
> +    return vector_get_lmul(env) * VLEN / vector_get_width(env);
> +}
> +
> +static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int width,
> +    int lmul, int index)
> +{
> +    int mlen = width / lmul;
> +    int idx = (index * mlen) / 8;
> +    int pos = (index * mlen) % 8;
> +
> +    return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1);
> +}
> +
> +static inline bool vector_overlap_vm_common(int lmul, int vm, int rd)
> +{
> +    if (lmul > 1 && vm == 0 && rd == 0) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static inline bool vector_overlap_vm_force(int vm, int rd)
> +{
> +    if (vm == 0 && rd == 0) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static inline bool vector_overlap_carry(int lmul, int rd)
> +{
> +    if (lmul > 1 && rd == 0) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static inline bool vector_overlap_dstgp_srcgp(int rd, int dlen, int rs,
> +    int slen)
> +{
> +    if ((rd >= rs && rd < rs + slen) || (rs >= rd && rs < rd + dlen)) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static inline uint64_t vector_get_mask(int start, int end)
> +{
> +    return ((uint64_t)(~((uint64_t)0))) << (63 - end + start) >> (63 - end);
> +}
> +
> +/* fetch unsigned element by width */
> +static inline uint64_t vector_get_iu_elem(CPURISCVState *env, uint32_t width,
> +    uint32_t rs2, uint32_t index)
> +{
> +    uint64_t elem;
> +    if (width == 8) {
> +        elem = env->vfp.vreg[rs2].u8[index];
> +    } else if (width == 16) {
> +        elem = env->vfp.vreg[rs2].u16[index];
> +    } else if (width == 32) {
> +        elem = env->vfp.vreg[rs2].u32[index];
> +    } else if (width == 64) {
> +        elem = env->vfp.vreg[rs2].u64[index];
> +    } else { /* the max of (XLEN, FLEN) is no bigger than 64 */
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return 0;
> +    }
> +    return elem;
> +}
> +
> +static inline int vector_mask_reg(CPURISCVState *env, uint32_t reg, int width,
> +    int lmul, int index)
> +{
> +    int mlen = width / lmul;
> +    int idx = (index * mlen) / 8;
> +    int pos = (index * mlen) % 8;
> +    return (env->vfp.vreg[reg].u8[idx] >> pos) & 0x1;
> +}
> +
> +static inline void vector_mask_result(CPURISCVState *env, uint32_t reg,
> +    int width, int lmul, int index, uint32_t result)
> +{
> +    int mlen = width / lmul;
> +    int idx = (index * mlen) / width;
> +    int pos = (index * mlen) % width;
> +    uint64_t mask = ~((((uint64_t)1 << mlen) - 1) << pos);
> +
> +    switch
(width) { > + case 8: > + env->vfp.vreg[reg].u8[idx] = (env->vfp.vreg[reg].u8[idx] & mask) > + | (result << pos); > + break; > + case 16: > + env->vfp.vreg[reg].u16[idx] = (env->vfp.vreg[reg].u16[idx] & mask) > + | (result << pos); > + break; > + case 32: > + env->vfp.vreg[reg].u32[idx] = (env->vfp.vreg[reg].u32[idx] & mask) > + | (result << pos); > + break; > + case 64: > + env->vfp.vreg[reg].u64[idx] = (env->vfp.vreg[reg].u64[idx] & mask) > + | ((uint64_t)result << > pos); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + > + return; > +} > + > +/** > + * deposit16: > + * @value: initial value to insert bit field into > + * @start: the lowest bit in the bit field (numbered from 0) > + * @length: the length of the bit field > + * @fieldval: the value to insert into the bit field > + * > + * Deposit @fieldval into the 16 bit @value at the bit field specified > + * by the @start and @length parameters, and return the modified > + * @value. Bits of @value outside the bit field are not modified. > + * Bits of @fieldval above the least significant @length bits are > + * ignored. The bit field must lie entirely within the 16 bit word. > + * It is valid to request that all 16 bits are modified (ie @length > + * 16 and @start 0). > + * > + * Returns: the modified @value. > + */ > +static inline uint16_t deposit16(uint16_t value, int start, int length, > + uint16_t fieldval) > +{ > + uint16_t mask; > + assert(start >= 0 && length > 0 && length <= 16 - start); > + mask = (~0U >> (16 - length)) << start; > + return (value & ~mask) | ((fieldval << start) & mask); > +} > + > +static void vector_tail_amo(CPURISCVState *env, int vreg, int index, int > width) > +{ > + switch (width) { > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 64: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_common(CPURISCVState *env, int vreg, int index, > + int width) > +{ > + switch (width) { > + case 8: > + env->vfp.vreg[vreg].u8[index] = 0; > + break; > + case 16: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 64: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_segment(CPURISCVState *env, int vreg, int index, > + int width, int nf, int lmul) > +{ > + switch (width) { > + case 8: > + while (nf >= 0) { > + env->vfp.vreg[vreg + nf * lmul].u8[index] = 0; > + nf--; > + } > + break; > + case 16: > + while (nf >= 0) { > + env->vfp.vreg[vreg + nf * lmul].u16[index] = 0; > + nf--; > + } > + break; > + case 32: > + while (nf >= 0) { > + env->vfp.vreg[vreg + nf * lmul].u32[index] = 0; > + nf--; > + } > + break; > + case 64: > + while (nf >= 0) { > + env->vfp.vreg[vreg + nf * lmul].u64[index] = 0; > + nf--; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_widen(CPURISCVState *env, int vreg, int index, > + int width) > +{ > + switch (width) { > + case 8: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 16: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + 
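deposit16() above duplicates deposit32()/deposit64() from include/qemu/bitops.h, right down to the doc comment. Unless there is a reason to avoid them, a cast of the existing helper seems enough, e.g.:

  env->vfp.vreg[reg].u16[idx] =
      (uint16_t)deposit32(env->vfp.vreg[reg].u16[idx], start, length, fieldval);

We are trying to avoid growing private copies of generic bit operations.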
return; > + } > +} > + > +static void vector_tail_narrow(CPURISCVState *env, int vreg, int index, > + int width) > +{ > + switch (width) { > + case 8: > + env->vfp.vreg[vreg].u8[index] = 0; > + break; > + case 16: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_fcommon(CPURISCVState *env, int vreg, int index, > + int width) > +{ > + switch (width) { > + case 16: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 64: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_fwiden(CPURISCVState *env, int vreg, int index, > + int width) > +{ > + switch (width) { > + case 16: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_fnarrow(CPURISCVState *env, int vreg, int index, > + int width) > +{ > + switch (width) { > + case 16: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > +static inline int vector_get_carry(CPURISCVState *env, int width, int > lmul, > + int index) > +{ > + int mlen = width / lmul; > + int idx = (index * mlen) / 8; > + int pos = (index * mlen) % 8; > + > + return (env->vfp.vreg[0].u8[idx] >> pos) & 0x1; > +} > + > +static inline void vector_get_layout(CPURISCVState *env, int width, int > lmul, > + int index, int *idx, int *pos) > +{ > + int mlen = width / lmul; > + *idx = (index * mlen) / 8; > + *pos = (index * mlen) % 8; > +} > + > +static bool vector_lmul_check_reg(CPURISCVState *env, uint32_t lmul, > + uint32_t reg, bool widen) > +{ > + int legal = widen ? 
(lmul * 2) : lmul; > + > + if ((lmul != 1 && lmul != 2 && lmul != 4 && lmul != 8) || > + (lmul == 8 && widen)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return false; > + } > + > + if (reg % legal != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return false; > + } > + return true; > +} > + > +static inline uint64_t u64xu64_lh(uint64_t a, uint64_t b) > +{ > + uint64_t hi_64, carry; > + > + > + /* first get the whole product in {hi_64, lo_64} */ > + uint64_t a_hi = a >> 32; > + uint64_t a_lo = (uint32_t)a; > + uint64_t b_hi = b >> 32; > + uint64_t b_lo = (uint32_t)b; > + > + /* > + * a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) > + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + > + * (a_lo * b_hi) << 32 + a_lo * b_lo > + * = {hi_64, lo_64} > + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * > b_lo)) >> 64 > + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry > + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) > >> 32 > + */ > + > + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + (uint64_t)(uint32_t)(a_lo * b_hi) + > + ((a_lo * b_lo) >> 32)) >> 32; > + > + hi_64 = a_hi * b_hi + > + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + > + carry; > + > + return hi_64; > +} > + > + > +static inline int64_t s64xu64_lh(int64_t a, uint64_t b) > +{ > + uint64_t abs_a = a; > + uint64_t lo_64, hi_64, carry; > + > + if (a < 0) { > + abs_a = ~a + 1; > + } > + > + /* first get the whole product in {hi_64, lo_64} */ > + uint64_t a_hi = abs_a >> 32; > + uint64_t a_lo = (uint32_t)abs_a; > + uint64_t b_hi = b >> 32; > + uint64_t b_lo = (uint32_t)b; > + > + /* > + * abs_a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) > + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + > + * (a_lo * b_hi) << 32 + a_lo * b_lo > + * = {hi_64, lo_64} > + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * > b_lo)) >> 64 > + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry > + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) > >> 32 > + */ > + > + lo_64 = abs_a * b; > + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + (uint64_t)(uint32_t)(a_lo * b_hi) + > + ((a_lo * b_lo) >> 32)) >> 32; > + > + hi_64 = a_hi * b_hi + > + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + > + carry; > + if ((a ^ b) & SIGNBIT64) { > + lo_64 = ~lo_64; > + hi_64 = ~hi_64; > + if (lo_64 == MAX_U64) { > + lo_64 = 0; > + hi_64 += 1; > + } else { > + lo_64 += 1; > + } > + } > + return hi_64; > +} > + > + > +static inline int64_t s64xs64_lh(int64_t a, int64_t b) > +{ > + uint64_t abs_a = a, abs_b = b; > + uint64_t lo_64, hi_64, carry; > + > + if (a < 0) { > + abs_a = ~a + 1; > + } > + if (b < 0) { > + abs_b = ~b + 1; > + } > + > + /* first get the whole product in {hi_64, lo_64} */ > + uint64_t a_hi = abs_a >> 32; > + uint64_t a_lo = (uint32_t)abs_a; > + uint64_t b_hi = abs_b >> 32; > + uint64_t b_lo = (uint32_t)abs_b; > + > + /* > + * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) > + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + > + * (a_lo * b_hi) << 32 + a_lo * b_lo > + * = {hi_64, lo_64} > + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * > b_lo)) >> 64 > + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry > + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) > >> 32 > + */ > + > + lo_64 = abs_a * abs_b; > + carry = ((uint64_t)(uint32_t)(a_hi * 
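u64xu64_lh() and s64xs64_lh() are covered directly by mulu64()/muls64() from include/qemu/host-utils.h, and the signed-by-unsigned case is a one-line fixup on top of mulu64(). There is also what looks like a real bug in s64xu64_lh(): b is unsigned, so the sign of the product depends on a alone, yet the negation is gated on (a ^ b) & SIGNBIT64, which wrongly flips the result whenever b >= 2^63. A sketch of the usual identity (assuming the helper is meant to return the high half of a signed-by-unsigned product):

  uint64_t lo, hi;
  mulu64(&lo, &hi, a, b);
  if (a < 0) {
      hi -= b;
  }
  return hi;

Further up, vector_lmul_check_reg() both raises the exception itself and returns a bool, but none of the callers look at the return value, so one of the two is redundant.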
b_lo) +
> +            (uint64_t)(uint32_t)(a_lo * b_hi) +
> +            ((a_lo * b_lo) >> 32)) >> 32;
> +
> +    hi_64 = a_hi * b_hi +
> +            ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
> +            carry;
> +
> +    if ((a ^ b) & SIGNBIT64) {
> +        lo_64 = ~lo_64;
> +        hi_64 = ~hi_64;
> +        if (lo_64 == MAX_U64) {
> +            lo_64 = 0;
> +            hi_64 += 1;
> +        } else {
> +            lo_64 += 1;
> +        }
> +    }
> +    return hi_64;
> +}
> +
> +void VECTOR_HELPER(vsetvl)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
> +    uint32_t rd)
> +{
> +    int sew, max_sew, vlmax, vl;
> +
> +    if (rs2 == 0) {
> +        vector_vtype_set_ill(env);
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    env->vfp.vtype = env->gpr[rs2];
> +    sew = vector_get_width(env) / 8;
> +    max_sew = sizeof(target_ulong);
> +
> +    if (env->misa & RVD) {
> +        max_sew = max_sew > 8 ? max_sew : 8;
> +    } else if (env->misa & RVF) {
> +        max_sew = max_sew > 4 ? max_sew : 4;
> +    }
> +    if (sew > max_sew) {
> +        vector_vtype_set_ill(env);
> +        return;
> +    }
> +
> +    vlmax = vector_get_vlmax(env);
> +    if (rs1 == 0) {
> +        vl = vlmax;
> +    } else if (env->gpr[rs1] <= vlmax) {
> +        vl = env->gpr[rs1];
> +    } else if (env->gpr[rs1] < 2 * vlmax) {
> +        vl = (env->gpr[rs1] + 1) / 2;
> +    } else {
> +        vl = vlmax;
> +    }
> +    env->vfp.vl = vl;
> +    env->vfp.vstart = 0;
> +    env->gpr[rd] = vl;
> +}
> +
> +void VECTOR_HELPER(vsetvli)(CPURISCVState *env, uint32_t rs1, uint32_t zimm,
> +    uint32_t rd)
> +{
> +    int sew, max_sew, vlmax, vl;
> +
> +    env->vfp.vtype = zimm;
> +    sew = vector_get_width(env) / 8;
> +    max_sew = sizeof(target_ulong);
> +
> +    if (env->misa & RVD) {
> +        max_sew = max_sew > 8 ? max_sew : 8;
> +    } else if (env->misa & RVF) {
> +        max_sew = max_sew > 4 ? max_sew : 4;
> +    }
> +    if (sew > max_sew) {
> +        vector_vtype_set_ill(env);
> +        return;
> +    }
> +
> +    vlmax = vector_get_vlmax(env);
> +    if (rs1 == 0) {
> +        vl = vlmax;
> +    } else if (env->gpr[rs1] <= vlmax) {
> +        vl = env->gpr[rs1];
> +    } else if (env->gpr[rs1] < 2 * vlmax) {
> +        vl = (env->gpr[rs1] + 1) / 2;
> +    } else {
> +        vl = vlmax;
> +    }
> +    env->vfp.vl = vl;
> +    env->vfp.vstart = 0;
> +    env->gpr[rd] = vl;
> +}
> +
> +/*
> + * vrgather.vv vd, vs2, vs1, vm #
> + * vd[i] = (vs1[i] >= VLMAX) ?
0 : vs2[vs1[i]]; > + */ > +void VECTOR_HELPER(vrgather_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src, src1; > + uint32_t index; > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + index = env->vfp.vreg[src1].u8[j]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u8[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[index]; > + } > + } > + break; > + case 16: > + index = env->vfp.vreg[src1].u16[j]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u16[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[index]; > + } > + } > + break; > + case 32: > + index = env->vfp.vreg[src1].u32[j]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u32[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[index]; > + } > + } > + break; > + case 64: > + index = env->vfp.vreg[src1].u64[j]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u64[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[index]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 
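vrgather_vv above ends with

  return;
  env->vfp.vstart = 0;

and nearly every helper in this file repeats that pattern, so the vstart reset is dead code. Presumably the intent was to clear vstart on successful completion, i.e.:

  env->vfp.vstart = 0;
  return;

A static analyser (or just -Wunreachable-code) would flag all of these.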
0 : > vs2[rs1] */ > +void VECTOR_HELPER(vrgather_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src; > + uint32_t index; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + index = env->gpr[rs1]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u8[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[index]; > + } > + } > + break; > + case 16: > + index = env->gpr[rs1]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u16[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[index]; > + } > + } > + break; > + case 32: > + index = env->gpr[rs1]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u32[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[index]; > + } > + } > + break; > + case 64: > + index = env->gpr[rs1]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u64[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[index]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 
0 : vs2[imm] > */ > +void VECTOR_HELPER(vrgather_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src; > + uint32_t index; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + index = rs1; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u8[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[index]; > + } > + } > + break; > + case 16: > + index = rs1; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u16[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[index]; > + } > + } > + break; > + case 32: > + index = rs1; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u32[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[index]; > + } > + } > + break; > + case 64: > + index = rs1; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u64[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[index]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vext_x_v)(CPURISCVState *env, uint32_t rs1, uint32_t > rs2, > + uint32_t rd) > +{ > + int width; > + uint64_t elem; > + target_ulong index = env->gpr[rs1]; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + > + elem = vector_get_iu_elem(env, width, rs2, index); > + if (index >= VLEN / width) { /* index is too big */ > + env->gpr[rd] = 0; > + } else { > + env->gpr[rd] = elem; > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmv.f.s rd, vs2 # rd = vs2[0] (rs1=0) */ > +void VECTOR_HELPER(vfmv_f_s)(CPURISCVState *env, uint32_t rs1, uint32_t > rs2, > + uint32_t rd) > +{ > + int width, flen; > + uint64_t mask; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->misa & RVD) { > + flen = 8; > + } else if (env->misa & RVF) { > + flen = 4; > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = 
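vext_x_v fetches the element before it range-checks the index: vector_get_iu_elem() indexes vreg[rs2].u8[index] (and friends) with an unchecked guest-supplied value, which is an out-of-bounds read whenever index >= VLEN / width. The check needs to come first:

  if (index >= VLEN / width) {
      env->gpr[rd] = 0;
  } else {
      env->gpr[rd] = vector_get_iu_elem(env, width, rs2, index);
  }

Similarly, in vfmv_f_s just below, mask = (~((uint64_t)0)) << width is evaluated before the width is known to be narrow, and a 64-bit shift by 64 is undefined behaviour.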
vector_get_width(env); > + mask = (~((uint64_t)0)) << width; > + > + if (width == 8) { > + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s8[0] | mask; > + } else if (width == 16) { > + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s16[0] | mask; > + } else if (width == 32) { > + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s32[0] | mask; > + } else if (width == 64) { > + if (flen == 4) { > + env->fpr[rd] = env->vfp.vreg[rs2].s64[0] & 0xffffffff; > + } else { > + env->fpr[rd] = env->vfp.vreg[rs2].s64[0]; > + } > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmv.s.x vd, rs1 # vd[0] = rs1 */ > +void VECTOR_HELPER(vmv_s_x)(CPURISCVState *env, uint32_t rs1, uint32_t > rs2, > + uint32_t rd) > +{ > + int width; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= env->vfp.vl) { > + return; > + } > + > + memset(&env->vfp.vreg[rd].u8[0], 0, VLEN / 8); > + width = vector_get_width(env); > + > + if (width == 8) { > + env->vfp.vreg[rd].u8[0] = env->gpr[rs1]; > + } else if (width == 16) { > + env->vfp.vreg[rd].u16[0] = env->gpr[rs1]; > + } else if (width == 32) { > + env->vfp.vreg[rd].u32[0] = env->gpr[rs1]; > + } else if (width == 64) { > + env->vfp.vreg[rd].u64[0] = env->gpr[rs1]; > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2 = 0) */ > +void VECTOR_HELPER(vfmv_s_f)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, flen; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= env->vfp.vl) { > + return; > + } > + if (env->misa & RVD) { > + flen = 8; > + } else if (env->misa & RVF) { > + flen = 4; > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + > + if (width == 8) { > + env->vfp.vreg[rd].u8[0] = env->fpr[rs1]; > + } else if (width == 16) { > + env->vfp.vreg[rd].u16[0] = env->fpr[rs1]; > + } else if (width == 32) { > + env->vfp.vreg[rd].u32[0] = env->fpr[rs1]; > + } else if (width == 64) { > + if (flen == 4) { /* 1-extended to FLEN bits */ > + env->vfp.vreg[rd].u64[0] = (uint64_t)env->fpr[rs1] > + | 0xffffffff00000000; > + } else { > + env->vfp.vreg[rd].u64[0] = env->fpr[rs1]; > + } > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ > +void VECTOR_HELPER(vslideup_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax, offset; > + int i, j, dest, src, k; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + offset = env->gpr[rs1]; > + > + if (offset < env->vfp.vstart) { > + offset = env->vfp.vstart; > + } > + > 
+ for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i - offset) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i - offset) % (VLEN / width); > + if (i < offset) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vslideup.vi vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ > +void VECTOR_HELPER(vslideup_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax, offset; > + int i, j, dest, src, k; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + offset = rs1; > + > + if (offset < env->vfp.vstart) { > + offset = env->vfp.vstart; > + } > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i - offset) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i - offset) % (VLEN / width); > + if (i < offset) { > + continue; > + } else if (i < vl) { > + if (width == 8) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[k]; > + } > + } else if (width == 16) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } > + } else if (width == 32) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } > + } else if (width == 64) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ > +void VECTOR_HELPER(vslide1up_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src, k; > + uint64_t s1; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + 
vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + s1 = env->gpr[rs1]; > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i - 1) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i - 1) % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i == 0 && env->vfp.vstart == 0) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = s1; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = s1; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = s1; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = s1; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i + rs1] */ > +void VECTOR_HELPER(vslidedown_vx)(CPURISCVState *env, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax, offset; > + int i, j, dest, src, k; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + offset = env->gpr[rs1]; > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i + offset) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i + offset) % (VLEN / width); > + if (i < offset) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[k]; > + } else { > + env->vfp.vreg[dest].u8[j] = 0; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } else { > + env->vfp.vreg[dest].u16[j] = 0; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u32[j] = > 
+ env->vfp.vreg[src].u32[k]; > + } else { > + env->vfp.vreg[dest].u32[j] = 0; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } else { > + env->vfp.vreg[dest].u64[j] = 0; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vslidedown_vi)(CPURISCVState *env, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax, offset; > + int i, j, dest, src, k; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + offset = rs1; > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i + offset) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i + offset) % (VLEN / width); > + if (i < offset) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[k]; > + } else { > + env->vfp.vreg[dest].u8[j] = 0; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } else { > + env->vfp.vreg[dest].u16[j] = 0; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } else { > + env->vfp.vreg[dest].u32[j] = 0; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } else { > + env->vfp.vreg[dest].u64[j] = 0; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vslide1down.vx vd, vs2, rs1, vm # vd[vl - 1]=x[rs1], vd[i] = vs2[i + > 1] */ > +void VECTOR_HELPER(vslide1down_vx)(CPURISCVState *env, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src, k; > + uint64_t s1; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + s1 = env->gpr[rs1]; > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i + 1) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i + 1) % (VLEN / width); > + if 
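Both vslidedown_vx and vslidedown_vi above skip elements with i < offset, but for a slide *down* the destination elements below the offset are exactly the ones that must be written (vd[i] = vs2[i + offset]); only elements below vstart should be skipped. Compare vslideup, where skipping i < offset is correct. I would expect:

  if (i < env->vfp.vstart) {
      continue;
  } else if (i < vl) {

otherwise vd[0..offset-1] is silently left unchanged.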
(i < env->vfp.vstart) { > + continue; > + } else if (i == vl - 1 && i >= env->vfp.vstart) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = s1; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = s1; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = s1; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = s1; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else if (i < vl - 1) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vcompress.vm vd, vs2, vs1 > + * Compress into vd elements of vs2 where vs1 is enabled > + */ > +void VECTOR_HELPER(vcompress_vm)(CPURISCVState *env, uint32_t rs1, > uint32_t rs2, > + uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src; > + uint32_t vd_idx, num = 0; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, 1) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + /* zeroed all elements */ > + for (i = 0; i < lmul; i++) { > + memset(&env->vfp.vreg[rd + i].u64[0], 0, VLEN / 8); > + } > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (num / (VLEN / width)); > + src = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + vd_idx = num % (VLEN / width); > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_mask_reg(env, rs1, width, lmul, i)) { > + env->vfp.vreg[dest].u8[vd_idx] = > + env->vfp.vreg[src].u8[j]; > + num++; > + } > + break; > + case 16: > + if (vector_mask_reg(env, rs1, width, lmul, i)) { > + env->vfp.vreg[dest].u16[vd_idx] = > + env->vfp.vreg[src].u16[j]; > + num++; > + } > + break; > + case 32: > + if (vector_mask_reg(env, rs1, width, lmul, i)) { > + env->vfp.vreg[dest].u32[vd_idx] = > + env->vfp.vreg[src].u32[j]; > + num++; > + } > + break; > + case 64: > + if (vector_mask_reg(env, rs1, width, lmul, i)) { > + env->vfp.vreg[dest].u64[vd_idx] = > + env->vfp.vreg[src].u64[j]; > + num++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + 
return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] > + + env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + + env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + + env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + + env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + + env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; 
> +} > + > +void VECTOR_HELPER(vadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredsum.vs vd, vs2, vs1, vm # vd[0] = sum(vs1[0] , vs2[*]) */ > +void VECTOR_HELPER(vredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t sum = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u8[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u8[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = sum; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u16[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u16[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = sum; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u32[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u32[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = sum; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u64[j]; > + } > + if (i 
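In vredsum_vs the vector_lmul_check_reg() call happens before the vtype-illegal check, so when vill is set we compute lmul from garbage bits and may raise (or not) for the wrong reason. The other helpers get this right; presumably the checks were meant to be ordered as:

  if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
      riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
      return;
  }
  vector_lmul_check_reg(env, lmul, rs2, false);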
== 0) { > + sum += env->vfp.vreg[rs1].u64[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = sum; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfadd.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_add( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_add( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_add( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfadd.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_add( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_add( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + 
env->vfp.vreg[dest].f64[j] = float64_add( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vredand.vs vd, vs2, vs1, vm # vd[0] = and( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredand_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t res = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res &= env->vfp.vreg[src2].u8[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = res; > + } > + break; > + case 16: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res &= env->vfp.vreg[src2].u16[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = res; > + } > + break; > + case 32: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res &= env->vfp.vreg[src2].u32[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = res; > + } > + break; > + case 64: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res &= env->vfp.vreg[src2].u64[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = res; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfredsum.vs vd, vs2, vs1, vm # Unordered sum */ > +void VECTOR_HELPER(vfredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + float16 sum16 = 0.0f; > + float32 sum32 = 0.0f; > + float64 sum64 = 0.0f; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 16: > + if (i == 0) { > + sum16 = env->vfp.vreg[rs1].f16[0]; > + } 
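
There's a recurring pattern in these helpers - vredsum_vs, vfadd_vv,
vfadd_vf and vredand_vs above all end with:

    return;
    env->vfp.vstart = 0;

so the vstart reset is unreachable and vstart never gets cleared on the
success path (compare vadd_vi, which does it the right way round).
Presumably you want:

    env->vfp.vstart = 0;
    return;

or simply to drop the bare return at the end of the function.
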
> + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum16 = float16_add(sum16, env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f16[0] = sum16; > + } > + break; > + case 32: > + if (i == 0) { > + sum32 = env->vfp.vreg[rs1].f32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum32 = float32_add(sum32, env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f32[0] = sum32; > + } > + break; > + case 64: > + if (i == 0) { > + sum64 = env->vfp.vreg[rs1].f64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum64 = float64_add(sum64, env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f64[0] = sum64; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + - env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + - env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + - env->vfp.vreg[src1].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + - env->vfp.vreg[src1].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > 
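
By this point the shape of every integer helper is identical: the same
checks, the same strip-mined loop, and a switch on width where only the
element field and the operator change. That is where the 26k lines come
from. Could the bodies be generated instead? A minimal sketch - untested,
and VEC_ARITH_CASE is a made-up name - for the switch in vsub_vv:

    #define VEC_ARITH_CASE(BITS, FIELD, OP)                         \
        case BITS:                                                  \
            if (vector_elem_mask(env, vm, width, lmul, i)) {        \
                env->vfp.vreg[dest].FIELD[j] =                      \
                    env->vfp.vreg[src2].FIELD[j] OP                 \
                    env->vfp.vreg[src1].FIELD[j];                   \
            }                                                       \
            break;

    switch (width) {
    VEC_ARITH_CASE(8,  u8,  -)
    VEC_ARITH_CASE(16, u16, -)
    VEC_ARITH_CASE(32, u32, -)
    VEC_ARITH_CASE(64, u64, -)
    default:
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
    }

and the whole function body could be wrapped up the same way. Have you
also looked at whether the simple cases (add/sub/and/or/xor/min/max)
could go through tcg's generic vector infrastructure (tcg/tcg-op-gvec.h)
rather than helpers at all? That would shrink the patch considerably and
be faster as well.
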
+ switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + - env->gpr[rs1]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + - env->gpr[rs1]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + - env->gpr[rs1]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + - (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredor.vs vd, vs2, vs1, vm # vd[0] = or( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredor_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t res = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res |= env->vfp.vreg[src2].u8[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = res; > + } > + break; > + case 16: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res |= env->vfp.vreg[src2].u16[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = res; > + } > + break; > + case 32: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res |= env->vfp.vreg[src2].u32[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = res; > + } > + break; > + case 64: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res |= env->vfp.vreg[src2].u64[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = res; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfsub.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + 
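
The reduction helpers (vredsum_vs, vredand_vs, vredor_vs above) would
read much better with the seed and the final store hoisted out of the
element loop - the "if (i == 0)" / "if (i == vl - 1)" special cases
repeated inside every switch arm are easy to get wrong. Shape only,
showing the 64-bit arm of vredand (the width switch and the clearing of
rd stay as they are):

    uint64_t res = env->vfp.vreg[rs1].u64[0];   /* seed from vs1[0] */
    for (i = 0; i < vl; i++) {
        src2 = rs2 + (i / (VLEN / width));
        j = i % (VLEN / width);
        if (vector_elem_mask(env, vm, width, lmul, i)) {
            res &= env->vfp.vreg[src2].u64[j];
        }
    }
    env->vfp.vreg[rd].u64[0] = res;             /* one store, after the loop */

While there: the "float16 sum16 = 0.0f" initialisers in vfredsum_vs are
misleading - the softfloat types are bit patterns, not C floats, so
plain 0 (or the float16_zero & friends constants) would be clearer.
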
vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_sub( > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[src1].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_sub( > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[src1].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_sub( > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[src1].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfsub.vf vd, vs2, rs1, vm # Vector-scalar vd[i] = vs2[i] - f[rs1] */ > +void VECTOR_HELPER(vfsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_sub( > + > env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_sub( > + > env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_sub( > + > env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vrsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + - env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + - env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + - env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + - env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vrsub_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + - env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + - env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + - env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + - env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredxor.vs vd, vs2, vs1, vm # vd[0] = xor( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredxor_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t res = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, 
GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res ^= env->vfp.vreg[src2].u8[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = res; > + } > + break; > + case 16: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res ^= env->vfp.vreg[src2].u16[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = res; > + } > + break; > + case 32: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res ^= env->vfp.vreg[src2].u32[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = res; > + } > + break; > + case 64: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res ^= env->vfp.vreg[src2].u64[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = res; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfredosum.vs vd, vs2, vs1, vm # Ordered sum */ > +void VECTOR_HELPER(vfredosum_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + helper_vector_vfredsum_vs(env, vm, rs1, rs2, rd); > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vminu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] <= > + env->vfp.vreg[src2].u8[j]) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src1].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] <= > + env->vfp.vreg[src2].u16[j]) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src1].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] <= > + env->vfp.vreg[src2].u32[j]) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src1].u32[j]; > + } else { > + 
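
vfredosum_vs forwarding straight to helper_vector_vfredsum_vs is only
correct because your "unordered" vfredsum happens to be implemented as a
strict in-order sum. That deserves a comment at the very least - if
vfredsum is ever changed to a tree reduction the ordered variant silently
breaks. And the same unreachable env->vfp.vstart = 0 after return shows
up here again.
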
env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] <= > + env->vfp.vreg[src2].u64[j]) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src1].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vminu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].u8[j]) { > + env->vfp.vreg[dest].u8[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].u16[j]) { > + env->vfp.vreg[dest].u16[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].u32[j]) { > + env->vfp.vreg[dest].u32[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) <= > + env->vfp.vreg[src2].u64[j]) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]); > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredminu.vs vd, vs2, vs1, vm # vd[0] = minu( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredminu_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t minu = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + 
env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + minu = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (minu > env->vfp.vreg[src2].u8[j]) { > + minu = env->vfp.vreg[src2].u8[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = minu; > + } > + break; > + case 16: > + if (i == 0) { > + minu = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (minu > env->vfp.vreg[src2].u16[j]) { > + minu = env->vfp.vreg[src2].u16[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = minu; > + } > + break; > + case 32: > + if (i == 0) { > + minu = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (minu > env->vfp.vreg[src2].u32[j]) { > + minu = env->vfp.vreg[src2].u32[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = minu; > + } > + break; > + case 64: > + if (i == 0) { > + minu = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (minu > env->vfp.vreg[src2].u64[j]) { > + minu = env->vfp.vreg[src2].u64[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = minu; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmin.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_minnum( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_minnum( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_minnum( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmin.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfmin_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_minnum( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_minnum( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_minnum( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s8[j] <= > + env->vfp.vreg[src2].s8[j]) { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src1].s8[j]; > + } else { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src2].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s16[j] <= > + env->vfp.vreg[src2].s16[j]) { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src1].s16[j]; > + } else { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src2].s16[j]; > 
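
The tail-zeroing else branch of vfmin_vv/vfmin_vf (and I see the same in
vfmax and vfsgnj below) is missing its breaks:

    case 16:
        env->vfp.vreg[dest].f16[j] = 0;
    case 32:
        env->vfp.vreg[dest].f32[j] = 0;
    case 64:
        env->vfp.vreg[dest].f64[j] = 0;
    default:
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());

As written every tail element falls through into the default and raises
an illegal instruction exception, clobbering neighbouring elements on the
way down. Given vfadd_vv already uses vector_tail_fcommon(env, dest, j,
width) for exactly this, I'd expect these helpers to do the same.
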
+ } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s32[j] <= > + env->vfp.vreg[src2].s32[j]) { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src1].s32[j]; > + } else { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src2].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s64[j] <= > + env->vfp.vreg[src2].s64[j]) { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src1].s64[j]; > + } else { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src2].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmin_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int8_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].s8[j]) { > + env->vfp.vreg[dest].s8[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src2].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int16_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].s16[j]) { > + env->vfp.vreg[dest].s16[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src2].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int32_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].s32[j]) { > + env->vfp.vreg[dest].s32[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src2].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int64_t)extend_gpr(env->gpr[rs1]) <= > + env->vfp.vreg[src2].s64[j]) { > + env->vfp.vreg[dest].s64[j] = > + (int64_t)extend_gpr(env->gpr[rs1]); > + } else { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src2].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredmin.vs vd, vs2, vs1, vm # vd[0] = min( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + int64_t min = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + min = env->vfp.vreg[rs1].s8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (min > env->vfp.vreg[src2].s8[j]) { > + min = env->vfp.vreg[src2].s8[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s8[0] = min; > + } > + break; > + case 16: > + if (i == 0) { > + min = env->vfp.vreg[rs1].s16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (min > env->vfp.vreg[src2].s16[j]) { > + min = env->vfp.vreg[src2].s16[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s16[0] = min; > + } > + break; > + case 32: > + if (i == 0) { > + min = env->vfp.vreg[rs1].s32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (min > env->vfp.vreg[src2].s32[j]) { > + min = env->vfp.vreg[src2].s32[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s32[0] = min; > + } > + break; > + case 64: > + if (i == 0) { > + min = env->vfp.vreg[rs1].s64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (min > env->vfp.vreg[src2].s64[j]) { > + min = env->vfp.vreg[src2].s64[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s64[0] = min; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfredmin.vs vd, vs2, vs1, vm # Minimum value */ > +void VECTOR_HELPER(vfredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + float16 min16 = 0.0f; > + float32 min32 = 0.0f; > + float64 min64 = 0.0f; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 16: > + if (i == 0) { > + min16 = env->vfp.vreg[rs1].f16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + min16 = float16_minnum(min16, > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f16[0] = min16; > + } > + break; > + case 32: > + if (i == 0) { > + min32 = env->vfp.vreg[rs1].f32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + min32 = float32_minnum(min32, > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f32[0] = min32; > + } > + break; > + case 64: > + if (i == 0) { > + min64 = env->vfp.vreg[rs1].f64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + min64 = float64_minnum(min64, > env->vfp.vreg[src2].f64[j], > + 
&env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f64[0] = min64; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmaxu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] >= > + env->vfp.vreg[src2].u8[j]) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src1].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] >= > + env->vfp.vreg[src2].u16[j]) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src1].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] >= > + env->vfp.vreg[src2].u32[j]) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src1].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] >= > + env->vfp.vreg[src2].u64[j]) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src1].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmaxu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].u8[j]) { > + env->vfp.vreg[dest].u8[j] = > + 
env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].u16[j]) { > + env->vfp.vreg[dest].u16[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].u32[j]) { > + env->vfp.vreg[dest].u32[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) >= > + env->vfp.vreg[src2].u64[j]) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]); > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredmaxu.vs vd, vs2, vs1, vm # vd[0] = maxu( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredmaxu_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t maxu = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + maxu = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (maxu < env->vfp.vreg[src2].u8[j]) { > + maxu = env->vfp.vreg[src2].u8[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = maxu; > + } > + break; > + case 16: > + if (i == 0) { > + maxu = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (maxu < env->vfp.vreg[src2].u16[j]) { > + maxu = env->vfp.vreg[src2].u16[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = maxu; > + } > + break; > + case 32: > + if (i == 0) { > + maxu = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (maxu < env->vfp.vreg[src2].u32[j]) { > + maxu = env->vfp.vreg[src2].u32[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = maxu; > + } > + break; > + case 64: > + if (i == 0) { > + maxu = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (maxu < env->vfp.vreg[src2].u64[j]) { > + maxu = env->vfp.vreg[src2].u64[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = maxu; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/*vfmax.vv vd, vs2, vs1, vm 
# Vector-vector */ > +void VECTOR_HELPER(vfmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_maxnum( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_maxnum( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_maxnum( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmax.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfmax_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_maxnum( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_maxnum( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_maxnum( > + env->fpr[rs1], > + > 
env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s8[j] >= > + env->vfp.vreg[src2].s8[j]) { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src1].s8[j]; > + } else { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src2].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s16[j] >= > + env->vfp.vreg[src2].s16[j]) { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src1].s16[j]; > + } else { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src2].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s32[j] >= > + env->vfp.vreg[src2].s32[j]) { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src1].s32[j]; > + } else { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src2].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s64[j] >= > + env->vfp.vreg[src2].s64[j]) { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src1].s64[j]; > + } else { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src2].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmax_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + 
continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int8_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].s8[j]) { > + env->vfp.vreg[dest].s8[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src2].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int16_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].s16[j]) { > + env->vfp.vreg[dest].s16[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src2].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int32_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].s32[j]) { > + env->vfp.vreg[dest].s32[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src2].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int64_t)extend_gpr(env->gpr[rs1]) >= > + env->vfp.vreg[src2].s64[j]) { > + env->vfp.vreg[dest].s64[j] = > + (int64_t)extend_gpr(env->gpr[rs1]); > + } else { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src2].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredmax.vs vd, vs2, vs1, vm # vd[0] = max( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + int64_t max = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + max = env->vfp.vreg[rs1].s8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (max < env->vfp.vreg[src2].s8[j]) { > + max = env->vfp.vreg[src2].s8[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s8[0] = max; > + } > + break; > + case 16: > + if (i == 0) { > + max = env->vfp.vreg[rs1].s16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (max < env->vfp.vreg[src2].s16[j]) { > + max = env->vfp.vreg[src2].s16[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s16[0] = max; > + } > + break; > + case 32: > + if (i == 0) { > + max = env->vfp.vreg[rs1].s32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (max < env->vfp.vreg[src2].s32[j]) { > + max = env->vfp.vreg[src2].s32[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s32[0] = max; > + } > + break; > + case 64: > + if (i == 0) { > + max = env->vfp.vreg[rs1].s64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (max < env->vfp.vreg[src2].s64[j]) { > + max = env->vfp.vreg[src2].s64[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s64[0] = max; > + } 
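
Two more things on the vector-scalar float helpers. vfmax_vf checks
env->vfp.vstart >= vl before it has validated vtype, unlike vfmin_vf and
the rest which raise the illegal instruction exception first; if vtype is
illegal then vl is stale, so the early return can silently skip the
exception. The checks should happen in the same order everywhere.

Also, all the .vf helpers pass env->fpr[rs1] straight through as a
float16/float32 operand and rely on implicit truncation. If 0.7.1 follows
the scalar FP rules for operands narrower than FLEN, an improperly
NaN-boxed register should be treated as the canonical NaN rather than
just having its low bits used - worth checking against the spec.
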
> + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfredmax.vs vd, vs2, vs1, vm # Maximum value */ > +void VECTOR_HELPER(vfredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + float16 max16 = 0.0f; > + float32 max32 = 0.0f; > + float64 max64 = 0.0f; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 16: > + if (i == 0) { > + max16 = env->vfp.vreg[rs1].f16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + max16 = float16_maxnum(max16, > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f16[0] = max16; > + } > + break; > + case 32: > + if (i == 0) { > + max32 = env->vfp.vreg[rs1].f32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + max32 = float32_maxnum(max32, > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f32[0] = max32; > + } > + break; > + case 64: > + if (i == 0) { > + max64 = env->vfp.vreg[rs1].f64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + max64 = float64_maxnum(max64, > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f64[0] = max64; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfsgnj.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfsgnj_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + > env->vfp.vreg[src1].f16[j], > + 0, > + 15, > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + > 
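There are a lot of

    return;
    env->vfp.vstart = 0;

tails in this patch (both reductions above have one) - the vstart reset is
dead code sitting after the return, so vstart never actually gets cleared
on this path. Presumably you meant:

    env->vfp.vstart = 0;
    return;

clang's -Wunreachable-code should flag these; worth a sweep over the whole
file.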
env->vfp.vreg[src1].f32[j], > + 0, > + 31, > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + > env->vfp.vreg[src1].f64[j], > + 0, > + 63, > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfsgnj.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfsgnj_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + env->fpr[rs1], > + 0, > + 15, > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + env->fpr[rs1], > + 0, > + 31, > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + env->fpr[rs1], > + 0, > + 63, > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vand_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > 
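The tail-zeroing switch in the vfsgnj helpers is missing its breaks:

    case 16:
        env->vfp.vreg[dest].f16[j] = 0;
    case 32:
        ...

so every tail element falls through into default and raises an illegal
instruction exception. Given vfmerge_vfm further down uses
vector_tail_fcommon() for exactly this, I'd just call that here (and in
the vfsgnjn/vfsgnjx variants below, which repeat the same broken pattern).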
+ switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] > + & env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + & env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + & env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + & env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vand_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + & env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + & env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + & env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + & env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vand_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + & 
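For the pure bitwise ops the element width doesn't actually matter, so at
least the unmasked (vm=1, vstart=0) case could be expanded inline with the
generic vector ops rather than taking a helper call per instruction.
Something like this in trans_rvv.inc.c (a sketch only - vreg_ofs() is a
made-up helper returning offsetof(CPURISCVState, vfp.vreg[x]), and I'm
assuming the DisasContext caches VLEN):

    static void gen_vand_vv_gvec(DisasContext *ctx, int vd, int vs2, int vs1)
    {
        uint32_t sz = ctx->vlen / 8;   /* bytes in one vector register */
        /* vd = vs2 & vs1 as a whole-register op, LMUL=1 assumed */
        tcg_gen_gvec_and(MO_8, vreg_ofs(vd), vreg_ofs(vs2),
                         vreg_ofs(vs1), sz, sz);
    }

target/arm/translate-sve.c has plenty of examples of dispatching between
gvec expansion and out-of-line helpers depending on the operands.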
env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + & env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + & env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + & env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfsgnjn.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfsgnjn_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + > ~env->vfp.vreg[src1].f16[j], > + 0, > + 15, > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + > ~env->vfp.vreg[src1].f32[j], > + 0, > + 31, > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + > ~env->vfp.vreg[src1].f64[j], > + 0, > + 63, > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +/* vfsgnjn.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfsgnjn_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + 
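Minor: in the _vi forms the rs1 argument is really the 5-bit immediate
from the encoding, not a register number, so the name is misleading - imm
or simm5 would read better. The sign extension is also loop-invariant, so
it can be done once:

    int64_t simm = sign_extend(rs1, 5);

instead of per element, per case.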
dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + ~env->fpr[rs1], > + 0, > + 15, > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + ~env->fpr[rs1], > + 0, > + 31, > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + ~env->fpr[rs1], > + 0, > + 63, > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] > + | env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + | env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + | env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + | env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + 
vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + | env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + | env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + | env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + | env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vor_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + | env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + | env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + | env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + | env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfsgnjx.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfsgnjx_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = 
vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + > env->vfp.vreg[src1].f16[j] ^ > + > env->vfp.vreg[src2].f16[j], > + 0, > + 15, > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + > env->vfp.vreg[src1].f32[j] ^ > + > env->vfp.vreg[src2].f32[j], > + 0, > + 31, > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + > env->vfp.vreg[src1].f64[j] ^ > + > env->vfp.vreg[src2].f64[j], > + 0, > + 63, > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vfsgnjx.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfsgnjx_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + env->fpr[rs1] ^ > + > env->vfp.vreg[src2].f16[j], > + 0, > + 15, > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + env->fpr[rs1] ^ > + > env->vfp.vreg[src2].f32[j], > + 0, > + 31, > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + env->fpr[rs1] ^ > + > env->vfp.vreg[src2].f64[j], > + 0, > + 63, > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > +void 
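vfsgnj/vfsgnjn/vfsgnjx are byte-for-byte identical apart from how the sign
word is formed (vs1, ~vs1, vs1 ^ vs2). A small helper would let the three
share one loop - a sketch for the 64 bit lane, reusing the deposit64 you
already call:

    static uint64_t fsgnj64(uint64_t mag, uint64_t sign,
                            bool negate, bool xorsign)
    {
        if (negate) {
            sign = ~sign;
        }
        if (xorsign) {
            sign ^= mag;
        }
        /* magnitude bits from vs2, sign bit from the computed word */
        return deposit64(sign, 0, 63, mag);
    }

More generally this file is crying out to be generated from a handful of
templates rather than open-coding every op/width combination.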
VECTOR_HELPER(vxor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] > + ^ env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + ^ env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + ^ env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + ^ env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vxor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + ^ env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + ^ env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + ^ env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + ^ env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vxor_vi)(CPURISCVState *env, uint32_t vm, uint32_t 
rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + ^ env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + ^ env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + ^ env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + ^ env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadc_vvm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax, carry; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] > + + env->vfp.vreg[src2].u8[j] + carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j] > + + env->vfp.vreg[src2].u16[j] + carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j] > + + env->vfp.vreg[src2].u32[j] + carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j] > + + env->vfp.vreg[src2].u64[j] + carry; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vadc_vxm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, 
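In the adc/sbc helpers the

    carry = vector_get_carry(env, width, lmul, i);

call is duplicated in every arm of the switch even though none of its
arguments change between the arms - hoist it above the switch and each
case body shrinks to a single statement.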
vlmax, carry; > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u8[j] + carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u16[j] + carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u32[j] + carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = > (uint64_t)extend_gpr(env->gpr[rs1]) > + + env->vfp.vreg[src2].u64[j] + carry; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadc_vim)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax, carry; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u8[j] + carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u16[j] + carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u32[j] + carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u64[j] + carry; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmadc_vvm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax, carry; > + uint64_t tmp; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + > + if (vector_vtype_ill(env) > + || 
vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src1].u8[j] > + + env->vfp.vreg[src2].u8[j] + carry; > + tmp = tmp >> width; > + > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src1].u16[j] > + + env->vfp.vreg[src2].u16[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)env->vfp.vreg[src1].u32[j] > + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src1].u64[j] > + + env->vfp.vreg[src2].u64[j] + carry; > + > + if ((tmp < env->vfp.vreg[src1].u64[j] || > + tmp < env->vfp.vreg[src2].u64[j]) > + || (env->vfp.vreg[src1].u64[j] == MAX_U64 && > + env->vfp.vreg[src2].u64[j] == MAX_U64)) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmadc_vxm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax, carry; > + uint64_t tmp, extend_rs1; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint8_t)env->gpr[rs1] > + + env->vfp.vreg[src2].u8[j] + carry; > + tmp = tmp >> width; > + > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint16_t)env->gpr[rs1] > + + env->vfp.vreg[src2].u16[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)((uint32_t)env->gpr[rs1]) > + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + 
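The 64 bit carry-out detection here works, but it took me a while to
convince myself of the MAX_U64 special cases. The usual two-step
formulation is easier to verify and branch-free (a and b being the two
source elements, carry the carry-in):

    uint64_t sum = a + b;
    bool cout = sum < a;       /* a + b wrapped */
    sum += carry;
    cout |= sum < carry;       /* adding the carry-in wrapped */

The same shape works for the vmadc.vxm/vmadc.vim variants below.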
> + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]); > + tmp = extend_rs1 + env->vfp.vreg[src2].u64[j] + carry; > + if ((tmp < extend_rs1) || > + (carry && (env->vfp.vreg[src2].u64[j] == MAX_U64))) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmadc_vim)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax, carry; > + uint64_t tmp; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint8_t)sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u8[j] + carry; > + tmp = tmp >> width; > + > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint16_t)sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u16[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)((uint32_t)sign_extend(rs1, 5)) > + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u64[j] + carry; > + > + if ((tmp < (uint64_t)sign_extend(rs1, 5) || > + tmp < env->vfp.vreg[src2].u64[j]) > + || ((uint64_t)sign_extend(rs1, 5) == MAX_U64 && > + env->vfp.vreg[src2].u64[j] == MAX_U64)) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsbc_vvm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax, carry; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > 
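In the tail handling:

    if (width <= 64) {
        vector_mask_result(env, rd, width, lmul, i, 0);
    } else {
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
        return;
    }

width comes from vector_get_width() and is one of 8/16/32/64, so as far as
I can see the else branch is unreachable - just call vector_mask_result()
unconditionally. This pattern repeats through all the mask-producing
helpers.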
+ src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + - env->vfp.vreg[src1].u8[j] - carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j] > + - env->vfp.vreg[src1].u16[j] - carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j] > + - env->vfp.vreg[src1].u32[j] - carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j] > + - env->vfp.vreg[src1].u64[j] - carry; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsbc_vxm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax, carry; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + - env->gpr[rs1] - carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j] > + - env->gpr[rs1] - carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j] > + - env->gpr[rs1] - carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j] > + - (uint64_t)extend_gpr(env->gpr[rs1]) - carry; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsbc_vvm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax, carry; > + uint64_t tmp; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = 
rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u8[j] > + - env->vfp.vreg[src1].u8[j] - carry; > + tmp = (tmp >> width) & 0x1; > + > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u16[j] > + - env->vfp.vreg[src1].u16[j] - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)env->vfp.vreg[src2].u32[j] > + - (uint64_t)env->vfp.vreg[src1].u32[j] - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u64[j] > + - env->vfp.vreg[src1].u64[j] - carry; > + > + if (((env->vfp.vreg[src1].u64[j] == MAX_U64) && carry) || > + env->vfp.vreg[src2].u64[j] < > + (env->vfp.vreg[src1].u64[j] + carry)) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsbc_vxm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax, carry; > + uint64_t tmp, extend_rs1; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u8[j] > + - (uint8_t)env->gpr[rs1] - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u16[j] > + - (uint16_t)env->gpr[rs1] - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)env->vfp.vreg[src2].u32[j] > + - (uint64_t)((uint32_t)env->gpr[rs1]) - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + > + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]); > + tmp = env->vfp.vreg[src2].u64[j] - extend_rs1 - carry; > + > + if ((tmp > env->vfp.vreg[src2].u64[j]) || > + ((extend_rs1 == MAX_U64) && carry)) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + > + default: > + riscv_raise_exception(env, 
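Same comment as for vmadc: the 64 bit borrow-out can be computed directly
from the operands instead of testing the wrapped result:

    bool bout = (b < a) || (borrow && a == b);

with a the subtrahend (vs1/rs1) and b the minuend (vs2). Both of your
formulations here look correct as far as I can tell, but vmsbc.vvm and
vmsbc.vxm use different ones, which makes review harder than it needs to
be.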
RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmpopc.m rd, vs2, v0.t # x[rd] = sum_i ( vs2[i].LSB && v0[i].LSB ) */ > +void VECTOR_HELPER(vmpopc_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + env->gpr[rd] = 0; > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + if (vector_mask_reg(env, rs2, width, lmul, i) && > + vector_elem_mask(env, vm, width, lmul, i)) { > + env->gpr[rd]++; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfirst.m rd, vs2, vm */ > +void VECTOR_HELPER(vmfirst_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + if (vector_mask_reg(env, rs2, width, lmul, i) && > + vector_elem_mask(env, vm, width, lmul, i)) { > + env->gpr[rd] = i; > + break; > + } > + } else { > + env->gpr[rd] = -1; > + } > + } > + > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmerge_vvm)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl, idx, pos; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src1].u8[j]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u16[j] = > 
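A couple of problems in vmfirst_m: if no active element has its LSB set
and vl == vlmax, the else branch never runs and x[rd] is never written, so
the result is whatever was there before - the spec says it should be -1.
Initialising before the loop fixes that and simplifies the scan:

    env->gpr[rd] = -1;
    for (i = 0; i < vl; i++) {
        if (vector_mask_reg(env, rs2, width, lmul, i) &&
            vector_elem_mask(env, vm, width, lmul, i)) {
            env->gpr[rd] = i;
            break;
        }
    }

Also both vmpopc_m and vmfirst_m write env->gpr[rd] without checking
rd != 0, which will corrupt the architectural zero register, and both have
the same dead `return; env->vfp.vstart = 0;` tail as the reductions.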
+ env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src1].u16[j]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src1].u32[j]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j]; > + } > + break; > + case 64: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src1].u64[j]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmerge_vxm)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl, idx, pos; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]; > + } > + break; > + case 16: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]; > + } > + break; > + case 32: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]; > + } > + break; > + case 64: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, 
i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmerge_vim)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl, idx, pos; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + (uint8_t)sign_extend(rs1, 5); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u8[j] = (uint8_t)sign_extend(rs1, > 5); > + } > + break; > + case 16: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + (uint16_t)sign_extend(rs1, 5); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u16[j] = > (uint16_t)sign_extend(rs1, 5); > + } > + break; > + case 32: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + (uint32_t)sign_extend(rs1, 5); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u32[j] = > (uint32_t)sign_extend(rs1, 5); > + } > + break; > + case 64: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)sign_extend(rs1, 5); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u64[j] = > (uint64_t)sign_extend(rs1, 5); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart 
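vmerge_vxm is missing the vector_vtype_ill()/overlap check that both
vmerge_vvm and vmerge_vim have - looks like an oversight. Also, raising
the illegal instruction for (vm == 1 && rs2 != 0) from inside the element
loop means earlier elements have already been committed to vd by the time
the exception fires; that's decode-time information and belongs in
trans_rvv.inc.c, or at least in a check before the loop.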
= 0; > +} > + > +/* vfmerge.vfm vd, vs2, rs1, v0 # vd[i] = v0[i].LSB ? f[rs1] : vs2[i] */ > +void VECTOR_HELPER(vfmerge_vfm)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* vfmv.v.f vd, rs1 # vd[i] = f[rs1]; */ > + if (vm && (rs2 != 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = env->fpr[rs1]; > + } else { > + env->vfp.vreg[dest].f16[j] = > env->vfp.vreg[src2].f16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = env->fpr[rs1]; > + } else { > + env->vfp.vreg[dest].f32[j] = > env->vfp.vreg[src2].f32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = env->fpr[rs1]; > + } else { > + env->vfp.vreg[dest].f64[j] = > env->vfp.vreg[src2].f64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmseq_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] == > + env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] == > + env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] == > + env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] == > + 
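
In vfmerge_vfm the final

> +    return;
> +    env->vfp.vstart = 0;

means the vstart reset is unreachable, so vstart is never cleared on the
normal path. The two statements look swapped.

The f16/f32 cases also assign env->fpr[rs1] straight into a narrower
element, silently truncating the 64-bit register. Shouldn't the scalar
be checked for proper NaN-boxing first?
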
env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmseq_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] == > env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] == > env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] == > env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) == > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmseq_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)sign_extend(rs1, 5) > + == env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 
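
vmseq_vx truncates the scalar directly for the 8/16/32-bit cases but
goes through extend_gpr() for the 64-bit one. I assume that is for RV32;
a short comment on what extend_gpr guarantees would save the next reader
the trip to its definition.
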
1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)sign_extend(rs1, 5) > + == env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)sign_extend(rs1, 5) > + == env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)sign_extend(rs1, 5) == > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmandnot.mm vd, vs2, vs1 # vd = vs2 & ~vs1 */ > +void VECTOR_HELPER(vmandnot_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) & > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfeq.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmfeq_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_eq_quiet(env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_eq_quiet(env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + 
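
vmseq_vv/vx/vi repeat the same ~100 lines with only the element field
and the comparison changing, and the pattern recurs for every integer
compare below. Something along these lines (untested, written against
the helpers this patch already introduces) would collapse each case arm
to one line:

#define VMCMP_BODY(FIELD, LHS, RHS, OP)                            \
    do {                                                            \
        /* write the mask bit only for active elements */           \
        if (vector_elem_mask(env, vm, width, lmul, i)) {            \
            vector_mask_result(env, rd, width, lmul, i,             \
                               (LHS).FIELD[j] OP (RHS).FIELD[j]);   \
        }                                                           \
    } while (0)

so that case 8 of vmseq_vv reads:

    VMCMP_BODY(u8, env->vfp.vreg[src1], env->vfp.vreg[src2], ==);

On the tail side, the illegal-width case is handled twice, once in the
switch default and again behind the width <= 64 test; as far as I can
see the second one can only fire when the loop body never executes
(vl == 0, or vstart >= vl).
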
vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_eq_quiet(env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfeq.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfeq_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmsne_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if 
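
Same unreachable statement as in vfmerge_vfm: vmfeq_vv and vmfeq_vf end
with

> +    return;
> +    env->vfp.vstart = 0;

so vstart is never cleared, and the same tail appears in every
floating-point compare below (vmfle, vmford, vmflt, vmfne, vmfgt). The
early return on vstart >= vl also bails out without touching vstart; is
that intended?

These per-predicate helpers could also sit on top of the generic
softfloat compare entry points rather than growing new ones. A minimal
sketch of the idea, assuming the usual relation values:

static int vmfeq16(float16 a, float16 b, float_status *s)
{
    /* quiet compare: only signalling NaNs raise invalid */
    return float16_compare_quiet(a, b, s) == float_relation_equal;
}

vmflt/vmfle would use the signalling float16_compare() and test for
float_relation_less (or less-or-equal), which matches the signalling
behaviour the existing code gets from float16_lt/float16_le.
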
(env->vfp.vreg[src1].u8[j] != > + env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] != > + env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] != > + env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] != > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsne_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] != > env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] != > env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] != > env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) != > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void 
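
vmsne_vv/vx are vmseq with the sense inverted (and vmslt/vmsltu/vmsle/
vmsleu below follow the same shape), so they would fall out of the same
generator macro for free.
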
VECTOR_HELPER(vmsne_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)sign_extend(rs1, 5) > + != env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)sign_extend(rs1, 5) > + != env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)sign_extend(rs1, 5) > + != env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)sign_extend(rs1, 5) != > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmand.mm vd, vs2, vs1 # vd = vs2 & vs1 */ > +void VECTOR_HELPER(vmand_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) & > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfle.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmfle_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + 
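
The mask-logical helpers have the opposite flavour of the vstart bug:

> +    env->vfp.vstart = 0;
> +    return;
> +    env->vfp.vstart = 0;

The second reset is dead code. Looks like a rebase artefact; it is worth
cleaning up everywhere it appears (vmandnot, vmand, vmor, vmxor,
vmornot, vmnand).
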
vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_le(env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[src1].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_le(env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[src1].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_le(env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[src1].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfle.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfle_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_le(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_le(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_le(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsltu_vv)(CPURISCVState 
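
There is also an inconsistency in how vstart >= vl is handled: the
floating-point compares return early (leaving vstart set and the
destination untouched) while the integer compares above have no early
exit at all and just fall through their loop. Picking one scheme would
make the tail and vstart behaviour much easier to audit.
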
*env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] < > + env->vfp.vreg[src1].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] < > + env->vfp.vreg[src1].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] < > + env->vfp.vreg[src1].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] < > + env->vfp.vreg[src1].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsltu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] < > (uint8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] < > (uint16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if 
(env->vfp.vreg[src2].u32[j] < > (uint32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] < > + (uint64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmor.mm vd, vs2, vs1 # vd = vs2 | vs1 */ > +void VECTOR_HELPER(vmor_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) | > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmford.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmford_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float16_unordered_quiet(env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float32_unordered_quiet(env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float64_unordered_quiet(env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; 
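
vmor_mm (and vmxor/vmornot below) mask the combined bit with & 0x1, but
vmand_mm and vmandnot_mm above store tmp unmasked. If vector_mask_reg()
can return anything other than 0 or 1 the and variants are wrong; if it
can't, the & 0x1 is noise. Either way they should agree.
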
> + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmford.vf vd, vs2, rs1, vm # Vector-scalar */ > +void VECTOR_HELPER(vmford_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float16_unordered_quiet(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float32_unordered_quiet(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float64_unordered_quiet(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmslt_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] < > + env->vfp.vreg[src1].s8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] < > + env->vfp.vreg[src1].s16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } 
else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] < > + env->vfp.vreg[src1].s32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] < > + env->vfp.vreg[src1].s64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmslt_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] < > (int8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] < > (int16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] < > (int32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] < > + (int64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmxor.mm vd, vs2, vs1 # vd = vs2 ^ vs1 */ > +void VECTOR_HELPER(vmxor_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = 
env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) ^ > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmflt.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmflt_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_lt(env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[src1].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_lt(env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[src1].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_lt(env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[src1].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmflt.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmflt_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_lt(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, 
width, lmul, i, result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_lt(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_lt(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsleu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] <= > + env->vfp.vreg[src1].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] <= > + env->vfp.vreg[src1].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] <= > + env->vfp.vreg[src1].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] <= > + env->vfp.vreg[src1].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsleu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + 
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] <= > (uint8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] <= > (uint16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] <= > (uint32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] <= > + (uint64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsleu_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] <= (uint8_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] <= (uint16_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] <= (uint32_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] <= > + (uint64_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + 
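
vmsleu_vi compares against the raw 5-bit rs1 field, zero-extended, while
vmseq_vi/vmsne_vi (and vmsle_vi below) sign-extend the same field. Is
the unsigned-immediate treatment here intentional? A comment citing the
spec would help either way.
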
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmornot.mm vd, vs2, vs1 # vd = vs2 | ~vs1 */ > +void VECTOR_HELPER(vmornot_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) | > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfne.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmfne_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_eq_quiet(env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_eq_quiet(env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_eq_quiet(env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfne.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfne_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > 
+ > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsle_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] <= > + env->vfp.vreg[src1].s8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] <= > + env->vfp.vreg[src1].s16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] <= > + env->vfp.vreg[src1].s32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] <= > + env->vfp.vreg[src1].s64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else 
{ > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsle_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] <= > (int8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] <= > (int16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] <= > (int32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] <= > + (int64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsle_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] <= > + (int8_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + 
case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] <= > + (int16_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] <= > + (int32_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] <= > + sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmnand.mm vd, vs2, vs1 # vd = ~(vs2 & vs1) */ > +void VECTOR_HELPER(vmnand_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) & > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, (~tmp & 0x1)); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfgt.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfgt_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_le(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_le(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_le(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + 
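
Note vmnand_mm above clears vstart, returns, and then has a second,
unreachable "env->vfp.vstart = 0;" after the return.  The compare
helpers have the opposite problem: their tail is

    return;
    env->vfp.vstart = 0;

so the reset is dead code and vstart stays nonzero after a successful
execution (the early "if (env->vfp.vstart >= vl) return;" paths leave
it set, too).  vstart is supposed to be zeroed when an instruction
completes, so presumably every helper wants the same epilogue, with no
return in front of it:

        ...
    }
    env->vfp.vstart = 0;
}
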
vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsgtu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] > > (uint8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] > > (uint16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] > > (uint32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] > > + (uint64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsgtu_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] > (uint8_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, 
lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] > (uint16_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] > (uint32_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] > > + (uint64_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmnor.mm vd, vs2, vs1 # vd = ~(vs2 | vs1) */ > +void VECTOR_HELPER(vmnor_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) | > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmsgt_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] > > (int8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] > > (int16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] > > (int32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + 
vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] > > + (int64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsgt_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] > > + (int8_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] > > + (int16_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] > > + (int32_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] > > + sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +/* vmxnor.mm vd, vs2, vs1 # vd = ~(vs2 ^ vs1) */ > +void VECTOR_HELPER(vmxnor_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) ^ > + vector_mask_reg(env, rs2, width, 
lmul, i); > + vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfge.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfge_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_lt(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_lt(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_lt(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsaddu.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vsaddu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_add_u8(env, > + env->vfp.vreg[src1].u8[j], > env->vfp.vreg[src2].u8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_add_u16(env, > + env->vfp.vreg[src1].u16[j], > env->vfp.vreg[src2].u16[j]); > + } > + break; > 
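
For vmfgt.vf above (and vmfge.vf just before vsaddu.vv): computing the
mask bit as !float16_le() resp. !float16_lt() also inverts the
unordered case, so a NaN operand sets the bit, while every vector
compare except vmfne should produce 0 on NaN.  Swapping the operands
avoids the negation altogether, e.g. for the f16 case of vmfgt:

    /* vs2[i] > f[rs1]  <=>  f[rs1] < vs2[i]; NaNs now give 0 directly */
    result = float16_lt(env->fpr[rs1],
                        env->vfp.vreg[src2].f16[j],
                        &env->fp_status);
    vector_mask_result(env, rd, width, lmul, i, result);

and float16_le() with swapped operands for vmfge.
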
+ case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_add_u32(env, > + env->vfp.vreg[src1].u32[j], > env->vfp.vreg[src2].u32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_add_u64(env, > + env->vfp.vreg[src1].u64[j], > env->vfp.vreg[src2].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsaddu.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vsaddu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_add_u8(env, > + env->vfp.vreg[src2].u8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_add_u16(env, > + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_add_u32(env, > + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_add_u64(env, > + env->vfp.vreg[src2].u64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsaddu.vi vd, vs2, imm, vm # vector-immediate */ > +void VECTOR_HELPER(vsaddu_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_add_u8(env, > + env->vfp.vreg[src2].u8[j], rs1); > + } > + break; 
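
sat_add_u8() and friends are presumably defined earlier in the file;
for the unsigned case the callers above appear to assume something like
the sketch below -- clamp the result and set the sticky saturation flag
(the vxsat field name is a guess on my part):

    static inline uint8_t sat_add_u8(CPURISCVState *env, uint8_t a,
                                     uint8_t b)
    {
        uint8_t res = a + b;
        if (res < a) {              /* wrapped around, so saturate */
            res = UINT8_MAX;
            env->vfp.vxsat = 1;     /* field name assumed */
        }
        return res;
    }

Whatever the actual definition is, the saturating ops would benefit
from a test that checks the flag as well as the clamped value.
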
> + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_add_u16(env, > + env->vfp.vreg[src2].u16[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_add_u32(env, > + env->vfp.vreg[src2].u32[j], rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_add_u64(env, > + env->vfp.vreg[src2].u64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vdivu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] == 0) { > + env->vfp.vreg[dest].u8[j] = MAX_U8; > + } else { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] / > + env->vfp.vreg[src1].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] == 0) { > + env->vfp.vreg[dest].u16[j] = MAX_U16; > + } else { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + / env->vfp.vreg[src1].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] == 0) { > + env->vfp.vreg[dest].u32[j] = MAX_U32; > + } else { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + / env->vfp.vreg[src1].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] == 0) { > + env->vfp.vreg[dest].u64[j] = MAX_U64; > + } else { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + / env->vfp.vreg[src1].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vdivu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; 
i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u8[j] = MAX_U8; > + } else { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] / > + (uint8_t)env->gpr[rs1]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u16[j] = MAX_U16; > + } else { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + / (uint16_t)env->gpr[rs1]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u32[j] = MAX_U32; > + } else { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + / (uint32_t)env->gpr[rs1]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) { > + env->vfp.vreg[dest].u64[j] = MAX_U64; > + } else { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + / (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfdiv.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_div( > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[src1].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_div( > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[src1].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_div( > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[src1].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfdiv.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t > 
rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_div( > + > env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_div( > + > env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_div( > + > env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsadd.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vsadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_add_s8(env, > + env->vfp.vreg[src1].s8[j], > env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_add_s16(env, > + env->vfp.vreg[src1].s16[j], > env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_add_s32(env, > + env->vfp.vreg[src1].s32[j], > env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_add_s64(env, > + env->vfp.vreg[src1].s64[j], > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, 
dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsadd.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vsadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_add_s8(env, > + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_add_s16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_add_s32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_add_s64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsadd.vi vd, vs2, imm, vm # vector-immediate */ > +void VECTOR_HELPER(vsadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_add_s8(env, > + env->vfp.vreg[src2].s8[j], sign_extend(rs1, 5)); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_add_s16(env, > + env->vfp.vreg[src2].s16[j], sign_extend(rs1, 5)); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_add_s32(env, > + env->vfp.vreg[src2].s32[j], sign_extend(rs1, 5)); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_add_s64(env, > + env->vfp.vreg[src2].s64[j], 
sign_extend(rs1, 5)); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s8[j] == 0) { > + env->vfp.vreg[dest].s8[j] = -1; > + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) && > + (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) { > + env->vfp.vreg[dest].s8[j] = MIN_S8; > + } else { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] / > + env->vfp.vreg[src1].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s16[j] == 0) { > + env->vfp.vreg[dest].s16[j] = -1; > + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) && > + (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) { > + env->vfp.vreg[dest].s16[j] = MIN_S16; > + } else { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + / env->vfp.vreg[src1].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s32[j] == 0) { > + env->vfp.vreg[dest].s32[j] = -1; > + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) && > + (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) { > + env->vfp.vreg[dest].s32[j] = MIN_S32; > + } else { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + / env->vfp.vreg[src1].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s64[j] == 0) { > + env->vfp.vreg[dest].s64[j] = -1; > + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) && > + (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) { > + env->vfp.vreg[dest].s64[j] = MIN_S64; > + } else { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + / env->vfp.vreg[src1].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vdiv_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + 
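
The special cases in vdiv_vv/vdiv_vx follow the scalar DIV rules (x/0
yields -1, MIN/-1 yields MIN) and conveniently also keep the C '/'
operator away from its two undefined cases.  Factored out per element
type, each switch arm reduces to something like:

    static inline int8_t div_s8(int8_t a, int8_t b)
    {
        if (b == 0) {
            return -1;                  /* division by zero */
        } else if (a == INT8_MIN && b == -1) {
            return INT8_MIN;            /* signed overflow */
        }
        return a / b;
    }

which would also let the four widths share one shape, as with the
compares earlier.
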
vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int8_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s8[j] = -1; > + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) && > + ((int8_t)env->gpr[rs1] == (int8_t)(-1))) { > + env->vfp.vreg[dest].s8[j] = MIN_S8; > + } else { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] / > + (int8_t)env->gpr[rs1]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int16_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s16[j] = -1; > + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) && > + ((int16_t)env->gpr[rs1] == (int16_t)(-1))) { > + env->vfp.vreg[dest].s16[j] = MIN_S16; > + } else { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + / (int16_t)env->gpr[rs1]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int32_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s32[j] = -1; > + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) && > + ((int32_t)env->gpr[rs1] == (int32_t)(-1))) { > + env->vfp.vreg[dest].s32[j] = MIN_S32; > + } else { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + / (int32_t)env->gpr[rs1]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) { > + env->vfp.vreg[dest].s64[j] = -1; > + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) && > + ((int64_t)extend_gpr(env->gpr[rs1]) == > (int64_t)(-1))) { > + env->vfp.vreg[dest].s64[j] = MIN_S64; > + } else { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + / (int64_t)extend_gpr(env->gpr[rs1]); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] */ > +void VECTOR_HELPER(vfrdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_div( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_div( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + 
&env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_div( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssubu.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vssubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_sub_u8(env, > + env->vfp.vreg[src2].u8[j], > env->vfp.vreg[src1].u8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_sub_u16(env, > + env->vfp.vreg[src2].u16[j], > env->vfp.vreg[src1].u16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_sub_u32(env, > + env->vfp.vreg[src2].u32[j], > env->vfp.vreg[src1].u32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_sub_u64(env, > + env->vfp.vreg[src2].u64[j], > env->vfp.vreg[src1].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssubu.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vssubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_sub_u8(env, > + env->vfp.vreg[src2].u8[j], env->gpr[rs1]); > + } > + 
break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_sub_u16(env, > + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_sub_u32(env, > + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_sub_u64(env, > + env->vfp.vreg[src2].u64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vremu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] == 0) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] % > + env->vfp.vreg[src1].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] == 0) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + % env->vfp.vreg[src1].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] == 0) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + % env->vfp.vreg[src1].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] == 0) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + % env->vfp.vreg[src1].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vremu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + 
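
A small thing that applies to all of the .vx helpers here:
env->gpr[rs1] (and extend_gpr() in the 64-bit arms) is loop invariant,
so it could be read once before the element loop rather than per
element, e.g.

    /* hoist the scalar operand out of the loop */
    uint64_t s1 = (uint64_t)extend_gpr(env->gpr[rs1]);

    for (i = 0; i < vlmax; i++) {
        /* ... use s1, narrowed per width, instead of env->gpr[rs1] ... */
    }

The compiler can probably hoist this itself, but doing it in the source
trims every switch body.
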
} > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] % > + (uint8_t)env->gpr[rs1]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + % (uint16_t)env->gpr[rs1]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + % (uint32_t)env->gpr[rs1]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + % (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmsbf.m vd, vs2, vm # set-before-first mask bit */ > +void VECTOR_HELPER(vmsbf_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + bool first_mask_bit = false; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (first_mask_bit) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + continue; > + } > + if (!vector_mask_reg(env, rs2, width, lmul, i)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + first_mask_bit = true; > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmsif.m vd, vs2, vm # set-including-first mask bit */ > +void VECTOR_HELPER(vmsif_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + bool first_mask_bit = false; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + 
if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (first_mask_bit) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + continue; > + } > + if (!vector_mask_reg(env, rs2, width, lmul, i)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + first_mask_bit = true; > + vector_mask_result(env, rd, width, lmul, i, 1); > + } > + } > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmsof.m vd, vs2, vm # set-only-first mask bit */ > +void VECTOR_HELPER(vmsof_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + bool first_mask_bit = false; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (first_mask_bit) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + continue; > + } > + if (!vector_mask_reg(env, rs2, width, lmul, i)) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + first_mask_bit = true; > + vector_mask_result(env, rd, width, lmul, i, 1); > + } > + } > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* viota.m v4, v2, v0.t */ > +void VECTOR_HELPER(viota_m)(CPURISCVState *env, uint32_t vm, uint32_t rs2, > + uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest; > + uint32_t sum = 0; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 1)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sum; > + if (vector_mask_reg(env, rs2, width, lmul, i)) { > + sum++; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sum; > + if (vector_mask_reg(env, rs2, width, lmul, i)) { > + sum++; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sum; > + if (vector_mask_reg(env, rs2, width, lmul, i)) { > + sum++; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sum; > + if (vector_mask_reg(env, rs2, width, lmul, i)) { > + sum++; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vid.v vd, vm # Write element ID to destination. 
*/ > +void VECTOR_HELPER(vid_v)(CPURISCVState *env, uint32_t vm, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = i; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = i; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = i; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = i; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssub.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vssub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_sub_s8(env, > + env->vfp.vreg[src2].s8[j], > env->vfp.vreg[src1].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_sub_s16(env, > + env->vfp.vreg[src2].s16[j], > env->vfp.vreg[src1].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_sub_s32(env, > + env->vfp.vreg[src2].s32[j], > env->vfp.vreg[src1].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_sub_s64(env, > + env->vfp.vreg[src2].s64[j], > env->vfp.vreg[src1].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssub.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vssub_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, 
lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_sub_s8(env, > + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_sub_s16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_sub_s32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_sub_s64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vrem_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s8[j] == 0) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j]; > + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) && > + (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) { > + env->vfp.vreg[dest].s8[j] = 0; > + } else { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] % > + env->vfp.vreg[src1].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s16[j] == 0) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j]; > + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) && > + (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) { > + env->vfp.vreg[dest].s16[j] = 0; > + } else { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + % env->vfp.vreg[src1].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if 
(env->vfp.vreg[src1].s32[j] == 0) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j]; > + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) && > + (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) { > + env->vfp.vreg[dest].s32[j] = 0; > + } else { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + % env->vfp.vreg[src1].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s64[j] == 0) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j]; > + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) && > + (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) { > + env->vfp.vreg[dest].s64[j] = 0; > + } else { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + % env->vfp.vreg[src1].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vrem_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int8_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j]; > + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) && > + ((int8_t)env->gpr[rs1] == (int8_t)(-1))) { > + env->vfp.vreg[dest].s8[j] = 0; > + } else { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] % > + (int8_t)env->gpr[rs1]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int16_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j]; > + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) && > + ((int16_t)env->gpr[rs1] == (int16_t)(-1))) { > + env->vfp.vreg[dest].s16[j] = 0; > + } else { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + % (int16_t)env->gpr[rs1]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int32_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j]; > + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) && > + ((int32_t)env->gpr[rs1] == (int32_t)(-1))) { > + env->vfp.vreg[dest].s32[j] = 0; > + } else { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + % (int32_t)env->gpr[rs1]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j]; > + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) && > + ((int64_t)extend_gpr(env->gpr[rs1]) == > (int64_t)(-1))) { > + env->vfp.vreg[dest].s64[j] = 0; > + } else { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + 
% (int64_t)extend_gpr(env->gpr[rs1]); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +/* vaadd.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vaadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8(env, > + env->vfp.vreg[src1].s8[j], > env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16(env, > + env->vfp.vreg[src1].s16[j], > env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32(env, > + env->vfp.vreg[src1].s32[j], > env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64(env, > + env->vfp.vreg[src1].s64[j], > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vaadd.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vaadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8(env, > + env->gpr[rs1], env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16(env, > + env->gpr[rs1], env->vfp.vreg[src2].s16[j]); > + } > + 
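
The `env->vfp.vstart = 0;` after the unconditional `return` in vaadd_vv (and in vmsif/vmsof/viota/vid above, vssub, and most of the helpers below) is dead code, so vstart is never cleared on the successful path. Presumably the intent was

    env->vfp.vstart = 0;
    return;

or simply dropping the `return`. Note that vrem_vv/vrem_vx directly above do reset vstart, so the file currently mixes the two idioms.
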
break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32(env, > + env->gpr[rs1], env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64(env, > + env->gpr[rs1], env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vaadd.vi vd, vs2, imm, vm # vector-immediate */ > +void VECTOR_HELPER(vaadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8(env, > + rs1, env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16(env, > + rs1, env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32(env, > + rs1, env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64(env, > + rs1, env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmulhu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > + ((uint16_t)env->vfp.vreg[src1].u8[j] > + * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width; > + } > + break; > + case 16: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + ((uint32_t)env->vfp.vreg[src1].u16[j] > + * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + ((uint64_t)env->vfp.vreg[src1].u32[j] > + * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = u64xu64_lh( > + env->vfp.vreg[src1].u64[j], > env->vfp.vreg[src2].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmulhu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > + ((uint16_t)(uint8_t)env->gpr[rs1] > + * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + ((uint32_t)(uint16_t)env->gpr[rs1] > + * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + ((uint64_t)(uint32_t)env->gpr[rs1] > + * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = u64xu64_lh( > + (uint64_t)extend_gpr(env->gpr[rs1]) > + , env->vfp.vreg[src2].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +/* vfmul.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / 
width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_mul( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_mul( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_mul( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmul.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_mul( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_mul( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_mul( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsll_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < 
env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + << (env->vfp.vreg[src1].u8[j] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + << (env->vfp.vreg[src1].u16[j] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + << (env->vfp.vreg[src1].u32[j] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + << (env->vfp.vreg[src1].u64[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsll_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + << (env->gpr[rs1] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + << (env->gpr[rs1] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + << (env->gpr[rs1] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + << ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsll_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch 
(width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + << (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + << (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + << (rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + << (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = 
env->gpr[rs1] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = env->gpr[rs1] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = env->gpr[rs1] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > + (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vasub.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vasub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8( > + env, > + ~env->vfp.vreg[src1].s8[j] + 1, > + env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16( > + env, > + ~env->vfp.vreg[src1].s16[j] + 1, > + env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32( > + env, > + ~env->vfp.vreg[src1].s32[j] + 1, > + env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64( > + env, > + ~env->vfp.vreg[src1].s64[j] + 1, > + env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vasub.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vasub_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; 
i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8( > + env, ~env->gpr[rs1] + 1, > env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16( > + env, ~env->gpr[rs1] + 1, > env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32( > + env, ~env->gpr[rs1] + 1, > env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64( > + env, ~env->gpr[rs1] + 1, > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmulhsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > + ((uint16_t)env->vfp.vreg[src1].u8[j] > + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > + ((uint32_t)env->vfp.vreg[src1].u16[j] > + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > + ((uint64_t)env->vfp.vreg[src1].u32[j] > + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = s64xu64_lh( > + env->vfp.vreg[src2].s64[j], > env->vfp.vreg[src1].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmulhsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > + ((uint16_t)(uint8_t)env->gpr[rs1] > + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > + ((uint32_t)(uint16_t)env->gpr[rs1] > + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > + ((uint64_t)(uint32_t)env->gpr[rs1] > + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = s64xu64_lh( > + env->vfp.vreg[src2].s64[j], > + (uint64_t)extend_gpr(env->gpr[rs1])); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vsmul.vv vd, vs2, vs1, vm # vd[i] = > clip((vs2[i]*vs1[i]+round)>>(SEW-1)) */ > +void VECTOR_HELPER(vsmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if ((!(vm)) && rd == 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vsmul_8(env, > + env->vfp.vreg[src1].s8[j], > env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vsmul_16(env, > + env->vfp.vreg[src1].s16[j], > env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vsmul_32(env, > + env->vfp.vreg[src1].s32[j], > env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vsmul_64(env, > + env->vfp.vreg[src1].s64[j], > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsmul.vx vd, vs2, rs1, vm # vd[i] = > clip((vs2[i]*x[rs1]+round)>>(SEW-1)) */ > +void VECTOR_HELPER(vsmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ 
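
vsmul_vv is the odd one out here: it skips the vector_lmul_check_reg() calls that every other helper makes and open-codes the mask check as `(!(vm)) && rd == 0` instead of going through vector_overlap_vm_common(). Is that intentional? As written, an illegal vd/LMUL register combination looks like it would go through unchecked.
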
> + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if ((!(vm)) && rd == 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vsmul_8(env, > + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vsmul_16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vsmul_32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vsmul_64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmulh_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > + ((int16_t)env->vfp.vreg[src1].s8[j] > + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > + ((int32_t)env->vfp.vreg[src1].s16[j] > + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > + ((int64_t)env->vfp.vreg[src1].s32[j] > + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = s64xs64_lh( > + env->vfp.vreg[src1].s64[j], > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > 
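
For the 64-bit high halves, is there a reason not to use muls64()/mulu64() from "qemu/host-utils.h" instead of the local s64xs64_lh()/u64xu64_lh() helpers? Untested sketch for the vmulh case:

    uint64_t lo, hi;
    muls64(&lo, &hi, env->vfp.vreg[src1].s64[j],
           env->vfp.vreg[src2].s64[j]);
    env->vfp.vreg[dest].s64[j] = hi;

That would keep the widening arithmetic in one well-tested place.
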
+} > +void VECTOR_HELPER(vmulh_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > + ((int16_t)(int8_t)env->gpr[rs1] > + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > + ((int32_t)(int16_t)env->gpr[rs1] > + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > + ((int64_t)(int32_t)env->gpr[rs1] > + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = s64xs64_lh( > + (int64_t)extend_gpr(env->gpr[rs1]) > + , env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfrsub.vf vd, vs2, rs1, vm # Scalar-vector vd[i] = f[rs1] - vs2[i] */ > +void VECTOR_HELPER(vfrsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_sub( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_sub( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_sub( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + 
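
In the .vf helpers (vfmul_vf above, vfrsub_vf here) the scalar operand goes in as `env->fpr[rs1]`, relying on implicit truncation of the 64-bit FP register down to float16/float32. If I'm reading it right, that picks up the low bits of a NaN-boxed value by accident rather than by design; an explicit narrowing (and eventually a NaN-boxing check) would make the intent clear.
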
} > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + >> (env->vfp.vreg[src1].u8[j] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + >> (env->vfp.vreg[src1].u16[j] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + >> (env->vfp.vreg[src1].u32[j] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + >> (env->vfp.vreg[src1].u64[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + >> (env->gpr[rs1] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + >> (env->gpr[rs1] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + >> (env->gpr[rs1] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + 
vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + >> (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + >> (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + >> (rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + >> (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfmadd.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) + vs2[i] */ > +void VECTOR_HELPER(vfmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + 0, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + 0, > + &env->fp_status); > + } > + break; > + 
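
The shift-immediate forms don't mask the shift amount: vsrl.vi here (and vsll.vi earlier, vsra.vi below) shift by the raw `rs1` field, while the .vv/.vx forms carefully mask with 0x7/0xf/0x1f/0x3f. If I read the spec right, only the low log2(SEW) bits of the shift amount participate, so a uimm of, say, 31 on an 8-bit element gives the wrong result here. vaadd.vi has the converse problem: the field is passed through unextended even though that immediate is signed.
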
default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmadd.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) + vs2[i] */ > +void VECTOR_HELPER(vfmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + 0, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] > + >> (env->vfp.vreg[src1].s8[j] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + >> (env->vfp.vreg[src1].s16[j] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + >> (env->vfp.vreg[src1].s32[j] & 
0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + >> (env->vfp.vreg[src1].s64[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] > + >> (env->gpr[rs1] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + >> (env->gpr[rs1] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + >> (env->gpr[rs1] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] > + >> (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + >> (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + >> (rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + >> 
(rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[dest].s8[j] > + + env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[dest].s16[j] > + + env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[dest].s32[j] > + + env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[dest].s64[j] > + + env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->gpr[rs1] > + * env->vfp.vreg[dest].s8[j] > + + env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = env->gpr[rs1] > + * env->vfp.vreg[dest].s16[j] > + + env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = env->gpr[rs1] > + * env->vfp.vreg[dest].s32[j] > + + env->vfp.vreg[src2].s32[j]; > + } > + 
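vmadd_vx is the same body four times over, varying only in element width, and the surrounding loop skeleton is identical in every helper in this file. Generating the per-width workers with a macro would shrink things considerably; a sketch, reusing the patch's own vreg union:

    #define GEN_VMADD(BITS)                                              \
    static inline void do_vmadd##BITS(CPURISCVState *env, int dest,      \
                                      int src2, int j, int##BITS##_t a)  \
    {                                                                    \
        env->vfp.vreg[dest].s##BITS[j] =                                 \
            a * env->vfp.vreg[dest].s##BITS[j] +                         \
            env->vfp.vreg[src2].s##BITS[j];                              \
    }

    GEN_VMADD(8)
    GEN_VMADD(16)
    GEN_VMADD(32)
    GEN_VMADD(64)

The switch then collapses to one call per width, and the same shape serves vmacc and vnmsub further down.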
break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > + (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[dest].s64[j] > + + env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +/* vfnmadd.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) - vs2[i] */ > +void VECTOR_HELPER(vfnmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfnmadd.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) - vs2[i] */ > +void VECTOR_HELPER(vfnmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } 
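vfnmadd_vv just above ends with:

    return;
    env->vfp.vstart = 0;

so the vstart reset is unreachable, and the same pattern repeats in every floating-point and fixed-point helper below. Assuming the intent is the same as in the integer helpers (vstart cleared once the instruction completes), the tail should read:

        } else {
            vector_tail_fcommon(env, dest, j, width);
        }
    }
    env->vfp.vstart = 0;
}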
else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssrl.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i] */ > +void VECTOR_HELPER(vssrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = vssrl_8(env, > + env->vfp.vreg[src2].u8[j], > env->vfp.vreg[src1].u8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = vssrl_16(env, > + env->vfp.vreg[src2].u16[j], > env->vfp.vreg[src1].u16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = vssrl_32(env, > + env->vfp.vreg[src2].u32[j], > env->vfp.vreg[src1].u32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = vssrl_64(env, > + env->vfp.vreg[src2].u64[j], > env->vfp.vreg[src1].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssrl.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */ > +void VECTOR_HELPER(vssrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) 
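Nit: the vssrl.vv comment is missing its closing parenthesis. More substantively, the early

    if (env->vfp.vstart >= vl) {
        return;
    }

check also leaves vstart set, and only some helpers have it (the plain integer shifts above fall through to the loop). If it stays, something like this keeps the architectural state consistent, assuming vstart is meant to be consumed even when no elements execute:

    if (env->vfp.vstart >= vl) {
        env->vfp.vstart = 0;
        return;
    }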
{ > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = vssrl_8(env, > + env->vfp.vreg[src2].u8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = vssrl_16(env, > + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = vssrl_32(env, > + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = vssrl_64(env, > + env->vfp.vreg[src2].u64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssrl.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */ > +void VECTOR_HELPER(vssrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = vssrli_8(env, > + env->vfp.vreg[src2].u8[j], rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = vssrli_16(env, > + env->vfp.vreg[src2].u16[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = vssrli_32(env, > + env->vfp.vreg[src2].u32[j], rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = vssrli_64(env, > + env->vfp.vreg[src2].u64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmsub.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) - vs2[i] */ > +void VECTOR_HELPER(vfmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, 
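The vssrl_8/16/32/64 and vssrli_* helpers aren't in this hunk, so I can't review the actual rounding. For reference, this is roughly the shape I'd expect for the round-to-nearest-up mode; a sketch only, the real helper has to dispatch on all four vxrm modes:

    #include <stdint.h>

    static inline uint8_t rnu_srl8(uint8_t v, unsigned shamt)
    {
        shamt &= 0x7;
        if (shamt == 0) {
            return v;
        }
        /* add the most significant discarded bit to round to nearest up */
        return (uint8_t)((v >> shamt) + ((v >> (shamt - 1)) & 1));
    }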
j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmsub.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) - vs2[i] */ > +void VECTOR_HELPER(vfmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + 
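vfmsub_vf (and every other _vf helper here) passes env->fpr[rs1] straight through as a float16/float32 operand, which silently truncates the 64-bit FP register. Scalar singles are NaN-boxed in the F register file, so unless the 0.7.1 vector spec says otherwise the input wants unboxing first; along these lines, untested:

    uint64_t raw = env->fpr[rs1];
    float32 s1;

    if ((raw >> 32) == 0xffffffffull) {
        s1 = make_float32((uint32_t)raw);        /* properly boxed */
    } else {
        s1 = float32_default_nan(&env->fp_status);
    }

and similarly for the half-precision case.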
float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssra.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i]) */ > +void VECTOR_HELPER(vssra_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vssra_8(env, > + env->vfp.vreg[src2].s8[j], > env->vfp.vreg[src1].u8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vssra_16(env, > + env->vfp.vreg[src2].s16[j], > env->vfp.vreg[src1].u16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vssra_32(env, > + env->vfp.vreg[src2].s32[j], > env->vfp.vreg[src1].u32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vssra_64(env, > + env->vfp.vreg[src2].s64[j], > env->vfp.vreg[src1].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssra.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */ > +void VECTOR_HELPER(vssra_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vssra_8(env, > + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vssra_16(env, > + env->vfp.vreg[src2].s16[j], 
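Every case in these switches opens with the same vector_elem_mask() test. Hoisting it above the switch removes four copies per helper and makes the masked-off path explicit:

        } else if (i < vl) {
            if (!vector_elem_mask(env, vm, width, lmul, i)) {
                continue;   /* masked off: leave the element undisturbed */
            }
            switch (width) {
            case 8:
                env->vfp.vreg[dest].s8[j] = vssra_8(env,
                    env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
                break;
            /* 16/32/64 likewise */
            }
        }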
env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vssra_32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vssra_64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssra.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */ > +void VECTOR_HELPER(vssra_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vssrai_8(env, > + env->vfp.vreg[src2].s8[j], rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vssrai_16(env, > + env->vfp.vreg[src2].s16[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vssrai_32(env, > + env->vfp.vreg[src2].s32[j], rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vssrai_64(env, > + env->vfp.vreg[src2].s64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] > + - env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[dest].s8[j]; > + } > + break; > + case 16: > + 
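The 64-bit cases of vnmsub/vmadd/vmacc multiply two int64_t values, which can overflow; that is undefined behaviour in plain C unless the build flags guarantee wrapping (I haven't checked what QEMU uses). Doing the arithmetic in unsigned and casting back is the portable spelling:

    env->vfp.vreg[dest].s64[j] =
        (int64_t)((uint64_t)env->vfp.vreg[src2].s64[j] -
                  (uint64_t)env->vfp.vreg[src1].s64[j] *
                  (uint64_t)env->vfp.vreg[dest].s64[j]);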
if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + - env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[dest].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + - env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[dest].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + - env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[dest].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnmsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] > + - env->gpr[rs1] > + * env->vfp.vreg[dest].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + - env->gpr[rs1] > + * env->vfp.vreg[dest].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + - env->gpr[rs1] > + * env->vfp.vreg[dest].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + - (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[dest].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + vs2[i] */ > +void VECTOR_HELPER(vfnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / 
width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + > + > + env->vfp.vstart = 0; > +} > + > +/* vfnmsub.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) + vs2[i] */ > +void VECTOR_HELPER(vfnmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vnsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if 
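On the narrowing shifts starting here: the vv/vx forms correctly mask the shift amount to log2(2*SEW) bits (0xf/0x1f/0x3f for the 16/32/64-bit sources), but vnsrl.vi and vnsra.vi below shift by the raw 5-bit immediate, same remark as for vsra.vi earlier. One masked helper per width would keep the three forms in agreement:

    static inline uint8_t do_vnsrl8(uint16_t s2, uint32_t shamt)
    {
        /* the source is 2*SEW wide, so the shift uses 4 bits here */
        return (uint8_t)(s2 >> (shamt & 0xf));
    }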
(vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k] > + >> (env->vfp.vreg[src1].u8[j] & 0xf); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u32[k] > + >> (env->vfp.vreg[src1].u16[j] & 0x1f); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u64[k] > + >> (env->vfp.vreg[src1].u32[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k] > + >> (env->gpr[rs1] & 0xf); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u32[k] > + >> (env->gpr[rs1] & 0x1f); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u64[k] > + >> (env->gpr[rs1] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + 
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k] > + >> (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u32[k] > + >> (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u64[k] > + >> (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + 0, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, 
vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + 0, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k] > + >> (env->vfp.vreg[src1].s8[j] & 0xf); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s32[k] > + >> (env->vfp.vreg[src1].s16[j] & 0x1f); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s64[k] > + >> (env->vfp.vreg[src1].s32[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = 
vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k] > + >> (env->gpr[rs1] & 0xf); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s32[k] > + >> (env->gpr[rs1] & 0x1f); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s64[k] > + >> (env->gpr[rs1] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k] > + >> (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s32[k] > + >> (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s64[k] > + >> (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > 
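The prologue repeated here (vtype check, mask-overlap check, LMUL alignment of each register group) is identical across the file. One predicate would do:

    static bool vext_check(CPURISCVState *env, uint32_t lmul, uint32_t vm,
                           uint32_t rd)
    {
        return !vector_vtype_ill(env) &&
               !vector_overlap_vm_common(lmul, vm, rd);
    }

leaving each helper with a single "if (!vext_check(...)) { raise; return; }" line (the narrowing ops would take an extra overlap argument).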
+ vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] += env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] += > env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] += > env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] += > env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] += env->gpr[rs1] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] += env->gpr[rs1] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] += env->gpr[rs1] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] += > + (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, 
false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } 
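Stepping back: the eight vfm*.vv/.vf families differ only in the softfloat muladd flags and in whether vd or vs2 supplies the addend. Read off the code in this patch (vfmadd and vfnmsac aren't in this hunk, so those two entries are my inference from the spec's symmetry):

    enum { VFMACC, VFNMACC, VFMSAC, VFNMSAC,
           VFMADD, VFNMADD, VFMSUB, VFNMSUB };

    static const int vfma_flags[8] = {
        [VFMACC]  = 0,
        [VFNMACC] = float_muladd_negate_product | float_muladd_negate_c,
        [VFMSAC]  = float_muladd_negate_c,
        [VFNMSAC] = float_muladd_negate_product,
        [VFMADD]  = 0,
        [VFNMADD] = float_muladd_negate_product | float_muladd_negate_c,
        [VFMSUB]  = float_muladd_negate_c,
        [VFNMSUB] = float_muladd_negate_product,
    };

A single parameterised loop indexed by this table would replace some two dozen near-identical functions.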
else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vnclipu.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vnclipu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, k, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[k] = vnclipu_16(env, > + env->vfp.vreg[src2].u16[j], > env->vfp.vreg[src1].u8[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vnclipu_32(env, > + env->vfp.vreg[src2].u32[j], > env->vfp.vreg[src1].u16[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vnclipu_64(env, > + env->vfp.vreg[src2].u64[j], > env->vfp.vreg[src1].u32[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vnclipu.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vnclipu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[k] = vnclipu_16(env, > + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vnclipu_32(env, > + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vnclipu_64(env, > + 
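vnclipu_16/32/64 are also outside this hunk. Whatever they do has to round per vxrm, saturate to the narrow type, and set vxsat when the value clips. A sketch of the unsigned 16->8 case, round-to-nearest-up only; the vxsat field name below is made up, substitute whatever the patch really uses:

    static inline uint8_t clipu_16to8(CPURISCVState *env, uint16_t v,
                                      unsigned shamt)
    {
        shamt &= 0xf;
        uint32_t r = v >> shamt;
        if (shamt) {
            r += (v >> (shamt - 1)) & 1;   /* RNU rounding bit */
        }
        if (r > UINT8_MAX) {
            env->vfp.vxsat = 1;            /* hypothetical field */
            return UINT8_MAX;
        }
        return (uint8_t)r;
    }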
env->vfp.vreg[src2].u64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > + > +/* vnclipu.vi vd, vs2, imm, vm # vector-immediate */ > +void VECTOR_HELPER(vnclipu_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[k] = vnclipui_16(env, > + env->vfp.vreg[src2].u16[j], rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vnclipui_32(env, > + env->vfp.vreg[src2].u32[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vnclipui_64(env, > + env->vfp.vreg[src2].u64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + > 
env->vfp.vreg[dest].f32[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vnclip.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vnclip_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, k, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / (2 * 
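The 16/32/64 bit cases of these FP helpers differ only in the element
type and the float*_muladd flavour called, so there is a lot of
duplication here. A macro would collapse each switch to three
one-liners - a sketch (names invented):

#define DO_MULADD_CASE(BITS, FLAGS)                                  \
    case BITS:                                                       \
        if (vector_elem_mask(env, vm, width, lmul, i)) {             \
            env->vfp.vreg[dest].f##BITS[j] = float##BITS##_muladd(   \
                env->vfp.vreg[src1].f##BITS[j],                      \
                env->vfp.vreg[src2].f##BITS[j],                      \
                env->vfp.vreg[dest].f##BITS[j],                      \
                FLAGS, &env->fp_status);                             \
        }                                                            \
        break;

Better still would be the approach target/arm/sve_helper.c takes, where
the helpers themselves are expanded from macros - that would also do a
lot for the sheer size of this file.
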
width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[k] = vnclip_16(env, > + env->vfp.vreg[src2].s16[j], > env->vfp.vreg[src1].u8[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vnclip_32(env, > + env->vfp.vreg[src2].s32[j], > env->vfp.vreg[src1].u16[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vnclip_64(env, > + env->vfp.vreg[src2].s64[j], > env->vfp.vreg[src1].u32[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vnclip.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vnclip_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, k, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[k] = vnclip_16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vnclip_32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vnclip_64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vnclip.vi vd, vs2, imm, vm # vector-immediate */ > +void VECTOR_HELPER(vnclip_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, k, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN 
/ (2 * width))); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[k] = vnclipi_16(env, > + env->vfp.vreg[src2].s16[j], rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vnclipi_32(env, > + env->vfp.vreg[src2].s32[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vnclipi_64(env, > + env->vfp.vreg[src2].s64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] -= env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] -= > env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] -= > env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] -= > env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnmsac_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if 
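riscv_raise_exception() ends in cpu_loop_exit_restore() and never
returns, so the return/break after it in the default cases is dead code
- and note some helpers break here while others return, which is
another sign of cut-and-paste.
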
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] -= env->gpr[rs1] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] -= env->gpr[rs1] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] -= env->gpr[rs1] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] -= > + (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, 
lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vwredsumu.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(zero-extend(SEW)) > */ > +void VECTOR_HELPER(vwredsumu_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t sum = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u8[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u16[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = sum; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u16[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u32[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = sum; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u32[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u64[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = sum; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwaddu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = 
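In vwredsumu_vs (and the other reductions below) vector_lmul_check_reg()
is called before the vector_vtype_ill() check, i.e. the registers are
sanity-checked against a vtype that hasn't been validated yet. The
vtype check should come first. The early return when vl == 0 also skips
the vstart reset, though given that reset is dead code anyway (see
above) it makes no difference as written.
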
vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src1].u8[j] + > + (uint16_t)env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src1].u16[j] + > + (uint32_t)env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src1].u32[j] + > + (uint64_t)env->vfp.vreg[src2].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwaddu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u8[j] + > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u16[j] + > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u32[j] + > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwadd.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + 
> + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_add( > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_add( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwadd.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_add( > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_add( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->fpr[rs1], > &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > 
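vfwadd_vf checks vector_vtype_ill() twice - once in the compound
condition at the top and again after the vector_lmul_check_reg() calls.
The second check is redundant.
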
GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vwredsum.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(sign-extend(SEW)) */ > +void VECTOR_HELPER(vwredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + int64_t sum = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += (int16_t)env->vfp.vreg[src2].s8[j] << 8 >> 8; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].s16[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s16[0] = sum; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += (int32_t)env->vfp.vreg[src2].s16[j] << 16 >> > 16; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].s32[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s32[0] = sum; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += (int64_t)env->vfp.vreg[src2].s32[j] << 32 >> > 32; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].s64[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s64[0] = sum; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src1].s8[j] + > + (int16_t)env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src1].s16[j] + > + (int32_t)env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if 
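The << 8 >> 8 (and << 16 >> 16, << 32 >> 32) dances in vwredsum_vs are
no-ops: the cast from the signed element type already sign-extends, so

    sum += (int16_t)env->vfp.vreg[src2].s8[j];

does the same thing. The extra shifts just obscure what is going on.
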
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src1].s32[j] + > + (int64_t)env->vfp.vreg[src2].s32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) + > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) + > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) + > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vd, vs2, vs1, vm # Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) > */ > +void VECTOR_HELPER(vfwredsum_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + float32 sum32 = 0.0f; > + float64 sum64 = 0.0f; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 16: > + if (i == 0) { > + sum32 = env->vfp.vreg[rs1].f32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum32 = float32_add(sum32, > + > float16_to_float32(env->vfp.vreg[src2].f16[j], > + true, &env->fp_status), > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f32[0] = sum32; > + } > + break; > + case 32: > + if (i == 0) { > + sum64 = env->vfp.vreg[rs1].f64[0]; > + } > + if (vector_elem_mask(env, 
vm, width, lmul, i)) { > + sum64 = float64_add(sum64, > + > float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f64[0] = sum64; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u8[j] - > + (uint16_t)env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u16[j] - > + (uint32_t)env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u32[j] - > + (uint64_t)env->vfp.vreg[src1].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u8[j] - > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u16[j] - > + (uint32_t)((uint16_t)env->gpr[rs1]); > 
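float32/float64 are not C floating point types, so

    float32 sum32 = 0.0f;

only works because converting 0.0f to uint32_t happens to give the
all-zeroes bit pattern that is also +0.0. float32_zero / float64_zero
would say what you mean; the sum64 initialiser has the same problem.
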
+ } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u32[j] - > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwsub.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_sub( > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_sub( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwsub.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_sub( > + 
float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_sub( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->fpr[rs1], > &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s8[j] - > + (int16_t)env->vfp.vreg[src1].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s16[j] - > + (int32_t)env->vfp.vreg[src1].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s32[j] - > + (int64_t)env->vfp.vreg[src1].s32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + 
(int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) - > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) - > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) - > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* > + * vfwredosum.vs vd, vs2, vs1, vm # > + * Ordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) > + */ > +void VECTOR_HELPER(vfwredosum_vs)(CPURISCVState *env, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + helper_vector_vfwredsum_vs(env, vm, rs1, rs2, rd); > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwaddu_wv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src1].u8[j] + > + (uint16_t)env->vfp.vreg[src2].u16[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src1].u16[j] + > + (uint32_t)env->vfp.vreg[src2].u32[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src1].u32[j] + > + (uint64_t)env->vfp.vreg[src2].u64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwaddu_wx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); 
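vfwredosum_vs just tail-calls the unordered helper. That happens to be
correct while vfwredsum_vs really does accumulate in element order, but
if the unordered version is ever optimised (a tree reduction, say) the
ordered semantics would break silently - at minimum this deserves a
comment. The trailing return/vstart pair here is dead code again.
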
> + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u16[k] + > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u32[k] + > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u64[k] + > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwadd.wv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_add( > + env->vfp.vreg[src2].f32[k], > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_add( > + env->vfp.vreg[src2].f64[k], > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwadd.wf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwadd_wf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 
+ (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_add( > + env->vfp.vreg[src2].f32[k], > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_add( > + env->vfp.vreg[src2].f64[k], > + float32_to_float64(env->fpr[rs1], > &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]) + > + (int16_t)env->vfp.vreg[src2].s16[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]) + > + (int32_t)env->vfp.vreg[src2].s32[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]) + > + (int64_t)env->vfp.vreg[src2].s64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwadd_wx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if 
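All of the .vf/.wf helpers pass env->fpr[rs1] straight into the
float16/float32 operations, relying on implicit truncation of the
64-bit register. Shouldn't these check that the narrower value is
properly NaN-boxed, as the scalar FP instructions do, rather than just
taking the low bits?
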
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s16[k] + > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s32[k] + > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s64[k] + > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsubu_wv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u16[k] - > + (uint16_t)env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u32[k] - > + (uint32_t)env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u64[k] - > + (uint64_t)env->vfp.vreg[src1].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsubu_wx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u16[k] - > + 
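A structural comment that applies to all of these helpers: the
switch (width) sits inside the per-element loop even though width is
loop-invariant, so every element pays for the dispatch. Inverting the
nesting would already help, roughly (a sketch for one case, with the
tail elements handled in a separate loop afterwards):

    switch (width) {
    case 8:
        for (i = env->vfp.vstart; i < vl; i++) {
            if (vector_elem_mask(env, vm, 8, lmul, i)) {
                /* the 8-bit element op */
            }
        }
        break;
    /* ... other widths ... */
    }

and if vtype were reflected in the TB flags you could go further and
pick a per-SEW helper at translation time so the runtime dispatch
disappears entirely.
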
(uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u32[k] - > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u64[k] - > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwsub.wv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_sub( > + env->vfp.vreg[src2].f32[k], > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_sub( > + env->vfp.vreg[src2].f64[k], > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwsub.wf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwsub_wf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_sub( > 
+ env->vfp.vreg[src2].f32[k], > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_sub( > + env->vfp.vreg[src2].f64[k], > + float32_to_float64(env->fpr[rs1], > &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s16[k] - > + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s32[k] - > + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s64[k] - > + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsub_wx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s16[k] - > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + 
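
In vfwsub.wf the scalar operand comes straight out of the 64-bit FP
register file:

    float16_to_float32(env->fpr[rs1], true, &env->fp_status)

float16 is only 16 bits wide, so this relies on implicit truncation and
never checks that the value in the register is properly NaN-boxed.
Don't you need to unbox the scalar (and substitute a canonical NaN for
an unboxed value) before converting? The same question applies to the
float32 case below it and to the other .vf helpers later in the file.
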
(int32_t)env->vfp.vreg[src2].s32[k] - > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s64[k] - > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmulu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src1].u8[j] * > + (uint16_t)env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src1].u16[j] * > + (uint32_t)env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src1].u32[j] * > + (uint64_t)env->vfp.vreg[src2].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmulu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u8[j] * > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = 
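
These integer widening helpers are all the same loop with only the
element types changing. A macro that stamps out the per-width cases
would shrink the file considerably and make review feasible. A rough
sketch — the macro name is invented, vector_elem_mask() is the patch's
own helper:

    #define WIDEN_MUL_CASE(BITS, DBITS)                                \
        case BITS:                                                     \
            if (vector_elem_mask(env, vm, width, lmul, i)) {           \
                env->vfp.vreg[dest].u##DBITS[k] =                      \
                    (uint##DBITS##_t)env->vfp.vreg[src1].u##BITS[j] *  \
                    (uint##DBITS##_t)env->vfp.vreg[src2].u##BITS[j];   \
            }                                                          \
            break;

            switch (width) {
            WIDEN_MUL_CASE(8, 16)
            WIDEN_MUL_CASE(16, 32)
            WIDEN_MUL_CASE(32, 64)
            default:
                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
                break;
            }

Generating whole helpers from a template, similar to how target/arm's
SVE helpers are built up, would be better still.
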
> + (uint32_t)env->vfp.vreg[src2].u16[j] * > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u32[j] * > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwmul.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_mul( > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_mul( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vfwmul.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_mul( > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_mul( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->fpr[rs1], > &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmulsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s8[j] * > + (uint16_t)env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s16[j] * > + (uint32_t)env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s32[j] * > + (uint64_t)env->vfp.vreg[src1].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmulsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > 
+ if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src1].s8[j] * > + (int16_t)env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src1].s16[j] * > + (int32_t)env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src1].s32[j] * > + (int64_t)env->vfp.vreg[src2].s32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + 
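
A nit: casts like

    (int16_t)((int8_t)env->vfp.vreg[src2].s8[j])

carry a redundant inner cast — the s8[j] field is already int8_t. The
double cast only does something for env->gpr[rs1], where the inner cast
truncates the register to the element size. Dropping the no-op casts
would make the places where truncation is actually intended stand out.
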
switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccu.vv vd, vs1, vs2, vm # > + * vd[i] = clipu((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env, > + > env->vfp.vreg[src2].u8[j], > + > env->vfp.vreg[src1].u8[j], > + > env->vfp.vreg[dest].u16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env, > + > env->vfp.vreg[src2].u16[j], > + > env->vfp.vreg[src1].u16[j], > + > env->vfp.vreg[dest].u32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env, > + > env->vfp.vreg[src2].u32[j], > + > env->vfp.vreg[src1].u32[j], > + > env->vfp.vreg[dest].u64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccu.vx vd, rs1, vs2, vm # > + * vd[i] = clipu((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + 
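
The saturating variants bail out early:

    vl = env->vfp.vl;
    if (env->vfp.vstart >= vl) {
        return;
    }

so when vstart >= vl the tail elements are never zeroed, whereas the
plain widening helpers above still run the loop and call
vector_tail_widen() for everything past vl. Which behaviour is the
intended one? The two families should at least agree. (The early return
also skips the final vstart reset, though that is unreachable anyway,
see above.)
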
vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env, > + > env->vfp.vreg[src2].u8[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].u16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env, > + > env->vfp.vreg[src2].u16[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].u32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env, > + > env->vfp.vreg[src2].u32[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].u64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] += > + (uint16_t)env->vfp.vreg[src1].u8[j] * > + (uint16_t)env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] += > + (uint32_t)env->vfp.vreg[src1].u16[j] * > + (uint32_t)env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] += > + (uint64_t)env->vfp.vreg[src1].u32[j] * > + (uint64_t)env->vfp.vreg[src2].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = 
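
Every case arm in these loops starts with the same vector_elem_mask()
test, and width is loop-invariant, so an unsupported width could be
rejected once before the loop and the mask test hoisted out of the
switch. Roughly, as a sketch against the patch's own helpers:

        } else if (i < vl) {
            if (!vector_elem_mask(env, vm, width, lmul, i)) {
                continue;
            }
            switch (width) {
            case 8:
                env->vfp.vreg[dest].u16[k] +=
                    (uint16_t)env->vfp.vreg[src1].u8[j] *
                    (uint16_t)env->vfp.vreg[src2].u8[j];
                break;
            ...
            }
        }

That trims three copies of the guard per helper and makes the actual
arithmetic easier to compare against the spec.
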
vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] += > + (uint16_t)env->vfp.vreg[src2].u8[j] * > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] += > + (uint32_t)env->vfp.vreg[src2].u16[j] * > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] += > + (uint64_t)env->vfp.vreg[src2].u32[j] * > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + 
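
In vfwmacc.vv the accumulator operand is read as

    float16_to_float32(env->vfp.vreg[dest].f16[j], true, &env->fp_status)

but vd is the *widened* register group — vd[i] is already 2*SEW wide.
Shouldn't this simply be env->vfp.vreg[dest].f32[k] (and .f64[k] in the
32-bit case), with no conversion? As written it reinterprets half of
the wide element and indexes it with the narrow j instead of k.
Something like:

    env->vfp.vreg[dest].f32[k] = float32_muladd(
        float16_to_float32(env->vfp.vreg[src1].f16[j], true,
                           &env->fp_status),
        float16_to_float32(env->vfp.vreg[src2].f16[j], true,
                           &env->fp_status),
        env->vfp.vreg[dest].f32[k], /* wide addend, no conversion */
        0, &env->fp_status);

The same pattern repeats in all the vfw*macc/msac variants below.
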
env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfwmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + env->fpr[rs1], > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + env->fpr[rs1], > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmacc.vv vd, vs1, vs2, vm # > + * vd[i] = clip((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { 
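
This tail-handling switch is missing its break statements:

    switch (width) {
    case 16:
        env->vfp.vreg[dest].f32[k] = 0;
        break;
    case 32:
        env->vfp.vreg[dest].f64[k] = 0;
        break;
    default:
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
        return;
    }

As posted, a 16-bit tail element zeroes both f32[k] and f64[k]
(clobbering a neighbouring element) and then falls through into
default: and raises an illegal instruction exception for every tail
element. The arithmetic helpers above use vector_tail_fwiden() for
exactly this case — can't these do the same? The identical broken
switch is copied into each vfw* helper below and into vfsqrt.v.
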
> + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env, > + > env->vfp.vreg[src2].s8[j], > + > env->vfp.vreg[src1].s8[j], > + > env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env, > + > env->vfp.vreg[src2].s16[j], > + > env->vfp.vreg[src1].s16[j], > + > env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env, > + > env->vfp.vreg[src2].s32[j], > + > env->vfp.vreg[src1].s32[j], > + > env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmacc.vx vd, rs1, vs2, vm # > + * vd[i] = clip((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env, > + > env->vfp.vreg[src2].s8[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env, > + > env->vfp.vreg[src2].s16[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env, > + > env->vfp.vreg[src2].s32[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccsu.vv vd, vs1, vs2, vm > + * # vd[i] = clip(-((signed(vs1[i])*unsigned(vs2[i])+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccsu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + 
vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env, > + > env->vfp.vreg[src2].u8[j], > + > env->vfp.vreg[src1].s8[j], > + > env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env, > + > env->vfp.vreg[src2].u16[j], > + > env->vfp.vreg[src1].s16[j], > + > env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env, > + > env->vfp.vreg[src2].u32[j], > + > env->vfp.vreg[src1].s32[j], > + > env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccsu.vx vd, rs1, vs2, vm > + * # vd[i] = clip(-((signed(x[rs1])*unsigned(vs2[i])+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccsu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env, > + > env->vfp.vreg[src2].u8[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env, > + > env->vfp.vreg[src2].u16[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env, > + > env->vfp.vreg[src2].u32[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccus.vx vd, rs1, vs2, vm > + * # vd[i] = 
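
The comments for vwsmaccsu read

    vd[i] = clip(-((signed(vs1[i])*unsigned(vs2[i])+round)>>SEW/2)+vd[i])

but the helpers don't negate anything — should the leading '-' be a
'+', as in the vwsmaccu comment above? The vwsmaccus.vx comment just
below has the same stray '-'.
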
clip(-((unsigned(x[rs1])*signed(vs2[i])+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccus_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmaccus_8(env, > + > env->vfp.vreg[src2].s8[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmaccus_16(env, > + > env->vfp.vreg[src2].s16[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmaccus_32(env, > + > env->vfp.vreg[src2].s32[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (int16_t)env->vfp.vreg[src1].s8[j] > + * (int16_t)env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (int32_t)env->vfp.vreg[src1].s16[j] * > + (int32_t)env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (int64_t)env->vfp.vreg[src1].s32[j] * > + 
(int64_t)env->vfp.vreg[src2].s32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfwnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + 
float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfwnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + env->fpr[rs1], > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + env->fpr[rs1], > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmaccsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 
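
Here (and in vfwmacc.vf above) the SEW=16 path feeds env->fpr[rs1] to
float32_muladd() as-is, with no half-to-single conversion at all —
unlike the .vv form, which converts both sources. Presumably it should
be something like

    float16_to_float32((uint16_t)env->fpr[rs1], true, &env->fp_status)

modulo the NaN-boxing question raised at vfwsub.wf. The SEW=32 path
does convert with float32_to_float64(), so it is only the
half-precision case that looks wrong.
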
* lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (int16_t)env->vfp.vreg[src1].s8[j] > + * (uint16_t)env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (int32_t)env->vfp.vreg[src1].s16[j] * > + (uint32_t)env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (int64_t)env->vfp.vreg[src1].s32[j] * > + (uint64_t)env->vfp.vreg[src2].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmaccsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (uint16_t)((uint8_t)env->vfp.vreg[src2].u8[j]) * > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (uint32_t)((uint16_t)env->vfp.vreg[src2].u16[j]) * > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (uint64_t)((uint32_t)env->vfp.vreg[src2].u32[j]) * > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfwmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + 
|| vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfwmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + env->fpr[rs1], > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + env->fpr[rs1], > + 
float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmaccus_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfwnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vfwnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfwnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + env->fpr[rs1], > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + env->fpr[rs1], > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > + > +/* vfsqrt.v vd, vs2, vm # Vector-vector square root */ > +void VECTOR_HELPER(vfsqrt_v)(CPURISCVState *env, uint32_t vm, uint32_t > rs2, > + uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + 
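vfwmsac_vv/vf and vfwnmsac_vv/vf (and I suspect the vfwmacc/vfwnmacc
pair earlier in the patch) are the identical loop with a different
float_muladd_negate_* flag. A shared worker would shrink the file
considerably and mean the tail/vstart fixes only need making once, e.g.
(sketch, invented name):

    static void vfw_macc_vv(CPURISCVState *env, uint32_t vm, uint32_t rs1,
                            uint32_t rs2, uint32_t rd, int flags)
    {
        /* the legality checks and element walk as above, passing
         * `flags` straight through to float32_muladd()/float64_muladd() */
    }

    void VECTOR_HELPER(vfwmsac_vv)(CPURISCVState *env, uint32_t vm,
                                   uint32_t rs1, uint32_t rs2, uint32_t rd)
    {
        vfw_macc_vv(env, vm, rs1, rs2, rd, float_muladd_negate_c);
    }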
int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_sqrt( > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_sqrt( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_sqrt( > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfclass.v vd, vs2, vm # Vector-vector */ > +void VECTOR_HELPER(vfclass_v)(CPURISCVState *env, uint32_t vm, uint32_t > rs2, > + uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = helper_fclass_h( > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = helper_fclass_s( > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = helper_fclass_d( > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
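vfsqrt_v has the same fall-through tail switch (16 falls through 32 and
64 into the illegal-instruction default), while vfclass_v just below
gets it right by calling vector_tail_fcommon(). Is there a reason
vfsqrt_v can't use vector_tail_fcommon() as well?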
*/ > +void VECTOR_HELPER(vfcvt_xu_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = float16_to_uint16( > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = float32_to_uint32( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = float64_to_uint64( > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ > +void VECTOR_HELPER(vfcvt_x_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = float16_to_int16( > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = float32_to_int32( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = float64_to_int64( > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. 
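vfcvt_xu_f_v checks vector_vtype_ill(env) twice: once on its own at the
top of the function and again together with the overlap check. The first
test and its early return can go; vfcvt_x_f_v below has the right shape.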
*/ > +void VECTOR_HELPER(vfcvt_f_xu_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = uint16_to_float16( > + > env->vfp.vreg[src2].u16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = uint32_to_float32( > + > env->vfp.vreg[src2].u32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = uint64_to_float64( > + > env->vfp.vreg[src2].u64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ > +void VECTOR_HELPER(vfcvt_f_x_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = int16_to_float16( > + > env->vfp.vreg[src2].s16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = int32_to_float32( > + > env->vfp.vreg[src2].s32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = int64_to_float64( > + > env->vfp.vreg[src2].s64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned > integer.*/ > +void VECTOR_HELPER(vfwcvt_xu_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, 
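All of these convert helpers (and the widening/narrowing ones below) are
the same element walk wrapped around a single conversion call, so the
shared-worker treatment suggested for the fused multiply-adds applies
here too. Separate question: they all round via env->fp_status. I'm
assuming the translator has synced frm into fp_status before calling the
helper, as for the scalar FP helpers? A comment to that effect would
help.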
vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = float16_to_uint32( > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = float32_to_uint64( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + } > + } else { > + vector_tail_fwiden(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed > integer. */ > +void VECTOR_HELPER(vfwcvt_x_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = float16_to_int32( > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = float32_to_int64( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width > float */ > +void VECTOR_HELPER(vfwcvt_f_xu_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = 
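In vfwcvt_xu_f_v the tail call is

    vector_tail_fwiden(env, dest, j, width);

but the destination is indexed by k everywhere else in the loop (and
vfwcvt_x_f_v passes k), so tail elements get cleared at the wrong index;
looks like a copy-and-paste slip. Its default: case is also missing the
return; that every other helper has after raising the exception.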
env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = uint16_to_float32( > + > env->vfp.vreg[src2].u16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = uint32_to_float64( > + > env->vfp.vreg[src2].u32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ > +void VECTOR_HELPER(vfwcvt_f_x_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = int16_to_float32( > + > env->vfp.vreg[src2].s16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = int32_to_float64( > + > env->vfp.vreg[src2].s32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vfwcvt.f.f.v vd, vs2, vm # > + * Convert single-width float to double-width float. 
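All the widening converts do the lmul > 4 test only after
vector_lmul_check_reg() has already been run on rd. It would be tidier
to reject the instruction before doing any per-register checking, and
since LMUL is a power of two, writing lmul == 8 would say what is
actually meant.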
> + */ > +void VECTOR_HELPER(vfwcvt_f_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float16_to_float32( > + > env->vfp.vreg[src2].f16[j], > + true, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float32_to_float64( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ > +void VECTOR_HELPER(vfncvt_xu_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = float32_to_uint16( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = float64_to_uint32( > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed > integer. 
*/ > +void VECTOR_HELPER(vfncvt_x_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = float32_to_int16( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = float64_to_int32( > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to > float */ > +void VECTOR_HELPER(vfncvt_f_xu_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[k] = uint32_to_float16( > + > env->vfp.vreg[src2].u32[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = uint64_to_float32( > + > env->vfp.vreg[src2].u64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. 
*/ > +void VECTOR_HELPER(vfncvt_f_x_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[k] = int32_to_float16( > + > env->vfp.vreg[src2].s32[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = int64_to_float32( > + > env->vfp.vreg[src2].s64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. > */ > +void VECTOR_HELPER(vfncvt_f_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[k] = float32_to_float16( > + > env->vfp.vreg[src2].f32[j], > + true, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float64_to_float32( > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, 
width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s8[j] = > + cpu_ldsb_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s16[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; 
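Two things on the loads, starting here with vlbu_v. First, helpers that
access guest memory normally go through the _ra variants so a fault in
mid-instruction can unwind to the correct TB state, i.e. something like:

    env->vfp.vreg[dest + k * lmul].u8[j] =
        cpu_ldub_data_ra(env, env->gpr[rs1] + read, GETPC());

rather than the plain cpu_ldub_data() calls used throughout. Second,
vstart is only advanced for active elements, so after a trap it doesn't
name the next element index; it happens to work because re-running
masked-off elements is a no-op, but setting vstart = i would make the
resume semantics much easier to reason about.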
> + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlsbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if 
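cpu_ldsb_data() already returns the loaded byte sign-extended, so the
sign_extend(..., 8) wrappers in vlb_v and vlsb_v are no-ops (the case 8
arm manages without one). Harmless, but it implies a conversion that
isn't happening. The sign_extend(cpu_ldsw_data(...), 16) calls in the
halfword loads further down are the same.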
(vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].s8[j] = > + cpu_ldsb_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].s16[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + 
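In the strided forms, read is declared int while the stride in gpr[rs2]
is a full target_ulong, so on RV64 a large or negative stride gets
truncated before it reaches the address arithmetic. Computing the
address in target_ulong seems safer, e.g. (sketch):

    target_ulong addr = env->gpr[rs1] +
                        (target_ulong)i * env->gpr[rs2] + k;
    env->vfp.vreg[dest + k * lmul].u8[j] =
        cpu_ldub_data_ra(env, addr, GETPC());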
> +void VECTOR_HELPER(vlxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].s8[j] = > + cpu_ldsb_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].s16[j] = > sign_extend( > + cpu_ldsb_data(env, addr), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsb_data(env, addr), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsb_data(env, addr), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlbuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read 
= i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlbff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s8[j] = > + cpu_ldsb_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s16[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + 
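The fault-only-first forms zero env->vfp.vl, count it back up per
element, and then unconditionally restore the saved value on the way
out, so on the normal path all that bookkeeping is discarded. Presumably
the point is that a fault taken mid-loop leaves the partial vl visible,
with foflag telling the fault path not to raise for elements past the
first? That contract between foflag, the trap handler and vl really
needs a comment. Note also that env->vfp.vl++ runs for masked-off
elements as well; I assume that is intended (vl counts elements
attempted, not loaded), but it is worth stating.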
width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s16[j] = > + cpu_ldsw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlshu_v)(CPURISCVState 
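All the segment loads gate on lmul * (nf + 1) > 32, which only stops the
register indices running off the end of the register file. The spec
drafts I have read require NFIELDS * LMUL <= 8 for segment instructions;
if 0.7.1 says the same, this bound is far too permissive.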
*env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].s16[j] = > + cpu_ldsw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + 
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_lduw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_lduw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].s16[j] = > + cpu_ldsw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].s32[j] = > 
sign_extend( > + cpu_ldsw_data(env, addr), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsw_data(env, addr), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlhuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlhff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, 
i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s16[j] = > + cpu_ldsw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].s32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldl_data(env, env->gpr[rs1] + read), 32); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, 
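vlhuff_v restores env->vfp.vl = vl on the no-fault path but vlhff_v
just above doesn't, so with a non-zero vstart it returns with vl
clobbered to the number of elements processed. That looks like an
oversight - the two should surely be symmetrical.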
vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlswu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].s32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + 
env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldl_data(env, env->gpr[rs1] + read), 32); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldl_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + env->vfp.vreg[dest + k * lmul].s32[j] = > + cpu_ldl_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldl_data(env, addr), 32); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + 
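All the memory accesses in these helpers use the plain cpu_ld/st*_data
forms. Shouldn't they be the _ra variants, e.g.
cpu_lduw_data_ra(env, addr, GETPC()), so the unwinder can recover the
guest state when an access faults mid-helper? That matters even more
for the fault-only-first forms, which depend on a fault getting out of
the helper cleanly.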
default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlwuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlwff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].s32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldl_data(env, env->gpr[rs1] + read), 32); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > 
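If I'm reading the *ff helpers right, the fault-only-first trick is:
zero env->vfp.vl, bump it as each element completes, and rely on a
faulting load longjmp'ing out of the helper so the partial count is
what the fault path sees; on the no-fault path the saved vl is written
back. That scheme really deserves a comment in the code - and it only
works if the loads unwind correctly, per the GETPC() point above.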
+ } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vle_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 8; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldq_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + 
env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 8; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldq_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 8, > width, k); > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldq_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vleff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + env->vfp.vl = 0; > + env->foflag = true; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 8; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldq_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * 
lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vssb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = 
vector_get_index(env, rs1, src2, j, 1, > width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsuxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + return VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd); > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vssh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; 
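In vsuxb_v:

    return VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd);
    env->vfp.vstart = 0;

the assignment after the return is dead code, and returning a void
expression is odd style. Plain

    VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd);

is enough, since vsxb_v already clears vstart. The same pattern
repeats in vsuxh_v, vsuxw_v and vsuxe_v below; if the unordered and
ordered indexed stores really are identical here, the decoder could
just point both at one helper.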
i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + cpu_stw_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + cpu_stw_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + cpu_stw_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsuxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + return VECTOR_HELPER(vsxh_v)(env, nf, vm, rs1, rs2, rd); > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if 
(vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vssw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); 
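For the stores, vector_overlap_vm_common(lmul, vm, rd) rejects the
register group overlapping v0, but a store doesn't write any vector
register, so I don't see why the check applies here. The naming is
also confusing: "rd"/"dest" is really the source data register group
(vs3) and "wrote" is a byte offset - renaming them would make these
much easier to review.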
> + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + cpu_stl_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + cpu_stl_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsuxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + return VECTOR_HELPER(vsxw_v)(env, nf, vm, rs1, rs2, rd); > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 8; > + cpu_stq_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul 
* (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 8; > + cpu_stq_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + cpu_stw_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + cpu_stl_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 8, > width, k); > + cpu_stq_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + 
} > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsuxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + return VECTOR_HELPER(vsxe_v)(env, nf, vm, rs1, rs2, rd); > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoswapw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_xchgl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_xchgl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, > addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, > addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamoswapd_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; 
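The AMO helpers call the TCG atomic helpers (helper_atomic_xchgl_le
and friends) directly, with the CONFIG_SOFTMMU #ifdef repeated around
every call site - could that be wrapped once in a local function? I'm
also not sure calling them from another helper is safe with respect to
the saved return address, as GETPC() is only meaningful in the
outermost helper frame. And s/writen/written/ in the comment that's
been copied into each of these functions.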
> + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_xchgq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_xchgq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoaddw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_addl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_addl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_addl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_addl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + 
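Here (and in the other 32-bit AMOs at SEW=64)
env->vfp.vreg[src3].s64[j] is passed straight into a helper taking a
uint32_t, so the store value is narrowed implicitly while the result
is sign-extended explicitly. An explicit cast on the argument would
make the intended truncation obvious.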
env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vamoaddd_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_addq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_addq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoxorw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + 
env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_xorl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_xorl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamoxord_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_xorq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_xorq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoandw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = 
vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_andl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_andl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_andl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_andl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoandd_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_andq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop 
& ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_andq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoorw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_orl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_orl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_orl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_orl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoord_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, 
GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_orq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_orq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamominw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_sminl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_sminl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] 
= tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamomind_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_sminq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_sminq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamomaxw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + 
addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_smaxl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_smaxl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamomaxd_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_smaxq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_smaxq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamominuw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + 
vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_uminl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_uminl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le( > + env, addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le( > + env, addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamominud_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_uminl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + 
make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_uminl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_uminq_le( > + env, addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_uminq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamomaxuw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_umaxl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_umaxl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le( > + env, addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le( > + env, addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vamomaxud_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > 
+ uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_umaxq_le( > + env, addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_umaxq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > -- > 2.7.4 > > >
On 8/28/19 11:54 AM, Richard Henderson wrote:
> But it might be reasonable to include (VSTART == 0 && VL == VLMAX) as a
> single bit.

BTW, it is reasonable to check VSTART == 0 always. Quoting the spec:

# Implementations are permitted to raise illegal instruction exceptions
# when attempting to execute a vector instruction with a value of vstart
# that the implementation can never produce when executing that same
# instruction with the same vtype setting.

Since QEMU will never interrupt a single instruction, each vector
instruction will always run to completion, which clears VSTART. Since
QEMU will never produce a non-zero value of VSTART, it is allowed to
trap on any non-zero setting of VSTART.

I.e. it can be handled at translation time alongside VILL.


r~
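(To make the suggestion concrete, a translation-time check might look
like the sketch below. This is only an illustration: the DisasContext
fields "vill" and "vstart_eq_zero" are assumed names for decode-time
state, not fields from this patch, and gen_exception_illegal() is the
usual illegal-instruction trap helper in target/riscv/translate.c.)

    /* Illustrative only: refuse vector instructions at translate time
     * when VILL is set or VSTART is known to be non-zero, rather than
     * re-checking vtype in every helper. */
    static bool vec_check_access(DisasContext *ctx)
    {
        if (ctx->vill || !ctx->vstart_eq_zero) {
            gen_exception_illegal(ctx);
            return false;
        }
        return true;
    }

    /* Each trans_v*() function would then begin with:
     *     if (!vec_check_access(ctx)) {
     *         return true;
     *     }
     */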
On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>
> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
> ---
>  fpu/softfloat.c                         |   119 +
>  include/fpu/softfloat.h                 |     4 +
>  linux-user/riscv/cpu_loop.c             |     8 +-
>  target/riscv/Makefile.objs              |     2 +-
>  target/riscv/cpu.h                      |    30 +
>  target/riscv/cpu_bits.h                 |    15 +
>  target/riscv/cpu_helper.c               |     7 +
>  target/riscv/csr.c                      |    65 +-
>  target/riscv/helper.h                   |   354 +
>  target/riscv/insn32.decode              |   374 +-
>  target/riscv/insn_trans/trans_rvv.inc.c |   484 +
>  target/riscv/translate.c                |     1 +
>  target/riscv/vector_helper.c            | 26563 ++++++++++++++++++++++++++++++
>  13 files changed, 28017 insertions(+), 9 deletions(-)
>  create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
>  create mode 100644 target/riscv/vector_helper.c
>

Hello,

Thanks for the patch!

As others have pointed out, you will need to split the patch up into
multiple smaller patches, otherwise it is too hard to review almost
30,000 lines of code.

Can you also include a cover letter with your patch series describing
how you are testing this? AFAIK vector extension support isn't in any
compiler yet, so I'm assuming you are handwriting the assembly or have
toolchain patches. Either way it will help if you can share that so
others can test your implementation.

Alex and Richard have kindly started the review. Once you have
addressed their comments and split this patch up into smaller patches,
you can send a v2 and we can go from there.

Once again, thanks for doing this implementation for QEMU!

Alistair
On 2019/8/29 5:34 AM, Alistair Francis wrote:
> On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
>> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
>> ---
>>  fpu/softfloat.c                         |   119 +
>>  include/fpu/softfloat.h                 |     4 +
>>  linux-user/riscv/cpu_loop.c             |     8 +-
>>  target/riscv/Makefile.objs              |     2 +-
>>  target/riscv/cpu.h                      |    30 +
>>  target/riscv/cpu_bits.h                 |    15 +
>>  target/riscv/cpu_helper.c               |     7 +
>>  target/riscv/csr.c                      |    65 +-
>>  target/riscv/helper.h                   |   354 +
>>  target/riscv/insn32.decode              |   374 +-
>>  target/riscv/insn_trans/trans_rvv.inc.c |   484 +
>>  target/riscv/translate.c                |     1 +
>>  target/riscv/vector_helper.c            | 26563 ++++++++++++++++++++++++++++++
>>  13 files changed, 28017 insertions(+), 9 deletions(-)
>>  create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c
>>  create mode 100644 target/riscv/vector_helper.c
>>
> Hello,
>
> Thanks for the patch!
>
> As others have pointed out, you will need to split the patch up into
> multiple smaller patches, otherwise it is too hard to review almost
> 30,000 lines of code.

Hi, Alistair

I'm so sorry for the inconvenience. It will be a patch set with a cover
letter in v2.

> Can you also include a cover letter with your patch series describing
> how you are testing this? AFAIK vector extension support isn't in any
> compiler yet, so I'm assuming you are handwriting the assembly or have
> toolchain patches. Either way it will help if you can share that so
> others can test your implementation.

Yes, it's handwritten assembly. The assembler in Binutils already
supports the vector extension. First I define a function such as
test_vadd_vv_8 in assembly, and then call it from a C program (a sketch
of such a harness is shown below). The function looks something like:

/* vadd.vv */
TEST_FUNC(test_vadd_vv_8)
    vsetvli     t1, x0, e8, m2
    vlb.v       v6, (a4)
    vsb.v       v6, (a3)
    vsetvli     t1, a0, e8, m2
    vlb.v       v0, (a1)
    vlb.v       v2, (a2)
    vadd.vv     v4, v0, v2
    vsb.v       v4, (a3)
    ret
    .size test_vadd_vv_8, .-test_vadd_vv_8

It takes more time to test than to implement the instructions. Maybe
there is a better test method, or some ready-made test cases in QEMU.
Could you give me some advice on testing?

Best Regards,

Zhiwei

> Alex and Richard have kindly started the review. Once you have
> addressed their comments and split this patch up into smaller patches,
> you can send a v2 and we can go from there.
>
> Once again, thanks for doing this implementation for QEMU!
>
> Alistair
>
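(For reference, a minimal C harness for driving such a handwritten test
might look like the sketch below. Everything here is an assumption for
illustration: the argument convention — count in a0, sources in a1/a2,
destination in a3, fill buffer in a4 — is inferred from the assembly
above, and N, VPAD and the scalar reference check are invented.)

    #include <stdint.h>
    #include <stdio.h>

    #define N     16   /* elements actually computed; keep <= VLMAX  */
    #define VPAD  64   /* the pre-fill step writes up to VLMAX bytes */

    /* Prototype matching how the assembly reads its arguments. */
    extern void test_vadd_vv_8(long n, const int8_t *a, const int8_t *b,
                               int8_t *out, const int8_t *fill);

    int main(void)
    {
        int8_t a[N], b[N];
        int8_t out[VPAD], fill[VPAD];  /* padded for the VLMAX pre-fill */
        int i, errors = 0;

        for (i = 0; i < VPAD; i++) {
            fill[i] = 0x55;            /* pattern exposing untouched tail */
        }
        for (i = 0; i < N; i++) {
            a[i] = (int8_t)i;
            b[i] = (int8_t)(3 * i + 1);
        }

        test_vadd_vv_8(N, a, b, out, fill);

        for (i = 0; i < N; i++) {
            int8_t ref = (int8_t)(a[i] + b[i]);   /* scalar reference */
            if (out[i] != ref) {
                printf("mismatch at %d: got %d, want %d\n",
                       i, out[i], ref);
                errors++;
            }
        }
        printf(errors ? "FAIL\n" : "PASS\n");
        return errors != 0;
    }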
On 2019/8/29 4:43 AM, Richard Henderson wrote:
> On 8/28/19 11:54 AM, Richard Henderson wrote:
>> But it might be reasonable to include (VSTART == 0 && VL == VLMAX) as a
>> single bit.
> BTW, it is reasonable to check VSTART == 0 always. Quoting the spec:
>
> # Implementations are permitted to raise illegal instruction exceptions
> # when attempting to execute a vector instruction with a value of vstart
> # that the implementation can never produce when executing that same
> # instruction with the same vtype setting.
>
> Since QEMU will never interrupt a single instruction, each vector
> instruction will always run to completion, which clears VSTART. Since
> QEMU will never produce a non-zero value of VSTART, it is allowed to
> trap on any non-zero setting of VSTART.
>
> I.e. it can be handled at translation time alongside VILL.

Hi, Richard

I am so sorry for the inconvenience. It is very kind of you to review
this horribly long code and give so many comments.

Even in QEMU, there are situations where VSTART != 0. For example, a
load instruction may take a page fault exception part-way through. If
execution resumed with VSTART == 0, the elements that had already been
loaded before the exception would be loaded once again. Worse, resuming
with VSTART == 0 can give a wrong result. Consider, with lmul == 1,

    vlb.v v0, (a0), v0.t

As v0 is both the mask register and the destination, once part of it
has been overwritten, the original mask bits can't be used again, so
the instruction cannot simply be restarted from element 0.

It will take some time to address the other comments. After that I
will split the patch into a patch set with a cover letter for v2.

Thank you again for your review!

Best Regards,

Zhiwei

>
>
> r~
>
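(To make the resume semantics concrete, the element loops in this patch
follow roughly the pattern distilled below. This is a sketch, not the
exact patch code: register grouping for lmul > 1 is elided, the
unconditional vstart update is a simplification, and cpu_ldsb_data()
stands in for whatever load the real helper performs; the vector_get_*
and vector_elem_mask names are taken from the patch.)

    void sketch_vlb_v(CPURISCVState *env, uint32_t vm, uint32_t rs1,
                      uint32_t rd)
    {
        uint32_t width = vector_get_width(env);
        uint32_t lmul = vector_get_lmul(env);
        target_ulong base = env->gpr[rs1];
        int i, vl = env->vfp.vl;

        /* Elements below vstart completed before a fault and must be
         * skipped on resume; otherwise a masked load whose destination
         * overlaps v0 would re-read mask bits it already overwrote. */
        for (i = env->vfp.vstart; i < vl; i++) {
            if (vector_elem_mask(env, vm, width, lmul, i)) {
                /* may fault; vstart then still names this element,
                 * so a retry resumes exactly here */
                env->vfp.vreg[rd].s8[i] = cpu_ldsb_data(env, base + i);
            }
            env->vfp.vstart = i + 1;
        }
        env->vfp.vstart = 0;    /* the whole instruction completed */
    }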
On 2019/8/29 3:20 AM, Aleksandar Markovic wrote:
>
> On Wed, Aug 28, 2019 at 9:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote:
>
>     Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25
>     Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com>
>     ---
>
> Such a large patch, and "Change-Id:
> I3cf891bc400713b95f47ecca82b1bf773f3dcb25" is its entire commit
> message?? Horrible.

Hi, Aleksandar

I am so sorry. A patch set with a cover letter will be sent later.

Best Regards,

Zhiwei

> Aleksandar
The > invalid > | exception is raised if either operand is a NaN. The comparison > is performed > @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, > float_status *status) > | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. > *----------------------------------------------------------------------------*/ > > +int float16_lt(float16 a, float16 b, float_status *status) > +{ > + flag aSign, bSign; > + uint16_t av, bv; > + a = float16_squash_input_denormal(a, status); > + b = float16_squash_input_denormal(b, status); > + > + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && > extractFloat16Frac( a ) ) > + || ( ( extractFloat16Exp( b ) == 0x1F ) && > extractFloat16Frac( b ) ) > + ) { > + float_raise(float_flag_invalid, status); > + return 0; > + } > + aSign = extractFloat16Sign( a ); > + bSign = extractFloat16Sign( b ); > + av = float16_val(a); > + bv = float16_val(b); > + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | > bv )<<1 ) != 0 ); > + return ( av != bv ) && ( aSign ^ ( av < bv ) ); > + > +} > + > +/*---------------------------------------------------------------------------- > +| Returns 1 if the single-precision floating-point value `a' is > less than > +| the corresponding value `b', and 0 otherwise. The invalid > exception is > +| raised if either operand is a NaN. The comparison is performed > according > +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. > +*----------------------------------------------------------------------------*/ > + > int float32_lt(float32 a, float32 b, float_status *status) > { > flag aSign, bSign; > @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, > float_status *status) > } > > /*---------------------------------------------------------------------------- > +| Returns 1 if the half-precision floating-point value `a' is > equal to > +| the corresponding value `b', and 0 otherwise. Quiet NaNs do > not cause an > +| exception. The comparison is performed according to the > IEC/IEEE Standard > +| for Binary Floating-Point Arithmetic. > +*----------------------------------------------------------------------------*/ > + > +int float16_eq_quiet(float16 a, float16 b, float_status *status) > +{ > + a = float16_squash_input_denormal(a, status); > + b = float16_squash_input_denormal(b, status); > + > + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && > extractFloat16Frac( a ) ) > + || ( ( extractFloat16Exp( b ) == 0x1F ) && > extractFloat16Frac( b ) ) > + ) { > + if (float16_is_signaling_nan(a, status) > + || float16_is_signaling_nan(b, status)) { > + float_raise(float_flag_invalid, status); > + } > + return 0; > + } > + return ( float16_val(a) == float16_val(b) ) || > + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 > ) == 0 ); > +} > + > + > +/*---------------------------------------------------------------------------- > | Returns 1 if the single-precision floating-point value `a' is > equal to > | the corresponding value `b', and 0 otherwise. Quiet NaNs do > not cause an > | exception. The comparison is performed according to the > IEC/IEEE Standard > @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, > float_status *status) > } > > /*---------------------------------------------------------------------------- > +| Returns 1 if the half-precision floating-point values `a' and > `b' cannot > +| be compared, and 0 otherwise. Quiet NaNs do not cause an > exception. The > +| comparison is performed according to the IEC/IEEE Standard for > Binary > +| Floating-Point Arithmetic. 
> +*----------------------------------------------------------------------------*/ > + > +int float16_unordered_quiet(float16 a, float16 b, float_status > *status) > +{ > + a = float16_squash_input_denormal(a, status); > + b = float16_squash_input_denormal(b, status); > + > + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && > extractFloat16Frac( a ) ) > + || ( ( extractFloat16Exp( b ) == 0x1F ) && > extractFloat16Frac( b ) ) > + ) { > + if (float16_is_signaling_nan(a, status) > + || float16_is_signaling_nan(b, status)) { > + float_raise(float_flag_invalid, status); > + } > + return 1; > + } > + return 0; > +} > + > + > +/*---------------------------------------------------------------------------- > | Returns 1 if the single-precision floating-point values `a' and > `b' cannot > | be compared, and 0 otherwise. Quiet NaNs do not cause an > exception. The > | comparison is performed according to the IEC/IEEE Standard for > Binary > diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h > index 3ff3fa5..3b0754c 100644 > --- a/include/fpu/softfloat.h > +++ b/include/fpu/softfloat.h > @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, > float_status *status); > float16 float16_sqrt(float16, float_status *status); > int float16_compare(float16, float16, float_status *status); > int float16_compare_quiet(float16, float16, float_status *status); > +int float16_unordered_quiet(float16, float16, float_status *status); > +int float16_le(float16, float16, float_status *status); > +int float16_lt(float16, float16, float_status *status); > +int float16_eq_quiet(float16, float16, float_status *status); > > int float16_is_quiet_nan(float16, float_status *status); > int float16_is_signaling_nan(float16, float_status *status); > diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c > index 12aa3c0..b01548a 100644 > --- a/linux-user/riscv/cpu_loop.c > +++ b/linux-user/riscv/cpu_loop.c > @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env) > signum = 0; > sigcode = 0; > sigaddr = 0; > - > + if (env->foflag) { > + if (env->vfp.vl != 0) { > + env->foflag = false; > + env->pc += 4; > + continue; > + } > + } > switch (trapnr) { > case EXCP_INTERRUPT: > /* just indicate that signals should be handled asap */ > diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs > index b1c79bc..d577cef 100644 > --- a/target/riscv/Makefile.objs > +++ b/target/riscv/Makefile.objs > @@ -1,4 +1,4 @@ > -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o > fpu_helper.o gdbstub.o pmp.o > +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o > fpu_helper.o vector_helper.o gdbstub.o pmp.o > > DECODETREE = $(SRC_PATH)/scripts/decodetree.py > > diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h > index 0adb307..5a93aa2 100644 > --- a/target/riscv/cpu.h > +++ b/target/riscv/cpu.h > @@ -67,6 +67,7 @@ > #define RVC RV('C') > #define RVS RV('S') > #define RVU RV('U') > +#define RVV RV('V') > > /* S extension denotes that Supervisor mode exists, however it is > possible > to have a core that support S mode but does not have an MMU > and there > @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState; > > #include "pmp.h" > > +#define VLEN 128 > +#define VUNIT(x) (VLEN / x) > + > struct CPURISCVState { > target_ulong gpr[32]; > uint64_t fpr[32]; /* assume both F and D extensions */ > + > + /* vector coprocessor state. 
*/ > + struct { > + union VECTOR { > + float64 f64[VUNIT(64)]; > + float32 f32[VUNIT(32)]; > + float16 f16[VUNIT(16)]; > + target_ulong ul[VUNIT(sizeof(target_ulong))]; > + uint64_t u64[VUNIT(64)]; > + int64_t s64[VUNIT(64)]; > + uint32_t u32[VUNIT(32)]; > + int32_t s32[VUNIT(32)]; > + uint16_t u16[VUNIT(16)]; > + int16_t s16[VUNIT(16)]; > + uint8_t u8[VUNIT(8)]; > + int8_t s8[VUNIT(8)]; > + } vreg[32]; > + target_ulong vxrm; > + target_ulong vxsat; > + target_ulong vl; > + target_ulong vstart; > + target_ulong vtype; > + float_status fp_status; > + } vfp; > + > + bool foflag; > target_ulong pc; > target_ulong load_res; > target_ulong load_val; > diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h > index 11f971a..9eb43ec 100644 > --- a/target/riscv/cpu_bits.h > +++ b/target/riscv/cpu_bits.h > @@ -29,6 +29,14 @@ > #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) > #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | > FSR_DZA | FSR_NXA) > > +/* Vector Fixed-Point round model */ > +#define FSR_VXRM_SHIFT 9 > +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT) > + > +/* Vector Fixed-Point saturation flag */ > +#define FSR_VXSAT_SHIFT 8 > +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT) > + > /* Control and Status Registers */ > > /* User Trap Setup */ > @@ -48,6 +56,13 @@ > #define CSR_FRM 0x002 > #define CSR_FCSR 0x003 > > +/* User Vector CSRs */ > +#define CSR_VSTART 0x008 > +#define CSR_VXSAT 0x009 > +#define CSR_VXRM 0x00a > +#define CSR_VL 0xc20 > +#define CSR_VTYPE 0xc21 > + > /* User Timers and Counters */ > #define CSR_CYCLE 0xc00 > #define CSR_TIME 0xc01 > diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c > index e32b612..405caf6 100644 > --- a/target/riscv/cpu_helper.c > +++ b/target/riscv/cpu_helper.c > @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs) > [PRV_H] = RISCV_EXCP_H_ECALL, > [PRV_M] = RISCV_EXCP_M_ECALL > }; > + if (env->foflag) { > + if (env->vfp.vl != 0) { > + env->foflag = false; > + env->pc += 4; > + return; > + } > + } > > if (!async) { > /* set tval to badaddr for traps with address information */ > diff --git a/target/riscv/csr.c b/target/riscv/csr.c > index e0d4586..a6131ff 100644 > --- a/target/riscv/csr.c > +++ b/target/riscv/csr.c > @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno) > return 0; > } > > -#if !defined(CONFIG_USER_ONLY) > static int any(CPURISCVState *env, int csrno) > { > return 0; > } > > +#if !defined(CONFIG_USER_ONLY) > static int smode(CPURISCVState *env, int csrno) > { > return -!riscv_has_ext(env, RVS); > @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int > csrno, target_ulong *val) > return -1; > } > #endif > - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) > - | (env->frm << FSR_RD_SHIFT); > + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT) > + | (env->vfp.vxsat << FSR_VXSAT_SHIFT) > + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) > + | (env->frm << FSR_RD_SHIFT); > return 0; > } > > @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, > int csrno, target_ulong val) > env->mstatus |= MSTATUS_FS; > #endif > env->frm = (val & FSR_RD) >> FSR_RD_SHIFT; > + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT; > + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT; > riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT); > return 0; > } > > +static int read_vtype(CPURISCVState *env, int csrno, target_ulong > *val) > +{ > + *val = env->vfp.vtype; > + return 0; > +} > + > +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val) > +{ > + *val = env->vfp.vl; > + 
return 0; > +} > + > +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong > *val) > +{ > + *val = env->vfp.vxrm; > + return 0; > +} > + > +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong > *val) > +{ > + *val = env->vfp.vxsat; > + return 0; > +} > + > +static int read_vstart(CPURISCVState *env, int csrno, > target_ulong *val) > +{ > + *val = env->vfp.vstart; > + return 0; > +} > + > +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong > val) > +{ > + env->vfp.vxrm = val; > + return 0; > +} > + > +static int write_vxsat(CPURISCVState *env, int csrno, > target_ulong val) > +{ > + env->vfp.vxsat = val; > + return 0; > +} > + > +static int write_vstart(CPURISCVState *env, int csrno, > target_ulong val) > +{ > + env->vfp.vstart = val; > + return 0; > +} > + > /* User Timers and Counters */ > static int read_instret(CPURISCVState *env, int csrno, > target_ulong *val) > { > @@ -873,7 +925,12 @@ static riscv_csr_operations > csr_ops[CSR_TABLE_SIZE] = { > [CSR_FFLAGS] = { fs, read_fflags, > write_fflags }, > [CSR_FRM] = { fs, read_frm, write_frm > }, > [CSR_FCSR] = { fs, read_fcsr, write_fcsr > }, > - > + /* Vector CSRs */ > + [CSR_VSTART] = { any, read_vstart, > write_vstart }, > + [CSR_VXSAT] = { any, read_vxsat, write_vxsat > }, > + [CSR_VXRM] = { any, read_vxrm, write_vxrm > }, > + [CSR_VL] = { any, read_vl }, > + [CSR_VTYPE] = { any, read_vtype > }, > /* User Timers and Counters */ > [CSR_CYCLE] = { ctr, read_instret > }, > [CSR_INSTRET] = { ctr, read_instret > }, > diff --git a/target/riscv/helper.h b/target/riscv/helper.h > index debb22a..fee02c0 100644 > --- a/target/riscv/helper.h > +++ b/target/riscv/helper.h > @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl) > DEF_HELPER_1(wfi, void, env) > DEF_HELPER_1(tlb_flush, void, env) > #endif > +/* Vector functions */ > +DEF_HELPER_5(vector_vlb_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlh_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlw_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vle_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlbu_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlhu_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlwu_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlbff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlhff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlwff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vleff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlbuff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlhuff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vlwuff_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsb_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsh_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsw_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vse_v, void, env, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlsb_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlsh_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlsw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlse_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlsbu_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlshu_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlswu_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vssb_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vssh_v, void, env, i32, i32, i32, 
i32, i32) > +DEF_HELPER_6(vector_vssw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsse_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxb_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxh_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxe_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxbu_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxhu_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vlxwu_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsxb_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsxh_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsxw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsxe_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsuxb_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsuxh_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsuxw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vsuxe_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoswapw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoswapd_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoaddw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoaddd_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoxorw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoxord_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoandw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoandd_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoorw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamoord_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamominw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamomind_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamomaxw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamomaxd_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamominuw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamominud_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamomaxuw_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_6(vector_vamomaxud_v, void, env, i32, i32, i32, i32, i32) > +DEF_HELPER_4(vector_vext_x_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfmv_f_s, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmv_s_x, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfmv_s_f, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vadc_vvm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vadc_vxm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vadc_vim, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmadc_vvm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmadc_vxm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmadc_vim, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vsbc_vvm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vsbc_vxm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmsbc_vvm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmsbc_vxm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmpopc_m, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmfirst_m, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vcompress_vm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmandnot_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmand_mm, void, 
env, i32, i32, i32) > +DEF_HELPER_4(vector_vmor_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmxor_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmornot_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmnand_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmnor_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmxnor_mm, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmsbf_m, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmsof_m, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vmsif_m, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_viota_m, void, env, i32, i32, i32) > +DEF_HELPER_3(vector_vid_v, void, env, i32, i32) > +DEF_HELPER_4(vector_vfcvt_xu_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfcvt_x_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfcvt_f_xu_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfcvt_f_x_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfwcvt_xu_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfwcvt_x_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfwcvt_f_xu_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfwcvt_f_x_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfwcvt_f_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfncvt_xu_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfncvt_x_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfncvt_f_xu_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfncvt_f_x_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfncvt_f_f_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfsqrt_v, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vfclass_v, void, env, i32, i32, i32) > +DEF_HELPER_5(vector_vadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vadd_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vadd_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredsum_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfadd_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredand_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfredsum_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsub_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredor_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsub_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrsub_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrsub_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredxor_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfredosum_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vminu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vminu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredminu_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmin_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmin_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmin_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmin_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredmin_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfredmin_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmaxu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmaxu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredmaxu_vs, void, env, i32, i32, i32, i32) > 
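All of these operations go out-of-line through a helper that takes the register numbers as plain i32 constants, which bypasses TCG's vector infrastructure completely. At least the simple unmasked integer ops could be expanded inline with the generic gvec expanders, which work directly on offsets into CPURISCVState. A minimal sketch for vadd.vv, assuming the vfp.vreg layout from this patch, a fixed 128-bit VLEN and SEW=32 (a real version would take the element size from vtype and still handle masking and the vl tail):

    /* Untested sketch, needs "tcg-op-gvec.h".  Expands vadd.vv with the
     * generic vector expander instead of calling out to a helper. */
    static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a)
    {
        const uint32_t vlen_bytes = 16;          /* assumes VLEN = 128 */
        tcg_gen_gvec_add(MO_32,                  /* assumes SEW = 32 */
                         offsetof(CPURISCVState, vfp.vreg[a->rd]),
                         offsetof(CPURISCVState, vfp.vreg[a->rs2]),
                         offsetof(CPURISCVState, vfp.vreg[a->rs1]),
                         vlen_bytes, vlen_bytes);
        return true;
    }

The masked and fixed-point forms will still need out-of-line helpers, but those could at least be declared with DEF_HELPER_FLAGS_* so the optimizer knows what they can and can't clobber.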
+DEF_HELPER_5(vector_vfmax_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmax_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmax_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmax_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vredmax_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfredmax_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsgnj_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsgnj_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vand_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vand_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vand_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsgnjn_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsgnjn_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vor_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vor_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vor_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsgnjx_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfsgnjx_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vxor_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vxor_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vxor_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrgather_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrgather_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrgather_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vslideup_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vslideup_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vslide1up_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vslidedown_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vslidedown_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vslide1down_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmerge_vvm, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmerge_vxm, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmerge_vim, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmerge_vfm, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmseq_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmseq_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmseq_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfeq_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfeq_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsne_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsne_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsne_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfle_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfle_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsltu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsltu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmford_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmford_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmslt_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmslt_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmflt_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmflt_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsleu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsleu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsleu_vi, void, env, i32, 
i32, i32, i32) > +DEF_HELPER_5(vector_vmfne_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfne_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsle_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsle_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsle_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfgt_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsgtu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsgtu_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsgt_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmsgt_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmfge_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsaddu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsaddu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsaddu_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vdivu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vdivu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfdiv_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfdiv_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsadd_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsadd_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vdiv_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vdiv_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfrdiv_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssubu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssubu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vremu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vremu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssub_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrem_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vrem_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vaadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vaadd_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vaadd_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmulhu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmulhu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmul_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmul_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsll_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsll_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsll_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmul_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmul_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vasub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vasub_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmulhsu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmulhsu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsmul_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsmul_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmulh_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmulh_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfrsub_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsrl_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsrl_vx, void, env, i32, i32, i32, i32) > 
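Also, five separate i32 arguments per helper means five tcg_const_i32/tcg_temp_free_i32 pairs at every translation site. The decoded fields could be folded into a single descriptor word so that most of these declarations collapse to DEF_HELPER_2(..., void, env, i32). Roughly like this - the bit positions are invented purely for illustration:

    /* Untested sketch: pack rd/rs1/rs2/nf/vm into one i32 at translate
     * time and unpack with extract32() at the top of the helper. */
    static bool trans_vlsb_v(DisasContext *ctx, arg_vlsb_v *a)
    {
        TCGv_i32 desc = tcg_const_i32(a->rd | a->rs1 << 5 | a->rs2 << 10 |
                                      a->nf << 15 | a->vm << 18);
        gen_helper_vector_vlsb_v(cpu_env, desc);
        tcg_temp_free_i32(desc);
        return true;
    }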
+DEF_HELPER_5(vector_vsrl_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmadd_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsra_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsra_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vsra_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmadd_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmadd_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssrl_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssrl_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssrl_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmsub_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssra_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssra_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vssra_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnmsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnmsub_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmsub_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnsrl_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnsrl_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnsrl_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmacc_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnsra_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnsra_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnsra_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vmacc_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmacc_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnclipu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnclipu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnclipu_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmsac_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfmsac_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnclip_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnclip_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnclip_vi, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnmsac_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vnmsac_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmsac_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfnmsac_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwredsumu_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwaddu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwaddu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwadd_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwredsum_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwadd_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwadd_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwredsum_vs, void, env, i32, 
i32, i32, i32) > +DEF_HELPER_5(vector_vwsubu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsubu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwsub_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsub_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsub_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwredosum_vs, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwaddu_wv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwaddu_wx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwadd_wv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwadd_wf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwadd_wv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwadd_wx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsubu_wv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsubu_wx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwsub_wv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwsub_wf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsub_wv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsub_wx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmulu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmulu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwmul_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwmul_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmulsu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmulsu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmul_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmul_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmaccu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmaccu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmaccu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmaccu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwmacc_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmacc_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmacc_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwnmacc_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwnmacc_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmaccsu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmaccsu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmaccsu_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmaccsu_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwmsac_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwmsac_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwsmaccus_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vwmaccus_vx, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwnmsac_vv, void, env, i32, i32, i32, i32) > +DEF_HELPER_5(vector_vfwnmsac_vf, void, env, i32, i32, i32, i32) > +DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32) > +DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32) > diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode > index 77f794e..d125ff9 100644 > --- a/target/riscv/insn32.decode > +++ b/target/riscv/insn32.decode > @@ -25,7 +25,7 @@ > %sh10 20:10 
> %csr 20:12 > %rm 12:3 > - > +%nf 29:3 > # immediates: > %imm_i 20:s12 > %imm_s 25:s7 7:5 > @@ -43,7 +43,6 @@ > &u imm rd > &shift shamt rs1 rd > &atomic aq rl rs2 rs1 rd > - > # Formats 32: > @r ....... ..... ..... ... ..... ....... &r > %rs2 %rs1 %rd > @i ............ ..... ... ..... ....... &i imm=%imm_i > %rs1 %rd > @@ -62,11 +61,17 @@ > @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd > @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd > @r2 ....... ..... ..... ... ..... ....... %rs1 %rd > +@r_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd > +@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd > +@r_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd > +@r2_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs1 %rd > +@r2_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rd > +@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd > +@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd > > @sfence_vma ....... ..... ..... ... ..... ....... %rs2 %rs1 > @sfence_vm ....... ..... ..... ... ..... ....... %rs1 > > - > # *** Privileged Instructions *** > ecall 000000000000 00000 000 00000 1110011 > ebreak 000000000001 00000 000 00000 1110011 > @@ -203,3 +208,366 @@ fcvt_w_d 1100001 00000 ..... ... ..... > 1010011 @r2_rm > fcvt_wu_d 1100001 00001 ..... ... ..... 1010011 @r2_rm > fcvt_d_w 1101001 00000 ..... ... ..... 1010011 @r2_rm > fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm > + > +# *** RV32V Standard Extension *** > + > +# *** Vector loads and stores are encoded within LOADFP/STORE-FP *** > +vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm > +vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm > +vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm > +vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm > +vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm > +vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm > +vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm > +vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm > +vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm > +vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm > +vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm > +vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm > +vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm > +vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm > +vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm > +vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm > +vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm > +vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm > + > +vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm > +vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm > +vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm > +vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm > +vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm > +vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm > +vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm > +vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm > +vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm > +vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm > +vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm > + > +vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm > +vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm > +vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm > +vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm > +vlxbu_v ... 011 . ..... 
..... 000 ..... 0000111 @r_nfvm > +vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm > +vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm > +vsxb_v ... 011 . ..... ..... 000 ..... 0100111 @r_nfvm > +vsxh_v ... 011 . ..... ..... 101 ..... 0100111 @r_nfvm > +vsxw_v ... 011 . ..... ..... 110 ..... 0100111 @r_nfvm > +vsxe_v ... 011 . ..... ..... 111 ..... 0100111 @r_nfvm > +vsuxb_v ... 111 . ..... ..... 000 ..... 0100111 @r_nfvm > +vsuxh_v ... 111 . ..... ..... 101 ..... 0100111 @r_nfvm > +vsuxw_v ... 111 . ..... ..... 110 ..... 0100111 @r_nfvm > +vsuxe_v ... 111 . ..... ..... 111 ..... 0100111 @r_nfvm > + > +#*** Vector AMO operations are encoded under the standard AMO > major opcode.*** > +vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm > +vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm > +vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm > + > +#*** new major opcode OP-V *** > +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm > +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm > +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm > +vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm > +vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm > +vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm > +vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm > +vfredsum_vs 000001 . ..... ..... 001 ..... 1010111 @r_vm > +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm > +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm > +vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm > +vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm > +vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm > +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm > +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm > +vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm > +vfredosum_vs 000011 . ..... ..... 001 ..... 1010111 @r_vm > +vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm > +vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm > +vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm > +vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm > +vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm > +vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm > +vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm > +vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm > +vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm > +vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm > +vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm > +vredmaxu_vs 000110 . ..... ..... 010 ..... 
1010111 @r_vm > +vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm > +vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm > +vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm > +vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm > +vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm > +vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm > +vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm > +vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm > +vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm > +vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm > +vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm > +vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm > +vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm > +vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm > +vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm > +vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm > +vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm > +vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm > +vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm > +vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm > +vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm > +vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm > +vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm > +vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm > +vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r > +vfmv_f_s 001100 1 ..... ..... 001 ..... 1010111 @r > +vmv_s_x 001101 1 ..... ..... 110 ..... 1010111 @r > +vfmv_s_f 001101 1 ..... ..... 101 ..... 1010111 @r > +vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm > +vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm > +vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm > +vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm > +vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm > +vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm > +vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r > +vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r > +vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r > +vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r > +vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r > +vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r > +vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r > +vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r > +vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r > +vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r > +vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm > +vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm > +vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm > +vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm > +vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm > +viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm > +vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm > +vmerge_vvm 010111 . ..... ..... 000 ..... 1010111 @r_vm > +vmerge_vxm 010111 . ..... ..... 100 ..... 1010111 @r_vm > +vmerge_vim 010111 . ..... ..... 011 ..... 1010111 @r_vm > +vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r > +vfmerge_vfm 010111 . ..... ..... 101 ..... 1010111 @r_vm > +vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm > +vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm > +vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm > +vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r > +vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm > +vmfeq_vf 011000 . ..... ..... 101 ..... 
1010111 @r_vm > +vmsne_vv 011001 . ..... ..... 000 ..... 1010111 @r_vm > +vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm > +vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm > +vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r > +vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm > +vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm > +vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm > +vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm > +vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r > +vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm > +vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm > +vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm > +vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm > +vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r > +vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm > +vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm > +vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm > +vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm > +vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm > +vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r > +vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm > +vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm > +vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm > +vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm > +vmsle_vi 011101 . ..... ..... 011 ..... 1010111 @r_vm > +vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r > +vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm > +vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm > +vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm > +vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r > +vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm > +vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm > +vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r > +vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm > +vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm > +vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm > +vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm > +vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm > +vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm > +vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm > +vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm > +vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm > +vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm > +vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm > +vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm > +vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm > +vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm > +vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm > +vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm > +vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm > +vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm > +vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm > +vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm > +vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm > +vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm > +vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm > +vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm > +vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm > +vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm > +vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm > +vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 
1010111 @r2_vm > +vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm > +vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm > +vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm > +vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm > +vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm > +vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm > +vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm > +vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm > +vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm > +vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm > +vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm > +vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm > +vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm > +vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm > +vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm > +vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm > +vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm > +vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm > +vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm > +vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm > +vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm > +vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm > +vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm > +vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm > +vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm > +vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm > +vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm > +vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm > +vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm > +vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm > +vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm > +vsrl_vv 101000 . ..... ..... 000 ..... 1010111 @r_vm > +vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm > +vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm > +vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm > +vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm > +vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm > +vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm > +vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm > +vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm > +vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm > +vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm > +vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm > +vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm > +vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm > +vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm > +vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm > +vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm > +vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm > +vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm > +vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm > +vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm > +vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm > +vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm > +vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm > +vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm > +vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm > +vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm > +vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm > +vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm > +vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm > +vnsra_vx 101101 . ..... 
..... 100 ..... 1010111 @r_vm > +vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm > +vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm > +vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm > +vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm > +vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm > +vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm > +vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm > +vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm > +vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm > +vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm > +vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm > +vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm > +vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm > +vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm > +vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm > +vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm > +vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm > +vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm > +vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm > +vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm > +vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm > +vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm > +vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm > +vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm > +vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm > +vfwredsum_vs 110001 . ..... ..... 001 ..... 1010111 @r_vm > +vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm > +vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm > +vfwsub_vv 110010 . ..... ..... 001 ..... 1010111 @r_vm > +vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm > +vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm > +vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm > +vfwredosum_vs 110011 . ..... ..... 001 ..... 1010111 @r_vm > +vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm > +vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm > +vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm > +vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm > +vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm > +vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm > +vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm > +vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm > +vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm > +vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm > +vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm > +vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm > +vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm > +vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm > +vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm > +vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm > +vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm > +vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm > +vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm > +vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm > +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm > +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm > +vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm > +vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm > +vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm > +vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm > +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm > +vwsmacc_vx 111101 . 
..... ..... 100 ..... 1010111 @r_vm > +vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm > +vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm > +vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm > +vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm > +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm > +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm > +vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm > +vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm > +vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm > +vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm > +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm > +vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm > +vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm > +vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm > +vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm > +vsetvl 1000000 ..... ..... 111 ..... 1010111 @r > diff --git a/target/riscv/insn_trans/trans_rvv.inc.c > b/target/riscv/insn_trans/trans_rvv.inc.c > new file mode 100644 > index 0000000..dc8e6ce > --- /dev/null > +++ b/target/riscv/insn_trans/trans_rvv.inc.c > @@ -0,0 +1,484 @@ > +/* > + * RISC-V translation routines for the RVV Standard Extension. > + * > + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved. > + * > + * This program is free software; you can redistribute it and/or > modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2 or later, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but > WITHOUT > + * ANY WARRANTY; without even the implied warranty of > MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public > License for > + * more details. > + * > + * You should have received a copy of the GNU General Public > License along with > + * this program. If not, see <http://www.gnu.org/licenses/>. 
> + */ > + > +#define GEN_VECTOR_R2_NFVM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 nf = tcg_const_i32(a->nf); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, d); \ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(nf); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > +#define GEN_VECTOR_R_NFVM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 nf = tcg_const_i32(a->nf); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, s2, d);\ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(nf); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > + > +#define GEN_VECTOR_R_WDVM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 wd = tcg_const_i32(a->wd); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, wd, vm, s1, s2, d);\ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(wd); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > +#define GEN_VECTOR_R(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + gen_helper_vector_##INSN(cpu_env, s1, s2, d); \ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + return true; \ > +} > +#define GEN_VECTOR_R2_VM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, vm, s2, d); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > + > +#define GEN_VECTOR_R1_VM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, vm, d); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > +#define GEN_VECTOR_R_VM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, vm, s1, s2, d); \ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > +#define GEN_VECTOR_R2_ZIMM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 zimm = tcg_const_i32(a->zimm); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + gen_helper_vector_##INSN(cpu_env, s1, zimm, d); \ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(zimm); \ > + tcg_temp_free_i32(d); \ > + 
return true; \ > +} > + > +GEN_VECTOR_R2_NFVM(vlb_v) > +GEN_VECTOR_R2_NFVM(vlh_v) > +GEN_VECTOR_R2_NFVM(vlw_v) > +GEN_VECTOR_R2_NFVM(vle_v) > +GEN_VECTOR_R2_NFVM(vlbu_v) > +GEN_VECTOR_R2_NFVM(vlhu_v) > +GEN_VECTOR_R2_NFVM(vlwu_v) > +GEN_VECTOR_R2_NFVM(vlbff_v) > +GEN_VECTOR_R2_NFVM(vlhff_v) > +GEN_VECTOR_R2_NFVM(vlwff_v) > +GEN_VECTOR_R2_NFVM(vleff_v) > +GEN_VECTOR_R2_NFVM(vlbuff_v) > +GEN_VECTOR_R2_NFVM(vlhuff_v) > +GEN_VECTOR_R2_NFVM(vlwuff_v) > +GEN_VECTOR_R2_NFVM(vsb_v) > +GEN_VECTOR_R2_NFVM(vsh_v) > +GEN_VECTOR_R2_NFVM(vsw_v) > +GEN_VECTOR_R2_NFVM(vse_v) > + > +GEN_VECTOR_R_NFVM(vlsb_v) > +GEN_VECTOR_R_NFVM(vlsh_v) > +GEN_VECTOR_R_NFVM(vlsw_v) > +GEN_VECTOR_R_NFVM(vlse_v) > +GEN_VECTOR_R_NFVM(vlsbu_v) > +GEN_VECTOR_R_NFVM(vlshu_v) > +GEN_VECTOR_R_NFVM(vlswu_v) > +GEN_VECTOR_R_NFVM(vssb_v) > +GEN_VECTOR_R_NFVM(vssh_v) > +GEN_VECTOR_R_NFVM(vssw_v) > +GEN_VECTOR_R_NFVM(vsse_v) > +GEN_VECTOR_R_NFVM(vlxb_v) > +GEN_VECTOR_R_NFVM(vlxh_v) > +GEN_VECTOR_R_NFVM(vlxw_v) > +GEN_VECTOR_R_NFVM(vlxe_v) > +GEN_VECTOR_R_NFVM(vlxbu_v) > +GEN_VECTOR_R_NFVM(vlxhu_v) > +GEN_VECTOR_R_NFVM(vlxwu_v) > +GEN_VECTOR_R_NFVM(vsxb_v) > +GEN_VECTOR_R_NFVM(vsxh_v) > +GEN_VECTOR_R_NFVM(vsxw_v) > +GEN_VECTOR_R_NFVM(vsxe_v) > +GEN_VECTOR_R_NFVM(vsuxb_v) > +GEN_VECTOR_R_NFVM(vsuxh_v) > +GEN_VECTOR_R_NFVM(vsuxw_v) > +GEN_VECTOR_R_NFVM(vsuxe_v) > + > +GEN_VECTOR_R_WDVM(vamoswapw_v) > +GEN_VECTOR_R_WDVM(vamoswapd_v) > +GEN_VECTOR_R_WDVM(vamoaddw_v) > +GEN_VECTOR_R_WDVM(vamoaddd_v) > +GEN_VECTOR_R_WDVM(vamoxorw_v) > +GEN_VECTOR_R_WDVM(vamoxord_v) > +GEN_VECTOR_R_WDVM(vamoandw_v) > +GEN_VECTOR_R_WDVM(vamoandd_v) > +GEN_VECTOR_R_WDVM(vamoorw_v) > +GEN_VECTOR_R_WDVM(vamoord_v) > +GEN_VECTOR_R_WDVM(vamominw_v) > +GEN_VECTOR_R_WDVM(vamomind_v) > +GEN_VECTOR_R_WDVM(vamomaxw_v) > +GEN_VECTOR_R_WDVM(vamomaxd_v) > +GEN_VECTOR_R_WDVM(vamominuw_v) > +GEN_VECTOR_R_WDVM(vamominud_v) > +GEN_VECTOR_R_WDVM(vamomaxuw_v) > +GEN_VECTOR_R_WDVM(vamomaxud_v) > + > +GEN_VECTOR_R(vext_x_v) > +GEN_VECTOR_R(vfmv_f_s) > +GEN_VECTOR_R(vmv_s_x) > +GEN_VECTOR_R(vfmv_s_f) > +GEN_VECTOR_R(vadc_vvm) > +GEN_VECTOR_R(vadc_vxm) > +GEN_VECTOR_R(vadc_vim) > +GEN_VECTOR_R(vmadc_vvm) > +GEN_VECTOR_R(vmadc_vxm) > +GEN_VECTOR_R(vmadc_vim) > +GEN_VECTOR_R(vsbc_vvm) > +GEN_VECTOR_R(vsbc_vxm) > +GEN_VECTOR_R(vmsbc_vvm) > +GEN_VECTOR_R(vmsbc_vxm) > +GEN_VECTOR_R2_VM(vmpopc_m) > +GEN_VECTOR_R2_VM(vmfirst_m) > +GEN_VECTOR_R(vcompress_vm) > +GEN_VECTOR_R(vmandnot_mm) > +GEN_VECTOR_R(vmand_mm) > +GEN_VECTOR_R(vmor_mm) > +GEN_VECTOR_R(vmxor_mm) > +GEN_VECTOR_R(vmornot_mm) > +GEN_VECTOR_R(vmnand_mm) > +GEN_VECTOR_R(vmnor_mm) > +GEN_VECTOR_R(vmxnor_mm) > +GEN_VECTOR_R2_VM(vmsbf_m) > +GEN_VECTOR_R2_VM(vmsof_m) > +GEN_VECTOR_R2_VM(vmsif_m) > +GEN_VECTOR_R2_VM(viota_m) > +GEN_VECTOR_R1_VM(vid_v) > +GEN_VECTOR_R2_VM(vfcvt_xu_f_v) > +GEN_VECTOR_R2_VM(vfcvt_x_f_v) > +GEN_VECTOR_R2_VM(vfcvt_f_xu_v) > +GEN_VECTOR_R2_VM(vfcvt_f_x_v) > +GEN_VECTOR_R2_VM(vfwcvt_xu_f_v) > +GEN_VECTOR_R2_VM(vfwcvt_x_f_v) > +GEN_VECTOR_R2_VM(vfwcvt_f_xu_v) > +GEN_VECTOR_R2_VM(vfwcvt_f_x_v) > +GEN_VECTOR_R2_VM(vfwcvt_f_f_v) > +GEN_VECTOR_R2_VM(vfncvt_xu_f_v) > +GEN_VECTOR_R2_VM(vfncvt_x_f_v) > +GEN_VECTOR_R2_VM(vfncvt_f_xu_v) > +GEN_VECTOR_R2_VM(vfncvt_f_x_v) > +GEN_VECTOR_R2_VM(vfncvt_f_f_v) > +GEN_VECTOR_R2_VM(vfsqrt_v) > +GEN_VECTOR_R2_VM(vfclass_v) > + > +GEN_VECTOR_R_VM(vadd_vv) > +GEN_VECTOR_R_VM(vadd_vx) > +GEN_VECTOR_R_VM(vadd_vi) > +GEN_VECTOR_R_VM(vredsum_vs) > +GEN_VECTOR_R_VM(vfadd_vv) > +GEN_VECTOR_R_VM(vfadd_vf) > +GEN_VECTOR_R_VM(vredand_vs) > +GEN_VECTOR_R_VM(vfredsum_vs) > 
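As far as I can see none of the generated trans functions can ever return false, so these patterns decode and execute even when the V extension isn't present, instead of falling through to the illegal-instruction path. Each expansion wants an early bail-out, something like the following (assuming an RVV bit is added alongside the existing RV('F')/RV('D') misa definitions in cpu.h):

    /* Untested sketch: gate the vector translators on the misa bit the
     * way the F/D translators use has_ext(); RVV is an assumed new
     * RV('V') definition. */
    static bool vector_access_ok(DisasContext *ctx)
    {
        return has_ext(ctx, RVV);
    }

with "if (!vector_access_ok(ctx)) { return false; }" at the top of each generated body. Presumably a check on mstatus.VS belongs there too once that state exists.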
+GEN_VECTOR_R_VM(vsub_vv) > +GEN_VECTOR_R_VM(vsub_vx) > +GEN_VECTOR_R_VM(vredor_vs) > +GEN_VECTOR_R_VM(vfsub_vv) > +GEN_VECTOR_R_VM(vfsub_vf) > +GEN_VECTOR_R_VM(vrsub_vx) > +GEN_VECTOR_R_VM(vrsub_vi) > +GEN_VECTOR_R_VM(vredxor_vs) > +GEN_VECTOR_R_VM(vfredosum_vs) > +GEN_VECTOR_R_VM(vminu_vv) > +GEN_VECTOR_R_VM(vminu_vx) > +GEN_VECTOR_R_VM(vredminu_vs) > +GEN_VECTOR_R_VM(vfmin_vv) > +GEN_VECTOR_R_VM(vfmin_vf) > +GEN_VECTOR_R_VM(vmin_vv) > +GEN_VECTOR_R_VM(vmin_vx) > +GEN_VECTOR_R_VM(vredmin_vs) > +GEN_VECTOR_R_VM(vfredmin_vs) > +GEN_VECTOR_R_VM(vmaxu_vv) > +GEN_VECTOR_R_VM(vmaxu_vx) > +GEN_VECTOR_R_VM(vredmaxu_vs) > +GEN_VECTOR_R_VM(vfmax_vv) > +GEN_VECTOR_R_VM(vfmax_vf) > +GEN_VECTOR_R_VM(vmax_vv) > +GEN_VECTOR_R_VM(vmax_vx) > +GEN_VECTOR_R_VM(vredmax_vs) > +GEN_VECTOR_R_VM(vfredmax_vs) > +GEN_VECTOR_R_VM(vfsgnj_vv) > +GEN_VECTOR_R_VM(vfsgnj_vf) > +GEN_VECTOR_R_VM(vand_vv) > +GEN_VECTOR_R_VM(vand_vx) > +GEN_VECTOR_R_VM(vand_vi) > +GEN_VECTOR_R_VM(vfsgnjn_vv) > +GEN_VECTOR_R_VM(vfsgnjn_vf) > +GEN_VECTOR_R_VM(vor_vv) > +GEN_VECTOR_R_VM(vor_vx) > +GEN_VECTOR_R_VM(vor_vi) > +GEN_VECTOR_R_VM(vfsgnjx_vv) > +GEN_VECTOR_R_VM(vfsgnjx_vf) > +GEN_VECTOR_R_VM(vxor_vv) > +GEN_VECTOR_R_VM(vxor_vx) > +GEN_VECTOR_R_VM(vxor_vi) > +GEN_VECTOR_R_VM(vrgather_vv) > +GEN_VECTOR_R_VM(vrgather_vx) > +GEN_VECTOR_R_VM(vrgather_vi) > +GEN_VECTOR_R_VM(vslideup_vx) > +GEN_VECTOR_R_VM(vslideup_vi) > +GEN_VECTOR_R_VM(vslide1up_vx) > +GEN_VECTOR_R_VM(vslidedown_vx) > +GEN_VECTOR_R_VM(vslidedown_vi) > +GEN_VECTOR_R_VM(vslide1down_vx) > +GEN_VECTOR_R_VM(vmerge_vvm) > +GEN_VECTOR_R_VM(vmerge_vxm) > +GEN_VECTOR_R_VM(vmerge_vim) > +GEN_VECTOR_R_VM(vfmerge_vfm) > +GEN_VECTOR_R_VM(vmseq_vv) > +GEN_VECTOR_R_VM(vmseq_vx) > +GEN_VECTOR_R_VM(vmseq_vi) > +GEN_VECTOR_R_VM(vmfeq_vv) > +GEN_VECTOR_R_VM(vmfeq_vf) > +GEN_VECTOR_R_VM(vmsne_vv) > +GEN_VECTOR_R_VM(vmsne_vx) > +GEN_VECTOR_R_VM(vmsne_vi) > +GEN_VECTOR_R_VM(vmfle_vv) > +GEN_VECTOR_R_VM(vmfle_vf) > +GEN_VECTOR_R_VM(vmsltu_vv) > +GEN_VECTOR_R_VM(vmsltu_vx) > +GEN_VECTOR_R_VM(vmford_vv) > +GEN_VECTOR_R_VM(vmford_vf) > +GEN_VECTOR_R_VM(vmslt_vv) > +GEN_VECTOR_R_VM(vmslt_vx) > +GEN_VECTOR_R_VM(vmflt_vv) > +GEN_VECTOR_R_VM(vmflt_vf) > +GEN_VECTOR_R_VM(vmsleu_vv) > +GEN_VECTOR_R_VM(vmsleu_vx) > +GEN_VECTOR_R_VM(vmsleu_vi) > +GEN_VECTOR_R_VM(vmfne_vv) > +GEN_VECTOR_R_VM(vmfne_vf) > +GEN_VECTOR_R_VM(vmsle_vv) > +GEN_VECTOR_R_VM(vmsle_vx) > +GEN_VECTOR_R_VM(vmsle_vi) > +GEN_VECTOR_R_VM(vmfgt_vf) > +GEN_VECTOR_R_VM(vmsgtu_vx) > +GEN_VECTOR_R_VM(vmsgtu_vi) > +GEN_VECTOR_R_VM(vmsgt_vx) > +GEN_VECTOR_R_VM(vmsgt_vi) > +GEN_VECTOR_R_VM(vmfge_vf) > +GEN_VECTOR_R_VM(vsaddu_vv) > +GEN_VECTOR_R_VM(vsaddu_vx) > +GEN_VECTOR_R_VM(vsaddu_vi) > +GEN_VECTOR_R_VM(vdivu_vv) > +GEN_VECTOR_R_VM(vdivu_vx) > +GEN_VECTOR_R_VM(vfdiv_vv) > +GEN_VECTOR_R_VM(vfdiv_vf) > +GEN_VECTOR_R_VM(vsadd_vv) > +GEN_VECTOR_R_VM(vsadd_vx) > +GEN_VECTOR_R_VM(vsadd_vi) > +GEN_VECTOR_R_VM(vdiv_vv) > +GEN_VECTOR_R_VM(vdiv_vx) > +GEN_VECTOR_R_VM(vfrdiv_vf) > +GEN_VECTOR_R_VM(vssubu_vv) > +GEN_VECTOR_R_VM(vssubu_vx) > +GEN_VECTOR_R_VM(vremu_vv) > +GEN_VECTOR_R_VM(vremu_vx) > +GEN_VECTOR_R_VM(vssub_vv) > +GEN_VECTOR_R_VM(vssub_vx) > +GEN_VECTOR_R_VM(vrem_vv) > +GEN_VECTOR_R_VM(vrem_vx) > +GEN_VECTOR_R_VM(vaadd_vv) > +GEN_VECTOR_R_VM(vaadd_vx) > +GEN_VECTOR_R_VM(vaadd_vi) > +GEN_VECTOR_R_VM(vmulhu_vv) > +GEN_VECTOR_R_VM(vmulhu_vx) > +GEN_VECTOR_R_VM(vfmul_vv) > +GEN_VECTOR_R_VM(vfmul_vf) > +GEN_VECTOR_R_VM(vsll_vv) > +GEN_VECTOR_R_VM(vsll_vx) > +GEN_VECTOR_R_VM(vsll_vi) > +GEN_VECTOR_R_VM(vmul_vv) > +GEN_VECTOR_R_VM(vmul_vx) > 
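For vadd.vi and most of the other _vi forms the rs1 field is really a 5-bit signed immediate (the shifts, slides and gathers take it unsigned), but @r_vm hands it to the helper as if it were a register number, leaving every helper to re-derive the value. It would be cheaper to extract it once at translate time:

    /* Untested sketch: sign-extend the simm5 once here (sextract32 is
     * the existing bitops.h utility) instead of inside each helper. */
    static bool trans_vadd_vi(DisasContext *ctx, arg_vadd_vi *a)
    {
        TCGv_i32 simm = tcg_const_i32(sextract32(a->rs1, 0, 5));
        TCGv_i32 s2 = tcg_const_i32(a->rs2);
        TCGv_i32 d = tcg_const_i32(a->rd);
        TCGv_i32 vm = tcg_const_i32(a->vm);
        gen_helper_vector_vadd_vi(cpu_env, vm, simm, s2, d);
        tcg_temp_free_i32(simm);
        tcg_temp_free_i32(s2);
        tcg_temp_free_i32(d);
        tcg_temp_free_i32(vm);
        return true;
    }

or better still, give the _vi patterns their own decode format that extracts the field as a signed immediate.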
+GEN_VECTOR_R_VM(vasub_vv) > +GEN_VECTOR_R_VM(vasub_vx) > +GEN_VECTOR_R_VM(vmulhsu_vv) > +GEN_VECTOR_R_VM(vmulhsu_vx) > +GEN_VECTOR_R_VM(vsmul_vv) > +GEN_VECTOR_R_VM(vsmul_vx) > +GEN_VECTOR_R_VM(vmulh_vv) > +GEN_VECTOR_R_VM(vmulh_vx) > +GEN_VECTOR_R_VM(vfrsub_vf) > +GEN_VECTOR_R_VM(vsrl_vv) > +GEN_VECTOR_R_VM(vsrl_vx) > +GEN_VECTOR_R_VM(vsrl_vi) > +GEN_VECTOR_R_VM(vfmadd_vv) > +GEN_VECTOR_R_VM(vfmadd_vf) > +GEN_VECTOR_R_VM(vsra_vv) > +GEN_VECTOR_R_VM(vsra_vx) > +GEN_VECTOR_R_VM(vsra_vi) > +GEN_VECTOR_R_VM(vmadd_vv) > +GEN_VECTOR_R_VM(vmadd_vx) > +GEN_VECTOR_R_VM(vfnmadd_vv) > +GEN_VECTOR_R_VM(vfnmadd_vf) > +GEN_VECTOR_R_VM(vssrl_vv) > +GEN_VECTOR_R_VM(vssrl_vx) > +GEN_VECTOR_R_VM(vssrl_vi) > +GEN_VECTOR_R_VM(vfmsub_vv) > +GEN_VECTOR_R_VM(vfmsub_vf) > +GEN_VECTOR_R_VM(vssra_vv) > +GEN_VECTOR_R_VM(vssra_vx) > +GEN_VECTOR_R_VM(vssra_vi) > +GEN_VECTOR_R_VM(vnmsub_vv) > +GEN_VECTOR_R_VM(vnmsub_vx) > +GEN_VECTOR_R_VM(vfnmsub_vv) > +GEN_VECTOR_R_VM(vfnmsub_vf) > +GEN_VECTOR_R_VM(vnsrl_vv) > +GEN_VECTOR_R_VM(vnsrl_vx) > +GEN_VECTOR_R_VM(vnsrl_vi) > +GEN_VECTOR_R_VM(vfmacc_vv) > +GEN_VECTOR_R_VM(vfmacc_vf) > +GEN_VECTOR_R_VM(vnsra_vv) > +GEN_VECTOR_R_VM(vnsra_vx) > +GEN_VECTOR_R_VM(vnsra_vi) > +GEN_VECTOR_R_VM(vmacc_vv) > +GEN_VECTOR_R_VM(vmacc_vx) > +GEN_VECTOR_R_VM(vfnmacc_vv) > +GEN_VECTOR_R_VM(vfnmacc_vf) > +GEN_VECTOR_R_VM(vnclipu_vv) > +GEN_VECTOR_R_VM(vnclipu_vx) > +GEN_VECTOR_R_VM(vnclipu_vi) > +GEN_VECTOR_R_VM(vfmsac_vv) > +GEN_VECTOR_R_VM(vfmsac_vf) > +GEN_VECTOR_R_VM(vnclip_vv) > +GEN_VECTOR_R_VM(vnclip_vx) > +GEN_VECTOR_R_VM(vnclip_vi) > +GEN_VECTOR_R_VM(vnmsac_vv) > +GEN_VECTOR_R_VM(vnmsac_vx) > +GEN_VECTOR_R_VM(vfnmsac_vv) > +GEN_VECTOR_R_VM(vfnmsac_vf) > +GEN_VECTOR_R_VM(vwredsumu_vs) > +GEN_VECTOR_R_VM(vwaddu_vv) > +GEN_VECTOR_R_VM(vwaddu_vx) > +GEN_VECTOR_R_VM(vfwadd_vv) > +GEN_VECTOR_R_VM(vfwadd_vf) > +GEN_VECTOR_R_VM(vwredsum_vs) > +GEN_VECTOR_R_VM(vwadd_vv) > +GEN_VECTOR_R_VM(vwadd_vx) > +GEN_VECTOR_R_VM(vfwredsum_vs) > +GEN_VECTOR_R_VM(vwsubu_vv) > +GEN_VECTOR_R_VM(vwsubu_vx) > +GEN_VECTOR_R_VM(vfwsub_vv) > +GEN_VECTOR_R_VM(vfwsub_vf) > +GEN_VECTOR_R_VM(vwsub_vv) > +GEN_VECTOR_R_VM(vwsub_vx) > +GEN_VECTOR_R_VM(vfwredosum_vs) > +GEN_VECTOR_R_VM(vwaddu_wv) > +GEN_VECTOR_R_VM(vwaddu_wx) > +GEN_VECTOR_R_VM(vfwadd_wv) > +GEN_VECTOR_R_VM(vfwadd_wf) > +GEN_VECTOR_R_VM(vwadd_wv) > +GEN_VECTOR_R_VM(vwadd_wx) > +GEN_VECTOR_R_VM(vwsubu_wv) > +GEN_VECTOR_R_VM(vwsubu_wx) > +GEN_VECTOR_R_VM(vfwsub_wv) > +GEN_VECTOR_R_VM(vfwsub_wf) > +GEN_VECTOR_R_VM(vwsub_wv) > +GEN_VECTOR_R_VM(vwsub_wx) > +GEN_VECTOR_R_VM(vwmulu_vv) > +GEN_VECTOR_R_VM(vwmulu_vx) > +GEN_VECTOR_R_VM(vfwmul_vv) > +GEN_VECTOR_R_VM(vfwmul_vf) > +GEN_VECTOR_R_VM(vwmulsu_vv) > +GEN_VECTOR_R_VM(vwmulsu_vx) > +GEN_VECTOR_R_VM(vwmul_vv) > +GEN_VECTOR_R_VM(vwmul_vx) > +GEN_VECTOR_R_VM(vwsmaccu_vv) > +GEN_VECTOR_R_VM(vwsmaccu_vx) > +GEN_VECTOR_R_VM(vwmaccu_vv) > +GEN_VECTOR_R_VM(vwmaccu_vx) > +GEN_VECTOR_R_VM(vfwmacc_vv) > +GEN_VECTOR_R_VM(vfwmacc_vf) > +GEN_VECTOR_R_VM(vwsmacc_vv) > +GEN_VECTOR_R_VM(vwsmacc_vx) > +GEN_VECTOR_R_VM(vwmacc_vv) > +GEN_VECTOR_R_VM(vwmacc_vx) > +GEN_VECTOR_R_VM(vfwnmacc_vv) > +GEN_VECTOR_R_VM(vfwnmacc_vf) > +GEN_VECTOR_R_VM(vwsmaccsu_vv) > +GEN_VECTOR_R_VM(vwsmaccsu_vx) > +GEN_VECTOR_R_VM(vwmaccsu_vv) > +GEN_VECTOR_R_VM(vwmaccsu_vx) > +GEN_VECTOR_R_VM(vfwmsac_vv) > +GEN_VECTOR_R_VM(vfwmsac_vf) > +GEN_VECTOR_R_VM(vwsmaccus_vx) > +GEN_VECTOR_R_VM(vwmaccus_vx) > +GEN_VECTOR_R_VM(vfwnmsac_vv) > +GEN_VECTOR_R_VM(vfwnmsac_vf) > +GEN_VECTOR_R2_ZIMM(vsetvli) > +GEN_VECTOR_R(vsetvl) > diff --git 
a/target/riscv/translate.c b/target/riscv/translate.c > index 8d6ab73..587c23e 100644 > --- a/target/riscv/translate.c > +++ b/target/riscv/translate.c > @@ -706,6 +706,7 @@ static bool gen_shift(DisasContext *ctx, arg_r *a, > #include "insn_trans/trans_rva.inc.c" > #include "insn_trans/trans_rvf.inc.c" > #include "insn_trans/trans_rvd.inc.c" > +#include "insn_trans/trans_rvv.inc.c" > #include "insn_trans/trans_privileged.inc.c" > > /* > diff --git a/target/riscv/vector_helper.c > b/target/riscv/vector_helper.c > new file mode 100644 > index 0000000..1f8f1ec > --- /dev/null > +++ b/target/riscv/vector_helper.c > @@ -0,0 +1,26563 @@ > +/* > + * RISC-V Vector Extension Helpers for QEMU. > + * > + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved. > + * > + * This program is free software; you can redistribute it and/or > modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2 or later, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but > WITHOUT > + * ANY WARRANTY; without even the implied warranty of > MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public > License for > + * more details. > + * > + * You should have received a copy of the GNU General Public > License along with > + * this program. If not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include "qemu/osdep.h" > +#include "qemu/log.h" > +#include "cpu.h" > +#include "qemu/main-loop.h" > +#include "exec/exec-all.h" > +#include "exec/helper-proto.h" > +#include "exec/translator.h" > +#include "exec/cpu_ldst.h" > +#include <math.h> > +#include "instmap.h" > + > +#define VECTOR_HELPER(name) HELPER(glue(vector_, name)) > +#define SIGNBIT8 (1 << 7) > +#define MAX_U8 ((uint8_t)0xff) > +#define MIN_U8 ((uint8_t)0x0) > +#define MAX_S8 ((int8_t)0x7f) > +#define MIN_S8 ((int8_t)0x80) > +#define SIGNBIT16 (1 << 15) > +#define MAX_U16 ((uint16_t)0xffff) > +#define MIN_U16 ((uint16_t)0x0) > +#define MAX_S16 ((int16_t)0x7fff) > +#define MIN_S16 ((int16_t)0x8000) > +#define SIGNBIT32 (1 << 31) > +#define MAX_U32 ((uint32_t)0xffffffff) > +#define MIN_U32 ((uint32_t)0x0) > +#define MAX_S32 ((int32_t)0x7fffffff) > +#define MIN_S32 ((int32_t)0x80000000) > +#define SIGNBIT64 ((uint64_t)1 << 63) > +#define MAX_U64 ((uint64_t)0xffffffffffffffff) > +#define MIN_U64 ((uint64_t)0x0) > +#define MAX_S64 ((int64_t)0x7fffffffffffffff) > +#define MIN_S64 ((int64_t)0x8000000000000000) > + > +static int64_t sign_extend(int64_t a, int8_t width) > +{ > + return a << (64 - width) >> (64 - width); > +} > + > +static int64_t extend_gpr(target_ulong reg) > +{ > + return sign_extend(reg, sizeof(target_ulong) * 8); > +} > + > +static target_ulong vector_get_index(CPURISCVState *env, int rs1, > int rs2, > + int index, int mem, int width, int nf) > +{ > + target_ulong abs_off, base = env->gpr[rs1]; > + target_long offset; > + switch (width) { > + case 8: > + offset = sign_extend(env->vfp.vreg[rs2].s8[index], 8) + > nf * mem; > + break; > + case 16: > + offset = sign_extend(env->vfp.vreg[rs2].s16[index], 16) + > nf * mem; > + break; > + case 32: > + offset = sign_extend(env->vfp.vreg[rs2].s32[index], 32) + > nf * mem; > + break; > + case 64: > + offset = env->vfp.vreg[rs2].s64[index] + nf * mem; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return 0; > + } > + if (offset < 0) { > + abs_off = ~offset + 1; > + if (base >= abs_off) { > + return base - abs_off; > + } >
+ } else { > + if ((target_ulong)((target_ulong)offset + base) >= base) { > + return (target_ulong)offset + base; > + } > + } > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return 0; > +} > + > + > + > +/* ADD/SUB/COMPARE instructions. */ > +static inline uint8_t sat_add_u8(CPURISCVState *env, uint8_t a, > uint8_t b) > +{ > + uint8_t res = a + b; > + if (res < a) { > + res = MAX_U8; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint16_t sat_add_u16(CPURISCVState *env, uint16_t > a, uint16_t b) > +{ > + uint16_t res = a + b; > + if (res < a) { > + res = MAX_U16; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint32_t sat_add_u32(CPURISCVState *env, uint32_t > a, uint32_t b) > +{ > + uint32_t res = a + b; > + if (res < a) { > + res = MAX_U32; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint64_t sat_add_u64(CPURISCVState *env, uint64_t > a, uint64_t b) > +{ > + uint64_t res = a + b; > + if (res < a) { > + res = MAX_U64; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint8_t sat_add_s8(CPURISCVState *env, uint8_t a, > uint8_t b) > +{ > + uint8_t res = a + b; > + if (((res ^ a) & SIGNBIT8) && !((a ^ b) & SIGNBIT8)) { > + res = ~(((int8_t)a >> 7) ^ SIGNBIT8); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint16_t sat_add_s16(CPURISCVState *env, uint16_t > a, uint16_t b) > +{ > + uint16_t res = a + b; > + if (((res ^ a) & SIGNBIT16) && !((a ^ b) & SIGNBIT16)) { > + res = ~(((int16_t)a >> 15) ^ SIGNBIT16); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint32_t sat_add_s32(CPURISCVState *env, uint32_t > a, uint32_t b) > +{ > + uint32_t res = a + b; > + if (((res ^ a) & SIGNBIT32) && !((a ^ b) & SIGNBIT32)) { > + res = ~(((int32_t)a >> 31) ^ SIGNBIT32); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint64_t sat_add_s64(CPURISCVState *env, uint64_t > a, uint64_t b) > +{ > + uint64_t res = a + b; > + if (((res ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) { > + res = ~(((int64_t)a >> 63) ^ SIGNBIT64); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint8_t sat_sub_u8(CPURISCVState *env, uint8_t a, > uint8_t b) > +{ > + uint8_t res = a - b; > + if (res > a) { > + res = 0; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint16_t sat_sub_u16(CPURISCVState *env, uint16_t > a, uint16_t b) > +{ > + uint16_t res = a - b; > + if (res > a) { > + res = 0; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint32_t sat_sub_u32(CPURISCVState *env, uint32_t > a, uint32_t b) > +{ > + uint32_t res = a - b; > + if (res > a) { > + res = 0; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint64_t sat_sub_u64(CPURISCVState *env, uint64_t > a, uint64_t b) > +{ > + uint64_t res = a - b; > + if (res > a) { > + res = 0; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint8_t sat_sub_s8(CPURISCVState *env, uint8_t a, > uint8_t b) > +{ > + uint8_t res = a - b; > + if (((res ^ a) & SIGNBIT8) && ((a ^ b) & SIGNBIT8)) { > + res = ~(((int8_t)a >> 7) ^ SIGNBIT8); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint16_t sat_sub_s16(CPURISCVState *env, uint16_t > a, uint16_t b) > +{ > + uint16_t res = a - b; > + if (((res ^ a) & SIGNBIT16) && ((a ^ b) & SIGNBIT16)) { > + res = ~(((int16_t)a >> 15) ^ SIGNBIT16); > + 
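
The `~(((int8_t)a >> 7) ^ SIGNBIT8)` dance in the signed variants is hard
to follow. Since you are already paying for a branch, computing in a wider
type reads much better - untested sketch, keeping your types:

    static inline uint8_t sat_add_s8(CPURISCVState *env, uint8_t a, uint8_t b)
    {
        int16_t sum = (int8_t)a + (int8_t)b;  /* cannot overflow in 16 bits */

        if (sum != (int8_t)sum) {             /* result doesn't fit in 8 bits */
            env->vfp.vxsat = 0x1;
            return sum < 0 ? MIN_S8 : MAX_S8;
        }
        return sum;
    }

and similarly up to 32 bits (the 64-bit case needs the current
bit-twiddling or __builtin_add_overflow). There are also stray blank
lines after every "env->vfp.vxsat = 0x1;".
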
env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint32_t sat_sub_s32(CPURISCVState *env, uint32_t > a, uint32_t b) > +{ > + uint32_t res = a - b; > + if (((res ^ a) & SIGNBIT32) && ((a ^ b) & SIGNBIT32)) { > + res = ~(((int32_t)a >> 31) ^ SIGNBIT32); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint64_t sat_sub_s64(CPURISCVState *env, uint64_t > a, uint64_t b) > +{ > + uint64_t res = a - b; > + if (((res ^ a) & SIGNBIT64) && ((a ^ b) & SIGNBIT64)) { > + res = ~(((int64_t)a >> 63) ^ SIGNBIT64); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static uint64_t fix_data_round(CPURISCVState *env, uint64_t result, > + uint8_t shift) > +{ > + uint64_t lsb_1 = (uint64_t)1 << shift; > + int mod = env->vfp.vxrm; > + int mask = ((uint64_t)1 << shift) - 1; > + > + if (mod == 0x0) { /* rnu */ > + return lsb_1 >> 1; > + } else if (mod == 0x1) { /* rne */ > + if ((result & mask) > (lsb_1 >> 1) || > + (((result & mask) == (lsb_1 >> 1)) && > + (((result >> shift) & 0x1)) == 1)) { > + return lsb_1 >> 1; > + } > + } else if (mod == 0x3) { /* rod */ > + if (((result & mask) >= 0x1) && (((result >> shift) & > 0x1) == 0)) { > + return lsb_1; > + } > + } > + return 0; > +} > + > +static int8_t saturate_s8(CPURISCVState *env, int16_t res) > +{ > + if (res > MAX_S8) { > + env->vfp.vxsat = 0x1; > + return MAX_S8; > + } else if (res < MIN_S8) { > + env->vfp.vxsat = 0x1; > + return MIN_S8; > + } else { > + return res; > + } > +} > + > +static uint8_t saturate_u8(CPURISCVState *env, uint16_t res) > +{ > + if (res > MAX_U8) { > + env->vfp.vxsat = 0x1; > + return MAX_U8; > + } else { > + return res; > + } > +} > + > +static uint16_t saturate_u16(CPURISCVState *env, uint32_t res) > +{ > + if (res > MAX_U16) { > + env->vfp.vxsat = 0x1; > + return MAX_U16; > + } else { > + return res; > + } > +} > + > +static uint32_t saturate_u32(CPURISCVState *env, uint64_t res) > +{ > + if (res > MAX_U32) { > + env->vfp.vxsat = 0x1; > + return MAX_U32; > + } else { > + return res; > + } > +} > + > +static int16_t saturate_s16(CPURISCVState *env, int32_t res) > +{ > + if (res > MAX_S16) { > + env->vfp.vxsat = 0x1; > + return MAX_S16; > + } else if (res < MIN_S16) { > + env->vfp.vxsat = 0x1; > + return MIN_S16; > + } else { > + return res; > + } > +} > + > +static int32_t saturate_s32(CPURISCVState *env, int64_t res) > +{ > + if (res > MAX_S32) { > + env->vfp.vxsat = 0x1; > + return MAX_S32; > + } else if (res < MIN_S32) { > + env->vfp.vxsat = 0x1; > + return MIN_S32; > + } else { > + return res; > + } > +} > +static uint16_t vwsmaccu_8(CPURISCVState *env, uint8_t a, uint8_t b, > + uint16_t c) > +{ > + uint16_t round, res; > + uint16_t product = (uint16_t)a * (uint16_t)b; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)product, 4); > + res = (round + product) >> 4; > + return sat_add_u16(env, c, res); > +} > + > +static uint32_t vwsmaccu_16(CPURISCVState *env, uint16_t a, > uint16_t b, > + uint32_t c) > +{ > + uint32_t round, res; > + uint32_t product = (uint32_t)a * (uint32_t)b; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)product, 8); > + res = (round + product) >> 8; > + return sat_add_u32(env, c, res); > +} > + > +static uint64_t vwsmaccu_32(CPURISCVState *env, uint32_t a, > uint32_t b, > + uint64_t c) > +{ > + uint64_t round, res; > + uint64_t product = (uint64_t)a * (uint64_t)b; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)product, 16); > + res = (round + product) >> 16; > + return sat_add_u64(env, c, res); > +} > + > +static 
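
fix_data_round() is the heart of all the fixed-point ops, so two things:

- `mask` is declared int. For shift >= 32 the value is truncated, and
  vsmul_64 below calls this with shift == 63, so the rne/rod comparisons
  test the wrong bits on the 64-bit paths. It needs to be uint64_t (at
  which point it is just lsb_1 - 1).

- the vxrm encodings deserve names rather than bare 0x0/0x1/0x3, and the
  fact that mode 2 (rdn) deliberately falls through to "return 0" wants a
  comment, otherwise it looks like a missed case:

    enum {
        VXRM_RNU = 0, /* round-to-nearest-up */
        VXRM_RNE = 1, /* round-to-nearest-even */
        VXRM_RDN = 2, /* round-down: plain truncation, no increment */
        VXRM_ROD = 3, /* round-to-odd */
    };
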
int16_t vwsmacc_8(CPURISCVState *env, int8_t a, int8_t b, > + int16_t c) > +{ > + int16_t round, res; > + int16_t product = (int16_t)a * (int16_t)b; > + > + round = (int16_t)fix_data_round(env, (uint64_t)product, 4); > + res = (int16_t)(round + product) >> 4; > + return sat_add_s16(env, c, res); > +} > + > +static int32_t vwsmacc_16(CPURISCVState *env, int16_t a, int16_t b, > + int32_t c) > +{ > + int32_t round, res; > + int32_t product = (int32_t)a * (int32_t)b; > + > + round = (int32_t)fix_data_round(env, (uint64_t)product, 8); > + res = (int32_t)(round + product) >> 8; > + return sat_add_s32(env, c, res); > +} > + > +static int64_t vwsmacc_32(CPURISCVState *env, int32_t a, int32_t b, > + int64_t c) > +{ > + int64_t round, res; > + int64_t product = (int64_t)a * (int64_t)b; > + > + round = (int64_t)fix_data_round(env, (uint64_t)product, 16); > + res = (int64_t)(round + product) >> 16; > + return sat_add_s64(env, c, res); > +} > + > +static int16_t vwsmaccsu_8(CPURISCVState *env, uint8_t a, int8_t b, > + int16_t c) > +{ > + int16_t round, res; > + int16_t product = (uint16_t)a * (int16_t)b; > + > + round = (int16_t)fix_data_round(env, (uint64_t)product, 4); > + res = (round + product) >> 4; > + return sat_sub_s16(env, c, res); > +} > + > +static int32_t vwsmaccsu_16(CPURISCVState *env, uint16_t a, > int16_t b, > + uint32_t c) > +{ > + int32_t round, res; > + int32_t product = (uint32_t)a * (int32_t)b; > + > + round = (int32_t)fix_data_round(env, (uint64_t)product, 8); > + res = (round + product) >> 8; > + return sat_sub_s32(env, c, res); > +} > + > +static int64_t vwsmaccsu_32(CPURISCVState *env, uint32_t a, > int32_t b, > + int64_t c) > +{ > + int64_t round, res; > + int64_t product = (uint64_t)a * (int64_t)b; > + > + round = (int64_t)fix_data_round(env, (uint64_t)product, 16); > + res = (round + product) >> 16; > + return sat_sub_s64(env, c, res); > +} > + > +static int16_t vwsmaccus_8(CPURISCVState *env, int8_t a, uint8_t b, > + int16_t c) > +{ > + int16_t round, res; > + int16_t product = (int16_t)a * (uint16_t)b; > + > + round = (int16_t)fix_data_round(env, (uint64_t)product, 4); > + res = (round + product) >> 4; > + return sat_sub_s16(env, c, res); > +} > + > +static int32_t vwsmaccus_16(CPURISCVState *env, int16_t a, > uint16_t b, > + int32_t c) > +{ > + int32_t round, res; > + int32_t product = (int32_t)a * (uint32_t)b; > + > + round = (int32_t)fix_data_round(env, (uint64_t)product, 8); > + res = (round + product) >> 8; > + return sat_sub_s32(env, c, res); > +} > + > +static uint64_t vwsmaccus_32(CPURISCVState *env, int32_t a, > uint32_t b, > + int64_t c) > +{ > + int64_t round, res; > + int64_t product = (int64_t)a * (uint64_t)b; > + > + round = (int64_t)fix_data_round(env, (uint64_t)product, 16); > + res = (round + product) >> 16; > + return sat_sub_s64(env, c, res); > +} > + > +static int8_t vssra_8(CPURISCVState *env, int8_t a, uint8_t b) > +{ > + int16_t round, res; > + uint8_t shift = b & 0x7; > + > + round = (int16_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return res; > +} > + > +static int16_t vssra_16(CPURISCVState *env, int16_t a, uint16_t b) > +{ > + int32_t round, res; > + uint8_t shift = b & 0xf; > + > + round = (int32_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > +static int32_t vssra_32(CPURISCVState *env, int32_t a, uint32_t b) > +{ > + int64_t round, res; > + uint8_t shift = b & 0x1f; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + 
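
The vwsmaccsu_*/vwsmaccus_* variants accumulate with sat_sub_*() while
vwsmaccu_*/vwsmacc_* use sat_add_*(). As far as I can see 0.7.1 defines
all four as multiply-*add*, so unless I'm misreading the spec the sub
looks like a copy-and-paste slip, i.e.:

        return sat_add_s16(env, c, res);   /* not sat_sub_s16 */

If the subtraction really is intended, a comment citing the spec text
would stop the next reader tripping over it. While you are there:
vwsmaccsu_16 takes "uint32_t c" where its siblings take the signed type,
and vwsmaccus_32 is declared to return uint64_t but computes an int64_t.
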
round) >> shift; > + return res; > +} > + > +static int64_t vssra_64(CPURISCVState *env, int64_t a, uint64_t b) > +{ > + int64_t round, res; > + uint8_t shift = b & 0x3f; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a >> (shift - 1)) + (round >> (shift - 1)); > + return res >> 1; > +} > + > +static int8_t vssrai_8(CPURISCVState *env, int8_t a, uint8_t b) > +{ > + int16_t round, res; > + > + round = (int16_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static int16_t vssrai_16(CPURISCVState *env, int16_t a, uint8_t b) > +{ > + int32_t round, res; > + > + round = (int32_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static int32_t vssrai_32(CPURISCVState *env, int32_t a, uint8_t b) > +{ > + int64_t round, res; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static int64_t vssrai_64(CPURISCVState *env, int64_t a, uint8_t b) > +{ > + int64_t round, res; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a >> (b - 1)) + (round >> (b - 1)); > + return res >> 1; > +} > + > +static int8_t vnclip_16(CPURISCVState *env, int16_t a, uint8_t b) > +{ > + int16_t round, res; > + uint8_t shift = b & 0xf; > + > + round = (int16_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_s8(env, res); > +} > + > +static int16_t vnclip_32(CPURISCVState *env, int32_t a, uint16_t b) > +{ > + int32_t round, res; > + uint8_t shift = b & 0x1f; > + > + round = (int32_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return saturate_s16(env, res); > +} > + > +static int32_t vnclip_64(CPURISCVState *env, int64_t a, uint32_t b) > +{ > + int64_t round, res; > + uint8_t shift = b & 0x3f; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_s32(env, res); > +} > + > +static int8_t vnclipi_16(CPURISCVState *env, int16_t a, uint8_t b) > +{ > + int16_t round, res; > + > + round = (int16_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_s8(env, res); > +} > + > +static int16_t vnclipi_32(CPURISCVState *env, int32_t a, uint8_t b) > +{ > + int32_t round, res; > + > + round = (int32_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_s16(env, res); > +} > + > +static int32_t vnclipi_64(CPURISCVState *env, int64_t a, uint8_t b) > +{ > + int32_t round, res; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_s32(env, res); > +} > + > +static uint8_t vnclipu_16(CPURISCVState *env, uint16_t a, uint8_t b) > +{ > + uint16_t round, res; > + uint8_t shift = b & 0xf; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_u8(env, res); > +} > + > +static uint16_t vnclipu_32(CPURISCVState *env, uint32_t a, > uint16_t b) > +{ > + uint32_t round, res; > + uint8_t shift = b & 0x1f; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_u16(env, res); > +} > + > +static uint32_t vnclipu_64(CPURISCVState *env, uint64_t a, > uint32_t b) > +{ > + uint64_t round, res; > + uint8_t shift = b & 0x3f; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return 
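
Two problems in this group:

- vnclipi_64() declares "int32_t round, res;", so the 64-bit rounding
  value and the shifted sum are truncated *before* saturate_s32() gets to
  look at them. Compare vnclip_64() just above, which correctly uses
  int64_t:

    static int32_t vnclipi_64(CPURISCVState *env, int64_t a, uint8_t b)
    {
        int64_t round, res;

        round = (int64_t)fix_data_round(env, (uint64_t)a, b);
        res = (a + round) >> b;

        return saturate_s32(env, res);
    }

- the 64-bit shift helpers (vssra_64, vssrai_64, and vssrl_64/vssrli_64
  further down) do "a >> (shift - 1)" to avoid overflowing the
  intermediate; when the shift amount is 0 that is a shift by -1, which is
  undefined behaviour. They need an early-out for shift == 0.
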
saturate_u32(env, res); > +} > + > +static uint8_t vnclipui_16(CPURISCVState *env, uint16_t a, uint8_t b) > +{ > + uint16_t round, res; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_u8(env, res); > +} > + > +static uint16_t vnclipui_32(CPURISCVState *env, uint32_t a, > uint8_t b) > +{ > + uint32_t round, res; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_u16(env, res); > +} > + > +static uint32_t vnclipui_64(CPURISCVState *env, uint64_t a, > uint8_t b) > +{ > + uint64_t round, res; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_u32(env, res); > +} > + > +static uint8_t vssrl_8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint16_t round, res; > + uint8_t shift = b & 0x7; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > +static uint16_t vssrl_16(CPURISCVState *env, uint16_t a, uint16_t b) > +{ > + uint32_t round, res; > + uint8_t shift = b & 0xf; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > +static uint32_t vssrl_32(CPURISCVState *env, uint32_t a, uint32_t b) > +{ > + uint64_t round, res; > + uint8_t shift = b & 0x1f; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > +static uint64_t vssrl_64(CPURISCVState *env, uint64_t a, uint64_t b) > +{ > + uint64_t round, res; > + uint8_t shift = b & 0x3f; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a >> (shift - 1)) + (round >> (shift - 1)); > + return res >> 1; > +} > + > +static uint8_t vssrli_8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint16_t round, res; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static uint16_t vssrli_16(CPURISCVState *env, uint16_t a, uint8_t b) > +{ > + uint32_t round, res; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static uint32_t vssrli_32(CPURISCVState *env, uint32_t a, uint8_t b) > +{ > + uint64_t round, res; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static uint64_t vssrli_64(CPURISCVState *env, uint64_t a, uint8_t b) > +{ > + uint64_t round, res; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a >> (b - 1)) + (round >> (b - 1)); > + return res >> 1; > +} > + > +static int8_t vsmul_8(CPURISCVState *env, int8_t a, int8_t b) > +{ > + int16_t round; > + int8_t res; > + int16_t product = (int16_t)a * (int16_t)b; > + > + if (a == MIN_S8 && b == MIN_S8) { > + env->vfp.vxsat = 1; > + > + return MAX_S8; > + } > + > + round = (int16_t)fix_data_round(env, (uint64_t)product, 7); > + res = sat_add_s16(env, product, round) >> 7; > + return res; > +} > + > + > +static int16_t vsmul_16(CPURISCVState *env, int16_t a, int16_t b) > +{ > + int32_t round; > + int16_t res; > + int32_t product = (int32_t)a * (int32_t)b; > + > + if (a == MIN_S16 && b == MIN_S16) { > + env->vfp.vxsat = 1; > + > + return MAX_S16; > + } > + > + round = (int32_t)fix_data_round(env, (uint64_t)product, 15); > + res = sat_add_s32(env, product, round) >> 15; > + return res; > +} > + > +static int32_t vsmul_32(CPURISCVState *env, int32_t a, 
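
These rounding-shift helpers are the same function at four widths, here
and for most of the file. Generating them keeps each fix in one place
instead of four - rough sketch of one scheme:

    #define GEN_VSSRL(BITS, TMPBITS)                                      \
    static uint##BITS##_t vssrl_##BITS(CPURISCVState *env,                \
                                       uint##BITS##_t a,                  \
                                       uint##BITS##_t b)                  \
    {                                                                     \
        uint##TMPBITS##_t round;                                          \
        uint8_t shift = b & (BITS - 1);                                   \
                                                                          \
        round = fix_data_round(env, (uint64_t)a, shift);                  \
        return (a + round) >> shift;                                      \
    }

    GEN_VSSRL(8, 16)
    GEN_VSSRL(16, 32)
    GEN_VSSRL(32, 64)

(modulo the shift == 0 issue noted above for the widest variant).
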
int32_t b) > +{ > + int64_t round; > + int32_t res; > + int64_t product = (int64_t)a * (int64_t)b; > + > + if (a == MIN_S32 && b == MIN_S32) { > + env->vfp.vxsat = 1; > + > + return MAX_S32; > + } > + > + round = (int64_t)fix_data_round(env, (uint64_t)product, 31); > + res = sat_add_s64(env, product, round) >> 31; > + return res; > +} > + > + > +static int64_t vsmul_64(CPURISCVState *env, int64_t a, int64_t b) > +{ > + int64_t res; > + uint64_t abs_a = a, abs_b = b; > + uint64_t lo_64, hi_64, carry, round; > + > + if (a == MIN_S64 && b == MIN_S64) { > + env->vfp.vxsat = 1; > + > + return MAX_S64; > + } > + > + if (a < 0) { > + abs_a = ~a + 1; > + } > + if (b < 0) { > + abs_b = ~b + 1; > + } > + > + /* first get the whole product in {hi_64, lo_64} */ > + uint64_t a_hi = abs_a >> 32; > + uint64_t a_lo = (uint32_t)abs_a; > + uint64_t b_hi = abs_b >> 32; > + uint64_t b_lo = (uint32_t)abs_b; > + > + /* > + * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) > + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + > + * (a_lo * b_hi) << 32 + a_lo * b_lo > + * = {hi_64, lo_64} > + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo > * b_lo)) >> 64 > + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry > + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * > b_lo) >> 32) >> 32 > + */ > + > + lo_64 = abs_a * abs_b; > + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + (uint64_t)(uint32_t)(a_lo * b_hi) + > + ((a_lo * b_lo) >> 32)) >> 32; > + > + hi_64 = a_hi * b_hi + > + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + > + carry; > + > + if ((a ^ b) & SIGNBIT64) { > + lo_64 = ~lo_64; > + hi_64 = ~hi_64; > + if (lo_64 == MAX_U64) { > + lo_64 = 0; > + hi_64 += 1; > + } else { > + lo_64 += 1; > + } > + } > + > + /* set rem and res */ > + round = fix_data_round(env, lo_64, 63); > + if ((lo_64 + round) < lo_64) { > + hi_64 += 1; > + res = (hi_64 << 1); > + } else { > + res = (hi_64 << 1) | ((lo_64 + round) >> 63); > + } > + > + return res; > +} > +static inline int8_t avg_round_s8(CPURISCVState *env, int8_t a, > int8_t b) > +{ > + int16_t round; > + int8_t res; > + int16_t sum = a + b; > + > + round = (int16_t)fix_data_round(env, (uint64_t)sum, 1); > + res = (sum + round) >> 1; > + > + return res; > +} > + > +static inline int16_t avg_round_s16(CPURISCVState *env, int16_t > a, int16_t b) > +{ > + int32_t round; > + int16_t res; > + int32_t sum = a + b; > + > + round = (int32_t)fix_data_round(env, (uint64_t)sum, 1); > + res = (sum + round) >> 1; > + > + return res; > +} > + > +static inline int32_t avg_round_s32(CPURISCVState *env, int32_t > a, int32_t b) > +{ > + int64_t round; > + int32_t res; > + int64_t sum = a + b; > + > + round = (int64_t)fix_data_round(env, (uint64_t)sum, 1); > + res = (sum + round) >> 1; > + > + return res; > +} > + > +static inline int64_t avg_round_s64(CPURISCVState *env, int64_t > a, int64_t b) > +{ > + int64_t rem = (a & 0x1) + (b & 0x1); > + int64_t res = (a >> 1) + (b >> 1) + (rem >> 1); > + int mod = env->vfp.vxrm; > + > + if (mod == 0x0) { /* rnu */ > + if (rem == 0x1) { > + return res + 1; > + } > + } else if (mod == 0x1) { /* rne */ > + if ((rem & 0x1) == 1 && ((res & 0x1) == 1)) { > + return res + 1; > + } > + } else if (mod == 0x3) { /* rod */ > + if (((rem & 0x1) >= 0x1) && (res & 0x1) == 0) { > + return res + 1; > + } > + } > + return res; > +} > + > +static target_ulong helper_fclass_h(uint64_t frs1) > +{ > + float16 f = frs1; > + bool sign = float16_is_neg(f); > + > + if (float16_is_infinity(f)) { > + 
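
vsmul_64() here (and u64xu64_lh/s64xu64_lh/s64xs64_lh below) open-code a
64x64->128 multiply. QEMU already provides this: mulu64()/muls64() in
"qemu/host-utils.h" fill in the low and high halves and use __int128 where
the host supports it. Untested, but vsmul_64 should reduce to roughly:

    #include "qemu/host-utils.h"

    static int64_t vsmul_64(CPURISCVState *env, int64_t a, int64_t b)
    {
        uint64_t lo_64, hi_64, round;

        if (a == INT64_MIN && b == INT64_MIN) {
            env->vfp.vxsat = 1;
            return INT64_MAX;
        }

        /* full 128-bit signed product in {hi_64, lo_64} */
        muls64(&lo_64, &hi_64, a, b);

        round = fix_data_round(env, lo_64, 63);
        if (lo_64 + round < lo_64) {    /* carry into the high half */
            hi_64 += 1;
        }
        return (hi_64 << 1) | ((lo_64 + round) >> 63);
    }

Also helper_fclass_h duplicates the fclass_s/fclass_d logic in
target/riscv/fpu_helper.c - it probably wants to live next to those.
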
return sign ? 1 << 0 : 1 << 7; > + } else if (float16_is_zero(f)) { > + return sign ? 1 << 3 : 1 << 4; > + } else if (float16_is_zero_or_denormal(f)) { > + return sign ? 1 << 2 : 1 << 5; > + } else if (float16_is_any_nan(f)) { > + float_status s = { }; /* for snan_bit_is_one */ > + return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; > + } else { > + return sign ? 1 << 1 : 1 << 6; > + } > +} > + > +static inline bool vector_vtype_ill(CPURISCVState *env) > +{ > + if ((env->vfp.vtype >> (sizeof(target_ulong) - 1)) & 0x1) { > + return true; > + } > + return false; > +} > + > +static inline void vector_vtype_set_ill(CPURISCVState *env) > +{ > + env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) - 1); > + return; > +} > + > +static inline int vector_vtype_get_sew(CPURISCVState *env) > +{ > + return (env->vfp.vtype >> 2) & 0x7; > +} > + > +static inline int vector_get_width(CPURISCVState *env) > +{ > + return 8 * (1 << vector_vtype_get_sew(env)); > +} > + > +static inline int vector_get_lmul(CPURISCVState *env) > +{ > + return 1 << (env->vfp.vtype & 0x3); > +} > + > +static inline int vector_get_vlmax(CPURISCVState *env) > +{ > + return vector_get_lmul(env) * VLEN / vector_get_width(env); > +} > + > +static inline int vector_elem_mask(CPURISCVState *env, uint32_t > vm, int width, > + int lmul, int index) > +{ > + int mlen = width / lmul; > + int idx = (index * mlen) / 8; > + int pos = (index * mlen) % 8; > + > + return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1); > +} > + > +static inline bool vector_overlap_vm_common(int lmul, int vm, int rd) > +{ > + if (lmul > 1 && vm == 0 && rd == 0) { > + return true; > + } > + return false; > +} > + > +static inline bool vector_overlap_vm_force(int vm, int rd) > +{ > + if (vm == 0 && rd == 0) { > + return true; > + } > + return false; > +} > + > +static inline bool vector_overlap_carry(int lmul, int rd) > +{ > + if (lmul > 1 && rd == 0) { > + return true; > + } > + return false; > +} > + > +static inline bool vector_overlap_dstgp_srcgp(int rd, int dlen, > int rs, > + int slen) > +{ > + if ((rd >= rs && rd < rs + slen) || (rs >= rd && rs < rd + > dlen)) { > + return true; > + } > + return false; > +} > + > +static inline uint64_t vector_get_mask(int start, int end) > +{ > + return ((uint64_t)(~((uint64_t)0))) << (63 - end + start) >> > (63 - end); > +} > + > +/* fetch unsigned element by width */ > +static inline uint64_t vector_get_iu_elem(CPURISCVState *env, > uint32_t width, > + uint32_t rs2, uint32_t index) > +{ > + uint64_t elem; > + if (width == 8) { > + elem = env->vfp.vreg[rs2].u8[index]; > + } else if (width == 16) { > + elem = env->vfp.vreg[rs2].u16[index]; > + } else if (width == 32) { > + elem = env->vfp.vreg[rs2].u32[index]; > + } else if (width == 64) { > + elem = env->vfp.vreg[rs2].u64[index]; > + } else { /* the max of (XLEN, FLEN) is no bigger than 64 */ > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return 0; > + } > + return elem; > +} > + > +static inline int vector_mask_reg(CPURISCVState *env, uint32_t > reg, int width, > + int lmul, int index) > +{ > + int mlen = width / lmul; > + int idx = (index * mlen) / 8; > + int pos = (index * mlen) % 8; > + return (env->vfp.vreg[reg].u8[idx] >> pos) & 0x1; > +} > + > +static inline void vector_mask_result(CPURISCVState *env, > uint32_t reg, > + int width, int lmul, int index, uint32_t result) > +{ > + int mlen = width / lmul; > + int idx = (index * mlen) / width; > + int pos = (index * mlen) % width; > + uint64_t mask = ~((((uint64_t)1 << mlen) - 1) << pos); 
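
This one looks like a genuine bug: vill is the most-significant *bit* of
vtype, but

    (env->vfp.vtype >> (sizeof(target_ulong) - 1)) & 0x1

shifts by 7 (or 3 on riscv32) - bytes, not bits. vector_vtype_set_ill()
has the matching problem, so the pair is self-consistent, but any vtype
with bit 3/7 set will read as ill. I think you want:

    static inline bool vector_vtype_ill(CPURISCVState *env)
    {
        return (env->vfp.vtype >> (sizeof(target_ulong) * 8 - 1)) & 0x1;
    }

    static inline void vector_vtype_set_ill(CPURISCVState *env)
    {
        env->vfp.vtype = (target_ulong)1 << (sizeof(target_ulong) * 8 - 1);
    }

(TARGET_LONG_BITS would be clearer still). The "if (cond) return true;
return false;" pattern in the vector_overlap_* helpers can likewise just
return the condition.
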
> + > + switch (width) { > + case 8: > + env->vfp.vreg[reg].u8[idx] = (env->vfp.vreg[reg].u8[idx] > & mask) > + | (result << pos); > + break; > + case 16: > + env->vfp.vreg[reg].u16[idx] = > (env->vfp.vreg[reg].u16[idx] & mask) > + | (result << pos); > + break; > + case 32: > + env->vfp.vreg[reg].u32[idx] = > (env->vfp.vreg[reg].u32[idx] & mask) > + | (result << pos); > + break; > + case 64: > + env->vfp.vreg[reg].u64[idx] = > (env->vfp.vreg[reg].u64[idx] & mask) > + | > ((uint64_t)result << pos); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + > + return; > +} > + > +/** > + * deposit16: > + * @value: initial value to insert bit field into > + * @start: the lowest bit in the bit field (numbered from 0) > + * @length: the length of the bit field > + * @fieldval: the value to insert into the bit field > + * > + * Deposit @fieldval into the 16 bit @value at the bit field > specified > + * by the @start and @length parameters, and return the modified > + * @value. Bits of @value outside the bit field are not modified. > + * Bits of @fieldval above the least significant @length bits are > + * ignored. The bit field must lie entirely within the 16 bit word. > + * It is valid to request that all 16 bits are modified (ie @length > + * 16 and @start 0). > + * > + * Returns: the modified @value. > + */ > +static inline uint16_t deposit16(uint16_t value, int start, int > length, > + uint16_t fieldval) > +{ > + uint16_t mask; > + assert(start >= 0 && length > 0 && length <= 16 - start); > + mask = (~0U >> (16 - length)) << start; > + return (value & ~mask) | ((fieldval << start) & mask); > +} > + > +static void vector_tail_amo(CPURISCVState *env, int vreg, int > index, int width) > +{ > + switch (width) { > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 64: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_common(CPURISCVState *env, int vreg, int > index, > + int width) > +{ > + switch (width) { > + case 8: > + env->vfp.vreg[vreg].u8[index] = 0; > + break; > + case 16: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 64: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_segment(CPURISCVState *env, int vreg, int > index, > + int width, int nf, int lmul) > +{ > + switch (width) { > + case 8: > + while (nf >= 0) { > + env->vfp.vreg[vreg + nf * lmul].u8[index] = 0; > + nf--; > + } > + break; > + case 16: > + while (nf >= 0) { > + env->vfp.vreg[vreg + nf * lmul].u16[index] = 0; > + nf--; > + } > + break; > + case 32: > + while (nf >= 0) { > + env->vfp.vreg[vreg + nf * lmul].u32[index] = 0; > + nf--; > + } > + break; > + case 64: > + while (nf >= 0) { > + env->vfp.vreg[vreg + nf * lmul].u64[index] = 0; > + nf--; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_widen(CPURISCVState *env, int vreg, int > index, > + int width) > +{ > + switch (width) { > + case 8: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 16: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, 
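
There's no need to carry a private deposit16() with its full doc comment:
deposit32() from "qemu/bitops.h" handles 16-bit fields fine, e.g.

    env->vfp.vreg[reg].u16[idx] =
        deposit32(env->vfp.vreg[reg].u16[idx], pos, mlen, result);

and for that matter vector_mask_result() itself could be a single
deposit64() per width instead of the hand-built mask.
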
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_narrow(CPURISCVState *env, int vreg, int > index, > + int width) > +{ > + switch (width) { > + case 8: > + env->vfp.vreg[vreg].u8[index] = 0; > + break; > + case 16: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_fcommon(CPURISCVState *env, int vreg, int > index, > + int width) > +{ > + switch (width) { > + case 16: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 64: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_fwiden(CPURISCVState *env, int vreg, int > index, > + int width) > +{ > + switch (width) { > + case 16: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_fnarrow(CPURISCVState *env, int vreg, int > index, > + int width) > +{ > + switch (width) { > + case 16: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > +static inline int vector_get_carry(CPURISCVState *env, int width, > int lmul, > + int index) > +{ > + int mlen = width / lmul; > + int idx = (index * mlen) / 8; > + int pos = (index * mlen) % 8; > + > + return (env->vfp.vreg[0].u8[idx] >> pos) & 0x1; > +} > + > +static inline void vector_get_layout(CPURISCVState *env, int > width, int lmul, > + int index, int *idx, int *pos) > +{ > + int mlen = width / lmul; > + *idx = (index * mlen) / 8; > + *pos = (index * mlen) % 8; > +} > + > +static bool vector_lmul_check_reg(CPURISCVState *env, uint32_t lmul, > + uint32_t reg, bool widen) > +{ > + int legal = widen ? 
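
The growing family of vector_tail_*() variants are all "zero an element of
size f(width)". Most can collapse onto the common one, e.g.:

    static void vector_tail_widen(CPURISCVState *env, int vreg, int index,
                                  int width)
    {
        /* a widening op's destination element is twice the source width */
        vector_tail_common(env, vreg, index, 2 * width);
    }

and vector_tail_fnarrow/narrow are vector_tail_common minus cases the
callers' width checks already rule out. Similarly vector_get_carry() below
is vector_elem_mask() without the vm short-circuit - they could share.
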
(lmul * 2) : lmul; > + > + if ((lmul != 1 && lmul != 2 && lmul != 4 && lmul != 8) || > + (lmul == 8 && widen)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return false; > + } > + > + if (reg % legal != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return false; > + } > + return true; > +} > + > +static inline uint64_t u64xu64_lh(uint64_t a, uint64_t b) > +{ > + uint64_t hi_64, carry; > + > + > + /* first get the whole product in {hi_64, lo_64} */ > + uint64_t a_hi = a >> 32; > + uint64_t a_lo = (uint32_t)a; > + uint64_t b_hi = b >> 32; > + uint64_t b_lo = (uint32_t)b; > + > + /* > + * a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) > + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + > + * (a_lo * b_hi) << 32 + a_lo * b_lo > + * = {hi_64, lo_64} > + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo > * b_lo)) >> 64 > + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry > + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * > b_lo) >> 32) >> 32 > + */ > + > + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + (uint64_t)(uint32_t)(a_lo * b_hi) + > + ((a_lo * b_lo) >> 32)) >> 32; > + > + hi_64 = a_hi * b_hi + > + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + > + carry; > + > + return hi_64; > +} > + > + > +static inline int64_t s64xu64_lh(int64_t a, uint64_t b) > +{ > + uint64_t abs_a = a; > + uint64_t lo_64, hi_64, carry; > + > + if (a < 0) { > + abs_a = ~a + 1; > + } > + > + /* first get the whole product in {hi_64, lo_64} */ > + uint64_t a_hi = abs_a >> 32; > + uint64_t a_lo = (uint32_t)abs_a; > + uint64_t b_hi = b >> 32; > + uint64_t b_lo = (uint32_t)b; > + > + /* > + * abs_a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) > + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + > + * (a_lo * b_hi) << 32 + a_lo * b_lo > + * = {hi_64, lo_64} > + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo > * b_lo)) >> 64 > + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry > + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * > b_lo) >> 32) >> 32 > + */ > + > + lo_64 = abs_a * b; > + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + (uint64_t)(uint32_t)(a_lo * b_hi) + > + ((a_lo * b_lo) >> 32)) >> 32; > + > + hi_64 = a_hi * b_hi + > + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + > + carry; > + if ((a ^ b) & SIGNBIT64) { > + lo_64 = ~lo_64; > + hi_64 = ~hi_64; > + if (lo_64 == MAX_U64) { > + lo_64 = 0; > + hi_64 += 1; > + } else { > + lo_64 += 1; > + } > + } > + return hi_64; > +} > + > + > +static inline int64_t s64xs64_lh(int64_t a, int64_t b) > +{ > + uint64_t abs_a = a, abs_b = b; > + uint64_t lo_64, hi_64, carry; > + > + if (a < 0) { > + abs_a = ~a + 1; > + } > + if (b < 0) { > + abs_b = ~b + 1; > + } > + > + /* first get the whole product in {hi_64, lo_64} */ > + uint64_t a_hi = abs_a >> 32; > + uint64_t a_lo = (uint32_t)abs_a; > + uint64_t b_hi = abs_b >> 32; > + uint64_t b_lo = (uint32_t)abs_b; > + > + /* > + * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) > + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + > + * (a_lo * b_hi) << 32 + a_lo * b_lo > + * = {hi_64, lo_64} > + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo > * b_lo)) >> 64 > + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry > + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * > b_lo) >> 32) >> 32 > + */ > + > + lo_64 = abs_a * abs_b; > + carry = ((uint64_t)(uint32_t)(a_hi * 
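
Besides the mulu64()/muls64() point above, s64xu64_lh() looks wrong for
large unsigned b: the sign fix-up is keyed on "(a ^ b) & SIGNBIT64", but b
is *unsigned* here, so when a < 0 and b has bit 63 set no correction is
applied (and when a >= 0 with b >= 2^63 it corrects when it shouldn't).
The usual formulation only looks at a:

    static inline int64_t s64xu64_lh(int64_t a, uint64_t b)
    {
        uint64_t lo, hi;

        mulu64(&lo, &hi, (uint64_t)a, b);
        /* re-interpret a as signed: subtract b from the high half */
        if (a < 0) {
            hi -= b;
        }
        return hi;
    }
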
b_lo) + > + (uint64_t)(uint32_t)(a_lo * b_hi) + > + ((a_lo * b_lo) >> 32)) >> 32; > + > + hi_64 = a_hi * b_hi + > + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + > + carry; > + > + if ((a ^ b) & SIGNBIT64) { > + lo_64 = ~lo_64; > + hi_64 = ~hi_64; > + if (lo_64 == MAX_U64) { > + lo_64 = 0; > + hi_64 += 1; > + } else { > + lo_64 += 1; > + } > + } > + return hi_64; > +} > + > +void VECTOR_HELPER(vsetvl)(CPURISCVState *env, uint32_t rs1, > uint32_t rs2, > + uint32_t rd) > +{ > + int sew, max_sew, vlmax, vl; > + > + if (rs2 == 0) { > + vector_vtype_set_ill(env); > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + env->vfp.vtype = env->gpr[rs2]; > + sew = 1 << vector_get_width(env) / 8; > + max_sew = sizeof(target_ulong); > + > + > + if (env->misa & RVD) { > + max_sew = max_sew > 8 ? max_sew : 8; > + } else if (env->misa & RVF) { > + max_sew = max_sew > 4 ? max_sew : 4; > + } > + if (sew > max_sew) { > + vector_vtype_set_ill(env); > + return; > + } > + > + vlmax = vector_get_vlmax(env); > + if (rs1 == 0) { > + vl = vlmax; > + } else if (env->gpr[rs1] <= vlmax) { > + vl = env->gpr[rs1]; > + } else if (env->gpr[rs1] < 2 * vlmax) { > + vl = ceil(env->gpr[rs1] / 2); > + } else { > + vl = vlmax; > + } > + env->vfp.vl = vl; > + env->gpr[rd] = vl; > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsetvli)(CPURISCVState *env, uint32_t rs1, > uint32_t zimm, > + uint32_t rd) > +{ > + int sew, max_sew, vlmax, vl; > + > + env->vfp.vtype = zimm; > + sew = vector_get_width(env) / 8; > + max_sew = sizeof(target_ulong); > + > + if (env->misa & RVD) { > + max_sew = max_sew > 8 ? max_sew : 8; > + } else if (env->misa & RVF) { > + max_sew = max_sew > 4 ? max_sew : 4; > + } > + if (sew > max_sew) { > + vector_vtype_set_ill(env); > + return; > + } > + > + vlmax = vector_get_vlmax(env); > + if (rs1 == 0) { > + vl = vlmax; > + } else if (env->gpr[rs1] <= vlmax) { > + vl = env->gpr[rs1]; > + } else if (env->gpr[rs1] < 2 * vlmax) { > + vl = ceil(env->gpr[rs1] / 2); > + } else { > + vl = vlmax; > + } > + env->vfp.vl = vl; > + env->gpr[rd] = vl; > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vrgather.vv vd, vs2, vs1, vm # > + * vd[i] = (vs1[i] >= VLMAX) ? 
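
Several problems in vsetvl():

- "sew = 1 << vector_get_width(env) / 8;" - division binds tighter than
  shift, so this is 1 << (width / 8), i.e. 2 for SEW=8 up to 256 for
  SEW=64. vsetvli below gets it right with plain "width / 8".

- "vl = ceil(env->gpr[rs1] / 2);" - the division is *integer* division,
  so the value is already truncated before ceil() ever sees it (and this
  is the only user of <math.h>). The spec's ceil(AVL/2) is:

    vl = (env->gpr[rs1] + 1) / 2;

- "env->vfp.vstart = 0;" sits after the return, so it never executes and
  vstart is never cleared:

    env->vfp.vl = vl;
    env->gpr[rd] = vl;
    env->vfp.vstart = 0;    /* before the return, not after */
    return;

The same dead tail is in vsetvli, which is otherwise identical except for
where the new vtype comes from - the common body wants factoring out.
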
0 : vs2[vs1[i]]; > + */ > +void VECTOR_HELPER(vrgather_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src, src1; > + uint32_t index; > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + index = env->vfp.vreg[src1].u8[j]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u8[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[index]; > + } > + } > + break; > + case 16: > + index = env->vfp.vreg[src1].u16[j]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u16[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[index]; > + } > + } > + break; > + case 32: > + index = env->vfp.vreg[src1].u32[j]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u32[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[index]; > + } > + } > + break; > + case 64: > + index = env->vfp.vreg[src1].u64[j]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u64[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[index]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 
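
The unreachable "return; env->vfp.vstart = 0;" tail appears again here
and, as far as I can tell, in every helper below - I'll stop flagging it
per function, but they all need the store moved before the return.

One ordering nit: vrgather_vv bails out on "vstart >= vl" *before*
validating register alignment with vector_lmul_check_reg(), while
vrgather_vx below validates first. A misaligned register group should
presumably trap regardless of vstart:

    vector_lmul_check_reg(env, lmul, rs1, false);
    vector_lmul_check_reg(env, lmul, rs2, false);
    vector_lmul_check_reg(env, lmul, rd, false);

    if (env->vfp.vstart >= vl) {
        return;
    }
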
0 : > vs2[rs1] */ > +void VECTOR_HELPER(vrgather_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src; > + uint32_t index; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + index = env->gpr[rs1]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u8[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[index]; > + } > + } > + break; > + case 16: > + index = env->gpr[rs1]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u16[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[index]; > + } > + } > + break; > + case 32: > + index = env->gpr[rs1]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u32[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[index]; > + } > + } > + break; > + case 64: > + index = env->gpr[rs1]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u64[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[index]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vrgather.vi <http://vrgather.vi> vd, vs2, imm, vm # vd[i] = > (imm >= VLMAX) ? 
0 : vs2[imm] */ > +void VECTOR_HELPER(vrgather_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src; > + uint32_t index; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + index = rs1; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u8[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[index]; > + } > + } > + break; > + case 16: > + index = rs1; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u16[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[index]; > + } > + } > + break; > + case 32: > + index = rs1; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u32[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[index]; > + } > + } > + break; > + case 64: > + index = rs1; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u64[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[index]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vext_x_v)(CPURISCVState *env, uint32_t rs1, > uint32_t rs2, > + uint32_t rd) > +{ > + int width; > + uint64_t elem; > + target_ulong index = env->gpr[rs1]; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + > + elem = vector_get_iu_elem(env, width, rs2, index); > + if (index >= VLEN / width) { /* index is too big */ > + env->gpr[rd] = 0; > + } else { > + env->gpr[rd] = elem; > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmv.f.s rd, vs2 # rd = vs2[0] (rs1=0) */ > +void VECTOR_HELPER(vfmv_f_s)(CPURISCVState *env, uint32_t rs1, > uint32_t rs2, > + uint32_t rd) > +{ > + int width, flen; > + uint64_t mask; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->misa & RVD) { > + flen = 8; > + } else if (env->misa & RVF) { > + flen = 4; > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = 
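
vext_x_v() reads the element *before* it range-checks the index. Since
index comes straight from a guest register, vreg[rs2].u8[index] can read
far outside the vreg array before the check decides to ignore the result.
The check has to come first:

    if (index >= VLEN / width) {   /* index is too big: result is 0 */
        env->gpr[rd] = 0;
    } else {
        env->gpr[rd] = vector_get_iu_elem(env, width, rs2, index);
    }
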
vector_get_width(env); > + mask = (~((uint64_t)0)) << width; > + > + if (width == 8) { > + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s8[0] | mask; > + } else if (width == 16) { > + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s16[0] | > mask; > + } else if (width == 32) { > + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s32[0] | > mask; > + } else if (width == 64) { > + if (flen == 4) { > + env->fpr[rd] = env->vfp.vreg[rs2].s64[0] & 0xffffffff; > + } else { > + env->fpr[rd] = env->vfp.vreg[rs2].s64[0]; > + } > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmv.s.x vd, rs1 # vd[0] = rs1 */ > +void VECTOR_HELPER(vmv_s_x)(CPURISCVState *env, uint32_t rs1, > uint32_t rs2, > + uint32_t rd) > +{ > + int width; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= env->vfp.vl) { > + return; > + } > + > + memset(&env->vfp.vreg[rd].u8[0], 0, VLEN / 8); > + width = vector_get_width(env); > + > + if (width == 8) { > + env->vfp.vreg[rd].u8[0] = env->gpr[rs1]; > + } else if (width == 16) { > + env->vfp.vreg[rd].u16[0] = env->gpr[rs1]; > + } else if (width == 32) { > + env->vfp.vreg[rd].u32[0] = env->gpr[rs1]; > + } else if (width == 64) { > + env->vfp.vreg[rd].u64[0] = env->gpr[rs1]; > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2 = 0) */ > +void VECTOR_HELPER(vfmv_s_f)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, flen; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= env->vfp.vl) { > + return; > + } > + if (env->misa & RVD) { > + flen = 8; > + } else if (env->misa & RVF) { > + flen = 4; > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + > + if (width == 8) { > + env->vfp.vreg[rd].u8[0] = env->fpr[rs1]; > + } else if (width == 16) { > + env->vfp.vreg[rd].u16[0] = env->fpr[rs1]; > + } else if (width == 32) { > + env->vfp.vreg[rd].u32[0] = env->fpr[rs1]; > + } else if (width == 64) { > + if (flen == 4) { /* 1-extended to FLEN bits */ > + env->vfp.vreg[rd].u64[0] = (uint64_t)env->fpr[rs1] > + | 0xffffffff00000000; > + } else { > + env->vfp.vreg[rd].u64[0] = env->fpr[rs1]; > + } > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ > +void VECTOR_HELPER(vslideup_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax, offset; > + int i, j, dest, src, k; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + offset = env->gpr[rs1]; > + > + if (offset < env->vfp.vstart) { > + offset = env->vfp.vstart; > + } > 
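
vmv_s_x() memset()s the whole destination register before it has validated
the width, so an illegal SEW tears up vd and *then* traps. An illegal
instruction shouldn't have side effects - validate first:

    width = vector_get_width(env);
    if (width != 8 && width != 16 && width != 32 && width != 64) {
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
        return;
    }
    memset(&env->vfp.vreg[rd].u8[0], 0, VLEN / 8);

Also vmv_s_x zeroes the tail of vd but vfmv_s_f doesn't - is that
asymmetry really what 0.7.1 specifies? I'd expect them to match.
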
+ > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i - offset) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i - offset) % (VLEN / width); > + if (i < offset) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vslideup.vi <http://vslideup.vi> vd, vs2, rs1, vm # vd[i+rs1] > = vs2[i] */ > +void VECTOR_HELPER(vslideup_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax, offset; > + int i, j, dest, src, k; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + offset = rs1; > + > + if (offset < env->vfp.vstart) { > + offset = env->vfp.vstart; > + } > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i - offset) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i - offset) % (VLEN / width); > + if (i < offset) { > + continue; > + } else if (i < vl) { > + if (width == 8) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[k]; > + } > + } else if (width == 16) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } > + } else if (width == 32) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } > + } else if (width == 64) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ > +void VECTOR_HELPER(vslide1up_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src, k; > + uint64_t s1; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; 
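
In both vslideup variants the clamp

    if (offset < env->vfp.vstart) {
        offset = env->vfp.vstart;
    }

changes the *slide distance*, not just the first destination element
written: the source index is computed as "i - offset", so whenever
vstart > OFFSET every element lands in the wrong place. Unless I'm
misreading the vstart semantics, vstart should only gate the loop, e.g.:

    offset = env->gpr[rs1];
    start = MAX(offset, env->vfp.vstart);   /* MAX from osdep.h */

    for (i = 0; i < vlmax; i++) {
        ...
        src = rs2 + ((i - offset) / (VLEN / width));
        k = (i - offset) % (VLEN / width);
        if (i < start) {
            continue;
        } else if (i < vl) {

(vslideup_vi also swaps the switch for an if/else-if chain - worth
keeping the two consistent, whichever way you go.)
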
> + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + s1 = env->gpr[rs1]; > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i - 1) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i - 1) % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i == 0 && env->vfp.vstart == 0) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = s1; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = s1; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = s1; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = s1; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src].u8[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i + rs1] */ > +void VECTOR_HELPER(vslidedown_vx)(CPURISCVState *env, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax, offset; > + int i, j, dest, src, k; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + offset = env->gpr[rs1]; > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i + offset) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i + offset) % (VLEN / width); > + if (i < offset) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[k]; > + } else { > + env->vfp.vreg[dest].u8[j] = 0; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } else { > + env->vfp.vreg[dest].u16[j] = 0; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + 
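
vslidedown_vx() skips with "if (i < offset) continue;" - shouldn't that be
"i < env->vfp.vstart"? For a slide *down* the low destination elements are
written too (vd[i] = vs2[i + offset]); as posted the first OFFSET elements
of vd are silently left untouched:

    if (i < env->vfp.vstart) {
        continue;
    } else if (i < vl) {

vslidedown_vi below has the same pattern.
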
env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } else { > + env->vfp.vreg[dest].u32[j] = 0; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } else { > + env->vfp.vreg[dest].u64[j] = 0; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vslidedown_vi)(CPURISCVState *env, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax, offset; > + int i, j, dest, src, k; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + offset = rs1; > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i + offset) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i + offset) % (VLEN / width); > + if (i < offset) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[k]; > + } else { > + env->vfp.vreg[dest].u8[j] = 0; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } else { > + env->vfp.vreg[dest].u16[j] = 0; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } else { > + env->vfp.vreg[dest].u32[j] = 0; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } else { > + env->vfp.vreg[dest].u64[j] = 0; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vslide1down.vx vd, vs2, rs1, vm # vd[vl - 1]=x[rs1], vd[i] = > vs2[i + 1] */ > +void VECTOR_HELPER(vslide1down_vx)(CPURISCVState *env, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src, k; > + uint64_t s1; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + s1 = env->gpr[rs1]; > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i + 1) / (VLEN / width)); > + j = i % (VLEN / width); > + k = 
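
Also in the slidedown pair: "offset" is an int loaded from env->gpr[rs1].
A large guest value truncates and can go negative, at which point
"i + offset < vlmax" passes and vreg[src].u8[k] indexes with a negative
src. Safer to keep it unsigned and bound it once up front, something like:

    target_ulong offset = env->gpr[rs1];
    ...
            if (offset >= (target_ulong)vlmax || i + offset >= vlmax) {
                /* sliding in from beyond vlmax reads as zero */
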
(i + 1) % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i == vl - 1 && i >= env->vfp.vstart) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = s1; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = s1; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = s1; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = s1; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else if (i < vl - 1) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src].u8[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vcompress.vm vd, vs2, vs1 > + * Compress into vd elements of vs2 where vs1 is enabled > + */ > +void VECTOR_HELPER(vcompress_vm)(CPURISCVState *env, uint32_t > rs1, uint32_t rs2, > + uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src; > + uint32_t vd_idx, num = 0; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, 1) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + /* zeroed all elements */ > + for (i = 0; i < lmul; i++) { > + memset(&env->vfp.vreg[rd + i].u64[0], 0, VLEN / 8); > + } > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (num / (VLEN / width)); > + src = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + vd_idx = num % (VLEN / width); > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_mask_reg(env, rs1, width, lmul, i)) { > + env->vfp.vreg[dest].u8[vd_idx] = > + env->vfp.vreg[src].u8[j]; > + num++; > + } > + break; > + case 16: > + if (vector_mask_reg(env, rs1, width, lmul, i)) { > + env->vfp.vreg[dest].u16[vd_idx] = > + env->vfp.vreg[src].u16[j]; > + num++; > + } > + break; > + case 32: > + if (vector_mask_reg(env, rs1, width, lmul, i)) { > + env->vfp.vreg[dest].u32[vd_idx] = > + env->vfp.vreg[src].u32[j]; > + num++; > + } > + break; > + case 64: > + if (vector_mask_reg(env, rs1, width, lmul, i)) { > + env->vfp.vreg[dest].u64[vd_idx] = > + env->vfp.vreg[src].u64[j]; > + num++; > + } > + break; > + default: > + riscv_raise_exception(env, > 
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadd_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src1].u8[j] > + + env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + + env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + + env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + + env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadd_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + + env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, 
dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadd_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredsum.vs vd, vs2, vs1, vm # vd[0] = sum(vs1[0] , vs2[*]) */ > +void VECTOR_HELPER(vredsum_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t sum = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u8[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u8[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = sum; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u16[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u16[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = sum; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u32[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u32[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = sum; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) 
{ > + sum += env->vfp.vreg[src2].u64[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u64[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = sum; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfadd.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfadd_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_add( > + env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_add( > + env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_add( > + env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfadd.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfadd_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_add( > + env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_add( > + env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_add( > + env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vredand.vs vd, vs2, vs1, vm # vd[0] = and( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredand_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t res = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res &= env->vfp.vreg[src2].u8[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = res; > + } > + break; > + case 16: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res &= env->vfp.vreg[src2].u16[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = res; > + } > + break; > + case 32: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res &= env->vfp.vreg[src2].u32[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = res; > + } > + break; > + case 64: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res &= env->vfp.vreg[src2].u64[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = res; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfredsum.vs vd, vs2, vs1, vm # Unordered sum */ > +void VECTOR_HELPER(vfredsum_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + float16 sum16 = 0.0f; > + float32 sum32 = 0.0f; > + float64 sum64 = 0.0f; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 16: > + if (i == 
0) { > + sum16 = env->vfp.vreg[rs1].f16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum16 = float16_add(sum16, > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f16[0] = sum16; > + } > + break; > + case 32: > + if (i == 0) { > + sum32 = env->vfp.vreg[rs1].f32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum32 = float32_add(sum32, > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f32[0] = sum32; > + } > + break; > + case 64: > + if (i == 0) { > + sum64 = env->vfp.vreg[rs1].f64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum64 = float64_add(sum64, > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f64[0] = sum64; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsub_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] > + - env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + - env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + - env->vfp.vreg[src1].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + - env->vfp.vreg[src1].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsub_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + 
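
This vstart/vl/tail skeleton, plus the four-way switch on the element
width, is open-coded in every helper in this 26k-line file, which is a
large part of why the patch is so hard to review. For the
vector-vector integer ops everything except the per-element expression
is identical, so a sketch like (untested, macro name invented here):

#define VEC_BINOP_CASE(BITS, OP)                                   \
    case BITS:                                                     \
        if (vector_elem_mask(env, vm, width, lmul, i)) {           \
            env->vfp.vreg[dest].u##BITS[j] =                       \
                env->vfp.vreg[src2].u##BITS[j] OP                  \
                env->vfp.vreg[src1].u##BITS[j];                    \
        }                                                          \
        break;

expanded inside the existing loop as VEC_BINOP_CASE(8, -) and friends
would collapse most of this file. Better still, plain add/sub/min/max
could go through the tcg_gen_gvec_* expanders and avoid the helper
call entirely.
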
if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] > + - env->gpr[rs1]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + - env->gpr[rs1]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + - env->gpr[rs1]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + - (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredor.vs vd, vs2, vs1, vm # vd[0] = or( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredor_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t res = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res |= env->vfp.vreg[src2].u8[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = res; > + } > + break; > + case 16: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res |= env->vfp.vreg[src2].u16[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = res; > + } > + break; > + case 32: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res |= env->vfp.vreg[src2].u32[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = res; > + } > + break; > + case 64: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res |= env->vfp.vreg[src2].u64[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = res; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfsub.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfsub_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + 
return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_sub( > + env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[src1].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_sub( > + env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[src1].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_sub( > + env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[src1].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfsub.vf vd, vs2, rs1, vm # Vector-scalar vd[i] = vs2[i] - > f[rs1] */ > +void VECTOR_HELPER(vfsub_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_sub( > + env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_sub( > + env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_sub( > + env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vrsub_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || 
vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + - env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + - env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + - env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + - env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vrsub_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + - env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + - env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + - env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + - env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredxor.vs vd, vs2, vs1, vm # vd[0] = xor( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredxor_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t res = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if 
(env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res ^= env->vfp.vreg[src2].u8[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = res; > + } > + break; > + case 16: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res ^= env->vfp.vreg[src2].u16[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = res; > + } > + break; > + case 32: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res ^= env->vfp.vreg[src2].u32[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = res; > + } > + break; > + case 64: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res ^= env->vfp.vreg[src2].u64[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = res; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfredosum.vs vd, vs2, vs1, vm # Ordered sum */ > +void VECTOR_HELPER(vfredosum_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + helper_vector_vfredsum_vs(env, vm, rs1, rs2, rd); > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vminu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] <= > + env->vfp.vreg[src2].u8[j]) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src1].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] <= > + env->vfp.vreg[src2].u16[j]) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src1].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] <= > + env->vfp.vreg[src2].u32[j]) { > + 
env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src1].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] <= > + env->vfp.vreg[src2].u64[j]) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src1].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vminu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].u8[j]) { > + env->vfp.vreg[dest].u8[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].u16[j]) { > + env->vfp.vreg[dest].u16[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].u32[j]) { > + env->vfp.vreg[dest].u32[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) <= > + env->vfp.vreg[src2].u64[j]) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]); > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredminu.vs vd, vs2, vs1, vm # vd[0] = minu( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredminu_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t minu = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + 
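
All of these reduction helpers loop to vlmax even though the body is
guarded by i < vl, so every iteration past vl is a no-op; the loop
bound may as well be vl:

    for (i = 0; i < vl; i++) {

The i == 0 / i == vl - 1 special cases inside every switch arm would
also read better hoisted out of the loop as an explicit "load the
accumulator from vs1[0], store it to vd[0] at the end" pair. Related:
vfredosum is implemented as a tail call to the vfredsum helper, which
is only correct because that loop happens to accumulate in element
order -- worth a comment saying the unordered sum is implemented as an
ordered one.
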
vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + minu = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (minu > env->vfp.vreg[src2].u8[j]) { > + minu = env->vfp.vreg[src2].u8[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = minu; > + } > + break; > + case 16: > + if (i == 0) { > + minu = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (minu > env->vfp.vreg[src2].u16[j]) { > + minu = env->vfp.vreg[src2].u16[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = minu; > + } > + break; > + case 32: > + if (i == 0) { > + minu = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (minu > env->vfp.vreg[src2].u32[j]) { > + minu = env->vfp.vreg[src2].u32[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = minu; > + } > + break; > + case 64: > + if (i == 0) { > + minu = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (minu > env->vfp.vreg[src2].u64[j]) { > + minu = env->vfp.vreg[src2].u64[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = minu; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmin.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfmin_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_minnum( > + env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_minnum( > + env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_minnum( > + env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; 
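
These tail cases are all missing their break, so a 16-bit element falls
through and clobbers the f32/f64 views on the way down, and every valid
width then reaches default and raises an illegal instruction exception
for a perfectly legal execution with vl < vlmax. Presumably this whole
switch was meant to be the vector_tail_fcommon(env, dest, j, width)
call that vfadd and vfsub above use. The same broken switch is repeated
in vfmin.vf and in both vfmax helpers below.
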
> +            default:
> +                riscv_raise_exception(env,
> RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmin.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfmin_vf)(CPURISCVState *env, uint32_t vm,
> uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int width, lmul, vl, vlmax;
> +    int i, j, dest, src2;
> +
> +    lmul = vector_get_lmul(env);
> +    vl = env->vfp.vl;
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul,
> vm, rd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    vector_lmul_check_reg(env, lmul, rs2, false);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    if (env->vfp.vstart >= vl) {
> +        return;
> +    }
> +
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        dest = rd + (i / (VLEN / width));
> +        src2 = rs2 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_minnum(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f16[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_minnum(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f32[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_minnum(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f64[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env,
> RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            switch (width) {
> +            case 16:
> +                env->vfp.vreg[dest].f16[j] = 0;
> +            case 32:
> +                env->vfp.vreg[dest].f32[j] = 0;
> +            case 64:
> +                env->vfp.vreg[dest].f64[j] = 0;
> +            default:
> +                riscv_raise_exception(env,
> RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        }
> +    }
> +    return;
> +
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vmin_vv)(CPURISCVState *env, uint32_t vm,
> uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int i, j, vl;
> +    uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul,
> vm, rd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    vector_lmul_check_reg(env, lmul, rs1, false);
> +    vector_lmul_check_reg(env, lmul, rs2, false);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src1 = rs1 + (i / (VLEN / width));
> +        src2 = rs2 + (i / (VLEN / width));
> +        dest = rd + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src1].s8[j] <=
> +                        env->vfp.vreg[src2].s8[j]) {
> +                        env->vfp.vreg[dest].s8[j] =
> +                            env->vfp.vreg[src1].s8[j];
> +                    } else {
> +                        env->vfp.vreg[dest].s8[j] =
> +                            env->vfp.vreg[src2].s8[j];
> +                    }
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    if (env->vfp.vreg[src1].s16[j] <=
> +                        env->vfp.vreg[src2].s16[j]) {
> +                        env->vfp.vreg[dest].s16[j] =
> +                            env->vfp.vreg[src1].s16[j];
> +                    } else {
> +
env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src2].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s32[j] <= > + env->vfp.vreg[src2].s32[j]) { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src1].s32[j]; > + } else { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src2].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s64[j] <= > + env->vfp.vreg[src2].s64[j]) { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src1].s64[j]; > + } else { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src2].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmin_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int8_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].s8[j]) { > + env->vfp.vreg[dest].s8[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src2].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int16_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].s16[j]) { > + env->vfp.vreg[dest].s16[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src2].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int32_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].s32[j]) { > + env->vfp.vreg[dest].s32[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src2].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int64_t)extend_gpr(env->gpr[rs1]) <= > + env->vfp.vreg[src2].s64[j]) { > + env->vfp.vreg[dest].s64[j] = > + (int64_t)extend_gpr(env->gpr[rs1]); > + } else { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src2].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredmin.vs vd, vs2, vs1, vm # vd[0] = min( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredmin_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + int64_t min = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if 
(env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + min = env->vfp.vreg[rs1].s8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (min > env->vfp.vreg[src2].s8[j]) { > + min = env->vfp.vreg[src2].s8[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s8[0] = min; > + } > + break; > + case 16: > + if (i == 0) { > + min = env->vfp.vreg[rs1].s16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (min > env->vfp.vreg[src2].s16[j]) { > + min = env->vfp.vreg[src2].s16[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s16[0] = min; > + } > + break; > + case 32: > + if (i == 0) { > + min = env->vfp.vreg[rs1].s32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (min > env->vfp.vreg[src2].s32[j]) { > + min = env->vfp.vreg[src2].s32[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s32[0] = min; > + } > + break; > + case 64: > + if (i == 0) { > + min = env->vfp.vreg[rs1].s64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (min > env->vfp.vreg[src2].s64[j]) { > + min = env->vfp.vreg[src2].s64[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s64[0] = min; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfredmin.vs vd, vs2, vs1, vm # Minimum value */ > +void VECTOR_HELPER(vfredmin_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + float16 min16 = 0.0f; > + float32 min32 = 0.0f; > + float64 min64 = 0.0f; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 16: > + if (i == 0) { > + min16 = env->vfp.vreg[rs1].f16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + min16 = float16_minnum(min16, > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f16[0] = min16; > + } > + break; > + case 32: > + if (i == 0) { > + min32 = env->vfp.vreg[rs1].f32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + min32 = float32_minnum(min32, > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f32[0] = min32; > + } > + break; > + case 64: > + if (i == 0) { > + min64 = env->vfp.vreg[rs1].f64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + min64 = 
float64_minnum(min64, > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f64[0] = min64; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmaxu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] >= > + env->vfp.vreg[src2].u8[j]) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src1].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] >= > + env->vfp.vreg[src2].u16[j]) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src1].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] >= > + env->vfp.vreg[src2].u32[j]) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src1].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] >= > + env->vfp.vreg[src2].u64[j]) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src1].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmaxu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] >= > + 
env->vfp.vreg[src2].u8[j]) { > + env->vfp.vreg[dest].u8[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].u16[j]) { > + env->vfp.vreg[dest].u16[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].u32[j]) { > + env->vfp.vreg[dest].u32[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) >= > + env->vfp.vreg[src2].u64[j]) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]); > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredmaxu.vs vd, vs2, vs1, vm # vd[0] = maxu( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredmaxu_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t maxu = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + maxu = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (maxu < env->vfp.vreg[src2].u8[j]) { > + maxu = env->vfp.vreg[src2].u8[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = maxu; > + } > + break; > + case 16: > + if (i == 0) { > + maxu = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (maxu < env->vfp.vreg[src2].u16[j]) { > + maxu = env->vfp.vreg[src2].u16[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = maxu; > + } > + break; > + case 32: > + if (i == 0) { > + maxu = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (maxu < env->vfp.vreg[src2].u32[j]) { > + maxu = env->vfp.vreg[src2].u32[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = maxu; > + } > + break; > + case 64: > + if (i == 0) { > + maxu = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (maxu < env->vfp.vreg[src2].u64[j]) { > + maxu = env->vfp.vreg[src2].u64[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = maxu; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; 
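
This "return; env->vfp.vstart = 0;" tail here (and at the end of almost
every helper in the patch) puts the assignment after the return, so it
is dead code and vstart never actually gets cleared on these paths.
Presumably

    env->vfp.vstart = 0;
    return;

is what was meant (or the bare return can simply be dropped). Worth a
sweep over the whole file.
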
> + env->vfp.vstart = 0; > +} > + > +/*vfmax.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfmax_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_maxnum( > + env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_maxnum( > + env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_maxnum( > + env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmax.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfmax_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_maxnum( > + env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_maxnum( > + env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = 
float64_maxnum( > + env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmax_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s8[j] >= > + env->vfp.vreg[src2].s8[j]) { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src1].s8[j]; > + } else { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src2].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s16[j] >= > + env->vfp.vreg[src2].s16[j]) { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src1].s16[j]; > + } else { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src2].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s32[j] >= > + env->vfp.vreg[src2].s32[j]) { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src1].s32[j]; > + } else { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src2].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s64[j] >= > + env->vfp.vreg[src2].s64[j]) { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src1].s64[j]; > + } else { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src2].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmax_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % 
(VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int8_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].s8[j]) { > + env->vfp.vreg[dest].s8[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src2].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int16_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].s16[j]) { > + env->vfp.vreg[dest].s16[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src2].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int32_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].s32[j]) { > + env->vfp.vreg[dest].s32[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src2].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int64_t)extend_gpr(env->gpr[rs1]) >= > + env->vfp.vreg[src2].s64[j]) { > + env->vfp.vreg[dest].s64[j] = > + (int64_t)extend_gpr(env->gpr[rs1]); > + } else { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src2].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredmax.vs vd, vs2, vs1, vm # vd[0] = max( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredmax_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + int64_t max = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + max = env->vfp.vreg[rs1].s8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (max < env->vfp.vreg[src2].s8[j]) { > + max = env->vfp.vreg[src2].s8[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s8[0] = max; > + } > + break; > + case 16: > + if (i == 0) { > + max = env->vfp.vreg[rs1].s16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (max < env->vfp.vreg[src2].s16[j]) { > + max = env->vfp.vreg[src2].s16[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s16[0] = max; > + } > + break; > + case 32: > + if (i == 0) { > + max = env->vfp.vreg[rs1].s32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (max < env->vfp.vreg[src2].s32[j]) { > + max = env->vfp.vreg[src2].s32[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s32[0] = max; > + } > + break; > + case 64: > + if (i == 0) { > + max = env->vfp.vreg[rs1].s64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (max < env->vfp.vreg[src2].s64[j]) { > + max = env->vfp.vreg[src2].s64[j]; > + } > + } > + if (i == 
vl - 1) { > + env->vfp.vreg[rd].s64[0] = max; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfredmax.vs vd, vs2, vs1, vm # Maximum value */ > +void VECTOR_HELPER(vfredmax_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + float16 max16 = 0.0f; > + float32 max32 = 0.0f; > + float64 max64 = 0.0f; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 16: > + if (i == 0) { > + max16 = env->vfp.vreg[rs1].f16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + max16 = float16_maxnum(max16, > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f16[0] = max16; > + } > + break; > + case 32: > + if (i == 0) { > + max32 = env->vfp.vreg[rs1].f32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + max32 = float32_maxnum(max32, > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f32[0] = max32; > + } > + break; > + case 64: > + if (i == 0) { > + max64 = env->vfp.vreg[rs1].f64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + max64 = float64_maxnum(max64, > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f64[0] = max64; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfsgnj.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfsgnj_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + env->vfp.vreg[src1].f16[j], > + 0, > + 15, > + env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + 
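A problem shared by the three reductions above (vredmaxu.vs, vredmax.vs,
vfredmax.vs): the destination register is zeroed via env->vfp.vreg[rd].u64[i]
= 0 before the seed vs1[0] and the vs2 elements are read, so if rd aliases
rs1 or a register of the rs2 group (I don't see anything rejecting that
combination here) the sources are clobbered before they are used.
Accumulating into a local and touching vd only at the end avoids it; an
untested sketch for the 8-bit unsigned case, using only helpers from this
patch:

    uint8_t acc = env->vfp.vreg[rs1].u8[0];   /* read the seed first */
    for (i = 0; i < vl; i++) {
        if (vector_elem_mask(env, vm, width, lmul, i)) {
            uint8_t e = env->vfp.vreg[rs2 + i / (VLEN / 8)].u8[i % (VLEN / 8)];
            if (acc < e) {
                acc = e;
            }
        }
    }
    for (i = 0; i < VLEN / 64; i++) {         /* only now clear vd */
        env->vfp.vreg[rd].u64[i] = 0;
    }
    env->vfp.vreg[rd].u8[0] = acc;

This also removes the i == 0 and i == vl - 1 special cases from inside the
loop.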
env->vfp.vreg[dest].f32[j] = deposit32( > + env->vfp.vreg[src1].f32[j], > + 0, > + 31, > + env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + env->vfp.vreg[src1].f64[j], > + 0, > + 63, > + env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfsgnj.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfsgnj_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + env->fpr[rs1], > + 0, > + 15, > + env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + env->fpr[rs1], > + 0, > + 31, > + env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + env->fpr[rs1], > + 0, > + 63, > + env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vand_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + 
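The tail-element switch in these FP helpers (vfsgnj.vv/.vf here, and the same
shape in vfmax and the other vfsgnj variants) has no breaks: case 16 falls
through 32 and 64 into default and raises an illegal-instruction exception
for every tail element. The integer helpers route the tail through
vector_tail_common(); assuming that helper zeroes one element of the given
width it would drop in here too, otherwise the switch at least needs its
breaks:

    } else {
        switch (width) {
        case 16:
            env->vfp.vreg[dest].f16[j] = 0;
            break;
        case 32:
            env->vfp.vreg[dest].f32[j] = 0;
            break;
        case 64:
            env->vfp.vreg[dest].f64[j] = 0;
            break;
        default:
            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
        }
    }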
continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src1].u8[j] > + & env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + & env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + & env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + & env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vand_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + & env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + & env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + & env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + & env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vand_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + 
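Coming back to the patch-size point: vand/vor/vxor (and most of the integer
ops in this file) differ only in the operator, yet each re-states the whole
width switch. Generating the cases would cut thousands of lines and make the
semantics reviewable at a glance. A rough illustration, macro name invented:

    /* one masked element update per width; OP is a C operator token */
    #define GEN_VV_CASE(BITS, OP)                                   \
        case BITS:                                                  \
            if (vector_elem_mask(env, vm, width, lmul, i)) {        \
                env->vfp.vreg[dest].u##BITS[j] =                    \
                    env->vfp.vreg[src1].u##BITS[j] OP               \
                    env->vfp.vreg[src2].u##BITS[j];                 \
            }                                                       \
            break;

    switch (width) {
    GEN_VV_CASE(8, &)
    GEN_VV_CASE(16, &)
    GEN_VV_CASE(32, &)
    GEN_VV_CASE(64, &)
    default:
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
    }

For pure bitwise ops like these the tcg_gen_gvec_* expanders in the
translator would be better still, but the macro is the minimal step.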
env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + & env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + & env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + & env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + & env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfsgnjn.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfsgnjn_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + ~env->vfp.vreg[src1].f16[j], > + 0, > + 15, > + env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + ~env->vfp.vreg[src1].f32[j], > + 0, > + 31, > + env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + ~env->vfp.vreg[src1].f64[j], > + 0, > + 63, > + env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +/* vfsgnjn.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfsgnjn_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = 
vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + ~env->fpr[rs1], > + 0, > + 15, > + env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + ~env->fpr[rs1], > + 0, > + 31, > + env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + ~env->fpr[rs1], > + 0, > + 63, > + env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vor_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src1].u8[j] > + | env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + | env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + | env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + | env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vor_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + 
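In all of the vector-scalar FP forms (vfmax.vf earlier, vfsgnj.vf and
vfsgnjn.vf above) the scalar operand is the raw low bits of env->fpr[rs1].
If narrower values live NaN-boxed inside the 64-bit register, as the scalar
F-on-D convention has it, an improperly boxed value should presumably read as
the canonical NaN rather than being silently truncated. I haven't checked
what v0.7.1 says here, so take this as a question plus an illustrative
sketch; the helper name is invented:

    /* illustrative: unbox an f16 scalar from a 64-bit fpr */
    static inline float16 fpr_to_f16(uint64_t v, float_status *s)
    {
        if ((v & 0xffffffffffff0000ULL) != 0xffffffffffff0000ULL) {
            return float16_default_nan(s);   /* not properly boxed */
        }
        return (float16)(v & 0xffff);
    }

Related nit: vfsgnjn.vf computes ~env->fpr[rs1] over the whole 64-bit value;
that happens to be fine because deposit16/32/64 only keep its sign bit, but
it is worth a comment.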
} > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + | env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + | env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + | env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + | env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vor_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + | env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + | env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + | env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + | env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfsgnjx.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfsgnjx_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + 
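The order of these prologue checks drifts between helpers: vfmax.vf tests
vstart >= vl before vector_vtype_ill(), vfsgnj.vf and vfsgnjx.vv here test it
after, and the integer helpers do not early-return on it at all. Whether an
illegal vtype actually traps therefore depends on which instruction you hit
and on the current vstart/vl. One shared prologue would pin the order down
and remove duplication; sketch only, built from this patch's own helpers:

    /* raises (and longjmps away) on any illegal combination */
    static void vector_check_common(CPURISCVState *env, uint32_t lmul,
                                    uint32_t vm, uint32_t rd, uintptr_t ra)
    {
        if (vector_vtype_ill(env) ||
            vector_overlap_vm_common(lmul, vm, rd)) {
            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra);
        }
    }

with every helper calling vector_check_common(env, lmul, vm, rd, GETPC())
before it looks at vstart or vl.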
} > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + env->vfp.vreg[src1].f16[j] ^ > + env->vfp.vreg[src2].f16[j], > + 0, > + 15, > + env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + env->vfp.vreg[src1].f32[j] ^ > + env->vfp.vreg[src2].f32[j], > + 0, > + 31, > + env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + env->vfp.vreg[src1].f64[j] ^ > + env->vfp.vreg[src2].f64[j], > + 0, > + 63, > + env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vfsgnjx.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfsgnjx_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + env->fpr[rs1] ^ > + env->vfp.vreg[src2].f16[j], > + 0, > + 15, > + env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + env->fpr[rs1] ^ > + env->vfp.vreg[src2].f32[j], > + 0, > + 31, > + env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + env->fpr[rs1] ^ > + env->vfp.vreg[src2].f64[j], > + 0, > + 63, > + env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > +void 
VECTOR_HELPER(vxor_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src1].u8[j] > + ^ env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + ^ env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + ^ env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + ^ env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vxor_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + ^ env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + ^ env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + ^ env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + ^ env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vxor_vi)(CPURISCVState *env, uint32_t vm, 
> uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + ^ env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + ^ env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + ^ env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + ^ env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadc_vvm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax, carry; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] > + + env->vfp.vreg[src2].u8[j] + carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + + env->vfp.vreg[src2].u16[j] + carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + + env->vfp.vreg[src2].u32[j] + carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + + env->vfp.vreg[src2].u64[j] + carry; > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vadc_vxm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, 
width, src2, dest, vlmax, carry; > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u8[j] + carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u16[j] + carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u32[j] + carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = > (uint64_t)extend_gpr(env->gpr[rs1]) > + + env->vfp.vreg[src2].u64[j] + carry; > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadc_vim)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax, carry; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u8[j] + carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u16[j] + carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u32[j] + carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u64[j] + carry; > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmadc_vvm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax, carry; > + uint64_t tmp; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + > + if (vector_vtype_ill(env) > + 
|| vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src1].u8[j] > + + env->vfp.vreg[src2].u8[j] + carry; > + tmp = tmp >> width; > + > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src1].u16[j] > + + env->vfp.vreg[src2].u16[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)env->vfp.vreg[src1].u32[j] > + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src1].u64[j] > + + env->vfp.vreg[src2].u64[j] + carry; > + > + if ((tmp < env->vfp.vreg[src1].u64[j] || > + tmp < env->vfp.vreg[src2].u64[j]) > + || (env->vfp.vreg[src1].u64[j] == MAX_U64 && > + env->vfp.vreg[src2].u64[j] == MAX_U64)) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmadc_vxm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax, carry; > + uint64_t tmp, extend_rs1; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint8_t)env->gpr[rs1] > + + env->vfp.vreg[src2].u8[j] + carry; > + tmp = tmp >> width; > + > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint16_t)env->gpr[rs1] > + + env->vfp.vreg[src2].u16[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)((uint32_t)env->gpr[rs1]) > + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); 
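For widths 8/16/32 the carry-out falls out of tmp >> width for free; only the
64-bit case needs a predicate, and the expression used in the vvm form above
(and in the lines that follow here) is hard to convince yourself of. For
unsigned a + b + cin, with cin in {0, 1}, the carry-out reduces to:

    uint64_t sum = a + b + cin;
    bool cout = (sum < b) || (cin && sum == b);

since with cin == 0 the addition wraps exactly when sum < b, and with cin ==
1 exactly when sum <= b. As far as I can tell the forms in the patch are
equivalent, but this one is much easier to review and to test.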
> + > + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]); > + tmp = extend_rs1 + env->vfp.vreg[src2].u64[j] + > carry; > + if ((tmp < extend_rs1) || > + (carry && (env->vfp.vreg[src2].u64[j] == > MAX_U64))) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmadc_vim)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax, carry; > + uint64_t tmp; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint8_t)sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u8[j] + carry; > + tmp = tmp >> width; > + > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint16_t)sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u16[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)((uint32_t)sign_extend(rs1, 5)) > + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u64[j] + carry; > + > + if ((tmp < (uint64_t)sign_extend(rs1, 5) || > + tmp < env->vfp.vreg[src2].u64[j]) > + || ((uint64_t)sign_extend(rs1, 5) == MAX_U64 && > + env->vfp.vreg[src2].u64[j] == MAX_U64)) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsbc_vvm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax, carry; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; 
i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + - env->vfp.vreg[src1].u8[j] - carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + - env->vfp.vreg[src1].u16[j] - carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + - env->vfp.vreg[src1].u32[j] - carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + - env->vfp.vreg[src1].u64[j] - carry; > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsbc_vxm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax, carry; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + - env->gpr[rs1] - carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + - env->gpr[rs1] - carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + - env->gpr[rs1] - carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + - (uint64_t)extend_gpr(env->gpr[rs1]) - carry; > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsbc_vvm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax, carry; > + uint64_t tmp; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN 
/ width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u8[j] > + - env->vfp.vreg[src1].u8[j] - carry; > + tmp = (tmp >> width) & 0x1; > + > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u16[j] > + - env->vfp.vreg[src1].u16[j] - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)env->vfp.vreg[src2].u32[j] > + - (uint64_t)env->vfp.vreg[src1].u32[j] - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u64[j] > + - env->vfp.vreg[src1].u64[j] - carry; > + > + if (((env->vfp.vreg[src1].u64[j] == MAX_U64) && > carry) || > + env->vfp.vreg[src2].u64[j] < > + (env->vfp.vreg[src1].u64[j] + carry)) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsbc_vxm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax, carry; > + uint64_t tmp, extend_rs1; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u8[j] > + - (uint8_t)env->gpr[rs1] - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u16[j] > + - (uint16_t)env->gpr[rs1] - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)env->vfp.vreg[src2].u32[j] > + - (uint64_t)((uint32_t)env->gpr[rs1]) - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + > + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]); > + tmp = env->vfp.vreg[src2].u64[j] - extend_rs1 - > carry; > + > + if ((tmp > env->vfp.vreg[src2].u64[j]) || > + ((extend_rs1 == MAX_U64) && carry)) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + > + default: > + 
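Same remark for the borrow: the 64-bit case of vmsbc can be written directly
on the operands instead of on the wrapped difference:

    /* borrow-out of src2 - src1 - cin, with cin in {0, 1} */
    bool bout = (src1 > src2) || (cin && src1 == src2);

which I believe is equivalent to the ((src1 == MAX_U64) && carry) || src2 <
(src1 + carry) form used above, without the MAX_U64 edge case to reason
about.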
riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmpopc.m rd, vs2, v0.t # x[rd] = sum_i ( vs2[i].LSB && > v0[i].LSB ) */ > +void VECTOR_HELPER(vmpopc_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + env->gpr[rd] = 0; > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + if (vector_mask_reg(env, rs2, width, lmul, i) && > + vector_elem_mask(env, vm, width, lmul, i)) { > + env->gpr[rd]++; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfirst.m rd, vs2, vm */ > +void VECTOR_HELPER(vmfirst_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + if (vector_mask_reg(env, rs2, width, lmul, i) && > + vector_elem_mask(env, vm, width, lmul, i)) { > + env->gpr[rd] = i; > + break; > + } > + } else { > + env->gpr[rd] = -1; > + } > + } > + > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmerge_vvm)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl, idx, pos; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, > &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) > == 0) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src1].u8[j]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, > &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) > == 
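In vmpopc_m and vmfirst_m above the final `env->vfp.vstart = 0;' sits
after a `return;', so it never executes — the same dead line shows up
in many of the helpers below. vmfirst_m also only writes -1 from the
tail branch, so if no active element is set and vl == vlmax, rd is
never written at all. Something like this seems more direct (a sketch
reusing the patch's own helpers):

    env->gpr[rd] = -1;
    for (i = 0; i < vl; i++) {
        if (vector_mask_reg(env, rs2, width, lmul, i) &&
            vector_elem_mask(env, vm, width, lmul, i)) {
            env->gpr[rd] = i;
            break;
        }
    }
    env->vfp.vstart = 0;
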
0) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src1].u16[j]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, > &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) > == 0) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src1].u32[j]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j]; > + } > + break; > + case 64: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, > &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) > == 0) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src1].u64[j]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmerge_vxm)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl, idx, pos; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, > &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) > == 0) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]; > + } > + break; > + case 16: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, > &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) > == 0) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]; > + } > + break; > + case 32: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, > &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) > == 0) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]; > + } > + break; > + case 64: 
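Two things in the vmerge helpers: vmerge_vxm above skips the
vector_vtype_ill()/vector_overlap_vm_common() checks that vmerge_vvm
performs — is that deliberate? And the masked path open-codes the v0
bit extraction with vector_get_layout() plus a shift, while the
compare helpers later in the file read the same bit through
vector_elem_mask(). Assuming vector_elem_mask(env, 0, ...) really does
return v0.LSB for element i (that is how it reads from its other
uses), each case could collapse to something like:

    if (vm || vector_elem_mask(env, 0, width, lmul, i)) {
        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j];
    } else {
        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
    }

with the loop-invariant `vm && rs2 != 0' encoding check hoisted out in
front of the loop instead of being re-tested per element and per width.
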
> + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, > &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) > == 0) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmerge_vim)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl, idx, pos; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, > &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) > == 0) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + (uint8_t)sign_extend(rs1, 5); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u8[j] = > (uint8_t)sign_extend(rs1, 5); > + } > + break; > + case 16: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, > &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) > == 0) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + (uint16_t)sign_extend(rs1, 5); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u16[j] = > (uint16_t)sign_extend(rs1, 5); > + } > + break; > + case 32: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, > &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) > == 0) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + (uint32_t)sign_extend(rs1, 5); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u32[j] = > (uint32_t)sign_extend(rs1, 5); > + } > + break; > + case 64: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, > &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) > == 0) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)sign_extend(rs1, 5); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u64[j] = > (uint64_t)sign_extend(rs1, 5); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else 
{ > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfmerge.vfm vd, vs2, rs1, v0 # vd[i] = v0[i].LSB ? f[rs1] : > vs2[i] */ > +void VECTOR_HELPER(vfmerge_vfm)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* vfmv.v.f vd, rs1 # vd[i] = f[rs1]; */ > + if (vm && (rs2 != 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = env->fpr[rs1]; > + } else { > + env->vfp.vreg[dest].f16[j] = > env->vfp.vreg[src2].f16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = env->fpr[rs1]; > + } else { > + env->vfp.vreg[dest].f32[j] = > env->vfp.vreg[src2].f32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = env->fpr[rs1]; > + } else { > + env->vfp.vreg[dest].f64[j] = > env->vfp.vreg[src2].f64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmseq_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] == > + env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] == > + env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] == > + env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if 
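vfmerge_vfm above (and the other _vf helpers below) pass the raw
64-bit env->fpr[rs1] straight into 16- and 32-bit element slots. If
the scalar registers hold NaN-boxed narrow values, the way the scalar
F code boxes singles, the narrow cases want an unbox-and-check rather
than an implicit truncation. A hypothetical sketch — fpr_to_f16 is not
in the patch, and the boxing convention for halves is my assumption:

    static inline float16 fpr_to_f16(uint64_t v, float_status *s)
    {
        /* a properly boxed half would have bits 63:16 all set */
        if ((v & 0xffffffffffff0000ULL) != 0xffffffffffff0000ULL) {
            return float16_default_nan(s);
        }
        return make_float16(v);
    }

Also note the `return;' ahead of `env->vfp.vstart = 0;' here again —
vstart is never actually cleared on the normal path.
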
(vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] == > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmseq_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] == > env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] == > env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] == > env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) == > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmseq_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if 
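All of the integer vms* helpers repeat the same four-way width switch
with only the comparison operator and operand order changing, which is
a large part of why vector_helper.c ends up at 26k lines. One macro
per case would shrink this a lot; a minimal sketch of the idea (the
macro name is made up):

    #define VMCMP_CASE(BITS, A, OP, B)                              \
        case BITS:                                                  \
            if (vector_elem_mask(env, vm, width, lmul, i)) {        \
                vector_mask_result(env, rd, width, lmul, i,         \
                    ((A).u##BITS[j] OP (B).u##BITS[j]) ? 1 : 0);    \
            }                                                       \
            break;

so vmseq_vv's switch body becomes four one-liners like
VMCMP_CASE(8, env->vfp.vreg[src1], ==, env->vfp.vreg[src2]), and a
signed twin using s##BITS covers vmslt and friends.
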
((uint8_t)sign_extend(rs1, 5) > + == env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)sign_extend(rs1, 5) > + == env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)sign_extend(rs1, 5) > + == env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)sign_extend(rs1, 5) == > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmandnot.mm <http://vmandnot.mm> vd, vs2, vs1 # vd = vs2 & ~vs1 */ > +void VECTOR_HELPER(vmandnot_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) & > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfeq.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmfeq_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float16_eq_quiet(env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + case 32: > + if 
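The mask-logical helpers (vmandnot_mm above, vmand/vmor/vmxor/vmornot
below) are identical apart from the boolean expression, and each one
repeats the unreachable `env->vfp.vstart = 0;' after `return;'. They
could share a single body — a sketch, with the callback name made up
and the vtype check elided:

    static void vector_mask_logical(CPURISCVState *env, uint32_t rs1,
                                    uint32_t rs2, uint32_t rd,
                                    uint32_t (*op)(uint32_t, uint32_t))
    {
        int width = vector_get_width(env);
        int lmul = vector_get_lmul(env);
        int vl = env->vfp.vl;
        int vlmax = vector_get_vlmax(env);
        int i;

        for (i = env->vfp.vstart; i < vlmax; i++) {
            uint32_t bit = (i < vl)
                ? op(vector_mask_reg(env, rs1, width, lmul, i),
                     vector_mask_reg(env, rs2, width, lmul, i)) & 0x1
                : 0;
            vector_mask_result(env, rd, width, lmul, i, bit);
        }
        env->vfp.vstart = 0;
    }

    /* vmandnot: vd = vs2 & ~vs1 */
    static uint32_t op_andnot(uint32_t vs1, uint32_t vs2)
    {
        return vs2 & ~vs1;
    }

Note this keeps zeroing the tail even when vstart >= vl, where the
patch returns early — I am not sure which behaviour 0.7.1 intends.
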
(vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float32_eq_quiet(env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float64_eq_quiet(env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfeq.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfeq_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmsne_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if 
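In the floating-point compares the tail branch re-switches on width
just to choose between vector_mask_result() and raising, while the
integer versions test `width <= 64'. Since width comes out of vtype it
can only be 8/16/32/64, so the integer else-arm's raise is
unreachable, and the FP one can only fire for width == 8 — which the
main switch would already have rejected unless vl is 0. Worth unifying
one way or the other. And once more, `return;' precedes
`env->vfp.vstart = 0;', so the reset never runs.
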
(i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] != > + env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] != > + env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] != > + env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] != > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsne_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] != > env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] != > env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] != > env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) != > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, 
lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsne_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)sign_extend(rs1, 5) > + != env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)sign_extend(rs1, 5) > + != env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)sign_extend(rs1, 5) > + != env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)sign_extend(rs1, 5) != > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmand.mm <http://vmand.mm> vd, vs2, vs1 # vd = vs2 & vs1 */ > +void VECTOR_HELPER(vmand_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) & > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfle.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmfle_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + > + lmul = 
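In the _vi forms (vmsne_vi above, vmseq_vi earlier) the 5-bit
immediate arrives in a parameter called rs1 and is then passed through
sign_extend(rs1, 5). Calling it imm (or simm5) in the helper and the
decode glue would make these a lot easier to read.
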
vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_le(env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[src1].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_le(env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[src1].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_le(env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[src1].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfle.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfle_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_le(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_le(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_le(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, 
width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsltu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] < > + env->vfp.vreg[src1].u8[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] < > + env->vfp.vreg[src1].u16[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] < > + env->vfp.vreg[src1].u32[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] < > + env->vfp.vreg[src1].u64[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsltu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] < > (uint8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] < > 
(uint16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] < > (uint32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] < > + (uint64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmor.mm <http://vmor.mm> vd, vs2, vs1 # vd = vs2 | vs1 */ > +void VECTOR_HELPER(vmor_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) | > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmford.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmford_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float16_unordered_quiet(env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float32_unordered_quiet(env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > 
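Minor inconsistency: vmor_mm above masks its result with `tmp & 0x1'
but vmand_mm earlier stores tmp unmasked. If vector_mask_reg() only
ever returns 0 or 1 the masking is redundant in both; either way the
two should agree.
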
float64_unordered_quiet(env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmford.vf vd, vs2, rs1, vm # Vector-scalar */ > +void VECTOR_HELPER(vmford_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float16_unordered_quiet(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float32_unordered_quiet(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float64_unordered_quiet(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmslt_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] < > + env->vfp.vreg[src1].s8[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 
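Several FP helpers, vmford_vv/vmford_vf above included, bail out with
a bare `return' when vstart >= vl. If the instruction still has to
zero the mask tail and reset vstart in that case this shortcut is
wrong; if it does not, the integer helpers should presumably take the
same early exit. Which does the 0.7.1 draft specify?
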
1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] < > + env->vfp.vreg[src1].s16[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] < > + env->vfp.vreg[src1].s32[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] < > + env->vfp.vreg[src1].s64[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmslt_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] < > (int8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] < > (int16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] < > (int32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] < > + (int64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmxor.mm <http://vmxor.mm> vd, vs2, vs1 # vd = vs2 ^ vs1 */ > +void VECTOR_HELPER(vmxor_mm)(CPURISCVState *env, 
uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) ^ > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmflt.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmflt_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_lt(env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[src1].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_lt(env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[src1].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_lt(env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[src1].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmflt.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmflt_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; 
i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_lt(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_lt(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_lt(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > result); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsleu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] <= > + env->vfp.vreg[src1].u8[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] <= > + env->vfp.vreg[src1].u16[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] <= > + env->vfp.vreg[src1].u32[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] <= > + env->vfp.vreg[src1].u64[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void 
VECTOR_HELPER(vmsleu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] <= > (uint8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] <= > (uint16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] <= > (uint32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] <= > + (uint64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsleu_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] <= (uint8_t)rs1) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] <= > (uint16_t)rs1) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] <= > (uint32_t)rs1) { > + 
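vmsleu_vi above compares against a plain `(uintN_t)rs1' cast, while
vmseq_vi/vmsne_vi sign-extend with sign_extend(rs1, 5). If vmsleu.vi
takes the usual simm5 encoding (my reading of 0.7.1 is that the
immediate is sign-extended even for the unsigned compares), the wider
cases here and the 64-bit case below get the wrong value for negative
immediates, e.g. for imm = 0x1f (-1):

    /* current: compares against 0x001f; sign-extended: 0xffff */
    env->vfp.vreg[src2].u16[j] <= (uint16_t)sign_extend(rs1, 5)
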
vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] <= > + (uint64_t)rs1) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmornot.mm <http://vmornot.mm> vd, vs2, vs1 # vd = vs2 | ~vs1 */ > +void VECTOR_HELPER(vmornot_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) | > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfne.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmfne_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float16_eq_quiet(env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float32_eq_quiet(env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float64_eq_quiet(env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + 
case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfne.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfne_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsle_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] <= > + env->vfp.vreg[src1].s8[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] <= > + env->vfp.vreg[src1].s16[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > 
+ case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] <= > + env->vfp.vreg[src1].s32[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] <= > + env->vfp.vreg[src1].s64[j]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsle_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] <= > (int8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] <= > (int16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] <= > (int32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] <= > + (int64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsle_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + 
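
A few recurring problems in these compare helpers. Several of them end with

    return;
    env->vfp.vstart = 0;

so the vstart reset is dead code and the helper returns with vstart still
set (the float compare at the top of this hunk and both vmfne helpers do
this, and more below), while others such as vmsleu.vv do reset it. The mask
logical ops have the reset duplicated after a return as well. Whichever
behaviour is intended, it should at least be consistent.

The if/else around vector_mask_result can also just pass the comparison
through, e.g. for vmsleu.vv:

    if (vector_elem_mask(env, vm, width, lmul, i)) {
        vector_mask_result(env, rd, width, lmul, i,
                           env->vfp.vreg[src2].u8[j] <=
                           env->vfp.vreg[src1].u8[j]);
    }

which halves each case. And since width cannot change inside the element
loop, the default: illegal-instruction cases (and the per-iteration
width <= 64 checks in the tail path) can only fire after earlier elements
have already been written back — better to validate the width once, before
the loop.
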
vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] <= > + (int8_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] <= > + (int16_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] <= > + (int32_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] <= > + sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmnand.mm <http://vmnand.mm> vd, vs2, vs1 # vd = ~(vs2 & vs1) */ > +void VECTOR_HELPER(vmnand_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) & > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, (~tmp & > 0x1)); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfgt.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfgt_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > 
+ result = float16_le(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_le(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_le(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsgtu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] > > (uint8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] > > (uint16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] > > (uint32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] > > + (uint64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsgtu_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || 
vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] > (uint8_t)rs1) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] > (uint16_t)rs1) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] > (uint32_t)rs1) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] > > + (uint64_t)rs1) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmnor.mm <http://vmnor.mm> vd, vs2, vs1 # vd = ~(vs2 | vs1) */ > +void VECTOR_HELPER(vmnor_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) | > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmsgt_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] > > 
(int8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] > > (int16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] > > (int32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] > > + (int64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsgt_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] > > + (int8_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] > > + (int16_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] > > + (int32_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] > > + sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, > i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, > i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +/* vmxnor.mm <http://vmxnor.mm> vd, 
vs2, vs1 # vd = ~(vs2 ^ vs1) */ > +void VECTOR_HELPER(vmxnor_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) ^ > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfge.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfge_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_lt(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_lt(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_lt(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, > !result); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsaddu.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vsaddu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = 
vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_add_u8(env, > + env->vfp.vreg[src1].u8[j], > env->vfp.vreg[src2].u8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_add_u16(env, > + env->vfp.vreg[src1].u16[j], > env->vfp.vreg[src2].u16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_add_u32(env, > + env->vfp.vreg[src1].u32[j], > env->vfp.vreg[src2].u32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_add_u64(env, > + env->vfp.vreg[src1].u64[j], > env->vfp.vreg[src2].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsaddu.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vsaddu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_add_u8(env, > + env->vfp.vreg[src2].u8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_add_u16(env, > + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_add_u32(env, > + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_add_u64(env, > + env->vfp.vreg[src2].u64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsaddu.vi <http://vsaddu.vi> vd, vs2, imm, vm # > vector-immediate */ > +void VECTOR_HELPER(vsaddu_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) 
{ > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_add_u8(env, > + env->vfp.vreg[src2].u8[j], rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_add_u16(env, > + env->vfp.vreg[src2].u16[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_add_u32(env, > + env->vfp.vreg[src2].u32[j], rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_add_u64(env, > + env->vfp.vreg[src2].u64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vdivu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] == 0) { > + env->vfp.vreg[dest].u8[j] = MAX_U8; > + } else { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] / > + env->vfp.vreg[src1].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] == 0) { > + env->vfp.vreg[dest].u16[j] = MAX_U16; > + } else { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + / env->vfp.vreg[src1].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] == 0) { > + env->vfp.vreg[dest].u32[j] = MAX_U32; > + } else { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + / env->vfp.vreg[src1].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] == 0) { > + env->vfp.vreg[dest].u64[j] = MAX_U64; > + } else { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + / env->vfp.vreg[src1].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > 
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vdivu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u8[j] = MAX_U8; > + } else { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] / > + (uint8_t)env->gpr[rs1]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u16[j] = MAX_U16; > + } else { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + / (uint16_t)env->gpr[rs1]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u32[j] = MAX_U32; > + } else { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + / (uint32_t)env->gpr[rs1]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) { > + env->vfp.vreg[dest].u64[j] = MAX_U64; > + } else { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + / (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfdiv.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfdiv_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_div( > + env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[src1].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_div( > + env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[src1].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_div( > + env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[src1].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfdiv.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfdiv_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_div( > + env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_div( > + env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_div( > + env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsadd.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vsadd_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_add_s8(env, > + env->vfp.vreg[src1].s8[j], > 
env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_add_s16(env, > + env->vfp.vreg[src1].s16[j], > env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_add_s32(env, > + env->vfp.vreg[src1].s32[j], > env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_add_s64(env, > + env->vfp.vreg[src1].s64[j], > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsadd.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vsadd_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_add_s8(env, > + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_add_s16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_add_s32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_add_s64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsadd.vi <http://vsadd.vi> vd, vs2, imm, vm # vector-immediate */ > +void VECTOR_HELPER(vsadd_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > 
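
The same skeleton — prologue, element loop, four-way width switch — is
hand-expanded for every single instruction, which is most of why this file
is ~26k lines. Generating the cases would make it reviewable; an untested
sketch using the patch's own helpers:

    #define DO_VV_CASE(BITS, FIELD, OP)                              \
        case BITS:                                                   \
            if (vector_elem_mask(env, vm, width, lmul, i)) {         \
                env->vfp.vreg[dest].FIELD[j] = OP(env,               \
                    env->vfp.vreg[src1].FIELD[j],                    \
                    env->vfp.vreg[src2].FIELD[j]);                   \
            }                                                        \
            break;

so the body of vsaddu.vv's loop becomes:

    switch (width) {
    DO_VV_CASE(8,  u8,  sat_add_u8)
    DO_VV_CASE(16, u16, sat_add_u16)
    DO_VV_CASE(32, u32, sat_add_u32)
    DO_VV_CASE(64, u64, sat_add_u64)
    default:
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
    }

(the operand order would need flipping for the subtract variants, or make
it a macro parameter).

Separately, vmfgt.vf and vmfge.vf get NaNs wrong: they compute
!float16_le()/!float16_lt(), but for an unordered input the compare returns
0, so the inverted result sets the mask bit where the spec wants 0. vmfne
via !eq_quiet is fine — NE really is the complement of quiet EQ — but GT/GE
are not the complement of LE/LT once NaNs are involved. Another reason to
use float16_compare() and switch on the returned relation explicitly.
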
+ if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_add_s8(env, > + env->vfp.vreg[src2].s8[j], > sign_extend(rs1, 5)); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_add_s16(env, > + env->vfp.vreg[src2].s16[j], > sign_extend(rs1, 5)); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_add_s32(env, > + env->vfp.vreg[src2].s32[j], > sign_extend(rs1, 5)); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_add_s64(env, > + env->vfp.vreg[src2].s64[j], > sign_extend(rs1, 5)); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vdiv_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s8[j] == 0) { > + env->vfp.vreg[dest].s8[j] = -1; > + } else if ((env->vfp.vreg[src2].s8[j] == > MIN_S8) && > + (env->vfp.vreg[src1].s8[j] == > (int8_t)(-1))) { > + env->vfp.vreg[dest].s8[j] = MIN_S8; > + } else { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] / > + env->vfp.vreg[src1].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s16[j] == 0) { > + env->vfp.vreg[dest].s16[j] = -1; > + } else if ((env->vfp.vreg[src2].s16[j] == > MIN_S16) && > + (env->vfp.vreg[src1].s16[j] == > (int16_t)(-1))) { > + env->vfp.vreg[dest].s16[j] = MIN_S16; > + } else { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + / env->vfp.vreg[src1].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s32[j] == 0) { > + env->vfp.vreg[dest].s32[j] = -1; > + } else if ((env->vfp.vreg[src2].s32[j] == > MIN_S32) && > + (env->vfp.vreg[src1].s32[j] == > (int32_t)(-1))) { > + env->vfp.vreg[dest].s32[j] = MIN_S32; > + } else { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + / env->vfp.vreg[src1].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s64[j] == 0) { > + env->vfp.vreg[dest].s64[j] = -1; > + } else if ((env->vfp.vreg[src2].s64[j] == > MIN_S64) && > + (env->vfp.vreg[src1].s64[j] == > (int64_t)(-1))) { > + env->vfp.vreg[dest].s64[j] = 
MIN_S64; > + } else { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + / env->vfp.vreg[src1].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vdiv_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int8_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s8[j] = -1; > + } else if ((env->vfp.vreg[src2].s8[j] == > MIN_S8) && > + ((int8_t)env->gpr[rs1] == (int8_t)(-1))) { > + env->vfp.vreg[dest].s8[j] = MIN_S8; > + } else { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] / > + (int8_t)env->gpr[rs1]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int16_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s16[j] = -1; > + } else if ((env->vfp.vreg[src2].s16[j] == > MIN_S16) && > + ((int16_t)env->gpr[rs1] == (int16_t)(-1))) { > + env->vfp.vreg[dest].s16[j] = MIN_S16; > + } else { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + / (int16_t)env->gpr[rs1]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int32_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s32[j] = -1; > + } else if ((env->vfp.vreg[src2].s32[j] == > MIN_S32) && > + ((int32_t)env->gpr[rs1] == (int32_t)(-1))) { > + env->vfp.vreg[dest].s32[j] = MIN_S32; > + } else { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + / (int32_t)env->gpr[rs1]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) { > + env->vfp.vreg[dest].s64[j] = -1; > + } else if ((env->vfp.vreg[src2].s64[j] == > MIN_S64) && > + ((int64_t)extend_gpr(env->gpr[rs1]) == (int64_t)(-1))) { > + env->vfp.vreg[dest].s64[j] = MIN_S64; > + } else { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + / (int64_t)extend_gpr(env->gpr[rs1]); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = > f[rs1]/vs2[i] */ > +void VECTOR_HELPER(vfrdiv_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + 
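
The vdiv/vdivu special cases (x/0 -> all ones, MIN/-1 -> MIN) do match the
spec's divide semantics, so no complaint about the behaviour — but this
ladder is another candidate for the case-generating macro, and casts like
(int8_t)(-1) are just noise.

I'm also not convinced that reading env->fpr[rs1] directly as a
float16/float32 operand, as the .vf helpers here do, is right. That
silently truncates the 64-bit register; a properly NaN-boxed narrower value
happens to survive the truncation, but an improperly boxed one should, I
think, be treated as the canonical NaN rather than reinterpreted
bit-for-bit. A small unbox helper would also document the intent — a sketch
only, assuming RV64D boxing rules (the helper name is mine):

    static float32 vfp_unbox_f32(CPURISCVState *env, uint64_t v)
    {
        /* well-boxed narrower values carry all-ones in the upper bits */
        return ((v >> 32) == 0xffffffffull)
               ? (float32)v
               : float32_default_nan(&env->fp_status);
    }
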
vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_div( > + env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_div( > + env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_div( > + env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssubu.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vssubu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_sub_u8(env, > + env->vfp.vreg[src2].u8[j], > env->vfp.vreg[src1].u8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_sub_u16(env, > + env->vfp.vreg[src2].u16[j], > env->vfp.vreg[src1].u16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_sub_u32(env, > + env->vfp.vreg[src2].u32[j], > env->vfp.vreg[src1].u32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_sub_u64(env, > + env->vfp.vreg[src2].u64[j], > env->vfp.vreg[src1].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssubu.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vssubu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = 
vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_sub_u8(env, > + env->vfp.vreg[src2].u8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_sub_u16(env, > + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_sub_u32(env, > + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_sub_u64(env, > + env->vfp.vreg[src2].u64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vremu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] == 0) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] % > + env->vfp.vreg[src1].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] == 0) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + % env->vfp.vreg[src1].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] == 0) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + % env->vfp.vreg[src1].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] == 0) { > + 
env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + % env->vfp.vreg[src1].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vremu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] % > + (uint8_t)env->gpr[rs1]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + % (uint16_t)env->gpr[rs1]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + % (uint32_t)env->gpr[rs1]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + % (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmsbf.m vd, vs2, vm # set-before-first mask bit */ > +void VECTOR_HELPER(vmsbf_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + bool first_mask_bit = false; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (first_mask_bit) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + continue; > + } > + if (!vector_mask_reg(env, rs2, width, lmul, i)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + 
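
A couple of things in the vremu helpers above: vl, width and vlmax are all
fetched via vector_get_*() *before* the vector_vtype_ill() check, so this
relies on those being safe to call while vtype is illegal. Reading them after
the check, as the saturating helpers above do, would be more obviously
correct. Also the divide-by-zero special case (return the dividend) is
open-coded once per width; a tiny helper would state the intent exactly once
(untested sketch, remu64 is a name I made up):

    static inline uint64_t remu64(uint64_t dividend, uint64_t divisor)
    {
        /* unsigned remainder by zero returns the dividend, per the spec */
        return divisor == 0 ? dividend : dividend % divisor;
    }
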
first_mask_bit = true; > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmsif.m vd, vs2, vm # set-including-first mask bit */ > +void VECTOR_HELPER(vmsif_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + bool first_mask_bit = false; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (first_mask_bit) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + continue; > + } > + if (!vector_mask_reg(env, rs2, width, lmul, i)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + first_mask_bit = true; > + vector_mask_result(env, rd, width, lmul, i, 1); > + } > + } > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmsof.m vd, vs2, vm # set-only-first mask bit */ > +void VECTOR_HELPER(vmsof_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + bool first_mask_bit = false; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (first_mask_bit) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + continue; > + } > + if (!vector_mask_reg(env, rs2, width, lmul, i)) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + first_mask_bit = true; > + vector_mask_result(env, rd, width, lmul, i, 1); > + } > + } > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* viota.m v4, v2, v0.t */ > +void VECTOR_HELPER(viota_m)(CPURISCVState *env, uint32_t vm, > uint32_t rs2, > + uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest; > + uint32_t sum = 0; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 1)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sum; > + if (vector_mask_reg(env, rs2, width, lmul, i)) { > + sum++; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, 
i)) { > + env->vfp.vreg[dest].u16[j] = sum; > + if (vector_mask_reg(env, rs2, width, lmul, i)) { > + sum++; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sum; > + if (vector_mask_reg(env, rs2, width, lmul, i)) { > + sum++; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sum; > + if (vector_mask_reg(env, rs2, width, lmul, i)) { > + sum++; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vid.v vd, vm # Write element ID to destination. */ > +void VECTOR_HELPER(vid_v)(CPURISCVState *env, uint32_t vm, > uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = i; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = i; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = i; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = i; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssub.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vssub_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_sub_s8(env, > + env->vfp.vreg[src2].s8[j], > env->vfp.vreg[src1].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_sub_s16(env, > + env->vfp.vreg[src2].s16[j], > env->vfp.vreg[src1].s16[j]); > + } > + break; > + 
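
These switch bodies are identical to vssubu_vv above apart from the element
type and which saturating helper gets called, and the same shape repeats for
every binary op in the file - that duplication is most of the 26500 lines.
Generating the cases with a macro would shrink this dramatically and make
review feasible, e.g. (untested sketch built from this patch's own helpers):

    #define GEN_SAT_SUB_CASE(BITS)                                     \
        case BITS:                                                     \
            if (vector_elem_mask(env, vm, width, lmul, i)) {           \
                env->vfp.vreg[dest].s##BITS[j] = sat_sub_s##BITS(env,  \
                    env->vfp.vreg[src2].s##BITS[j],                    \
                    env->vfp.vreg[src1].s##BITS[j]);                   \
            }                                                          \
            break;

so the switch becomes four one-line GEN_SAT_SUB_CASE() invocations, and a
second macro could stamp out the whole helper per operation.
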
case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_sub_s32(env, > + env->vfp.vreg[src2].s32[j], > env->vfp.vreg[src1].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_sub_s64(env, > + env->vfp.vreg[src2].s64[j], > env->vfp.vreg[src1].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssub.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vssub_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_sub_s8(env, > + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_sub_s16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_sub_s32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_sub_s64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vrem_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s8[j] == 0) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j]; > + } else if 
((env->vfp.vreg[src2].s8[j] == > MIN_S8) && > + (env->vfp.vreg[src1].s8[j] == > (int8_t)(-1))) { > + env->vfp.vreg[dest].s8[j] = 0; > + } else { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] % > + env->vfp.vreg[src1].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s16[j] == 0) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j]; > + } else if ((env->vfp.vreg[src2].s16[j] == > MIN_S16) && > + (env->vfp.vreg[src1].s16[j] == > (int16_t)(-1))) { > + env->vfp.vreg[dest].s16[j] = 0; > + } else { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + % env->vfp.vreg[src1].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s32[j] == 0) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j]; > + } else if ((env->vfp.vreg[src2].s32[j] == > MIN_S32) && > + (env->vfp.vreg[src1].s32[j] == > (int32_t)(-1))) { > + env->vfp.vreg[dest].s32[j] = 0; > + } else { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + % env->vfp.vreg[src1].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s64[j] == 0) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j]; > + } else if ((env->vfp.vreg[src2].s64[j] == > MIN_S64) && > + (env->vfp.vreg[src1].s64[j] == > (int64_t)(-1))) { > + env->vfp.vreg[dest].s64[j] = 0; > + } else { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + % env->vfp.vreg[src1].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vrem_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int8_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j]; > + } else if ((env->vfp.vreg[src2].s8[j] == > MIN_S8) && > + ((int8_t)env->gpr[rs1] == (int8_t)(-1))) { > + env->vfp.vreg[dest].s8[j] = 0; > + } else { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] % > + (int8_t)env->gpr[rs1]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int16_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j]; > + } else if ((env->vfp.vreg[src2].s16[j] == > MIN_S16) && > + ((int16_t)env->gpr[rs1] == (int16_t)(-1))) { > + env->vfp.vreg[dest].s16[j] = 0; > + } else { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + % (int16_t)env->gpr[rs1]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, 
width, lmul, i)) { > + if ((int32_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j]; > + } else if ((env->vfp.vreg[src2].s32[j] == > MIN_S32) && > + ((int32_t)env->gpr[rs1] == (int32_t)(-1))) { > + env->vfp.vreg[dest].s32[j] = 0; > + } else { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + % (int32_t)env->gpr[rs1]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j]; > + } else if ((env->vfp.vreg[src2].s64[j] == > MIN_S64) && > + ((int64_t)extend_gpr(env->gpr[rs1]) == (int64_t)(-1))) { > + env->vfp.vreg[dest].s64[j] = 0; > + } else { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + % (int64_t)extend_gpr(env->gpr[rs1]); > + } > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +/* vaadd.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vaadd_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8(env, > + env->vfp.vreg[src1].s8[j], > env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16(env, > + env->vfp.vreg[src1].s16[j], > env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32(env, > + env->vfp.vreg[src1].s32[j], > env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64(env, > + env->vfp.vreg[src1].s64[j], > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vaadd.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vaadd_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + 
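
This prologue - the vill/overlap check, the vector_lmul_check_reg() calls and
the vstart >= vl early-out - is copy-pasted into every helper, with small
accidental variations (see vsmul further down, which checks something
different). It is all derivable from the decoded instruction plus vtype, so
ideally it is done once at translate time in trans_rvv.inc.c; failing that, a
shared function would help. A sketch only (the ra parameter exists because
GETPC() must be taken in the outermost helper):

    static bool vector_prologue(CPURISCVState *env, int lmul, uint32_t vm,
                                uint32_t rd, uintptr_t ra)
    {
        if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra);
        }
        /* nothing to do when vstart is already at or past vl */
        return env->vfp.vstart < env->vfp.vl;
    }

Each helper would then open with
'if (!vector_prologue(env, lmul, vm, rd, GETPC())) { return; }'.
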
vector_lmul_check_reg(env, lmul, rs2, false);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    vl = env->vfp.vl;
> +    if (env->vfp.vstart >= vl) {
> +        return;
> +    }
> +
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        dest = rd + (i / (VLEN / width));
> +        src2 = rs2 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
> +                        env->gpr[rs1], env->vfp.vreg[src2].s8[j]);
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
> +                        env->gpr[rs1], env->vfp.vreg[src2].s16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
> +                        env->gpr[rs1], env->vfp.vreg[src2].s32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
> +                        env->gpr[rs1], env->vfp.vreg[src2].s64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_common(env, dest, j, width);
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vaadd.vi vd, vs2, imm, vm # vector-immediate */
> +void VECTOR_HELPER(vaadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int width, lmul, vl, vlmax;
> +    int i, j, dest, src2;
> +
> +    lmul = vector_get_lmul(env);
> +
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, rs2, false);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    vl = env->vfp.vl;
> +
> +    if (env->vfp.vstart >= vl) {
> +        return;
> +    }
> +
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        dest = rd + (i / (VLEN / width));
> +        src2 = rs2 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
> +                        rs1, env->vfp.vreg[src2].s8[j]);
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
> +                        rs1, env->vfp.vreg[src2].s16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
> +                        rs1, env->vfp.vreg[src2].s32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
> +                        rs1, env->vfp.vreg[src2].s64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_common(env, dest, j, width);
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vmulhu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int i, j, vl;
> +    uint32_t lmul, width, src1, src2, dest, vlmax;
> +
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width =
vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > + ((uint16_t)env->vfp.vreg[src1].u8[j] > + * (uint16_t)env->vfp.vreg[src2].u8[j]) >> > width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + ((uint32_t)env->vfp.vreg[src1].u16[j] > + * (uint32_t)env->vfp.vreg[src2].u16[j]) > >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + ((uint64_t)env->vfp.vreg[src1].u32[j] > + * (uint64_t)env->vfp.vreg[src2].u32[j]) > >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = u64xu64_lh( > + env->vfp.vreg[src1].u64[j], > env->vfp.vreg[src2].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmulhu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > + ((uint16_t)(uint8_t)env->gpr[rs1] > + * (uint16_t)env->vfp.vreg[src2].u8[j]) >> > width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + ((uint32_t)(uint16_t)env->gpr[rs1] > + * (uint32_t)env->vfp.vreg[src2].u16[j]) > >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + ((uint64_t)(uint32_t)env->gpr[rs1] > + * (uint64_t)env->vfp.vreg[src2].u32[j]) > >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = u64xu64_lh( > + (uint64_t)extend_gpr(env->gpr[rs1]) > + , env->vfp.vreg[src2].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +/* vfmul.vv vd, vs2, vs1, vm 
# Vector-vector */
> +void VECTOR_HELPER(vfmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int width, lmul, vl, vlmax;
> +    int i, j, dest, src1, src2;
> +
> +
> +    lmul = vector_get_lmul(env);
> +    vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, rs1, false);
> +    vector_lmul_check_reg(env, lmul, rs2, false);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    if (env->vfp.vstart >= vl) {
> +        return;
> +    }
> +
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        dest = rd + (i / (VLEN / width));
> +        src2 = rs2 + (i / (VLEN / width));
> +        src1 = rs1 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_mul(
> +                        env->vfp.vreg[src1].f16[j],
> +                        env->vfp.vreg[src2].f16[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_mul(
> +                        env->vfp.vreg[src1].f32[j],
> +                        env->vfp.vreg[src2].f32[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_mul(
> +                        env->vfp.vreg[src1].f64[j],
> +                        env->vfp.vreg[src2].f64[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfmul.vf vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vfmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int width, lmul, vl, vlmax;
> +    int i, j, dest, src2;
> +
> +
> +    lmul = vector_get_lmul(env);
> +    vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, rs2, false);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    if (env->vfp.vstart >= vl) {
> +        return;
> +    }
> +
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        dest = rd + (i / (VLEN / width));
> +        src2 = rs2 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = float16_mul(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f16[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = float32_mul(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f32[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = float64_mul(
> +                        env->fpr[rs1],
> +                        env->vfp.vreg[src2].f64[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    return;
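
The 'env->vfp.vstart = 0;' that follows this return (and the identical
pattern in vfmul_vv above, and in most of the helpers in this file) is dead
code: the helper always returns first, so vstart is never cleared when an
instruction completes. Presumably you want the tail of every helper to be

        }
        env->vfp.vstart = 0;
    }

with the bare return dropped. clang's -Wunreachable-code should flag all of
these.

While here: env->fpr[rs1] is the raw 64-bit register, but it is passed
straight through as the float16/float32 operand for SEW=16/32, i.e. it gets
implicitly truncated. Doesn't the scalar need to be checked for NaN-boxing
and narrowed explicitly first?
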
+ env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsll_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] > + << (env->vfp.vreg[src1].u8[j] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + << (env->vfp.vreg[src1].u16[j] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + << (env->vfp.vreg[src1].u32[j] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + << (env->vfp.vreg[src1].u64[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsll_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] > + << (env->gpr[rs1] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + << (env->gpr[rs1] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + << (env->gpr[rs1] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + << ((uint64_t)extend_gpr(env->gpr[rs1]) & > 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + 
vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsll_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] > + << (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + << (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + << (rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + << (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmul_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void 
VECTOR_HELPER(vmul_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->gpr[rs1] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = env->gpr[rs1] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = env->gpr[rs1] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > + (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vasub.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vasub_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8( > + env, > + ~env->vfp.vreg[src1].s8[j] + 1, > + env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16( > + env, > + ~env->vfp.vreg[src1].s16[j] + 1, > + env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32( > + env, > + ~env->vfp.vreg[src1].s32[j] + 1, > + env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64( > + env, > + ~env->vfp.vreg[src1].s64[j] + 1, > + env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + 
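
In the vasub cases above, writing the negation as '~x + 1' rather than '-x'
obscures the intent, and for the most negative input the negation is not
representable anyway: if avg_round_s8() takes an int8_t, vs1[i] = INT8_MIN
feeds INT8_MIN straight back in. Is that the behaviour 0.7.1 wants at the
type minimum, or does this deserve a special case?

Also, further up, vsll_vi shifts by the raw immediate ('<< (rs1)') while the
.vv/.vx forms mask the shift amount to SEW-1. Shouldn't the immediate form
mask as well? Presumably the same applies to the other immediate shifts later
in the file.
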
riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vasub.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vasub_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8( > + env, ~env->gpr[rs1] + 1, > env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16( > + env, ~env->gpr[rs1] + 1, > env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32( > + env, ~env->gpr[rs1] + 1, > env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64( > + env, ~env->gpr[rs1] + 1, > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmulhsu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > + ((uint16_t)env->vfp.vreg[src1].u8[j] > + * (int16_t)env->vfp.vreg[src2].s8[j]) >> > width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > + ((uint32_t)env->vfp.vreg[src1].u16[j] > + * (int32_t)env->vfp.vreg[src2].s16[j]) >> > width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > + 
((uint64_t)env->vfp.vreg[src1].u32[j] > + * (int64_t)env->vfp.vreg[src2].s32[j]) >> > width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = s64xu64_lh( > + env->vfp.vreg[src2].s64[j], > env->vfp.vreg[src1].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmulhsu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > + ((uint16_t)(uint8_t)env->gpr[rs1] > + * (int16_t)env->vfp.vreg[src2].s8[j]) >> > width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > + ((uint32_t)(uint16_t)env->gpr[rs1] > + * (int32_t)env->vfp.vreg[src2].s16[j]) >> > width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > + ((uint64_t)(uint32_t)env->gpr[rs1] > + * (int64_t)env->vfp.vreg[src2].s32[j]) >> > width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = s64xu64_lh( > + env->vfp.vreg[src2].s64[j], > + (uint64_t)extend_gpr(env->gpr[rs1])); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vsmul.vv vd, vs2, vs1, vm # vd[i] = > clip((vs2[i]*vs1[i]+round)>>(SEW-1)) */ > +void VECTOR_HELPER(vsmul_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if ((!(vm)) && rd == 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vsmul_8(env, > + env->vfp.vreg[src1].s8[j], > env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) 
{ > + env->vfp.vreg[dest].s16[j] = vsmul_16(env, > + env->vfp.vreg[src1].s16[j], > env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vsmul_32(env, > + env->vfp.vreg[src1].s32[j], > env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vsmul_64(env, > + env->vfp.vreg[src1].s64[j], > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsmul.vx vd, vs2, rs1, vm # vd[i] = > clip((vs2[i]*x[rs1]+round)>>(SEW-1)) */ > +void VECTOR_HELPER(vsmul_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if ((!(vm)) && rd == 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vsmul_8(env, > + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vsmul_16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vsmul_32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vsmul_64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmulh_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + 
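
vsmul_vv/vsmul_vx above are the odd ones out: they hand-roll the check as
'(!(vm)) && rd == 0' instead of calling vector_overlap_vm_common(), and they
never call vector_lmul_check_reg() on rs1/rs2/rd at all. If the difference is
deliberate it deserves a comment; if not, it is exactly the kind of drift
that the shared prologue suggested earlier would prevent.
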
env->vfp.vreg[dest].s8[j] = > + ((int16_t)env->vfp.vreg[src1].s8[j] > + * (int16_t)env->vfp.vreg[src2].s8[j]) >> > width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > + ((int32_t)env->vfp.vreg[src1].s16[j] > + * (int32_t)env->vfp.vreg[src2].s16[j]) >> > width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > + ((int64_t)env->vfp.vreg[src1].s32[j] > + * (int64_t)env->vfp.vreg[src2].s32[j]) >> > width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = s64xs64_lh( > + env->vfp.vreg[src1].s64[j], > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmulh_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > + ((int16_t)(int8_t)env->gpr[rs1] > + * (int16_t)env->vfp.vreg[src2].s8[j]) >> > width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > + ((int32_t)(int16_t)env->gpr[rs1] > + * (int32_t)env->vfp.vreg[src2].s16[j]) >> > width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > + ((int64_t)(int32_t)env->gpr[rs1] > + * (int64_t)env->vfp.vreg[src2].s32[j]) >> > width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = s64xs64_lh( > + (int64_t)extend_gpr(env->gpr[rs1]) > + , env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfrsub.vf vd, vs2, rs1, vm # Scalar-vector vd[i] = f[rs1] - > vs2[i] */ > +void VECTOR_HELPER(vfrsub_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + 
(i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_sub( > + env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_sub( > + env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_sub( > + env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsrl_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] > + >> (env->vfp.vreg[src1].u8[j] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + >> (env->vfp.vreg[src1].u16[j] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + >> (env->vfp.vreg[src1].u32[j] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + >> (env->vfp.vreg[src1].u64[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsrl_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = 
i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] > + >> (env->gpr[rs1] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + >> (env->gpr[rs1] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + >> (env->gpr[rs1] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & > 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsrl_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] > + >> (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + >> (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + >> (rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + >> (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfmadd.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) + vs2[i] */ > +void VECTOR_HELPER(vfmadd_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / 
width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[dest].f16[j], > + env->vfp.vreg[src2].f16[j], > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[dest].f32[j], > + env->vfp.vreg[src2].f32[j], > + 0, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[dest].f64[j], > + env->vfp.vreg[src2].f64[j], > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmadd.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) + vs2[i] */ > +void VECTOR_HELPER(vfmadd_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + env->vfp.vreg[dest].f16[j], > + env->vfp.vreg[src2].f16[j], > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + env->vfp.vreg[dest].f32[j], > + env->vfp.vreg[src2].f32[j], > + 0, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + env->vfp.vreg[dest].f64[j], > + env->vfp.vreg[src2].f64[j], > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsra_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, 
lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] > + >> (env->vfp.vreg[src1].s8[j] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + >> (env->vfp.vreg[src1].s16[j] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + >> (env->vfp.vreg[src1].s32[j] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + >> (env->vfp.vreg[src1].s64[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsra_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] > + >> (env->gpr[rs1] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + >> (env->gpr[rs1] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + >> (env->gpr[rs1] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & > 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsra_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > 
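The .vv and .vx variants all mask the shift amount down to SEW bits (& 0x7, & 0xf and so on) but vsrl_vi and vsra_vi shift by rs1 unmasked. As far as I can see the spec only reads the low log2(SEW) bits of the shift amount whatever its source, so the immediate forms want the same masking, e.g.:

    env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
        >> (rs1 & 0x7);

The narrowing shifts (vnsrl_vi/vnsra_vi) further down look to have the same issue.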
+ } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] > + >> (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + >> (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + >> (rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + >> (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmadd_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[dest].s8[j] > + + env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[dest].s16[j] > + + env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[dest].s32[j] > + + env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[dest].s64[j] > + + env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmadd_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->gpr[rs1] > + * env->vfp.vreg[dest].s8[j] > + + env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = env->gpr[rs1] > + * env->vfp.vreg[dest].s16[j] > + + env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = env->gpr[rs1] > + * env->vfp.vreg[dest].s32[j] > + + env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > + (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[dest].s64[j] > + + env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +/* vfnmadd.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) - > vs2[i] */ > +void VECTOR_HELPER(vfnmadd_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[dest].f16[j], > + env->vfp.vreg[src2].f16[j], > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[dest].f32[j], > + env->vfp.vreg[src2].f32[j], > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[dest].f64[j], > + env->vfp.vreg[src2].f64[j], > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + 
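Here (and in all the other .vf helpers, right back to the float16_sub at the top of this hunk) the raw 64 bit env->fpr[rs1] is passed straight into a float16/float32 operand, which silently truncates it. Narrower values held in the FP registers are meant to be NaN-boxed, so the scalar wants unboxing, with a badly boxed value turning into the canonical NaN. Roughly (a sketch only, the helper name is made up):

    static inline float16 fpr_to_float16(CPURISCVState *env, uint32_t rs1)
    {
        uint64_t v = env->fpr[rs1];

        /* a properly NaN-boxed half has all upper 48 bits set */
        if ((v & ~(uint64_t)0xffff) != ~(uint64_t)0xffff) {
            return float16_default_nan(&env->fp_status);
        }
        return make_float16(v);
    }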
return; > + env->vfp.vstart = 0; > +} > + > +/* vfnmadd.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) - > vs2[i] */ > +void VECTOR_HELPER(vfnmadd_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + env->vfp.vreg[dest].f16[j], > + env->vfp.vreg[src2].f16[j], > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + env->vfp.vreg[dest].f32[j], > + env->vfp.vreg[src2].f32[j], > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + env->vfp.vreg[dest].f64[j], > + env->vfp.vreg[src2].f64[j], > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssrl.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i] */ > +void VECTOR_HELPER(vssrl_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = vssrl_8(env, > + env->vfp.vreg[src2].u8[j], > env->vfp.vreg[src1].u8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = vssrl_16(env, > + env->vfp.vreg[src2].u16[j], > env->vfp.vreg[src1].u16[j]); > + } > + break; > + case 32: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = vssrl_32(env, > + env->vfp.vreg[src2].u32[j], > env->vfp.vreg[src1].u32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = vssrl_64(env, > + env->vfp.vreg[src2].u64[j], > env->vfp.vreg[src1].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssrl.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */ > +void VECTOR_HELPER(vssrl_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = vssrl_8(env, > + env->vfp.vreg[src2].u8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = vssrl_16(env, > + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = vssrl_32(env, > + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = vssrl_64(env, > + env->vfp.vreg[src2].u64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssrl.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */ > +void VECTOR_HELPER(vssrl_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = vssrli_8(env, > + env->vfp.vreg[src2].u8[j], rs1); > +
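The comments say (vs2[i] + round)>>shift but nothing in these helpers selects a rounding mode - I assume vssrl_8 and friends read vxrm out of env? A comment saying so (or passing the rounding mode in explicitly) would make these much easier to review.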
} > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = vssrli_16(env, > + env->vfp.vreg[src2].u16[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = vssrli_32(env, > + env->vfp.vreg[src2].u32[j], rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = vssrli_64(env, > + env->vfp.vreg[src2].u64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmsub.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) - vs2[i] */ > +void VECTOR_HELPER(vfmsub_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[dest].f16[j], > + env->vfp.vreg[src2].f16[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[dest].f32[j], > + env->vfp.vreg[src2].f32[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[dest].f64[j], > + env->vfp.vreg[src2].f64[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmsub.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) - vs2[i] */ > +void VECTOR_HELPER(vfmsub_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < 
vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + env->vfp.vreg[dest].f16[j], > + env->vfp.vreg[src2].f16[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + env->vfp.vreg[dest].f32[j], > + env->vfp.vreg[src2].f32[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + env->vfp.vreg[dest].f64[j], > + env->vfp.vreg[src2].f64[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssra.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i]) */ > +void VECTOR_HELPER(vssra_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vssra_8(env, > + env->vfp.vreg[src2].s8[j], > env->vfp.vreg[src1].u8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vssra_16(env, > + env->vfp.vreg[src2].s16[j], > env->vfp.vreg[src1].u16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vssra_32(env, > + env->vfp.vreg[src2].s32[j], > env->vfp.vreg[src1].u32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vssra_64(env, > + env->vfp.vreg[src2].s64[j], > env->vfp.vreg[src1].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssra.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */ > +void VECTOR_HELPER(vssra_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if 
(vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vssra_8(env, > + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vssra_16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vssra_32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vssra_64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssra.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */ > +void VECTOR_HELPER(vssra_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vssrai_8(env, > + env->vfp.vreg[src2].s8[j], rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vssrai_16(env, > + env->vfp.vreg[src2].s16[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vssrai_32(env, > + env->vfp.vreg[src2].s32[j], rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vssrai_64(env, > + env->vfp.vreg[src2].s64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vnmsub_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > +
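All the FP and fixed-point helpers in this hunk end with:

    return;
    env->vfp.vstart = 0;

so the vstart reset is dead code and vstart is never cleared once the instruction has run. The early "if (env->vfp.vstart >= vl) return;" path doesn't clear it either. I think you want the reset before the return (or just drop the return, it's the last statement anyway).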
uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] > + - env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[dest].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + - env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[dest].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + - env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[dest].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + - env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[dest].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnmsub_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] > + - env->gpr[rs1] > + * env->vfp.vreg[dest].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + - env->gpr[rs1] > + * env->vfp.vreg[dest].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + - env->gpr[rs1] > + * env->vfp.vreg[dest].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + - (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[dest].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + 
vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + > vs2[i] */ > +void VECTOR_HELPER(vfnmsub_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[dest].f16[j], > + env->vfp.vreg[src2].f16[j], > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[dest].f32[j], > + env->vfp.vreg[src2].f32[j], > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[dest].f64[j], > + env->vfp.vreg[src2].f64[j], > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + > + > + env->vfp.vstart = 0; > +} > + > +/* vfnmsub.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) + > vs2[i] */ > +void VECTOR_HELPER(vfnmsub_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + env->vfp.vreg[dest].f16[j], > + env->vfp.vreg[src2].f16[j], > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > 
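Stepping back from the individual helpers: every one of these functions is the same vlmax loop with a switch (width) inside it and only the per-element operation changing, which is how vector_helper.c ends up at 26k lines. Generating them from a macro (or having one common loop take a per-element callback) would get this down to something reviewable - very roughly (a sketch, all names invented):

    #define GEN_VEXT_SHIFT(NAME, TYPE, BITS)                        \
    static void do_##NAME##_##BITS(CPURISCVState *env, int dest,   \
                                   int src1, int src2, int j)      \
    {                                                               \
        env->vfp.vreg[dest].TYPE[j] = env->vfp.vreg[src2].TYPE[j]  \
            >> (env->vfp.vreg[src1].TYPE[j] & (BITS - 1));          \
    }

    GEN_VEXT_SHIFT(vsrl, u8, 8)
    GEN_VEXT_SHIFT(vsrl, u16, 16)

As a bonus it would hoist the switch (width) out of the per-element loop.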
+ env->vfp.vreg[dest].f32[j], > + env->vfp.vreg[src2].f32[j], > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + env->vfp.vreg[dest].f64[j], > + env->vfp.vreg[src2].f64[j], > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vnsrl_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u16[k] > + >> (env->vfp.vreg[src1].u8[j] & 0xf); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u32[k] > + >> (env->vfp.vreg[src1].u16[j] & 0x1f); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u64[k] > + >> (env->vfp.vreg[src1].u32[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsrl_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u16[k] > + >> (env->gpr[rs1] & 0xf); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + 
env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u32[k] > + >> (env->gpr[rs1] & 0x1f); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u64[k] > + >> (env->gpr[rs1] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsrl_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u16[k] > + >> (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u32[k] > + >> (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u64[k] > + >> (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfmacc_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[dest].f16[j], > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + 
env->vfp.vreg[dest].f32[j], > + 0, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[dest].f64[j], > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfmacc_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[dest].f16[j], > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[dest].f32[j], > + 0, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[dest].f64[j], > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsra_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > 
env->vfp.vreg[src2].s16[k] > + >> (env->vfp.vreg[src1].s8[j] & 0xf); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s32[k] > + >> (env->vfp.vreg[src1].s16[j] & 0x1f); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s64[k] > + >> (env->vfp.vreg[src1].s32[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsra_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s16[k] > + >> (env->gpr[rs1] & 0xf); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s32[k] > + >> (env->gpr[rs1] & 0x1f); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s64[k] > + >> (env->gpr[rs1] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsra_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s16[k] > + >> (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s32[k] > + >> (rs1); > + } > + break; > + case 32: > + 
if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s64[k] > + >> (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmacc_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] += > env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] += > env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] += > env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] += > env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmacc_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] += env->gpr[rs1] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] += env->gpr[rs1] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] += env->gpr[rs1] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + 
env->vfp.vreg[dest].s64[j] += > + (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - > vd[i] */ > +void VECTOR_HELPER(vfnmacc_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[dest].f16[j], > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[dest].f32[j], > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[dest].f64[j], > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - > vd[i] */ > +void VECTOR_HELPER(vfnmacc_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + 
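Why does only the 64-bit case of vmacc_vx need extend_gpr()? If it is just sign-extension for RV32 then a plain (int64_t)(target_long)env->gpr[rs1] would do, or at least a comment explaining what else it does would help.

Also vfnmacc_vv (and pretty much every FP helper that follows) ends with:

    return;
    env->vfp.vstart = 0;

The reset is dead code, so vstart is never cleared when the helper completes normally. The integer helpers have it the right way round - presumably the assignment wants to move before the return.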
env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[dest].f16[j], > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[dest].f32[j], > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[dest].f64[j], > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vnclipu.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vnclipu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, k, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[k] = vnclipu_16(env, > + env->vfp.vreg[src2].u16[j], > env->vfp.vreg[src1].u8[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vnclipu_32(env, > + env->vfp.vreg[src2].u32[j], > env->vfp.vreg[src1].u16[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vnclipu_64(env, > + env->vfp.vreg[src2].u64[j], > env->vfp.vreg[src1].u32[k]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vnclipu.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vnclipu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, 
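env->fpr[rs1] is a 64-bit value, but here it is passed straight to float16_muladd/float32_muladd, so it gets silently truncated to the low 16/32 bits and the NaN-boxing of the scalar is never checked. At minimum make the narrowing explicit, e.g. (a sketch, assuming the usual F/D boxing rules apply to vector scalar operands too):

    float16 f = (float16)(env->fpr[rs1] & 0xffff);

but I suspect you also want to substitute a default NaN when the upper bits are not all ones.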
rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[k] = vnclipu_16(env, > + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vnclipu_32(env, > + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vnclipu_64(env, > + env->vfp.vreg[src2].u64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > + > +/* vnclipu.vi vd, vs2, imm, vm # > vector-immediate */ > +void VECTOR_HELPER(vnclipu_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[k] = vnclipui_16(env, > + env->vfp.vreg[src2].u16[j], rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vnclipui_32(env, > + env->vfp.vreg[src2].u32[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vnclipui_64(env, > + env->vfp.vreg[src2].u64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfmsac_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, 
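vnclipu_vv/vx/vi (and the vnclip_* trio below) are byte-for-byte identical apart from where the shift operand comes from. A single worker would cut the amount of code to review by two thirds - a rough sketch of the idea (all names here invented):

    typedef uint64_t (*vext_get_shift)(CPURISCVState *env, uint32_t rs1,
                                       uint32_t src1, uint32_t idx);

    static void vnclipu_worker(CPURISCVState *env, uint32_t vm,
                               uint32_t rs1, uint32_t rs2, uint32_t rd,
                               vext_get_shift get_shift)
    {
        /* the common loop, calling get_shift(env, rs1, src1, k) at the
         * one place the three variants actually differ */
    }

The same trick (or macro expansion) would shrink most of the 26k lines in this file.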
false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[dest].f16[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[dest].f32[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[dest].f64[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfmsac_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[dest].f16[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[dest].f32[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[dest].f64[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vnclip.vv vd, vs2, vs1, vm # vector-vector */ > +void 
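Minor structural point: the FP helpers read width/vlmax only after the vstart >= vl early return, while the integer helpers compute everything up front and have no early return. Settling on one shape for all of them would make the repetition easier to verify.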
VECTOR_HELPER(vnclip_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, k, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[k] = vnclip_16(env, > + env->vfp.vreg[src2].s16[j], > env->vfp.vreg[src1].u8[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vnclip_32(env, > + env->vfp.vreg[src2].s32[j], > env->vfp.vreg[src1].u16[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vnclip_64(env, > + env->vfp.vreg[src2].s64[j], > env->vfp.vreg[src1].u32[k]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vnclip.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vnclip_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, k, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[k] = vnclip_16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vnclip_32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vnclip_64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, 
dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vnclip.vi vd, vs2, imm, vm # > vector-immediate */ > +void VECTOR_HELPER(vnclip_vi)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, k, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[k] = vnclipi_16(env, > + env->vfp.vreg[src2].s16[j], rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vnclipi_32(env, > + env->vfp.vreg[src2].s32[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vnclipi_64(env, > + env->vfp.vreg[src2].s64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vnmsac_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] -= > env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] -= > env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] -= > env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] -= > env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + 
break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnmsac_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] -= env->gpr[rs1] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] -= env->gpr[rs1] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] -= env->gpr[rs1] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] -= > + (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + > vd[i] */ > +void VECTOR_HELPER(vfnmsac_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[dest].f16[j], > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[dest].f32[j], > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + 
env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[dest].f64[j], > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + > vd[i] */ > +void VECTOR_HELPER(vfnmsac_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[dest].f16[j], > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[dest].f32[j], > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[dest].f64[j], > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vwredsumu.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + > sum(zero-extend(SEW)) */ > +void VECTOR_HELPER(vwredsumu_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t sum = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u8[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u16[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = sum; > + } > 
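In vwredsumu_vs the vector_lmul_check_reg() call happens before the vector_vtype_ill() check, so with an illegal vtype you end up validating register alignment against a garbage lmul. The vill check needs to come first, as it does in the non-reduction helpers. Same ordering problem in vwredsum_vs and vfwredsum_vs below.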
+ break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u16[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u32[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = sum; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u32[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u64[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = sum; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwaddu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src1].u8[j] + > + (uint16_t)env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src1].u16[j] + > + (uint32_t)env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src1].u32[j] + > + (uint64_t)env->vfp.vreg[src2].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwaddu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + 
(uint16_t)env->vfp.vreg[src2].u8[j] + > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u16[j] + > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u32[j] + > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwadd.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwadd_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_add( > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src1].f16[j], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_add( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwadd.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwadd_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); 
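vfwadd_vf checks vector_vtype_ill() twice - once in the combined condition at the top and again after the lmul register checks. The second check looks like a rebase leftover and can go.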
> + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_add( > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_add( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->fpr[rs1], &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vwredsum.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + > sum(sign-extend(SEW)) */ > +void VECTOR_HELPER(vwredsum_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + int64_t sum = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += (int16_t)env->vfp.vreg[src2].s8[j] << > 8 >> 8; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].s16[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s16[0] = sum; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += (int32_t)env->vfp.vreg[src2].s16[j] << > 16 >> 16; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].s32[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s32[0] = sum; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += (int64_t)env->vfp.vreg[src2].s32[j] << > 32 >> 32; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].s64[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s64[0] = sum; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwadd_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, 
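In vwredsum_vs the (int16_t) cast has already sign-extended the s8 element, so the "<< 8 >> 8" dance is redundant - and left-shifting a negative value is undefined behaviour in C anyway. The cast alone is enough:

    sum += (int16_t)env->vfp.vreg[src2].s8[j];

(likewise for the 16 and 32 bit cases).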
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src1].s8[j] + > + (int16_t)env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src1].s16[j] + > + (int32_t)env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src1].s32[j] + > + (int64_t)env->vfp.vreg[src2].s32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwadd_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) + > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) + > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) + > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vd, vs2, vs1, vm # Unordered reduce 2*SEW = 2*SEW + > sum(promote(SEW)) */ > +void VECTOR_HELPER(vfwredsum_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + float32 sum32 = 0.0f; > + float64 sum64 = 0.0f; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + 
if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 16: > + if (i == 0) { > + sum32 = env->vfp.vreg[rs1].f32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum32 = float32_add(sum32, > + float16_to_float32(env->vfp.vreg[src2].f16[j], > + true, &env->fp_status), > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f32[0] = sum32; > + } > + break; > + case 32: > + if (i == 0) { > + sum64 = env->vfp.vreg[rs1].f64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum64 = float64_add(sum64, > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f64[0] = sum64; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsubu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u8[j] - > + (uint16_t)env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u16[j] - > + (uint32_t)env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u32[j] - > + (uint64_t)env->vfp.vreg[src1].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsubu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || 
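float32/float64 are integer types in QEMU's softfloat, so "float32 sum32 = 0.0f" only works because converting 0.0f to uint32_t happens to yield the +0.0 bit pattern. float32_zero/float64_zero say what you mean - though since the accumulator is unconditionally loaded from vreg[rs1] at i == 0, the initialisers look like they could simply go.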
vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u8[j] - > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u16[j] - > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u32[j] - > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwsub.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwsub_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_sub( > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src1].f16[j], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_sub( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwsub.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwsub_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + 
int i, j, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_sub( > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_sub( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->fpr[rs1], &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwsub_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s8[j] - > + (int16_t)env->vfp.vreg[src1].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s16[j] - > + (int32_t)env->vfp.vreg[src1].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s32[j] - > + (int64_t)env->vfp.vreg[src1].s32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwsub_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, 
uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) - > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) - > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) - > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* > + * vfwredosum.vs vd, vs2, vs1, vm # > + * Ordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) > + */ > +void VECTOR_HELPER(vfwredosum_vs)(CPURISCVState *env, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + helper_vector_vfwredsum_vs(env, vm, rs1, rs2, rd); > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwaddu_wv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src1].u8[j] + > + (uint16_t)env->vfp.vreg[src2].u16[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src1].u16[j] + > + (uint32_t)env->vfp.vreg[src2].u32[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src1].u32[j] + > + (uint64_t)env->vfp.vreg[src2].u64[k]; 
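vfwredosum_vs just tail-calls the unordered helper_vector_vfwredsum_vs. That happens to be fine today because the "unordered" version is itself an ordered left-to-right loop, but that assumption deserves a comment where the alias is made. The unreachable "env->vfp.vstart = 0" after the return here is the dead-store pattern again.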
> + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwaddu_wx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u16[k] + > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u32[k] + > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u64[k] + > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwadd.wv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwadd_wv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_add( > + env->vfp.vreg[src2].f32[k], > + float16_to_float32(env->vfp.vreg[src1].f16[j], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_add( > + env->vfp.vreg[src2].f64[k], > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, 
GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwadd.wf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwadd_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_add( > + env->vfp.vreg[src2].f32[k], > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_add( > + env->vfp.vreg[src2].f64[k], > + float32_to_float64(env->fpr[rs1], &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]) + > + (int16_t)env->vfp.vreg[src2].s16[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]) + > + (int32_t)env->vfp.vreg[src2].s32[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]) + > + (int64_t)env->vfp.vreg[src2].s64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > +
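vfwadd_wf (and every helper below with the same early exit):

    if (env->vfp.vstart >= vl) {
        return;
    }

also returns without clearing vstart, and it skips the tail zeroing loop entirely. If the intent is that vstart is always cleared when an instruction completes - my reading of the 0.7.1 draft, worth double checking - this wants to be:

    if (env->vfp.vstart >= vl) {
        env->vfp.vstart = 0;
        return;
    }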
vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwadd_wx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s16[k] + > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s32[k] + > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s64[k] + > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsubu_wv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u16[k] - > + (uint16_t)env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u32[k] - > + (uint32_t)env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u64[k] - > + (uint64_t)env->vfp.vreg[src1].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsubu_wx)(CPURISCVState 
*env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u16[k] - > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u32[k] - > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u64[k] - > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwsub.wv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_sub( > + env->vfp.vreg[src2].f32[k], > + float16_to_float32(env->vfp.vreg[src1].f16[j], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_sub( > + env->vfp.vreg[src2].f64[k], > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwsub.wf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwsub_wf)(CPURISCVState *env,
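vfwsub_wv zeroes its tail with vector_tail_fwiden where vfwadd_wv above uses vector_tail_widen. Is that difference intentional? If the two do the same thing for these element widths one of them can go; if they don't, the add/sub pair should at least agree.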
uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_sub( > + env->vfp.vreg[src2].f32[k], > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_sub( > + env->vfp.vreg[src2].f64[k], > + float32_to_float64(env->fpr[rs1], &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsub_wv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s16[k] - > + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s32[k] - > + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s64[k] - > + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsub_wx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + 
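A general nit on all of these loops: VLEN / width and VLEN / (2 * width) are loop invariant, and the elements below vstart could be skipped by the loop bounds rather than a continue. Something like (untested):

    const uint32_t elems_n = VLEN / width;        /* narrow elems per reg */
    const uint32_t elems_w = VLEN / (2 * width);  /* wide elems per reg */

    for (i = env->vfp.vstart; i < vlmax; i++) {
        src2 = rs2 + i / elems_w;
        dest = rd + i / elems_w;
        k = i % elems_w;
        ...
    }

Not a correctness issue, but at vlmax iterations per executed instruction the repeated divisions add up.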
vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s16[k] - > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s32[k] - > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s64[k] - > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmulu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src1].u8[j] * > + (uint16_t)env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src1].u16[j] * > + (uint32_t)env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src1].u32[j] * > + (uint64_t)env->vfp.vreg[src2].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmulu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = 
vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u8[j] * > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u16[j] * > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u32[j] * > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwmul.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwmul_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_mul( > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src1].f16[j], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_mul( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vfwmul.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwmul_vf)(CPURISCVState *env, 
uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_mul( > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_mul( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->fpr[rs1], &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmulsu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s8[j] * > + (uint16_t)env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s16[j] * > + (uint32_t)env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s32[j] * > + (uint64_t)env->vfp.vreg[src1].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 
0; > +} > +void VECTOR_HELPER(vwmulsu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmul_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src1].s8[j] * > + (int16_t)env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src1].s16[j] * > + (int32_t)env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src1].s32[j] * > + (int64_t)env->vfp.vreg[src2].s32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + 
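The casts in vwmulsu_vv/vwmulsu_vx deserve a comment. In the 32-bit case the (int64_t) operand is converted to (uint64_t) and the multiply is done modulo 2^64; the low 64 bits are still the right signed-times-unsigned product, e.g. (uint64_t)-1 * 3 == 0xfffffffffffffffd == (int64_t)-3. The 8 and 16 bit cases get there differently again - both operands promote to plain (signed) int before the multiply. All three arms compute the right value, it is just not obvious from the code that they do.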
} > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmul_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccu.vv vd, vs1, vs2, vm # > + * vd[i] = clipu((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env, > + env->vfp.vreg[src2].u8[j], > + env->vfp.vreg[src1].u8[j], > + env->vfp.vreg[dest].u16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env, > + env->vfp.vreg[src2].u16[j], > + env->vfp.vreg[src1].u16[j], > + env->vfp.vreg[dest].u32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = 
vwsmaccu_32(env, > + env->vfp.vreg[src2].u32[j], > + env->vfp.vreg[src1].u32[j], > + env->vfp.vreg[dest].u64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccu.vx vd, rs1, vs2, vm # > + * vd[i] = clipu((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env, > + env->vfp.vreg[src2].u8[j], > + env->gpr[rs1], > + env->vfp.vreg[dest].u16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env, > + env->vfp.vreg[src2].u16[j], > + env->gpr[rs1], > + env->vfp.vreg[dest].u32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env, > + env->vfp.vreg[src2].u32[j], > + env->gpr[rs1], > + env->vfp.vreg[dest].u64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmaccu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] += > + (uint16_t)env->vfp.vreg[src1].u8[j] * > + (uint16_t)env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 
16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] += > + (uint32_t)env->vfp.vreg[src1].u16[j] * > + (uint32_t)env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] += > + (uint64_t)env->vfp.vreg[src1].u32[j] * > + (uint64_t)env->vfp.vreg[src2].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmaccu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] += > + (uint16_t)env->vfp.vreg[src2].u8[j] * > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] += > + (uint32_t)env->vfp.vreg[src2].u16[j] * > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] += > + (uint64_t)env->vfp.vreg[src2].u32[j] * > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + > vd[i] */ > +void VECTOR_HELPER(vfwmacc_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, 
i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + float16_to_float32(env->vfp.vreg[src1].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], true, > + &env->fp_status), > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + > vd[i] */ > +void VECTOR_HELPER(vfwmacc_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + env->fpr[rs1], > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], true, > + &env->fp_status), > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + env->fpr[rs1], > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmacc.vv vd, vs1, vs2, vm # > + * vd[i] = clip((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmacc_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if 
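vfwmacc_vv/vfwmacc_vf look buggy in three ways. First, the addend: the comment says vd[i] = +(vs1[i] * vs2[i]) + vd[i] with vd being the 2*SEW register group, but the code reads it back as a narrow element and widens it:

    float16_to_float32(env->vfp.vreg[dest].f16[j], true,
        &env->fp_status),

Shouldn't that simply be env->vfp.vreg[dest].f32[k], the wide element the previous accumulation wrote? Second, vfwmacc_vf feeds env->fpr[rs1] straight into float32_muladd/float64_muladd without first converting it from f16/f32, unlike vfwadd_wf above which does the float16_to_float32 dance on the scalar. Third, the tail handling has no breaks, so every tail element falls through to default and raises an illegal instruction:

    switch (width) {
    case 16:
        env->vfp.vreg[dest].f32[k] = 0;
    case 32:
        env->vfp.vreg[dest].f64[k] = 0;
    default:
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
        return;
    }

Why not vector_tail_fwiden() like the other widening fp helpers? Putting that together, a sketch of the 16-bit arm as I would expect it (untested):

    case 16:
        if (vector_elem_mask(env, vm, width, lmul, i)) {
            env->vfp.vreg[dest].f32[k] = float32_muladd(
                float16_to_float32(env->vfp.vreg[src1].f16[j], true,
                                   &env->fp_status),
                float16_to_float32(env->vfp.vreg[src2].f16[j], true,
                                   &env->fp_status),
                env->vfp.vreg[dest].f32[k],    /* wide accumulator */
                0, &env->fp_status);
        }
        break;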
(vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env, > + env->vfp.vreg[src2].s8[j], > + env->vfp.vreg[src1].s8[j], > + env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env, > + env->vfp.vreg[src2].s16[j], > + env->vfp.vreg[src1].s16[j], > + env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env, > + env->vfp.vreg[src2].s32[j], > + env->vfp.vreg[src1].s32[j], > + env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmacc.vx vd, rs1, vs2, vm # > + * vd[i] = clip((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmacc_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env, > + env->vfp.vreg[src2].s8[j], > + env->gpr[rs1], > + env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env, > + env->vfp.vreg[src2].s16[j], > + env->gpr[rs1], > + env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env, > + env->vfp.vreg[src2].s32[j], > + env->gpr[rs1], > + env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, > 
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccsu.vv vd, vs1, vs2, vm > + * # vd[i] = > clip(-((signed(vs1[i])*unsigned(vs2[i])+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccsu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env, > + env->vfp.vreg[src2].u8[j], > + env->vfp.vreg[src1].s8[j], > + env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env, > + env->vfp.vreg[src2].u16[j], > + env->vfp.vreg[src1].s16[j], > + env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env, > + env->vfp.vreg[src2].u32[j], > + env->vfp.vreg[src1].s32[j], > + env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccsu.vx vd, rs1, vs2, vm > + * # vd[i] = > clip(-((signed(x[rs1])*unsigned(vs2[i])+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccsu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env, > + 
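Two questions on the fixed point multiply-accumulates: the vwsmaccu_8/16/32, vwsmacc_* and vwsmaccsu_* helpers are not in this hunk - presumably they pick the rounding mode up from vxrm and set vxsat when they clip? A pointer to where that logic lives would help; the "+round) >> SEW/2" step is easy to get wrong across the four rounding modes. Also, the vwsmaccsu_vv comment says clip(-((signed(vs1[i])*unsigned(vs2[i])+round)>>SEW/2)+vd[i]) while the unsigned variant above is documented with +(...). Is the leading minus really what the 0.7.1 draft says? Worth double checking the comment against the helper's actual behaviour either way.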
env->vfp.vreg[src2].u8[j], > + env->gpr[rs1], > + env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env, > + env->vfp.vreg[src2].u16[j], > + env->gpr[rs1], > + env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env, > + env->vfp.vreg[src2].u32[j], > + env->gpr[rs1], > + env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccus.vx vd, rs1, vs2, vm > + * # vd[i] = > clip(-((unsigned(x[rs1])*signed(vs2[i])+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccus_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmaccus_8(env, > + env->vfp.vreg[src2].s8[j], > + env->gpr[rs1], > + env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmaccus_16(env, > + env->vfp.vreg[src2].s16[j], > + env->gpr[rs1], > + env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmaccus_32(env, > + env->vfp.vreg[src2].s32[j], > + env->gpr[rs1], > + env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vwmacc_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 
+ (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (int16_t)env->vfp.vreg[src1].s8[j] > + * (int16_t)env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (int32_t)env->vfp.vreg[src1].s16[j] * > + (int32_t)env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (int64_t)env->vfp.vreg[src1].s32[j] * > + (int64_t)env->vfp.vreg[src2].s32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmacc_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - > vd[i] */ > +void VECTOR_HELPER(vfwnmacc_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + 
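Another repeated pattern that could be simplified: every case arm re-tests vector_elem_mask. Hoisting the test out of the switch, e.g. (untested):

    } else if (i < vl) {
        if (!vector_elem_mask(env, vm, width, lmul, i)) {
            continue;   /* masked-off body elements are left unchanged */
        }
        switch (width) {
        ...
        }
    }

halves the indentation and makes it obvious at a glance that masked-off elements are untouched.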
> + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + float16_to_float32(env->vfp.vreg[src1].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], true, > + &env->fp_status), > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - > vd[i] */ > +void VECTOR_HELPER(vfwnmacc_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + env->fpr[rs1], > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], true, > + &env->fp_status), > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + env->fpr[rs1], > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + 
return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmaccsu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (int16_t)env->vfp.vreg[src1].s8[j] > + * (uint16_t)env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (int32_t)env->vfp.vreg[src1].s16[j] * > + (uint32_t)env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (int64_t)env->vfp.vreg[src1].s32[j] * > + (uint64_t)env->vfp.vreg[src2].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmaccsu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (uint16_t)((uint8_t)env->vfp.vreg[src2].u8[j]) * > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (uint32_t)((uint16_t)env->vfp.vreg[src2].u16[j]) * > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, 
i)) { > + env->vfp.vreg[dest].s64[k] += > + (uint64_t)((uint32_t)env->vfp.vreg[src2].u32[j]) * > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - > vd[i] */ > +void VECTOR_HELPER(vfwmsac_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + float16_to_float32(env->vfp.vreg[src1].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], true, > + &env->fp_status), > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - > vd[i] */ > +void VECTOR_HELPER(vfwmsac_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + 
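In the .vf forms (vfwnmacc_vf above, vfwmsac_vf here) the vector operand is widened with float16_to_float32 for SEW=16, but f[rs1] is fed to float32_muladd raw. If the scalar is a NaN-boxed f16 at SEW=16, doesn't it need unboxing and widening the same way as the vector operand? Roughly this, though how you unbox is up for grabs (sketch only):

                    env->vfp.vreg[dest].f32[k] = float32_muladd(
                        float16_to_float32(make_float16((uint16_t)env->fpr[rs1]),
                                           true, &env->fp_status),
                        ...);

Same question for the SEW=32 cases and float64_muladd.
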
dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + env->fpr[rs1], > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], true, > + &env->fp_status), > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + env->fpr[rs1], > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmaccus_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + > vd[i] */ > +void VECTOR_HELPER(vfwnmsac_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 
* lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + float16_to_float32(env->vfp.vreg[src1].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], true, > + &env->fp_status), > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vfwnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + > vd[i] */ > +void VECTOR_HELPER(vfwnmsac_vf)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + env->fpr[rs1], > + float16_to_float32(env->vfp.vreg[src2].f16[j], true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], true, > + &env->fp_status), > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + env->fpr[rs1], > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + 
float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > + > +/* vfsqrt.v vd, vs2, vm # Vector-vector square root */ > +void VECTOR_HELPER(vfsqrt_v)(CPURISCVState *env, uint32_t vm, > uint32_t rs2, > + uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_sqrt( > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_sqrt( > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_sqrt( > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfclass.v vd, vs2, vm # Vector-vector */ > +void VECTOR_HELPER(vfclass_v)(CPURISCVState *env, uint32_t vm, > uint32_t rs2, > + uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = helper_fclass_h( > + env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + 
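vfsqrt_v above has the same fall-through tail switch (all three widths zeroed in sequence, then an illegal instruction raised), while vfclass_v here just calls the helper. Using it consistently would fix the fall-through in one go:

            } else {
                vector_tail_fcommon(env, dest, j, width);
            }
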
env->vfp.vreg[dest].u32[j] = helper_fclass_s( > + env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = helper_fclass_d( > + env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ > +void VECTOR_HELPER(vfcvt_xu_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = float16_to_uint16( > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = float32_to_uint32( > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = float64_to_uint64( > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. 
*/
> +void VECTOR_HELPER(vfcvt_x_f_v)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int width, lmul, vl, vlmax;
> +    int i, j, dest, src2;
> +
> +    lmul = vector_get_lmul(env);
> +    vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, rs2, false);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    if (env->vfp.vstart >= vl) {
> +        return;
> +    }
> +
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        dest = rd + (i / (VLEN / width));
> +        src2 = rs2 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] = float16_to_int16(
> +                        env->vfp.vreg[src2].f16[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] = float32_to_int32(
> +                        env->vfp.vreg[src2].f32[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s64[j] = float64_to_int64(
> +                        env->vfp.vreg[src2].f64[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
> +void VECTOR_HELPER(vfcvt_f_xu_v)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int width, lmul, vl, vlmax;
> +    int i, j, dest, src2;
> +
> +    lmul = vector_get_lmul(env);
> +    vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    vector_lmul_check_reg(env, lmul, rs2, false);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    if (env->vfp.vstart >= vl) {
> +        return;
> +    }
> +
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        dest = rd + (i / (VLEN / width));
> +        src2 = rs2 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[j] = uint16_to_float16(
> +                        env->vfp.vreg[src2].u16[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[j] = uint32_to_float32(
> +                        env->vfp.vreg[src2].u32[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f64[j] = uint64_to_float64(
> +                        env->vfp.vreg[src2].u64[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fcommon(env, dest, j, width);
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float.
*/ > +void VECTOR_HELPER(vfcvt_f_x_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = int16_to_float16( > + env->vfp.vreg[src2].s16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = int32_to_float32( > + env->vfp.vreg[src2].s32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = int64_to_float64( > + env->vfp.vreg[src2].s64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width > unsigned integer.*/ > +void VECTOR_HELPER(vfwcvt_xu_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = float16_to_uint32( > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = float32_to_uint64( > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + } else { > + vector_tail_fwiden(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width > signed integer. 
*/ > +void VECTOR_HELPER(vfwcvt_x_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = float16_to_int32( > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = float32_to_int64( > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to > double-width float */ > +void VECTOR_HELPER(vfwcvt_f_xu_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = uint16_to_float32( > + env->vfp.vreg[src2].u16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = uint32_to_float64( > + env->vfp.vreg[src2].u32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width > float. 
*/ > +void VECTOR_HELPER(vfwcvt_f_x_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = int16_to_float32( > + env->vfp.vreg[src2].s16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = int32_to_float64( > + env->vfp.vreg[src2].s32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vfwcvt.f.f.v vd, vs2, vm # > + * Convert single-width float to double-width float. > + */ > +void VECTOR_HELPER(vfwcvt_f_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float16_to_float32( > + env->vfp.vreg[src2].f16[j], > + true, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float32_to_float64( > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ > +void VECTOR_HELPER(vfncvt_xu_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = float32_to_uint16( > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = float64_to_uint32( > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to > signed integer. */ > +void VECTOR_HELPER(vfncvt_x_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = float32_to_int16( > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = float64_to_int32( > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned > integer to float */ > +void VECTOR_HELPER(vfncvt_f_xu_v)(CPURISCVState *env, uint32_t vm, > + uint32_t 
rs2, uint32_t rd)
> +{
> +    int width, lmul, vl, vlmax;
> +    int i, j, k, dest, src2;
> +
> +    lmul = vector_get_lmul(env);
> +    vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) ||
> +        vector_overlap_vm_common(lmul, vm, rd) ||
> +        vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    vector_lmul_check_reg(env, lmul, rs2, true);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    if (lmul > 4) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    if (env->vfp.vstart >= vl) {
> +        return;
> +    }
> +
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        dest = rd + (i / (VLEN / width));
> +        src2 = rs2 + (i / (VLEN / (2 * width)));
> +        k = i % (VLEN / width);
> +        j = i % (VLEN / (2 * width));
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[k] = uint32_to_float16(
> +                        env->vfp.vreg[src2].u32[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[k] = uint64_to_float32(
> +                        env->vfp.vreg[src2].u64[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fnarrow(env, dest, k, width);
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
> +void VECTOR_HELPER(vfncvt_f_x_v)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int width, lmul, vl, vlmax;
> +    int i, j, k, dest, src2;
> +
> +    lmul = vector_get_lmul(env);
> +    vl = env->vfp.vl;
> +    if (vector_vtype_ill(env) ||
> +        vector_overlap_vm_common(lmul, vm, rd) ||
> +        vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, rs2, true);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    if (lmul > 4) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    if (env->vfp.vstart >= vl) {
> +        return;
> +    }
> +
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        dest = rd + (i / (VLEN / width));
> +        src2 = rs2 + (i / (VLEN / (2 * width)));
> +        k = i % (VLEN / width);
> +        j = i % (VLEN / (2 * width));
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f16[k] = int32_to_float16(
> +                        env->vfp.vreg[src2].s32[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].f32[k] = int64_to_float32(
> +                        env->vfp.vreg[src2].s64[j],
> +                        &env->fp_status);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_fnarrow(env, dest, k, width);
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float.
*/ > +void VECTOR_HELPER(vfncvt_f_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[k] = float32_to_float16( > + env->vfp.vreg[src2].f32[j], > + true, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float64_to_float32( > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlbu_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + 
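Two things on vlbu_v: the segment addressing, read = i * (nf + 1) + k, looks right - for nf=1, element i pulls its two fields from base + 2*i and base + 2*i + 1 - but env->vfp.vstart is only bumped for active elements. If the load faults part-way through with some elements masked off, vstart no longer equals the element index, so the i < env->vfp.vstart test on restart skips the wrong elements. I would have expected the index itself to be tracked (sketch):

                if (vector_elem_mask(env, vm, width, lmul, i)) {
                    /* ... per-field loads ... */
                }
                env->vfp.vstart = i + 1;  /* the index, not the active count */
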
default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlb_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s8[j] = > + cpu_ldsb_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s16[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + > read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + > read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + > read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlsbu_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > 
+ while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlsb_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].s8[j] = > + cpu_ldsb_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].s16[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + > read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + > read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + > read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxbu_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { 
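In the strided variants above, read is an int but is computed as i * env->gpr[rs2] + k, so on RV64 a stride that doesn't fit in 32 bits (or a large negative one) is truncated or overflows before it ever reaches cpu_ldub_data. The indexed versions below already compute a target_ulong addr; the strided ones want the same treatment, roughly:

    target_ulong stride = env->gpr[rs2];
    target_ulong addr = env->gpr[rs1] + (target_ulong)i * stride + k;
    env->vfp.vreg[dest + k * lmul].u8[j] = cpu_ldub_data(env, addr);
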
> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxb_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + env->vfp.vreg[dest + k * lmul].s8[j] = > + cpu_ldsb_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + env->vfp.vreg[dest + k * lmul].s16[j] = > sign_extend( > + cpu_ldsb_data(env, addr), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsb_data(env, 
addr), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsb_data(env, addr), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlbuff_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlbff_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) 
{ > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s8[j] = > + cpu_ldsb_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s16[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + > read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + > read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + > read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlhu_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlh_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t 
rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s16[j] = > + cpu_ldsw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + > read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + > read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlshu_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + 
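
These strided helpers (and the unit-stride and indexed ones above) are the
same function stamped out over and over, differing only in access width,
signedness and the address formula — that pattern is where most of this
file's bulk comes from. Could the bodies be macro-generated from a small
table instead? A rough standalone sketch of the shape I mean; the VReg
union and all the names here are my stand-ins, not this patch's real
types, and masking/segment handling is elided:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* stand-in vector register; the real one is env->vfp.vreg[] */
    typedef union {
        int16_t s16[8];
        int32_t s32[4];
        int64_t s64[2];
    } VReg;

    /*
     * One instance per (memory type, element type) pair.  The MTYPE ->
     * ETYPE assignment does the sign/zero extension that the patch
     * open-codes as sign_extend(cpu_ldsw_data(...), 16); memcpy stands
     * in for the cpu_ld*_data() call.
     */
    #define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, FIELD)               \
    static void NAME(VReg *vd, const char *base, long stride, int vl)   \
    {                                                                   \
        for (int i = 0; i < vl; i++) {                                  \
            MTYPE v;                                                    \
            memcpy(&v, base + i * stride, sizeof(v));                   \
            vd->FIELD[i] = (ETYPE)v;                                    \
        }                                                               \
    }

    GEN_VEXT_LD_STRIDE(vext_ldsh_h, int16_t, int16_t, s16) /* vlsh, SEW=16 */
    GEN_VEXT_LD_STRIDE(vext_ldsh_w, int16_t, int32_t, s32) /* vlsh, SEW=32 */

    int main(void)
    {
        int16_t mem[4] = { 1, -2, 3, -4 };
        VReg vd;

        vext_ldsh_w(&vd, (const char *)mem, sizeof(int16_t), 4);
        printf("%d %d %d %d\n", vd.s32[0], vd.s32[1], vd.s32[2], vd.s32[3]);
        return 0;
    }

With one macro per access pattern and a table of instantiations, the
switch (width) blocks disappear entirely.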
return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlsh_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].s16[j] = > + cpu_ldsw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + > read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + > read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxhu_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 2, width, k); > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 2, width, k); > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_lduw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 2, width, k); > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_lduw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxh_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 2, width, k); > + env->vfp.vreg[dest + k * lmul].s16[j] = > + cpu_ldsw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 2, width, k); > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsw_data(env, addr), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 2, width, k); > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsw_data(env, addr), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlhuff_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, 
env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlhff_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s16[j] = > + cpu_ldsw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + > read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + > read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlw_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + 
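
The prologue here — the vtype check, the v0 overlap check, the
lmul * (nf + 1) limit, vector_lmul_check_reg() — is repeated verbatim in
every helper in this file. Hoisting it would make later fixes one-liners.
An untested sketch using the names this patch already defines; the ra
argument is my addition so that GETPC() is still taken in the top-level
helper:

    static void vext_common_check(CPURISCVState *env, int lmul, uint32_t nf,
                                  uint32_t vm, uint32_t rd, uintptr_t ra)
    {
        if (vector_vtype_ill(env) ||
            vector_overlap_vm_common(lmul, vm, rd) ||
            lmul * (nf + 1) > 32) {
            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra);
        }
        vector_lmul_check_reg(env, lmul, rd, false);
    }

so each helper opens with vext_common_check(env, lmul, nf, vm, rd,
GETPC()). Better still, most of these conditions are known at translate
time — can't they be rejected in trans_rvv.inc.c instead of being
re-checked on every execution?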
vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].s32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldl_data(env, env->gpr[rs1] + > read), 32); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlwu_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlswu_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * 
env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlsw_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].s32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldl_data(env, env->gpr[rs1] + > read), 32); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxwu_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 4, width, k); > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > 
+ addr = vector_get_index(env, rs1, src2, > j, 4, width, k); > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldl_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxw_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 4, width, k); > + env->vfp.vreg[dest + k * lmul].s32[j] = > + cpu_ldl_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 4, width, k); > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldl_data(env, addr), 32); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlwuff_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + 
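
If I'm reading the *ff helpers right, the fault-only-first bookkeeping
(zero vfp.vl, bump it per element, restore the saved vl on the success
path) only works because a faulting cpu_ld*_data() longjmps out of the
helper and leaves the partial count behind. That really deserves a
comment in the code. Two questions: a fault on element 0 must still trap
rather than just leave vl == 0 — where is that guaranteed? And who clears
foflag when the load actually does fault and the helper never reaches its
env->foflag = false? For comparison, target/arm's SVE first-fault loads
probe with tlb_vaddr_to_host() and only truncate when the probe fails for
a non-zero element, which avoids the save/restore dance entirely.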
break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlwff_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].s32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldl_data(env, env->gpr[rs1] + > read), 32); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vle_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, 
env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 8; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldq_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlse_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 8; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldq_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxe_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + 
continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 2, width, k); > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 4, width, k); > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 8, width, k); > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldq_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vleff_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + env->vfp.vl = 0; > + env->foflag = true; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 8; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldq_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, 
lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsb_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vssb_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, 
width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsxb_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsuxb_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + return VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd); > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsh_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vssh_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsxh_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + 
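
Two things in the store helpers above. vsuxb_v (and the other vsux*_v
wrappers) do

    return VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd);
    env->vfp.vstart = 0;

— the assignment after the return is dead code, and returning a void
expression is odd style. If the unordered form really is just the ordered
one, a plain call plus a comment saying so is clearer:

    void VECTOR_HELPER(vsuxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
                                uint32_t rs1, uint32_t rs2, uint32_t rd)
    {
        /* unordered store: implemented as the ordered variant */
        VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd);
    }

Second, stores have no vector destination, so what is
vector_overlap_vm_common(lmul, vm, rd) guarding against here? rd names
the store-data source, and the v0 overlap rule in the spec is about
destinations.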
return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 2, width, k); > + cpu_stw_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 2, width, k); > + cpu_stw_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 2, width, k); > + cpu_stw_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsuxh_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + return VECTOR_HELPER(vsxh_v)(env, nf, vm, rs1, rs2, rd); > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsw_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vssw_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + 
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsxw_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 4, width, k); > + cpu_stl_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 4, width, k); > + cpu_stl_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsuxw_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + return VECTOR_HELPER(vsxw_v)(env, nf, vm, rs1, rs2, rd); > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vse_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > 
+ } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 8; > + cpu_stq_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsse_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 8; > + cpu_stq_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > 
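
A general question about the vstart handling in all of these loops:
env->vfp.vstart is only incremented inside the vector_elem_mask() branch,
so whenever an element is masked off, vstart falls behind the element
index i. If a later access faults and the instruction is restarted, the
i < env->vfp.vstart test will then skip the wrong number of elements.
Shouldn't it track the element index unconditionally? Something like
(names from this patch, tail handling elided):

    for (i = env->vfp.vstart; i < vl; i++) {
        /* per-element setup */
        if (vector_elem_mask(env, vm, width, lmul, i)) {
            /* do the access */
        }
        env->vfp.vstart = i + 1;
    }

which also lets the loop start at vstart instead of testing it on every
iteration.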
+void VECTOR_HELPER(vsxe_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, > vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 1, width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 2, width, k); > + cpu_stw_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 4, width, k); > + cpu_stl_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, > j, 8, width, k); > + cpu_stq_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsuxe_v)(CPURISCVState *env, uint32_t nf, > uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + return VECTOR_HELPER(vsxe_v)(env, nf, vm, rs1, rs2, rd); > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoswapw_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + 
continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_xchgl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_xchgl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamoswapd_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_xchgq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_xchgq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoaddw_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = 
cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_addl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_addl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_addl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_addl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vamoaddd_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = 
(target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_addq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_addq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoxorw_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_xorl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_xorl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamoxord_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + > + vl = env->vfp.vl; > + lmul = 
vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_xorq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_xorq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoandw_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_andl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_andl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_andl_le(env, > + addr, 
env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_andl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoandd_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_andq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_andq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoorw_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / 
width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_orl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_orl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_orl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_orl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoord_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_orq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_orq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamominw_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > 
+#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_sminl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_sminl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamomind_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + 
int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_sminq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_sminq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamomaxw_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_smaxl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_smaxl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamomaxd_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + 
lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_smaxq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_smaxq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamominuw_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_uminl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_uminl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_uminl_le( > + 
env, addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_uminl_le( > + env, addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamominud_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_uminl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_uminl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_uminq_le( > + env, addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_uminq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamomaxuw_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 
8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_umaxl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_umaxl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_umaxl_le( > + env, addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_umaxl_le( > + env, addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vamomaxud_v)(CPURISCVState *env, uint32_t wd, > uint32_t vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_umaxq_le( > + env, addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_umaxq_le(env, addr, > + 
env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, > RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > -- > 2.7.4 > >
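A side note on the unordered stores above: in both vsuxw_v and vsuxe_v the `env->vfp.vstart = 0;` placed after the `return` statement is unreachable, and `return f(...);` where f returns void is not portable C. Since the ordered variants they delegate to already clear vstart, a minimal sketch of the tidied form (keeping the patch's names) would be:

    /* Sketch only: the unordered indexed store simply delegates to the
     * ordered variant, which already resets env->vfp.vstart itself. */
    void VECTOR_HELPER(vsuxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
                                uint32_t rs1, uint32_t rs2, uint32_t rd)
    {
        VECTOR_HELPER(vsxe_v)(env, nf, vm, rs1, rs2, rd);
    }

vsuxw_v can delegate to vsxw_v the same way.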
Hi, Alex On 2019/8/28 5:08 PM, Alex Bennée wrote: > liuzhiwei <zhiwei_liu@c-sky.com> writes: > >> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25 >> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com> >> --- >> fpu/softfloat.c | 119 + >> include/fpu/softfloat.h | 4 + > Changes to softfloat should be in a separate patch, but see bellow. > >> linux-user/riscv/cpu_loop.c | 8 +- >> target/riscv/Makefile.objs | 2 +- >> target/riscv/cpu.h | 30 + >> target/riscv/cpu_bits.h | 15 + >> target/riscv/cpu_helper.c | 7 + >> target/riscv/csr.c | 65 +- >> target/riscv/helper.h | 354 + >> target/riscv/insn32.decode | 374 +- >> target/riscv/insn_trans/trans_rvv.inc.c | 484 + >> target/riscv/translate.c | 1 + >> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++ > This is likely too big to be reviewed. Is it possible to split the patch > up into more discrete chunks, for example support pieces and then maybe > a class at a time? Yes, a patch set with a cover letter will be sent later. > >> 13 files changed, 28017 insertions(+), 9 deletions(-) >> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c >> create mode 100644 target/riscv/vector_helper.c >> >> diff --git a/fpu/softfloat.c b/fpu/softfloat.c >> index 2ba36ec..da155ea 100644 >> --- a/fpu/softfloat.c >> +++ b/fpu/softfloat.c >> @@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a) >> } >> >> /*---------------------------------------------------------------------------- >> +| Returns the sign bit of the half-precision floating-point value `a'. >> +*----------------------------------------------------------------------------*/ >> + >> +static inline flag extractFloat16Sign(float16 a) >> +{ >> + return float16_val(a) >> 0xf; >> +} >> + > We are trying to avoid this sort of bit fiddling for new code when we > already have generic decompose functions that can extract all the parts > into a common format. > >> + >> +/*---------------------------------------------------------------------------- >> | Returns the fraction bits of the single-precision floating-point value `a'. >> *----------------------------------------------------------------------------*/ >> >> @@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b, float_status *status) >> } >> >> /*---------------------------------------------------------------------------- >> +| Returns 1 if the half-precision floating-point value `a' is less than >> +| or equal to the corresponding value `b', and 0 otherwise. The invalid >> +| exception is raised if either operand is a NaN. The comparison is performed >> +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>> +*----------------------------------------------------------------------------*/ >> + >> +int float16_le(float16 a, float16 b, float_status *status) >> +{ >> + flag aSign, bSign; >> + uint16_t av, bv; >> + a = float16_squash_input_denormal(a, status); >> + b = float16_squash_input_denormal(b, status); >> + >> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) ) >> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) ) >> + ) { >> + float_raise(float_flag_invalid, status); >> + return 0; >> + } >> + aSign = extractFloat16Sign( a ); >> + bSign = extractFloat16Sign( b ); >> + av = float16_val(a); >> + bv = float16_val(b); >> + if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av | bv )<<1 ) == 0 ); >> + return ( av == bv ) || ( aSign ^ ( av < bv ) ); >> + >> +} > What does this provide that: > > float16_compare(a, b, status) == float_relation_less; > > doesn't? > >> + >> +/*---------------------------------------------------------------------------- >> | Returns 1 if the single-precision floating-point value `a' is less than >> | or equal to the corresponding value `b', and 0 otherwise. The invalid >> | exception is raised if either operand is a NaN. The comparison is performed >> @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, float_status *status) >> | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. >> *----------------------------------------------------------------------------*/ >> >> +int float16_lt(float16 a, float16 b, float_status *status) >> +{ >> + flag aSign, bSign; >> + uint16_t av, bv; >> + a = float16_squash_input_denormal(a, status); >> + b = float16_squash_input_denormal(b, status); >> + >> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) ) >> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) ) >> + ) { >> + float_raise(float_flag_invalid, status); >> + return 0; >> + } >> + aSign = extractFloat16Sign( a ); >> + bSign = extractFloat16Sign( b ); >> + av = float16_val(a); >> + bv = float16_val(b); >> + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | bv )<<1 ) != 0 ); >> + return ( av != bv ) && ( aSign ^ ( av < bv ) ); >> + >> +} >> + >> +/*---------------------------------------------------------------------------- >> +| Returns 1 if the single-precision floating-point value `a' is less than >> +| the corresponding value `b', and 0 otherwise. The invalid exception is >> +| raised if either operand is a NaN. The comparison is performed according >> +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. >> +*----------------------------------------------------------------------------*/ >> + >> int float32_lt(float32 a, float32 b, float_status *status) >> { >> flag aSign, bSign; >> @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, float_status *status) >> } >> >> /*---------------------------------------------------------------------------- >> +| Returns 1 if the half-precision floating-point value `a' is equal to >> +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an >> +| exception. The comparison is performed according to the IEC/IEEE Standard >> +| for Binary Floating-Point Arithmetic. 
>> +*----------------------------------------------------------------------------*/ >> + >> +int float16_eq_quiet(float16 a, float16 b, float_status *status) >> +{ >> + a = float16_squash_input_denormal(a, status); >> + b = float16_squash_input_denormal(b, status); >> + >> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) ) >> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) ) >> + ) { >> + if (float16_is_signaling_nan(a, status) >> + || float16_is_signaling_nan(b, status)) { >> + float_raise(float_flag_invalid, status); >> + } >> + return 0; >> + } >> + return ( float16_val(a) == float16_val(b) ) || >> + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 ) == 0 ); >> +} >> + > See also float_16_compare_quiet Thank you for reminding me. I didn't find the float16_compare and float16_compare_quiet interfaces before. >> + >> +/*---------------------------------------------------------------------------- >> | Returns 1 if the single-precision floating-point value `a' is equal to >> | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an >> | exception. The comparison is performed according to the IEC/IEEE Standard >> @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, float_status *status) >> } >> >> /*---------------------------------------------------------------------------- >> +| Returns 1 if the half-precision floating-point values `a' and `b' cannot >> +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The >> +| comparison is performed according to the IEC/IEEE Standard for Binary >> +| Floating-Point Arithmetic. >> +*----------------------------------------------------------------------------*/ >> + >> +int float16_unordered_quiet(float16 a, float16 b, float_status *status) >> +{ >> + a = float16_squash_input_denormal(a, status); >> + b = float16_squash_input_denormal(b, status); >> + >> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) ) >> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) ) >> + ) { >> + if (float16_is_signaling_nan(a, status) >> + || float16_is_signaling_nan(b, status)) { >> + float_raise(float_flag_invalid, status); >> + } >> + return 1; >> + } >> + return 0; >> +} >> + >> + >> +/*---------------------------------------------------------------------------- >> | Returns 1 if the single-precision floating-point values `a' and `b' cannot >> | be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
The >> | comparison is performed according to the IEC/IEEE Standard for Binary >> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h >> index 3ff3fa5..3b0754c 100644 >> --- a/include/fpu/softfloat.h >> +++ b/include/fpu/softfloat.h >> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status); >> float16 float16_sqrt(float16, float_status *status); >> int float16_compare(float16, float16, float_status *status); >> int float16_compare_quiet(float16, float16, float_status *status); >> +int float16_unordered_quiet(float16, float16, float_status *status); >> +int float16_le(float16, float16, float_status *status); >> +int float16_lt(float16, float16, float_status *status); >> +int float16_eq_quiet(float16, float16, float_status *status); >> >> int float16_is_quiet_nan(float16, float_status *status); >> int float16_is_signaling_nan(float16, float_status *status); >> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c >> index 12aa3c0..b01548a 100644 >> --- a/linux-user/riscv/cpu_loop.c >> +++ b/linux-user/riscv/cpu_loop.c >> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env) >> signum = 0; >> sigcode = 0; >> sigaddr = 0; >> - >> + if (env->foflag) { >> + if (env->vfp.vl != 0) { >> + env->foflag = false; >> + env->pc += 4; >> + continue; >> + } >> + } > What is this trying to do? Handle Fault-only-first exception. > >> switch (trapnr) { >> case EXCP_INTERRUPT: >> /* just indicate that signals should be handled asap */ >> diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs >> index b1c79bc..d577cef 100644 >> --- a/target/riscv/Makefile.objs >> +++ b/target/riscv/Makefile.objs >> @@ -1,4 +1,4 @@ >> -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o pmp.o >> +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o pmp.o >> >> DECODETREE = $(SRC_PATH)/scripts/decodetree.py >> >> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h >> index 0adb307..5a93aa2 100644 >> --- a/target/riscv/cpu.h >> +++ b/target/riscv/cpu.h >> @@ -67,6 +67,7 @@ >> #define RVC RV('C') >> #define RVS RV('S') >> #define RVU RV('U') >> +#define RVV RV('V') >> >> /* S extension denotes that Supervisor mode exists, however it is possible >> to have a core that support S mode but does not have an MMU and there >> @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState; >> >> #include "pmp.h" >> >> +#define VLEN 128 >> +#define VUNIT(x) (VLEN / x) >> + > If you want to do vectors I suggest you look at the TCGvec types for > passing pointers to vector registers to helpers. In this case you will > want to ensure your vector registers are properly aligned. > >> struct CPURISCVState { >> target_ulong gpr[32]; >> uint64_t fpr[32]; /* assume both F and D extensions */ >> + >> + /* vector coprocessor state. */ >> + struct { >> + union VECTOR { >> + float64 f64[VUNIT(64)]; >> + float32 f32[VUNIT(32)]; >> + float16 f16[VUNIT(16)]; >> + target_ulong ul[VUNIT(sizeof(target_ulong))]; >> + uint64_t u64[VUNIT(64)]; >> + int64_t s64[VUNIT(64)]; >> + uint32_t u32[VUNIT(32)]; >> + int32_t s32[VUNIT(32)]; >> + uint16_t u16[VUNIT(16)]; >> + int16_t s16[VUNIT(16)]; >> + uint8_t u8[VUNIT(8)]; >> + int8_t s8[VUNIT(8)]; >> + } vreg[32]; >> + target_ulong vxrm; >> + target_ulong vxsat; >> + target_ulong vl; >> + target_ulong vstart; >> + target_ulong vtype; >> + float_status fp_status; >> + } vfp; >> + >> + bool foflag; > Again I have no idea what foflag is here. 
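"Fault-only-first" here refers to the vleff.v family of loads: a fault on element 0 must trap as usual, but a fault on a later element only truncates vl to the number of elements that completed, and the instruction finishes without trapping. The foflag field marks such a load in flight so the trap path can tell the two cases apart. A commented sketch of the intended control flow in cpu_loop (not the patch's exact code; it assumes the load helper has already truncated vl before the trap is delivered):

    if (env->foflag && env->vfp.vl != 0) {
        /* A fault-only-first load faulted past element 0: vl has been
         * truncated by the helper, so suppress the signal and resume
         * execution after the load. */
        env->foflag = false;   /* one-shot flag set by the vleff helper */
        env->pc += 4;          /* assumes the 4-byte (uncompressed) encoding */
        continue;              /* back to the top of the CPU loop */
    }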
> >> target_ulong pc; >> target_ulong load_res; >> target_ulong load_val; >> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h >> index 11f971a..9eb43ec 100644 >> --- a/target/riscv/cpu_bits.h >> +++ b/target/riscv/cpu_bits.h >> @@ -29,6 +29,14 @@ >> #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) >> #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA) >> >> +/* Vector Fixed-Point round model */ >> +#define FSR_VXRM_SHIFT 9 >> +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT) >> + >> +/* Vector Fixed-Point saturation flag */ >> +#define FSR_VXSAT_SHIFT 8 >> +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT) >> + >> /* Control and Status Registers */ >> >> /* User Trap Setup */ >> @@ -48,6 +56,13 @@ >> #define CSR_FRM 0x002 >> #define CSR_FCSR 0x003 >> >> +/* User Vector CSRs */ >> +#define CSR_VSTART 0x008 >> +#define CSR_VXSAT 0x009 >> +#define CSR_VXRM 0x00a >> +#define CSR_VL 0xc20 >> +#define CSR_VTYPE 0xc21 >> + >> /* User Timers and Counters */ >> #define CSR_CYCLE 0xc00 >> #define CSR_TIME 0xc01 >> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c >> index e32b612..405caf6 100644 >> --- a/target/riscv/cpu_helper.c >> +++ b/target/riscv/cpu_helper.c >> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs) >> [PRV_H] = RISCV_EXCP_H_ECALL, >> [PRV_M] = RISCV_EXCP_M_ECALL >> }; >> + if (env->foflag) { >> + if (env->vfp.vl != 0) { >> + env->foflag = false; >> + env->pc += 4; >> + return; >> + } >> + } >> >> if (!async) { >> /* set tval to badaddr for traps with address information */ >> diff --git a/target/riscv/csr.c b/target/riscv/csr.c >> index e0d4586..a6131ff 100644 >> --- a/target/riscv/csr.c >> +++ b/target/riscv/csr.c >> @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno) >> return 0; >> } >> >> -#if !defined(CONFIG_USER_ONLY) >> static int any(CPURISCVState *env, int csrno) >> { >> return 0; >> } >> >> +#if !defined(CONFIG_USER_ONLY) >> static int smode(CPURISCVState *env, int csrno) >> { >> return -!riscv_has_ext(env, RVS); >> @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val) >> return -1; >> } >> #endif >> - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) >> - | (env->frm << FSR_RD_SHIFT); >> + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT) >> + | (env->vfp.vxsat << FSR_VXSAT_SHIFT) >> + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) >> + | (env->frm << FSR_RD_SHIFT); >> return 0; >> } >> >> @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val) >> env->mstatus |= MSTATUS_FS; >> #endif >> env->frm = (val & FSR_RD) >> FSR_RD_SHIFT; >> + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT; >> + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT; >> riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT); >> return 0; >> } >> >> +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val) >> +{ >> + *val = env->vfp.vtype; >> + return 0; >> +} >> + >> +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val) >> +{ >> + *val = env->vfp.vl; >> + return 0; >> +} >> + >> +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val) >> +{ >> + *val = env->vfp.vxrm; >> + return 0; >> +} >> + >> +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val) >> +{ >> + *val = env->vfp.vxsat; >> + return 0; >> +} >> + >> +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val) >> +{ >> + *val = env->vfp.vstart; >> + return 0; >> +} >> + >> +static int write_vxrm(CPURISCVState *env, int csrno, 
target_ulong val) >> +{ >> + env->vfp.vxrm = val; >> + return 0; >> +} >> + >> +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val) >> +{ >> + env->vfp.vxsat = val; >> + return 0; >> +} >> + >> +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val) >> +{ >> + env->vfp.vstart = val; >> + return 0; >> +} > A fixed return value makes me think these should be void functions. Good! > >> + >> /* User Timers and Counters */ >> static int read_instret(CPURISCVState *env, int csrno, target_ulong *val) >> { >> @@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { >> [CSR_FFLAGS] = { fs, read_fflags, write_fflags }, >> [CSR_FRM] = { fs, read_frm, write_frm }, >> [CSR_FCSR] = { fs, read_fcsr, write_fcsr }, >> - >> + /* Vector CSRs */ >> + [CSR_VSTART] = { any, read_vstart, write_vstart }, >> + [CSR_VXSAT] = { any, read_vxsat, write_vxsat }, >> + [CSR_VXRM] = { any, read_vxrm, write_vxrm }, >> + [CSR_VL] = { any, read_vl }, >> + [CSR_VTYPE] = { any, read_vtype }, >> /* User Timers and Counters */ >> [CSR_CYCLE] = { ctr, read_instret }, >> [CSR_INSTRET] = { ctr, read_instret }, >> diff --git a/target/riscv/helper.h b/target/riscv/helper.h >> index debb22a..fee02c0 100644 >> --- a/target/riscv/helper.h >> +++ b/target/riscv/helper.h >> @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl) >> DEF_HELPER_1(wfi, void, env) >> DEF_HELPER_1(tlb_flush, void, env) >> #endif >> +/* Vector functions */ > Think about how you could split this patch up to introduce a group of > instructions at a time. This will make it a lot easier to review. > > I'm going to leave review of the specifics to the RISCV maintainers but > I suspect they will want to wait until a v2 of the series. However it > looks like a good first pass at implementing vectors. > > -- > Alex Bennée

Patch V2 will not change softfloat. Thank you again for your review!

Best Regards,
Zhiwei
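P.S. To expand on the foflag question above: it marks a fault-only-first load
(vleff and friends). Per the spec, only a fault on element 0 may raise an
exception; a fault on any later element must instead truncate vl to the number
of elements already loaded, with no trap at all. The changes in cpu_loop() and
riscv_cpu_do_interrupt() try to recover that behaviour after the trap has been
taken. For v2 I would rather keep it inside the load helper, roughly like this
untested sketch (probe_read() is only a stand-in for whatever non-faulting
access check gets used, and masking is omitted):

static void vleff_b(CPURISCVState *env, uint32_t rd, uint32_t rs1)
{
    target_ulong addr = env->gpr[rs1];
    target_ulong i;

    for (i = env->vfp.vstart; i < env->vfp.vl; i++) {
        if (i != 0 && !probe_read(env, addr + i, 1)) {
            /* Fault after element 0: truncate vl and stop, no trap. */
            env->vfp.vl = i;
            break;
        }
        /* Element 0, or a readable address: a normal load that may trap. */
        env->vfp.vreg[rd].u8[i] = cpu_ldub_data(env, addr + i);
    }
    env->vfp.vstart = 0;
}

That would remove the need for foflag and the pc += 4 adjustment entirely. It
would also fit the suggestion to pass pointers into the register file to the
helpers; the vreg array then wants an alignment attribute (e.g.
QEMU_ALIGNED(16)) so its elements can be accessed with host vector operations.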
Hi Liuzhiwei,

Some comments:

1. The vector extension allows flexible implementations. It would be better to
describe the limitations of the current implementation (such as
vlen/elen/slen), the supported sections, and the unsupported features.

2. There should be a cfg.ext_v flag so the vector extension can be turned on
from the command line (see the sketch below).

3. Regarding the license: it should be "Copyright (c) 2019 C-SKY Limited, All
rights reserved.", not "2011 ~ 2019".

It is a huge piece of work; thanks for your contribution.

chihmin
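P.S. For point 2, I mean following the existing ext_* flags. A rough sketch
(the field, property, and realize-time code here are assumptions, to be
matched against the tree):

/* target/riscv/cpu.h: new field in the cpu config */
bool ext_v;

/* target/riscv/cpu.c: in the cpu property list */
DEFINE_PROP_BOOL("v", RISCVCPU, cfg.ext_v, false),

/* target/riscv/cpu.c: in riscv_cpu_realize(), when building misa */
if (cpu->cfg.ext_v) {
    target_misa |= RVV;
}

With that, the extension stays off by default and can be enabled with a
v=true cpu option on the command line.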
> + */ > + > +#define GEN_VECTOR_R2_NFVM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 nf = tcg_const_i32(a->nf); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, d); \ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(nf); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > +#define GEN_VECTOR_R_NFVM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 nf = tcg_const_i32(a->nf); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, s2, d);\ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(nf); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > + > +#define GEN_VECTOR_R_WDVM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 wd = tcg_const_i32(a->wd); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, wd, vm, s1, s2, d);\ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(wd); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > +#define GEN_VECTOR_R(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + gen_helper_vector_##INSN(cpu_env, s1, s2, d); \ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + return true; \ > +} > +#define GEN_VECTOR_R2_VM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, vm, s2, d); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > + > +#define GEN_VECTOR_R1_VM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, vm, d); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > +#define GEN_VECTOR_R_VM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + TCGv_i32 vm = tcg_const_i32(a->vm); \ > + gen_helper_vector_##INSN(cpu_env, vm, s1, s2, d); \ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(s2); \ > + tcg_temp_free_i32(d); \ > + tcg_temp_free_i32(vm); \ > + return true; \ > +} > +#define GEN_VECTOR_R2_ZIMM(INSN) \ > +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ > +{ \ > + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ > + TCGv_i32 zimm = tcg_const_i32(a->zimm); \ > + TCGv_i32 d = tcg_const_i32(a->rd); \ > + gen_helper_vector_##INSN(cpu_env, s1, zimm, d); \ > + tcg_temp_free_i32(s1); \ > + tcg_temp_free_i32(zimm); \ > + tcg_temp_free_i32(d); \ > + 
return true; \ > +} > + > +GEN_VECTOR_R2_NFVM(vlb_v) > +GEN_VECTOR_R2_NFVM(vlh_v) > +GEN_VECTOR_R2_NFVM(vlw_v) > +GEN_VECTOR_R2_NFVM(vle_v) > +GEN_VECTOR_R2_NFVM(vlbu_v) > +GEN_VECTOR_R2_NFVM(vlhu_v) > +GEN_VECTOR_R2_NFVM(vlwu_v) > +GEN_VECTOR_R2_NFVM(vlbff_v) > +GEN_VECTOR_R2_NFVM(vlhff_v) > +GEN_VECTOR_R2_NFVM(vlwff_v) > +GEN_VECTOR_R2_NFVM(vleff_v) > +GEN_VECTOR_R2_NFVM(vlbuff_v) > +GEN_VECTOR_R2_NFVM(vlhuff_v) > +GEN_VECTOR_R2_NFVM(vlwuff_v) > +GEN_VECTOR_R2_NFVM(vsb_v) > +GEN_VECTOR_R2_NFVM(vsh_v) > +GEN_VECTOR_R2_NFVM(vsw_v) > +GEN_VECTOR_R2_NFVM(vse_v) > + > +GEN_VECTOR_R_NFVM(vlsb_v) > +GEN_VECTOR_R_NFVM(vlsh_v) > +GEN_VECTOR_R_NFVM(vlsw_v) > +GEN_VECTOR_R_NFVM(vlse_v) > +GEN_VECTOR_R_NFVM(vlsbu_v) > +GEN_VECTOR_R_NFVM(vlshu_v) > +GEN_VECTOR_R_NFVM(vlswu_v) > +GEN_VECTOR_R_NFVM(vssb_v) > +GEN_VECTOR_R_NFVM(vssh_v) > +GEN_VECTOR_R_NFVM(vssw_v) > +GEN_VECTOR_R_NFVM(vsse_v) > +GEN_VECTOR_R_NFVM(vlxb_v) > +GEN_VECTOR_R_NFVM(vlxh_v) > +GEN_VECTOR_R_NFVM(vlxw_v) > +GEN_VECTOR_R_NFVM(vlxe_v) > +GEN_VECTOR_R_NFVM(vlxbu_v) > +GEN_VECTOR_R_NFVM(vlxhu_v) > +GEN_VECTOR_R_NFVM(vlxwu_v) > +GEN_VECTOR_R_NFVM(vsxb_v) > +GEN_VECTOR_R_NFVM(vsxh_v) > +GEN_VECTOR_R_NFVM(vsxw_v) > +GEN_VECTOR_R_NFVM(vsxe_v) > +GEN_VECTOR_R_NFVM(vsuxb_v) > +GEN_VECTOR_R_NFVM(vsuxh_v) > +GEN_VECTOR_R_NFVM(vsuxw_v) > +GEN_VECTOR_R_NFVM(vsuxe_v) > + > +GEN_VECTOR_R_WDVM(vamoswapw_v) > +GEN_VECTOR_R_WDVM(vamoswapd_v) > +GEN_VECTOR_R_WDVM(vamoaddw_v) > +GEN_VECTOR_R_WDVM(vamoaddd_v) > +GEN_VECTOR_R_WDVM(vamoxorw_v) > +GEN_VECTOR_R_WDVM(vamoxord_v) > +GEN_VECTOR_R_WDVM(vamoandw_v) > +GEN_VECTOR_R_WDVM(vamoandd_v) > +GEN_VECTOR_R_WDVM(vamoorw_v) > +GEN_VECTOR_R_WDVM(vamoord_v) > +GEN_VECTOR_R_WDVM(vamominw_v) > +GEN_VECTOR_R_WDVM(vamomind_v) > +GEN_VECTOR_R_WDVM(vamomaxw_v) > +GEN_VECTOR_R_WDVM(vamomaxd_v) > +GEN_VECTOR_R_WDVM(vamominuw_v) > +GEN_VECTOR_R_WDVM(vamominud_v) > +GEN_VECTOR_R_WDVM(vamomaxuw_v) > +GEN_VECTOR_R_WDVM(vamomaxud_v) > + > +GEN_VECTOR_R(vext_x_v) > +GEN_VECTOR_R(vfmv_f_s) > +GEN_VECTOR_R(vmv_s_x) > +GEN_VECTOR_R(vfmv_s_f) > +GEN_VECTOR_R(vadc_vvm) > +GEN_VECTOR_R(vadc_vxm) > +GEN_VECTOR_R(vadc_vim) > +GEN_VECTOR_R(vmadc_vvm) > +GEN_VECTOR_R(vmadc_vxm) > +GEN_VECTOR_R(vmadc_vim) > +GEN_VECTOR_R(vsbc_vvm) > +GEN_VECTOR_R(vsbc_vxm) > +GEN_VECTOR_R(vmsbc_vvm) > +GEN_VECTOR_R(vmsbc_vxm) > +GEN_VECTOR_R2_VM(vmpopc_m) > +GEN_VECTOR_R2_VM(vmfirst_m) > +GEN_VECTOR_R(vcompress_vm) > +GEN_VECTOR_R(vmandnot_mm) > +GEN_VECTOR_R(vmand_mm) > +GEN_VECTOR_R(vmor_mm) > +GEN_VECTOR_R(vmxor_mm) > +GEN_VECTOR_R(vmornot_mm) > +GEN_VECTOR_R(vmnand_mm) > +GEN_VECTOR_R(vmnor_mm) > +GEN_VECTOR_R(vmxnor_mm) > +GEN_VECTOR_R2_VM(vmsbf_m) > +GEN_VECTOR_R2_VM(vmsof_m) > +GEN_VECTOR_R2_VM(vmsif_m) > +GEN_VECTOR_R2_VM(viota_m) > +GEN_VECTOR_R1_VM(vid_v) > +GEN_VECTOR_R2_VM(vfcvt_xu_f_v) > +GEN_VECTOR_R2_VM(vfcvt_x_f_v) > +GEN_VECTOR_R2_VM(vfcvt_f_xu_v) > +GEN_VECTOR_R2_VM(vfcvt_f_x_v) > +GEN_VECTOR_R2_VM(vfwcvt_xu_f_v) > +GEN_VECTOR_R2_VM(vfwcvt_x_f_v) > +GEN_VECTOR_R2_VM(vfwcvt_f_xu_v) > +GEN_VECTOR_R2_VM(vfwcvt_f_x_v) > +GEN_VECTOR_R2_VM(vfwcvt_f_f_v) > +GEN_VECTOR_R2_VM(vfncvt_xu_f_v) > +GEN_VECTOR_R2_VM(vfncvt_x_f_v) > +GEN_VECTOR_R2_VM(vfncvt_f_xu_v) > +GEN_VECTOR_R2_VM(vfncvt_f_x_v) > +GEN_VECTOR_R2_VM(vfncvt_f_f_v) > +GEN_VECTOR_R2_VM(vfsqrt_v) > +GEN_VECTOR_R2_VM(vfclass_v) > + > +GEN_VECTOR_R_VM(vadd_vv) > +GEN_VECTOR_R_VM(vadd_vx) > +GEN_VECTOR_R_VM(vadd_vi) > +GEN_VECTOR_R_VM(vredsum_vs) > +GEN_VECTOR_R_VM(vfadd_vv) > +GEN_VECTOR_R_VM(vfadd_vf) > +GEN_VECTOR_R_VM(vredand_vs) > +GEN_VECTOR_R_VM(vfredsum_vs) > 
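
Every one of these trampolines boxes values that are translation-time
constants into TCGv_i32 temporaries just so a helper can loop over the
elements at run time. For the simple unmasked integer forms you could
skip the helper call entirely and let the gvec expanders emit inline
vector code. A rough sketch only — vreg_ofs() is a hypothetical helper
returning the offset of a vector register inside CPURISCVState, and
vlen would have to be plumbed into DisasContext:

  static bool trans_vand_vv(DisasContext *ctx, arg_vand_vv *a)
  {
      if (a->vm) { /* unmasked; the masked form still wants a helper */
          /* vd = vs2 & vs1, expanded inline by tcg-op-gvec */
          tcg_gen_gvec_and(MO_64, vreg_ofs(a->rd), vreg_ofs(a->rs2),
                           vreg_ofs(a->rs1), ctx->vlen / 8, ctx->vlen / 8);
          return true;
      }
      ...
  }

This is roughly how target/arm handles SVE, which has very similar
predication semantics.
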
+GEN_VECTOR_R_VM(vsub_vv) > +GEN_VECTOR_R_VM(vsub_vx) > +GEN_VECTOR_R_VM(vredor_vs) > +GEN_VECTOR_R_VM(vfsub_vv) > +GEN_VECTOR_R_VM(vfsub_vf) > +GEN_VECTOR_R_VM(vrsub_vx) > +GEN_VECTOR_R_VM(vrsub_vi) > +GEN_VECTOR_R_VM(vredxor_vs) > +GEN_VECTOR_R_VM(vfredosum_vs) > +GEN_VECTOR_R_VM(vminu_vv) > +GEN_VECTOR_R_VM(vminu_vx) > +GEN_VECTOR_R_VM(vredminu_vs) > +GEN_VECTOR_R_VM(vfmin_vv) > +GEN_VECTOR_R_VM(vfmin_vf) > +GEN_VECTOR_R_VM(vmin_vv) > +GEN_VECTOR_R_VM(vmin_vx) > +GEN_VECTOR_R_VM(vredmin_vs) > +GEN_VECTOR_R_VM(vfredmin_vs) > +GEN_VECTOR_R_VM(vmaxu_vv) > +GEN_VECTOR_R_VM(vmaxu_vx) > +GEN_VECTOR_R_VM(vredmaxu_vs) > +GEN_VECTOR_R_VM(vfmax_vv) > +GEN_VECTOR_R_VM(vfmax_vf) > +GEN_VECTOR_R_VM(vmax_vv) > +GEN_VECTOR_R_VM(vmax_vx) > +GEN_VECTOR_R_VM(vredmax_vs) > +GEN_VECTOR_R_VM(vfredmax_vs) > +GEN_VECTOR_R_VM(vfsgnj_vv) > +GEN_VECTOR_R_VM(vfsgnj_vf) > +GEN_VECTOR_R_VM(vand_vv) > +GEN_VECTOR_R_VM(vand_vx) > +GEN_VECTOR_R_VM(vand_vi) > +GEN_VECTOR_R_VM(vfsgnjn_vv) > +GEN_VECTOR_R_VM(vfsgnjn_vf) > +GEN_VECTOR_R_VM(vor_vv) > +GEN_VECTOR_R_VM(vor_vx) > +GEN_VECTOR_R_VM(vor_vi) > +GEN_VECTOR_R_VM(vfsgnjx_vv) > +GEN_VECTOR_R_VM(vfsgnjx_vf) > +GEN_VECTOR_R_VM(vxor_vv) > +GEN_VECTOR_R_VM(vxor_vx) > +GEN_VECTOR_R_VM(vxor_vi) > +GEN_VECTOR_R_VM(vrgather_vv) > +GEN_VECTOR_R_VM(vrgather_vx) > +GEN_VECTOR_R_VM(vrgather_vi) > +GEN_VECTOR_R_VM(vslideup_vx) > +GEN_VECTOR_R_VM(vslideup_vi) > +GEN_VECTOR_R_VM(vslide1up_vx) > +GEN_VECTOR_R_VM(vslidedown_vx) > +GEN_VECTOR_R_VM(vslidedown_vi) > +GEN_VECTOR_R_VM(vslide1down_vx) > +GEN_VECTOR_R_VM(vmerge_vvm) > +GEN_VECTOR_R_VM(vmerge_vxm) > +GEN_VECTOR_R_VM(vmerge_vim) > +GEN_VECTOR_R_VM(vfmerge_vfm) > +GEN_VECTOR_R_VM(vmseq_vv) > +GEN_VECTOR_R_VM(vmseq_vx) > +GEN_VECTOR_R_VM(vmseq_vi) > +GEN_VECTOR_R_VM(vmfeq_vv) > +GEN_VECTOR_R_VM(vmfeq_vf) > +GEN_VECTOR_R_VM(vmsne_vv) > +GEN_VECTOR_R_VM(vmsne_vx) > +GEN_VECTOR_R_VM(vmsne_vi) > +GEN_VECTOR_R_VM(vmfle_vv) > +GEN_VECTOR_R_VM(vmfle_vf) > +GEN_VECTOR_R_VM(vmsltu_vv) > +GEN_VECTOR_R_VM(vmsltu_vx) > +GEN_VECTOR_R_VM(vmford_vv) > +GEN_VECTOR_R_VM(vmford_vf) > +GEN_VECTOR_R_VM(vmslt_vv) > +GEN_VECTOR_R_VM(vmslt_vx) > +GEN_VECTOR_R_VM(vmflt_vv) > +GEN_VECTOR_R_VM(vmflt_vf) > +GEN_VECTOR_R_VM(vmsleu_vv) > +GEN_VECTOR_R_VM(vmsleu_vx) > +GEN_VECTOR_R_VM(vmsleu_vi) > +GEN_VECTOR_R_VM(vmfne_vv) > +GEN_VECTOR_R_VM(vmfne_vf) > +GEN_VECTOR_R_VM(vmsle_vv) > +GEN_VECTOR_R_VM(vmsle_vx) > +GEN_VECTOR_R_VM(vmsle_vi) > +GEN_VECTOR_R_VM(vmfgt_vf) > +GEN_VECTOR_R_VM(vmsgtu_vx) > +GEN_VECTOR_R_VM(vmsgtu_vi) > +GEN_VECTOR_R_VM(vmsgt_vx) > +GEN_VECTOR_R_VM(vmsgt_vi) > +GEN_VECTOR_R_VM(vmfge_vf) > +GEN_VECTOR_R_VM(vsaddu_vv) > +GEN_VECTOR_R_VM(vsaddu_vx) > +GEN_VECTOR_R_VM(vsaddu_vi) > +GEN_VECTOR_R_VM(vdivu_vv) > +GEN_VECTOR_R_VM(vdivu_vx) > +GEN_VECTOR_R_VM(vfdiv_vv) > +GEN_VECTOR_R_VM(vfdiv_vf) > +GEN_VECTOR_R_VM(vsadd_vv) > +GEN_VECTOR_R_VM(vsadd_vx) > +GEN_VECTOR_R_VM(vsadd_vi) > +GEN_VECTOR_R_VM(vdiv_vv) > +GEN_VECTOR_R_VM(vdiv_vx) > +GEN_VECTOR_R_VM(vfrdiv_vf) > +GEN_VECTOR_R_VM(vssubu_vv) > +GEN_VECTOR_R_VM(vssubu_vx) > +GEN_VECTOR_R_VM(vremu_vv) > +GEN_VECTOR_R_VM(vremu_vx) > +GEN_VECTOR_R_VM(vssub_vv) > +GEN_VECTOR_R_VM(vssub_vx) > +GEN_VECTOR_R_VM(vrem_vv) > +GEN_VECTOR_R_VM(vrem_vx) > +GEN_VECTOR_R_VM(vaadd_vv) > +GEN_VECTOR_R_VM(vaadd_vx) > +GEN_VECTOR_R_VM(vaadd_vi) > +GEN_VECTOR_R_VM(vmulhu_vv) > +GEN_VECTOR_R_VM(vmulhu_vx) > +GEN_VECTOR_R_VM(vfmul_vv) > +GEN_VECTOR_R_VM(vfmul_vf) > +GEN_VECTOR_R_VM(vsll_vv) > +GEN_VECTOR_R_VM(vsll_vx) > +GEN_VECTOR_R_VM(vsll_vi) > +GEN_VECTOR_R_VM(vmul_vv) > +GEN_VECTOR_R_VM(vmul_vx) > 
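
Also, nothing in this file checks that the V extension is actually
present and enabled before emitting the helper call, so a CPU built
without RVV would still execute these encodings. The other decoders
guard against this with REQUIRE_EXT from translate.c, e.g. (assuming
the cpu.h hunk adds an RVV misa bit, which I think it does):

  static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a)
  {
      REQUIRE_EXT(ctx, RVV);
      ...
  }

That check belongs in each trans_* function (or at the top of the
GEN_VECTOR_* macro bodies), not deferred to the run-time helpers.
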
+GEN_VECTOR_R_VM(vasub_vv) > +GEN_VECTOR_R_VM(vasub_vx) > +GEN_VECTOR_R_VM(vmulhsu_vv) > +GEN_VECTOR_R_VM(vmulhsu_vx) > +GEN_VECTOR_R_VM(vsmul_vv) > +GEN_VECTOR_R_VM(vsmul_vx) > +GEN_VECTOR_R_VM(vmulh_vv) > +GEN_VECTOR_R_VM(vmulh_vx) > +GEN_VECTOR_R_VM(vfrsub_vf) > +GEN_VECTOR_R_VM(vsrl_vv) > +GEN_VECTOR_R_VM(vsrl_vx) > +GEN_VECTOR_R_VM(vsrl_vi) > +GEN_VECTOR_R_VM(vfmadd_vv) > +GEN_VECTOR_R_VM(vfmadd_vf) > +GEN_VECTOR_R_VM(vsra_vv) > +GEN_VECTOR_R_VM(vsra_vx) > +GEN_VECTOR_R_VM(vsra_vi) > +GEN_VECTOR_R_VM(vmadd_vv) > +GEN_VECTOR_R_VM(vmadd_vx) > +GEN_VECTOR_R_VM(vfnmadd_vv) > +GEN_VECTOR_R_VM(vfnmadd_vf) > +GEN_VECTOR_R_VM(vssrl_vv) > +GEN_VECTOR_R_VM(vssrl_vx) > +GEN_VECTOR_R_VM(vssrl_vi) > +GEN_VECTOR_R_VM(vfmsub_vv) > +GEN_VECTOR_R_VM(vfmsub_vf) > +GEN_VECTOR_R_VM(vssra_vv) > +GEN_VECTOR_R_VM(vssra_vx) > +GEN_VECTOR_R_VM(vssra_vi) > +GEN_VECTOR_R_VM(vnmsub_vv) > +GEN_VECTOR_R_VM(vnmsub_vx) > +GEN_VECTOR_R_VM(vfnmsub_vv) > +GEN_VECTOR_R_VM(vfnmsub_vf) > +GEN_VECTOR_R_VM(vnsrl_vv) > +GEN_VECTOR_R_VM(vnsrl_vx) > +GEN_VECTOR_R_VM(vnsrl_vi) > +GEN_VECTOR_R_VM(vfmacc_vv) > +GEN_VECTOR_R_VM(vfmacc_vf) > +GEN_VECTOR_R_VM(vnsra_vv) > +GEN_VECTOR_R_VM(vnsra_vx) > +GEN_VECTOR_R_VM(vnsra_vi) > +GEN_VECTOR_R_VM(vmacc_vv) > +GEN_VECTOR_R_VM(vmacc_vx) > +GEN_VECTOR_R_VM(vfnmacc_vv) > +GEN_VECTOR_R_VM(vfnmacc_vf) > +GEN_VECTOR_R_VM(vnclipu_vv) > +GEN_VECTOR_R_VM(vnclipu_vx) > +GEN_VECTOR_R_VM(vnclipu_vi) > +GEN_VECTOR_R_VM(vfmsac_vv) > +GEN_VECTOR_R_VM(vfmsac_vf) > +GEN_VECTOR_R_VM(vnclip_vv) > +GEN_VECTOR_R_VM(vnclip_vx) > +GEN_VECTOR_R_VM(vnclip_vi) > +GEN_VECTOR_R_VM(vnmsac_vv) > +GEN_VECTOR_R_VM(vnmsac_vx) > +GEN_VECTOR_R_VM(vfnmsac_vv) > +GEN_VECTOR_R_VM(vfnmsac_vf) > +GEN_VECTOR_R_VM(vwredsumu_vs) > +GEN_VECTOR_R_VM(vwaddu_vv) > +GEN_VECTOR_R_VM(vwaddu_vx) > +GEN_VECTOR_R_VM(vfwadd_vv) > +GEN_VECTOR_R_VM(vfwadd_vf) > +GEN_VECTOR_R_VM(vwredsum_vs) > +GEN_VECTOR_R_VM(vwadd_vv) > +GEN_VECTOR_R_VM(vwadd_vx) > +GEN_VECTOR_R_VM(vfwredsum_vs) > +GEN_VECTOR_R_VM(vwsubu_vv) > +GEN_VECTOR_R_VM(vwsubu_vx) > +GEN_VECTOR_R_VM(vfwsub_vv) > +GEN_VECTOR_R_VM(vfwsub_vf) > +GEN_VECTOR_R_VM(vwsub_vv) > +GEN_VECTOR_R_VM(vwsub_vx) > +GEN_VECTOR_R_VM(vfwredosum_vs) > +GEN_VECTOR_R_VM(vwaddu_wv) > +GEN_VECTOR_R_VM(vwaddu_wx) > +GEN_VECTOR_R_VM(vfwadd_wv) > +GEN_VECTOR_R_VM(vfwadd_wf) > +GEN_VECTOR_R_VM(vwadd_wv) > +GEN_VECTOR_R_VM(vwadd_wx) > +GEN_VECTOR_R_VM(vwsubu_wv) > +GEN_VECTOR_R_VM(vwsubu_wx) > +GEN_VECTOR_R_VM(vfwsub_wv) > +GEN_VECTOR_R_VM(vfwsub_wf) > +GEN_VECTOR_R_VM(vwsub_wv) > +GEN_VECTOR_R_VM(vwsub_wx) > +GEN_VECTOR_R_VM(vwmulu_vv) > +GEN_VECTOR_R_VM(vwmulu_vx) > +GEN_VECTOR_R_VM(vfwmul_vv) > +GEN_VECTOR_R_VM(vfwmul_vf) > +GEN_VECTOR_R_VM(vwmulsu_vv) > +GEN_VECTOR_R_VM(vwmulsu_vx) > +GEN_VECTOR_R_VM(vwmul_vv) > +GEN_VECTOR_R_VM(vwmul_vx) > +GEN_VECTOR_R_VM(vwsmaccu_vv) > +GEN_VECTOR_R_VM(vwsmaccu_vx) > +GEN_VECTOR_R_VM(vwmaccu_vv) > +GEN_VECTOR_R_VM(vwmaccu_vx) > +GEN_VECTOR_R_VM(vfwmacc_vv) > +GEN_VECTOR_R_VM(vfwmacc_vf) > +GEN_VECTOR_R_VM(vwsmacc_vv) > +GEN_VECTOR_R_VM(vwsmacc_vx) > +GEN_VECTOR_R_VM(vwmacc_vv) > +GEN_VECTOR_R_VM(vwmacc_vx) > +GEN_VECTOR_R_VM(vfwnmacc_vv) > +GEN_VECTOR_R_VM(vfwnmacc_vf) > +GEN_VECTOR_R_VM(vwsmaccsu_vv) > +GEN_VECTOR_R_VM(vwsmaccsu_vx) > +GEN_VECTOR_R_VM(vwmaccsu_vv) > +GEN_VECTOR_R_VM(vwmaccsu_vx) > +GEN_VECTOR_R_VM(vfwmsac_vv) > +GEN_VECTOR_R_VM(vfwmsac_vf) > +GEN_VECTOR_R_VM(vwsmaccus_vx) > +GEN_VECTOR_R_VM(vwmaccus_vx) > +GEN_VECTOR_R_VM(vfwnmsac_vv) > +GEN_VECTOR_R_VM(vfwnmsac_vf) > +GEN_VECTOR_R2_ZIMM(vsetvli) > +GEN_VECTOR_R(vsetvl) > diff --git 
a/target/riscv/translate.c b/target/riscv/translate.c
> index 8d6ab73..587c23e 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -706,6 +706,7 @@ static bool gen_shift(DisasContext *ctx, arg_r *a,
>  #include "insn_trans/trans_rva.inc.c"
>  #include "insn_trans/trans_rvf.inc.c"
>  #include "insn_trans/trans_rvd.inc.c"
> +#include "insn_trans/trans_rvv.inc.c"
>  #include "insn_trans/trans_privileged.inc.c"
>
>  /*
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> new file mode 100644
> index 0000000..1f8f1ec
> --- /dev/null
> +++ b/target/riscv/vector_helper.c
> @@ -0,0 +1,26563 @@
> +/*
> + * RISC-V Vector Extension Helpers for QEMU.
> + *
> + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "cpu.h"
> +#include "qemu/main-loop.h"
> +#include "exec/exec-all.h"
> +#include "exec/helper-proto.h"
> +#include "exec/translator.h"
> +#include "exec/cpu_ldst.h"
> +#include <math.h>
> +#include "instmap.h"
> +
> +#define VECTOR_HELPER(name) HELPER(glue(vector_, name))
> +#define SIGNBIT8    (1 << 7)
> +#define MAX_U8      ((uint8_t)0xff)
> +#define MIN_U8      ((uint8_t)0x0)
> +#define MAX_S8      ((int8_t)0x7f)
> +#define MIN_S8      ((int8_t)0x80)
> +#define SIGNBIT16   (1 << 15)
> +#define MAX_U16     ((uint16_t)0xffff)
> +#define MIN_U16     ((uint16_t)0x0)
> +#define MAX_S16     ((int16_t)0x7fff)
> +#define MIN_S16     ((int16_t)0x8000)
> +#define SIGNBIT32   (1 << 31)
> +#define MAX_U32     ((uint32_t)0xffffffff)
> +#define MIN_U32     ((uint32_t)0x0)
> +#define MAX_S32     ((int32_t)0x7fffffff)
> +#define MIN_S32     ((int32_t)0x80000000)
> +#define SIGNBIT64   ((uint64_t)1 << 63)
> +#define MAX_U64     ((uint64_t)0xffffffffffffffff)
> +#define MIN_U64     ((uint64_t)0x0)
> +#define MAX_S64     ((int64_t)0x7fffffffffffffff)
> +#define MIN_S64     ((int64_t)0x8000000000000000)
> +
> +static int64_t sign_extend(int64_t a, int8_t width)
> +{
> +    return a << (64 - width) >> (64 - width);
> +}
> +
> +static int64_t extend_gpr(target_ulong reg)
> +{
> +    return sign_extend(reg, sizeof(target_ulong) * 8);
> +}
> +
> +static target_ulong vector_get_index(CPURISCVState *env, int rs1, int rs2,
> +    int index, int mem, int width, int nf)
> +{
> +    target_ulong abs_off, base = env->gpr[rs1];
> +    target_long offset;
> +    switch (width) {
> +    case 8:
> +        offset = sign_extend(env->vfp.vreg[rs2].s8[index], 8) + nf * mem;
> +        break;
> +    case 16:
> +        offset = sign_extend(env->vfp.vreg[rs2].s16[index], 16) + nf * mem;
> +        break;
> +    case 32:
> +        offset = sign_extend(env->vfp.vreg[rs2].s32[index], 32) + nf * mem;
> +        break;
> +    case 64:
> +        offset = env->vfp.vreg[rs2].s64[index] + nf * mem;
> +        break;
> +    default:
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return 0;
> +    }
> +    if (offset < 0) {
> +        abs_off = ~offset + 1;
> +        if (base >= abs_off) {
> +            return base - abs_off;
> +        }
> +    } else {
>
+ if ((target_ulong)((target_ulong)offset + base) >= base) { > + return (target_ulong)offset + base; > + } > + } > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return 0; > +} > + > + > + > +/* ADD/SUB/COMPARE instructions. */ > +static inline uint8_t sat_add_u8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint8_t res = a + b; > + if (res < a) { > + res = MAX_U8; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint16_t sat_add_u16(CPURISCVState *env, uint16_t a, > uint16_t b) > +{ > + uint16_t res = a + b; > + if (res < a) { > + res = MAX_U16; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint32_t sat_add_u32(CPURISCVState *env, uint32_t a, > uint32_t b) > +{ > + uint32_t res = a + b; > + if (res < a) { > + res = MAX_U32; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint64_t sat_add_u64(CPURISCVState *env, uint64_t a, > uint64_t b) > +{ > + uint64_t res = a + b; > + if (res < a) { > + res = MAX_U64; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint8_t sat_add_s8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint8_t res = a + b; > + if (((res ^ a) & SIGNBIT8) && !((a ^ b) & SIGNBIT8)) { > + res = ~(((int8_t)a >> 7) ^ SIGNBIT8); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint16_t sat_add_s16(CPURISCVState *env, uint16_t a, > uint16_t b) > +{ > + uint16_t res = a + b; > + if (((res ^ a) & SIGNBIT16) && !((a ^ b) & SIGNBIT16)) { > + res = ~(((int16_t)a >> 15) ^ SIGNBIT16); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint32_t sat_add_s32(CPURISCVState *env, uint32_t a, > uint32_t b) > +{ > + uint32_t res = a + b; > + if (((res ^ a) & SIGNBIT32) && !((a ^ b) & SIGNBIT32)) { > + res = ~(((int32_t)a >> 31) ^ SIGNBIT32); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint64_t sat_add_s64(CPURISCVState *env, uint64_t a, > uint64_t b) > +{ > + uint64_t res = a + b; > + if (((res ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) { > + res = ~(((int64_t)a >> 63) ^ SIGNBIT64); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint8_t sat_sub_u8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint8_t res = a - b; > + if (res > a) { > + res = 0; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint16_t sat_sub_u16(CPURISCVState *env, uint16_t a, > uint16_t b) > +{ > + uint16_t res = a - b; > + if (res > a) { > + res = 0; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint32_t sat_sub_u32(CPURISCVState *env, uint32_t a, > uint32_t b) > +{ > + uint32_t res = a - b; > + if (res > a) { > + res = 0; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint64_t sat_sub_u64(CPURISCVState *env, uint64_t a, > uint64_t b) > +{ > + uint64_t res = a - b; > + if (res > a) { > + res = 0; > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint8_t sat_sub_s8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint8_t res = a - b; > + if (((res ^ a) & SIGNBIT8) && ((a ^ b) & SIGNBIT8)) { > + res = ~(((int8_t)a >> 7) ^ SIGNBIT8); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint16_t sat_sub_s16(CPURISCVState *env, uint16_t a, > uint16_t b) > +{ > + uint16_t res = a - b; > + if (((res ^ a) & SIGNBIT16) && ((a ^ b) & SIGNBIT16)) { > + res = ~(((int16_t)a >> 15) ^ SIGNBIT16); > + env->vfp.vxsat = 0x1; > 
+ > + } > + return res; > +} > + > +static inline uint32_t sat_sub_s32(CPURISCVState *env, uint32_t a, > uint32_t b) > +{ > + uint32_t res = a - b; > + if (((res ^ a) & SIGNBIT32) && ((a ^ b) & SIGNBIT32)) { > + res = ~(((int32_t)a >> 31) ^ SIGNBIT32); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static inline uint64_t sat_sub_s64(CPURISCVState *env, uint64_t a, > uint64_t b) > +{ > + uint64_t res = a - b; > + if (((res ^ a) & SIGNBIT64) && ((a ^ b) & SIGNBIT64)) { > + res = ~(((int64_t)a >> 63) ^ SIGNBIT64); > + env->vfp.vxsat = 0x1; > + > + } > + return res; > +} > + > +static uint64_t fix_data_round(CPURISCVState *env, uint64_t result, > + uint8_t shift) > +{ > + uint64_t lsb_1 = (uint64_t)1 << shift; > + int mod = env->vfp.vxrm; > + int mask = ((uint64_t)1 << shift) - 1; > + > + if (mod == 0x0) { /* rnu */ > + return lsb_1 >> 1; > + } else if (mod == 0x1) { /* rne */ > + if ((result & mask) > (lsb_1 >> 1) || > + (((result & mask) == (lsb_1 >> 1)) && > + (((result >> shift) & 0x1)) == 1)) { > + return lsb_1 >> 1; > + } > + } else if (mod == 0x3) { /* rod */ > + if (((result & mask) >= 0x1) && (((result >> shift) & 0x1) == 0)) > { > + return lsb_1; > + } > + } > + return 0; > +} > + > +static int8_t saturate_s8(CPURISCVState *env, int16_t res) > +{ > + if (res > MAX_S8) { > + env->vfp.vxsat = 0x1; > + return MAX_S8; > + } else if (res < MIN_S8) { > + env->vfp.vxsat = 0x1; > + return MIN_S8; > + } else { > + return res; > + } > +} > + > +static uint8_t saturate_u8(CPURISCVState *env, uint16_t res) > +{ > + if (res > MAX_U8) { > + env->vfp.vxsat = 0x1; > + return MAX_U8; > + } else { > + return res; > + } > +} > + > +static uint16_t saturate_u16(CPURISCVState *env, uint32_t res) > +{ > + if (res > MAX_U16) { > + env->vfp.vxsat = 0x1; > + return MAX_U16; > + } else { > + return res; > + } > +} > + > +static uint32_t saturate_u32(CPURISCVState *env, uint64_t res) > +{ > + if (res > MAX_U32) { > + env->vfp.vxsat = 0x1; > + return MAX_U32; > + } else { > + return res; > + } > +} > + > +static int16_t saturate_s16(CPURISCVState *env, int32_t res) > +{ > + if (res > MAX_S16) { > + env->vfp.vxsat = 0x1; > + return MAX_S16; > + } else if (res < MIN_S16) { > + env->vfp.vxsat = 0x1; > + return MIN_S16; > + } else { > + return res; > + } > +} > + > +static int32_t saturate_s32(CPURISCVState *env, int64_t res) > +{ > + if (res > MAX_S32) { > + env->vfp.vxsat = 0x1; > + return MAX_S32; > + } else if (res < MIN_S32) { > + env->vfp.vxsat = 0x1; > + return MIN_S32; > + } else { > + return res; > + } > +} > +static uint16_t vwsmaccu_8(CPURISCVState *env, uint8_t a, uint8_t b, > + uint16_t c) > +{ > + uint16_t round, res; > + uint16_t product = (uint16_t)a * (uint16_t)b; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)product, 4); > + res = (round + product) >> 4; > + return sat_add_u16(env, c, res); > +} > + > +static uint32_t vwsmaccu_16(CPURISCVState *env, uint16_t a, uint16_t b, > + uint32_t c) > +{ > + uint32_t round, res; > + uint32_t product = (uint32_t)a * (uint32_t)b; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)product, 8); > + res = (round + product) >> 8; > + return sat_add_u32(env, c, res); > +} > + > +static uint64_t vwsmaccu_32(CPURISCVState *env, uint32_t a, uint32_t b, > + uint64_t c) > +{ > + uint64_t round, res; > + uint64_t product = (uint64_t)a * (uint64_t)b; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)product, 16); > + res = (round + product) >> 16; > + return sat_add_u64(env, c, res); > +} > + > +static int16_t vwsmacc_8(CPURISCVState 
*env, int8_t a, int8_t b, > + int16_t c) > +{ > + int16_t round, res; > + int16_t product = (int16_t)a * (int16_t)b; > + > + round = (int16_t)fix_data_round(env, (uint64_t)product, 4); > + res = (int16_t)(round + product) >> 4; > + return sat_add_s16(env, c, res); > +} > + > +static int32_t vwsmacc_16(CPURISCVState *env, int16_t a, int16_t b, > + int32_t c) > +{ > + int32_t round, res; > + int32_t product = (int32_t)a * (int32_t)b; > + > + round = (int32_t)fix_data_round(env, (uint64_t)product, 8); > + res = (int32_t)(round + product) >> 8; > + return sat_add_s32(env, c, res); > +} > + > +static int64_t vwsmacc_32(CPURISCVState *env, int32_t a, int32_t b, > + int64_t c) > +{ > + int64_t round, res; > + int64_t product = (int64_t)a * (int64_t)b; > + > + round = (int64_t)fix_data_round(env, (uint64_t)product, 16); > + res = (int64_t)(round + product) >> 16; > + return sat_add_s64(env, c, res); > +} > + > +static int16_t vwsmaccsu_8(CPURISCVState *env, uint8_t a, int8_t b, > + int16_t c) > +{ > + int16_t round, res; > + int16_t product = (uint16_t)a * (int16_t)b; > + > + round = (int16_t)fix_data_round(env, (uint64_t)product, 4); > + res = (round + product) >> 4; > + return sat_sub_s16(env, c, res); > +} > + > +static int32_t vwsmaccsu_16(CPURISCVState *env, uint16_t a, int16_t b, > + uint32_t c) > +{ > + int32_t round, res; > + int32_t product = (uint32_t)a * (int32_t)b; > + > + round = (int32_t)fix_data_round(env, (uint64_t)product, 8); > + res = (round + product) >> 8; > + return sat_sub_s32(env, c, res); > +} > + > +static int64_t vwsmaccsu_32(CPURISCVState *env, uint32_t a, int32_t b, > + int64_t c) > +{ > + int64_t round, res; > + int64_t product = (uint64_t)a * (int64_t)b; > + > + round = (int64_t)fix_data_round(env, (uint64_t)product, 16); > + res = (round + product) >> 16; > + return sat_sub_s64(env, c, res); > +} > + > +static int16_t vwsmaccus_8(CPURISCVState *env, int8_t a, uint8_t b, > + int16_t c) > +{ > + int16_t round, res; > + int16_t product = (int16_t)a * (uint16_t)b; > + > + round = (int16_t)fix_data_round(env, (uint64_t)product, 4); > + res = (round + product) >> 4; > + return sat_sub_s16(env, c, res); > +} > + > +static int32_t vwsmaccus_16(CPURISCVState *env, int16_t a, uint16_t b, > + int32_t c) > +{ > + int32_t round, res; > + int32_t product = (int32_t)a * (uint32_t)b; > + > + round = (int32_t)fix_data_round(env, (uint64_t)product, 8); > + res = (round + product) >> 8; > + return sat_sub_s32(env, c, res); > +} > + > +static uint64_t vwsmaccus_32(CPURISCVState *env, int32_t a, uint32_t b, > + int64_t c) > +{ > + int64_t round, res; > + int64_t product = (int64_t)a * (uint64_t)b; > + > + round = (int64_t)fix_data_round(env, (uint64_t)product, 16); > + res = (round + product) >> 16; > + return sat_sub_s64(env, c, res); > +} > + > +static int8_t vssra_8(CPURISCVState *env, int8_t a, uint8_t b) > +{ > + int16_t round, res; > + uint8_t shift = b & 0x7; > + > + round = (int16_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return res; > +} > + > +static int16_t vssra_16(CPURISCVState *env, int16_t a, uint16_t b) > +{ > + int32_t round, res; > + uint8_t shift = b & 0xf; > + > + round = (int32_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > +static int32_t vssra_32(CPURISCVState *env, int32_t a, uint32_t b) > +{ > + int64_t round, res; > + uint8_t shift = b & 0x1f; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > 
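
The fixed-point helpers all follow the same
"round = fix_data_round(...); res = (a + round) >> shift" shape, and
the 64-bit variants below pre-shift by (shift - 1) to avoid overflowing
the intermediate sum. That pre-shift is undefined behaviour when the
shift amount is zero — b & 0x3f == 0 turns it into a shift by -1 —
and a guest can trigger that directly. Something along these lines
would be safer (a sketch of the same logic with a zero-shift guard):

  static int64_t vssra_64(CPURISCVState *env, int64_t a, uint64_t b)
  {
      uint8_t shift = b & 0x3f;
      int64_t round;

      if (shift == 0) { /* no bits shifted out, result is exact */
          return a;
      }
      round = (int64_t)fix_data_round(env, (uint64_t)a, shift);
      /* round's low (shift - 1) bits are zero, so no carry is lost */
      return ((a >> (shift - 1)) + (round >> (shift - 1))) >> 1;
  }

and similarly for vssrl_64, vssrai_64 and vssrli_64.
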
+static int64_t vssra_64(CPURISCVState *env, int64_t a, uint64_t b) > +{ > + int64_t round, res; > + uint8_t shift = b & 0x3f; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a >> (shift - 1)) + (round >> (shift - 1)); > + return res >> 1; > +} > + > +static int8_t vssrai_8(CPURISCVState *env, int8_t a, uint8_t b) > +{ > + int16_t round, res; > + > + round = (int16_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static int16_t vssrai_16(CPURISCVState *env, int16_t a, uint8_t b) > +{ > + int32_t round, res; > + > + round = (int32_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static int32_t vssrai_32(CPURISCVState *env, int32_t a, uint8_t b) > +{ > + int64_t round, res; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static int64_t vssrai_64(CPURISCVState *env, int64_t a, uint8_t b) > +{ > + int64_t round, res; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a >> (b - 1)) + (round >> (b - 1)); > + return res >> 1; > +} > + > +static int8_t vnclip_16(CPURISCVState *env, int16_t a, uint8_t b) > +{ > + int16_t round, res; > + uint8_t shift = b & 0xf; > + > + round = (int16_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_s8(env, res); > +} > + > +static int16_t vnclip_32(CPURISCVState *env, int32_t a, uint16_t b) > +{ > + int32_t round, res; > + uint8_t shift = b & 0x1f; > + > + round = (int32_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return saturate_s16(env, res); > +} > + > +static int32_t vnclip_64(CPURISCVState *env, int64_t a, uint32_t b) > +{ > + int64_t round, res; > + uint8_t shift = b & 0x3f; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_s32(env, res); > +} > + > +static int8_t vnclipi_16(CPURISCVState *env, int16_t a, uint8_t b) > +{ > + int16_t round, res; > + > + round = (int16_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_s8(env, res); > +} > + > +static int16_t vnclipi_32(CPURISCVState *env, int32_t a, uint8_t b) > +{ > + int32_t round, res; > + > + round = (int32_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_s16(env, res); > +} > + > +static int32_t vnclipi_64(CPURISCVState *env, int64_t a, uint8_t b) > +{ > + int32_t round, res; > + > + round = (int64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_s32(env, res); > +} > + > +static uint8_t vnclipu_16(CPURISCVState *env, uint16_t a, uint8_t b) > +{ > + uint16_t round, res; > + uint8_t shift = b & 0xf; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_u8(env, res); > +} > + > +static uint16_t vnclipu_32(CPURISCVState *env, uint32_t a, uint16_t b) > +{ > + uint32_t round, res; > + uint8_t shift = b & 0x1f; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_u16(env, res); > +} > + > +static uint32_t vnclipu_64(CPURISCVState *env, uint64_t a, uint32_t b) > +{ > + uint64_t round, res; > + uint8_t shift = b & 0x3f; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + > + return saturate_u32(env, res); > +} > + > +static uint8_t 
vnclipui_16(CPURISCVState *env, uint16_t a, uint8_t b) > +{ > + uint16_t round, res; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_u8(env, res); > +} > + > +static uint16_t vnclipui_32(CPURISCVState *env, uint32_t a, uint8_t b) > +{ > + uint32_t round, res; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_u16(env, res); > +} > + > +static uint32_t vnclipui_64(CPURISCVState *env, uint64_t a, uint8_t b) > +{ > + uint64_t round, res; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + > + return saturate_u32(env, res); > +} > + > +static uint8_t vssrl_8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint16_t round, res; > + uint8_t shift = b & 0x7; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > +static uint16_t vssrl_16(CPURISCVState *env, uint16_t a, uint16_t b) > +{ > + uint32_t round, res; > + uint8_t shift = b & 0xf; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > +static uint32_t vssrl_32(CPURISCVState *env, uint32_t a, uint32_t b) > +{ > + uint64_t round, res; > + uint8_t shift = b & 0x1f; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a + round) >> shift; > + return res; > +} > + > +static uint64_t vssrl_64(CPURISCVState *env, uint64_t a, uint64_t b) > +{ > + uint64_t round, res; > + uint8_t shift = b & 0x3f; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift); > + res = (a >> (shift - 1)) + (round >> (shift - 1)); > + return res >> 1; > +} > + > +static uint8_t vssrli_8(CPURISCVState *env, uint8_t a, uint8_t b) > +{ > + uint16_t round, res; > + > + round = (uint16_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static uint16_t vssrli_16(CPURISCVState *env, uint16_t a, uint8_t b) > +{ > + uint32_t round, res; > + > + round = (uint32_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static uint32_t vssrli_32(CPURISCVState *env, uint32_t a, uint8_t b) > +{ > + uint64_t round, res; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a + round) >> b; > + return res; > +} > + > +static uint64_t vssrli_64(CPURISCVState *env, uint64_t a, uint8_t b) > +{ > + uint64_t round, res; > + > + round = (uint64_t)fix_data_round(env, (uint64_t)a, b); > + res = (a >> (b - 1)) + (round >> (b - 1)); > + return res >> 1; > +} > + > +static int8_t vsmul_8(CPURISCVState *env, int8_t a, int8_t b) > +{ > + int16_t round; > + int8_t res; > + int16_t product = (int16_t)a * (int16_t)b; > + > + if (a == MIN_S8 && b == MIN_S8) { > + env->vfp.vxsat = 1; > + > + return MAX_S8; > + } > + > + round = (int16_t)fix_data_round(env, (uint64_t)product, 7); > + res = sat_add_s16(env, product, round) >> 7; > + return res; > +} > + > + > +static int16_t vsmul_16(CPURISCVState *env, int16_t a, int16_t b) > +{ > + int32_t round; > + int16_t res; > + int32_t product = (int32_t)a * (int32_t)b; > + > + if (a == MIN_S16 && b == MIN_S16) { > + env->vfp.vxsat = 1; > + > + return MAX_S16; > + } > + > + round = (int32_t)fix_data_round(env, (uint64_t)product, 15); > + res = sat_add_s32(env, product, round) >> 15; > + return res; > +} > + > +static int32_t vsmul_32(CPURISCVState *env, int32_t a, int32_t b) > +{ > + int64_t round; > + int32_t res; > + 
int64_t product = (int64_t)a * (int64_t)b; > + > + if (a == MIN_S32 && b == MIN_S32) { > + env->vfp.vxsat = 1; > + > + return MAX_S32; > + } > + > + round = (int64_t)fix_data_round(env, (uint64_t)product, 31); > + res = sat_add_s64(env, product, round) >> 31; > + return res; > +} > + > + > +static int64_t vsmul_64(CPURISCVState *env, int64_t a, int64_t b) > +{ > + int64_t res; > + uint64_t abs_a = a, abs_b = b; > + uint64_t lo_64, hi_64, carry, round; > + > + if (a == MIN_S64 && b == MIN_S64) { > + env->vfp.vxsat = 1; > + > + return MAX_S64; > + } > + > + if (a < 0) { > + abs_a = ~a + 1; > + } > + if (b < 0) { > + abs_b = ~b + 1; > + } > + > + /* first get the whole product in {hi_64, lo_64} */ > + uint64_t a_hi = abs_a >> 32; > + uint64_t a_lo = (uint32_t)abs_a; > + uint64_t b_hi = abs_b >> 32; > + uint64_t b_lo = (uint32_t)abs_b; > + > + /* > + * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) > + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + > + * (a_lo * b_hi) << 32 + a_lo * b_lo > + * = {hi_64, lo_64} > + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * > b_lo)) >> 64 > + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry > + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) > >> 32 > + */ > + > + lo_64 = abs_a * abs_b; > + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + (uint64_t)(uint32_t)(a_lo * b_hi) + > + ((a_lo * b_lo) >> 32)) >> 32; > + > + hi_64 = a_hi * b_hi + > + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + > + carry; > + > + if ((a ^ b) & SIGNBIT64) { > + lo_64 = ~lo_64; > + hi_64 = ~hi_64; > + if (lo_64 == MAX_U64) { > + lo_64 = 0; > + hi_64 += 1; > + } else { > + lo_64 += 1; > + } > + } > + > + /* set rem and res */ > + round = fix_data_round(env, lo_64, 63); > + if ((lo_64 + round) < lo_64) { > + hi_64 += 1; > + res = (hi_64 << 1); > + } else { > + res = (hi_64 << 1) | ((lo_64 + round) >> 63); > + } > + > + return res; > +} > +static inline int8_t avg_round_s8(CPURISCVState *env, int8_t a, int8_t b) > +{ > + int16_t round; > + int8_t res; > + int16_t sum = a + b; > + > + round = (int16_t)fix_data_round(env, (uint64_t)sum, 1); > + res = (sum + round) >> 1; > + > + return res; > +} > + > +static inline int16_t avg_round_s16(CPURISCVState *env, int16_t a, > int16_t b) > +{ > + int32_t round; > + int16_t res; > + int32_t sum = a + b; > + > + round = (int32_t)fix_data_round(env, (uint64_t)sum, 1); > + res = (sum + round) >> 1; > + > + return res; > +} > + > +static inline int32_t avg_round_s32(CPURISCVState *env, int32_t a, > int32_t b) > +{ > + int64_t round; > + int32_t res; > + int64_t sum = a + b; > + > + round = (int64_t)fix_data_round(env, (uint64_t)sum, 1); > + res = (sum + round) >> 1; > + > + return res; > +} > + > +static inline int64_t avg_round_s64(CPURISCVState *env, int64_t a, > int64_t b) > +{ > + int64_t rem = (a & 0x1) + (b & 0x1); > + int64_t res = (a >> 1) + (b >> 1) + (rem >> 1); > + int mod = env->vfp.vxrm; > + > + if (mod == 0x0) { /* rnu */ > + if (rem == 0x1) { > + return res + 1; > + } > + } else if (mod == 0x1) { /* rne */ > + if ((rem & 0x1) == 1 && ((res & 0x1) == 1)) { > + return res + 1; > + } > + } else if (mod == 0x3) { /* rod */ > + if (((rem & 0x1) >= 0x1) && (res & 0x1) == 0) { > + return res + 1; > + } > + } > + return res; > +} > + > +static target_ulong helper_fclass_h(uint64_t frs1) > +{ > + float16 f = frs1; > + bool sign = float16_is_neg(f); > + > + if (float16_is_infinity(f)) { > + return sign ? 
1 << 0 : 1 << 7;
> +    } else if (float16_is_zero(f)) {
> +        return sign ? 1 << 3 : 1 << 4;
> +    } else if (float16_is_zero_or_denormal(f)) {
> +        return sign ? 1 << 2 : 1 << 5;
> +    } else if (float16_is_any_nan(f)) {
> +        float_status s = { }; /* for snan_bit_is_one */
> +        return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
> +    } else {
> +        return sign ? 1 << 1 : 1 << 6;
> +    }
> +}
> +
> +static inline bool vector_vtype_ill(CPURISCVState *env)
> +{
> +    if ((env->vfp.vtype >> (sizeof(target_ulong) * 8 - 1)) & 0x1) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static inline void vector_vtype_set_ill(CPURISCVState *env)
> +{
> +    env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) * 8 - 1);
> +    return;
> +}
> +
> +static inline int vector_vtype_get_sew(CPURISCVState *env)
> +{
> +    return (env->vfp.vtype >> 2) & 0x7;
> +}
> +
> +static inline int vector_get_width(CPURISCVState *env)
> +{
> +    return 8 * (1 << vector_vtype_get_sew(env));
> +}
> +
> +static inline int vector_get_lmul(CPURISCVState *env)
> +{
> +    return 1 << (env->vfp.vtype & 0x3);
> +}
> +
> +static inline int vector_get_vlmax(CPURISCVState *env)
> +{
> +    return vector_get_lmul(env) * VLEN / vector_get_width(env);
> +}
> +
> +static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int width,
> +    int lmul, int index)
> +{
> +    int mlen = width / lmul;
> +    int idx = (index * mlen) / 8;
> +    int pos = (index * mlen) % 8;
> +
> +    return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1);
> +}
> +
> +static inline bool vector_overlap_vm_common(int lmul, int vm, int rd)
> +{
> +    if (lmul > 1 && vm == 0 && rd == 0) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static inline bool vector_overlap_vm_force(int vm, int rd)
> +{
> +    if (vm == 0 && rd == 0) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static inline bool vector_overlap_carry(int lmul, int rd)
> +{
> +    if (lmul > 1 && rd == 0) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static inline bool vector_overlap_dstgp_srcgp(int rd, int dlen, int rs,
> +    int slen)
> +{
> +    if ((rd >= rs && rd < rs + slen) || (rs >= rd && rs < rd + dlen)) {
> +        return true;
> +    }
> +    return false;
> +}
> +
> +static inline uint64_t vector_get_mask(int start, int end)
> +{
> +    return ((uint64_t)(~((uint64_t)0))) << (63 - end + start) >> (63 - end);
> +}
> +
> +/* fetch unsigned element by width */
> +static inline uint64_t vector_get_iu_elem(CPURISCVState *env, uint32_t width,
> +    uint32_t rs2, uint32_t index)
> +{
> +    uint64_t elem;
> +    if (width == 8) {
> +        elem = env->vfp.vreg[rs2].u8[index];
> +    } else if (width == 16) {
> +        elem = env->vfp.vreg[rs2].u16[index];
> +    } else if (width == 32) {
> +        elem = env->vfp.vreg[rs2].u32[index];
> +    } else if (width == 64) {
> +        elem = env->vfp.vreg[rs2].u64[index];
> +    } else { /* the max of (XLEN, FLEN) is no bigger than 64 */
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return 0;
> +    }
> +    return elem;
> +}
> +
> +static inline int vector_mask_reg(CPURISCVState *env, uint32_t reg, int width,
> +    int lmul, int index)
> +{
> +    int mlen = width / lmul;
> +    int idx = (index * mlen) / 8;
> +    int pos = (index * mlen) % 8;
> +    return (env->vfp.vreg[reg].u8[idx] >> pos) & 0x1;
> +}
> +
> +static inline void vector_mask_result(CPURISCVState *env, uint32_t reg,
> +    int width, int lmul, int index, uint32_t result)
> +{
> +    int mlen = width / lmul;
> +    int idx = (index * mlen) / width;
> +    int pos = (index * mlen) % width;
> +    uint64_t mask = ~((((uint64_t)1 << mlen) - 1) << pos);
> +
> +    switch
(width) { > + case 8: > + env->vfp.vreg[reg].u8[idx] = (env->vfp.vreg[reg].u8[idx] & mask) > + | (result << pos); > + break; > + case 16: > + env->vfp.vreg[reg].u16[idx] = (env->vfp.vreg[reg].u16[idx] & mask) > + | (result << pos); > + break; > + case 32: > + env->vfp.vreg[reg].u32[idx] = (env->vfp.vreg[reg].u32[idx] & mask) > + | (result << pos); > + break; > + case 64: > + env->vfp.vreg[reg].u64[idx] = (env->vfp.vreg[reg].u64[idx] & mask) > + | ((uint64_t)result << > pos); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + break; > + } > + > + return; > +} > + > +/** > + * deposit16: > + * @value: initial value to insert bit field into > + * @start: the lowest bit in the bit field (numbered from 0) > + * @length: the length of the bit field > + * @fieldval: the value to insert into the bit field > + * > + * Deposit @fieldval into the 16 bit @value at the bit field specified > + * by the @start and @length parameters, and return the modified > + * @value. Bits of @value outside the bit field are not modified. > + * Bits of @fieldval above the least significant @length bits are > + * ignored. The bit field must lie entirely within the 16 bit word. > + * It is valid to request that all 16 bits are modified (ie @length > + * 16 and @start 0). > + * > + * Returns: the modified @value. > + */ > +static inline uint16_t deposit16(uint16_t value, int start, int length, > + uint16_t fieldval) > +{ > + uint16_t mask; > + assert(start >= 0 && length > 0 && length <= 16 - start); > + mask = (~0U >> (16 - length)) << start; > + return (value & ~mask) | ((fieldval << start) & mask); > +} > + > +static void vector_tail_amo(CPURISCVState *env, int vreg, int index, int > width) > +{ > + switch (width) { > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 64: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_common(CPURISCVState *env, int vreg, int index, > + int width) > +{ > + switch (width) { > + case 8: > + env->vfp.vreg[vreg].u8[index] = 0; > + break; > + case 16: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 64: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_segment(CPURISCVState *env, int vreg, int index, > + int width, int nf, int lmul) > +{ > + switch (width) { > + case 8: > + while (nf >= 0) { > + env->vfp.vreg[vreg + nf * lmul].u8[index] = 0; > + nf--; > + } > + break; > + case 16: > + while (nf >= 0) { > + env->vfp.vreg[vreg + nf * lmul].u16[index] = 0; > + nf--; > + } > + break; > + case 32: > + while (nf >= 0) { > + env->vfp.vreg[vreg + nf * lmul].u32[index] = 0; > + nf--; > + } > + break; > + case 64: > + while (nf >= 0) { > + env->vfp.vreg[vreg + nf * lmul].u64[index] = 0; > + nf--; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_widen(CPURISCVState *env, int vreg, int index, > + int width) > +{ > + switch (width) { > + case 8: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 16: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + 
return; > + } > +} > + > +static void vector_tail_narrow(CPURISCVState *env, int vreg, int index, > + int width) > +{ > + switch (width) { > + case 8: > + env->vfp.vreg[vreg].u8[index] = 0; > + break; > + case 16: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_fcommon(CPURISCVState *env, int vreg, int index, > + int width) > +{ > + switch (width) { > + case 16: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 64: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_fwiden(CPURISCVState *env, int vreg, int index, > + int width) > +{ > + switch (width) { > + case 16: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u64[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > + > +static void vector_tail_fnarrow(CPURISCVState *env, int vreg, int index, > + int width) > +{ > + switch (width) { > + case 16: > + env->vfp.vreg[vreg].u16[index] = 0; > + break; > + case 32: > + env->vfp.vreg[vreg].u32[index] = 0; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > +} > +static inline int vector_get_carry(CPURISCVState *env, int width, int > lmul, > + int index) > +{ > + int mlen = width / lmul; > + int idx = (index * mlen) / 8; > + int pos = (index * mlen) % 8; > + > + return (env->vfp.vreg[0].u8[idx] >> pos) & 0x1; > +} > + > +static inline void vector_get_layout(CPURISCVState *env, int width, int > lmul, > + int index, int *idx, int *pos) > +{ > + int mlen = width / lmul; > + *idx = (index * mlen) / 8; > + *pos = (index * mlen) % 8; > +} > + > +static bool vector_lmul_check_reg(CPURISCVState *env, uint32_t lmul, > + uint32_t reg, bool widen) > +{ > + int legal = widen ? 
(lmul * 2) : lmul; > + > + if ((lmul != 1 && lmul != 2 && lmul != 4 && lmul != 8) || > + (lmul == 8 && widen)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return false; > + } > + > + if (reg % legal != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return false; > + } > + return true; > +} > + > +static inline uint64_t u64xu64_lh(uint64_t a, uint64_t b) > +{ > + uint64_t hi_64, carry; > + > + > + /* first get the whole product in {hi_64, lo_64} */ > + uint64_t a_hi = a >> 32; > + uint64_t a_lo = (uint32_t)a; > + uint64_t b_hi = b >> 32; > + uint64_t b_lo = (uint32_t)b; > + > + /* > + * a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) > + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + > + * (a_lo * b_hi) << 32 + a_lo * b_lo > + * = {hi_64, lo_64} > + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * > b_lo)) >> 64 > + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry > + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) > >> 32 > + */ > + > + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + (uint64_t)(uint32_t)(a_lo * b_hi) + > + ((a_lo * b_lo) >> 32)) >> 32; > + > + hi_64 = a_hi * b_hi + > + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + > + carry; > + > + return hi_64; > +} > + > + > +static inline int64_t s64xu64_lh(int64_t a, uint64_t b) > +{ > + uint64_t abs_a = a; > + uint64_t lo_64, hi_64, carry; > + > + if (a < 0) { > + abs_a = ~a + 1; > + } > + > + /* first get the whole product in {hi_64, lo_64} */ > + uint64_t a_hi = abs_a >> 32; > + uint64_t a_lo = (uint32_t)abs_a; > + uint64_t b_hi = b >> 32; > + uint64_t b_lo = (uint32_t)b; > + > + /* > + * abs_a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) > + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + > + * (a_lo * b_hi) << 32 + a_lo * b_lo > + * = {hi_64, lo_64} > + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * > b_lo)) >> 64 > + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry > + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) > >> 32 > + */ > + > + lo_64 = abs_a * b; > + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + (uint64_t)(uint32_t)(a_lo * b_hi) + > + ((a_lo * b_lo) >> 32)) >> 32; > + > + hi_64 = a_hi * b_hi + > + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + > + carry; > + if ((a ^ b) & SIGNBIT64) { > + lo_64 = ~lo_64; > + hi_64 = ~hi_64; > + if (lo_64 == MAX_U64) { > + lo_64 = 0; > + hi_64 += 1; > + } else { > + lo_64 += 1; > + } > + } > + return hi_64; > +} > + > + > +static inline int64_t s64xs64_lh(int64_t a, int64_t b) > +{ > + uint64_t abs_a = a, abs_b = b; > + uint64_t lo_64, hi_64, carry; > + > + if (a < 0) { > + abs_a = ~a + 1; > + } > + if (b < 0) { > + abs_b = ~b + 1; > + } > + > + /* first get the whole product in {hi_64, lo_64} */ > + uint64_t a_hi = abs_a >> 32; > + uint64_t a_lo = (uint32_t)abs_a; > + uint64_t b_hi = abs_b >> 32; > + uint64_t b_lo = (uint32_t)abs_b; > + > + /* > + * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) > + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + > + * (a_lo * b_hi) << 32 + a_lo * b_lo > + * = {hi_64, lo_64} > + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * > b_lo)) >> 64 > + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry > + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + > + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) > >> 32 > + */ > + > + lo_64 = abs_a * abs_b; > + carry = ((uint64_t)(uint32_t)(a_hi * 
b_lo) + > + (uint64_t)(uint32_t)(a_lo * b_hi) + > + ((a_lo * b_lo) >> 32)) >> 32; > + > + hi_64 = a_hi * b_hi + > + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + > + carry; > + > + if ((a ^ b) & SIGNBIT64) { > + lo_64 = ~lo_64; > + hi_64 = ~hi_64; > + if (lo_64 == MAX_U64) { > + lo_64 = 0; > + hi_64 += 1; > + } else { > + lo_64 += 1; > + } > + } > + return hi_64; > +} > + > +void VECTOR_HELPER(vsetvl)(CPURISCVState *env, uint32_t rs1, uint32_t rs2, > + uint32_t rd) > +{ > + int sew, max_sew, vlmax, vl; > + > + if (rs2 == 0) { > + vector_vtype_set_ill(env); > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + env->vfp.vtype = env->gpr[rs2]; > + sew = 1 << vector_get_width(env) / 8; > + max_sew = sizeof(target_ulong); > + > + > + if (env->misa & RVD) { > + max_sew = max_sew > 8 ? max_sew : 8; > + } else if (env->misa & RVF) { > + max_sew = max_sew > 4 ? max_sew : 4; > + } > + if (sew > max_sew) { > + vector_vtype_set_ill(env); > + return; > + } > + > + vlmax = vector_get_vlmax(env); > + if (rs1 == 0) { > + vl = vlmax; > + } else if (env->gpr[rs1] <= vlmax) { > + vl = env->gpr[rs1]; > + } else if (env->gpr[rs1] < 2 * vlmax) { > + vl = ceil(env->gpr[rs1] / 2); > + } else { > + vl = vlmax; > + } > + env->vfp.vl = vl; > + env->gpr[rd] = vl; > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsetvli)(CPURISCVState *env, uint32_t rs1, uint32_t > zimm, > + uint32_t rd) > +{ > + int sew, max_sew, vlmax, vl; > + > + env->vfp.vtype = zimm; > + sew = vector_get_width(env) / 8; > + max_sew = sizeof(target_ulong); > + > + if (env->misa & RVD) { > + max_sew = max_sew > 8 ? max_sew : 8; > + } else if (env->misa & RVF) { > + max_sew = max_sew > 4 ? max_sew : 4; > + } > + if (sew > max_sew) { > + vector_vtype_set_ill(env); > + return; > + } > + > + vlmax = vector_get_vlmax(env); > + if (rs1 == 0) { > + vl = vlmax; > + } else if (env->gpr[rs1] <= vlmax) { > + vl = env->gpr[rs1]; > + } else if (env->gpr[rs1] < 2 * vlmax) { > + vl = ceil(env->gpr[rs1] / 2); > + } else { > + vl = vlmax; > + } > + env->vfp.vl = vl; > + env->gpr[rd] = vl; > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vrgather.vv vd, vs2, vs1, vm # > + * vd[i] = (vs1[i] >= VLMAX) ? 
0 : vs2[vs1[i]]; > + */ > +void VECTOR_HELPER(vrgather_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src, src1; > + uint32_t index; > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + index = env->vfp.vreg[src1].u8[j]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u8[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[index]; > + } > + } > + break; > + case 16: > + index = env->vfp.vreg[src1].u16[j]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u16[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[index]; > + } > + } > + break; > + case 32: > + index = env->vfp.vreg[src1].u32[j]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u32[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[index]; > + } > + } > + break; > + case 64: > + index = env->vfp.vreg[src1].u64[j]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u64[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[index]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 
0 : > vs2[rs1] */ > +void VECTOR_HELPER(vrgather_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src; > + uint32_t index; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + index = env->gpr[rs1]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u8[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[index]; > + } > + } > + break; > + case 16: > + index = env->gpr[rs1]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u16[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[index]; > + } > + } > + break; > + case 32: > + index = env->gpr[rs1]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u32[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[index]; > + } > + } > + break; > + case 64: > + index = env->gpr[rs1]; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u64[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[index]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 
0 : vs2[imm] > */ > +void VECTOR_HELPER(vrgather_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src; > + uint32_t index; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + index = rs1; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u8[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[index]; > + } > + } > + break; > + case 16: > + index = rs1; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u16[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[index]; > + } > + } > + break; > + case 32: > + index = rs1; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u32[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[index]; > + } > + } > + break; > + case 64: > + index = rs1; > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (index >= vlmax) { > + env->vfp.vreg[dest].u64[j] = 0; > + } else { > + src = rs2 + (index / (VLEN / width)); > + index = index % (VLEN / width); > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[index]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vext_x_v)(CPURISCVState *env, uint32_t rs1, uint32_t > rs2, > + uint32_t rd) > +{ > + int width; > + uint64_t elem; > + target_ulong index = env->gpr[rs1]; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + > + elem = vector_get_iu_elem(env, width, rs2, index); > + if (index >= VLEN / width) { /* index is too big */ > + env->gpr[rd] = 0; > + } else { > + env->gpr[rd] = elem; > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmv.f.s rd, vs2 # rd = vs2[0] (rs1=0) */ > +void VECTOR_HELPER(vfmv_f_s)(CPURISCVState *env, uint32_t rs1, uint32_t > rs2, > + uint32_t rd) > +{ > + int width, flen; > + uint64_t mask; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->misa & RVD) { > + flen = 8; > + } else if (env->misa & RVF) { > + flen = 4; > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = 
vector_get_width(env); > + mask = (~((uint64_t)0)) << width; > + > + if (width == 8) { > + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s8[0] | mask; > + } else if (width == 16) { > + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s16[0] | mask; > + } else if (width == 32) { > + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s32[0] | mask; > + } else if (width == 64) { > + if (flen == 4) { > + env->fpr[rd] = env->vfp.vreg[rs2].s64[0] & 0xffffffff; > + } else { > + env->fpr[rd] = env->vfp.vreg[rs2].s64[0]; > + } > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmv.s.x vd, rs1 # vd[0] = rs1 */ > +void VECTOR_HELPER(vmv_s_x)(CPURISCVState *env, uint32_t rs1, uint32_t > rs2, > + uint32_t rd) > +{ > + int width; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= env->vfp.vl) { > + return; > + } > + > + memset(&env->vfp.vreg[rd].u8[0], 0, VLEN / 8); > + width = vector_get_width(env); > + > + if (width == 8) { > + env->vfp.vreg[rd].u8[0] = env->gpr[rs1]; > + } else if (width == 16) { > + env->vfp.vreg[rd].u16[0] = env->gpr[rs1]; > + } else if (width == 32) { > + env->vfp.vreg[rd].u32[0] = env->gpr[rs1]; > + } else if (width == 64) { > + env->vfp.vreg[rd].u64[0] = env->gpr[rs1]; > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2 = 0) */ > +void VECTOR_HELPER(vfmv_s_f)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, flen; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= env->vfp.vl) { > + return; > + } > + if (env->misa & RVD) { > + flen = 8; > + } else if (env->misa & RVF) { > + flen = 4; > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + > + if (width == 8) { > + env->vfp.vreg[rd].u8[0] = env->fpr[rs1]; > + } else if (width == 16) { > + env->vfp.vreg[rd].u16[0] = env->fpr[rs1]; > + } else if (width == 32) { > + env->vfp.vreg[rd].u32[0] = env->fpr[rs1]; > + } else if (width == 64) { > + if (flen == 4) { /* 1-extended to FLEN bits */ > + env->vfp.vreg[rd].u64[0] = (uint64_t)env->fpr[rs1] > + | 0xffffffff00000000; > + } else { > + env->vfp.vreg[rd].u64[0] = env->fpr[rs1]; > + } > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ > +void VECTOR_HELPER(vslideup_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax, offset; > + int i, j, dest, src, k; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + offset = env->gpr[rs1]; > + > + if (offset < env->vfp.vstart) { > + offset = env->vfp.vstart; > + } > + > 
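
offset is an int here but it is loaded straight from a target_ulong
GPR. On RV64 a large slide amount goes negative, the "i < offset"
guard below stops firing, and (i - offset) / (VLEN / width) can then
index well outside vreg[]. Unless I'm missing a guard elsewhere,
something like this (untested) seems safer:

  target_ulong offset = env->gpr[rs1];

  if (offset < env->vfp.vstart) {
      offset = env->vfp.vstart;
  }
  offset = MIN(offset, (target_ulong)vlmax);

vslidedown.vx below shares the same int truncation. Also, both
vslidedown helpers skip i < offset, which looks copied from slideup -
unless I'm misreading the 0.7.1 slidedown semantics, those low
elements should still be written with vs2[i + offset].
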
+ for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i - offset) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i - offset) % (VLEN / width); > + if (i < offset) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vslideup.vi vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ > +void VECTOR_HELPER(vslideup_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax, offset; > + int i, j, dest, src, k; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + offset = rs1; > + > + if (offset < env->vfp.vstart) { > + offset = env->vfp.vstart; > + } > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i - offset) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i - offset) % (VLEN / width); > + if (i < offset) { > + continue; > + } else if (i < vl) { > + if (width == 8) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[k]; > + } > + } else if (width == 16) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } > + } else if (width == 32) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } > + } else if (width == 64) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ > +void VECTOR_HELPER(vslide1up_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src, k; > + uint64_t s1; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + 
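
Every helper open-codes this same vtype-ill/overlap/lmul-alignment
preamble before doing any work. Given vtype only changes via
vsetvl{i}, these are really translation-time properties; at minimum
they could be factored into one shared predicate so the per-insn
helpers shrink. A minimal sketch (untested):

  static bool vector_common_check(CPURISCVState *env, uint32_t vm,
                                  uint32_t rd)
  {
      return !vector_vtype_ill(env) && !vector_overlap_vm_force(vm, rd);
  }

  ...
  if (!vector_common_check(env, vm, rd)) {
      riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
  }

Keeping GETPC() in the outermost helper like this also matters - it is
only valid there. And since riscv_raise_exception() is QEMU_NORETURN,
all the explicit "return;" statements after it in this file are dead
code.
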
vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + s1 = env->gpr[rs1]; > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i - 1) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i - 1) % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i == 0 && env->vfp.vstart == 0) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = s1; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = s1; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = s1; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = s1; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i + rs1] */ > +void VECTOR_HELPER(vslidedown_vx)(CPURISCVState *env, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax, offset; > + int i, j, dest, src, k; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + offset = env->gpr[rs1]; > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i + offset) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i + offset) % (VLEN / width); > + if (i < offset) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[k]; > + } else { > + env->vfp.vreg[dest].u8[j] = 0; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } else { > + env->vfp.vreg[dest].u16[j] = 0; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u32[j] = > 
+ env->vfp.vreg[src].u32[k]; > + } else { > + env->vfp.vreg[dest].u32[j] = 0; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } else { > + env->vfp.vreg[dest].u64[j] = 0; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vslidedown_vi)(CPURISCVState *env, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax, offset; > + int i, j, dest, src, k; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + offset = rs1; > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i + offset) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i + offset) % (VLEN / width); > + if (i < offset) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src].u8[k]; > + } else { > + env->vfp.vreg[dest].u8[j] = 0; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } else { > + env->vfp.vreg[dest].u16[j] = 0; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } else { > + env->vfp.vreg[dest].u32[j] = 0; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (i + offset < vlmax) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } else { > + env->vfp.vreg[dest].u64[j] = 0; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vslide1down.vx vd, vs2, rs1, vm # vd[vl - 1]=x[rs1], vd[i] = vs2[i + > 1] */ > +void VECTOR_HELPER(vslide1down_vx)(CPURISCVState *env, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src, k; > + uint64_t s1; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + s1 = env->gpr[rs1]; > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src = rs2 + ((i + 1) / (VLEN / width)); > + j = i % (VLEN / width); > + k = (i + 1) % (VLEN / width); > + if 
(i < env->vfp.vstart) { > + continue; > + } else if (i == vl - 1 && i >= env->vfp.vstart) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = s1; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = s1; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = s1; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = s1; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else if (i < vl - 1) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src].u16[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src].u32[k]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src].u64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vcompress.vm vd, vs2, vs1 > + * Compress into vd elements of vs2 where vs1 is enabled > + */ > +void VECTOR_HELPER(vcompress_vm)(CPURISCVState *env, uint32_t rs1, > uint32_t rs2, > + uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src; > + uint32_t vd_idx, num = 0; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, 1) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + /* zeroed all elements */ > + for (i = 0; i < lmul; i++) { > + memset(&env->vfp.vreg[rd + i].u64[0], 0, VLEN / 8); > + } > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (num / (VLEN / width)); > + src = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + vd_idx = num % (VLEN / width); > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_mask_reg(env, rs1, width, lmul, i)) { > + env->vfp.vreg[dest].u8[vd_idx] = > + env->vfp.vreg[src].u8[j]; > + num++; > + } > + break; > + case 16: > + if (vector_mask_reg(env, rs1, width, lmul, i)) { > + env->vfp.vreg[dest].u16[vd_idx] = > + env->vfp.vreg[src].u16[j]; > + num++; > + } > + break; > + case 32: > + if (vector_mask_reg(env, rs1, width, lmul, i)) { > + env->vfp.vreg[dest].u32[vd_idx] = > + env->vfp.vreg[src].u32[j]; > + num++; > + } > + break; > + case 64: > + if (vector_mask_reg(env, rs1, width, lmul, i)) { > + env->vfp.vreg[dest].u64[vd_idx] = > + env->vfp.vreg[src].u64[j]; > + num++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + 
return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] > + + env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + + env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + + env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + + env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + + env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; 
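
This is the shape that makes vector_helper.c 26k lines: the same vlmax
loop and per-width switch repeated for every arithmetic op. If the
element ops were generated from a macro and dispatched from one
generic loop, each instruction would collapse to a few lines. Roughly
(untested):

  #define GEN_VEXT_OP(NAME, FIELD, OP)                               \
  static void do_##NAME(CPURISCVState *env, int dest, int src1,      \
                        int src2, int j)                             \
  {                                                                  \
      env->vfp.vreg[dest].FIELD[j] =                                 \
          env->vfp.vreg[src1].FIELD[j] OP                            \
          env->vfp.vreg[src2].FIELD[j];                              \
  }

  GEN_VEXT_OP(vadd_vv_b, s8, +)
  GEN_VEXT_OP(vadd_vv_h, s16, +)
  GEN_VEXT_OP(vadd_vv_w, s32, +)
  GEN_VEXT_OP(vadd_vv_d, s64, +)

Separately: in many helpers above (vsetvl, the vrgathers, vext_x_v,
the slides, ...) the final "env->vfp.vstart = 0;" sits after a
"return;", so vstart is never actually cleared on the successful path.
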
> +} > + > +void VECTOR_HELPER(vadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredsum.vs vd, vs2, vs1, vm # vd[0] = sum(vs1[0] , vs2[*]) */ > +void VECTOR_HELPER(vredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t sum = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u8[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u8[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = sum; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u16[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u16[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = sum; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u32[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u32[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = sum; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u64[j]; > + } > + if (i 
== 0) { > + sum += env->vfp.vreg[rs1].u64[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = sum; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfadd.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_add( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_add( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_add( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfadd.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_add( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_add( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + 
env->vfp.vreg[dest].f64[j] = float64_add( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vredand.vs vd, vs2, vs1, vm # vd[0] = and( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredand_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t res = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res &= env->vfp.vreg[src2].u8[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = res; > + } > + break; > + case 16: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res &= env->vfp.vreg[src2].u16[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = res; > + } > + break; > + case 32: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res &= env->vfp.vreg[src2].u32[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = res; > + } > + break; > + case 64: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res &= env->vfp.vreg[src2].u64[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = res; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfredsum.vs vd, vs2, vs1, vm # Unordered sum */ > +void VECTOR_HELPER(vfredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + float16 sum16 = 0.0f; > + float32 sum32 = 0.0f; > + float64 sum64 = 0.0f; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 16: > + if (i == 0) { > + sum16 = env->vfp.vreg[rs1].f16[0]; > + } 
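
float16/float32/float64 are integer container typedefs, so
initialising the accumulators with 0.0f only works by accident of +0.0
encoding as all-zero bits; make_float16(0) (or plain 0) says what is
meant. The initialisers are dead anyway, since vstart != 0 faults and
the i == 0 arm always reloads them. Also, vfredosum below just calls
this helper - which happens to be fine because this loop is already
sequential, but a comment saying the unordered sum is implemented as
the ordered one would help.
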
> + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum16 = float16_add(sum16, env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f16[0] = sum16; > + } > + break; > + case 32: > + if (i == 0) { > + sum32 = env->vfp.vreg[rs1].f32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum32 = float32_add(sum32, env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f32[0] = sum32; > + } > + break; > + case 64: > + if (i == 0) { > + sum64 = env->vfp.vreg[rs1].f64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum64 = float64_add(sum64, env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f64[0] = sum64; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + - env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + - env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + - env->vfp.vreg[src1].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + - env->vfp.vreg[src1].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > 
+ switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + - env->gpr[rs1]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + - env->gpr[rs1]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + - env->gpr[rs1]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + - (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredor.vs vd, vs2, vs1, vm # vd[0] = or( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredor_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t res = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res |= env->vfp.vreg[src2].u8[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = res; > + } > + break; > + case 16: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res |= env->vfp.vreg[src2].u16[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = res; > + } > + break; > + case 32: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res |= env->vfp.vreg[src2].u32[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = res; > + } > + break; > + case 64: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res |= env->vfp.vreg[src2].u64[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = res; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfsub.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + 
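
Going back to vredor_vs (and the other reductions): vd is zeroed
before the loop ever reads vs1[0], so vredor.vs vd, vs2, vd - which I
believe the spec allows, the destination may overlap the sources -
reads back 0 instead of the old vd[0], and likewise if vd sits inside
the vs2 group. Since the accumulator already lives in res, it seems
safer to touch vd only at the end; for the SEW=8 case, something like
(untested):

  uint64_t res = env->vfp.vreg[rs1].u8[0];   /* read sources first */

  /* ... accumulate res over the active elements of vs2 ... */

  for (i = 0; i < VLEN / 64; i++) {
      env->vfp.vreg[rd].u64[i] = 0;          /* only now clobber vd */
  }
  env->vfp.vreg[rd].u8[0] = res;
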
vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_sub( > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[src1].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_sub( > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[src1].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_sub( > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[src1].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfsub.vf vd, vs2, rs1, vm # Vector-scalar vd[i] = vs2[i] - f[rs1] */ > +void VECTOR_HELPER(vfsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_sub( > + > env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_sub( > + > env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_sub( > + > env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vrsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + - env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + - env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + - env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + - env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vrsub_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + - env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + - env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + - env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + - env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredxor.vs vd, vs2, vs1, vm # vd[0] = xor( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredxor_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t res = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, 
GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res ^= env->vfp.vreg[src2].u8[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = res; > + } > + break; > + case 16: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res ^= env->vfp.vreg[src2].u16[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = res; > + } > + break; > + case 32: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res ^= env->vfp.vreg[src2].u32[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = res; > + } > + break; > + case 64: > + if (i == 0) { > + res = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + res ^= env->vfp.vreg[src2].u64[j]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = res; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfredosum.vs vd, vs2, vs1, vm # Ordered sum */ > +void VECTOR_HELPER(vfredosum_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + helper_vector_vfredsum_vs(env, vm, rs1, rs2, rd); > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vminu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] <= > + env->vfp.vreg[src2].u8[j]) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src1].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] <= > + env->vfp.vreg[src2].u16[j]) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src1].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] <= > + env->vfp.vreg[src2].u32[j]) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src1].u32[j]; > + } else { > + 
env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] <= > + env->vfp.vreg[src2].u64[j]) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src1].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vminu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].u8[j]) { > + env->vfp.vreg[dest].u8[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].u16[j]) { > + env->vfp.vreg[dest].u16[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].u32[j]) { > + env->vfp.vreg[dest].u32[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) <= > + env->vfp.vreg[src2].u64[j]) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]); > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredminu.vs vd, vs2, vs1, vm # vd[0] = minu( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredminu_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t minu = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + 
env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + minu = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (minu > env->vfp.vreg[src2].u8[j]) { > + minu = env->vfp.vreg[src2].u8[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = minu; > + } > + break; > + case 16: > + if (i == 0) { > + minu = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (minu > env->vfp.vreg[src2].u16[j]) { > + minu = env->vfp.vreg[src2].u16[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = minu; > + } > + break; > + case 32: > + if (i == 0) { > + minu = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (minu > env->vfp.vreg[src2].u32[j]) { > + minu = env->vfp.vreg[src2].u32[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = minu; > + } > + break; > + case 64: > + if (i == 0) { > + minu = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (minu > env->vfp.vreg[src2].u64[j]) { > + minu = env->vfp.vreg[src2].u64[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = minu; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmin.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_minnum( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_minnum( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_minnum( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmin.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfmin_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_minnum( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_minnum( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_minnum( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s8[j] <= > + env->vfp.vreg[src2].s8[j]) { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src1].s8[j]; > + } else { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src2].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s16[j] <= > + env->vfp.vreg[src2].s16[j]) { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src1].s16[j]; > + } else { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src2].s16[j]; > 
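
The tail handling in vfmin.vv/vfmin.vf above (and in the other FP
helpers in this patch) looks broken: the switch in the else branch has
no break statements, so every valid width falls through into the
default case and raises an illegal instruction exception whenever
vl < vlmax. The integer helpers use vector_tail_common() here;
presumably the FP ones should do the same with vector_tail_fcommon(),
which vfsub.vv already uses:

    } else {
        vector_tail_fcommon(env, dest, j, width);
    }

Also note the recurring pattern at the end of these helpers:

    return;
    env->vfp.vstart = 0;

The assignment is dead code, so vstart is never cleared on the normal
exit path. I suspect you want the reset before the return (or simply to
drop the return). This appears all over the file, so it probably wants
fixing with a common exit helper.
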
+ } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s32[j] <= > + env->vfp.vreg[src2].s32[j]) { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src1].s32[j]; > + } else { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src2].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s64[j] <= > + env->vfp.vreg[src2].s64[j]) { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src1].s64[j]; > + } else { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src2].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmin_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int8_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].s8[j]) { > + env->vfp.vreg[dest].s8[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src2].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int16_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].s16[j]) { > + env->vfp.vreg[dest].s16[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src2].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int32_t)env->gpr[rs1] <= > + env->vfp.vreg[src2].s32[j]) { > + env->vfp.vreg[dest].s32[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src2].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int64_t)extend_gpr(env->gpr[rs1]) <= > + env->vfp.vreg[src2].s64[j]) { > + env->vfp.vreg[dest].s64[j] = > + (int64_t)extend_gpr(env->gpr[rs1]); > + } else { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src2].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredmin.vs vd, vs2, vs1, vm # vd[0] = min( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + int64_t min = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + min = env->vfp.vreg[rs1].s8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (min > env->vfp.vreg[src2].s8[j]) { > + min = env->vfp.vreg[src2].s8[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s8[0] = min; > + } > + break; > + case 16: > + if (i == 0) { > + min = env->vfp.vreg[rs1].s16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (min > env->vfp.vreg[src2].s16[j]) { > + min = env->vfp.vreg[src2].s16[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s16[0] = min; > + } > + break; > + case 32: > + if (i == 0) { > + min = env->vfp.vreg[rs1].s32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (min > env->vfp.vreg[src2].s32[j]) { > + min = env->vfp.vreg[src2].s32[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s32[0] = min; > + } > + break; > + case 64: > + if (i == 0) { > + min = env->vfp.vreg[rs1].s64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (min > env->vfp.vreg[src2].s64[j]) { > + min = env->vfp.vreg[src2].s64[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s64[0] = min; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfredmin.vs vd, vs2, vs1, vm # Minimum value */ > +void VECTOR_HELPER(vfredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + float16 min16 = 0.0f; > + float32 min32 = 0.0f; > + float64 min64 = 0.0f; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 16: > + if (i == 0) { > + min16 = env->vfp.vreg[rs1].f16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + min16 = float16_minnum(min16, > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f16[0] = min16; > + } > + break; > + case 32: > + if (i == 0) { > + min32 = env->vfp.vreg[rs1].f32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + min32 = float32_minnum(min32, > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f32[0] = min32; > + } > + break; > + case 64: > + if (i == 0) { > + min64 = env->vfp.vreg[rs1].f64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + min64 = float64_minnum(min64, > env->vfp.vreg[src2].f64[j], > + 
&env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f64[0] = min64; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmaxu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] >= > + env->vfp.vreg[src2].u8[j]) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src1].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] >= > + env->vfp.vreg[src2].u16[j]) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src1].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] >= > + env->vfp.vreg[src2].u32[j]) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src1].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] >= > + env->vfp.vreg[src2].u64[j]) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src1].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmaxu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].u8[j]) { > + env->vfp.vreg[dest].u8[j] = > + 
env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].u16[j]) { > + env->vfp.vreg[dest].u16[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].u32[j]) { > + env->vfp.vreg[dest].u32[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) >= > + env->vfp.vreg[src2].u64[j]) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]); > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredmaxu.vs vd, vs2, vs1, vm # vd[0] = maxu( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredmaxu_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t maxu = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + maxu = env->vfp.vreg[rs1].u8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (maxu < env->vfp.vreg[src2].u8[j]) { > + maxu = env->vfp.vreg[src2].u8[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u8[0] = maxu; > + } > + break; > + case 16: > + if (i == 0) { > + maxu = env->vfp.vreg[rs1].u16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (maxu < env->vfp.vreg[src2].u16[j]) { > + maxu = env->vfp.vreg[src2].u16[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = maxu; > + } > + break; > + case 32: > + if (i == 0) { > + maxu = env->vfp.vreg[rs1].u32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (maxu < env->vfp.vreg[src2].u32[j]) { > + maxu = env->vfp.vreg[src2].u32[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = maxu; > + } > + break; > + case 64: > + if (i == 0) { > + maxu = env->vfp.vreg[rs1].u64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (maxu < env->vfp.vreg[src2].u64[j]) { > + maxu = env->vfp.vreg[src2].u64[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = maxu; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/*vfmax.vv vd, vs2, vs1, vm 
# Vector-vector */ > +void VECTOR_HELPER(vfmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_maxnum( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_maxnum( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_maxnum( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmax.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfmax_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_maxnum( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_maxnum( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_maxnum( > + env->fpr[rs1], > + > 
env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s8[j] >= > + env->vfp.vreg[src2].s8[j]) { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src1].s8[j]; > + } else { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src2].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s16[j] >= > + env->vfp.vreg[src2].s16[j]) { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src1].s16[j]; > + } else { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src2].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s32[j] >= > + env->vfp.vreg[src2].s32[j]) { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src1].s32[j]; > + } else { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src2].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s64[j] >= > + env->vfp.vreg[src2].s64[j]) { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src1].s64[j]; > + } else { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src2].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmax_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + 
continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int8_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].s8[j]) { > + env->vfp.vreg[dest].s8[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s8[j] = > + env->vfp.vreg[src2].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int16_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].s16[j]) { > + env->vfp.vreg[dest].s16[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s16[j] = > + env->vfp.vreg[src2].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int32_t)env->gpr[rs1] >= > + env->vfp.vreg[src2].s32[j]) { > + env->vfp.vreg[dest].s32[j] = > + env->gpr[rs1]; > + } else { > + env->vfp.vreg[dest].s32[j] = > + env->vfp.vreg[src2].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int64_t)extend_gpr(env->gpr[rs1]) >= > + env->vfp.vreg[src2].s64[j]) { > + env->vfp.vreg[dest].s64[j] = > + (int64_t)extend_gpr(env->gpr[rs1]); > + } else { > + env->vfp.vreg[dest].s64[j] = > + env->vfp.vreg[src2].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vredmax.vs vd, vs2, vs1, vm # vd[0] = max( vs1[0] , vs2[*] ) */ > +void VECTOR_HELPER(vredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + int64_t max = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (i == 0) { > + max = env->vfp.vreg[rs1].s8[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (max < env->vfp.vreg[src2].s8[j]) { > + max = env->vfp.vreg[src2].s8[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s8[0] = max; > + } > + break; > + case 16: > + if (i == 0) { > + max = env->vfp.vreg[rs1].s16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (max < env->vfp.vreg[src2].s16[j]) { > + max = env->vfp.vreg[src2].s16[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s16[0] = max; > + } > + break; > + case 32: > + if (i == 0) { > + max = env->vfp.vreg[rs1].s32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (max < env->vfp.vreg[src2].s32[j]) { > + max = env->vfp.vreg[src2].s32[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s32[0] = max; > + } > + break; > + case 64: > + if (i == 0) { > + max = env->vfp.vreg[rs1].s64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (max < env->vfp.vreg[src2].s64[j]) { > + max = env->vfp.vreg[src2].s64[j]; > + } > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s64[0] = max; > + } 
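
vminu/vmin/vmaxu/vmax and their _vx variants are byte-for-byte
identical apart from the element type and the comparison, which is a
lot of hand-maintained switch statements to review. Could these be
generated instead? Something along these lines (the macro names are
illustrative, not an existing API):

    #define DO_MIN(a, b) ((a) < (b) ? (a) : (b))
    #define DO_MAX(a, b) ((a) > (b) ? (a) : (b))

    #define VEXT_VV_ELEM(TYPE, FIELD, DO_OP)                     \
        do {                                                     \
            if (vector_elem_mask(env, vm, width, lmul, i)) {     \
                TYPE a = env->vfp.vreg[src1].FIELD[j];           \
                TYPE b = env->vfp.vreg[src2].FIELD[j];           \
                env->vfp.vreg[dest].FIELD[j] = DO_OP(a, b);      \
            }                                                    \
        } while (0)

    /* in the width switch: */
    case 8:  VEXT_VV_ELEM(uint8_t,  u8,  DO_MIN); break;
    case 16: VEXT_VV_ELEM(uint16_t, u16, DO_MIN); break;

That would stop a stray break or signedness mix-up creeping into one
width but not the others, and would shrink this 26k-line file
considerably.
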
> + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfredmax.vs vd, vs2, vs1, vm # Maximum value */ > +void VECTOR_HELPER(vfredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + float16 max16 = 0.0f; > + float32 max32 = 0.0f; > + float64 max64 = 0.0f; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 16: > + if (i == 0) { > + max16 = env->vfp.vreg[rs1].f16[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + max16 = float16_maxnum(max16, > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f16[0] = max16; > + } > + break; > + case 32: > + if (i == 0) { > + max32 = env->vfp.vreg[rs1].f32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + max32 = float32_maxnum(max32, > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f32[0] = max32; > + } > + break; > + case 64: > + if (i == 0) { > + max64 = env->vfp.vreg[rs1].f64[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + max64 = float64_maxnum(max64, > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f64[0] = max64; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfsgnj.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfsgnj_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + > env->vfp.vreg[src1].f16[j], > + 0, > + 15, > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + > 
env->vfp.vreg[src1].f32[j], > + 0, > + 31, > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + > env->vfp.vreg[src1].f64[j], > + 0, > + 63, > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfsgnj.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfsgnj_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + env->fpr[rs1], > + 0, > + 15, > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + env->fpr[rs1], > + 0, > + 31, > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + env->fpr[rs1], > + 0, > + 63, > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vand_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > 
+ switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] > + & env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + & env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + & env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + & env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vand_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + & env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + & env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + & env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + & env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vand_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + & 
env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + & env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + & env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + & env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfsgnjn.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfsgnjn_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + > ~env->vfp.vreg[src1].f16[j], > + 0, > + 15, > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + > ~env->vfp.vreg[src1].f32[j], > + 0, > + 31, > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + > ~env->vfp.vreg[src1].f64[j], > + 0, > + 63, > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +/* vfsgnjn.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfsgnjn_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + 
dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + ~env->fpr[rs1], > + 0, > + 15, > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + ~env->fpr[rs1], > + 0, > + 31, > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + ~env->fpr[rs1], > + 0, > + 63, > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] > + | env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + | env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + | env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + | env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + 
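
For the pure logical ops (vand/vor/vxor) a helper call per instruction
seems wasteful: in the unmasked, vstart == 0 case these are exactly
what tcg_gen_gvec_and/or/xor are for and could be emitted inline from
trans_rvv.inc.c, keeping the out-of-line helper only as the masked
fallback. Very roughly, where vreg_ofs(), s->vstart_eq_zero and
s->vlen all stand in for state this patch would need to expose to the
translator (none of them exists here):

    static bool trans_vand_vv(DisasContext *s, arg_vand_vv *a)
    {
        if (a->vm && s->vstart_eq_zero) {
            tcg_gen_gvec_and(MO_8, vreg_ofs(s, a->rd),
                             vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
                             s->vlen / 8, s->vlen / 8);
            return true;
        }
        return do_helper_call(s, a); /* placeholder for the slow path */
    }

The logical ops don't care about element width at all, so the whole
width switch disappears; the tail-zeroing semantics would still need
handling, of course.
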
vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + | env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + | env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + | env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + | env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vor_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + | env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + | env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + | env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + | env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfsgnjx.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfsgnjx_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = 
vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + > env->vfp.vreg[src1].f16[j] ^ > + > env->vfp.vreg[src2].f16[j], > + 0, > + 15, > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + > env->vfp.vreg[src1].f32[j] ^ > + > env->vfp.vreg[src2].f32[j], > + 0, > + 31, > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + > env->vfp.vreg[src1].f64[j] ^ > + > env->vfp.vreg[src2].f64[j], > + 0, > + 63, > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vfsgnjx.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfsgnjx_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = deposit16( > + env->fpr[rs1] ^ > + > env->vfp.vreg[src2].f16[j], > + 0, > + 15, > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = deposit32( > + env->fpr[rs1] ^ > + > env->vfp.vreg[src2].f32[j], > + 0, > + 31, > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = deposit64( > + env->fpr[rs1] ^ > + > env->vfp.vreg[src2].f64[j], > + 0, > + 63, > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > +void 
VECTOR_HELPER(vxor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] > + ^ env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j] > + ^ env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j] > + ^ env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j] > + ^ env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vxor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + ^ env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + ^ env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + ^ env->vfp.vreg[src2].u32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]) > + ^ env->vfp.vreg[src2].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vxor_vi)(CPURISCVState *env, uint32_t vm, uint32_t 
rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) > + ^ env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) > + ^ env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) > + ^ env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) > + ^ env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadc_vvm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax, carry; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] > + + env->vfp.vreg[src2].u8[j] + carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j] > + + env->vfp.vreg[src2].u16[j] + carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j] > + + env->vfp.vreg[src2].u32[j] + carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j] > + + env->vfp.vreg[src2].u64[j] + carry; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vadc_vxm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, 
vlmax, carry; > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u8[j] + carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u16[j] + carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] > + + env->vfp.vreg[src2].u32[j] + carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = > (uint64_t)extend_gpr(env->gpr[rs1]) > + + env->vfp.vreg[src2].u64[j] + carry; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vadc_vim)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax, carry; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u8[j] + carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u16[j] + carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u32[j] + carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u64[j] + carry; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmadc_vvm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax, carry; > + uint64_t tmp; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + > + if (vector_vtype_ill(env) > + || 
vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src1].u8[j] > + + env->vfp.vreg[src2].u8[j] + carry; > + tmp = tmp >> width; > + > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src1].u16[j] > + + env->vfp.vreg[src2].u16[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)env->vfp.vreg[src1].u32[j] > + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src1].u64[j] > + + env->vfp.vreg[src2].u64[j] + carry; > + > + if ((tmp < env->vfp.vreg[src1].u64[j] || > + tmp < env->vfp.vreg[src2].u64[j]) > + || (env->vfp.vreg[src1].u64[j] == MAX_U64 && > + env->vfp.vreg[src2].u64[j] == MAX_U64)) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmadc_vxm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax, carry; > + uint64_t tmp, extend_rs1; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint8_t)env->gpr[rs1] > + + env->vfp.vreg[src2].u8[j] + carry; > + tmp = tmp >> width; > + > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint16_t)env->gpr[rs1] > + + env->vfp.vreg[src2].u16[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)((uint32_t)env->gpr[rs1]) > + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + 
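The open-coded 64 bit carry-out tests (above in vmadc.vvm, here, and in vmadc.vim/vmsbc.* below) are each written slightly differently and are hard to verify by eye. Two one-line helpers would give a single, easily checked formulation — a sketch only, with names of my own invention:

    /* carry out of a + b + cin, cin in {0, 1} */
    static inline uint64_t uadd64_carry_out(uint64_t a, uint64_t b, uint64_t cin)
    {
        uint64_t sum = a + b + cin;
        return (sum < a) || (cin && sum == a);
    }

    /* borrow out of a - b - bin, bin in {0, 1} */
    static inline uint64_t usub64_borrow_out(uint64_t a, uint64_t b, uint64_t bin)
    {
        return (a < b) || (bin && a == b);
    }

with which each case 64 collapses to a single vector_mask_result() call.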
> + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]); > + tmp = extend_rs1 + env->vfp.vreg[src2].u64[j] + carry; > + if ((tmp < extend_rs1) || > + (carry && (env->vfp.vreg[src2].u64[j] == MAX_U64))) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmadc_vim)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax, carry; > + uint64_t tmp; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint8_t)sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u8[j] + carry; > + tmp = tmp >> width; > + > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint16_t)sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u16[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)((uint32_t)sign_extend(rs1, 5)) > + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry; > + tmp = tmp >> width; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)sign_extend(rs1, 5) > + + env->vfp.vreg[src2].u64[j] + carry; > + > + if ((tmp < (uint64_t)sign_extend(rs1, 5) || > + tmp < env->vfp.vreg[src2].u64[j]) > + || ((uint64_t)sign_extend(rs1, 5) == MAX_U64 && > + env->vfp.vreg[src2].u64[j] == MAX_U64)) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsbc_vvm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax, carry; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > 
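Two more general points on this group. The

    > + if (width <= 64) {
    > + vector_mask_result(env, rd, width, lmul, i, 0);
    > + } else {

tail pattern in the vmadc/vmsbc helpers is dead code — width comes from vector_get_width() and can only be 8/16/32 or 64, so the else can never fire and the vector_mask_result() call can be made unconditionally.

And the vadc/vsbc family is the same loop five times over with only the element expression changing. Even a simple width-stamped macro would shrink each switch to four lines — a rough sketch (VADC_CASE is my name, nothing in the tree):

    #define VADC_CASE(BITS)                                        \
        case BITS:                                                 \
            carry = vector_get_carry(env, BITS, lmul, i);          \
            env->vfp.vreg[dest].u##BITS[j] =                       \
                env->vfp.vreg[src1].u##BITS[j]                     \
                + env->vfp.vreg[src2].u##BITS[j] + carry;          \
            break;

Applied across the file, that sort of factoring would do a lot for the review problem. It might also be worth seeing whether the plain logical ops (vand/vor/vxor) could use the tcg_gen_gvec_* expanders for the unmasked full-register case instead of calling out to a helper at all.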
+ src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + - env->vfp.vreg[src1].u8[j] - carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j] > + - env->vfp.vreg[src1].u16[j] - carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j] > + - env->vfp.vreg[src1].u32[j] - carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j] > + - env->vfp.vreg[src1].u64[j] - carry; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsbc_vxm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax, carry; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + - env->gpr[rs1] - carry; > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j] > + - env->gpr[rs1] - carry; > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j] > + - env->gpr[rs1] - carry; > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j] > + - (uint64_t)extend_gpr(env->gpr[rs1]) - carry; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsbc_vvm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax, carry; > + uint64_t tmp; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = 
rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u8[j] > + - env->vfp.vreg[src1].u8[j] - carry; > + tmp = (tmp >> width) & 0x1; > + > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u16[j] > + - env->vfp.vreg[src1].u16[j] - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)env->vfp.vreg[src2].u32[j] > + - (uint64_t)env->vfp.vreg[src1].u32[j] - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u64[j] > + - env->vfp.vreg[src1].u64[j] - carry; > + > + if (((env->vfp.vreg[src1].u64[j] == MAX_U64) && carry) || > + env->vfp.vreg[src2].u64[j] < > + (env->vfp.vreg[src1].u64[j] + carry)) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsbc_vxm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax, carry; > + uint64_t tmp, extend_rs1; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) > + || (rd == 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u8[j] > + - (uint8_t)env->gpr[rs1] - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 16: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = env->vfp.vreg[src2].u16[j] > + - (uint16_t)env->gpr[rs1] - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 32: > + carry = vector_get_carry(env, width, lmul, i); > + tmp = (uint64_t)env->vfp.vreg[src2].u32[j] > + - (uint64_t)((uint32_t)env->gpr[rs1]) - carry; > + tmp = (tmp >> width) & 0x1; > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + case 64: > + carry = vector_get_carry(env, width, lmul, i); > + > + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]); > + tmp = env->vfp.vreg[src2].u64[j] - extend_rs1 - carry; > + > + if ((tmp > env->vfp.vreg[src2].u64[j]) || > + ((extend_rs1 == MAX_U64) && carry)) { > + tmp = 1; > + } else { > + tmp = 0; > + } > + vector_mask_result(env, rd, width, lmul, i, tmp); > + break; > + > + default: > + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmpopc.m rd, vs2, v0.t # x[rd] = sum_i ( vs2[i].LSB && v0[i].LSB ) */ > +void VECTOR_HELPER(vmpopc_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + env->gpr[rd] = 0; > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + if (vector_mask_reg(env, rs2, width, lmul, i) && > + vector_elem_mask(env, vm, width, lmul, i)) { > + env->gpr[rd]++; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfirst.m rd, vs2, vm */ > +void VECTOR_HELPER(vmfirst_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + if (vector_mask_reg(env, rs2, width, lmul, i) && > + vector_elem_mask(env, vm, width, lmul, i)) { > + env->gpr[rd] = i; > + break; > + } > + } else { > + env->gpr[rd] = -1; > + } > + } > + > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmerge_vvm)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl, idx, pos; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src1].u8[j]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u16[j] = > 
+ env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src1].u16[j]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src1].u32[j]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src1].u32[j]; > + } > + break; > + case 64: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src1].u64[j]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src1].u64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmerge_vxm)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl, idx, pos; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u8[j] = env->gpr[rs1]; > + } > + break; > + case 16: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u16[j] = env->gpr[rs1]; > + } > + break; > + case 32: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]; > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u32[j] = env->gpr[rs1]; > + } > + break; > + case 64: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, 
i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmerge_vim)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl, idx, pos; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u8[j] = > + env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > + (uint8_t)sign_extend(rs1, 5); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u8[j] = (uint8_t)sign_extend(rs1, > 5); > + } > + break; > + case 16: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u16[j] = > + env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > + (uint16_t)sign_extend(rs1, 5); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u16[j] = > (uint16_t)sign_extend(rs1, 5); > + } > + break; > + case 32: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u32[j] = > + env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > + (uint32_t)sign_extend(rs1, 5); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u32[j] = > (uint32_t)sign_extend(rs1, 5); > + } > + break; > + case 64: > + if (vm == 0) { > + vector_get_layout(env, width, lmul, i, &idx, &pos); > + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { > + env->vfp.vreg[dest].u64[j] = > + env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > + (uint64_t)sign_extend(rs1, 5); > + } > + } else { > + if (rs2 != 0) { > + riscv_raise_exception(env, > + RISCV_EXCP_ILLEGAL_INST, GETPC()); > + } > + env->vfp.vreg[dest].u64[j] = > (uint64_t)sign_extend(rs1, 5); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart 
= 0; > +} > + > +/* vfmerge.vfm vd, vs2, rs1, v0 # vd[i] = v0[i].LSB ? f[rs1] : vs2[i] */ > +void VECTOR_HELPER(vfmerge_vfm)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* vfmv.v.f vd, rs1 # vd[i] = f[rs1]; */ > + if (vm && (rs2 != 0)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = env->fpr[rs1]; > + } else { > + env->vfp.vreg[dest].f16[j] = > env->vfp.vreg[src2].f16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = env->fpr[rs1]; > + } else { > + env->vfp.vreg[dest].f32[j] = > env->vfp.vreg[src2].f32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = env->fpr[rs1]; > + } else { > + env->vfp.vreg[dest].f64[j] = > env->vfp.vreg[src2].f64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmseq_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] == > + env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] == > + env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] == > + env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] == > + 
env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmseq_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] == > env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] == > env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] == > env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) == > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmseq_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)sign_extend(rs1, 5) > + == env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 
1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)sign_extend(rs1, 5) > + == env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)sign_extend(rs1, 5) > + == env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)sign_extend(rs1, 5) == > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmandnot.mm vd, vs2, vs1 # vd = vs2 & ~vs1 */ > +void VECTOR_HELPER(vmandnot_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) & > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfeq.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmfeq_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_eq_quiet(env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_eq_quiet(env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + 
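Same point as on the softfloat hunk: these only need the new float16_eq_quiet() because this patch adds it;

    result = float16_compare_quiet(a, b, &env->fp_status) == float_relation_equal;

works with the API we already have, and float16_compare() covers the vmfle/vmflt helpers below. For the .vf forms — is it really OK to feed env->fpr[rs1] straight in as a 16/32 bit value? I'd expect the high bits to be checked for proper NaN-boxing first.

A few more issues in the code above: vmpopc_m and vmfirst_m both have the unreachable vstart reset after return, both write env->gpr[rd] without special-casing rd == 0 (x0 must stay zero), and vmfirst_m never writes the -1 "not found" result when vl == vlmax. vmerge_vxm is also missing the vector_vtype_ill() check its vvm/vim siblings have, and raising the illegal instruction exception from inside the element loop (the rs2 != 0 checks) means part of the destination has already been written by the time the trap fires — those checks belong up front.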
vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_eq_quiet(env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfeq.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfeq_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmsne_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if 
(env->vfp.vreg[src1].u8[j] != > + env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] != > + env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] != > + env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] != > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsne_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] != > env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] != > env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] != > env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) != > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void 
VECTOR_HELPER(vmsne_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)sign_extend(rs1, 5) > + != env->vfp.vreg[src2].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)sign_extend(rs1, 5) > + != env->vfp.vreg[src2].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)sign_extend(rs1, 5) > + != env->vfp.vreg[src2].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)sign_extend(rs1, 5) != > + env->vfp.vreg[src2].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmand.mm vd, vs2, vs1 # vd = vs2 & vs1 */ > +void VECTOR_HELPER(vmand_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) & > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfle.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmfle_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + 
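In vmand.mm/vmandnot.mm above the vstart reset appears twice, the second copy unreachable after return, and the early

    > + if (env->vfp.vstart >= vl) {
    > + return;
    > + }

path never clears vstart at all. Separately, all of the integer compares (vmseq/vmsne/vmsltu and friends) can drop the if/else and pass the comparison result straight through, e.g.:

    vector_mask_result(env, rd, width, lmul, i,
                       env->vfp.vreg[src1].u8[j] ==
                       env->vfp.vreg[src2].u8[j]);

which roughly halves those helpers.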
vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_le(env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[src1].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_le(env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[src1].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_le(env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[src1].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfle.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfle_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_le(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_le(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_le(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsltu_vv)(CPURISCVState 
*env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] < > + env->vfp.vreg[src1].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] < > + env->vfp.vreg[src1].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] < > + env->vfp.vreg[src1].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] < > + env->vfp.vreg[src1].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsltu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] < > (uint8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] < > (uint16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if 
(env->vfp.vreg[src2].u32[j] < > (uint32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] < > + (uint64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmor.mm vd, vs2, vs1 # vd = vs2 | vs1 */ > +void VECTOR_HELPER(vmor_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) | > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmford.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmford_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float16_unordered_quiet(env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float32_unordered_quiet(env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float64_unordered_quiet(env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; 
> + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmford.vf vd, vs2, rs1, vm # Vector-scalar */ > +void VECTOR_HELPER(vmford_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float16_unordered_quiet(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float32_unordered_quiet(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = > float64_unordered_quiet(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmslt_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] < > + env->vfp.vreg[src1].s8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] < > + env->vfp.vreg[src1].s16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } 
else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] < > + env->vfp.vreg[src1].s32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] < > + env->vfp.vreg[src1].s64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmslt_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] < > (int8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] < > (int16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] < > (int32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] < > + (int64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmxor.mm vd, vs2, vs1 # vd = vs2 ^ vs1 */ > +void VECTOR_HELPER(vmxor_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = 
env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) ^ > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmflt.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmflt_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_lt(env->vfp.vreg[src2].f16[j], > + env->vfp.vreg[src1].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_lt(env->vfp.vreg[src2].f32[j], > + env->vfp.vreg[src1].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_lt(env->vfp.vreg[src2].f64[j], > + env->vfp.vreg[src1].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmflt.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmflt_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_lt(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, 
width, lmul, i, result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_lt(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_lt(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsleu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] <= > + env->vfp.vreg[src1].u8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] <= > + env->vfp.vreg[src1].u16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] <= > + env->vfp.vreg[src1].u32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] <= > + env->vfp.vreg[src1].u64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsleu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + 
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] <= > (uint8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] <= > (uint16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] <= > (uint32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] <= > + (uint64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsleu_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] <= (uint8_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] <= (uint16_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] <= (uint32_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] <= > + (uint64_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + 
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmornot.mm vd, vs2, vs1 # vd = vs2 | ~vs1 */ > +void VECTOR_HELPER(vmornot_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) | > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfne.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vmfne_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src1, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_eq_quiet(env->vfp.vreg[src1].f16[j], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_eq_quiet(env->vfp.vreg[src1].f32[j], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_eq_quiet(env->vfp.vreg[src1].f64[j], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfne.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfne_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > 
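
Another open-coded comparison: as with float16_le/float16_lt earlier in
the patch, float16_eq_quiet and friends duplicate what the generic
compare API already gives you. Something like:

    result = float16_compare_quiet(env->vfp.vreg[src1].f16[j],
                                   env->vfp.vreg[src2].f16[j],
                                   &env->fp_status) != float_relation_equal;
    vector_mask_result(env, rd, width, lmul, i, result);

produces the same mask bit and makes the NaN case explicit (unordered
compares as "not equal", which is exactly what vmfne wants).

Also, all of the .vf forms pass env->fpr[rs1] straight into the f16/f32
operations. Doesn't the scalar need its NaN-boxing checked for the
narrower widths first?
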
+ > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_eq_quiet(env->fpr[rs1], > + env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsle_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] <= > + env->vfp.vreg[src1].s8[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] <= > + env->vfp.vreg[src1].s16[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] <= > + env->vfp.vreg[src1].s32[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] <= > + env->vfp.vreg[src1].s64[j]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else 
{ > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsle_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] <= > (int8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] <= > (int16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] <= > (int32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] <= > + (int64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsle_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] <= > + (int8_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + 
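
Same structural comment as for the mask-logical ops: the integer compares
(vmseq/vmsne/vmslt[u]/vmsle[u]/vmsgt[u]) are ~100 lines each and differ
only in the predicate and where the second operand comes from. A pair of
small fetch helpers would let one loop serve all widths, e.g. (untested,
names invented):

    static inline int64_t vext_elem_s(CPURISCVState *env, uint32_t reg,
                                      uint32_t width, int j)
    {
        switch (width) {
        case 8:  return env->vfp.vreg[reg].s8[j];
        case 16: return env->vfp.vreg[reg].s16[j];
        case 32: return env->vfp.vreg[reg].s32[j];
        default: return env->vfp.vreg[reg].s64[j];
        }
    }

so the per-element body collapses to:

    vector_mask_result(env, rd, width, lmul, i,
                       vext_elem_s(env, src2, width, j) <=
                       vext_elem_s(env, src1, width, j));

with a uint64_t twin for the unsigned variants (sign/zero extension to
64 bits preserves all of these comparisons).
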
case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] <= > + (int16_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] <= > + (int32_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] <= > + sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmnand.mm vd, vs2, vs1 # vd = ~(vs2 & vs1) */ > +void VECTOR_HELPER(vmnand_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) & > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, (~tmp & 0x1)); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfgt.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfgt_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_le(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_le(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_le(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + 
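
Isn't !float16_le() the wrong test here? On an unordered input le raises
invalid and returns 0, so the negation *sets* the mask bit, while
vmfgt/vmfge are supposed to write 0 when either operand is a NaN.
Swapping the operands avoids the negation entirely:

    result = float16_lt(env->fpr[rs1], env->vfp.vreg[src2].f16[j],
                        &env->fp_status);

(and float16_le() for vmfge.vf below), or again
float16_compare() == float_relation_greater.
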
vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsgtu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] > > (uint8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] > > (uint16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] > > (uint32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] > > + (uint64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsgtu_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u8[j] > (uint8_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, 
lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u16[j] > (uint16_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u32[j] > (uint32_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].u64[j] > > + (uint64_t)rs1) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmnor.mm vd, vs2, vs1 # vd = ~(vs2 | vs1) */ > +void VECTOR_HELPER(vmnor_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) | > + vector_mask_reg(env, rs2, width, lmul, i); > + vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmsgt_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] > > (int8_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] > > (int16_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] > > (int32_t)env->gpr[rs1]) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + 
vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] > > + (int64_t)extend_gpr(env->gpr[rs1])) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmsgt_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s8[j] > > + (int8_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s16[j] > > + (int16_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s32[j] > > + (int32_t)sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src2].s64[j] > > + sign_extend(rs1, 5)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + if (width <= 64) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } else { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +/* vmxnor.mm vd, vs2, vs1 # vd = ~(vs2 ^ vs1) */ > +void VECTOR_HELPER(vmxnor_mm)(CPURISCVState *env, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, i, vlmax; > + uint32_t tmp; > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + for (i = 0; i < vlmax; i++) { > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + tmp = vector_mask_reg(env, rs1, width, lmul, i) ^ > + vector_mask_reg(env, rs2, width, 
lmul, i); > + vector_mask_result(env, rd, width, lmul, i, ~tmp & 0x1); > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + > + env->vfp.vstart = 0; > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmfge.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vmfge_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2, result; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float16_lt(env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float32_lt(env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + result = float64_lt(env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + vector_mask_result(env, rd, width, lmul, i, !result); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + case 32: > + case 64: > + vector_mask_result(env, rd, width, lmul, i, 0); > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsaddu.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vsaddu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_add_u8(env, > + env->vfp.vreg[src1].u8[j], > env->vfp.vreg[src2].u8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_add_u16(env, > + env->vfp.vreg[src1].u16[j], > env->vfp.vreg[src2].u16[j]); > + } > + break; > 
+ case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_add_u32(env, > + env->vfp.vreg[src1].u32[j], > env->vfp.vreg[src2].u32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_add_u64(env, > + env->vfp.vreg[src1].u64[j], > env->vfp.vreg[src2].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsaddu.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vsaddu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_add_u8(env, > + env->vfp.vreg[src2].u8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_add_u16(env, > + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_add_u32(env, > + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_add_u64(env, > + env->vfp.vreg[src2].u64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsaddu.vi vd, vs2, imm, vm # vector-immediate */ > +void VECTOR_HELPER(vsaddu_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_add_u8(env, > + env->vfp.vreg[src2].u8[j], rs1); > + } > + break; 
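
One more: vsaddu.vi passes the raw 5-bit rs1 field to sat_add_u8() here,
while vmsle.vi/vmsne.vi above use sign_extend(rs1, 5). Which one does the
0.7.1 spec want for the saturating adds? If it is the sign-extended
immediate, only the values 0..31 ever get added as things stand (same
question for vmsleu.vi earlier).
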
> + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_add_u16(env, > + env->vfp.vreg[src2].u16[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_add_u32(env, > + env->vfp.vreg[src2].u32[j], rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_add_u64(env, > + env->vfp.vreg[src2].u64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vdivu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] == 0) { > + env->vfp.vreg[dest].u8[j] = MAX_U8; > + } else { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] / > + env->vfp.vreg[src1].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] == 0) { > + env->vfp.vreg[dest].u16[j] = MAX_U16; > + } else { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + / env->vfp.vreg[src1].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] == 0) { > + env->vfp.vreg[dest].u32[j] = MAX_U32; > + } else { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + / env->vfp.vreg[src1].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] == 0) { > + env->vfp.vreg[dest].u64[j] = MAX_U64; > + } else { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + / env->vfp.vreg[src1].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vdivu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; 
i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u8[j] = MAX_U8; > + } else { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] / > + (uint8_t)env->gpr[rs1]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u16[j] = MAX_U16; > + } else { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + / (uint16_t)env->gpr[rs1]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u32[j] = MAX_U32; > + } else { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + / (uint32_t)env->gpr[rs1]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) { > + env->vfp.vreg[dest].u64[j] = MAX_U64; > + } else { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + / (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfdiv.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_div( > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[src1].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_div( > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[src1].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_div( > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[src1].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfdiv.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t > 
rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_div( > + > env->vfp.vreg[src2].f16[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_div( > + > env->vfp.vreg[src2].f32[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_div( > + > env->vfp.vreg[src2].f64[j], > + env->fpr[rs1], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsadd.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vsadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_add_s8(env, > + env->vfp.vreg[src1].s8[j], > env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_add_s16(env, > + env->vfp.vreg[src1].s16[j], > env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_add_s32(env, > + env->vfp.vreg[src1].s32[j], > env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_add_s64(env, > + env->vfp.vreg[src1].s64[j], > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, 
dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsadd.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vsadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_add_s8(env, > + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_add_s16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_add_s32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_add_s64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsadd.vi vd, vs2, imm, vm # vector-immediate */ > +void VECTOR_HELPER(vsadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_add_s8(env, > + env->vfp.vreg[src2].s8[j], sign_extend(rs1, 5)); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_add_s16(env, > + env->vfp.vreg[src2].s16[j], sign_extend(rs1, 5)); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_add_s32(env, > + env->vfp.vreg[src2].s32[j], sign_extend(rs1, 5)); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_add_s64(env, > + env->vfp.vreg[src2].s64[j], 
sign_extend(rs1, 5)); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s8[j] == 0) { > + env->vfp.vreg[dest].s8[j] = -1; > + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) && > + (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) { > + env->vfp.vreg[dest].s8[j] = MIN_S8; > + } else { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] / > + env->vfp.vreg[src1].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s16[j] == 0) { > + env->vfp.vreg[dest].s16[j] = -1; > + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) && > + (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) { > + env->vfp.vreg[dest].s16[j] = MIN_S16; > + } else { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + / env->vfp.vreg[src1].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s32[j] == 0) { > + env->vfp.vreg[dest].s32[j] = -1; > + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) && > + (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) { > + env->vfp.vreg[dest].s32[j] = MIN_S32; > + } else { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + / env->vfp.vreg[src1].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s64[j] == 0) { > + env->vfp.vreg[dest].s64[j] = -1; > + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) && > + (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) { > + env->vfp.vreg[dest].s64[j] = MIN_S64; > + } else { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + / env->vfp.vreg[src1].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vdiv_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + 
vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int8_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s8[j] = -1; > + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) && > + ((int8_t)env->gpr[rs1] == (int8_t)(-1))) { > + env->vfp.vreg[dest].s8[j] = MIN_S8; > + } else { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] / > + (int8_t)env->gpr[rs1]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int16_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s16[j] = -1; > + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) && > + ((int16_t)env->gpr[rs1] == (int16_t)(-1))) { > + env->vfp.vreg[dest].s16[j] = MIN_S16; > + } else { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + / (int16_t)env->gpr[rs1]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int32_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s32[j] = -1; > + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) && > + ((int32_t)env->gpr[rs1] == (int32_t)(-1))) { > + env->vfp.vreg[dest].s32[j] = MIN_S32; > + } else { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + / (int32_t)env->gpr[rs1]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) { > + env->vfp.vreg[dest].s64[j] = -1; > + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) && > + ((int64_t)extend_gpr(env->gpr[rs1]) == > (int64_t)(-1))) { > + env->vfp.vreg[dest].s64[j] = MIN_S64; > + } else { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + / (int64_t)extend_gpr(env->gpr[rs1]); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] */ > +void VECTOR_HELPER(vfrdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_div( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_div( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + 
&env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_div( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssubu.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vssubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_sub_u8(env, > + env->vfp.vreg[src2].u8[j], > env->vfp.vreg[src1].u8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_sub_u16(env, > + env->vfp.vreg[src2].u16[j], > env->vfp.vreg[src1].u16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_sub_u32(env, > + env->vfp.vreg[src2].u32[j], > env->vfp.vreg[src1].u32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_sub_u64(env, > + env->vfp.vreg[src2].u64[j], > env->vfp.vreg[src1].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssubu.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vssubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = sat_sub_u8(env, > + env->vfp.vreg[src2].u8[j], env->gpr[rs1]); > + } > + 
break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = sat_sub_u16(env, > + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = sat_sub_u32(env, > + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = sat_sub_u64(env, > + env->vfp.vreg[src2].u64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vremu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u8[j] == 0) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] % > + env->vfp.vreg[src1].u8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u16[j] == 0) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + % env->vfp.vreg[src1].u16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u32[j] == 0) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + % env->vfp.vreg[src1].u32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].u64[j] == 0) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + % env->vfp.vreg[src1].u64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vremu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + 
} > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint8_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j]; > + } else { > + env->vfp.vreg[dest].u8[j] = > env->vfp.vreg[src2].u8[j] % > + (uint8_t)env->gpr[rs1]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint16_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j]; > + } else { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + % (uint16_t)env->gpr[rs1]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint32_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j]; > + } else { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + % (uint32_t)env->gpr[rs1]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j]; > + } else { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + % (uint64_t)extend_gpr(env->gpr[rs1]); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vmsbf.m vd, vs2, vm # set-before-first mask bit */ > +void VECTOR_HELPER(vmsbf_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + bool first_mask_bit = false; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (first_mask_bit) { > + vector_mask_result(env, rd, width, lmul, i, 0); > + continue; > + } > + if (!vector_mask_reg(env, rs2, width, lmul, i)) { > + vector_mask_result(env, rd, width, lmul, i, 1); > + } else { > + first_mask_bit = true; > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + } else { > + vector_mask_result(env, rd, width, lmul, i, 0); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vmsif.m vd, vs2, vm # set-including-first mask bit */ > +void VECTOR_HELPER(vmsif_m)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i; > + bool first_mask_bit = false; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + if (i < vl) { > + 
if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                if (first_mask_bit) {
> +                    vector_mask_result(env, rd, width, lmul, i, 0);
> +                    continue;
> +                }
> +                if (!vector_mask_reg(env, rs2, width, lmul, i)) {
> +                    vector_mask_result(env, rd, width, lmul, i, 1);
> +                } else {
> +                    first_mask_bit = true;
> +                    vector_mask_result(env, rd, width, lmul, i, 1);
> +                }
> +            }
> +        } else {
> +            vector_mask_result(env, rd, width, lmul, i, 0);
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vmsof.m vd, vs2, vm # set-only-first mask bit */
> +void VECTOR_HELPER(vmsof_m)(CPURISCVState *env, uint32_t vm,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int width, lmul, vl, vlmax;
> +    int i;
> +    bool first_mask_bit = false;
> +    if (vector_vtype_ill(env)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    lmul = vector_get_lmul(env);
> +    vl = env->vfp.vl;
> +
> +    if (env->vfp.vstart != 0) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        if (i < vl) {
> +            if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                if (first_mask_bit) {
> +                    vector_mask_result(env, rd, width, lmul, i, 0);
> +                    continue;
> +                }
> +                if (!vector_mask_reg(env, rs2, width, lmul, i)) {
> +                    vector_mask_result(env, rd, width, lmul, i, 0);
> +                } else {
> +                    first_mask_bit = true;
> +                    vector_mask_result(env, rd, width, lmul, i, 1);
> +                }
> +            }
> +        } else {
> +            vector_mask_result(env, rd, width, lmul, i, 0);
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
> +
> +/* viota.m v4, v2, v0.t */
> +void VECTOR_HELPER(viota_m)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
> +    uint32_t rd)
> +{
> +    int width, lmul, vl, vlmax;
> +    int i, j, dest;
> +    uint32_t sum = 0;
> +
> +    lmul = vector_get_lmul(env);
> +    vl = env->vfp.vl;
> +    if (vector_vtype_ill(env)
> +        || vector_overlap_vm_force(vm, rd)
> +        || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 1)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    if (env->vfp.vstart != 0) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        dest = rd + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < vl) {
> +            switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u8[j] = sum;
> +                    if (vector_mask_reg(env, rs2, width, lmul, i)) {
> +                        sum++;
> +                    }
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = sum;
> +                    if (vector_mask_reg(env, rs2, width, lmul, i)) {
> +                        sum++;
> +                    }
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = sum;
> +                    if (vector_mask_reg(env, rs2, width, lmul, i)) {
> +                        sum++;
> +                    }
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u64[j] = sum;
> +                    if (vector_mask_reg(env, rs2, width, lmul, i)) {
> +                        sum++;
> +                    }
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_common(env, dest, j, width);
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
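
vmsbf.m, vmsif.m and vmsof.m only differ in the bit they write before
and at the first set mask bit, so the three loops could share one
implementation. Untested sketch against the helpers in this file (the
name and the before/at parameters are mine):

    static void vmsxf_m(CPURISCVState *env, uint32_t vm, uint32_t rs2,
                        uint32_t rd, int before, int at)
    {
        int width, lmul, vl, vlmax;
        int i;
        bool first_mask_bit = false;

        if (vector_vtype_ill(env) || env->vfp.vstart != 0) {
            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
            return;
        }

        lmul = vector_get_lmul(env);
        vl = env->vfp.vl;
        width = vector_get_width(env);
        vlmax = vector_get_vlmax(env);

        for (i = 0; i < vlmax; i++) {
            if (i >= vl) {
                vector_mask_result(env, rd, width, lmul, i, 0);
            } else if (vector_elem_mask(env, vm, width, lmul, i)) {
                if (first_mask_bit) {
                    vector_mask_result(env, rd, width, lmul, i, 0);
                } else if (!vector_mask_reg(env, rs2, width, lmul, i)) {
                    vector_mask_result(env, rd, width, lmul, i, before);
                } else {
                    first_mask_bit = true;
                    vector_mask_result(env, rd, width, lmul, i, at);
                }
            }
        }
        env->vfp.vstart = 0;
    }

with (before, at) being (1, 0) for vmsbf, (1, 1) for vmsif and (0, 1)
for vmsof. The unreachable vstart reset mentioned earlier applies to
all three of the originals as well.
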
> +/* vid.v vd, vm # Write element ID to destination. */
> +void VECTOR_HELPER(vid_v)(CPURISCVState *env, uint32_t vm, uint32_t rd)
> +{
> +    int width, lmul, vl, vlmax;
> +    int i, j, dest;
> +
> +    lmul = vector_get_lmul(env);
> +    vl = env->vfp.vl;
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        dest = rd + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u8[j] = i;
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u16[j] = i;
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u32[j] = i;
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].u64[j] = i;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_common(env, dest, j, width);
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vssub.vv vd, vs2, vs1, vm # Vector-vector */
> +void VECTOR_HELPER(vssub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int width, lmul, vl, vlmax;
> +    int i, j, dest, src1, src2;
> +
> +    lmul = vector_get_lmul(env);
> +
> +    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, rs1, false);
> +    vector_lmul_check_reg(env, lmul, rs2, false);
> +    vector_lmul_check_reg(env, lmul, rd, false);
> +
> +    vl = env->vfp.vl;
> +    if (env->vfp.vstart >= vl) {
> +        return;
> +    }
> +
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        dest = rd + (i / (VLEN / width));
> +        src2 = rs2 + (i / (VLEN / width));
> +        src1 = rs1 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 8:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s8[j] = sat_sub_s8(env,
> +                        env->vfp.vreg[src2].s8[j], env->vfp.vreg[src1].s8[j]);
> +                }
> +                break;
> +            case 16:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s16[j] = sat_sub_s16(env,
> +                        env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].s16[j]);
> +                }
> +                break;
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s32[j] = sat_sub_s32(env,
> +                        env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].s32[j]);
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    env->vfp.vreg[dest].s64[j] = sat_sub_s64(env,
> +                        env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].s64[j]);
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                return;
> +            }
> +        } else {
> +            vector_tail_common(env, dest, j, width);
> +        }
> +    }
> +    return;
> +    env->vfp.vstart = 0;
> +}
> +
> +/* vssub.vx vd, vs2, rs1, vm # vector-scalar */
> +void VECTOR_HELPER(vssub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
> +    uint32_t rs2, uint32_t rd)
> +{
> +    int width,
lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = sat_sub_s8(env, > + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = sat_sub_s16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = sat_sub_s32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = sat_sub_s64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vrem_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s8[j] == 0) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j]; > + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) && > + (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) { > + env->vfp.vreg[dest].s8[j] = 0; > + } else { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] % > + env->vfp.vreg[src1].s8[j]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s16[j] == 0) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j]; > + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) && > + (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) { > + env->vfp.vreg[dest].s16[j] = 0; > + } else { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + % env->vfp.vreg[src1].s16[j]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if 
(env->vfp.vreg[src1].s32[j] == 0) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j]; > + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) && > + (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) { > + env->vfp.vreg[dest].s32[j] = 0; > + } else { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + % env->vfp.vreg[src1].s32[j]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if (env->vfp.vreg[src1].s64[j] == 0) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j]; > + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) && > + (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) { > + env->vfp.vreg[dest].s64[j] = 0; > + } else { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + % env->vfp.vreg[src1].s64[j]; > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vrem_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int8_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j]; > + } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) && > + ((int8_t)env->gpr[rs1] == (int8_t)(-1))) { > + env->vfp.vreg[dest].s8[j] = 0; > + } else { > + env->vfp.vreg[dest].s8[j] = > env->vfp.vreg[src2].s8[j] % > + (int8_t)env->gpr[rs1]; > + } > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int16_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j]; > + } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) && > + ((int16_t)env->gpr[rs1] == (int16_t)(-1))) { > + env->vfp.vreg[dest].s16[j] = 0; > + } else { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + % (int16_t)env->gpr[rs1]; > + } > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int32_t)env->gpr[rs1] == 0) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j]; > + } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) && > + ((int32_t)env->gpr[rs1] == (int32_t)(-1))) { > + env->vfp.vreg[dest].s32[j] = 0; > + } else { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + % (int32_t)env->gpr[rs1]; > + } > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j]; > + } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) && > + ((int64_t)extend_gpr(env->gpr[rs1]) == > (int64_t)(-1))) { > + env->vfp.vreg[dest].s64[j] = 0; > + } else { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + 
% (int64_t)extend_gpr(env->gpr[rs1]); > + } > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +/* vaadd.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vaadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8(env, > + env->vfp.vreg[src1].s8[j], > env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16(env, > + env->vfp.vreg[src1].s16[j], > env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32(env, > + env->vfp.vreg[src1].s32[j], > env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64(env, > + env->vfp.vreg[src1].s64[j], > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vaadd.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vaadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8(env, > + env->gpr[rs1], env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16(env, > + env->gpr[rs1], env->vfp.vreg[src2].s16[j]); > + } > + 
break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32(env, > + env->gpr[rs1], env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64(env, > + env->gpr[rs1], env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vaadd.vi vd, vs2, imm, vm # vector-immediate */ > +void VECTOR_HELPER(vaadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8(env, > + rs1, env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16(env, > + rs1, env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32(env, > + rs1, env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64(env, > + rs1, env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmulhu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > + ((uint16_t)env->vfp.vreg[src1].u8[j] > + * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width; > + } > + break; > + case 16: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + ((uint32_t)env->vfp.vreg[src1].u16[j] > + * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + ((uint64_t)env->vfp.vreg[src1].u32[j] > + * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = u64xu64_lh( > + env->vfp.vreg[src1].u64[j], > env->vfp.vreg[src2].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmulhu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = > + ((uint16_t)(uint8_t)env->gpr[rs1] > + * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > + ((uint32_t)(uint16_t)env->gpr[rs1] > + * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > + ((uint64_t)(uint32_t)env->gpr[rs1] > + * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = u64xu64_lh( > + (uint64_t)extend_gpr(env->gpr[rs1]) > + , env->vfp.vreg[src2].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +/* vfmul.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vfmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / 
width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_mul( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_mul( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_mul( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmul.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_mul( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_mul( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_mul( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsll_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < 
env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + << (env->vfp.vreg[src1].u8[j] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + << (env->vfp.vreg[src1].u16[j] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + << (env->vfp.vreg[src1].u32[j] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + << (env->vfp.vreg[src1].u64[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsll_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + << (env->gpr[rs1] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + << (env->gpr[rs1] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + << (env->gpr[rs1] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + << ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsll_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch 
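
vsll/vsrl/vsra in .vv/.vx/.vi form are nine near-identical functions.
A case-arm macro would collapse a lot of this; illustrative fragment
only (GEN_SHIFT_CASE is my invention), meant to expand inside the
existing switch of the .vv variants:

  #define GEN_SHIFT_CASE(BITS, SIGN, OP)                              \
      case BITS:                                                      \
          if (vector_elem_mask(env, vm, width, lmul, i)) {            \
              env->vfp.vreg[dest].SIGN##BITS[j] =                     \
                  env->vfp.vreg[src2].SIGN##BITS[j] OP                \
                  (env->vfp.vreg[src1].SIGN##BITS[j] & (BITS - 1));   \
          }                                                           \
          break;

vsll_vv's switch body is then just GEN_SHIFT_CASE(8, u, <<) through
GEN_SHIFT_CASE(64, u, <<), and vsra_vv reuses it as
GEN_SHIFT_CASE(8, s, >>) etc.
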
(width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + << (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + << (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + << (rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + << (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = 
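
vsll_vi shifts by the raw immediate with no masking, unlike the
.vv/.vx forms which mask to SEW-1, so e.g. SEW=8 with imm=9 shifts by
9 instead of 1. Shouldn't the immediate be masked the same way? The
same question applies to vsrl_vi and vsra_vi below.
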
env->gpr[rs1] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = env->gpr[rs1] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = env->gpr[rs1] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > + (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vasub.vv vd, vs2, vs1, vm # Vector-vector */ > +void VECTOR_HELPER(vasub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8( > + env, > + ~env->vfp.vreg[src1].s8[j] + 1, > + env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16( > + env, > + ~env->vfp.vreg[src1].s16[j] + 1, > + env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32( > + env, > + ~env->vfp.vreg[src1].s32[j] + 1, > + env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64( > + env, > + ~env->vfp.vreg[src1].s64[j] + 1, > + env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vasub.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vasub_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; 
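
In vasub_vv/vasub_vx "~x + 1" is just a long-winded "-x"; the direct
negation would read better. It's also worth checking the boundary
case: for src1 == INT8_MIN (etc.) the negation wraps back to INT8_MIN
before avg_round_s8 sees it - is that the rounding behaviour you
intend?
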
i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = avg_round_s8( > + env, ~env->gpr[rs1] + 1, > env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = avg_round_s16( > + env, ~env->gpr[rs1] + 1, > env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = avg_round_s32( > + env, ~env->gpr[rs1] + 1, > env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = avg_round_s64( > + env, ~env->gpr[rs1] + 1, > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmulhsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > + ((uint16_t)env->vfp.vreg[src1].u8[j] > + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > + ((uint32_t)env->vfp.vreg[src1].u16[j] > + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > + ((uint64_t)env->vfp.vreg[src1].u32[j] > + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = s64xu64_lh( > + env->vfp.vreg[src2].s64[j], > env->vfp.vreg[src1].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmulhsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, 
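
Stepping back: every helper repeats the same ~50 line skeleton (vtype
and overlap checks, the vlmax loop, the mask test, the tail handling).
Factoring that out would shrink this file dramatically and make review
feasible. A sketch of the shape I mean - vop_fn and
vector_foreach_elem are my names, not the patch's; the per-SEW switch
moves into the callback:

  typedef void (*vop_fn)(CPURISCVState *env, uint32_t width,
                         uint32_t dest, uint32_t src1, uint32_t src2,
                         uint32_t j);

  /* vtype/overlap/lmul checks stay in the caller (or move here too) */
  static void vector_foreach_elem(CPURISCVState *env, uint32_t vm,
                                  uint32_t rs1, uint32_t rs2,
                                  uint32_t rd, vop_fn op)
  {
      uint32_t vl = env->vfp.vl;
      uint32_t width = vector_get_width(env);
      uint32_t lmul = vector_get_lmul(env);
      uint32_t vlmax = vector_get_vlmax(env);
      uint32_t i, j, dest, src1, src2;

      /* starting at vstart drops the "if (i < vstart) continue" too */
      for (i = env->vfp.vstart; i < vlmax; i++) {
          dest = rd + (i / (VLEN / width));
          src1 = rs1 + (i / (VLEN / width));
          src2 = rs2 + (i / (VLEN / width));
          j = i % (VLEN / width);
          if (i < vl) {
              if (vector_elem_mask(env, vm, width, lmul, i)) {
                  op(env, width, dest, src1, src2, j);
              }
          } else {
              vector_tail_common(env, dest, j, width);
          }
      }
      env->vfp.vstart = 0;
  }

The .vx/.vi variants just need a flavour that drops src1.
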
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > + ((uint16_t)(uint8_t)env->gpr[rs1] > + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > + ((uint32_t)(uint16_t)env->gpr[rs1] > + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > + ((uint64_t)(uint32_t)env->gpr[rs1] > + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = s64xu64_lh( > + env->vfp.vreg[src2].s64[j], > + (uint64_t)extend_gpr(env->gpr[rs1])); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vsmul.vv vd, vs2, vs1, vm # vd[i] = > clip((vs2[i]*vs1[i]+round)>>(SEW-1)) */ > +void VECTOR_HELPER(vsmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if ((!(vm)) && rd == 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vsmul_8(env, > + env->vfp.vreg[src1].s8[j], > env->vfp.vreg[src2].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vsmul_16(env, > + env->vfp.vreg[src1].s16[j], > env->vfp.vreg[src2].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vsmul_32(env, > + env->vfp.vreg[src1].s32[j], > env->vfp.vreg[src2].s32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vsmul_64(env, > + env->vfp.vreg[src1].s64[j], > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vsmul.vx vd, vs2, rs1, vm # vd[i] = > clip((vs2[i]*x[rs1]+round)>>(SEW-1)) */ > +void VECTOR_HELPER(vsmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ 
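
vsmul_vv/vsmul_vx hand-roll "(!(vm)) && rd == 0" instead of calling
vector_overlap_vm_common() like the rest, and they skip the
vector_lmul_check_reg() calls every other helper makes. Deliberate, or
an oversight?
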
> + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if ((!(vm)) && rd == 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vsmul_8(env, > + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vsmul_16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vsmul_32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vsmul_64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vmulh_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > + ((int16_t)env->vfp.vreg[src1].s8[j] > + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > + ((int32_t)env->vfp.vreg[src1].s16[j] > + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > + ((int64_t)env->vfp.vreg[src1].s32[j] > + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = s64xs64_lh( > + env->vfp.vreg[src1].s64[j], > env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > 
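
In vmulh the widened product is right-shifted while still signed, i.e.
an arithmetic shift of a possibly negative value. That's
implementation-defined in C (fine in practice for the compilers QEMU
supports, but worth a comment), and the 64-bit leg is another
candidate for muls64() as mentioned above.
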
+} > +void VECTOR_HELPER(vmulh_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = > + ((int16_t)(int8_t)env->gpr[rs1] > + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > + ((int32_t)(int16_t)env->gpr[rs1] > + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > + ((int64_t)(int32_t)env->gpr[rs1] > + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = s64xs64_lh( > + (int64_t)extend_gpr(env->gpr[rs1]) > + , env->vfp.vreg[src2].s64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfrsub.vf vd, vs2, rs1, vm # Scalar-vector vd[i] = f[rs1] - vs2[i] */ > +void VECTOR_HELPER(vfrsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_sub( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_sub( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_sub( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + 
} > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + >> (env->vfp.vreg[src1].u8[j] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + >> (env->vfp.vreg[src1].u16[j] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + >> (env->vfp.vreg[src1].u32[j] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + >> (env->vfp.vreg[src1].u64[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + >> (env->gpr[rs1] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + >> (env->gpr[rs1] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + >> (env->gpr[rs1] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + 
vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] > + >> (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u16[j] > + >> (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u32[j] > + >> (rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = > env->vfp.vreg[src2].u64[j] > + >> (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfmadd.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) + vs2[i] */ > +void VECTOR_HELPER(vfmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + 0, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + 0, > + &env->fp_status); > + } > + break; > + 
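
In vfmadd_vv (and all the helpers) the switch's default: raises the
illegal-instruction exception from inside the element loop, i.e.
potentially after earlier elements have already been written back.
Since width can't change mid-loop, validating SEW once before the loop
seems cleaner.
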
default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmadd.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) + vs2[i] */ > +void VECTOR_HELPER(vfmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + 0, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] > + >> (env->vfp.vreg[src1].s8[j] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + >> (env->vfp.vreg[src1].s16[j] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + >> (env->vfp.vreg[src1].s32[j] & 
0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + >> (env->vfp.vreg[src1].s64[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] > + >> (env->gpr[rs1] & 0x7); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + >> (env->gpr[rs1] & 0xf); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + >> (env->gpr[rs1] & 0x1f); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] > + >> (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + >> (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + >> (rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + >> 
(rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[dest].s8[j] > + + env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[dest].s16[j] > + + env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[dest].s32[j] > + + env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[dest].s64[j] > + + env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->gpr[rs1] > + * env->vfp.vreg[dest].s8[j] > + + env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = env->gpr[rs1] > + * env->vfp.vreg[dest].s16[j] > + + env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = env->gpr[rs1] > + * env->vfp.vreg[dest].s32[j] > + + env->vfp.vreg[src2].s32[j]; > + } > + 
break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > + (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[dest].s64[j] > + + env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +/* vfnmadd.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) - vs2[i] */ > +void VECTOR_HELPER(vfnmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfnmadd.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) - vs2[i] */ > +void VECTOR_HELPER(vfnmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } 
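
vfmadd/vfnmadd/vfmsub (and presumably vfnmsub) differ only in the
flags passed to floatN_muladd, so the eight .vv/.vf bodies could
collapse into two workers taking the flags. Shape sketch only
(vfmacc_vv is my name, not the patch's):

  static void vfmacc_vv(CPURISCVState *env, uint32_t vm, uint32_t rs1,
                        uint32_t rs2, uint32_t rd, int flags);

  void VECTOR_HELPER(vfnmadd_vv)(CPURISCVState *env, uint32_t vm,
                                 uint32_t rs1, uint32_t rs2,
                                 uint32_t rd)
  {
      vfmacc_vv(env, vm, rs1, rs2, rd,
                float_muladd_negate_c | float_muladd_negate_product);
  }
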
else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssrl.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i] */ > +void VECTOR_HELPER(vssrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = vssrl_8(env, > + env->vfp.vreg[src2].u8[j], > env->vfp.vreg[src1].u8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = vssrl_16(env, > + env->vfp.vreg[src2].u16[j], > env->vfp.vreg[src1].u16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = vssrl_32(env, > + env->vfp.vreg[src2].u32[j], > env->vfp.vreg[src1].u32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = vssrl_64(env, > + env->vfp.vreg[src2].u64[j], > env->vfp.vreg[src1].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssrl.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */ > +void VECTOR_HELPER(vssrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) 
{ > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = vssrl_8(env, > + env->vfp.vreg[src2].u8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = vssrl_16(env, > + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = vssrl_32(env, > + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = vssrl_64(env, > + env->vfp.vreg[src2].u64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssrl.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */ > +void VECTOR_HELPER(vssrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = vssrli_8(env, > + env->vfp.vreg[src2].u8[j], rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = vssrli_16(env, > + env->vfp.vreg[src2].u16[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = vssrli_32(env, > + env->vfp.vreg[src2].u32[j], rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = vssrli_64(env, > + env->vfp.vreg[src2].u64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmsub.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) - vs2[i] */ > +void VECTOR_HELPER(vfmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, 
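
vssrl/vssra are another nine copies of the skeleton that would fold
into the same macro/common-loop treatment. Also, do the vssrli_* and
vssrai_* helpers mask the immediate internally? If not, the .vi forms
raise the same unmasked-shift question as vsll_vi above.
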
j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmsub.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) - vs2[i] */ > +void VECTOR_HELPER(vfmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + 
float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssra.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i]) */ > +void VECTOR_HELPER(vssra_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vssra_8(env, > + env->vfp.vreg[src2].s8[j], > env->vfp.vreg[src1].u8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vssra_16(env, > + env->vfp.vreg[src2].s16[j], > env->vfp.vreg[src1].u16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vssra_32(env, > + env->vfp.vreg[src2].s32[j], > env->vfp.vreg[src1].u32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vssra_64(env, > + env->vfp.vreg[src2].s64[j], > env->vfp.vreg[src1].u64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssra.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */ > +void VECTOR_HELPER(vssra_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vssra_8(env, > + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vssra_16(env, > + env->vfp.vreg[src2].s16[j], 
env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vssra_32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vssra_64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vssra.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */ > +void VECTOR_HELPER(vssra_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = vssrai_8(env, > + env->vfp.vreg[src2].s8[j], rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = vssrai_16(env, > + env->vfp.vreg[src2].s16[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = vssrai_32(env, > + env->vfp.vreg[src2].s32[j], rs1); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = vssrai_64(env, > + env->vfp.vreg[src2].s64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] > + - env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[dest].s8[j]; > + } > + break; > + case 16: > + 
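vssra_vv/vssra_vx/vssra_vi are the same prologue, loop and tail with
only the shift-amount expression changing, and the same is true of
most helpers in this file. Generating them from a macro would shrink
things considerably — a rough sketch, with the common parts hand-waved
and all names invented:

    #define GEN_VEXT_VSSRA(SUFFIX, SHIFT_EXPR)                       \
    void VECTOR_HELPER(vssra_##SUFFIX)(CPURISCVState *env,           \
        uint32_t vm, uint32_t rs1, uint32_t rs2, uint32_t rd)        \
    {                                                                \
        /* common checks and element loop as above, e.g. SEW=8: */  \
        env->vfp.vreg[dest].s8[j] = vssra_8(env,                     \
            env->vfp.vreg[src2].s8[j], SHIFT_EXPR);                  \
        /* ... common tail handling */                               \
    }

    GEN_VEXT_VSSRA(vv, env->vfp.vreg[src1].u8[j])
    GEN_VEXT_VSSRA(vx, env->gpr[rs1])
    GEN_VEXT_VSSRA(vi, rs1)

which would also make the repetition obvious at a glance.
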
if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + - env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[dest].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + - env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[dest].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + - env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[dest].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnmsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] > + - env->gpr[rs1] > + * env->vfp.vreg[dest].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s16[j] > + - env->gpr[rs1] > + * env->vfp.vreg[dest].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s32[j] > + - env->gpr[rs1] > + * env->vfp.vreg[dest].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = > env->vfp.vreg[src2].s64[j] > + - (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[dest].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + vs2[i] */ > +void VECTOR_HELPER(vfnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / 
width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + > + > + env->vfp.vstart = 0; > +} > + > +/* vfnmsub.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) + vs2[i] */ > +void VECTOR_HELPER(vfnmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f16[j], > + > env->vfp.vreg[src2].f16[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f32[j], > + > env->vfp.vreg[src2].f32[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[dest].f64[j], > + > env->vfp.vreg[src2].f64[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vnsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if 
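vfmadd/vfmsub/vfnmadd/vfnmsub (and the vfmacc/vfmsac family below) are
identical apart from the float_muladd_negate_* flags and whether vd is
the multiplicand or the addend. One worker taking the flags would fold
each group into one — a sketch, names invented:

    static void do_vfmuladd_vv(CPURISCVState *env, uint32_t vm,
                               uint32_t rs1, uint32_t rs2, uint32_t rd,
                               int flags, bool vd_is_addend)
    {
        /* common checks/loop; per element, e.g. for SEW=32: */
        float32 a = env->vfp.vreg[src1].f32[j];
        float32 b = vd_is_addend ? env->vfp.vreg[src2].f32[j]
                                 : env->vfp.vreg[dest].f32[j];
        float32 c = vd_is_addend ? env->vfp.vreg[dest].f32[j]
                                 : env->vfp.vreg[src2].f32[j];
        env->vfp.vreg[dest].f32[j] =
            float32_muladd(a, b, c, flags, &env->fp_status);
    }

with trivial wrappers passing 0, float_muladd_negate_c,
float_muladd_negate_product and so on.
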
(vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k] > + >> (env->vfp.vreg[src1].u8[j] & 0xf); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u32[k] > + >> (env->vfp.vreg[src1].u16[j] & 0x1f); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u64[k] > + >> (env->vfp.vreg[src1].u32[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k] > + >> (env->gpr[rs1] & 0xf); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u32[k] > + >> (env->gpr[rs1] & 0x1f); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u64[k] > + >> (env->gpr[rs1] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + 
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k] > + >> (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = > env->vfp.vreg[src2].u32[k] > + >> (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = > env->vfp.vreg[src2].u64[k] > + >> (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + 0, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, 
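One thing I noticed in the narrowing shifts: vnsrl_vv/vnsrl_vx mask
the shift amount to 2*SEW-1 (0xf/0x1f/0x3f), but vnsrl_vi — and
vnsra_vi below — shift by the raw immediate. If the spec truncates
immediates the same way as register operands, the .vi forms want the
same mask, e.g.:

    env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k]
                                >> (rs1 & 0xf);

Worth double-checking against the 0.7.1 text either way.
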
vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + 0, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k] > + >> (env->vfp.vreg[src1].s8[j] & 0xf); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s32[k] > + >> (env->vfp.vreg[src1].s16[j] & 0x1f); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s64[k] > + >> (env->vfp.vreg[src1].s32[j] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = 
vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k] > + >> (env->gpr[rs1] & 0xf); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s32[k] > + >> (env->gpr[rs1] & 0x1f); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s64[k] > + >> (env->gpr[rs1] & 0x3f); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k] > + >> (rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = > env->vfp.vreg[src2].s32[k] > + >> (rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = > env->vfp.vreg[src2].s64[k] > + >> (rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_narrow(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > 
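All of these loops run to vlmax and re-test i < vstart and i < vl on
every element (after already computing the dest/src indices). Since
vstart is almost always zero you could split it into two loops and
drop the per-element compares:

    for (i = env->vfp.vstart; i < vl; i++) {
        /* active body */
    }
    for (; i < vlmax; i++) {
        /* tail handling */
    }

which is also kinder to the branch predictor given how hot these
loops will be.
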
+ vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] += env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] += > env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] += > env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] += > env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] += env->gpr[rs1] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] += env->gpr[rs1] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] += env->gpr[rs1] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] += > + (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, 
false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + float_muladd_negate_c > | > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } 
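riscv_raise_exception() is QEMU_NORETURN (it longjmps back to the main
loop), so the return or break after each call is dead code — and the
helpers are inconsistent about which of the two they use. I'd drop
them both.
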
else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vnclipu.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vnclipu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, k, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[k] = vnclipu_16(env, > + env->vfp.vreg[src2].u16[j], > env->vfp.vreg[src1].u8[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vnclipu_32(env, > + env->vfp.vreg[src2].u32[j], > env->vfp.vreg[src1].u16[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vnclipu_64(env, > + env->vfp.vreg[src2].u64[j], > env->vfp.vreg[src1].u32[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vnclipu.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vnclipu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[k] = vnclipu_16(env, > + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vnclipu_32(env, > + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vnclipu_64(env, > + 
env->vfp.vreg[src2].u64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > + > +/* vnclipu.vi vd, vs2, imm, vm # vector-immediate */ > +void VECTOR_HELPER(vnclipu_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u8[k] = vnclipui_16(env, > + env->vfp.vreg[src2].u16[j], rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vnclipui_32(env, > + env->vfp.vreg[src2].u32[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vnclipui_64(env, > + env->vfp.vreg[src2].u64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + > 
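Minor, but the vnclipu helpers use j for the wide (2*SEW) element
index and k for the narrow one, which is the opposite of the
vnsrl/vnsra helpers above; consistent naming would make these much
easier to compare. Also: should the saturating narrows be setting
vxsat when vnclipu_16() & co clamp? I can't see the flag being
touched anywhere in this hunk.
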
env->vfp.vreg[dest].f32[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vnclip.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vnclip_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, k, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / (2 * 
width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[k] = vnclip_16(env, > + env->vfp.vreg[src2].s16[j], > env->vfp.vreg[src1].u8[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vnclip_32(env, > + env->vfp.vreg[src2].s32[j], > env->vfp.vreg[src1].u16[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vnclip_64(env, > + env->vfp.vreg[src2].s64[j], > env->vfp.vreg[src1].u32[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vnclip.vx vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vnclip_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, k, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[k] = vnclip_16(env, > + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vnclip_32(env, > + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vnclip_64(env, > + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vnclip.vi vd, vs2, imm, vm # vector-immediate */ > +void VECTOR_HELPER(vnclip_vi)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, k, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) > + || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN 
/ (2 * width))); > + j = i % (VLEN / (2 * width)); > + k = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[k] = vnclipi_16(env, > + env->vfp.vreg[src2].s16[j], rs1); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vnclipi_32(env, > + env->vfp.vreg[src2].s32[j], rs1); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vnclipi_64(env, > + env->vfp.vreg[src2].s64[j], rs1); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_narrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] -= env->vfp.vreg[src1].s8[j] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] -= > env->vfp.vreg[src1].s16[j] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] -= > env->vfp.vreg[src1].s32[j] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] -= > env->vfp.vreg[src1].s64[j] > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vnmsac_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if 
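Same comments apply to the signed vnclip forms. The header comment
says (vs2[i] + round) >> shift — presumably vnclip_16() & co pick the
rounding mode up from env (vxrm?); a brief comment saying so would
help, since it isn't visible at this level.
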
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s8[j] -= env->gpr[rs1] > + * env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] -= env->gpr[rs1] > + * env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] -= env->gpr[rs1] > + * env->vfp.vreg[src2].s32[j]; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] -= > + (int64_t)extend_gpr(env->gpr[rs1]) > + * env->vfp.vreg[src2].s64[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_common(env, dest, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + > env->vfp.vreg[src1].f16[j], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + > env->vfp.vreg[src1].f32[j], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + > env->vfp.vreg[src1].f64[j], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, 
lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f16[j], > + > env->vfp.vreg[dest].f16[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f32[j], > + > env->vfp.vreg[dest].f32[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_muladd( > + env->fpr[rs1], > + > env->vfp.vreg[src2].f64[j], > + > env->vfp.vreg[dest].f64[j], > + > float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vwredsumu.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(zero-extend(SEW)) > */ > +void VECTOR_HELPER(vwredsumu_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + uint64_t sum = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u8[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u16[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u16[0] = sum; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u16[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u32[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u32[0] = sum; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += env->vfp.vreg[src2].u32[j]; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].u64[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].u64[0] = sum; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwaddu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = 
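In vwredsumu_vs (and vwredsum_vs below) vector_lmul_check_reg() runs
before the vector_vtype_ill() check, unlike every other helper —
given lmul is derived from vtype, validating vtype first seems safer.
Also the pre-zeroing

    for (i = 0; i < VLEN / 64; i++) {
        env->vfp.vreg[rd].u64[i] = 0;
    }

clobbers the whole of vd before the operation is known to complete;
is that the intended behaviour for the tail and masked-off elements
of a reduction?
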
vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src1].u8[j] + > + (uint16_t)env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src1].u16[j] + > + (uint32_t)env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src1].u32[j] + > + (uint64_t)env->vfp.vreg[src2].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwaddu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u8[j] + > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u16[j] + > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u32[j] + > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwadd.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + 
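The widening adds do get the overlap checks right (vd at 2*lmul
against the sources at lmul), but it's the same skeleton yet again.
For what it's worth, a shared widening worker could look something
like this — entirely hand-waved, callback type invented:

    typedef void (*widen_fn)(CPURISCVState *env, int dest, int k,
                             int src1, int src2, int j);

    static void vext_widen_vv(CPURISCVState *env, uint32_t vm,
                              uint32_t rs1, uint32_t rs2, uint32_t rd,
                              widen_fn fn)
    {
        /* common checks, then per active element call
         * fn(env, dest, k, src1, src2, j) with the usual
         * index computations */
    }

The point being the checks and index arithmetic would then exist once
rather than hundreds of times over.
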
> + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_add( > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_add( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwadd.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_add( > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_add( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->fpr[rs1], > &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > 
GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vwredsum.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(sign-extend(SEW)) */ > +void VECTOR_HELPER(vwredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + int64_t sum = 0; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += (int16_t)env->vfp.vreg[src2].s8[j] << 8 >> 8; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].s16[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s16[0] = sum; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += (int32_t)env->vfp.vreg[src2].s16[j] << 16 >> > 16; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].s32[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s32[0] = sum; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum += (int64_t)env->vfp.vreg[src2].s32[j] << 32 >> > 32; > + } > + if (i == 0) { > + sum += env->vfp.vreg[rs1].s64[0]; > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].s64[0] = sum; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src1].s8[j] + > + (int16_t)env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src1].s16[j] + > + (int32_t)env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src1].s32[j] + > + (int64_t)env->vfp.vreg[src2].s32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) + > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) + > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) + > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vd, vs2, vs1, vm # Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) > */ > +void VECTOR_HELPER(vfwredsum_vs)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, src2; > + float32 sum32 = 0.0f; > + float64 sum64 = 0.0f; > + > + lmul = vector_get_lmul(env); > + vector_lmul_check_reg(env, lmul, rs2, false); > + > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart != 0) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vl = env->vfp.vl; > + if (vl == 0) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < VLEN / 64; i++) { > + env->vfp.vreg[rd].u64[i] = 0; > + } > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + > + if (i < vl) { > + switch (width) { > + case 16: > + if (i == 0) { > + sum32 = env->vfp.vreg[rs1].f32[0]; > + } > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + sum32 = float32_add(sum32, > + > float16_to_float32(env->vfp.vreg[src2].f16[j], > + true, &env->fp_status), > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f32[0] = sum32; > + } > + break; > + case 32: > + if (i == 0) { > + sum64 = env->vfp.vreg[rs1].f64[0]; > + } > + if (vector_elem_mask(env, 
vm, width, lmul, i)) { > + sum64 = float64_add(sum64, > + > float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + if (i == vl - 1) { > + env->vfp.vreg[rd].f64[0] = sum64; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u8[j] - > + (uint16_t)env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u16[j] - > + (uint32_t)env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u32[j] - > + (uint64_t)env->vfp.vreg[src1].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u8[j] - > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u16[j] - > + (uint32_t)((uint16_t)env->gpr[rs1]); > 
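All the fp variants in this patch end with

    return;
    env->vfp.vstart = 0;

so the reset is dead code and vstart is never cleared when the helper
completes - presumably the assignment should come before the return, as
the integer helpers do. While you are here: vfwadd_vf checks
vector_vtype_ill(env) twice; float32/float64 are integer typedefs in
QEMU, so sum32/sum64 in vfwredsum_vs want float32_zero/float64_zero
rather than 0.0f; and the (int16_t)...s8[j] << 8 >> 8 dance in
vwredsum_vs is a no-op once the cast has already sign-extended.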
+ } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u32[j] - > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwsub.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_sub( > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_sub( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwsub.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_sub( > + 
float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_sub( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->fpr[rs1], > &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s8[j] - > + (int16_t)env->vfp.vreg[src1].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s16[j] - > + (int32_t)env->vfp.vreg[src1].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s32[j] - > + (int64_t)env->vfp.vreg[src1].s32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) > + ) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + 
(int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) - > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) - > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) - > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* > + * vfwredosum.vs vd, vs2, vs1, vm # > + * Ordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) > + */ > +void VECTOR_HELPER(vfwredosum_vs)(CPURISCVState *env, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + helper_vector_vfwredsum_vs(env, vm, rs1, rs2, rd); > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwaddu_wv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src1].u8[j] + > + (uint16_t)env->vfp.vreg[src2].u16[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src1].u16[j] + > + (uint32_t)env->vfp.vreg[src2].u32[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src1].u32[j] + > + (uint64_t)env->vfp.vreg[src2].u64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwaddu_wx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); 
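Aliasing the ordered reduction onto the unordered one is not obviously
OK: vfwredosum.vs must accumulate strictly in element order, while the
unordered form is free to associate the additions differently, and the
two can round differently. If vfwredsum_vs is guaranteed to sum in
element order (the current loop does), a comment saying the ordered case
relies on that would help; otherwise this needs its own loop. The
trailing return/vstart pair has the same dead-code problem as above.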
> + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u16[k] + > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u32[k] + > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u64[k] + > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwadd.wv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_add( > + env->vfp.vreg[src2].f32[k], > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_add( > + env->vfp.vreg[src2].f64[k], > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwadd.wf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwadd_wf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 
+ (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_add( > + env->vfp.vreg[src2].f32[k], > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_add( > + env->vfp.vreg[src2].f64[k], > + float32_to_float64(env->fpr[rs1], > &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]) + > + (int16_t)env->vfp.vreg[src2].s16[k]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]) + > + (int32_t)env->vfp.vreg[src2].s32[k]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]) + > + (int64_t)env->vfp.vreg[src2].s64[k]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwadd_wx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s16[k] + > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s32[k] + > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s64[k] + > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsubu_wv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u16[k] - > + (uint16_t)env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u32[k] - > + (uint32_t)env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u64[k] - > + (uint64_t)env->vfp.vreg[src1].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsubu_wx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u16[k] - > + 
(uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src2].u32[k] - > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u64[k] - > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwsub.wv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_sub( > + env->vfp.vreg[src2].f32[k], > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_sub( > + env->vfp.vreg[src2].f64[k], > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwsub.wf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwsub_wf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_sub( > 
+ env->vfp.vreg[src2].f32[k], > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_sub( > + env->vfp.vreg[src2].f64[k], > + float32_to_float64(env->fpr[rs1], > &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s16[k] - > + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s32[k] - > + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s64[k] - > + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwsub_wx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / (2 * width))); > + dest = rd + (i / (VLEN / (2 * width))); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s16[k] - > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + 
(int32_t)env->vfp.vreg[src2].s32[k] - > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s64[k] - > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmulu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src1].u8[j] * > + (uint16_t)env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = > + (uint32_t)env->vfp.vreg[src1].u16[j] * > + (uint32_t)env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src1].u32[j] * > + (uint64_t)env->vfp.vreg[src2].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmulu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = > + (uint16_t)env->vfp.vreg[src2].u8[j] * > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = 
> + (uint32_t)env->vfp.vreg[src2].u16[j] * > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = > + (uint64_t)env->vfp.vreg[src2].u32[j] * > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwmul.vv vd, vs2, vs1, vm # vector-vector */ > +void VECTOR_HELPER(vfwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_mul( > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_mul( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vfwmul.vf vd, vs2, rs1, vm # vector-scalar */ > +void VECTOR_HELPER(vfwmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_mul( > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->fpr[rs1], true, > + &env->fp_status), > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_mul( > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->fpr[rs1], > &env->fp_status), > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmulsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src2].s8[j] * > + (uint16_t)env->vfp.vreg[src1].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src2].s16[j] * > + (uint32_t)env->vfp.vreg[src1].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src2].s32[j] * > + (uint64_t)env->vfp.vreg[src1].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmulsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > 
+ if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)env->vfp.vreg[src1].s8[j] * > + (int16_t)env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)env->vfp.vreg[src1].s16[j] * > + (int32_t)env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)env->vfp.vreg[src1].s32[j] * > + (int64_t)env->vfp.vreg[src2].s32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + 
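For the vector-scalar fp forms (vfwadd.vf/.wf, vfwsub.vf/.wf,
vfwmul.vf) the scalar is read straight out of env->fpr[rs1] and
implicitly truncated to 16 or 32 bits. With RVF/RVD the narrower value
is supposed to be NaN-boxed in the 64-bit register - don't these need
the usual unboxing check first? Also vfwadd.wv/.wf use
vector_tail_widen in their tail path where the other fp helpers use
vector_tail_fwiden; which is intended? And the (int16_t) * (uint16_t)
mixing in vwmulsu leans on the usual arithmetic conversions - it seems
to produce the right bits, but explicitly widening each operand to the
destination type would be easier to reason about.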
switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccu.vv vd, vs1, vs2, vm # > + * vd[i] = clipu((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env, > + > env->vfp.vreg[src2].u8[j], > + > env->vfp.vreg[src1].u8[j], > + > env->vfp.vreg[dest].u16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env, > + > env->vfp.vreg[src2].u16[j], > + > env->vfp.vreg[src1].u16[j], > + > env->vfp.vreg[dest].u32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env, > + > env->vfp.vreg[src2].u32[j], > + > env->vfp.vreg[src1].u32[j], > + > env->vfp.vreg[dest].u64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccu.vx vd, rs1, vs2, vm # > + * vd[i] = clipu((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + 
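I can't see vwsmaccu_8/16/32 in this hunk, but for the fixed-point ops
the rounding increment should come from vxrm and saturation should set
vxsat - do those helpers handle that? Nothing at the call site mentions
the rounding mode, so at least a comment would help.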
vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env, > + > env->vfp.vreg[src2].u8[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].u16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env, > + > env->vfp.vreg[src2].u16[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].u32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env, > + > env->vfp.vreg[src2].u32[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].u64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] += > + (uint16_t)env->vfp.vreg[src1].u8[j] * > + (uint16_t)env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] += > + (uint32_t)env->vfp.vreg[src1].u16[j] * > + (uint32_t)env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] += > + (uint64_t)env->vfp.vreg[src1].u32[j] * > + (uint64_t)env->vfp.vreg[src2].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = 
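There are two different shapes in this file: vwsmaccu_vx bails out early
with

    if (env->vfp.vstart >= vl) {
        return;
    }

while vwmaccu_vv/vwmaccu_vx have no such check and run the loop anyway,
zeroing the tail elements. The two behaviours differ when vstart >= vl
(the early return leaves both the tail and vstart untouched); is that
difference intentional, or should all of these helpers agree on one
semantics?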
vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] += > + (uint16_t)env->vfp.vreg[src2].u8[j] * > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] += > + (uint32_t)env->vfp.vreg[src2].u16[j] * > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] += > + (uint64_t)env->vfp.vreg[src2].u32[j] * > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + 
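For vfwmacc the accumulator vd is the double-width operand, so shouldn't
the addend be read at the widened element size? As written, the case-16
arm reads

    float16_to_float32(env->vfp.vreg[dest].f16[j], true, &env->fp_status)

i.e. a half-precision element at the narrow index j, where I would expect
simply

    env->vfp.vreg[dest].f32[k]

(and env->vfp.vreg[dest].f64[k] rather than a converted f32[j] in the
case-32 arm). Unless I'm misreading the widening-FMA semantics, the same
applies to all the vfw[n]m{acc,sac} helpers below.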
env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfwmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + env->fpr[rs1], > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + 0, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + env->fpr[rs1], > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + 0, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmacc.vv vd, vs1, vs2, vm # > + * vd[i] = clip((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { 
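This tail-handling switch is missing its break statements:

    switch (width) {
    case 16:
        env->vfp.vreg[dest].f32[k] = 0;
    case 32:
        env->vfp.vreg[dest].f64[k] = 0;
    default:
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
        return;
    }

Every tail element falls through into the default arm, so any execution
with vl < vlmax at a legal width raises an illegal-instruction exception
(and the case-16 path also zeroes an f64 element on the way down).
Presumably this was meant to be

    switch (width) {
    case 16:
        env->vfp.vreg[dest].f32[k] = 0;
        break;
    case 32:
        env->vfp.vreg[dest].f64[k] = 0;
        break;
    default:
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
        return;
    }

or, better, a call to the vector_tail_fwiden() helper that the vfwcvt
functions use. The same broken switch is copied into every vfw* helper in
this patch.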
> + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env, > + > env->vfp.vreg[src2].s8[j], > + > env->vfp.vreg[src1].s8[j], > + > env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env, > + > env->vfp.vreg[src2].s16[j], > + > env->vfp.vreg[src1].s16[j], > + > env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env, > + > env->vfp.vreg[src2].s32[j], > + > env->vfp.vreg[src1].s32[j], > + > env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmacc.vx vd, rs1, vs2, vm # > + * vd[i] = clip((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env, > + > env->vfp.vreg[src2].s8[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env, > + > env->vfp.vreg[src2].s16[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env, > + > env->vfp.vreg[src2].s32[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccsu.vv vd, vs1, vs2, vm > + * # vd[i] = clip(-((signed(vs1[i])*unsigned(vs2[i])+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccsu_vv)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs1, false); > + 
vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env, > + > env->vfp.vreg[src2].u8[j], > + > env->vfp.vreg[src1].s8[j], > + > env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env, > + > env->vfp.vreg[src2].u16[j], > + > env->vfp.vreg[src1].s16[j], > + > env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env, > + > env->vfp.vreg[src2].u32[j], > + > env->vfp.vreg[src1].s32[j], > + > env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccsu.vx vd, rs1, vs2, vm > + * # vd[i] = clip(-((signed(x[rs1])*unsigned(vs2[i])+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccsu_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env, > + > env->vfp.vreg[src2].u8[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env, > + > env->vfp.vreg[src2].u16[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env, > + > env->vfp.vreg[src2].u32[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vwsmaccus.vx vd, rs1, vs2, vm > + * # vd[i] = 
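The comments for vwsmaccsu/vwsmaccus read

    vd[i] = clip(-((signed(vs1[i])*unsigned(vs2[i])+round)>>SEW/2)+vd[i])

Should that leading '-' be a '+'? These are accumulate instructions, and
the vwsmaccsu_* helpers themselves (not visible in this hunk) presumably
add the rounded product rather than subtract it. If the minus sign really
is what the 0.7.1 spec says, a spec reference in the comment would help.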
clip(-((unsigned(x[rs1])*signed(vs2[i])+round)>>SEW/2)+vd[i]) > + */ > +void VECTOR_HELPER(vwsmaccus_vx)(CPURISCVState *env, uint32_t vm, > uint32_t rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + vl = env->vfp.vl; > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = vwsmaccus_8(env, > + > env->vfp.vreg[src2].s8[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s16[k]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = vwsmaccus_16(env, > + > env->vfp.vreg[src2].s16[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s32[k]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = vwsmaccus_32(env, > + > env->vfp.vreg[src2].s32[j], > + env->gpr[rs1], > + > env->vfp.vreg[dest].s64[k]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (int16_t)env->vfp.vreg[src1].s8[j] > + * (int16_t)env->vfp.vreg[src2].s8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (int32_t)env->vfp.vreg[src1].s16[j] * > + (int32_t)env->vfp.vreg[src2].s16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (int64_t)env->vfp.vreg[src1].s32[j] * > + 
(int64_t)env->vfp.vreg[src2].s32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfwnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + 
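Style nits for the whole file: several helpers butt directly against the
previous closing brace with no blank line (e.g. "}" immediately followed
by "void VECTOR_HELPER(vwmacc_vx)..."), and others contain doubled blank
lines; checkpatch.pl should flag both. Also, casts like

    (int16_t)((int8_t)env->vfp.vreg[src2].s8[j])

do redundant work -- s8[j] is already an int8_t, so the inner cast is a
no-op and the outer one is implied by the usual promotions. Dropping them
would make the places where a cast actually matters (truncating
env->gpr[rs1]) easier to spot.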
float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfwnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + env->fpr[rs1], > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + env->fpr[rs1], > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_c | > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmaccsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src1, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 
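In the .vf forms, env->fpr[rs1] is passed straight into float32_muladd /
float64_muladd:

    env->vfp.vreg[dest].f32[k] = float32_muladd(
        env->fpr[rs1], ...

fpr[] is a 64-bit register file, so for the SEW=16 case the scalar should
(I think) be read as an f16 and converted up, just like the vector
operand, and the SEW=32 case needs to handle NaN-boxing rather than
silently truncating the 64-bit value. The same pattern appears in
vfwmacc_vf, vfwmsac_vf and vfwnmsac_vf.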
* lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src1 = rs1 + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (int16_t)env->vfp.vreg[src1].s8[j] > + * (uint16_t)env->vfp.vreg[src2].u8[j]; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (int32_t)env->vfp.vreg[src1].s16[j] * > + (uint32_t)env->vfp.vreg[src2].u16[j]; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (int64_t)env->vfp.vreg[src1].s32[j] * > + (uint64_t)env->vfp.vreg[src2].u32[j]; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vwmaccsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (uint16_t)((uint8_t)env->vfp.vreg[src2].u8[j]) * > + (int16_t)((int8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (uint32_t)((uint16_t)env->vfp.vreg[src2].u16[j]) * > + (int32_t)((int16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (uint64_t)((uint32_t)env->vfp.vreg[src2].u32[j]) * > + (int64_t)((int32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfwmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + 
|| vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */ > +void VECTOR_HELPER(vfwmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + env->fpr[rs1], > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + env->fpr[rs1], > + 
float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_c, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vwmaccus_vx)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl; > + uint32_t lmul, width, src2, dest, vlmax; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + for (i = 0; i < vlmax; i++) { > + src2 = rs2 + (i / (VLEN / width)); > + dest = rd + (i / (VLEN / (2 * width))); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] += > + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * > + (uint16_t)((uint8_t)env->gpr[rs1]); > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] += > + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * > + (uint32_t)((uint16_t)env->gpr[rs1]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] += > + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * > + (uint64_t)((uint32_t)env->gpr[rs1]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_widen(env, dest, k, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +/* vfwnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfwnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src1, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs1, false); > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + src1 = rs1 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + float16_to_float32(env->vfp.vreg[src1].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + float32_to_float64(env->vfp.vreg[src1].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + > + env->vfp.vstart = 0; > +} > + > +/* vfwnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */ > +void VECTOR_HELPER(vfwnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t > rs1, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float32_muladd( > + env->fpr[rs1], > + float16_to_float32(env->vfp.vreg[src2].f16[j], > true, > + &env->fp_status), > + float16_to_float32(env->vfp.vreg[dest].f16[j], > true, > + &env->fp_status), > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float64_muladd( > + env->fpr[rs1], > + float32_to_float64(env->vfp.vreg[src2].f32[j], > + &env->fp_status), > + float32_to_float64(env->vfp.vreg[dest].f32[j], > + &env->fp_status), > + float_muladd_negate_product, > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f32[k] = 0; > + case 32: > + env->vfp.vreg[dest].f64[k] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > + > +/* vfsqrt.v vd, vs2, vm # Vector-vector square root */ > +void VECTOR_HELPER(vfsqrt_v)(CPURISCVState *env, uint32_t vm, uint32_t > rs2, > + uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + 
int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = float16_sqrt( > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = float32_sqrt( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = float64_sqrt( > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + switch (width) { > + case 16: > + env->vfp.vreg[dest].f16[j] = 0; > + case 32: > + env->vfp.vreg[dest].f32[j] = 0; > + case 64: > + env->vfp.vreg[dest].f64[j] = 0; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfclass.v vd, vs2, vm # Vector-vector */ > +void VECTOR_HELPER(vfclass_v)(CPURISCVState *env, uint32_t vm, uint32_t > rs2, > + uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = helper_fclass_h( > + > env->vfp.vreg[src2].f16[j]); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = helper_fclass_s( > + > env->vfp.vreg[src2].f32[j]); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = helper_fclass_d( > + > env->vfp.vreg[src2].f64[j]); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
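vfsqrt_v has the same fall-through problem in its tail handling -- the
case 16/32/64 arms zero three different elements in sequence and then
land in the default arm and raise an illegal-instruction exception.
vfclass_v just below gets this right by calling
vector_tail_fcommon(env, dest, j, width); vfsqrt_v should do the same.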
*/ > +void VECTOR_HELPER(vfcvt_xu_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + if (vector_vtype_ill(env)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[j] = float16_to_uint16( > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[j] = float32_to_uint32( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[j] = float64_to_uint64( > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ > +void VECTOR_HELPER(vfcvt_x_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[j] = float16_to_int16( > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[j] = float32_to_int32( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[j] = float64_to_int64( > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. 
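vfcvt_xu_f_v checks vector_vtype_ill(env) twice -- once on its own at the
top of the function and again in the combined condition right after. The
first check can go.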
*/ > +void VECTOR_HELPER(vfcvt_f_xu_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = uint16_to_float16( > + > env->vfp.vreg[src2].u16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = uint32_to_float32( > + > env->vfp.vreg[src2].u32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = uint64_to_float64( > + > env->vfp.vreg[src2].u64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ > +void VECTOR_HELPER(vfcvt_f_x_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[j] = int16_to_float16( > + > env->vfp.vreg[src2].s16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[j] = int32_to_float32( > + > env->vfp.vreg[src2].s32[j], > + &env->fp_status); > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[j] = int64_to_float64( > + > env->vfp.vreg[src2].s64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fcommon(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned > integer.*/ > +void VECTOR_HELPER(vfwcvt_xu_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, 
vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = float16_to_uint32( > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u64[k] = float32_to_uint64( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + } > + } else { > + vector_tail_fwiden(env, dest, j, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed > integer. */ > +void VECTOR_HELPER(vfwcvt_x_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = float16_to_int32( > + > env->vfp.vreg[src2].f16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s64[k] = float32_to_int64( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width > float */ > +void VECTOR_HELPER(vfwcvt_f_xu_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = 
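In vfwcvt_xu_f_v the tail call is

    vector_tail_fwiden(env, dest, j, width);

but the destination element index in this loop is k (j indexes the narrow
source element); vfwcvt_x_f_v right below passes k. Looks like a typo.

Also, the "lmul > 4" legality check sits after vector_lmul_check_reg()
has already run with the unvalidated lmul; probably harmless, but it
would read better (and fail earlier) hoisted up next to the overlap
checks.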
env->vfp.vl; > + > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = uint16_to_float32( > + > env->vfp.vreg[src2].u16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = uint32_to_float64( > + > env->vfp.vreg[src2].u32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ > +void VECTOR_HELPER(vfwcvt_f_x_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = int16_to_float32( > + > env->vfp.vreg[src2].s16[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = int32_to_float64( > + > env->vfp.vreg[src2].s32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* > + * vfwcvt.f.f.v vd, vs2, vm # > + * Convert single-width float to double-width float. 
> + */ > +void VECTOR_HELPER(vfwcvt_f_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) > + || vector_overlap_vm_force(vm, rd) > + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, false); > + vector_lmul_check_reg(env, lmul, rd, true); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / (2 * width))); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float16_to_float32( > + > env->vfp.vreg[src2].f16[j], > + true, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f64[k] = float32_to_float64( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fwiden(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ > +void VECTOR_HELPER(vfncvt_xu_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u16[k] = float32_to_uint16( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].u32[k] = float64_to_uint32( > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed > integer. 
*/ > +void VECTOR_HELPER(vfncvt_x_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s16[k] = float32_to_int16( > + > env->vfp.vreg[src2].f32[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].s32[k] = float64_to_int32( > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to > float */ > +void VECTOR_HELPER(vfncvt_f_xu_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[k] = uint32_to_float16( > + > env->vfp.vreg[src2].u32[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = uint64_to_float32( > + > env->vfp.vreg[src2].u64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. 
*/ > +void VECTOR_HELPER(vfncvt_f_x_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[k] = int32_to_float16( > + > env->vfp.vreg[src2].s32[j], > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = int64_to_float32( > + > env->vfp.vreg[src2].s64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. > */ > +void VECTOR_HELPER(vfncvt_f_f_v)(CPURISCVState *env, uint32_t vm, > + uint32_t rs2, uint32_t rd) > +{ > + int width, lmul, vl, vlmax; > + int i, j, k, dest, src2; > + > + lmul = vector_get_lmul(env); > + vl = env->vfp.vl; > + if (vector_vtype_ill(env) || > + vector_overlap_vm_common(lmul, vm, rd) || > + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rs2, true); > + vector_lmul_check_reg(env, lmul, rd, false); > + > + if (lmul > 4) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (env->vfp.vstart >= vl) { > + return; > + } > + > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / (2 * width))); > + k = i % (VLEN / width); > + j = i % (VLEN / (2 * width)); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f16[k] = float32_to_float16( > + > env->vfp.vreg[src2].f32[j], > + true, > + &env->fp_status); > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + env->vfp.vreg[dest].f32[k] = float64_to_float32( > + > env->vfp.vreg[src2].f64[j], > + &env->fp_status); > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_fnarrow(env, dest, k, width); > + } > + } > + return; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, 
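
All of the vfwcvt/vfncvt helpers above end with

    return;
    env->vfp.vstart = 0;

which leaves the vstart reset unreachable. Given the spec wants vstart
cleared when the instruction completes, I assume the assignment should
come before the return (or the return should simply go). While you are
there: vfwcvt_xu_f_v passes j to vector_tail_fwiden where its siblings
pass k, and its default: case lacks the return the others have after
riscv_raise_exception — harmless if that call never returns, but the
inconsistency suggests one of them is wrong.
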
width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s8[j] = > + cpu_ldsb_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s16[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; 
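
The sign_extend(cpu_ldsb_data(...), 8) here (and in the other
vlb/vlsb/vlxb cases) looks redundant: cpu_ldsb_data already returns
the byte sign-extended, so assigning it straight to the signed element
should be enough.
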
> + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlsbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if 
(vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].s8[j] = > + cpu_ldsb_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].s16[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + 
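
This block — and really the whole file — is the same loop copied out
by hand for every width/sign/addressing combination, which is how you
end up at 26k lines. Could the bodies be generated from a template
instead? A rough standalone sketch of the shape I mean (all the names
here are mine, nothing from the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* One template gives both extension behaviours; the plain C
     * conversion from the memory type does the right thing. */
    #define GEN_WIDEN_LOAD(NAME, ETYPE, MTYPE)                \
    static void NAME(ETYPE *dst, const MTYPE *src, int n)     \
    {                                                         \
        for (int i = 0; i < n; i++) {                         \
            dst[i] = (ETYPE)src[i];                           \
        }                                                     \
    }

    GEN_WIDEN_LOAD(ld_b_u16, uint16_t, uint8_t)  /* cf vlbu */
    GEN_WIDEN_LOAD(ld_b_s16, int16_t,  int8_t)   /* cf vlb  */

    int main(void)
    {
        int8_t in[2] = { -128, 127 };
        int16_t sout[2];
        uint16_t uout[2];
        ld_b_s16(sout, in, 2);
        ld_b_u16(uout, (const uint8_t *)in, 2);
        printf("%d %d\n", sout[0], sout[1]);  /* -128 127 */
        printf("%u %u\n", uout[0], uout[1]);  /* 128 127  */
        return 0;
    }

In QEMU proper you would obviously keep the env accesses and the mask
handling, but the point stands: parameterise the element/memory types
and the address calculation and most of these helpers collapse into a
handful of macro expansions.
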
> +void VECTOR_HELPER(vlxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].s8[j] = > + cpu_ldsb_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].s16[j] = > sign_extend( > + cpu_ldsb_data(env, addr), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsb_data(env, addr), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsb_data(env, addr), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlbuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read 
= i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlbff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s8[j] = > + cpu_ldsb_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s16[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + 
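
For the fault-only-first variants: zeroing env->vfp.vl and bumping it
per element, only to restore it unconditionally at the end, is subtle
— the intermediate value is observable only when one of the loads
faults part-way through. A comment to that effect would help, because
on first reading the vl++ looks like dead weight. Also, all of these
helpers use the plain cpu_ldub_data/cpu_lduw_data accessors; called
from a helper you want the _ra forms so the unwinder can recover the
guest state when a load faults, e.g.

    cpu_lduw_data_ra(env, env->gpr[rs1] + read, GETPC());

otherwise a fault taken here will resolve to the wrong guest PC.
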
width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s16[j] = > + cpu_ldsw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlshu_v)(CPURISCVState 
*env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].s16[j] = > + cpu_ldsw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + 
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_lduw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_lduw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].s16[j] = > + cpu_ldsw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].s32[j] = > 
sign_extend( > + cpu_ldsw_data(env, addr), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsw_data(env, addr), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlhuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlhff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, 
i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s16[j] = > + cpu_ldsw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s32[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].s32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldl_data(env, env->gpr[rs1] + read), 32); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, 
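
Just above: vlhff_v clears foflag and vstart on the way out but never
restores vfp.vl, unlike vlbff_v and vlhuff_v which do
env->vfp.vl = vl — looks like an oversight.
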
vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlswu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].s32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + 
env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldl_data(env, env->gpr[rs1] + read), 32); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldl_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + env->vfp.vreg[dest + k * lmul].s32[j] = > + cpu_ldl_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldl_data(env, addr), 32); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + 
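
The indexed (vlx*) walkers differ from the strided ones only in how
the address is formed, which again argues for one common loop taking
an address callback. More generally, vector_vtype_ill(), the overlap
checks and the lmul limit are all translation-time facts — vtype only
changes via vsetvl{i} — so they could be checked once in
trans_rvv.inc.c (keyed off TB flags) rather than on every executed
instruction.
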
default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlwuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlwff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + vector_lmul_check_reg(env, lmul, rd, false); > + env->foflag = true; > + env->vfp.vl = 0; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].s32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].s64[j] = > sign_extend( > + cpu_ldl_data(env, env->gpr[rs1] + read), 32); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > 
+ } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vle_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 8; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldq_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + 
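
vle_v just repeats the vlbu/vlhu/vlwu loops plus a 64-bit case, so it
could dispatch to the width-specific code (or the common template)
rather than carrying yet another copy of the switch. Also, is anything
stopping a segment access running off the end of the register file?
rd = 28, LMUL = 4, nf = 1 passes both the lmul * (nf + 1) > 32 check
and the rd alignment check, but dest + k * lmul then indexes v32.
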
env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * env->gpr[rs2] + k * 8; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldq_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vlxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 8, > width, k); > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldq_data(env, addr); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vleff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, read; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, 
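
Also, read (and wrote on the store side) is a plain int, but the strided
forms compute i * env->gpr[rs2] + ..., i.e. a target_ulong stride
truncated to 32 bits. Negative strides are architecturally legal and on
RV64 a large stride will silently wrap. Doing the arithmetic in the
target width looks safer, e.g. for the 32-bit case of vlse_v:

  target_long stride = (target_long)env->gpr[rs2];
  target_long read = (target_long)i * stride + k * 4;
  env->vfp.vreg[dest + k * lmul].u32[j] =
      cpu_ldl_data(env, env->gpr[rs1] + read);
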
RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + env->vfp.vl = 0; > + env->foflag = true; > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = i * (nf + 1) + k; > + env->vfp.vreg[dest + k * lmul].u8[j] = > + cpu_ldub_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 2; > + env->vfp.vreg[dest + k * lmul].u16[j] = > + cpu_lduw_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 4; > + env->vfp.vreg[dest + k * lmul].u32[j] = > + cpu_ldl_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + read = (i * (nf + 1) + k) * 8; > + env->vfp.vreg[dest + k * lmul].u64[j] = > + cpu_ldq_data(env, env->gpr[rs1] + read); > + k--; > + } > + env->vfp.vstart++; > + } > + env->vfp.vl++; > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } else { > + vector_tail_segment(env, dest, j, width, k, lmul); > + } > + } > + env->foflag = false; > + env->vfp.vl = vl; > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * 
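
On the three fault-only-first helpers (vlwuff_v, vlwff_v, vleff_v):
env->foflag is set and env->vfp.vl zeroed before the loop and only
restored after it, but riscv_raise_exception() longjmps out of the
helper. If the width switch hits its default case, foflag is left set and
vfp.vl holds a partial count when the exception is delivered. Width
cannot change inside the loop anyway, so the width validation should be
hoisted above the point where you start mutating state.
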
lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vssb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = 
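
Naming nit for the store helpers: the register index is still called
dest (dest = rd + ...), but in a store rd is the register being read and
memory is the destination. Something like vreg or src would read better.
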
vector_get_index(env, rs1, src2, j, 1, > width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsuxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + return VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd); > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vssh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; 
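
vsuxb_v (and vsuxh_v/vsuxw_v/vsuxe_v below) are written as:

  return VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd);
  env->vfp.vstart = 0;

Returning an expression from a void function is an ISO C constraint
violation (GCC merely warns), and the vstart store after it is
unreachable either way. A plain call is all that's needed; implementing
the unordered form via the ordered one is fine, but deserves a comment:

  void VECTOR_HELPER(vsuxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm,
                              uint32_t rs1, uint32_t rs2, uint32_t rd)
  {
      /* An unordered store may legally be executed as an ordered one. */
      VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd);
  }
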
i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + cpu_stw_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + cpu_stw_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + cpu_stw_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsuxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + return VECTOR_HELPER(vsxh_v)(env, nf, vm, rs1, rs2, rd); > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if 
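
Unlike the loads, the store helpers have no else branch for i >= vl,
which is correct (a store has no register tail to clean), but a short
comment saying so would save readers from diffing against the load side
to check it is intentional.
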
(vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vssw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); 
> + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + cpu_stl_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + cpu_stl_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsuxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + return VECTOR_HELPER(vsxw_v)(env, nf, vm, rs1, rs2, rd); > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * (nf + 1) + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = (i * (nf + 1) + k) * 8; > + cpu_stq_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, wrote; > + > + vl = env->vfp.vl; > + > + > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul 
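
vsb_v/vsh_v/vsw_v/vse_v and their strided/indexed variants all repeat the
same loop with only the store size changing. One dispatcher would let a
single loop serve the lot; a sketch, assuming this shape (the name is
mine):

  static void vec_st(CPURISCVState *env, target_ulong addr,
                     uint64_t val, int esz)
  {
      switch (esz) {
      case 1:
          cpu_stb_data(env, addr, val);
          break;
      case 2:
          cpu_stw_data(env, addr, val);
          break;
      case 4:
          cpu_stl_data(env, addr, val);
          break;
      default:
          cpu_stq_data(env, addr, val);
          break;
      }
  }

With that (plus a matching vec_ld) the per-width switches collapse: esz
is width / 8 for vse_v and a constant 1/2/4 for the truncating
vsb/vsh/vsw forms.
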
* (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k; > + cpu_stb_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 2; > + cpu_stw_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 4; > + cpu_stl_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + wrote = i * env->gpr[rs2] + k * 8; > + cpu_stq_data(env, env->gpr[rs1] + wrote, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + int i, j, k, vl, vlmax, lmul, width, dest, src2; > + target_ulong addr; > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + > + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + if (lmul * (nf + 1) > 32) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, rd, false); > + > + for (i = 0; i < vlmax; i++) { > + dest = rd + (i / (VLEN / width)); > + src2 = rs2 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + k = nf; > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 8: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 1, > width, k); > + cpu_stb_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s8[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 16: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 2, > width, k); > + cpu_stw_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s16[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 4, > width, k); > + cpu_stl_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s32[j]); > + k--; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + while (k >= 0) { > + addr = vector_get_index(env, rs1, src2, j, 8, > width, k); > + cpu_stq_data(env, addr, > + env->vfp.vreg[dest + k * lmul].s64[j]); > + k--; > + 
} > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + return; > + } > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vsuxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, > + uint32_t rs1, uint32_t rs2, uint32_t rd) > +{ > + return VECTOR_HELPER(vsxe_v)(env, nf, vm, rs1, rs2, rd); > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoswapw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_xchgl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_xchgl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, > addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, > addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamoswapd_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; 
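
A few points on the AMO helpers, using vamoswapw_v as the example:

 - s/writen/written/ in the comment that recurs above every one of them.
 - memop is built as MO_ALIGN | MO_TESL and then every use masks off
   MO_SIGN again; building it as MO_ALIGN | MO_TEUL (MO_TEQ for the d
   forms) says what is meant directly.
 - Calling helper_atomic_xchgl_le() and friends from inside another
   helper means their internal GETPC() resolves into vector_helper.c
   rather than the translated block, so a fault in the atomic access may
   unwind with the wrong guest state. I have not traced whether that
   bites in practice, but it wants checking.
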
> + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_xchgq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_xchgq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoaddw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_addl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_addl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_addl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_addl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + 
env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vamoaddd_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_addq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_addq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoxorw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + 
env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_xorl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_xorl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamoxord_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_xorq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_xorq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoandw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = 
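
vamoxord_v builds memop as MO_ALIGN | MO_TESL even though it is the
64-bit form; the other d variants use MO_TEQ, and vamominud_v further
down has the same slip. Copy-paste fallout like this is a good argument
for generating these bodies from a single template.
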
vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_andl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_andl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_andl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_andl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoandd_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_andq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop 
& ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_andq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoorw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_orl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_orl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_orl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_orl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamoord_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, 
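
More generally, every AMO helper here is the same function modulo the
helper_atomic_fetch_* callback and the 32/64-bit plumbing. A single
worker taking the atomic op as a function pointer, or a macro stamping
the bodies out, would cut this file by a couple of thousand lines and
make the inconsistencies above impossible by construction.
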
GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_orq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_orq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamominw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_sminl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_sminl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] 
= tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamomind_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_sminq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_sminq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamomaxw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + 
addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_smaxl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_smaxl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env, > + addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = > (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env, > + addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamomaxd_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is writen the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + int64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_smaxq_le(env, addr, > + env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_smaxq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamominuw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + 
vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is written with the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_uminl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_uminl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le( > + env, addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le( > + env, addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > + > +void VECTOR_HELPER(vamominud_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is written with the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_uminl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > +
make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_uminl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_uminq_le( > + env, addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_uminq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > +void VECTOR_HELPER(vamomaxuw_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm, > + uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TESL; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 32 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is written with the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 32: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint32_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s32[j]; > + addr = idx + env->gpr[rs1]; > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_umaxl_le(env, addr, > + env->vfp.vreg[src3].s32[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_umaxl_le(env, addr, > + env->vfp.vreg[src3].s32[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s32[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le( > + env, addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le( > + env, addr, env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > +void VECTOR_HELPER(vamomaxud_v)(CPURISCVState *env, uint32_t wd, uint32_t > vm,
+ uint32_t rs1, uint32_t vs2, uint32_t vs3) > +{ > + int i, j, vl; > + target_long idx; > + uint32_t lmul, width, src2, src3, vlmax; > + target_ulong addr; > +#ifdef CONFIG_SOFTMMU > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOp memop = MO_ALIGN | MO_TEQ; > +#endif > + > + vl = env->vfp.vl; > + lmul = vector_get_lmul(env); > + width = vector_get_width(env); > + vlmax = vector_get_vlmax(env); > + /* MEM <= SEW <= XLEN */ > + if (width < 64 || (width > sizeof(target_ulong) * 8)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + /* if wd, rd is written with the old value */ > + if (vector_vtype_ill(env) || > + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > + return; > + } > + > + vector_lmul_check_reg(env, lmul, vs2, false); > + vector_lmul_check_reg(env, lmul, vs3, false); > + > + for (i = 0; i < vlmax; i++) { > + src2 = vs2 + (i / (VLEN / width)); > + src3 = vs3 + (i / (VLEN / width)); > + j = i % (VLEN / width); > + if (i < env->vfp.vstart) { > + continue; > + } else if (i < vl) { > + switch (width) { > + case 64: > + if (vector_elem_mask(env, vm, width, lmul, i)) { > + uint64_t tmp; > + idx = (target_long)env->vfp.vreg[src2].s64[j]; > + addr = idx + env->gpr[rs1]; > + > +#ifdef CONFIG_SOFTMMU > + tmp = helper_atomic_fetch_umaxq_le( > + env, addr, env->vfp.vreg[src3].s64[j], > + make_memop_idx(memop & ~MO_SIGN, mem_idx)); > +#else > + tmp = helper_atomic_fetch_umaxq_le(env, addr, > + env->vfp.vreg[src3].s64[j]); > +#endif > + if (wd) { > + env->vfp.vreg[src3].s64[j] = tmp; > + } > + env->vfp.vstart++; > + } > + break; > + default: > + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, > GETPC()); > + break; > + } > + } else { > + vector_tail_amo(env, src3, j, width); > + } > + } > + env->vfp.vstart = 0; > +} > + > -- > 2.7.4 > > >
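The vamo*_v helpers above differ only in the atomic operation, its signedness, and the element width; the surrounding element walk is copy-pasted in each one. Below is a minimal standalone sketch of how that walk could be shared. The names amo_op, amo_apply and amo_walk are invented for illustration; masking, vstart handling and the real atomic memory accesses are deliberately elided:

/* Sketch only: one shared walk for the min/max vector AMO helpers.
 * amo_op/amo_apply/amo_walk are invented names, not patch or QEMU API. */
#include <stdint.h>
#include <stdio.h>

typedef enum { AMO_MIN, AMO_MAX, AMO_MINU, AMO_MAXU } amo_op;

static int64_t amo_apply(amo_op op, int64_t mem, int64_t src)
{
    switch (op) {
    case AMO_MIN:  return mem < src ? mem : src;
    case AMO_MAX:  return mem > src ? mem : src;
    case AMO_MINU: return (uint64_t)mem < (uint64_t)src ? mem : src;
    case AMO_MAXU: return (uint64_t)mem > (uint64_t)src ? mem : src;
    }
    return mem;
}

/* One walk replaces the copy-pasted per-op loops: read the old value,
 * store the reduced one, and optionally write the old value back to
 * the destination register (the 'wd' behaviour). */
static void amo_walk(amo_op op, int64_t *mem, int64_t *vreg,
                     const int64_t *idx, int vl, int wd)
{
    for (int i = 0; i < vl; i++) {
        int64_t old = mem[idx[i]];
        mem[idx[i]] = amo_apply(op, old, vreg[i]);
        if (wd) {
            vreg[i] = old;
        }
    }
}

int main(void)
{
    int64_t mem[4] = { 5, -1, 7, 0 };
    int64_t v[2] = { 3, 2 };
    const int64_t idx[2] = { 0, 2 };
    amo_walk(AMO_MIN, mem, v, idx, 2, 1);
    printf("%ld %ld\n", (long)mem[0], (long)mem[2]); /* 3 2 */
    printf("%ld %ld\n", (long)v[0], (long)v[1]);     /* 5 7 */
    return 0;
}

Each concrete helper would then reduce to one amo_walk call plus the width/op checks, instead of repeating the whole loop body.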
On 8/29/19 5:45 AM, liuzhiwei wrote: > Even in qemu, there may be some situations where VSTART != 0. For example, a load > instruction may take a page fault exception at a middle element. If VSTART == > 0, some elements that had been loaded before the exception will be loaded once > again. Alternatively, you can validate all of the pages before performing any memory operations, at which point there will never be an exception in the middle. As it turns out, you *must* do this in order to allow watchpoints to work correctly. David Hildenbrand and I are at this moment fixing this aspect of watchpoints for s390x. See https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05979.html r~
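To make the probe-first idea above concrete, here is a minimal standalone sketch. probe_page() and vector_load() are invented stand-ins rather than QEMU API; address translation, masking and the real element accesses are elided. Any fault is raised in the first pass, before the first element is touched, so vstart never needs to record a partial position:

/* Sketch only, not QEMU API: probe every page first, then access.
 * probe_page() stands in for a real translation-probe helper. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

static bool probe_page(uint64_t addr)
{
    /* Stand-in for a real probe; here, page 3 is made to fault. */
    return (addr / PAGE_SIZE) != 3;
}

static int vector_load(const uint64_t *addrs, int vl, uint64_t *dst)
{
    /* Pass 1: touch every page the instruction will use. */
    for (int i = 0; i < vl; i++) {
        if (!probe_page(addrs[i])) {
            return -1;            /* fault before any element moved */
        }
    }
    /* Pass 2: no faults possible now; each element loads exactly once. */
    for (int i = 0; i < vl; i++) {
        dst[i] = addrs[i];        /* placeholder for the real access */
    }
    return 0;
}

int main(void)
{
    uint64_t a1[2] = { 0, PAGE_SIZE }, d[2];
    printf("%d\n", vector_load(a1, 2, d));   /* 0: both pages valid */
    uint64_t a2[2] = { 0, 3 * PAGE_SIZE };
    printf("%d\n", vector_load(a2, 2, d));   /* -1: faults up front */
    return 0;
}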
On 8/29/19 5:00 AM, liuzhiwei wrote: > Maybe there is a better test method, or some existing test cases in QEMU. Could > you give me some advice on testing? If you have hardware, or another simulator, RISU is very good for testing these sorts of things. See https://git.linaro.org/people/pmaydell/risu.git You'll need to write new support for RISC-V, but it's not hard and we can help out with that. r~
On 29.08.2019. 15.02, "liuzhiwei" <zhiwei_liu@c-sky.com> wrote: > > > On 2019/8/29 3:20 AM, Aleksandar Markovic wrote: >> >> >> >> > On Wed, Aug 28, 2019 at 9:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote: >>> >>> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25 >>> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com> >>> --- >> >> >> Such a large patch and "Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25" is its entire commit message?? Horrible. > > Hi, Aleksandar > > I am so sorry. A patch set with a cover letter will be sent later. > > Best Regards, > > Zhiwei OK, Zhiwei, You'll soon get more used to participating in open source, and will write much better patches. Try to follow the guidelines described at https://wiki.qemu.org/Contribute/SubmitAPatch Thanks, Aleksandar >> >> Aleksandar >> >>> >>> fpu/softfloat.c | 119 + >>> include/fpu/softfloat.h | 4 + >>> linux-user/riscv/cpu_loop.c | 8 +- >>> target/riscv/Makefile.objs | 2 +- >>> target/riscv/cpu.h | 30 + >>> target/riscv/cpu_bits.h | 15 + >>> target/riscv/cpu_helper.c | 7 + >>> target/riscv/csr.c | 65 +- >>> target/riscv/helper.h | 354 + >>> target/riscv/insn32.decode | 374 +- >>> target/riscv/insn_trans/trans_rvv.inc.c | 484 + >>> target/riscv/translate.c | 1 + >>> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++ >>> 13 files changed, 28017 insertions(+), 9 deletions(-) >>> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c >>> create mode 100644 target/riscv/vector_helper.c >>> >>> diff --git a/fpu/softfloat.c b/fpu/softfloat.c >>> index 2ba36ec..da155ea 100644 >>> --- a/fpu/softfloat.c >>> +++ b/fpu/softfloat.c >>> @@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a) >>> } >>> >>> /*---------------------------------------------------------------------------- >>> +| Returns the sign bit of the half-precision floating-point value `a'. >>> +*----------------------------------------------------------------------------*/ >>> + >>> +static inline flag extractFloat16Sign(float16 a) >>> +{ >>> + return float16_val(a) >> 0xf; >>> +} >>> + >>> + >>> +/*---------------------------------------------------------------------------- >>> | Returns the fraction bits of the single-precision floating-point value `a'. >>> *----------------------------------------------------------------------------*/ >>> >>> @@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b, float_status *status) >>> } >>> >>> /*---------------------------------------------------------------------------- >>> +| Returns 1 if the half-precision floating-point value `a' is less than >>> +| or equal to the corresponding value `b', and 0 otherwise. The invalid >>> +| exception is raised if either operand is a NaN. The comparison is performed >>> +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
>>> +*----------------------------------------------------------------------------*/ >>> + >>> +int float16_le(float16 a, float16 b, float_status *status) >>> +{ >>> + flag aSign, bSign; >>> + uint16_t av, bv; >>> + a = float16_squash_input_denormal(a, status); >>> + b = float16_squash_input_denormal(b, status); >>> + >>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) ) >>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) ) >>> + ) { >>> + float_raise(float_flag_invalid, status); >>> + return 0; >>> + } >>> + aSign = extractFloat16Sign( a ); >>> + bSign = extractFloat16Sign( b ); >>> + av = float16_val(a); >>> + bv = float16_val(b); >>> + if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av | bv )<<1 ) == 0 ); >>> + return ( av == bv ) || ( aSign ^ ( av < bv ) ); >>> + >>> +} >>> + >>> +/*---------------------------------------------------------------------------- >>> | Returns 1 if the single-precision floating-point value `a' is less than >>> | or equal to the corresponding value `b', and 0 otherwise. The invalid >>> | exception is raised if either operand is a NaN. The comparison is performed >>> @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, float_status *status) >>> | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. >>> *----------------------------------------------------------------------------*/ >>> >>> +int float16_lt(float16 a, float16 b, float_status *status) >>> +{ >>> + flag aSign, bSign; >>> + uint16_t av, bv; >>> + a = float16_squash_input_denormal(a, status); >>> + b = float16_squash_input_denormal(b, status); >>> + >>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) ) >>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) ) >>> + ) { >>> + float_raise(float_flag_invalid, status); >>> + return 0; >>> + } >>> + aSign = extractFloat16Sign( a ); >>> + bSign = extractFloat16Sign( b ); >>> + av = float16_val(a); >>> + bv = float16_val(b); >>> + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | bv )<<1 ) != 0 ); >>> + return ( av != bv ) && ( aSign ^ ( av < bv ) ); >>> + >>> +} >>> + >>> +/*---------------------------------------------------------------------------- >>> +| Returns 1 if the single-precision floating-point value `a' is less than >>> +| the corresponding value `b', and 0 otherwise. The invalid exception is >>> +| raised if either operand is a NaN. The comparison is performed according >>> +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. >>> +*----------------------------------------------------------------------------*/ >>> + >>> int float32_lt(float32 a, float32 b, float_status *status) >>> { >>> flag aSign, bSign; >>> @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, float_status *status) >>> } >>> >>> /*---------------------------------------------------------------------------- >>> +| Returns 1 if the half-precision floating-point value `a' is equal to >>> +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an >>> +| exception. The comparison is performed according to the IEC/IEEE Standard >>> +| for Binary Floating-Point Arithmetic. 
>>> +*----------------------------------------------------------------------------*/ >>> + >>> +int float16_eq_quiet(float16 a, float16 b, float_status *status) >>> +{ >>> + a = float16_squash_input_denormal(a, status); >>> + b = float16_squash_input_denormal(b, status); >>> + >>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) ) >>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) ) >>> + ) { >>> + if (float16_is_signaling_nan(a, status) >>> + || float16_is_signaling_nan(b, status)) { >>> + float_raise(float_flag_invalid, status); >>> + } >>> + return 0; >>> + } >>> + return ( float16_val(a) == float16_val(b) ) || >>> + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 ) == 0 ); >>> +} >>> + >>> + >>> +/*---------------------------------------------------------------------------- >>> | Returns 1 if the single-precision floating-point value `a' is equal to >>> | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an >>> | exception. The comparison is performed according to the IEC/IEEE Standard >>> @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, float_status *status) >>> } >>> >>> /*---------------------------------------------------------------------------- >>> +| Returns 1 if the half-precision floating-point values `a' and `b' cannot >>> +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The >>> +| comparison is performed according to the IEC/IEEE Standard for Binary >>> +| Floating-Point Arithmetic. >>> +*----------------------------------------------------------------------------*/ >>> + >>> +int float16_unordered_quiet(float16 a, float16 b, float_status *status) >>> +{ >>> + a = float16_squash_input_denormal(a, status); >>> + b = float16_squash_input_denormal(b, status); >>> + >>> + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) ) >>> + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) ) >>> + ) { >>> + if (float16_is_signaling_nan(a, status) >>> + || float16_is_signaling_nan(b, status)) { >>> + float_raise(float_flag_invalid, status); >>> + } >>> + return 1; >>> + } >>> + return 0; >>> +} >>> + >>> + >>> +/*---------------------------------------------------------------------------- >>> | Returns 1 if the single-precision floating-point values `a' and `b' cannot >>> | be compared, and 0 otherwise. Quiet NaNs do not cause an exception. 
The >>> | comparison is performed according to the IEC/IEEE Standard for Binary >>> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h >>> index 3ff3fa5..3b0754c 100644 >>> --- a/include/fpu/softfloat.h >>> +++ b/include/fpu/softfloat.h >>> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status); >>> float16 float16_sqrt(float16, float_status *status); >>> int float16_compare(float16, float16, float_status *status); >>> int float16_compare_quiet(float16, float16, float_status *status); >>> +int float16_unordered_quiet(float16, float16, float_status *status); >>> +int float16_le(float16, float16, float_status *status); >>> +int float16_lt(float16, float16, float_status *status); >>> +int float16_eq_quiet(float16, float16, float_status *status); >>> >>> int float16_is_quiet_nan(float16, float_status *status); >>> int float16_is_signaling_nan(float16, float_status *status); >>> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c >>> index 12aa3c0..b01548a 100644 >>> --- a/linux-user/riscv/cpu_loop.c >>> +++ b/linux-user/riscv/cpu_loop.c >>> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env) >>> signum = 0; >>> sigcode = 0; >>> sigaddr = 0; >>> - >>> + if (env->foflag) { >>> + if (env->vfp.vl != 0) { >>> + env->foflag = false; >>> + env->pc += 4; >>> + continue; >>> + } >>> + } >>> switch (trapnr) { >>> case EXCP_INTERRUPT: >>> /* just indicate that signals should be handled asap */ >>> diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs >>> index b1c79bc..d577cef 100644 >>> --- a/target/riscv/Makefile.objs >>> +++ b/target/riscv/Makefile.objs >>> @@ -1,4 +1,4 @@ >>> -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o pmp.o >>> +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o pmp.o >>> >>> DECODETREE = $(SRC_PATH)/scripts/decodetree.py >>> >>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h >>> index 0adb307..5a93aa2 100644 >>> --- a/target/riscv/cpu.h >>> +++ b/target/riscv/cpu.h >>> @@ -67,6 +67,7 @@ >>> #define RVC RV('C') >>> #define RVS RV('S') >>> #define RVU RV('U') >>> +#define RVV RV('V') >>> >>> /* S extension denotes that Supervisor mode exists, however it is possible >>> to have a core that support S mode but does not have an MMU and there >>> @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState; >>> >>> #include "pmp.h" >>> >>> +#define VLEN 128 >>> +#define VUNIT(x) (VLEN / x) >>> + >>> struct CPURISCVState { >>> target_ulong gpr[32]; >>> uint64_t fpr[32]; /* assume both F and D extensions */ >>> + >>> + /* vector coprocessor state. 
*/ >>> + struct { >>> + union VECTOR { >>> + float64 f64[VUNIT(64)]; >>> + float32 f32[VUNIT(32)]; >>> + float16 f16[VUNIT(16)]; >>> + target_ulong ul[VUNIT(sizeof(target_ulong))]; >>> + uint64_t u64[VUNIT(64)]; >>> + int64_t s64[VUNIT(64)]; >>> + uint32_t u32[VUNIT(32)]; >>> + int32_t s32[VUNIT(32)]; >>> + uint16_t u16[VUNIT(16)]; >>> + int16_t s16[VUNIT(16)]; >>> + uint8_t u8[VUNIT(8)]; >>> + int8_t s8[VUNIT(8)]; >>> + } vreg[32]; >>> + target_ulong vxrm; >>> + target_ulong vxsat; >>> + target_ulong vl; >>> + target_ulong vstart; >>> + target_ulong vtype; >>> + float_status fp_status; >>> + } vfp; >>> + >>> + bool foflag; >>> target_ulong pc; >>> target_ulong load_res; >>> target_ulong load_val; >>> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h >>> index 11f971a..9eb43ec 100644 >>> --- a/target/riscv/cpu_bits.h >>> +++ b/target/riscv/cpu_bits.h >>> @@ -29,6 +29,14 @@ >>> #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) >>> #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA) >>> >>> +/* Vector Fixed-Point rounding mode */ >>> +#define FSR_VXRM_SHIFT 9 >>> +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT) >>> + >>> +/* Vector Fixed-Point saturation flag */ >>> +#define FSR_VXSAT_SHIFT 8 >>> +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT) >>> + >>> /* Control and Status Registers */ >>> >>> /* User Trap Setup */ >>> @@ -48,6 +56,13 @@ >>> #define CSR_FRM 0x002 >>> #define CSR_FCSR 0x003 >>> >>> +/* User Vector CSRs */ >>> +#define CSR_VSTART 0x008 >>> +#define CSR_VXSAT 0x009 >>> +#define CSR_VXRM 0x00a >>> +#define CSR_VL 0xc20 >>> +#define CSR_VTYPE 0xc21 >>> + >>> /* User Timers and Counters */ >>> #define CSR_CYCLE 0xc00 >>> #define CSR_TIME 0xc01 >>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c >>> index e32b612..405caf6 100644 >>> --- a/target/riscv/cpu_helper.c >>> +++ b/target/riscv/cpu_helper.c >>> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs) >>> [PRV_H] = RISCV_EXCP_H_ECALL, >>> [PRV_M] = RISCV_EXCP_M_ECALL >>> }; >>> + if (env->foflag) { >>> + if (env->vfp.vl != 0) { >>> + env->foflag = false; >>> + env->pc += 4; >>> + return; >>> + } >>> + } >>> >>> if (!async) { >>> /* set tval to badaddr for traps with address information */ >>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c >>> index e0d4586..a6131ff 100644 >>> --- a/target/riscv/csr.c >>> +++ b/target/riscv/csr.c >>> @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno) >>> return 0; >>> } >>> >>> -#if !defined(CONFIG_USER_ONLY) >>> static int any(CPURISCVState *env, int csrno) >>> { >>> return 0; >>> } >>> >>> +#if !defined(CONFIG_USER_ONLY) >>> static int smode(CPURISCVState *env, int csrno) >>> { >>> return -!riscv_has_ext(env, RVS); >>> @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val) >>> return -1; >>> } >>> #endif >>> - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) >>> - | (env->frm << FSR_RD_SHIFT); >>> + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT) >>> + | (env->vfp.vxsat << FSR_VXSAT_SHIFT) >>> + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) >>> + | (env->frm << FSR_RD_SHIFT); >>> return 0; >>> } >>> >>> @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val) >>> env->mstatus |= MSTATUS_FS; >>> #endif >>> env->frm = (val & FSR_RD) >> FSR_RD_SHIFT; >>> + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT; >>> + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT; >>> riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT); >>> return 0; >>> } >>> >>>
+static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val) >>> +{ >>> + *val = env->vfp.vtype; >>> + return 0; >>> +} >>> + >>> +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val) >>> +{ >>> + *val = env->vfp.vl; >>> + return 0; >>> +} >>> + >>> +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val) >>> +{ >>> + *val = env->vfp.vxrm; >>> + return 0; >>> +} >>> + >>> +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val) >>> +{ >>> + *val = env->vfp.vxsat; >>> + return 0; >>> +} >>> + >>> +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val) >>> +{ >>> + *val = env->vfp.vstart; >>> + return 0; >>> +} >>> + >>> +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val) >>> +{ >>> + env->vfp.vxrm = val; >>> + return 0; >>> +} >>> + >>> +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val) >>> +{ >>> + env->vfp.vxsat = val; >>> + return 0; >>> +} >>> + >>> +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val) >>> +{ >>> + env->vfp.vstart = val; >>> + return 0; >>> +} >>> + >>> /* User Timers and Counters */ >>> static int read_instret(CPURISCVState *env, int csrno, target_ulong *val) >>> { >>> @@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { >>> [CSR_FFLAGS] = { fs, read_fflags, write_fflags }, >>> [CSR_FRM] = { fs, read_frm, write_frm }, >>> [CSR_FCSR] = { fs, read_fcsr, write_fcsr }, >>> - >>> + /* Vector CSRs */ >>> + [CSR_VSTART] = { any, read_vstart, write_vstart }, >>> + [CSR_VXSAT] = { any, read_vxsat, write_vxsat }, >>> + [CSR_VXRM] = { any, read_vxrm, write_vxrm }, >>> + [CSR_VL] = { any, read_vl }, >>> + [CSR_VTYPE] = { any, read_vtype }, >>> /* User Timers and Counters */ >>> [CSR_CYCLE] = { ctr, read_instret }, >>> [CSR_INSTRET] = { ctr, read_instret }, >>> diff --git a/target/riscv/helper.h b/target/riscv/helper.h >>> index debb22a..fee02c0 100644 >>> --- a/target/riscv/helper.h >>> +++ b/target/riscv/helper.h >>> @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl) >>> DEF_HELPER_1(wfi, void, env) >>> DEF_HELPER_1(tlb_flush, void, env) >>> #endif >>> +/* Vector functions */ >>> +DEF_HELPER_5(vector_vlb_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vlh_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vlw_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vle_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vlbu_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vlhu_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vlwu_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vlbff_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vlhff_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vlwff_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vleff_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vlbuff_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vlhuff_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vlwuff_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsb_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsh_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsw_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vse_v, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlsb_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlsh_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlsw_v, void, env, i32, i32, i32, i32, i32) >>> 
+DEF_HELPER_6(vector_vlse_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlsbu_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlshu_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlswu_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vssb_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vssh_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vssw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vsse_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlxb_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlxh_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlxw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlxe_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlxbu_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlxhu_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vlxwu_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vsxb_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vsxh_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vsxw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vsxe_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vsuxb_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vsuxh_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vsuxw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vsuxe_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamoswapw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamoswapd_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamoaddw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamoaddd_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamoxorw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamoxord_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamoandw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamoandd_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamoorw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamoord_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamominw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamomind_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamomaxw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamomaxd_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamominuw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamominud_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamomaxuw_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_6(vector_vamomaxud_v, void, env, i32, i32, i32, i32, i32) >>> +DEF_HELPER_4(vector_vext_x_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfmv_f_s, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmv_s_x, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfmv_s_f, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vadc_vvm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vadc_vxm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vadc_vim, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmadc_vvm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmadc_vxm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmadc_vim, void, env, i32, i32, i32) >>> 
+DEF_HELPER_4(vector_vsbc_vvm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vsbc_vxm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmsbc_vvm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmsbc_vxm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmpopc_m, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmfirst_m, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vcompress_vm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmandnot_mm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmand_mm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmor_mm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmxor_mm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmornot_mm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmnand_mm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmnor_mm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmxnor_mm, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmsbf_m, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmsof_m, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vmsif_m, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_viota_m, void, env, i32, i32, i32) >>> +DEF_HELPER_3(vector_vid_v, void, env, i32, i32) >>> +DEF_HELPER_4(vector_vfcvt_xu_f_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfcvt_x_f_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfcvt_f_xu_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfcvt_f_x_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfwcvt_xu_f_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfwcvt_x_f_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfwcvt_f_xu_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfwcvt_f_x_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfwcvt_f_f_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfncvt_xu_f_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfncvt_x_f_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfncvt_f_xu_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfncvt_f_x_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfncvt_f_f_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfsqrt_v, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vfclass_v, void, env, i32, i32, i32) >>> +DEF_HELPER_5(vector_vadd_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vadd_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vadd_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vredsum_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfadd_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfadd_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vredand_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfredsum_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsub_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsub_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vredor_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfsub_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfsub_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vrsub_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vrsub_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vredxor_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfredosum_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vminu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vminu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vredminu_vs, 
void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmin_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmin_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmin_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmin_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vredmin_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfredmin_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmaxu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmaxu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vredmaxu_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmax_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmax_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmax_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmax_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vredmax_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfredmax_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfsgnj_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfsgnj_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vand_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vand_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vand_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfsgnjn_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfsgnjn_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vor_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vor_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vor_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfsgnjx_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfsgnjx_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vxor_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vxor_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vxor_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vrgather_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vrgather_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vrgather_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vslideup_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vslideup_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vslide1up_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vslidedown_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vslidedown_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vslide1down_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmerge_vvm, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmerge_vxm, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmerge_vim, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmerge_vfm, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmseq_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmseq_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmseq_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmfeq_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmfeq_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsne_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsne_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsne_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmfle_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmfle_vf, void, 
env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsltu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsltu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmford_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmford_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmslt_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmslt_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmflt_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmflt_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsleu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsleu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsleu_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmfne_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmfne_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsle_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsle_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsle_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmfgt_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsgtu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsgtu_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsgt_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmsgt_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmfge_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsaddu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsaddu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsaddu_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vdivu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vdivu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfdiv_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfdiv_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsadd_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsadd_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsadd_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vdiv_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vdiv_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfrdiv_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vssubu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vssubu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vremu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vremu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vssub_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vssub_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vrem_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vrem_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vaadd_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vaadd_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vaadd_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmulhu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmulhu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmul_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmul_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsll_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsll_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsll_vi, void, env, i32, i32, i32, i32) >>> 
+DEF_HELPER_5(vector_vmul_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmul_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vasub_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vasub_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmulhsu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmulhsu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsmul_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsmul_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmulh_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmulh_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfrsub_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsrl_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsrl_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsrl_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmadd_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmadd_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsra_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsra_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vsra_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmadd_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmadd_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfnmadd_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfnmadd_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vssrl_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vssrl_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vssrl_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmsub_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmsub_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vssra_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vssra_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vssra_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnmsub_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnmsub_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfnmsub_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfnmsub_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnsrl_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnsrl_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnsrl_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmacc_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmacc_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnsra_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnsra_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnsra_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmacc_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vmacc_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfnmacc_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfnmacc_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnclipu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnclipu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnclipu_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmsac_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfmsac_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnclip_vv, void, env, i32, i32, i32, i32) >>> 
+DEF_HELPER_5(vector_vnclip_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnclip_vi, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnmsac_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vnmsac_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfnmsac_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfnmsac_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwredsumu_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwaddu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwaddu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwadd_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwadd_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwredsum_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwadd_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwadd_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwredsum_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsubu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsubu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwsub_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwsub_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsub_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsub_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwredosum_vs, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwaddu_wv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwaddu_wx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwadd_wv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwadd_wf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwadd_wv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwadd_wx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsubu_wv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsubu_wx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwsub_wv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwsub_wf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsub_wv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsub_wx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmulu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmulu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwmul_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwmul_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmulsu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmulsu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmul_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmul_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsmaccu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsmaccu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmaccu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmaccu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwmacc_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwmacc_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsmacc_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsmacc_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmacc_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmacc_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwnmacc_vv, void, env, i32, i32, 
i32, i32) >>> +DEF_HELPER_5(vector_vfwnmacc_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsmaccsu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsmaccsu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmaccsu_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmaccsu_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwmsac_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwmsac_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwsmaccus_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vwmaccus_vx, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwnmsac_vv, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_5(vector_vfwnmsac_vf, void, env, i32, i32, i32, i32) >>> +DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32) >>> +DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32) >>> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode >>> index 77f794e..d125ff9 100644 >>> --- a/target/riscv/insn32.decode >>> +++ b/target/riscv/insn32.decode >>> @@ -25,7 +25,7 @@ >>> %sh10 20:10 >>> %csr 20:12 >>> %rm 12:3 >>> - >>> +%nf 29:3 >>> # immediates: >>> %imm_i 20:s12 >>> %imm_s 25:s7 7:5 >>> @@ -43,7 +43,6 @@ >>> &u imm rd >>> &shift shamt rs1 rd >>> &atomic aq rl rs2 rs1 rd >>> - >>> # Formats 32: >>> @r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd >>> @i ............ ..... ... ..... ....... &i imm=%imm_i %rs1 %rd >>> @@ -62,11 +61,17 @@ >>> @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd >>> @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd >>> @r2 ....... ..... ..... ... ..... ....... %rs1 %rd >>> +@r_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd >>> +@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd >>> +@r_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd >>> +@r2_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs1 %rd >>> +@r2_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rd >>> +@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd >>> +@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd >>> >>> @sfence_vma ....... ..... ..... ... ..... ....... %rs2 %rs1 >>> @sfence_vm ....... ..... ..... ... ..... ....... %rs1 >>> >>> - >>> # *** Privileged Instructions *** >>> ecall 000000000000 00000 000 00000 1110011 >>> ebreak 000000000001 00000 000 00000 1110011 >>> @@ -203,3 +208,366 @@ fcvt_w_d 1100001 00000 ..... ... ..... 1010011 @r2_rm >>> fcvt_wu_d 1100001 00001 ..... ... ..... 1010011 @r2_rm >>> fcvt_d_w 1101001 00000 ..... ... ..... 1010011 @r2_rm >>> fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm >>> + >>> +# *** RV32V Standard Extension *** >>> + >>> +# *** Vector loads and stores are encoded within LOADFP/STORE-FP *** >>> +vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm >>> +vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm >>> +vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm >>> +vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm >>> +vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm >>> +vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm >>> +vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm >>> +vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm >>> +vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm >>> +vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm >>> +vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm >>> +vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm >>> +vlhuff_v ... 000 . 10000 ..... 101 ..... 
0000111 @r2_nfvm >>> +vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm >>> +vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm >>> +vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm >>> +vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm >>> +vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm >>> + >>> +vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm >>> +vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm >>> +vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm >>> +vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm >>> +vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm >>> +vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm >>> +vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm >>> +vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm >>> +vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm >>> +vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm >>> +vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm >>> + >>> +vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm >>> +vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm >>> +vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm >>> +vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm >>> +vlxbu_v ... 011 . ..... ..... 000 ..... 0000111 @r_nfvm >>> +vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm >>> +vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm >>> +vsxb_v ... 011 . ..... ..... 000 ..... 0100111 @r_nfvm >>> +vsxh_v ... 011 . ..... ..... 101 ..... 0100111 @r_nfvm >>> +vsxw_v ... 011 . ..... ..... 110 ..... 0100111 @r_nfvm >>> +vsxe_v ... 011 . ..... ..... 111 ..... 0100111 @r_nfvm >>> +vsuxb_v ... 111 . ..... ..... 000 ..... 0100111 @r_nfvm >>> +vsuxh_v ... 111 . ..... ..... 101 ..... 0100111 @r_nfvm >>> +vsuxw_v ... 111 . ..... ..... 110 ..... 0100111 @r_nfvm >>> +vsuxe_v ... 111 . ..... ..... 111 ..... 0100111 @r_nfvm >>> + >>> +#*** Vector AMO operations are encoded under the standard AMO major opcode.*** >>> +vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm >>> +vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm >>> +vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm >>> +vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm >>> +vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm >>> +vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm >>> +vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm >>> +vamoandd_v 01100 . . ..... ..... 111 ..... 0101111 @r_wdvm >>> +vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm >>> +vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm >>> +vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm >>> +vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm >>> +vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm >>> +vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm >>> +vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm >>> +vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm >>> +vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm >>> +vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm >>> + >>> +#*** new major opcode OP-V *** >>> +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm >>> +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm >>> +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm >>> +vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm >>> +vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfadd_vf 000000 . ..... ..... 101 ..... 
1010111 @r_vm >>> +vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm >>> +vfredsum_vs 000001 . ..... ..... 001 ..... 1010111 @r_vm >>> +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm >>> +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm >>> +vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm >>> +vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm >>> +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm >>> +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm >>> +vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm >>> +vfredosum_vs 000011 . ..... ..... 001 ..... 1010111 @r_vm >>> +vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm >>> +vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm >>> +vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm >>> +vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm >>> +vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm >>> +vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm >>> +vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm >>> +vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm >>> +vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm >>> +vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm >>> +vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm >>> +vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm >>> +vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm >>> +vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm >>> +vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm >>> +vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm >>> +vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm >>> +vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm >>> +vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm >>> +vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm >>> +vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm >>> +vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm >>> +vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm >>> +vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm >>> +vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm >>> +vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm >>> +vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm >>> +vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm >>> +vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm >>> +vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm >>> +vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r >>> +vfmv_f_s 001100 1 ..... ..... 001 ..... 1010111 @r >>> +vmv_s_x 001101 1 ..... ..... 110 ..... 1010111 @r >>> +vfmv_s_f 001101 1 ..... ..... 101 ..... 1010111 @r >>> +vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm >>> +vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm >>> +vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm >>> +vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm >>> +vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm >>> +vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm >>> +vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r >>> +vadc_vxm 010000 1 ..... ..... 100 ..... 
1010111 @r >>> +vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r >>> +vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r >>> +vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r >>> +vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r >>> +vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r >>> +vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r >>> +vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r >>> +vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r >>> +vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm >>> +vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm >>> +vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm >>> +vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm >>> +vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm >>> +viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm >>> +vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm >>> +vmerge_vvm 010111 . ..... ..... 000 ..... 1010111 @r_vm >>> +vmerge_vxm 010111 . ..... ..... 100 ..... 1010111 @r_vm >>> +vmerge_vim 010111 . ..... ..... 011 ..... 1010111 @r_vm >>> +vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r >>> +vfmerge_vfm 010111 . ..... ..... 101 ..... 1010111 @r_vm >>> +vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm >>> +vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm >>> +vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm >>> +vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r >>> +vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm >>> +vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm >>> +vmsne_vv 011001 . ..... ..... 000 ..... 1010111 @r_vm >>> +vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm >>> +vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm >>> +vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r >>> +vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm >>> +vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm >>> +vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm >>> +vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm >>> +vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r >>> +vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm >>> +vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm >>> +vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm >>> +vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm >>> +vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r >>> +vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm >>> +vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm >>> +vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm >>> +vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm >>> +vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm >>> +vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r >>> +vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm >>> +vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm >>> +vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm >>> +vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm >>> +vmsle_vi 011101 . ..... ..... 011 ..... 1010111 @r_vm >>> +vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r >>> +vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm >>> +vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm >>> +vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm >>> +vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r >>> +vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm >>> +vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm >>> +vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r >>> +vmfge_vf 011111 . ..... ..... 101 ..... 
1010111 @r_vm >>> +vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm >>> +vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm >>> +vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm >>> +vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm >>> +vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm >>> +vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm >>> +vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm >>> +vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm >>> +vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm >>> +vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm >>> +vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm >>> +vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm >>> +vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm >>> +vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm >>> +vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm >>> +vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm >>> +vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm >>> +vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm >>> +vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm >>> +vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm >>> +vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm >>> +vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm >>> +vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm >>> +vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm >>> +vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm >>> +vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm >>> +vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm >>> +vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm >>> +vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm >>> +vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm >>> +vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm >>> +vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm >>> +vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm >>> +vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm >>> +vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm >>> +vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm >>> +vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm >>> +vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm >>> +vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm >>> +vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm >>> +vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm >>> +vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm >>> +vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm >>> +vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm >>> +vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm >>> +vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm >>> +vsmul_vv 100111 . ..... ..... 000 ..... 1010111 @r_vm >>> +vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm >>> +vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm >>> +vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm >>> +vsrl_vv 101000 . ..... ..... 
000 ..... 1010111 @r_vm >>> +vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm >>> +vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm >>> +vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm >>> +vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm >>> +vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm >>> +vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm >>> +vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm >>> +vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm >>> +vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm >>> +vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm >>> +vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm >>> +vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm >>> +vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm >>> +vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm >>> +vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm >>> +vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm >>> +vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm >>> +vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm >>> +vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm >>> +vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm >>> +vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm >>> +vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm >>> +vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm >>> +vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm >>> +vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm >>> +vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm >>> +vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm >>> +vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm >>> +vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm >>> +vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm >>> +vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm >>> +vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm >>> +vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm >>> +vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm >>> +vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm >>> +vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm >>> +vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm >>> +vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm >>> +vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfwredsum_vs 110001 . ..... ..... 001 ..... 1010111 @r_vm >>> +vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfwsub_vv 110010 . ..... ..... 001 ..... 
1010111 @r_vm >>> +vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm >>> +vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfwredosum_vs 110011 . ..... ..... 001 ..... 1010111 @r_vm >>> +vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm >>> +vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm >>> +vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm >>> +vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm >>> +vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm >>> +vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm >>> +vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm >>> +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm >>> +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm >>> +vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm >>> +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm >>> +vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm >>> +vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm >>> +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm >>> +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm >>> +vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm >>> +vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm >>> +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm >>> +vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm >>> +vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm >>> +vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm >>> +vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm >>> +vsetvl 1000000 ..... ..... 111 ..... 1010111 @r >>> diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c >>> new file mode 100644 >>> index 0000000..dc8e6ce >>> --- /dev/null >>> +++ b/target/riscv/insn_trans/trans_rvv.inc.c >>> @@ -0,0 +1,484 @@ >>> +/* >>> + * RISC-V translation routines for the RVV Standard Extension. >>> + * >>> + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved. >>> + * >>> + * This program is free software; you can redistribute it and/or modify it >>> + * under the terms and conditions of the GNU General Public License, >>> + * version 2 or later, as published by the Free Software Foundation. 
>>> + * >>> + * This program is distributed in the hope it will be useful, but WITHOUT >>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or >>> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for >>> + * more details. >>> + * >>> + * You should have received a copy of the GNU General Public License along with >>> + * this program. If not, see <http://www.gnu.org/licenses/>. >>> + */ >>> + >>> +#define GEN_VECTOR_R2_NFVM(INSN) \ >>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ >>> +{ \ >>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ >>> + TCGv_i32 d = tcg_const_i32(a->rd); \ >>> + TCGv_i32 nf = tcg_const_i32(a->nf); \ >>> + TCGv_i32 vm = tcg_const_i32(a->vm); \ >>> + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, d); \ >>> + tcg_temp_free_i32(s1); \ >>> + tcg_temp_free_i32(d); \ >>> + tcg_temp_free_i32(nf); \ >>> + tcg_temp_free_i32(vm); \ >>> + return true; \ >>> +} >>> +#define GEN_VECTOR_R_NFVM(INSN) \ >>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ >>> +{ \ >>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ >>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ >>> + TCGv_i32 d = tcg_const_i32(a->rd); \ >>> + TCGv_i32 nf = tcg_const_i32(a->nf); \ >>> + TCGv_i32 vm = tcg_const_i32(a->vm); \ >>> + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, s2, d);\ >>> + tcg_temp_free_i32(s1); \ >>> + tcg_temp_free_i32(s2); \ >>> + tcg_temp_free_i32(d); \ >>> + tcg_temp_free_i32(nf); \ >>> + tcg_temp_free_i32(vm); \ >>> + return true; \ >>> +} >>> + >>> +#define GEN_VECTOR_R_WDVM(INSN) \ >>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ >>> +{ \ >>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ >>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ >>> + TCGv_i32 d = tcg_const_i32(a->rd); \ >>> + TCGv_i32 wd = tcg_const_i32(a->wd); \ >>> + TCGv_i32 vm = tcg_const_i32(a->vm); \ >>> + gen_helper_vector_##INSN(cpu_env, wd, vm, s1, s2, d);\ >>> + tcg_temp_free_i32(s1); \ >>> + tcg_temp_free_i32(s2); \ >>> + tcg_temp_free_i32(d); \ >>> + tcg_temp_free_i32(wd); \ >>> + tcg_temp_free_i32(vm); \ >>> + return true; \ >>> +} >>> +#define GEN_VECTOR_R(INSN) \ >>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ >>> +{ \ >>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ >>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ >>> + TCGv_i32 d = tcg_const_i32(a->rd); \ >>> + gen_helper_vector_##INSN(cpu_env, s1, s2, d); \ >>> + tcg_temp_free_i32(s1); \ >>> + tcg_temp_free_i32(s2); \ >>> + tcg_temp_free_i32(d); \ >>> + return true; \ >>> +} >>> +#define GEN_VECTOR_R2_VM(INSN) \ >>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ >>> +{ \ >>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ >>> + TCGv_i32 d = tcg_const_i32(a->rd); \ >>> + TCGv_i32 vm = tcg_const_i32(a->vm); \ >>> + gen_helper_vector_##INSN(cpu_env, vm, s2, d); \ >>> + tcg_temp_free_i32(s2); \ >>> + tcg_temp_free_i32(d); \ >>> + tcg_temp_free_i32(vm); \ >>> + return true; \ >>> +} >>> + >>> +#define GEN_VECTOR_R1_VM(INSN) \ >>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ >>> +{ \ >>> + TCGv_i32 d = tcg_const_i32(a->rd); \ >>> + TCGv_i32 vm = tcg_const_i32(a->vm); \ >>> + gen_helper_vector_##INSN(cpu_env, vm, d); \ >>> + tcg_temp_free_i32(d); \ >>> + tcg_temp_free_i32(vm); \ >>> + return true; \ >>> +} >>> +#define GEN_VECTOR_R_VM(INSN) \ >>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ >>> +{ \ >>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ >>> + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ >>> + TCGv_i32 d = tcg_const_i32(a->rd); \ >>> + TCGv_i32 vm 
= tcg_const_i32(a->vm); \ >>> + gen_helper_vector_##INSN(cpu_env, vm, s1, s2, d); \ >>> + tcg_temp_free_i32(s1); \ >>> + tcg_temp_free_i32(s2); \ >>> + tcg_temp_free_i32(d); \ >>> + tcg_temp_free_i32(vm); \ >>> + return true; \ >>> +} >>> +#define GEN_VECTOR_R2_ZIMM(INSN) \ >>> +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ >>> +{ \ >>> + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ >>> + TCGv_i32 zimm = tcg_const_i32(a->zimm); \ >>> + TCGv_i32 d = tcg_const_i32(a->rd); \ >>> + gen_helper_vector_##INSN(cpu_env, s1, zimm, d); \ >>> + tcg_temp_free_i32(s1); \ >>> + tcg_temp_free_i32(zimm); \ >>> + tcg_temp_free_i32(d); \ >>> + return true; \ >>> +} >>> + >>> +GEN_VECTOR_R2_NFVM(vlb_v) >>> +GEN_VECTOR_R2_NFVM(vlh_v) > > ...
On Thu, Aug 29, 2019 at 5:05 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote: > > On 2019/8/29 上午5:34, Alistair Francis wrote: > > On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote: > >> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25 > >> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com> > >> --- > >> fpu/softfloat.c | 119 + > >> include/fpu/softfloat.h | 4 + > >> linux-user/riscv/cpu_loop.c | 8 +- > >> target/riscv/Makefile.objs | 2 +- > >> target/riscv/cpu.h | 30 + > >> target/riscv/cpu_bits.h | 15 + > >> target/riscv/cpu_helper.c | 7 + > >> target/riscv/csr.c | 65 +- > >> target/riscv/helper.h | 354 + > >> target/riscv/insn32.decode | 374 +- > >> target/riscv/insn_trans/trans_rvv.inc.c | 484 + > >> target/riscv/translate.c | 1 + > >> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++ > >> 13 files changed, 28017 insertions(+), 9 deletions(-) > >> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c > >> create mode 100644 target/riscv/vector_helper.c > >> > > Hello, > > > > Thanks for the patch! > > > > As others have pointed out you will need to split the patch up into > > multiple smaller patches, otherwise it is too hard to review almost > > 30,000 lines of code. > > Hi, Alistair > > I'm so sorry for the inconvenience. It will be a patch set with a cover > letter in V2. No worries. > > > Can you also include a cover letter with your patch series describing > > how you are testing this? AFAIK vector extension support isn't in any > > compiler so I'm assuming you are handwriting the assembly or have > > toolchain patches. Either way it will help if you can share that so > > others can test your implementation. > > Yes, it's handwriting assembly. The assembler in Binutils has support > Vector extension. First define an function test_vadd_vv_8 in assembly > and then it can be called from a C program. > > The function is something like > > /* vadd.vv */ > TEST_FUNC(test_vadd_vv_8) > vsetvli t1, x0, e8, m2 > vlb.v v6, (a4) > vsb.v v6, (a3) > vsetvli t1, a0, e8, m2 > vlb.v v0, (a1) > vlb.v v2, (a2) > vadd.vv v4, v0, v2 > vsb.v v4, (a3) > ret > .size test_vadd_vv_8, .-test_vadd_vv_8 If possible it might be worth releasing the code that you are using for testing. > > It takes more time to test than to implement the instructions. Maybe > there is some better test method or some forced test cases in QEMU. > Could you give me some advice for testing? Richard's idea of risu seems like a good option. Thinking about it a bit more we are going to have other extensions in the future that will need assembly testing so setting up a test framework seems like a good idea. I am happy to help try and get this going as well. Alistair > > Best Regards, > > Zhiwei > > > Alex and Richard have kindly started the review. Once you have > > addressed their comments and split this patch up into smaller patches > > you can send a v2 and we can go from there. > > > > Once again thanks for doing this implementation for QEMU! > > > > Alistair > >
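For concreteness, a minimal C harness for a handwritten test function like the one quoted above might look like the sketch below. The prototype and argument layout (element count in a0, source buffers in a1/a2, destination in a3, initialization data in a4) are guesses read off the assembly, not part of the original patch:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Assumed prototype for the assembly routine quoted above. */
    extern void test_vadd_vv_8(long n, const int8_t *src1, const int8_t *src2,
                               int8_t *dst, const int8_t *init);

    int main(void)
    {
        int8_t a[16], b[16], out[16], ref[16], init[16];
        int i;

        for (i = 0; i < 16; i++) {
            a[i] = i;
            b[i] = 2 * i;
            init[i] = 0;
            ref[i] = a[i] + b[i];    /* scalar reference result */
        }

        test_vadd_vv_8(16, a, b, out, init);

        puts(memcmp(out, ref, sizeof(ref)) == 0 ? "vadd.vv PASS"
                                                : "vadd.vv FAIL");
        return 0;
    }

Computing a scalar reference and comparing against it is the same cross-checking idea as the Spike and risu suggestions that follow.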
Alistair Francis <alistair23@gmail.com> writes: > On Thu, Aug 29, 2019 at 5:05 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote: >> >> On 2019/8/29 上午5:34, Alistair Francis wrote: >> > On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote: >> >> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25 >> >> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com> >> >> --- >> >> fpu/softfloat.c | 119 + >> >> include/fpu/softfloat.h | 4 + >> >> linux-user/riscv/cpu_loop.c | 8 +- >> >> target/riscv/Makefile.objs | 2 +- >> >> target/riscv/cpu.h | 30 + >> >> target/riscv/cpu_bits.h | 15 + >> >> target/riscv/cpu_helper.c | 7 + >> >> target/riscv/csr.c | 65 +- >> >> target/riscv/helper.h | 354 + >> >> target/riscv/insn32.decode | 374 +- >> >> target/riscv/insn_trans/trans_rvv.inc.c | 484 + >> >> target/riscv/translate.c | 1 + >> >> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++ >> >> 13 files changed, 28017 insertions(+), 9 deletions(-) >> >> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c >> >> create mode 100644 target/riscv/vector_helper.c >> >> >> > Hello, >> > >> > Thanks for the patch! >> > >> > As others have pointed out you will need to split the patch up into >> > multiple smaller patches, otherwise it is too hard to review almost >> > 30,000 lines of code. >> >> Hi, Alistair >> >> I'm so sorry for the inconvenience. It will be a patch set with a cover >> letter in V2. > > No worries. > >> >> > Can you also include a cover letter with your patch series describing >> > how you are testing this? AFAIK vector extension support isn't in any >> > compiler so I'm assuming you are handwriting the assembly or have >> > toolchain patches. Either way it will help if you can share that so >> > others can test your implementation. >> >> Yes, it's handwriting assembly. The assembler in Binutils has support >> Vector extension. First define an function test_vadd_vv_8 in assembly >> and then it can be called from a C program. >> >> The function is something like >> >> /* vadd.vv */ >> TEST_FUNC(test_vadd_vv_8) >> vsetvli t1, x0, e8, m2 >> vlb.v v6, (a4) >> vsb.v v6, (a3) >> vsetvli t1, a0, e8, m2 >> vlb.v v0, (a1) >> vlb.v v2, (a2) >> vadd.vv v4, v0, v2 >> vsb.v v4, (a3) >> ret >> .size test_vadd_vv_8, .-test_vadd_vv_8 > > If possible it might be worth releasing the code that you are using for testing. > >> >> It takes more time to test than to implement the instructions. Maybe >> there is some better test method or some forced test cases in QEMU. >> Could you give me some advice for testing? > > Richard's idea of risu seems like a good option. > > Thinking about it a bit more we are going to have other extensions in > the future that will need assembly testing so setting up a test > framework seems like a good idea. I am happy to help try and get this > going as well. tests/tcg already has the bits you need for both linux-user and system based testing. The main problem is getting a version of gcc that is new enough to emit the newer instructions. I recently updated the images to buster so gcc is pretty recent now (8.3). 
I did start down the road of a general "op" test framework which tried to come up with a common framework/boilerplate so all you needed to do was supply a new function (possibly with a hex-encoded instruction) and a list of expected inputs and outputs:

https://github.com/stsquad/qemu/commits/testing/generic-op-tester

I suspect it was over-engineered, but perhaps it would be worth reviving it (or something like it) to make adding a simple single-instruction test case possible with minimal additional verbiage?

> > Alistair > >> >> Best Regards, >> >> Zhiwei >> >> > Alex and Richard have kindly started the review. Once you have >> > addressed their comments and split this patch up into smaller patches >> > you can send a v2 and we can go from there. >> > >> > Once again thanks for doing this implementation for QEMU! >> > >> > Alistair >> >

-- Alex Bennée
On Fri, Aug 30, 2019 at 2:06 AM Alex Bennée <alex.bennee@linaro.org> wrote: > > > Alistair Francis <alistair23@gmail.com> writes: > > > On Thu, Aug 29, 2019 at 5:05 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote: > >> > >> On 2019/8/29 上午5:34, Alistair Francis wrote: > >> > On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote: > >> >> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25 > >> >> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com> > >> >> --- > >> >> fpu/softfloat.c | 119 + > >> >> include/fpu/softfloat.h | 4 + > >> >> linux-user/riscv/cpu_loop.c | 8 +- > >> >> target/riscv/Makefile.objs | 2 +- > >> >> target/riscv/cpu.h | 30 + > >> >> target/riscv/cpu_bits.h | 15 + > >> >> target/riscv/cpu_helper.c | 7 + > >> >> target/riscv/csr.c | 65 +- > >> >> target/riscv/helper.h | 354 + > >> >> target/riscv/insn32.decode | 374 +- > >> >> target/riscv/insn_trans/trans_rvv.inc.c | 484 + > >> >> target/riscv/translate.c | 1 + > >> >> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++ > >> >> 13 files changed, 28017 insertions(+), 9 deletions(-) > >> >> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c > >> >> create mode 100644 target/riscv/vector_helper.c > >> >> > >> > Hello, > >> > > >> > Thanks for the patch! > >> > > >> > As others have pointed out you will need to split the patch up into > >> > multiple smaller patches, otherwise it is too hard to review almost > >> > 30,000 lines of code. > >> > >> Hi, Alistair > >> > >> I'm so sorry for the inconvenience. It will be a patch set with a cover > >> letter in V2. > > > > No worries. > > > >> > >> > Can you also include a cover letter with your patch series describing > >> > how you are testing this? AFAIK vector extension support isn't in any > >> > compiler so I'm assuming you are handwriting the assembly or have > >> > toolchain patches. Either way it will help if you can share that so > >> > others can test your implementation. > >> > >> Yes, it's handwriting assembly. The assembler in Binutils has support > >> Vector extension. First define an function test_vadd_vv_8 in assembly > >> and then it can be called from a C program. > >> > >> The function is something like > >> > >> /* vadd.vv */ > >> TEST_FUNC(test_vadd_vv_8) > >> vsetvli t1, x0, e8, m2 > >> vlb.v v6, (a4) > >> vsb.v v6, (a3) > >> vsetvli t1, a0, e8, m2 > >> vlb.v v0, (a1) > >> vlb.v v2, (a2) > >> vadd.vv v4, v0, v2 > >> vsb.v v4, (a3) > >> ret > >> .size test_vadd_vv_8, .-test_vadd_vv_8 > > > > If possible it might be worth releasing the code that you are using for testing. > > > >> > >> It takes more time to test than to implement the instructions. Maybe > >> there is some better test method or some forced test cases in QEMU. > >> Could you give me some advice for testing? > > > > Richard's idea of risu seems like a good option. > > > > Thinking about it a bit more we are going to have other extensions in > > the future that will need assembly testing so setting up a test > > framework seems like a good idea. I am happy to help try and get this > > going as well. Ah, I looked into this more and it compares it to hardware running the same binary. In this case there is no hardware so that doesn't work too well. What we could do though, is compare it to Spike (which I think has the vector instructions?) which would have the same effect. > > tests/tcg already has the bits you need for both linux-user and system > based testing. The main problem is getting a version of gcc that is new > enough to emit the newer instructions. 
I recently updated the images to > buster so gcc is pretty recent now (8.3). In this case there is no GCC with the new instructions. > > I did start down the road of a general "op" test frame work which tried > to come up with a common framework/boilerplate so all you needed to do > was supply a new function (possible with a hex encoded instruction) and > a list of expected inputs and outputs: > > https://github.com/stsquad/qemu/commits/testing/generic-op-tester > > I suspect it was over engineered but perhaps it would be worth reviving > it (or something like it) to make adding a simple single instruction > test case with minimal additional verbiage? That would be interesting, I'll take a look. Alistair > > > > > Alistair > > > >> > >> Best Regards, > >> > >> Zhiwei > >> > >> > Alex and Richard have kindly started the review. Once you have > >> > addressed their comments and split this patch up into smaller patches > >> > you can send a v2 and we can go from there. > >> > > >> > Once again thanks for doing this implementation for QEMU! > >> > > >> > Alistair > >> > > > > -- > Alex Bennée
On 2019/8/30 5:50 AM, Alistair Francis wrote:

> On Thu, Aug 29, 2019 at 5:05 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote: >> On 2019/8/29 上午5:34, Alistair Francis wrote: >>> On Wed, Aug 28, 2019 at 12:04 AM liuzhiwei <zhiwei_liu@c-sky.com> wrote: >>>> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25 >>>> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com> >>>> --- >>>> fpu/softfloat.c | 119 + >>>> include/fpu/softfloat.h | 4 + >>>> linux-user/riscv/cpu_loop.c | 8 +- >>>> target/riscv/Makefile.objs | 2 +- >>>> target/riscv/cpu.h | 30 + >>>> target/riscv/cpu_bits.h | 15 + >>>> target/riscv/cpu_helper.c | 7 + >>>> target/riscv/csr.c | 65 +- >>>> target/riscv/helper.h | 354 + >>>> target/riscv/insn32.decode | 374 +- >>>> target/riscv/insn_trans/trans_rvv.inc.c | 484 + >>>> target/riscv/translate.c | 1 + >>>> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++ >>>> 13 files changed, 28017 insertions(+), 9 deletions(-) >>>> create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c >>>> create mode 100644 target/riscv/vector_helper.c >>>> >>> Hello, >>> >>> Thanks for the patch! >>> >>> As others have pointed out you will need to split the patch up into >>> multiple smaller patches, otherwise it is too hard to review almost >>> 30,000 lines of code. >> Hi, Alistair >> >> I'm so sorry for the inconvenience. It will be a patch set with a cover >> letter in V2. > No worries. > >>> Can you also include a cover letter with your patch series describing >>> how you are testing this? AFAIK vector extension support isn't in any >>> compiler so I'm assuming you are handwriting the assembly or have >>> toolchain patches. Either way it will help if you can share that so >>> others can test your implementation. >> Yes, it's handwriting assembly. The assembler in Binutils has support >> Vector extension. First define an function test_vadd_vv_8 in assembly >> and then it can be called from a C program. >> >> The function is something like >> >> /* vadd.vv */ >> TEST_FUNC(test_vadd_vv_8) >> vsetvli t1, x0, e8, m2 >> vlb.v v6, (a4) >> vsb.v v6, (a3) >> vsetvli t1, a0, e8, m2 >> vlb.v v0, (a1) >> vlb.v v2, (a2) >> vadd.vv v4, v0, v2 >> vsb.v v4, (a3) >> ret >> .size test_vadd_vv_8, .-test_vadd_vv_8 > If possible it might be worth releasing the code that you are using for testing.

Yes, but I haven't found a good place to release this test code yet.

> >> It takes more time to test than to implement the instructions. Maybe >> there is some better test method or some forced test cases in QEMU. >> Could you give me some advice for testing? > Richard's idea of risu seems like a good option.

All the test cases will be validated against Spike, which supports the same vector specification. But this cross-validation work may be delayed until V3. I will split the patch and address the comments as soon as possible, to ensure that the patch V2 can be sent next week. Would that be all right?

> > Thinking about it a bit more we are going to have other extensions in > the future that will need assembly testing so setting up a test > framework seems like a good idea. I am happy to help try and get this > going as well. > > Alistair

There is usually a big difference between one new ISA extension and the next, so I doubt a fully general framework is possible. A very light framework covering building, aided input generation, result validation, and reporting might be OK.

Best Regards,

Zhiwei

>> Best Regards, >> >> Zhiwei >> >>> Alex and Richard have kindly started the review.
Once you have >>> addressed their comments and split this patch up into smaller patches >>> you can send a v2 and we can go from there. >>> >>> Once again thanks for doing this implementation for QEMU! >>> >>> Alistair >>>
On 2019/8/29 11:14 PM, Richard Henderson wrote:

> On 8/29/19 5:00 AM, liuzhiwei wrote: >> Maybe there is some better test method or some forced test cases in QEMU. Could >> you give me some advice for testing? > If you have hardware, or another simulator, RISU is very good > for testing these sorts of things. > > See https://git.linaro.org/people/pmaydell/risu.git > > You'll need to write new support for RISC-V, but it's not hard > and we can help out with that. > > > r~ >

Hi, Richard

Thank you for your advice. I will run the test cases in Spike for cross-validation first.

Best Regards,

Zhiwei
On 2019/8/29 11:09 PM, Richard Henderson wrote:

> On 8/29/19 5:45 AM, liuzhiwei wrote: >> Even in qemu, it may be some situations that VSTART != 0. For example, a load >> instruction leads to a page fault exception in a middle position. If VSTART == >> 0, some elements that had been loaded before the exception will be loaded once >> again. > Alternately, you can validate all of the pages before performing any memory > operations. At which point there will never be an exception in the middle.

As a vector instruction may access memory across many pages, is there any way to validate the pages? A page table walk? Or some TLB APIs?

> As it turns out, you *must* do this in order to allow watchpoints to work > correctly. David Hildebrand and I are at this moment fixing this aspect of > watchpoints for s390x. > > See https://lists.gnu.org/archive/html/qemu-devel/2019-08/msg05979.html

I am interested in the watchpoint implementation, and I once implemented user-mode watchpoints in the wild. A watchpoint backtrace looks like this:

#0 cpu_watchpoint_address_matches (wp=0x555556228110, addr=536871072, len=1) at qemu/exec.c:1094
#1 0x000055555567204f in check_watchpoint (offset=160, len=1, attrs=..., flags=2) at qemu/exec.c:2803
#2 0x0000555555672379 in watch_mem_write (opaque=0x0, addr=536871072, val=165, size=1, attrs=...) at qemu/exec.c:2878
#3 0x00005555556d44bb in memory_region_write_with_attrs_accessor (mr=0x5555561292e0 <io_mem_watch>, addr=536871072, value=0x7fffedffe2c8, size=1, shift=0, mask=255, attrs=...) at qemu/memory.c:553
#4 0x00005555556d45de in access_with_adjusted_size (addr=536871072, value=0x7fffedffe2c8, size=1, access_size_min=1, access_size_max=8, access_fn=0x5555556d43cd <memory_region_write_with_attrs_accessor>, mr=0x5555561292e0 <io_mem_watch>, attrs=...) at qemu/memory.c:594
#5 0x00005555556d7247 in memory_region_dispatch_write (mr=0x5555561292e0 <io_mem_watch>, addr=536871072, data=165, size=1, attrs=...) at qemu/memory.c:1480
#6 0x00005555556f0d13 in io_writex (env=0x5555561efb58, iotlbentry=0x5555561f5398, mmu_idx=1, val=165, addr=536871072, retaddr=0, recheck=false, size=1) at qemu/accel/tcg/cputlb.c:909
#7 0x00005555556f19a6 in io_writeb (env=0x5555561efb58, mmu_idx=1, index=0, val=165 '\245', addr=536871072, retaddr=0, recheck=false) at qemu/accel/tcg/softmmu_template.h:268
#8 0x00005555556f1b54 in helper_ret_stb_mmu (env=0x5555561efb58, addr=536871072, val=165 '\245', oi=1, retaddr=0) at qemu/accel/tcg/softmmu_template.h:304
#9 0x0000555555769f06 in cpu_stb_data_ra (env=0x5555561efb58, ptr=536871072, v=165, retaddr=0) at qemu/include/exec/cpu_ldst_template.h:182
#10 0x0000555555769f80 in cpu_stb_data (env=0x5555561efb58, ptr=536871072, v=165) at /qemu/include/exec/cpu_ldst_template.h:194
#11 0x000055555576a913 in csky_cpu_stb_data (env=0x5555561efb58, vaddr=536871072, data=165 '\245') at qemu/target/csky/csky_ldst.c:48
#12 0x000055555580ba7d in helper_vdsp2_vstru_n (env=0x5555561efb58, insn=4167183360) at qemu/target/csky/op_vdsp2.c:1317

This path does not go through the probe_write() used in the patch. Could you give more details, or a test case where a watchpoint doesn't work correctly?

> > r~ >
On 2019/8/29 10:06 PM, Chih-Min Chao wrote:

> Hi Liuzhiwei, > > Some comments: > 1. vector extension allows flexible implementation. It is better > to describe the limitations of the current implementation (such as > vlen/elen/slen), supported sections and unsupported features.

Thanks! All of these will be described in patch V2.

> 2. there should be cfg.ext_v to turn on vector extension from > command line

I will add the vector extension to the cpu "any". Is that all right?

> 3. from license > It should be "Copyright (c) 2019 C-SKY Limited, All > rights reserved." but not "2011 ~ 2019" > > It is huge work; thanks for your contribution. > > chihmin > > On Wed, Aug 28, 2019 at 3:06 PM liuzhiwei <zhiwei_liu@c-sky.com> wrote: > > Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25 > Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com> > --- > fpu/softfloat.c | 119 + > include/fpu/softfloat.h | 4 + > linux-user/riscv/cpu_loop.c | 8 +- > target/riscv/Makefile.objs | 2 +- > target/riscv/cpu.h | 30 + > target/riscv/cpu_bits.h | 15 + > target/riscv/cpu_helper.c | 7 + > target/riscv/csr.c | 65 +- > target/riscv/helper.h | 354 + > target/riscv/insn32.decode | 374 +- > target/riscv/insn_trans/trans_rvv.inc.c | 484 + > target/riscv/translate.c | 1 + > target/riscv/vector_helper.c | 26563 > ++++++++++++++++++++++++++++++ > 13 files changed, 28017 insertions(+), 9 deletions(-) > create mode 100644 target/riscv/insn_trans/trans_rvv.inc.c > create mode 100644 target/riscv/vector_helper.c > >
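On point 2, the usual shape of such a switch in target/riscv is a bool in the cpu config plus a qdev property. A sketch only; the names (cfg.ext_v, RV('V')) are assumed by analogy with the existing ext_* flags, not taken from any posted patch:

    /* In the RISCVCPU config struct (target/riscv/cpu.h): */
    bool ext_v;

    /* In the property list (target/riscv/cpu.c): */
    DEFINE_PROP_BOOL("v", RISCVCPU, cfg.ext_v, false),

    /* In riscv_cpu_realize(), when assembling the misa bits: */
    if (cpu->cfg.ext_v) {
        target_misa |= RV('V');   /* RV(x) is the existing misa-bit helper */
    }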
On 2019/8/29 2:54 AM, Richard Henderson wrote:

> On 8/27/19 7:36 PM, liuzhiwei wrote: >> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25 >> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com> >> --- >> fpu/softfloat.c | 119 + >> include/fpu/softfloat.h | 4 + >> linux-user/riscv/cpu_loop.c | 8 +- >> target/riscv/Makefile.objs | 2 +- >> target/riscv/cpu.h | 30 + >> target/riscv/cpu_bits.h | 15 + >> target/riscv/cpu_helper.c | 7 + >> target/riscv/csr.c | 65 +- >> target/riscv/helper.h | 354 + >> target/riscv/insn32.decode | 374 +- >> target/riscv/insn_trans/trans_rvv.inc.c | 484 + >> target/riscv/translate.c | 1 + >> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++ >> 13 files changed, 28017 insertions(+), 9 deletions(-) > As Alex mentioned, this is *far* too big to be presented as a single patch.

OK, I will split it into a patch set in V2.

> >> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h >> index 3ff3fa5..3b0754c 100644 >> --- a/include/fpu/softfloat.h >> +++ b/include/fpu/softfloat.h >> @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status); >> float16 float16_sqrt(float16, float_status *status); >> int float16_compare(float16, float16, float_status *status); >> int float16_compare_quiet(float16, float16, float_status *status); >> +int float16_unordered_quiet(float16, float16, float_status *status); >> +int float16_le(float16, float16, float_status *status); >> +int float16_lt(float16, float16, float_status *status); >> +int float16_eq_quiet(float16, float16, float_status *status); > As Alex mentioned, none of these changes are required, as all > functionality is provided by float16_compare{,_quiet}.

Yes, I will use float16_compare instead.

> >> diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c >> index 12aa3c0..b01548a 100644 >> --- a/linux-user/riscv/cpu_loop.c >> +++ b/linux-user/riscv/cpu_loop.c >> @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env) >> signum = 0; >> sigcode = 0; >> sigaddr = 0; >> - >> + if (env->foflag) { >> + if (env->vfp.vl != 0) { >> + env->foflag = false; >> + env->pc += 4; >> + continue; >> + } > This is most definitely not the correct way to implement first-fault. > > You need to have a look at target/arm/sve_helper.c, e.g. sve_ldff1_r, > where we test pages for validity with tlb_vaddr_to_host.

Why should the pages be tested for validity? If there is a page fault at run time, that is exactly the case the fault-only-first instruction is meant for.

>> + /* vector coprocessor state. */ >> + struct { >> + union VECTOR { >> + float64 f64[VUNIT(64)]; >> + float32 f32[VUNIT(32)]; >> + float16 f16[VUNIT(16)]; >> + target_ulong ul[VUNIT(sizeof(target_ulong))]; >> + uint64_t u64[VUNIT(64)]; >> + int64_t s64[VUNIT(64)]; >> + uint32_t u32[VUNIT(32)]; >> + int32_t s32[VUNIT(32)]; >> + uint16_t u16[VUNIT(16)]; >> + int16_t s16[VUNIT(16)]; >> + uint8_t u8[VUNIT(8)]; >> + int8_t s8[VUNIT(8)]; >> + } vreg[32]; >> + target_ulong vxrm; >> + target_ulong vxsat; >> + target_ulong vl; >> + target_ulong vstart; >> + target_ulong vtype; >> + float_status fp_status; >> + } vfp; > You've obviously copied "vfp" from target/arm. Drop that. It makes no sense > in the context of risc-v. > I'm not sure that vreg[].element[] really makes the most sense in the context > of how risc-v rearranges its elements. It will almost certainly fail clang > validators, if enabled, since you'll be indexing beyond the end of vreg[n] into > vreg[n+1]. > > It might be best to have a single array: > > union { > uint64_t u64[32 * VLEN / 64]; > ...
> uint8_t u8[32 * VLEN / 8]; > } velt; > > This is clearer to the compiler that this is a single block of memory that we > can index as we please.

A single array is a good idea. But vreg[] is better for understanding, as it preserves the register concept.

> Note that float64/float32/float16 are legacy. They will always be equivalent > to the unsigned integer types of the same size. > > Is there really any vector operation at all that is dependent on XLEN? If not, > then there is no reason to confuse things by including target_ulong. >

OK.

>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c >> index e32b612..405caf6 100644 >> --- a/target/riscv/cpu_helper.c >> +++ b/target/riscv/cpu_helper.c >> @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs) >> [PRV_H] = RISCV_EXCP_H_ECALL, >> [PRV_M] = RISCV_EXCP_M_ECALL >> }; >> + if (env->foflag) { >> + if (env->vfp.vl != 0) { >> + env->foflag = false; >> + env->pc += 4; >> + return; >> + } >> + } > Again, not the way to implement first-fault. > > In particular, you haven't even verified that do_interrupt has been called on > behalf of a RISCV_EXCP_LOAD_PAGE_FAULT. This could be a timer tick.

I don't think this could be a timer tick: a timer tick cannot interrupt an instruction in the middle in QEMU. According to the specification, if a RISCV_EXCP_LOAD_PAGE_FAULT occurs within the instruction and some elements have already been loaded or stored, the remaining elements will not be processed again after returning from the exception. If no elements have been loaded or stored yet, the elements will be processed again after returning from the exception.

> >> +#define MAX_U8 ((uint8_t)0xff) >> +#define MIN_U8 ((uint8_t)0x0) >> +#define MAX_S8 ((int8_t)0x7f) >> +#define MIN_S8 ((int8_t)0x80) >> +#define SIGNBIT16 (1 << 15) >> +#define MAX_U16 ((uint16_t)0xffff) >> +#define MIN_U16 ((uint16_t)0x0) >> +#define MAX_S16 ((int16_t)0x7fff) >> +#define MIN_S16 ((int16_t)0x8000) >> +#define SIGNBIT32 (1 << 31) >> +#define MAX_U32 ((uint32_t)0xffffffff) >> +#define MIN_U32 ((uint32_t)0x0) >> +#define MAX_S32 ((int32_t)0x7fffffff) >> +#define MIN_S32 ((int32_t)0x80000000) >> +#define SIGNBIT64 ((uint64_t)1 << 63) >> +#define MAX_U64 ((uint64_t)0xffffffffffffffff) >> +#define MIN_U64 ((uint64_t)0x0) >> +#define MAX_S64 ((int64_t)0x7fffffffffffffff) >> +#define MIN_S64 ((int64_t)0x8000000000000000) > Why are you replicating INT8_MIN et al?

Thanks, they will be removed.

> >> +static target_ulong vector_get_index(CPURISCVState *env, int rs1, int rs2, >> + int index, int mem, int width, int nf) >> +{ >> + target_ulong abs_off, base = env->gpr[rs1]; >> + target_long offset; >> + switch (width) { >> + case 8: >> + offset = sign_extend(env->vfp.vreg[rs2].s8[index], 8) + nf * mem; >> + break; >> + case 16: >> + offset = sign_extend(env->vfp.vreg[rs2].s16[index], 16) + nf * mem; >> + break; >> + case 32: >> + offset = sign_extend(env->vfp.vreg[rs2].s32[index], 32) + nf * mem; >> + break; >> + case 64: >> + offset = env->vfp.vreg[rs2].s64[index] + nf * mem; >> + break; >> + default: >> + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); > This is broken. You cannot use GETPC() anywhere except in the outermost > HELPER(). Otherwise you're not computing the return address back into the > code_gen_buffer, which is what is required to properly unwind the guest state.

Yes, I will fix it.
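The standard pattern for the GETPC() issue is to capture the return address once in the outermost helper and pass it down as a parameter. A sketch only, with parameter lists abbreviated and the outer helper name guessed from the patch's gen_helper_vector_ prefix:

    /* Inner function now receives the retaddr instead of calling GETPC(). */
    static target_ulong vector_get_index(CPURISCVState *env, int rs2,
                                         int index, int width, uintptr_t ra)
    {
        switch (width) {
        case 8:
            return env->vfp.vreg[rs2].s8[index];
        /* ... other widths ... */
        default:
            /* ra unwinds back into the code_gen_buffer correctly. */
            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra);
        }
    }

    void helper_vector_vlxb_v(CPURISCVState *env, uint32_t rs2,
                              uint32_t index, uint32_t width)
    {
        uintptr_t ra = GETPC();   /* valid only in the outermost helper */
        target_ulong off = vector_get_index(env, rs2, index, width, ra);
        /* ... perform the access with cpu_ldsb_data_ra(env, base + off, ra) ... */
    }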
> >> +static inline bool vector_vtype_ill(CPURISCVState *env) >> +{ >> + if ((env->vfp.vtype >> (sizeof(target_ulong) - 1)) & 0x1) { >> + return true; >> + } >> + return false; >> +} >> + >> +static inline void vector_vtype_set_ill(CPURISCVState *env) >> +{ >> + env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) - 1); >> + return; >> +} >> + >> +static inline int vector_vtype_get_sew(CPURISCVState *env) >> +{ >> + return (env->vfp.vtype >> 2) & 0x7; >> +} >> + >> +static inline int vector_get_width(CPURISCVState *env) >> +{ >> + return 8 * (1 << vector_vtype_get_sew(env)); >> +} >> + >> +static inline int vector_get_lmul(CPURISCVState *env) >> +{ >> + return 1 << (env->vfp.vtype & 0x3); >> +} >> + >> +static inline int vector_get_vlmax(CPURISCVState *env) >> +{ >> + return vector_get_lmul(env) * VLEN / vector_get_width(env); >> +} >> + >> +static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int width, >> + int lmul, int index) >> +{ >> + int mlen = width / lmul; >> + int idx = (index * mlen) / 8; >> + int pos = (index * mlen) % 8; >> + >> + return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1); >> +} > I would strongly encourage you place the components of vtype within tb_flags > via cpu_get_tb_cpu_state. This would allow you to move quite a few checks from > run-time to translation-time.

Good idea, though somewhat difficult.

> Recall that translation happens once (per configuration), whereas execution > happens many times. Obviously, the more configurations that we create, the > more translation that must happen.

All check code will be moved from execution time to translation time.

> But the vtypei argument to vsetvli is a good choice, because it is constant, > relates directly to the compiled code, and is unrelated to the length of the > data being processed.

A good choice for what? I don't quite understand.

> With that, you can verify at translation: > > (1) vill > (2) v[n], for (n % lmul) != 0 > (3) v[n] overlapping v[0] for masked/carry operations, with lmul > 1 > > and > > (4) you can arrange the helpers so that instead of 1 helper that has to > handle all SEW, you have N helpers, each handling a different SEW.

For all vector instructions or just vsetvli?

> And with all of this done, I believe you no longer need to pass the register > number to the helper. You can pass the address of v[n], which is much more > like how the tcg generic vector support works. > > Whether or not to include VL in tb_flags is a harder choice. Certainly not the > exact value of VL, as that would lead to different translations for every loop > tail. But it might be reasonable to include (VSTART == 0 && VL == VLMAX) as a > single bit. Knowing that this condition is true would allow some use of the > tcg generic vector support.

The (ill, lmul, sew) fields of vtype will be placed within tb_flags, along with the (VSTART == 0 && VL == VLMAX) bit. So the vector extension will take at least 8 bits of tb_flags.

> E.g. vadd.vv could be > > if (masked) { > switch (SEW) { > case MO_8: > gen_helper_vadd8_mask(...); > break; > ... > } > } else if (vl_eq_vlmax) { > tcg_gen_gvec_add(SEW, vreg_ofs(vd), vreg_ofs(vs2), vreg_ofs(vs1), > VLEN * LMUL, VLEN * LMUL); > } else { > switch (SEW) { > case MO_8: > gen_helper_vadd8(...); > break; > ... > } > } > > Or, equivalently, pack pointers to the actual generator functions into a > structure so that this code structure can be shared between many instructions.

It's quicker to use TCG's generic vectors. However, I have one problem with supporting both a command-line VLEN and vreg_ofs. As in SVE, vreg_ofs is the offset from cpu_env.

> Bear in mind that all tcg gvec operations operate strictly upon lanes. I.e. > > vd[x] = vs1[x] op vs2[x] > > thus the actual arrangement of the elements in storage is irrelevant and SLEN > need not be considered here.

Thank you very much. Although it is somewhat difficult for me to address your comments, they are very helpful.
Best Regards, Zhiwei > > > r~ >
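To make the tb_flags suggestion above concrete, one possible shape uses the FIELD macros from hw/registerfields.h. A sketch only: the field positions and widths are illustrative, and the existing mstatus.FS handling in cpu_get_tb_cpu_state() is omitted:

    FIELD(TB_FLAGS, VILL, 0, 1)
    FIELD(TB_FLAGS, SEW, 1, 3)
    FIELD(TB_FLAGS, LMUL, 4, 2)
    FIELD(TB_FLAGS, VL_EQ_VLMAX, 6, 1)

    static inline void cpu_get_tb_cpu_state(CPURISCVState *env,
                                            target_ulong *pc,
                                            target_ulong *cs_base,
                                            uint32_t *pflags)
    {
        uint32_t flags = 0;

        *pc = env->pc;
        *cs_base = 0;
        if (vector_vtype_ill(env)) {
            flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
        } else {
            flags = FIELD_DP32(flags, TB_FLAGS, SEW,
                               vector_vtype_get_sew(env));
            flags = FIELD_DP32(flags, TB_FLAGS, LMUL, env->vfp.vtype & 0x3);
            flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX,
                               env->vfp.vstart == 0 &&
                               env->vfp.vl == vector_get_vlmax(env));
        }
        *pflags = flags;
    }

The translator then reads these fields once from the TB flags at translation time, instead of querying env at run time, and vl_eq_vlmax in the vadd.vv example above comes straight from the VL_EQ_VLMAX field.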
On 9/2/19 2:43 AM, liuzhiwei wrote: >> This is most definitely not the correct way to implement first-fault. >> >> You need to have a look at target/arm/sve_helper.c, e.g. sve_ldff1_r, >> where we test pages for validity with tlb_vaddr_to_host. > Why should test pages for validity? If there is a page fault in running time, > it just the case why it must use the fault-only-first instruction. So that the helper does not fault for the Nth access, N > 1. You test to see if the page has a mapping, and if it doesn't, you end the instruction, without going through the exception path that I have objections to. Except for gather loads, you don't have to test for every access, only at page boundaries. And then you may also arrange to use direct host access to the pages that you've validated. Again, have a look at sve_ldff1_r. > A single array is a good idea. But vreg[] will be better for understanding as it preserve the register concepts. A function access to the registers would be just as good for that. r~
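A minimal sketch of that scheme for a unit-stride, unmasked fault-only-first byte load, with vstart and register-layout details simplified and all names assumed rather than taken from the patch:

    void helper_vector_vlbff_v(CPURISCVState *env, void *vd0, target_ulong addr)
    {
        int8_t *vd = vd0;
        uintptr_t ra = GETPC();
        int mmu_idx = cpu_mmu_index(env, false);
        target_ulong i;

        /* Element 0 must fault normally if its page is unmapped. */
        vd[0] = cpu_ldsb_data_ra(env, addr, ra);

        for (i = 1; i < env->vfp.vl; i++) {
            target_ulong ea = addr + i;
            /* Re-probe only when crossing into a new page. */
            if ((ea & ~TARGET_PAGE_MASK) == 0 &&
                !tlb_vaddr_to_host(env, ea, MMU_DATA_LOAD, mmu_idx)) {
                env->vfp.vl = i;   /* truncate vl; no trap for elements > 0 */
                return;
            }
            vd[i] = cpu_ldsb_data_ra(env, ea, ra);
        }
    }

This ends the instruction by truncating vl when a later page is unmapped, so the exception path in cpu_loop()/do_interrupt() never needs the foflag hack.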
On 9/2/19 12:45 AM, liuzhiwei wrote: > > On 2019/8/29 下午11:09, Richard Henderson wrote: >> On 8/29/19 5:45 AM, liuzhiwei wrote: >>> Even in qemu, it may be some situations that VSTART != 0. For example, a load >>> instruction leads to a page fault exception in a middle position. If VSTART == >>> 0, some elements that had been loaded before the exception will be loaded once >>> again. >> Alternately, you can validate all of the pages before performing any memory >> operations. At which point there will never be an exception in the middle. > > As a vector instruction may access memory across many pages, is there any way > to validate the pages? Page table walk ?Or some TLB APIs? Yes, there are TLB APIs. Several of them, depending on what is needed. > #0 cpu_watchpoint_address_matches (wp=0x555556228110, addr=536871072, len=1) > at qemu/exec.c:1094 > #1 0x000055555567204f in check_watchpoint (offset=160, len=1, attrs=..., > flags=2) at qemu/exec.c:2803 > #2 0x0000555555672379 in watch_mem_write (opaque=0x0, addr=536871072, val=165, > size=1, attrs=...) at qemu/exec.c:2878 > #3 0x00005555556d44bb in memory_region_write_with_attrs_accessor > (mr=0x5555561292e0 <io_mem_watch>, addr=536871072, value=0x7fffedffe2c8, > size=1, shift=0, mask=255, attrs=...) > at qemu/memory.c:553 > #4 0x00005555556d45de in access_with_adjusted_size (addr=536871072, > value=0x7fffedffe2c8, size=1, access_size_min=1, access_size_max=8, > access_fn=0x5555556d43cd <memory_region_write_with_attrs_accessor>, > mr=0x5555561292e0 <io_mem_watch>, attrs=...) at qemu/memory.c:594 > #5 0x00005555556d7247 in memory_region_dispatch_write (mr=0x5555561292e0 > <io_mem_watch>, addr=536871072, data=165, size=1, attrs=...) at qemu/memory.c:1480 > #6 0x00005555556f0d13 in io_writex (env=0x5555561efb58, > iotlbentry=0x5555561f5398, mmu_idx=1, val=165, addr=536871072, retaddr=0, > recheck=false, size=1) at qemu/accel/tcg/cputlb.c:909 > #7 0x00005555556f19a6 in io_writeb (env=0x5555561efb58, mmu_idx=1, index=0, > val=165 '\245', addr=536871072, retaddr=0, recheck=false) at > qemu/accel/tcg/softmmu_template.h:268 > #8 0x00005555556f1b54 in helper_ret_stb_mmu (env=0x5555561efb58, > addr=536871072, val=165 '\245', oi=1, retaddr=0) at > qemu/accel/tcg/softmmu_template.h:304 > #9 0x0000555555769f06 in cpu_stb_data_ra (env=0x5555561efb58, ptr=536871072, > v=165, retaddr=0) at qemu/include/exec/cpu_ldst_template.h:182 > #10 0x0000555555769f80 in cpu_stb_data (env=0x5555561efb58, ptr=536871072, > v=165) at /qemu/include/exec/cpu_ldst_template.h:194 > #11 0x000055555576a913 in csky_cpu_stb_data (env=0x5555561efb58, > vaddr=536871072, data=165 '\245') at qemu/target/csky/csky_ldst.c:48 > #12 0x000055555580ba7d in helper_vdsp2_vstru_n (env=0x5555561efb58, > insn=4167183360) at qemu/target/csky/op_vdsp2.c:1317 > > The path is not related to probe_write in the patch(). Of course. It wasn't supposed to be. > Could you give more details or a test case where watchpoint doesn't work > correctly? If the store partially, but not completely, overlaps the watchpoint. This is obviously much easier to do with large vector operations than with normal integer operations. In this case, we may have completed some of the stores before encountering the watchpoint. Which, inside check_watchpoint(), will longjmp back to the cpu main loop. Now we have a problem: the store is partially complete and it should not be. Therefore, we now have patches queued in tcg-next that adjust probe_write to perform both access and watchpoint tests. 
There is still target-specific code that must be adjusted to match, so there are not currently any examples in the tree to show. However, the idea is:

(1) Instructions that perform more than one host store must probe the entire range to be stored before performing any stores.

(2) Instructions that perform more than one host load must either probe the entire range to be loaded, or collect the data in temporary storage. If not using probes, writeback to the register file must be delayed until after all loads are done.

(3) Any one probe may not cross a page boundary; splitting of the access across pages must be done by the helper.

r~
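As a sketch of rules (1) and (3) combined, a vector store helper might first validate the whole range with one probe per page before performing any element stores. The function name is illustrative, and probe_write here is assumed to be the extended version just described, which also checks watchpoints:

    static void probe_store_range(CPURISCVState *env, target_ulong addr,
                                  target_ulong len, int mmu_idx, uintptr_t ra)
    {
        /* Assumes len > 0 and that addr + len does not wrap around. */
        target_ulong page = addr & TARGET_PAGE_MASK;
        target_ulong last = addr + len - 1;

        for (; page <= (last & TARGET_PAGE_MASK); page += TARGET_PAGE_SIZE) {
            target_ulong start = MAX(addr, page);
            target_ulong end = MIN(last, page | ~TARGET_PAGE_MASK);

            /* One probe per page, per rule (3). */
            probe_write(env, start, end - start + 1, mmu_idx, ra);
        }
    }

Only after this loop completes without a fault does the helper perform the actual stores, so a watchpoint or page fault can never leave a partially completed vector store behind.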
Hi Richard,

Sorry to reply so late. Upstreaming is really difficult. I was really frustrated to receive so many difficult comments. It is hard for me to absorb them all, and it will take a lot of time to fix them up. Now I will move on.

On 2019/8/29 2:54, Richard Henderson wrote:

> On 8/27/19 7:36 PM, liuzhiwei wrote: >> Change-Id: I3cf891bc400713b95f47ecca82b1bf773f3dcb25 >> Signed-off-by: liuzhiwei <zhiwei_liu@c-sky.com> >> --- >> fpu/softfloat.c | 119 + >> include/fpu/softfloat.h | 4 + >> linux-user/riscv/cpu_loop.c | 8 +- >> target/riscv/Makefile.objs | 2 +- >> target/riscv/cpu.h | 30 + >> target/riscv/cpu_bits.h | 15 + >> target/riscv/cpu_helper.c | 7 + >> target/riscv/csr.c | 65 +- >> target/riscv/helper.h | 354 + >> target/riscv/insn32.decode | 374 +- >> target/riscv/insn_trans/trans_rvv.inc.c | 484 + >> target/riscv/translate.c | 1 + >> target/riscv/vector_helper.c | 26563 ++++++++++++++++++++++++++++++ >> 13 files changed, 28017 insertions(+), 9 deletions(-) >> + /* vector coprocessor state. */ >> + struct { >> + union VECTOR { >> + float64 f64[VUNIT(64)]; >> + float32 f32[VUNIT(32)]; >> + float16 f16[VUNIT(16)]; >> + target_ulong ul[VUNIT(sizeof(target_ulong))]; >> + uint64_t u64[VUNIT(64)]; >> + int64_t s64[VUNIT(64)]; >> + uint32_t u32[VUNIT(32)]; >> + int32_t s32[VUNIT(32)]; >> + uint16_t u16[VUNIT(16)]; >> + int16_t s16[VUNIT(16)]; >> + uint8_t u8[VUNIT(8)]; >> + int8_t s8[VUNIT(8)]; >> + } vreg[32]; >> + target_ulong vxrm; >> + target_ulong vxsat; >> + target_ulong vl; >> + target_ulong vstart; >> + target_ulong vtype; >> + float_status fp_status; >> + } vfp; > You've obviously copied "vfp" from target/arm. Drop that. It makes no sense > in the context of risc-v. > > I'm not sure that vreg[].element[] really makes the most sense in the context > of how risc-v rearranges its elements.

Using vreg[].element[] was my gut feeling; it makes the code easiest to understand. As you said, viewing all the vector registers as a single block of memory is good for programming.

> It will almost certainly fail clang > validators, if enabled, since you'll be indexing beyond the end of vreg[n] into > vreg[n+1].

I'm sorry, but it's really hard for me to absorb this point. I don't know why clang will fail when indexing beyond the end of vreg[n] into vreg[n+1].

> It might be best to have a single array: > > union { > uint64_t u64[32 * VLEN / 64]; > ... > uint8_t u8[32 * VLEN / 8]; > } velt; > > This is clearer to the compiler that this is a single block of memory that we > can index as we please.

As Chih-Min Chao said in another part of the patch v2 thread, VLEN will be a property which can be specified from the command line. So the sub-struct may be defined as

struct {
    union {
        uint64_t *u64;
        int64_t *s64;
        uint32_t *u32;
        int32_t *s32;
        uint16_t *u16;
        int16_t *s16;
        uint8_t *u8;
        int8_t *s8;
    } mem;
    target_ulong vxrm;
    target_ulong vxsat;
    target_ulong vl;
    target_ulong vstart;
    target_ulong vtype;
} vext;

Will that be OK?
>> +static inline bool vector_vtype_ill(CPURISCVState *env)
>> +{
>> +    if ((env->vfp.vtype >> (sizeof(target_ulong) - 1)) & 0x1) {
>> +        return true;
>> +    }
>> +    return false;
>> +}
>> +
>> +static inline void vector_vtype_set_ill(CPURISCVState *env)
>> +{
>> +    env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) - 1);
>> +    return;
>> +}
>> +
>> +static inline int vector_vtype_get_sew(CPURISCVState *env)
>> +{
>> +    return (env->vfp.vtype >> 2) & 0x7;
>> +}
>> +
>> +static inline int vector_get_width(CPURISCVState *env)
>> +{
>> +    return 8 * (1 << vector_vtype_get_sew(env));
>> +}
>> +
>> +static inline int vector_get_lmul(CPURISCVState *env)
>> +{
>> +    return 1 << (env->vfp.vtype & 0x3);
>> +}
>> +
>> +static inline int vector_get_vlmax(CPURISCVState *env)
>> +{
>> +    return vector_get_lmul(env) * VLEN / vector_get_width(env);
>> +}
>> +
>> +static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int width,
>> +    int lmul, int index)
>> +{
>> +    int mlen = width / lmul;
>> +    int idx = (index * mlen) / 8;
>> +    int pos = (index * mlen) % 8;
>> +
>> +    return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1);
>> +}
> I would strongly encourage you to place the components of vtype within tb_flags
> via cpu_get_tb_cpu_state.  This would allow you to move quite a few checks from
> run-time to translation-time.
>
> Recall that translation happens once (per configuration), whereas execution
> happens many times.  Obviously, the more configurations that we create, the
> more translation that must happen.

All the check code will be moved from execution time to translation time.

> But the vtypei argument to vsetvli is a good choice, because it is constant,
> relates directly to the compiled code, and is unrelated to the length of the
> data being processed.
>
> With that, you can verify at translation:
>
>   (1) vill
>   (2) v[n], for (n % lmul) != 0
>   (3) v[n] overlapping v[0] for masked/carry operations, with lmul > 1
>
> and
>
>   (4) you can arrange the helpers so that instead of 1 helper that has to
>       handle all SEW, you have N helpers, each handling a different SEW.
>
> And with all of this done, I believe you no longer need to pass the register
> number to the helper.  You can pass the address of v[n], which is much more
> like how the tcg generic vector support works.
>
> Whether or not to include VL in tb_flags is a harder choice.  Certainly not the
> exact value of VL, as that would lead to different translations for every loop
> tail.  But it might be reasonable to include (VSTART == 0 && VL == VLMAX) as a
> single bit.  Knowing that this condition is true would allow some use of the
> tcg generic vector support.

The (ill, lmul, sew) components of vtype will be placed within tb_flags, as will the bit for (VSTART == 0 && VL == VLMAX). So it will take at least 8 bits of tb_flags for the vector extension.

> E.g. vadd.vv could be
>
>     if (masked) {
>         switch (SEW) {
>         case MO_8:
>             gen_helper_vadd8_mask(...);
>             break;
>         ...
>         }
>     } else if (vl_eq_vlmax) {
>         tcg_gen_gvec_add(SEW, vreg_ofs(vd), vreg_ofs(vs2), vreg_ofs(vs1),
>                          VLEN * LMUL, VLEN * LMUL);
>     } else {
>         switch (SEW) {
>         case MO_8:
>             gen_helper_vadd8(...);
>             break;
>         ...
>         }
>     }
>
> Or, equivalently, pack pointers to the actual generator functions into a
> structure so that this code structure can be shared between many instructions.

It's quicker to use the generic vector support of TCG. However, I have one problem with supporting both a command-line VLEN and vreg_ofs. As in SVE, vreg_ofs is the offset from cpu_env.
If the structure of the vector extension (to support a command-line VLEN) is

    struct {
        union {
            uint64_t *u64;
            int64_t  *s64;
            uint32_t *u32;
            int32_t  *s32;
            uint16_t *u16;
            int16_t  *s16;
            uint8_t  *u8;
            int8_t   *s8;
        } mem;
        target_ulong vxrm;
        target_ulong vxsat;
        target_ulong vl;
        target_ulong vstart;
        target_ulong vtype;
    } vext;

I can't find a way to get the direct offset of vreg from cpu_env. Maybe I should specify a max VLEN, the way SVE does?

Best Regards,
LIU Zhiwei

> Bear in mind that all tcg gvec operations operate strictly upon lanes.  I.e.
>
>     vd[x] = vs1[x] op vs2[x]
>
> thus the actual arrangement of the elements in storage is irrelevant and SLEN
> need not be considered here.
>
> r~
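A minimal sketch of the "pack pointers to the actual generator functions into a structure" idea from Richard's message above; the gen_helper_vadd* names follow his example and are assumptions, not helpers defined in this series:

    /* Per-SEW out-of-line helpers plus the inline gvec expander for one
     * instruction; many trans_* functions can share a single expansion
     * routine that consults a table like this. */
    typedef void gen_helper_opivv(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                  TCGv_env, TCGv_i32);

    typedef struct {
        gen_helper_opivv *fns[4];   /* indexed by SEW: MO_8 ... MO_64 */
        void (*gvec_fn)(unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
    } GenOPIVV;

    static const GenOPIVV vadd_vv_gen = {
        .fns = { gen_helper_vadd8, gen_helper_vadd16,
                 gen_helper_vadd32, gen_helper_vadd64 },
        .gvec_fn = tcg_gen_gvec_add,
    };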
On 12/18/19 11:11 PM, LIU Zhiwei wrote:
> I'm sorry that it's really hard to absorb your opinion. I don't know why clang
> will fail when indexing beyond the end of vreg[n] into vreg[n+1].

I thought for sure one of the address sanitizer checks would detect array bounds overrun. But it becomes irrelevant.

> As Chih-Min Chao said in another part of the PATCH V2 thread, VLEN will be a
> property which can be specified from the command line. So the sub-struct may
> be defined as
>
>     struct {
>         union {
>             uint64_t *u64;
>             int64_t  *s64;
>             uint32_t *u32;
>             int32_t  *s32;
>             uint16_t *u16;
>             int16_t  *s16;
>             uint8_t  *u8;
>             int8_t   *s8;
>         } mem;
>         target_ulong vxrm;
>         target_ulong vxsat;
>         target_ulong vl;
>         target_ulong vstart;
>         target_ulong vtype;
>     } vext;
>
> Will that be OK?

Pointers have consequences. It can be done, but I don't think it is ideal.

> The (ill, lmul, sew) components of vtype will be placed within tb_flags, as
> will the bit for (VSTART == 0 && VL == VLMAX). So it will take at least 8 bits
> of tb_flags for the vector extension.

Good.

> However, I have one problem with supporting both a command-line VLEN and
> vreg_ofs. As in SVE, vreg_ofs is the offset from cpu_env. If the structure of
> the vector extension (to support a command-line VLEN) is
>
>     struct {
>         union {
>             uint64_t *u64;
>             int64_t  *s64;
>             uint32_t *u32;
>             int32_t  *s32;
>             uint16_t *u16;
>             int16_t  *s16;
>             uint8_t  *u8;
>             int8_t   *s8;
>         } mem;
>         target_ulong vxrm;
>         target_ulong vxsat;
>         target_ulong vl;
>         target_ulong vstart;
>         target_ulong vtype;
>     } vext
>
> I can't find a way to get the direct offset of vreg from cpu_env.
>
> Maybe I should specify a max VLEN, the way SVE does?

I think a maximum vlen is best.  A command-line option to adjust vlen is all well and good, but there's no reason to have to support vlen=(1<<29).

Oh, and you probably need a minimum vlen of 16 bytes as well, otherwise you will run afoul of the assert in tcg-op-gvec.c that requires gvec operations to be aligned mod 16.

I think that all you need is

    uint64_t vreg[32 * MAX_VLEN / 8] QEMU_ALIGNED(16);

which gives us

    uint32_t vreg_ofs(DisasContext *ctx, int reg)
    {
        return offsetof(CPURISCVState, vreg) + reg * ctx->vlen;
    }

I don't see the point of a union for vreg.  I don't think you'll find that you actually use it at all.

You do need to document the element ordering that you're going to use for vreg. I.e. the mapping between the architectural vector register state and the emulation state.  You have two choices:

  (1) all bytes in host endianness (e.g. target/ppc)
  (2) bytes within each uint64_t in host endianness,
      but each uint64_t is little-endian (e.g. target/arm).

Both require some fixup when running on a big-endian host.

r~
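For illustration, here is how vreg_ofs() might be consumed on the translation side when VL == VLMAX; the DisasContext fields (vl_eq_vlmax, sew, lmul, vlen) are assumed to be distilled from tb_flags as discussed earlier, and trans_vadd_vv/arg_rmrr follow decodetree naming conventions rather than code from this series:

    static bool trans_vadd_vv(DisasContext *ctx, arg_rmrr *a)
    {
        if (a->vm && ctx->vl_eq_vlmax) {
            /* Whole-register case: hand straight to the generic gvec op. */
            tcg_gen_gvec_add(ctx->sew, vreg_ofs(ctx, a->rd),
                             vreg_ofs(ctx, a->rs2), vreg_ofs(ctx, a->rs1),
                             ctx->vlen * ctx->lmul, ctx->vlen * ctx->lmul);
            return true;
        }
        /* Masked or partial VL: fall back to a per-SEW helper call. */
        return false;
    }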
On 2019/12/20 4:38, Richard Henderson wrote:
> On 12/18/19 11:11 PM, LIU Zhiwei wrote:
>> I'm sorry that it's really hard to absorb your opinion. I don't know why
>> clang will fail when indexing beyond the end of vreg[n] into vreg[n+1].
> I thought for sure one of the address sanitizer checks would detect array
> bounds overrun. But it becomes irrelevant.
>
>> As Chih-Min Chao said in another part of the PATCH V2 thread, VLEN will be a
>> property which can be specified from the command line. So the sub-struct may
>> be defined as
>>
>>     struct {
>>         union {
>>             uint64_t *u64;
>>             int64_t  *s64;
>>             uint32_t *u32;
>>             int32_t  *s32;
>>             uint16_t *u16;
>>             int16_t  *s16;
>>             uint8_t  *u8;
>>             int8_t   *s8;
>>         } mem;
>>         target_ulong vxrm;
>>         target_ulong vxsat;
>>         target_ulong vl;
>>         target_ulong vstart;
>>         target_ulong vtype;
>>     } vext;
>>
>> Will that be OK?
> Pointers have consequences. It can be done, but I don't think it is ideal.
>
>> The (ill, lmul, sew) components of vtype will be placed within tb_flags, as
>> will the bit for (VSTART == 0 && VL == VLMAX). So it will take at least 8
>> bits of tb_flags for the vector extension.
> Good.
>> However, I have one problem with supporting both a command-line VLEN and
>> vreg_ofs. As in SVE, vreg_ofs is the offset from cpu_env. If the structure of
>> the vector extension (to support a command-line VLEN) is
>>
>>     struct {
>>         union {
>>             uint64_t *u64;
>>             int64_t  *s64;
>>             uint32_t *u32;
>>             int32_t  *s32;
>>             uint16_t *u16;
>>             int16_t  *s16;
>>             uint8_t  *u8;
>>             int8_t   *s8;
>>         } mem;
>>         target_ulong vxrm;
>>         target_ulong vxsat;
>>         target_ulong vl;
>>         target_ulong vstart;
>>         target_ulong vtype;
>>     } vext
>>
>> I can't find a way to get the direct offset of vreg from cpu_env.
>>
>> Maybe I should specify a max VLEN, the way SVE does?
> I think a maximum vlen is best.  A command-line option to adjust vlen is all
> well and good, but there's no reason to have to support vlen=(1<<29).
>
> Oh, and you probably need a minimum vlen of 16 bytes as well, otherwise you
> will run afoul of the assert in tcg-op-gvec.c that requires gvec operations to
> be aligned mod 16.
>
> I think that all you need is
>
>     uint64_t vreg[32 * MAX_VLEN / 8] QEMU_ALIGNED(16);
>
> which gives us
>
>     uint32_t vreg_ofs(DisasContext *ctx, int reg)
>     {
>         return offsetof(CPURISCVState, vreg) + reg * ctx->vlen;
>     }

    struct {
        uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16);
        target_ulong vxrm;
        target_ulong vxsat;
        target_ulong vl;
        target_ulong vstart;
        target_ulong vtype;
    } vext;

Is it OK?

> I don't see the point of a union for vreg.  I don't think you'll find that you
> actually use it at all.

I think I can move most of the execution checks to translation time, as SVE does. However, there are still some differences from SVE.

1) cpu_env must be used as a parameter for the helper functions.

The helpers need to use env->vext.vl and env->vext.vstart, so it will be difficult to use the out-of-line tcg_gen_gvec_ool.

    void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
                            uint32_t oprsz, uint32_t maxsz, int32_t data,
                            gen_helper_gvec_2 *fn)
    {
        ......
        fn(a0, a1, desc);
        ......
    }

Maybe I have to write something similar to tcg_gen_gvec_ool in trans_rvv.inc.c, but it would be redundant.

2) simd_desc is not suitable.

I also need to transfer some members of DisasContext to the helpers. (Data, Vlmax, Mlen) is my current choice. Vlmax is the number of elements for this operation, so it will be defined as ctx->lmul * ctx->vlen / ctx->sew. Data is reserved for expansion. Mlen is the mask length for one element, so it will be defined as ctx->sew / ctx->lmul.
As with Mlen, an active element will be selected by

    static inline int vext_elem_mask(uint8_t *v0, int mlen, int index)
    {
        int idx = (index * mlen) / 8;
        int pos = (index * mlen) % 8;

        return (v0[idx] >> pos) & 0x1;
    }

So I may have to implement vext_desc instead of using simd_desc, which would be another redundancy. Is there a better way to mask elements?

> You do need to document the element ordering that you're going to use for
> vreg. I.e. the mapping between the architectural vector register state and the
> emulation state.  You have two choices:
>
>   (1) all bytes in host endianness (e.g. target/ppc)
>   (2) bytes within each uint64_t in host endianness,
>       but each uint64_t is little-endian (e.g. target/arm).
>
> Both require some fixup when running on a big-endian host.

Yes, I will take (2).

Best Regards,
Zhiwei

>
> r~
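The fixup for ordering (2) can be expressed with index-twiddling macros in the style of target/arm; this is a sketch of that convention rather than code from this series (HOST_WORDS_BIGENDIAN is the host-endian define of this QEMU era):

    /* Map an element index onto its position within a little-endian
     * uint64_t lane when the host is big-endian; identity otherwise. */
    #ifdef HOST_WORDS_BIGENDIAN
    #define H1(x)   ((x) ^ 7)   /* uint8_t elements */
    #define H2(x)   ((x) ^ 3)   /* uint16_t elements */
    #define H4(x)   ((x) ^ 1)   /* uint32_t elements */
    #else
    #define H1(x)   (x)
    #define H2(x)   (x)
    #define H4(x)   (x)
    #endif

With these, byte element i of a register is always accessed as u8[H1(i)], and the stored representation matches choice (2) on both host endiannesses.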
On 12/25/19 8:36 PM, LIU Zhiwei wrote:
>     struct {
>         uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16);
>         target_ulong vxrm;
>         target_ulong vxsat;
>         target_ulong vl;
>         target_ulong vstart;
>         target_ulong vtype;
>     } vext;
>
> Is it OK?

I don't think there's a good reason for the vext structure -- I would drop that.  Otherwise it looks good.

> However, there are still some differences from SVE.
>
> 1) cpu_env must be used as a parameter for the helper functions.
>
> The helpers need to use env->vext.vl and env->vext.vstart, so it will be
> difficult to use the out-of-line tcg_gen_gvec_ool.

Sure.  That's also true of any of the fp operations, which will want to accumulate ieee exceptions.

See tcg_gen_gvec_*_ptr(), which allows you to pass in cpu_env.

> 2) simd_desc is not suitable.
>
> I also need to transfer some members of DisasContext to the helpers.
>
> (Data, Vlmax, Mlen) is my current choice. Vlmax is the number of elements for
> this operation, so it will be defined as ctx->lmul * ctx->vlen / ctx->sew.

The oprsz & maxsz parameters to tcg_gen_gvec_* should be given (ctx->lmul * ctx->vlen).  The sew parameter should be implied by the helper function called, each helper function using a different type.  Therefore vlmax can be trivially computed within the helper from oprsz / sizeof(type).

> Data is reserved for expansion. Mlen is the mask length for one element, so it
> will be defined as ctx->sew / ctx->lmul. As with Mlen, an active element will
> be selected by
>
>     static inline int vext_elem_mask(uint8_t *v0, int mlen, int index)
>     {
>         int idx = (index * mlen) / 8;
>         int pos = (index * mlen) % 8;
>
>         return (v0[idx] >> pos) & 0x1;
>     }
>
> So I may have to implement vext_desc instead of using simd_desc, which would
> be another redundancy. Is there a better way to mask elements?

I think you will want to define your own vext_desc, building upon simd_desc, such that lg2(mlen) is passed in the first N bits of simd_data.

r~
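A sketch of that suggestion, using the FIELD macros from hw/registerfields.h and simd_data() from the gvec descriptor API; the field layout and the VDATA/vext_data names are assumptions, not the final encoding:

    FIELD(VDATA, MLEN, 0, 8)    /* lg2(mlen) would fit in fewer bits */
    FIELD(VDATA, VM,   8, 1)
    FIELD(VDATA, LMUL, 9, 2)

    /* Packed at translate time and passed as the 'data' argument of
     * tcg_gen_gvec_*_ptr(); a helper recovers each field with e.g.
     * FIELD_EX32(simd_data(desc), VDATA, MLEN). */
    static uint32_t vext_data(uint32_t mlen, uint32_t vm, uint32_t lmul)
    {
        uint32_t data = FIELD_DP32(0, VDATA, MLEN, mlen);
        data = FIELD_DP32(data, VDATA, VM, vm);
        data = FIELD_DP32(data, VDATA, LMUL, lmul);
        return data;
    }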
On 2019/12/28 9:14, Richard Henderson wrote:
> On 12/25/19 8:36 PM, LIU Zhiwei wrote:
>>     struct {
>>         uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16);
>>         target_ulong vxrm;
>>         target_ulong vxsat;
>>         target_ulong vl;
>>         target_ulong vstart;
>>         target_ulong vtype;
>>     } vext;
>>
>> Is it OK?
> I don't think there's a good reason for the vext structure -- I would drop
> that.  Otherwise it looks good.
>
>> However, there are still some differences from SVE.
>>
>> 1) cpu_env must be used as a parameter for the helper functions.
>>
>> The helpers need to use env->vext.vl and env->vext.vstart, so it will be
>> difficult to use the out-of-line tcg_gen_gvec_ool.
> Sure.  That's also true of any of the fp operations, which will want to
> accumulate ieee exceptions.
>
> See tcg_gen_gvec_*_ptr(), which allows you to pass in cpu_env.

Thanks. The tcg_gen_gvec_*_ptr is good.

>> 2) simd_desc is not suitable.
>>
>> I also need to transfer some members of DisasContext to the helpers.
>>
>> (Data, Vlmax, Mlen) is my current choice. Vlmax is the number of elements for
>> this operation, so it will be defined as ctx->lmul * ctx->vlen / ctx->sew.
> The oprsz & maxsz parameters to tcg_gen_gvec_* should be given (ctx->lmul *
> ctx->vlen).  The sew parameter should be implied by the helper function
> called, each helper function using a different type.  Therefore vlmax can be
> trivially computed within the helper from oprsz / sizeof(type).

It's clear that the oprsz & maxsz parameters should be given (ctx->lmul * ctx->vlen) for tcg_gen_gvec_add. However, it's not clear what oprsz should be when using tcg_gen_gvec_*_ptr or tcg_gen_gvec_ool. I think the meaning of oprsz is the total size of the active elements. Therefore, oprsz is 8 * env->vext.vl in RISC-V, and it can't be fetched from TB_FLAGS as in SVE.

Probably the oprsz field will not be used in the RISC-V vector extension.

>> Data is reserved for expansion. Mlen is the mask length for one element, so
>> it will be defined as ctx->sew / ctx->lmul. As with Mlen, an active element
>> will be selected by
>>
>>     static inline int vext_elem_mask(uint8_t *v0, int mlen, int index)
>>     {
>>         int idx = (index * mlen) / 8;
>>         int pos = (index * mlen) % 8;
>>
>>         return (v0[idx] >> pos) & 0x1;
>>     }
>>
>> So I may have to implement vext_desc instead of using simd_desc, which would
>> be another redundancy. Is there a better way to mask elements?
> I think you will want to define your own vext_desc, building upon simd_desc,
> such that lg2(mlen) is passed in the first N bits of simd_data.

Good. It's a good way to use the tcg_gen_gvec_*_ptr or tcg_gen_gvec_ool API.

Best Regards,
Zhiwei

>
> r~
On 12/30/19 6:11 PM, LIU Zhiwei wrote:
> However, it's not clear what oprsz should be when using tcg_gen_gvec_*_ptr or
> tcg_gen_gvec_ool. I think the meaning of oprsz is the total size of the active
> elements. Therefore, oprsz is 8 * env->vext.vl in RISC-V, and it can't be
> fetched from TB_FLAGS as in SVE.
>
> Probably the oprsz field will not be used in the RISC-V vector extension.

Correct.  For those risc-v helpers that are called when VL != VLMAX, you would ignore the oprsz field and fetch it from env.

It may still be handy to pass in vlmax as maxsz, even if you leave the oprsz field 0.  You'll find that out as you do the coding, I suppose.

r~
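Put together, a helper body for that case might look like the following sketch; it assumes a DEF_HELPER_5(vadd8, void, ptr, ptr, ptr, env, i32) declaration and flat vl/vstart fields in CPURISCVState per the discussion above (big-endian host fixup via H1() elided for brevity):

    /* Unmasked byte add: the live element count comes from env->vl; the
     * descriptor's oprsz field is ignored. */
    void HELPER(vadd8)(void *vd, void *va, void *vb,
                       CPURISCVState *env, uint32_t desc)
    {
        int8_t *d = vd, *a = va, *b = vb;
        uint32_t i;

        for (i = env->vstart; i < env->vl; i++) {
            d[i] = a[i] + b[i];
        }
        env->vstart = 0;
    }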
diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 2ba36ec..da155ea 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -433,6 +433,16 @@ static inline int extractFloat16Exp(float16 a) } /*---------------------------------------------------------------------------- +| Returns the sign bit of the half-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +static inline flag extractFloat16Sign(float16 a) +{ + return float16_val(a) >> 0xf; +} + + +/*---------------------------------------------------------------------------- | Returns the fraction bits of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ @@ -4790,6 +4800,35 @@ int float32_eq(float32 a, float32 b, float_status *status) } /*---------------------------------------------------------------------------- +| Returns 1 if the half-precision floating-point value `a' is less than +| or equal to the corresponding value `b', and 0 otherwise. The invalid +| exception is raised if either operand is a NaN. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float16_le(float16 a, float16 b, float_status *status) +{ + flag aSign, bSign; + uint16_t av, bv; + a = float16_squash_input_denormal(a, status); + b = float16_squash_input_denormal(b, status); + + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) ) + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) ) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + aSign = extractFloat16Sign( a ); + bSign = extractFloat16Sign( b ); + av = float16_val(a); + bv = float16_val(b); + if ( aSign != bSign ) return aSign || ( (uint16_t) ( ( av | bv )<<1 ) == 0 ); + return ( av == bv ) || ( aSign ^ ( av < bv ) ); + +} + +/*---------------------------------------------------------------------------- | Returns 1 if the single-precision floating-point value `a' is less than | or equal to the corresponding value `b', and 0 otherwise. The invalid | exception is raised if either operand is a NaN. The comparison is performed @@ -4825,6 +4864,35 @@ int float32_le(float32 a, float32 b, float_status *status) | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ +int float16_lt(float16 a, float16 b, float_status *status) +{ + flag aSign, bSign; + uint16_t av, bv; + a = float16_squash_input_denormal(a, status); + b = float16_squash_input_denormal(b, status); + + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) ) + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) ) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + aSign = extractFloat16Sign( a ); + bSign = extractFloat16Sign( b ); + av = float16_val(a); + bv = float16_val(b); + if ( aSign != bSign ) return aSign && ( (uint16_t) ( ( av | bv )<<1 ) != 0 ); + return ( av != bv ) && ( aSign ^ ( av < bv ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is less than +| the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. The comparison is performed according +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + int float32_lt(float32 a, float32 b, float_status *status) { flag aSign, bSign; @@ -4869,6 +4937,32 @@ int float32_unordered(float32 a, float32 b, float_status *status) } /*---------------------------------------------------------------------------- +| Returns 1 if the half-precision floating-point value `a' is equal to +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +| exception. The comparison is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float16_eq_quiet(float16 a, float16 b, float_status *status) +{ + a = float16_squash_input_denormal(a, status); + b = float16_squash_input_denormal(b, status); + + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) ) + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) ) + ) { + if (float16_is_signaling_nan(a, status) + || float16_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 0; + } + return ( float16_val(a) == float16_val(b) ) || + ( (uint16_t) ( ( float16_val(a) | float16_val(b) )<<1 ) == 0 ); +} + + +/*---------------------------------------------------------------------------- | Returns 1 if the single-precision floating-point value `a' is equal to | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an | exception. The comparison is performed according to the IEC/IEEE Standard @@ -4958,6 +5052,31 @@ int float32_lt_quiet(float32 a, float32 b, float_status *status) } /*---------------------------------------------------------------------------- +| Returns 1 if the half-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The +| comparison is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float16_unordered_quiet(float16 a, float16 b, float_status *status) +{ + a = float16_squash_input_denormal(a, status); + b = float16_squash_input_denormal(b, status); + + if ( ( ( extractFloat16Exp( a ) == 0x1F ) && extractFloat16Frac( a ) ) + || ( ( extractFloat16Exp( b ) == 0x1F ) && extractFloat16Frac( b ) ) + ) { + if (float16_is_signaling_nan(a, status) + || float16_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 1; + } + return 0; +} + + +/*---------------------------------------------------------------------------- | Returns 1 if the single-precision floating-point values `a' and `b' cannot | be compared, and 0 otherwise. Quiet NaNs do not cause an exception. 
The | comparison is performed according to the IEC/IEEE Standard for Binary diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index 3ff3fa5..3b0754c 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -293,6 +293,10 @@ float16 float16_maxnummag(float16, float16, float_status *status); float16 float16_sqrt(float16, float_status *status); int float16_compare(float16, float16, float_status *status); int float16_compare_quiet(float16, float16, float_status *status); +int float16_unordered_quiet(float16, float16, float_status *status); +int float16_le(float16, float16, float_status *status); +int float16_lt(float16, float16, float_status *status); +int float16_eq_quiet(float16, float16, float_status *status); int float16_is_quiet_nan(float16, float_status *status); int float16_is_signaling_nan(float16, float_status *status); diff --git a/linux-user/riscv/cpu_loop.c b/linux-user/riscv/cpu_loop.c index 12aa3c0..b01548a 100644 --- a/linux-user/riscv/cpu_loop.c +++ b/linux-user/riscv/cpu_loop.c @@ -40,7 +40,13 @@ void cpu_loop(CPURISCVState *env) signum = 0; sigcode = 0; sigaddr = 0; - + if (env->foflag) { + if (env->vfp.vl != 0) { + env->foflag = false; + env->pc += 4; + continue; + } + } switch (trapnr) { case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs index b1c79bc..d577cef 100644 --- a/target/riscv/Makefile.objs +++ b/target/riscv/Makefile.objs @@ -1,4 +1,4 @@ -obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o pmp.o +obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o vector_helper.o gdbstub.o pmp.o DECODETREE = $(SRC_PATH)/scripts/decodetree.py diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 0adb307..5a93aa2 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -67,6 +67,7 @@ #define RVC RV('C') #define RVS RV('S') #define RVU RV('U') +#define RVV RV('V') /* S extension denotes that Supervisor mode exists, however it is possible to have a core that support S mode but does not have an MMU and there @@ -93,9 +94,38 @@ typedef struct CPURISCVState CPURISCVState; #include "pmp.h" +#define VLEN 128 +#define VUNIT(x) (VLEN / x) + struct CPURISCVState { target_ulong gpr[32]; uint64_t fpr[32]; /* assume both F and D extensions */ + + /* vector coprocessor state. 
*/ + struct { + union VECTOR { + float64 f64[VUNIT(64)]; + float32 f32[VUNIT(32)]; + float16 f16[VUNIT(16)]; + target_ulong ul[VUNIT(sizeof(target_ulong))]; + uint64_t u64[VUNIT(64)]; + int64_t s64[VUNIT(64)]; + uint32_t u32[VUNIT(32)]; + int32_t s32[VUNIT(32)]; + uint16_t u16[VUNIT(16)]; + int16_t s16[VUNIT(16)]; + uint8_t u8[VUNIT(8)]; + int8_t s8[VUNIT(8)]; + } vreg[32]; + target_ulong vxrm; + target_ulong vxsat; + target_ulong vl; + target_ulong vstart; + target_ulong vtype; + float_status fp_status; + } vfp; + + bool foflag; target_ulong pc; target_ulong load_res; target_ulong load_val; diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index 11f971a..9eb43ec 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -29,6 +29,14 @@ #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA) +/* Vector Fixed-Point round model */ +#define FSR_VXRM_SHIFT 9 +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT) + +/* Vector Fixed-Point saturation flag */ +#define FSR_VXSAT_SHIFT 8 +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT) + /* Control and Status Registers */ /* User Trap Setup */ @@ -48,6 +56,13 @@ #define CSR_FRM 0x002 #define CSR_FCSR 0x003 +/* User Vector CSRs */ +#define CSR_VSTART 0x008 +#define CSR_VXSAT 0x009 +#define CSR_VXRM 0x00a +#define CSR_VL 0xc20 +#define CSR_VTYPE 0xc21 + /* User Timers and Counters */ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index e32b612..405caf6 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -521,6 +521,13 @@ void riscv_cpu_do_interrupt(CPUState *cs) [PRV_H] = RISCV_EXCP_H_ECALL, [PRV_M] = RISCV_EXCP_M_ECALL }; + if (env->foflag) { + if (env->vfp.vl != 0) { + env->foflag = false; + env->pc += 4; + return; + } + } if (!async) { /* set tval to badaddr for traps with address information */ diff --git a/target/riscv/csr.c b/target/riscv/csr.c index e0d4586..a6131ff 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -87,12 +87,12 @@ static int ctr(CPURISCVState *env, int csrno) return 0; } -#if !defined(CONFIG_USER_ONLY) static int any(CPURISCVState *env, int csrno) { return 0; } +#if !defined(CONFIG_USER_ONLY) static int smode(CPURISCVState *env, int csrno) { return -!riscv_has_ext(env, RVS); @@ -158,8 +158,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val) return -1; } #endif - *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) - | (env->frm << FSR_RD_SHIFT); + *val = (env->vfp.vxrm << FSR_VXRM_SHIFT) + | (env->vfp.vxsat << FSR_VXSAT_SHIFT) + | (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) + | (env->frm << FSR_RD_SHIFT); return 0; } @@ -172,10 +174,60 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val) env->mstatus |= MSTATUS_FS; #endif env->frm = (val & FSR_RD) >> FSR_RD_SHIFT; + env->vfp.vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT; + env->vfp.vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT; riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT); return 0; } +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vfp.vtype; + return 0; +} + +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vfp.vl; + return 0; +} + +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vfp.vxrm; + return 0; +} + +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vfp.vxsat; + return 0; +} + +static int 
read_vstart(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vfp.vstart; + return 0; +} + +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vfp.vxrm = val; + return 0; +} + +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vfp.vxsat = val; + return 0; +} + +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vfp.vstart = val; + return 0; +} + /* User Timers and Counters */ static int read_instret(CPURISCVState *env, int csrno, target_ulong *val) { @@ -873,7 +925,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_FFLAGS] = { fs, read_fflags, write_fflags }, [CSR_FRM] = { fs, read_frm, write_frm }, [CSR_FCSR] = { fs, read_fcsr, write_fcsr }, - + /* Vector CSRs */ + [CSR_VSTART] = { any, read_vstart, write_vstart }, + [CSR_VXSAT] = { any, read_vxsat, write_vxsat }, + [CSR_VXRM] = { any, read_vxrm, write_vxrm }, + [CSR_VL] = { any, read_vl }, + [CSR_VTYPE] = { any, read_vtype }, /* User Timers and Counters */ [CSR_CYCLE] = { ctr, read_instret }, [CSR_INSTRET] = { ctr, read_instret }, diff --git a/target/riscv/helper.h b/target/riscv/helper.h index debb22a..fee02c0 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -76,3 +76,357 @@ DEF_HELPER_2(mret, tl, env, tl) DEF_HELPER_1(wfi, void, env) DEF_HELPER_1(tlb_flush, void, env) #endif +/* Vector functions */ +DEF_HELPER_5(vector_vlb_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vlh_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vlw_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vle_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vlbu_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vlhu_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vlwu_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vlbff_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vlhff_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vlwff_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vleff_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vlbuff_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vlhuff_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vlwuff_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsb_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsh_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsw_v, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vse_v, void, env, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlsb_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlsh_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlsw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlse_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlsbu_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlshu_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlswu_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vssb_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vssh_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vssw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vsse_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlxb_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlxh_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlxw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlxe_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlxbu_v, void, env, i32, i32, i32, i32, i32) 
+DEF_HELPER_6(vector_vlxhu_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vlxwu_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vsxb_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vsxh_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vsxw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vsxe_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vsuxb_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vsuxh_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vsuxw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vsuxe_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoswapw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoswapd_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoaddw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoaddd_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoxorw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoxord_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoandw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoandd_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoorw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamoord_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamominw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamomind_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamomaxw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamomaxd_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamominuw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamominud_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamomaxuw_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(vector_vamomaxud_v, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_4(vector_vext_x_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfmv_f_s, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmv_s_x, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfmv_s_f, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vadc_vvm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vadc_vxm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vadc_vim, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmadc_vvm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmadc_vxm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmadc_vim, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vsbc_vvm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vsbc_vxm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmsbc_vvm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmsbc_vxm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmpopc_m, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmfirst_m, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vcompress_vm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmandnot_mm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmand_mm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmor_mm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmxor_mm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmornot_mm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmnand_mm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmnor_mm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmxnor_mm, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmsbf_m, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmsof_m, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vmsif_m, void, env, i32, i32, i32) +DEF_HELPER_4(vector_viota_m, void, env, 
i32, i32, i32) +DEF_HELPER_3(vector_vid_v, void, env, i32, i32) +DEF_HELPER_4(vector_vfcvt_xu_f_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfcvt_x_f_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfcvt_f_xu_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfcvt_f_x_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfwcvt_xu_f_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfwcvt_x_f_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfwcvt_f_xu_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfwcvt_f_x_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfwcvt_f_f_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfncvt_xu_f_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfncvt_x_f_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfncvt_f_xu_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfncvt_f_x_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfncvt_f_f_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfsqrt_v, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vfclass_v, void, env, i32, i32, i32) +DEF_HELPER_5(vector_vadd_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vadd_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vadd_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vredsum_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfadd_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfadd_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vredand_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfredsum_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsub_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsub_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vredor_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfsub_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfsub_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vrsub_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vrsub_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vredxor_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfredosum_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vminu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vminu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vredminu_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmin_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmin_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmin_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmin_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vredmin_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfredmin_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmaxu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmaxu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vredmaxu_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmax_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmax_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmax_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmax_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vredmax_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfredmax_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfsgnj_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfsgnj_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vand_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vand_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vand_vi, void, env, i32, i32, i32, i32) 
+DEF_HELPER_5(vector_vfsgnjn_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfsgnjn_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vor_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vor_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vor_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfsgnjx_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfsgnjx_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vxor_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vxor_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vxor_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vrgather_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vrgather_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vrgather_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vslideup_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vslideup_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vslide1up_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vslidedown_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vslidedown_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vslide1down_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmerge_vvm, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmerge_vxm, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmerge_vim, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmerge_vfm, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmseq_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmseq_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmseq_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmfeq_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmfeq_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsne_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsne_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsne_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmfle_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmfle_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsltu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsltu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmford_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmford_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmslt_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmslt_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmflt_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmflt_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsleu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsleu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsleu_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmfne_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmfne_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsle_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsle_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsle_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmfgt_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsgtu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsgtu_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsgt_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmsgt_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmfge_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsaddu_vv, void, env, i32, i32, i32, i32) 
+DEF_HELPER_5(vector_vsaddu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsaddu_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vdivu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vdivu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfdiv_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfdiv_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsadd_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsadd_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsadd_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vdiv_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vdiv_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfrdiv_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vssubu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vssubu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vremu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vremu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vssub_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vssub_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vrem_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vrem_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vaadd_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vaadd_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vaadd_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmulhu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmulhu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmul_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmul_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsll_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsll_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsll_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmul_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmul_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vasub_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vasub_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmulhsu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmulhsu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsmul_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsmul_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmulh_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmulh_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfrsub_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsrl_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsrl_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsrl_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmadd_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmadd_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsra_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsra_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vsra_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmadd_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmadd_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfnmadd_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfnmadd_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vssrl_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vssrl_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vssrl_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmsub_vv, void, env, i32, i32, i32, i32) 
+DEF_HELPER_5(vector_vfmsub_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vssra_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vssra_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vssra_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnmsub_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnmsub_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfnmsub_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfnmsub_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnsrl_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnsrl_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnsrl_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmacc_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmacc_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnsra_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnsra_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnsra_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmacc_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vmacc_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfnmacc_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfnmacc_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnclipu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnclipu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnclipu_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmsac_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfmsac_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnclip_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnclip_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnclip_vi, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnmsac_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vnmsac_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfnmsac_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfnmsac_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwredsumu_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwaddu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwaddu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwadd_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwadd_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwredsum_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwadd_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwadd_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwredsum_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsubu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsubu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwsub_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwsub_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsub_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsub_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwredosum_vs, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwaddu_wv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwaddu_wx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwadd_wv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwadd_wf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwadd_wv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwadd_wx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsubu_wv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsubu_wx, void, env, i32, i32, i32, i32) 
+DEF_HELPER_5(vector_vfwsub_wv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwsub_wf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsub_wv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsub_wx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmulu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmulu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwmul_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwmul_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmulsu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmulsu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmul_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmul_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsmaccu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsmaccu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmaccu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmaccu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwmacc_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwmacc_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsmacc_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsmacc_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmacc_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmacc_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwnmacc_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwnmacc_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsmaccsu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsmaccsu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmaccsu_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmaccsu_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwmsac_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwmsac_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwsmaccus_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vwmaccus_vx, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwnmsac_vv, void, env, i32, i32, i32, i32) +DEF_HELPER_5(vector_vfwnmsac_vf, void, env, i32, i32, i32, i32) +DEF_HELPER_4(vector_vsetvli, void, env, i32, i32, i32) +DEF_HELPER_4(vector_vsetvl, void, env, i32, i32, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 77f794e..d125ff9 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -25,7 +25,7 @@ %sh10 20:10 %csr 20:12 %rm 12:3 - +%nf 29:3 # immediates: %imm_i 20:s12 %imm_s 25:s7 7:5 @@ -43,7 +43,6 @@ &u imm rd &shift shamt rs1 rd &atomic aq rl rs2 rs1 rd - # Formats 32: @r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd @i ............ ..... ... ..... ....... &i imm=%imm_i %rs1 %rd @@ -62,11 +61,17 @@ @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd @r2 ....... ..... ..... ... ..... ....... %rs1 %rd +@r_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd +@r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd +@r_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs2 %rs1 %rd +@r2_nfvm nf:3 ... vm:1 ..... ..... ... ..... ....... %rs1 %rd +@r2_vm ...... vm:1 ..... ..... ... ..... ....... %rs2 %rd +@r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd +@r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd @sfence_vma ....... ..... ..... ... ..... ....... %rs2 %rs1 @sfence_vm ....... ..... ..... ... ..... ....... 
%rs1 - # *** Privileged Instructions *** ecall 000000000000 00000 000 00000 1110011 ebreak 000000000001 00000 000 00000 1110011 @@ -203,3 +208,366 @@ fcvt_w_d 1100001 00000 ..... ... ..... 1010011 @r2_rm fcvt_wu_d 1100001 00001 ..... ... ..... 1010011 @r2_rm fcvt_d_w 1101001 00000 ..... ... ..... 1010011 @r2_rm fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm + +# *** RV32V Standard Extension *** + +# *** Vector loads and stores are encoded within LOADFP/STORE-FP *** +vlb_v ... 100 . 00000 ..... 000 ..... 0000111 @r2_nfvm +vlh_v ... 100 . 00000 ..... 101 ..... 0000111 @r2_nfvm +vlw_v ... 100 . 00000 ..... 110 ..... 0000111 @r2_nfvm +vle_v ... 000 . 00000 ..... 111 ..... 0000111 @r2_nfvm +vlbu_v ... 000 . 00000 ..... 000 ..... 0000111 @r2_nfvm +vlhu_v ... 000 . 00000 ..... 101 ..... 0000111 @r2_nfvm +vlwu_v ... 000 . 00000 ..... 110 ..... 0000111 @r2_nfvm +vlbff_v ... 100 . 10000 ..... 000 ..... 0000111 @r2_nfvm +vlhff_v ... 100 . 10000 ..... 101 ..... 0000111 @r2_nfvm +vlwff_v ... 100 . 10000 ..... 110 ..... 0000111 @r2_nfvm +vleff_v ... 000 . 10000 ..... 111 ..... 0000111 @r2_nfvm +vlbuff_v ... 000 . 10000 ..... 000 ..... 0000111 @r2_nfvm +vlhuff_v ... 000 . 10000 ..... 101 ..... 0000111 @r2_nfvm +vlwuff_v ... 000 . 10000 ..... 110 ..... 0000111 @r2_nfvm +vsb_v ... 000 . 00000 ..... 000 ..... 0100111 @r2_nfvm +vsh_v ... 000 . 00000 ..... 101 ..... 0100111 @r2_nfvm +vsw_v ... 000 . 00000 ..... 110 ..... 0100111 @r2_nfvm +vse_v ... 000 . 00000 ..... 111 ..... 0100111 @r2_nfvm + +vlsb_v ... 110 . ..... ..... 000 ..... 0000111 @r_nfvm +vlsh_v ... 110 . ..... ..... 101 ..... 0000111 @r_nfvm +vlsw_v ... 110 . ..... ..... 110 ..... 0000111 @r_nfvm +vlse_v ... 010 . ..... ..... 111 ..... 0000111 @r_nfvm +vlsbu_v ... 010 . ..... ..... 000 ..... 0000111 @r_nfvm +vlshu_v ... 010 . ..... ..... 101 ..... 0000111 @r_nfvm +vlswu_v ... 010 . ..... ..... 110 ..... 0000111 @r_nfvm +vssb_v ... 010 . ..... ..... 000 ..... 0100111 @r_nfvm +vssh_v ... 010 . ..... ..... 101 ..... 0100111 @r_nfvm +vssw_v ... 010 . ..... ..... 110 ..... 0100111 @r_nfvm +vsse_v ... 010 . ..... ..... 111 ..... 0100111 @r_nfvm + +vlxb_v ... 111 . ..... ..... 000 ..... 0000111 @r_nfvm +vlxh_v ... 111 . ..... ..... 101 ..... 0000111 @r_nfvm +vlxw_v ... 111 . ..... ..... 110 ..... 0000111 @r_nfvm +vlxe_v ... 011 . ..... ..... 111 ..... 0000111 @r_nfvm +vlxbu_v ... 011 . ..... ..... 000 ..... 0000111 @r_nfvm +vlxhu_v ... 011 . ..... ..... 101 ..... 0000111 @r_nfvm +vlxwu_v ... 011 . ..... ..... 110 ..... 0000111 @r_nfvm +vsxb_v ... 011 . ..... ..... 000 ..... 0100111 @r_nfvm +vsxh_v ... 011 . ..... ..... 101 ..... 0100111 @r_nfvm +vsxw_v ... 011 . ..... ..... 110 ..... 0100111 @r_nfvm +vsxe_v ... 011 . ..... ..... 111 ..... 0100111 @r_nfvm +vsuxb_v ... 111 . ..... ..... 000 ..... 0100111 @r_nfvm +vsuxh_v ... 111 . ..... ..... 101 ..... 0100111 @r_nfvm +vsuxw_v ... 111 . ..... ..... 110 ..... 0100111 @r_nfvm +vsuxe_v ... 111 . ..... ..... 111 ..... 0100111 @r_nfvm + +#*** Vector AMO operations are encoded under the standard AMO major opcode.*** +vamoswapw_v 00001 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoswapd_v 00001 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamoaddw_v 00000 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoaddd_v 00000 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamoxorw_v 00100 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoxord_v 00100 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamoandw_v 01100 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoandd_v 01100 . . ..... ..... 111 ..... 
0101111 @r_wdvm +vamoorw_v 01000 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamoord_v 01000 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamominw_v 10000 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamomind_v 10000 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamomaxw_v 10100 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamomaxd_v 10100 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamominud_v 11000 . . ..... ..... 111 ..... 0101111 @r_wdvm +vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm +vamomaxud_v 11100 . . ..... ..... 111 ..... 0101111 @r_wdvm + +#*** new major opcode OP-V *** +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm +vredsum_vs 000000 . ..... ..... 010 ..... 1010111 @r_vm +vfadd_vv 000000 . ..... ..... 001 ..... 1010111 @r_vm +vfadd_vf 000000 . ..... ..... 101 ..... 1010111 @r_vm +vredand_vs 000001 . ..... ..... 010 ..... 1010111 @r_vm +vfredsum_vs 000001 . ..... ..... 001 ..... 1010111 @r_vm +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm +vredor_vs 000010 . ..... ..... 010 ..... 1010111 @r_vm +vfsub_vv 000010 . ..... ..... 001 ..... 1010111 @r_vm +vfsub_vf 000010 . ..... ..... 101 ..... 1010111 @r_vm +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm +vredxor_vs 000011 . ..... ..... 010 ..... 1010111 @r_vm +vfredosum_vs 000011 . ..... ..... 001 ..... 1010111 @r_vm +vminu_vv 000100 . ..... ..... 000 ..... 1010111 @r_vm +vminu_vx 000100 . ..... ..... 100 ..... 1010111 @r_vm +vredminu_vs 000100 . ..... ..... 010 ..... 1010111 @r_vm +vfmin_vv 000100 . ..... ..... 001 ..... 1010111 @r_vm +vfmin_vf 000100 . ..... ..... 101 ..... 1010111 @r_vm +vmin_vv 000101 . ..... ..... 000 ..... 1010111 @r_vm +vmin_vx 000101 . ..... ..... 100 ..... 1010111 @r_vm +vredmin_vs 000101 . ..... ..... 010 ..... 1010111 @r_vm +vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm +vmaxu_vv 000110 . ..... ..... 000 ..... 1010111 @r_vm +vmaxu_vx 000110 . ..... ..... 100 ..... 1010111 @r_vm +vredmaxu_vs 000110 . ..... ..... 010 ..... 1010111 @r_vm +vfmax_vv 000110 . ..... ..... 001 ..... 1010111 @r_vm +vfmax_vf 000110 . ..... ..... 101 ..... 1010111 @r_vm +vmax_vv 000111 . ..... ..... 000 ..... 1010111 @r_vm +vmax_vx 000111 . ..... ..... 100 ..... 1010111 @r_vm +vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm +vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm +vfsgnj_vv 001000 . ..... ..... 001 ..... 1010111 @r_vm +vfsgnj_vf 001000 . ..... ..... 101 ..... 1010111 @r_vm +vand_vv 001001 . ..... ..... 000 ..... 1010111 @r_vm +vand_vx 001001 . ..... ..... 100 ..... 1010111 @r_vm +vand_vi 001001 . ..... ..... 011 ..... 1010111 @r_vm +vfsgnjn_vv 001001 . ..... ..... 001 ..... 1010111 @r_vm +vfsgnjn_vf 001001 . ..... ..... 101 ..... 1010111 @r_vm +vor_vv 001010 . ..... ..... 000 ..... 1010111 @r_vm +vor_vx 001010 . ..... ..... 100 ..... 1010111 @r_vm +vor_vi 001010 . ..... ..... 011 ..... 1010111 @r_vm +vfsgnjx_vv 001010 . ..... ..... 001 ..... 1010111 @r_vm +vfsgnjx_vf 001010 . ..... ..... 101 ..... 1010111 @r_vm +vxor_vv 001011 . ..... ..... 000 ..... 1010111 @r_vm +vxor_vx 001011 . ..... ..... 100 ..... 1010111 @r_vm +vxor_vi 001011 . ..... ..... 011 ..... 1010111 @r_vm +vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm +vrgather_vx 001100 . ..... ..... 100 ..... 
1010111 @r_vm +vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm +vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r +vfmv_f_s 001100 1 ..... ..... 001 ..... 1010111 @r +vmv_s_x 001101 1 ..... ..... 110 ..... 1010111 @r +vfmv_s_f 001101 1 ..... ..... 101 ..... 1010111 @r +vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm +vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm +vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm +vslidedown_vx 001111 . ..... ..... 100 ..... 1010111 @r_vm +vslidedown_vi 001111 . ..... ..... 011 ..... 1010111 @r_vm +vslide1down_vx 001111 . ..... ..... 110 ..... 1010111 @r_vm +vadc_vvm 010000 1 ..... ..... 000 ..... 1010111 @r +vadc_vxm 010000 1 ..... ..... 100 ..... 1010111 @r +vadc_vim 010000 1 ..... ..... 011 ..... 1010111 @r +vmadc_vvm 010001 1 ..... ..... 000 ..... 1010111 @r +vmadc_vxm 010001 1 ..... ..... 100 ..... 1010111 @r +vmadc_vim 010001 1 ..... ..... 011 ..... 1010111 @r +vsbc_vvm 010010 1 ..... ..... 000 ..... 1010111 @r +vsbc_vxm 010010 1 ..... ..... 100 ..... 1010111 @r +vmsbc_vvm 010011 1 ..... ..... 000 ..... 1010111 @r +vmsbc_vxm 010011 1 ..... ..... 100 ..... 1010111 @r +vmpopc_m 010100 . ..... ----- 010 ..... 1010111 @r2_vm +vmfirst_m 010101 . ..... ----- 010 ..... 1010111 @r2_vm +vmsbf_m 010110 . ..... 00001 010 ..... 1010111 @r2_vm +vmsof_m 010110 . ..... 00010 010 ..... 1010111 @r2_vm +vmsif_m 010110 . ..... 00011 010 ..... 1010111 @r2_vm +viota_m 010110 . ..... 10000 010 ..... 1010111 @r2_vm +vid_v 010110 . 00000 10001 010 ..... 1010111 @r1_vm +vmerge_vvm 010111 . ..... ..... 000 ..... 1010111 @r_vm +vmerge_vxm 010111 . ..... ..... 100 ..... 1010111 @r_vm +vmerge_vim 010111 . ..... ..... 011 ..... 1010111 @r_vm +vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r +vfmerge_vfm 010111 . ..... ..... 101 ..... 1010111 @r_vm +vmseq_vv 011000 . ..... ..... 000 ..... 1010111 @r_vm +vmseq_vx 011000 . ..... ..... 100 ..... 1010111 @r_vm +vmseq_vi 011000 . ..... ..... 011 ..... 1010111 @r_vm +vmandnot_mm 011000 - ..... ..... 010 ..... 1010111 @r +vmfeq_vv 011000 . ..... ..... 001 ..... 1010111 @r_vm +vmfeq_vf 011000 . ..... ..... 101 ..... 1010111 @r_vm +vmsne_vv 011001 . ..... ..... 000 ..... 1010111 @r_vm +vmsne_vx 011001 . ..... ..... 100 ..... 1010111 @r_vm +vmsne_vi 011001 . ..... ..... 011 ..... 1010111 @r_vm +vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r +vmfle_vv 011001 . ..... ..... 001 ..... 1010111 @r_vm +vmfle_vf 011001 . ..... ..... 101 ..... 1010111 @r_vm +vmsltu_vv 011010 . ..... ..... 000 ..... 1010111 @r_vm +vmsltu_vx 011010 . ..... ..... 100 ..... 1010111 @r_vm +vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r +vmford_vv 011010 . ..... ..... 001 ..... 1010111 @r_vm +vmford_vf 011010 . ..... ..... 101 ..... 1010111 @r_vm +vmslt_vv 011011 . ..... ..... 000 ..... 1010111 @r_vm +vmslt_vx 011011 . ..... ..... 100 ..... 1010111 @r_vm +vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r +vmflt_vv 011011 . ..... ..... 001 ..... 1010111 @r_vm +vmflt_vf 011011 . ..... ..... 101 ..... 1010111 @r_vm +vmsleu_vv 011100 . ..... ..... 000 ..... 1010111 @r_vm +vmsleu_vx 011100 . ..... ..... 100 ..... 1010111 @r_vm +vmsleu_vi 011100 . ..... ..... 011 ..... 1010111 @r_vm +vmornot_mm 011100 - ..... ..... 010 ..... 1010111 @r +vmfne_vv 011100 . ..... ..... 001 ..... 1010111 @r_vm +vmfne_vf 011100 . ..... ..... 101 ..... 1010111 @r_vm +vmsle_vv 011101 . ..... ..... 000 ..... 1010111 @r_vm +vmsle_vx 011101 . ..... ..... 100 ..... 1010111 @r_vm +vmsle_vi 011101 . ..... ..... 011 ..... 
1010111 @r_vm +vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r +vmfgt_vf 011101 . ..... ..... 101 ..... 1010111 @r_vm +vmsgtu_vx 011110 . ..... ..... 100 ..... 1010111 @r_vm +vmsgtu_vi 011110 . ..... ..... 011 ..... 1010111 @r_vm +vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r +vmsgt_vx 011111 . ..... ..... 100 ..... 1010111 @r_vm +vmsgt_vi 011111 . ..... ..... 011 ..... 1010111 @r_vm +vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r +vmfge_vf 011111 . ..... ..... 101 ..... 1010111 @r_vm +vsaddu_vv 100000 . ..... ..... 000 ..... 1010111 @r_vm +vsaddu_vx 100000 . ..... ..... 100 ..... 1010111 @r_vm +vsaddu_vi 100000 . ..... ..... 011 ..... 1010111 @r_vm +vdivu_vv 100000 . ..... ..... 010 ..... 1010111 @r_vm +vdivu_vx 100000 . ..... ..... 110 ..... 1010111 @r_vm +vfdiv_vv 100000 . ..... ..... 001 ..... 1010111 @r_vm +vfdiv_vf 100000 . ..... ..... 101 ..... 1010111 @r_vm +vsadd_vv 100001 . ..... ..... 000 ..... 1010111 @r_vm +vsadd_vx 100001 . ..... ..... 100 ..... 1010111 @r_vm +vsadd_vi 100001 . ..... ..... 011 ..... 1010111 @r_vm +vdiv_vv 100001 . ..... ..... 010 ..... 1010111 @r_vm +vdiv_vx 100001 . ..... ..... 110 ..... 1010111 @r_vm +vfrdiv_vf 100001 . ..... ..... 101 ..... 1010111 @r_vm +vssubu_vv 100010 . ..... ..... 000 ..... 1010111 @r_vm +vssubu_vx 100010 . ..... ..... 100 ..... 1010111 @r_vm +vremu_vv 100010 . ..... ..... 010 ..... 1010111 @r_vm +vremu_vx 100010 . ..... ..... 110 ..... 1010111 @r_vm +vfcvt_xu_f_v 100010 . ..... 00000 001 ..... 1010111 @r2_vm +vfcvt_x_f_v 100010 . ..... 00001 001 ..... 1010111 @r2_vm +vfcvt_f_xu_v 100010 . ..... 00010 001 ..... 1010111 @r2_vm +vfcvt_f_x_v 100010 . ..... 00011 001 ..... 1010111 @r2_vm +vfwcvt_xu_f_v 100010 . ..... 01000 001 ..... 1010111 @r2_vm +vfwcvt_x_f_v 100010 . ..... 01001 001 ..... 1010111 @r2_vm +vfwcvt_f_xu_v 100010 . ..... 01010 001 ..... 1010111 @r2_vm +vfwcvt_f_x_v 100010 . ..... 01011 001 ..... 1010111 @r2_vm +vfwcvt_f_f_v 100010 . ..... 01100 001 ..... 1010111 @r2_vm +vfncvt_xu_f_v 100010 . ..... 10000 001 ..... 1010111 @r2_vm +vfncvt_x_f_v 100010 . ..... 10001 001 ..... 1010111 @r2_vm +vfncvt_f_xu_v 100010 . ..... 10010 001 ..... 1010111 @r2_vm +vfncvt_f_x_v 100010 . ..... 10011 001 ..... 1010111 @r2_vm +vfncvt_f_f_v 100010 . ..... 10100 001 ..... 1010111 @r2_vm +vssub_vv 100011 . ..... ..... 000 ..... 1010111 @r_vm +vssub_vx 100011 . ..... ..... 100 ..... 1010111 @r_vm +vrem_vv 100011 . ..... ..... 010 ..... 1010111 @r_vm +vrem_vx 100011 . ..... ..... 110 ..... 1010111 @r_vm +vfsqrt_v 100011 . ..... 00000 001 ..... 1010111 @r2_vm +vfclass_v 100011 . ..... 10000 001 ..... 1010111 @r2_vm +vaadd_vv 100100 . ..... ..... 000 ..... 1010111 @r_vm +vaadd_vx 100100 . ..... ..... 100 ..... 1010111 @r_vm +vaadd_vi 100100 . ..... ..... 011 ..... 1010111 @r_vm +vmulhu_vv 100100 . ..... ..... 010 ..... 1010111 @r_vm +vmulhu_vx 100100 . ..... ..... 110 ..... 1010111 @r_vm +vfmul_vv 100100 . ..... ..... 001 ..... 1010111 @r_vm +vfmul_vf 100100 . ..... ..... 101 ..... 1010111 @r_vm +vsll_vv 100101 . ..... ..... 000 ..... 1010111 @r_vm +vsll_vx 100101 . ..... ..... 100 ..... 1010111 @r_vm +vsll_vi 100101 . ..... ..... 011 ..... 1010111 @r_vm +vmul_vv 100101 . ..... ..... 010 ..... 1010111 @r_vm +vmul_vx 100101 . ..... ..... 110 ..... 1010111 @r_vm +vasub_vv 100110 . ..... ..... 000 ..... 1010111 @r_vm +vasub_vx 100110 . ..... ..... 100 ..... 1010111 @r_vm +vmulhsu_vv 100110 . ..... ..... 010 ..... 1010111 @r_vm +vmulhsu_vx 100110 . ..... ..... 110 ..... 1010111 @r_vm +vsmul_vv 100111 . ..... ..... 000 ..... 
1010111 @r_vm +vsmul_vx 100111 . ..... ..... 100 ..... 1010111 @r_vm +vmulh_vv 100111 . ..... ..... 010 ..... 1010111 @r_vm +vmulh_vx 100111 . ..... ..... 110 ..... 1010111 @r_vm +vfrsub_vf 100111 . ..... ..... 101 ..... 1010111 @r_vm +vsrl_vv 101000 . ..... ..... 000 ..... 1010111 @r_vm +vsrl_vx 101000 . ..... ..... 100 ..... 1010111 @r_vm +vsrl_vi 101000 . ..... ..... 011 ..... 1010111 @r_vm +vfmadd_vv 101000 . ..... ..... 001 ..... 1010111 @r_vm +vfmadd_vf 101000 . ..... ..... 101 ..... 1010111 @r_vm +vsra_vv 101001 . ..... ..... 000 ..... 1010111 @r_vm +vsra_vx 101001 . ..... ..... 100 ..... 1010111 @r_vm +vsra_vi 101001 . ..... ..... 011 ..... 1010111 @r_vm +vmadd_vv 101001 . ..... ..... 010 ..... 1010111 @r_vm +vmadd_vx 101001 . ..... ..... 110 ..... 1010111 @r_vm +vfnmadd_vv 101001 . ..... ..... 001 ..... 1010111 @r_vm +vfnmadd_vf 101001 . ..... ..... 101 ..... 1010111 @r_vm +vssrl_vv 101010 . ..... ..... 000 ..... 1010111 @r_vm +vssrl_vx 101010 . ..... ..... 100 ..... 1010111 @r_vm +vssrl_vi 101010 . ..... ..... 011 ..... 1010111 @r_vm +vfmsub_vv 101010 . ..... ..... 001 ..... 1010111 @r_vm +vfmsub_vf 101010 . ..... ..... 101 ..... 1010111 @r_vm +vssra_vv 101011 . ..... ..... 000 ..... 1010111 @r_vm +vssra_vx 101011 . ..... ..... 100 ..... 1010111 @r_vm +vssra_vi 101011 . ..... ..... 011 ..... 1010111 @r_vm +vnmsub_vv 101011 . ..... ..... 010 ..... 1010111 @r_vm +vnmsub_vx 101011 . ..... ..... 110 ..... 1010111 @r_vm +vfnmsub_vv 101011 . ..... ..... 001 ..... 1010111 @r_vm +vfnmsub_vf 101011 . ..... ..... 101 ..... 1010111 @r_vm +vnsrl_vv 101100 . ..... ..... 000 ..... 1010111 @r_vm +vnsrl_vx 101100 . ..... ..... 100 ..... 1010111 @r_vm +vnsrl_vi 101100 . ..... ..... 011 ..... 1010111 @r_vm +vfmacc_vv 101100 . ..... ..... 001 ..... 1010111 @r_vm +vfmacc_vf 101100 . ..... ..... 101 ..... 1010111 @r_vm +vnsra_vv 101101 . ..... ..... 000 ..... 1010111 @r_vm +vnsra_vx 101101 . ..... ..... 100 ..... 1010111 @r_vm +vnsra_vi 101101 . ..... ..... 011 ..... 1010111 @r_vm +vmacc_vv 101101 . ..... ..... 010 ..... 1010111 @r_vm +vmacc_vx 101101 . ..... ..... 110 ..... 1010111 @r_vm +vfnmacc_vv 101101 . ..... ..... 001 ..... 1010111 @r_vm +vfnmacc_vf 101101 . ..... ..... 101 ..... 1010111 @r_vm +vnclipu_vv 101110 . ..... ..... 000 ..... 1010111 @r_vm +vnclipu_vx 101110 . ..... ..... 100 ..... 1010111 @r_vm +vnclipu_vi 101110 . ..... ..... 011 ..... 1010111 @r_vm +vfmsac_vv 101110 . ..... ..... 001 ..... 1010111 @r_vm +vfmsac_vf 101110 . ..... ..... 101 ..... 1010111 @r_vm +vnclip_vv 101111 . ..... ..... 000 ..... 1010111 @r_vm +vnclip_vx 101111 . ..... ..... 100 ..... 1010111 @r_vm +vnclip_vi 101111 . ..... ..... 011 ..... 1010111 @r_vm +vnmsac_vv 101111 . ..... ..... 010 ..... 1010111 @r_vm +vnmsac_vx 101111 . ..... ..... 110 ..... 1010111 @r_vm +vfnmsac_vv 101111 . ..... ..... 001 ..... 1010111 @r_vm +vfnmsac_vf 101111 . ..... ..... 101 ..... 1010111 @r_vm +vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm +vwaddu_vv 110000 . ..... ..... 010 ..... 1010111 @r_vm +vwaddu_vx 110000 . ..... ..... 110 ..... 1010111 @r_vm +vfwadd_vv 110000 . ..... ..... 001 ..... 1010111 @r_vm +vfwadd_vf 110000 . ..... ..... 101 ..... 1010111 @r_vm +vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm +vwadd_vv 110001 . ..... ..... 010 ..... 1010111 @r_vm +vwadd_vx 110001 . ..... ..... 110 ..... 1010111 @r_vm +vfwredsum_vs 110001 . ..... ..... 001 ..... 1010111 @r_vm +vwsubu_vv 110010 . ..... ..... 010 ..... 1010111 @r_vm +vwsubu_vx 110010 . ..... ..... 110 ..... 1010111 @r_vm +vfwsub_vv 110010 . ..... 
..... 001 ..... 1010111 @r_vm +vfwsub_vf 110010 . ..... ..... 101 ..... 1010111 @r_vm +vwsub_vv 110011 . ..... ..... 010 ..... 1010111 @r_vm +vwsub_vx 110011 . ..... ..... 110 ..... 1010111 @r_vm +vfwredosum_vs 110011 . ..... ..... 001 ..... 1010111 @r_vm +vwaddu_wv 110100 . ..... ..... 010 ..... 1010111 @r_vm +vwaddu_wx 110100 . ..... ..... 110 ..... 1010111 @r_vm +vfwadd_wv 110100 . ..... ..... 001 ..... 1010111 @r_vm +vfwadd_wf 110100 . ..... ..... 101 ..... 1010111 @r_vm +vwadd_wv 110101 . ..... ..... 010 ..... 1010111 @r_vm +vwadd_wx 110101 . ..... ..... 110 ..... 1010111 @r_vm +vwsubu_wv 110110 . ..... ..... 010 ..... 1010111 @r_vm +vwsubu_wx 110110 . ..... ..... 110 ..... 1010111 @r_vm +vfwsub_wv 110110 . ..... ..... 001 ..... 1010111 @r_vm +vfwsub_wf 110110 . ..... ..... 101 ..... 1010111 @r_vm +vwsub_wv 110111 . ..... ..... 010 ..... 1010111 @r_vm +vwsub_wx 110111 . ..... ..... 110 ..... 1010111 @r_vm +vwmulu_vv 111000 . ..... ..... 010 ..... 1010111 @r_vm +vwmulu_vx 111000 . ..... ..... 110 ..... 1010111 @r_vm +vfwmul_vv 111000 . ..... ..... 001 ..... 1010111 @r_vm +vfwmul_vf 111000 . ..... ..... 101 ..... 1010111 @r_vm +vwmulsu_vv 111010 . ..... ..... 010 ..... 1010111 @r_vm +vwmulsu_vx 111010 . ..... ..... 110 ..... 1010111 @r_vm +vwmul_vv 111011 . ..... ..... 010 ..... 1010111 @r_vm +vwmul_vx 111011 . ..... ..... 110 ..... 1010111 @r_vm +vwsmaccu_vv 111100 . ..... ..... 000 ..... 1010111 @r_vm +vwsmaccu_vx 111100 . ..... ..... 100 ..... 1010111 @r_vm +vwmaccu_vv 111100 . ..... ..... 010 ..... 1010111 @r_vm +vwmaccu_vx 111100 . ..... ..... 110 ..... 1010111 @r_vm +vfwmacc_vv 111100 . ..... ..... 001 ..... 1010111 @r_vm +vfwmacc_vf 111100 . ..... ..... 101 ..... 1010111 @r_vm +vwsmacc_vv 111101 . ..... ..... 000 ..... 1010111 @r_vm +vwsmacc_vx 111101 . ..... ..... 100 ..... 1010111 @r_vm +vwmacc_vv 111101 . ..... ..... 010 ..... 1010111 @r_vm +vwmacc_vx 111101 . ..... ..... 110 ..... 1010111 @r_vm +vfwnmacc_vv 111101 . ..... ..... 001 ..... 1010111 @r_vm +vfwnmacc_vf 111101 . ..... ..... 101 ..... 1010111 @r_vm +vwsmaccsu_vv 111110 . ..... ..... 000 ..... 1010111 @r_vm +vwsmaccsu_vx 111110 . ..... ..... 100 ..... 1010111 @r_vm +vwmaccsu_vv 111110 . ..... ..... 010 ..... 1010111 @r_vm +vwmaccsu_vx 111110 . ..... ..... 110 ..... 1010111 @r_vm +vfwmsac_vv 111110 . ..... ..... 001 ..... 1010111 @r_vm +vfwmsac_vf 111110 . ..... ..... 101 ..... 1010111 @r_vm +vwsmaccus_vx 111111 . ..... ..... 100 ..... 1010111 @r_vm +vwmaccus_vx 111111 . ..... ..... 110 ..... 1010111 @r_vm +vfwnmsac_vv 111111 . ..... ..... 001 ..... 1010111 @r_vm +vfwnmsac_vf 111111 . ..... ..... 101 ..... 1010111 @r_vm +vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm +vsetvl 1000000 ..... ..... 111 ..... 1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c new file mode 100644 index 0000000..dc8e6ce --- /dev/null +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -0,0 +1,484 @@ +/* + * RISC-V translation routines for the RVV Standard Extension. + * + * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#define GEN_VECTOR_R2_NFVM(INSN) \ +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ +{ \ + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ + TCGv_i32 d = tcg_const_i32(a->rd); \ + TCGv_i32 nf = tcg_const_i32(a->nf); \ + TCGv_i32 vm = tcg_const_i32(a->vm); \ + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, d); \ + tcg_temp_free_i32(s1); \ + tcg_temp_free_i32(d); \ + tcg_temp_free_i32(nf); \ + tcg_temp_free_i32(vm); \ + return true; \ +} +#define GEN_VECTOR_R_NFVM(INSN) \ +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ +{ \ + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ + TCGv_i32 d = tcg_const_i32(a->rd); \ + TCGv_i32 nf = tcg_const_i32(a->nf); \ + TCGv_i32 vm = tcg_const_i32(a->vm); \ + gen_helper_vector_##INSN(cpu_env, nf, vm, s1, s2, d);\ + tcg_temp_free_i32(s1); \ + tcg_temp_free_i32(s2); \ + tcg_temp_free_i32(d); \ + tcg_temp_free_i32(nf); \ + tcg_temp_free_i32(vm); \ + return true; \ +} + +#define GEN_VECTOR_R_WDVM(INSN) \ +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ +{ \ + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ + TCGv_i32 d = tcg_const_i32(a->rd); \ + TCGv_i32 wd = tcg_const_i32(a->wd); \ + TCGv_i32 vm = tcg_const_i32(a->vm); \ + gen_helper_vector_##INSN(cpu_env, wd, vm, s1, s2, d);\ + tcg_temp_free_i32(s1); \ + tcg_temp_free_i32(s2); \ + tcg_temp_free_i32(d); \ + tcg_temp_free_i32(wd); \ + tcg_temp_free_i32(vm); \ + return true; \ +} +#define GEN_VECTOR_R(INSN) \ +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ +{ \ + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ + TCGv_i32 d = tcg_const_i32(a->rd); \ + gen_helper_vector_##INSN(cpu_env, s1, s2, d); \ + tcg_temp_free_i32(s1); \ + tcg_temp_free_i32(s2); \ + tcg_temp_free_i32(d); \ + return true; \ +} +#define GEN_VECTOR_R2_VM(INSN) \ +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ +{ \ + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ + TCGv_i32 d = tcg_const_i32(a->rd); \ + TCGv_i32 vm = tcg_const_i32(a->vm); \ + gen_helper_vector_##INSN(cpu_env, vm, s2, d); \ + tcg_temp_free_i32(s2); \ + tcg_temp_free_i32(d); \ + tcg_temp_free_i32(vm); \ + return true; \ +} + +#define GEN_VECTOR_R1_VM(INSN) \ +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ +{ \ + TCGv_i32 d = tcg_const_i32(a->rd); \ + TCGv_i32 vm = tcg_const_i32(a->vm); \ + gen_helper_vector_##INSN(cpu_env, vm, d); \ + tcg_temp_free_i32(d); \ + tcg_temp_free_i32(vm); \ + return true; \ +} +#define GEN_VECTOR_R_VM(INSN) \ +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ +{ \ + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ + TCGv_i32 s2 = tcg_const_i32(a->rs2); \ + TCGv_i32 d = tcg_const_i32(a->rd); \ + TCGv_i32 vm = tcg_const_i32(a->vm); \ + gen_helper_vector_##INSN(cpu_env, vm, s1, s2, d); \ + tcg_temp_free_i32(s1); \ + tcg_temp_free_i32(s2); \ + tcg_temp_free_i32(d); \ + tcg_temp_free_i32(vm); \ + return true; \ +} +#define GEN_VECTOR_R2_ZIMM(INSN) \ +static bool trans_##INSN(DisasContext *ctx, arg_##INSN * a) \ +{ \ + TCGv_i32 s1 = tcg_const_i32(a->rs1); \ + TCGv_i32 zimm = tcg_const_i32(a->zimm); \ + TCGv_i32 d = tcg_const_i32(a->rd); \ + gen_helper_vector_##INSN(cpu_env, s1, zimm, d); \ + tcg_temp_free_i32(s1); \ + tcg_temp_free_i32(zimm); \ + tcg_temp_free_i32(d); \ + return true; \ 
+} + +GEN_VECTOR_R2_NFVM(vlb_v) +GEN_VECTOR_R2_NFVM(vlh_v) +GEN_VECTOR_R2_NFVM(vlw_v) +GEN_VECTOR_R2_NFVM(vle_v) +GEN_VECTOR_R2_NFVM(vlbu_v) +GEN_VECTOR_R2_NFVM(vlhu_v) +GEN_VECTOR_R2_NFVM(vlwu_v) +GEN_VECTOR_R2_NFVM(vlbff_v) +GEN_VECTOR_R2_NFVM(vlhff_v) +GEN_VECTOR_R2_NFVM(vlwff_v) +GEN_VECTOR_R2_NFVM(vleff_v) +GEN_VECTOR_R2_NFVM(vlbuff_v) +GEN_VECTOR_R2_NFVM(vlhuff_v) +GEN_VECTOR_R2_NFVM(vlwuff_v) +GEN_VECTOR_R2_NFVM(vsb_v) +GEN_VECTOR_R2_NFVM(vsh_v) +GEN_VECTOR_R2_NFVM(vsw_v) +GEN_VECTOR_R2_NFVM(vse_v) + +GEN_VECTOR_R_NFVM(vlsb_v) +GEN_VECTOR_R_NFVM(vlsh_v) +GEN_VECTOR_R_NFVM(vlsw_v) +GEN_VECTOR_R_NFVM(vlse_v) +GEN_VECTOR_R_NFVM(vlsbu_v) +GEN_VECTOR_R_NFVM(vlshu_v) +GEN_VECTOR_R_NFVM(vlswu_v) +GEN_VECTOR_R_NFVM(vssb_v) +GEN_VECTOR_R_NFVM(vssh_v) +GEN_VECTOR_R_NFVM(vssw_v) +GEN_VECTOR_R_NFVM(vsse_v) +GEN_VECTOR_R_NFVM(vlxb_v) +GEN_VECTOR_R_NFVM(vlxh_v) +GEN_VECTOR_R_NFVM(vlxw_v) +GEN_VECTOR_R_NFVM(vlxe_v) +GEN_VECTOR_R_NFVM(vlxbu_v) +GEN_VECTOR_R_NFVM(vlxhu_v) +GEN_VECTOR_R_NFVM(vlxwu_v) +GEN_VECTOR_R_NFVM(vsxb_v) +GEN_VECTOR_R_NFVM(vsxh_v) +GEN_VECTOR_R_NFVM(vsxw_v) +GEN_VECTOR_R_NFVM(vsxe_v) +GEN_VECTOR_R_NFVM(vsuxb_v) +GEN_VECTOR_R_NFVM(vsuxh_v) +GEN_VECTOR_R_NFVM(vsuxw_v) +GEN_VECTOR_R_NFVM(vsuxe_v) + +GEN_VECTOR_R_WDVM(vamoswapw_v) +GEN_VECTOR_R_WDVM(vamoswapd_v) +GEN_VECTOR_R_WDVM(vamoaddw_v) +GEN_VECTOR_R_WDVM(vamoaddd_v) +GEN_VECTOR_R_WDVM(vamoxorw_v) +GEN_VECTOR_R_WDVM(vamoxord_v) +GEN_VECTOR_R_WDVM(vamoandw_v) +GEN_VECTOR_R_WDVM(vamoandd_v) +GEN_VECTOR_R_WDVM(vamoorw_v) +GEN_VECTOR_R_WDVM(vamoord_v) +GEN_VECTOR_R_WDVM(vamominw_v) +GEN_VECTOR_R_WDVM(vamomind_v) +GEN_VECTOR_R_WDVM(vamomaxw_v) +GEN_VECTOR_R_WDVM(vamomaxd_v) +GEN_VECTOR_R_WDVM(vamominuw_v) +GEN_VECTOR_R_WDVM(vamominud_v) +GEN_VECTOR_R_WDVM(vamomaxuw_v) +GEN_VECTOR_R_WDVM(vamomaxud_v) + +GEN_VECTOR_R(vext_x_v) +GEN_VECTOR_R(vfmv_f_s) +GEN_VECTOR_R(vmv_s_x) +GEN_VECTOR_R(vfmv_s_f) +GEN_VECTOR_R(vadc_vvm) +GEN_VECTOR_R(vadc_vxm) +GEN_VECTOR_R(vadc_vim) +GEN_VECTOR_R(vmadc_vvm) +GEN_VECTOR_R(vmadc_vxm) +GEN_VECTOR_R(vmadc_vim) +GEN_VECTOR_R(vsbc_vvm) +GEN_VECTOR_R(vsbc_vxm) +GEN_VECTOR_R(vmsbc_vvm) +GEN_VECTOR_R(vmsbc_vxm) +GEN_VECTOR_R2_VM(vmpopc_m) +GEN_VECTOR_R2_VM(vmfirst_m) +GEN_VECTOR_R(vcompress_vm) +GEN_VECTOR_R(vmandnot_mm) +GEN_VECTOR_R(vmand_mm) +GEN_VECTOR_R(vmor_mm) +GEN_VECTOR_R(vmxor_mm) +GEN_VECTOR_R(vmornot_mm) +GEN_VECTOR_R(vmnand_mm) +GEN_VECTOR_R(vmnor_mm) +GEN_VECTOR_R(vmxnor_mm) +GEN_VECTOR_R2_VM(vmsbf_m) +GEN_VECTOR_R2_VM(vmsof_m) +GEN_VECTOR_R2_VM(vmsif_m) +GEN_VECTOR_R2_VM(viota_m) +GEN_VECTOR_R1_VM(vid_v) +GEN_VECTOR_R2_VM(vfcvt_xu_f_v) +GEN_VECTOR_R2_VM(vfcvt_x_f_v) +GEN_VECTOR_R2_VM(vfcvt_f_xu_v) +GEN_VECTOR_R2_VM(vfcvt_f_x_v) +GEN_VECTOR_R2_VM(vfwcvt_xu_f_v) +GEN_VECTOR_R2_VM(vfwcvt_x_f_v) +GEN_VECTOR_R2_VM(vfwcvt_f_xu_v) +GEN_VECTOR_R2_VM(vfwcvt_f_x_v) +GEN_VECTOR_R2_VM(vfwcvt_f_f_v) +GEN_VECTOR_R2_VM(vfncvt_xu_f_v) +GEN_VECTOR_R2_VM(vfncvt_x_f_v) +GEN_VECTOR_R2_VM(vfncvt_f_xu_v) +GEN_VECTOR_R2_VM(vfncvt_f_x_v) +GEN_VECTOR_R2_VM(vfncvt_f_f_v) +GEN_VECTOR_R2_VM(vfsqrt_v) +GEN_VECTOR_R2_VM(vfclass_v) + +GEN_VECTOR_R_VM(vadd_vv) +GEN_VECTOR_R_VM(vadd_vx) +GEN_VECTOR_R_VM(vadd_vi) +GEN_VECTOR_R_VM(vredsum_vs) +GEN_VECTOR_R_VM(vfadd_vv) +GEN_VECTOR_R_VM(vfadd_vf) +GEN_VECTOR_R_VM(vredand_vs) +GEN_VECTOR_R_VM(vfredsum_vs) +GEN_VECTOR_R_VM(vsub_vv) +GEN_VECTOR_R_VM(vsub_vx) +GEN_VECTOR_R_VM(vredor_vs) +GEN_VECTOR_R_VM(vfsub_vv) +GEN_VECTOR_R_VM(vfsub_vf) +GEN_VECTOR_R_VM(vrsub_vx) +GEN_VECTOR_R_VM(vrsub_vi) +GEN_VECTOR_R_VM(vredxor_vs) +GEN_VECTOR_R_VM(vfredosum_vs) +GEN_VECTOR_R_VM(vminu_vv) 
+GEN_VECTOR_R_VM(vminu_vx) +GEN_VECTOR_R_VM(vredminu_vs) +GEN_VECTOR_R_VM(vfmin_vv) +GEN_VECTOR_R_VM(vfmin_vf) +GEN_VECTOR_R_VM(vmin_vv) +GEN_VECTOR_R_VM(vmin_vx) +GEN_VECTOR_R_VM(vredmin_vs) +GEN_VECTOR_R_VM(vfredmin_vs) +GEN_VECTOR_R_VM(vmaxu_vv) +GEN_VECTOR_R_VM(vmaxu_vx) +GEN_VECTOR_R_VM(vredmaxu_vs) +GEN_VECTOR_R_VM(vfmax_vv) +GEN_VECTOR_R_VM(vfmax_vf) +GEN_VECTOR_R_VM(vmax_vv) +GEN_VECTOR_R_VM(vmax_vx) +GEN_VECTOR_R_VM(vredmax_vs) +GEN_VECTOR_R_VM(vfredmax_vs) +GEN_VECTOR_R_VM(vfsgnj_vv) +GEN_VECTOR_R_VM(vfsgnj_vf) +GEN_VECTOR_R_VM(vand_vv) +GEN_VECTOR_R_VM(vand_vx) +GEN_VECTOR_R_VM(vand_vi) +GEN_VECTOR_R_VM(vfsgnjn_vv) +GEN_VECTOR_R_VM(vfsgnjn_vf) +GEN_VECTOR_R_VM(vor_vv) +GEN_VECTOR_R_VM(vor_vx) +GEN_VECTOR_R_VM(vor_vi) +GEN_VECTOR_R_VM(vfsgnjx_vv) +GEN_VECTOR_R_VM(vfsgnjx_vf) +GEN_VECTOR_R_VM(vxor_vv) +GEN_VECTOR_R_VM(vxor_vx) +GEN_VECTOR_R_VM(vxor_vi) +GEN_VECTOR_R_VM(vrgather_vv) +GEN_VECTOR_R_VM(vrgather_vx) +GEN_VECTOR_R_VM(vrgather_vi) +GEN_VECTOR_R_VM(vslideup_vx) +GEN_VECTOR_R_VM(vslideup_vi) +GEN_VECTOR_R_VM(vslide1up_vx) +GEN_VECTOR_R_VM(vslidedown_vx) +GEN_VECTOR_R_VM(vslidedown_vi) +GEN_VECTOR_R_VM(vslide1down_vx) +GEN_VECTOR_R_VM(vmerge_vvm) +GEN_VECTOR_R_VM(vmerge_vxm) +GEN_VECTOR_R_VM(vmerge_vim) +GEN_VECTOR_R_VM(vfmerge_vfm) +GEN_VECTOR_R_VM(vmseq_vv) +GEN_VECTOR_R_VM(vmseq_vx) +GEN_VECTOR_R_VM(vmseq_vi) +GEN_VECTOR_R_VM(vmfeq_vv) +GEN_VECTOR_R_VM(vmfeq_vf) +GEN_VECTOR_R_VM(vmsne_vv) +GEN_VECTOR_R_VM(vmsne_vx) +GEN_VECTOR_R_VM(vmsne_vi) +GEN_VECTOR_R_VM(vmfle_vv) +GEN_VECTOR_R_VM(vmfle_vf) +GEN_VECTOR_R_VM(vmsltu_vv) +GEN_VECTOR_R_VM(vmsltu_vx) +GEN_VECTOR_R_VM(vmford_vv) +GEN_VECTOR_R_VM(vmford_vf) +GEN_VECTOR_R_VM(vmslt_vv) +GEN_VECTOR_R_VM(vmslt_vx) +GEN_VECTOR_R_VM(vmflt_vv) +GEN_VECTOR_R_VM(vmflt_vf) +GEN_VECTOR_R_VM(vmsleu_vv) +GEN_VECTOR_R_VM(vmsleu_vx) +GEN_VECTOR_R_VM(vmsleu_vi) +GEN_VECTOR_R_VM(vmfne_vv) +GEN_VECTOR_R_VM(vmfne_vf) +GEN_VECTOR_R_VM(vmsle_vv) +GEN_VECTOR_R_VM(vmsle_vx) +GEN_VECTOR_R_VM(vmsle_vi) +GEN_VECTOR_R_VM(vmfgt_vf) +GEN_VECTOR_R_VM(vmsgtu_vx) +GEN_VECTOR_R_VM(vmsgtu_vi) +GEN_VECTOR_R_VM(vmsgt_vx) +GEN_VECTOR_R_VM(vmsgt_vi) +GEN_VECTOR_R_VM(vmfge_vf) +GEN_VECTOR_R_VM(vsaddu_vv) +GEN_VECTOR_R_VM(vsaddu_vx) +GEN_VECTOR_R_VM(vsaddu_vi) +GEN_VECTOR_R_VM(vdivu_vv) +GEN_VECTOR_R_VM(vdivu_vx) +GEN_VECTOR_R_VM(vfdiv_vv) +GEN_VECTOR_R_VM(vfdiv_vf) +GEN_VECTOR_R_VM(vsadd_vv) +GEN_VECTOR_R_VM(vsadd_vx) +GEN_VECTOR_R_VM(vsadd_vi) +GEN_VECTOR_R_VM(vdiv_vv) +GEN_VECTOR_R_VM(vdiv_vx) +GEN_VECTOR_R_VM(vfrdiv_vf) +GEN_VECTOR_R_VM(vssubu_vv) +GEN_VECTOR_R_VM(vssubu_vx) +GEN_VECTOR_R_VM(vremu_vv) +GEN_VECTOR_R_VM(vremu_vx) +GEN_VECTOR_R_VM(vssub_vv) +GEN_VECTOR_R_VM(vssub_vx) +GEN_VECTOR_R_VM(vrem_vv) +GEN_VECTOR_R_VM(vrem_vx) +GEN_VECTOR_R_VM(vaadd_vv) +GEN_VECTOR_R_VM(vaadd_vx) +GEN_VECTOR_R_VM(vaadd_vi) +GEN_VECTOR_R_VM(vmulhu_vv) +GEN_VECTOR_R_VM(vmulhu_vx) +GEN_VECTOR_R_VM(vfmul_vv) +GEN_VECTOR_R_VM(vfmul_vf) +GEN_VECTOR_R_VM(vsll_vv) +GEN_VECTOR_R_VM(vsll_vx) +GEN_VECTOR_R_VM(vsll_vi) +GEN_VECTOR_R_VM(vmul_vv) +GEN_VECTOR_R_VM(vmul_vx) +GEN_VECTOR_R_VM(vasub_vv) +GEN_VECTOR_R_VM(vasub_vx) +GEN_VECTOR_R_VM(vmulhsu_vv) +GEN_VECTOR_R_VM(vmulhsu_vx) +GEN_VECTOR_R_VM(vsmul_vv) +GEN_VECTOR_R_VM(vsmul_vx) +GEN_VECTOR_R_VM(vmulh_vv) +GEN_VECTOR_R_VM(vmulh_vx) +GEN_VECTOR_R_VM(vfrsub_vf) +GEN_VECTOR_R_VM(vsrl_vv) +GEN_VECTOR_R_VM(vsrl_vx) +GEN_VECTOR_R_VM(vsrl_vi) +GEN_VECTOR_R_VM(vfmadd_vv) +GEN_VECTOR_R_VM(vfmadd_vf) +GEN_VECTOR_R_VM(vsra_vv) +GEN_VECTOR_R_VM(vsra_vx) +GEN_VECTOR_R_VM(vsra_vi) +GEN_VECTOR_R_VM(vmadd_vv) +GEN_VECTOR_R_VM(vmadd_vx) 
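(A note for anyone reviewing this wall of generators: each invocation stamps out an identical trans_* stub, so reading one is enough. As a concrete illustration, GEN_VECTOR_R_VM(vadd_vv) expands to roughly:

static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a)
{
    TCGv_i32 s1 = tcg_const_i32(a->rs1);
    TCGv_i32 s2 = tcg_const_i32(a->rs2);
    TCGv_i32 d = tcg_const_i32(a->rd);
    TCGv_i32 vm = tcg_const_i32(a->vm);
    gen_helper_vector_vadd_vv(cpu_env, vm, s1, s2, d);
    tcg_temp_free_i32(s1);
    tcg_temp_free_i32(s2);
    tcg_temp_free_i32(d);
    tcg_temp_free_i32(vm);
    return true;
}

i.e. the translator only boxes the decoded register numbers and the mask bit into TCG constants; all of the actual work is deferred to the run-time helpers in vector_helper.c.)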
+GEN_VECTOR_R_VM(vfnmadd_vv)
+GEN_VECTOR_R_VM(vfnmadd_vf)
+GEN_VECTOR_R_VM(vssrl_vv)
+GEN_VECTOR_R_VM(vssrl_vx)
+GEN_VECTOR_R_VM(vssrl_vi)
+GEN_VECTOR_R_VM(vfmsub_vv)
+GEN_VECTOR_R_VM(vfmsub_vf)
+GEN_VECTOR_R_VM(vssra_vv)
+GEN_VECTOR_R_VM(vssra_vx)
+GEN_VECTOR_R_VM(vssra_vi)
+GEN_VECTOR_R_VM(vnmsub_vv)
+GEN_VECTOR_R_VM(vnmsub_vx)
+GEN_VECTOR_R_VM(vfnmsub_vv)
+GEN_VECTOR_R_VM(vfnmsub_vf)
+GEN_VECTOR_R_VM(vnsrl_vv)
+GEN_VECTOR_R_VM(vnsrl_vx)
+GEN_VECTOR_R_VM(vnsrl_vi)
+GEN_VECTOR_R_VM(vfmacc_vv)
+GEN_VECTOR_R_VM(vfmacc_vf)
+GEN_VECTOR_R_VM(vnsra_vv)
+GEN_VECTOR_R_VM(vnsra_vx)
+GEN_VECTOR_R_VM(vnsra_vi)
+GEN_VECTOR_R_VM(vmacc_vv)
+GEN_VECTOR_R_VM(vmacc_vx)
+GEN_VECTOR_R_VM(vfnmacc_vv)
+GEN_VECTOR_R_VM(vfnmacc_vf)
+GEN_VECTOR_R_VM(vnclipu_vv)
+GEN_VECTOR_R_VM(vnclipu_vx)
+GEN_VECTOR_R_VM(vnclipu_vi)
+GEN_VECTOR_R_VM(vfmsac_vv)
+GEN_VECTOR_R_VM(vfmsac_vf)
+GEN_VECTOR_R_VM(vnclip_vv)
+GEN_VECTOR_R_VM(vnclip_vx)
+GEN_VECTOR_R_VM(vnclip_vi)
+GEN_VECTOR_R_VM(vnmsac_vv)
+GEN_VECTOR_R_VM(vnmsac_vx)
+GEN_VECTOR_R_VM(vfnmsac_vv)
+GEN_VECTOR_R_VM(vfnmsac_vf)
+GEN_VECTOR_R_VM(vwredsumu_vs)
+GEN_VECTOR_R_VM(vwaddu_vv)
+GEN_VECTOR_R_VM(vwaddu_vx)
+GEN_VECTOR_R_VM(vfwadd_vv)
+GEN_VECTOR_R_VM(vfwadd_vf)
+GEN_VECTOR_R_VM(vwredsum_vs)
+GEN_VECTOR_R_VM(vwadd_vv)
+GEN_VECTOR_R_VM(vwadd_vx)
+GEN_VECTOR_R_VM(vfwredsum_vs)
+GEN_VECTOR_R_VM(vwsubu_vv)
+GEN_VECTOR_R_VM(vwsubu_vx)
+GEN_VECTOR_R_VM(vfwsub_vv)
+GEN_VECTOR_R_VM(vfwsub_vf)
+GEN_VECTOR_R_VM(vwsub_vv)
+GEN_VECTOR_R_VM(vwsub_vx)
+GEN_VECTOR_R_VM(vfwredosum_vs)
+GEN_VECTOR_R_VM(vwaddu_wv)
+GEN_VECTOR_R_VM(vwaddu_wx)
+GEN_VECTOR_R_VM(vfwadd_wv)
+GEN_VECTOR_R_VM(vfwadd_wf)
+GEN_VECTOR_R_VM(vwadd_wv)
+GEN_VECTOR_R_VM(vwadd_wx)
+GEN_VECTOR_R_VM(vwsubu_wv)
+GEN_VECTOR_R_VM(vwsubu_wx)
+GEN_VECTOR_R_VM(vfwsub_wv)
+GEN_VECTOR_R_VM(vfwsub_wf)
+GEN_VECTOR_R_VM(vwsub_wv)
+GEN_VECTOR_R_VM(vwsub_wx)
+GEN_VECTOR_R_VM(vwmulu_vv)
+GEN_VECTOR_R_VM(vwmulu_vx)
+GEN_VECTOR_R_VM(vfwmul_vv)
+GEN_VECTOR_R_VM(vfwmul_vf)
+GEN_VECTOR_R_VM(vwmulsu_vv)
+GEN_VECTOR_R_VM(vwmulsu_vx)
+GEN_VECTOR_R_VM(vwmul_vv)
+GEN_VECTOR_R_VM(vwmul_vx)
+GEN_VECTOR_R_VM(vwsmaccu_vv)
+GEN_VECTOR_R_VM(vwsmaccu_vx)
+GEN_VECTOR_R_VM(vwmaccu_vv)
+GEN_VECTOR_R_VM(vwmaccu_vx)
+GEN_VECTOR_R_VM(vfwmacc_vv)
+GEN_VECTOR_R_VM(vfwmacc_vf)
+GEN_VECTOR_R_VM(vwsmacc_vv)
+GEN_VECTOR_R_VM(vwsmacc_vx)
+GEN_VECTOR_R_VM(vwmacc_vv)
+GEN_VECTOR_R_VM(vwmacc_vx)
+GEN_VECTOR_R_VM(vfwnmacc_vv)
+GEN_VECTOR_R_VM(vfwnmacc_vf)
+GEN_VECTOR_R_VM(vwsmaccsu_vv)
+GEN_VECTOR_R_VM(vwsmaccsu_vx)
+GEN_VECTOR_R_VM(vwmaccsu_vv)
+GEN_VECTOR_R_VM(vwmaccsu_vx)
+GEN_VECTOR_R_VM(vfwmsac_vv)
+GEN_VECTOR_R_VM(vfwmsac_vf)
+GEN_VECTOR_R_VM(vwsmaccus_vx)
+GEN_VECTOR_R_VM(vwmaccus_vx)
+GEN_VECTOR_R_VM(vfwnmsac_vv)
+GEN_VECTOR_R_VM(vfwnmsac_vf)
+GEN_VECTOR_R2_ZIMM(vsetvli)
+GEN_VECTOR_R(vsetvl)
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 8d6ab73..587c23e 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -706,6 +706,7 @@ static bool gen_shift(DisasContext *ctx, arg_r *a,
 #include "insn_trans/trans_rva.inc.c"
 #include "insn_trans/trans_rvf.inc.c"
 #include "insn_trans/trans_rvd.inc.c"
+#include "insn_trans/trans_rvv.inc.c"
 #include "insn_trans/trans_privileged.inc.c"
 
 /*
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
new file mode 100644
index 0000000..1f8f1ec
--- /dev/null
+++ b/target/riscv/vector_helper.c
@@ -0,0 +1,26563 @@
+/*
+ * RISC-V Vector Extension Helpers for QEMU.
+ *
+ * Copyright (c) 2011-2019 C-SKY Limited. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "cpu.h" +#include "qemu/main-loop.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "exec/translator.h" +#include "exec/cpu_ldst.h" +#include <math.h> +#include "instmap.h" + +#define VECTOR_HELPER(name) HELPER(glue(vector_, name)) +#define SIGNBIT8 (1 << 7) +#define MAX_U8 ((uint8_t)0xff) +#define MIN_U8 ((uint8_t)0x0) +#define MAX_S8 ((int8_t)0x7f) +#define MIN_S8 ((int8_t)0x80) +#define SIGNBIT16 (1 << 15) +#define MAX_U16 ((uint16_t)0xffff) +#define MIN_U16 ((uint16_t)0x0) +#define MAX_S16 ((int16_t)0x7fff) +#define MIN_S16 ((int16_t)0x8000) +#define SIGNBIT32 (1 << 31) +#define MAX_U32 ((uint32_t)0xffffffff) +#define MIN_U32 ((uint32_t)0x0) +#define MAX_S32 ((int32_t)0x7fffffff) +#define MIN_S32 ((int32_t)0x80000000) +#define SIGNBIT64 ((uint64_t)1 << 63) +#define MAX_U64 ((uint64_t)0xffffffffffffffff) +#define MIN_U64 ((uint64_t)0x0) +#define MAX_S64 ((int64_t)0x7fffffffffffffff) +#define MIN_S64 ((int64_t)0x8000000000000000) + +static int64_t sign_extend(int64_t a, int8_t width) +{ + return a << (64 - width) >> (64 - width); +} + +static int64_t extend_gpr(target_ulong reg) +{ + return sign_extend(reg, sizeof(target_ulong) * 8); +} + +static target_ulong vector_get_index(CPURISCVState *env, int rs1, int rs2, + int index, int mem, int width, int nf) +{ + target_ulong abs_off, base = env->gpr[rs1]; + target_long offset; + switch (width) { + case 8: + offset = sign_extend(env->vfp.vreg[rs2].s8[index], 8) + nf * mem; + break; + case 16: + offset = sign_extend(env->vfp.vreg[rs2].s16[index], 16) + nf * mem; + break; + case 32: + offset = sign_extend(env->vfp.vreg[rs2].s32[index], 32) + nf * mem; + break; + case 64: + offset = env->vfp.vreg[rs2].s64[index] + nf * mem; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return 0; + } + if (offset < 0) { + abs_off = ~offset + 1; + if (base >= abs_off) { + return base - abs_off; + } + } else { + if ((target_ulong)((target_ulong)offset + base) >= base) { + return (target_ulong)offset + base; + } + } + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return 0; +} + + + +/* ADD/SUB/COMPARE instructions. 
*/ +static inline uint8_t sat_add_u8(CPURISCVState *env, uint8_t a, uint8_t b) +{ + uint8_t res = a + b; + if (res < a) { + res = MAX_U8; + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint16_t sat_add_u16(CPURISCVState *env, uint16_t a, uint16_t b) +{ + uint16_t res = a + b; + if (res < a) { + res = MAX_U16; + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint32_t sat_add_u32(CPURISCVState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a + b; + if (res < a) { + res = MAX_U32; + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint64_t sat_add_u64(CPURISCVState *env, uint64_t a, uint64_t b) +{ + uint64_t res = a + b; + if (res < a) { + res = MAX_U64; + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint8_t sat_add_s8(CPURISCVState *env, uint8_t a, uint8_t b) +{ + uint8_t res = a + b; + if (((res ^ a) & SIGNBIT8) && !((a ^ b) & SIGNBIT8)) { + res = ~(((int8_t)a >> 7) ^ SIGNBIT8); + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint16_t sat_add_s16(CPURISCVState *env, uint16_t a, uint16_t b) +{ + uint16_t res = a + b; + if (((res ^ a) & SIGNBIT16) && !((a ^ b) & SIGNBIT16)) { + res = ~(((int16_t)a >> 15) ^ SIGNBIT16); + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint32_t sat_add_s32(CPURISCVState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a + b; + if (((res ^ a) & SIGNBIT32) && !((a ^ b) & SIGNBIT32)) { + res = ~(((int32_t)a >> 31) ^ SIGNBIT32); + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint64_t sat_add_s64(CPURISCVState *env, uint64_t a, uint64_t b) +{ + uint64_t res = a + b; + if (((res ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) { + res = ~(((int64_t)a >> 63) ^ SIGNBIT64); + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint8_t sat_sub_u8(CPURISCVState *env, uint8_t a, uint8_t b) +{ + uint8_t res = a - b; + if (res > a) { + res = 0; + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint16_t sat_sub_u16(CPURISCVState *env, uint16_t a, uint16_t b) +{ + uint16_t res = a - b; + if (res > a) { + res = 0; + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint32_t sat_sub_u32(CPURISCVState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a - b; + if (res > a) { + res = 0; + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint64_t sat_sub_u64(CPURISCVState *env, uint64_t a, uint64_t b) +{ + uint64_t res = a - b; + if (res > a) { + res = 0; + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint8_t sat_sub_s8(CPURISCVState *env, uint8_t a, uint8_t b) +{ + uint8_t res = a - b; + if (((res ^ a) & SIGNBIT8) && ((a ^ b) & SIGNBIT8)) { + res = ~(((int8_t)a >> 7) ^ SIGNBIT8); + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint16_t sat_sub_s16(CPURISCVState *env, uint16_t a, uint16_t b) +{ + uint16_t res = a - b; + if (((res ^ a) & SIGNBIT16) && ((a ^ b) & SIGNBIT16)) { + res = ~(((int16_t)a >> 15) ^ SIGNBIT16); + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint32_t sat_sub_s32(CPURISCVState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a - b; + if (((res ^ a) & SIGNBIT32) && ((a ^ b) & SIGNBIT32)) { + res = ~(((int32_t)a >> 31) ^ SIGNBIT32); + env->vfp.vxsat = 0x1; + + } + return res; +} + +static inline uint64_t sat_sub_s64(CPURISCVState *env, uint64_t a, uint64_t b) +{ + uint64_t res = a - b; + if (((res ^ a) & SIGNBIT64) && ((a ^ b) & SIGNBIT64)) { + res = ~(((int64_t)a >> 63) ^ SIGNBIT64); + env->vfp.vxsat = 0x1; + + } + return res; +} + +static uint64_t 
fix_data_round(CPURISCVState *env, uint64_t result,
+        uint8_t shift)
+{
+    uint64_t lsb_1 = (uint64_t)1 << shift;
+    int mod = env->vfp.vxrm;
+    uint64_t mask = ((uint64_t)1 << shift) - 1;
+
+    if (mod == 0x0) { /* rnu */
+        return lsb_1 >> 1;
+    } else if (mod == 0x1) { /* rne */
+        if ((result & mask) > (lsb_1 >> 1) ||
+            (((result & mask) == (lsb_1 >> 1)) &&
+             (((result >> shift) & 0x1)) == 1)) {
+            return lsb_1 >> 1;
+        }
+    } else if (mod == 0x3) { /* rod */
+        if (((result & mask) >= 0x1) && (((result >> shift) & 0x1) == 0)) {
+            return lsb_1;
+        }
+    }
+    return 0;
+}
+
+static int8_t saturate_s8(CPURISCVState *env, int16_t res)
+{
+    if (res > MAX_S8) {
+        env->vfp.vxsat = 0x1;
+        return MAX_S8;
+    } else if (res < MIN_S8) {
+        env->vfp.vxsat = 0x1;
+        return MIN_S8;
+    } else {
+        return res;
+    }
+}
+
+static uint8_t saturate_u8(CPURISCVState *env, uint16_t res)
+{
+    if (res > MAX_U8) {
+        env->vfp.vxsat = 0x1;
+        return MAX_U8;
+    } else {
+        return res;
+    }
+}
+
+static uint16_t saturate_u16(CPURISCVState *env, uint32_t res)
+{
+    if (res > MAX_U16) {
+        env->vfp.vxsat = 0x1;
+        return MAX_U16;
+    } else {
+        return res;
+    }
+}
+
+static uint32_t saturate_u32(CPURISCVState *env, uint64_t res)
+{
+    if (res > MAX_U32) {
+        env->vfp.vxsat = 0x1;
+        return MAX_U32;
+    } else {
+        return res;
+    }
+}
+
+static int16_t saturate_s16(CPURISCVState *env, int32_t res)
+{
+    if (res > MAX_S16) {
+        env->vfp.vxsat = 0x1;
+        return MAX_S16;
+    } else if (res < MIN_S16) {
+        env->vfp.vxsat = 0x1;
+        return MIN_S16;
+    } else {
+        return res;
+    }
+}
+
+static int32_t saturate_s32(CPURISCVState *env, int64_t res)
+{
+    if (res > MAX_S32) {
+        env->vfp.vxsat = 0x1;
+        return MAX_S32;
+    } else if (res < MIN_S32) {
+        env->vfp.vxsat = 0x1;
+        return MIN_S32;
+    } else {
+        return res;
+    }
+}
+
+static uint16_t vwsmaccu_8(CPURISCVState *env, uint8_t a, uint8_t b,
+    uint16_t c)
+{
+    uint16_t round, res;
+    uint16_t product = (uint16_t)a * (uint16_t)b;
+
+    round = (uint16_t)fix_data_round(env, (uint64_t)product, 4);
+    res = (round + product) >> 4;
+    return sat_add_u16(env, c, res);
+}
+
+static uint32_t vwsmaccu_16(CPURISCVState *env, uint16_t a, uint16_t b,
+    uint32_t c)
+{
+    uint32_t round, res;
+    uint32_t product = (uint32_t)a * (uint32_t)b;
+
+    round = (uint32_t)fix_data_round(env, (uint64_t)product, 8);
+    res = (round + product) >> 8;
+    return sat_add_u32(env, c, res);
+}
+
+static uint64_t vwsmaccu_32(CPURISCVState *env, uint32_t a, uint32_t b,
+    uint64_t c)
+{
+    uint64_t round, res;
+    uint64_t product = (uint64_t)a * (uint64_t)b;
+
+    round = (uint64_t)fix_data_round(env, (uint64_t)product, 16);
+    res = (round + product) >> 16;
+    return sat_add_u64(env, c, res);
+}
+
+static int16_t vwsmacc_8(CPURISCVState *env, int8_t a, int8_t b,
+    int16_t c)
+{
+    int16_t round, res;
+    int16_t product = (int16_t)a * (int16_t)b;
+
+    round = (int16_t)fix_data_round(env, (uint64_t)product, 4);
+    res = (int16_t)(round + product) >> 4;
+    return sat_add_s16(env, c, res);
+}
+
+static int32_t vwsmacc_16(CPURISCVState *env, int16_t a, int16_t b,
+    int32_t c)
+{
+    int32_t round, res;
+    int32_t product = (int32_t)a * (int32_t)b;
+
+    round = (int32_t)fix_data_round(env, (uint64_t)product, 8);
+    res = (int32_t)(round + product) >> 8;
+    return sat_add_s32(env, c, res);
+}
+
+static int64_t vwsmacc_32(CPURISCVState *env, int32_t a, int32_t b,
+    int64_t c)
+{
+    int64_t round, res;
+    int64_t product = (int64_t)a * (int64_t)b;
+
+    round = (int64_t)fix_data_round(env, (uint64_t)product, 16);
+    res = (int64_t)(round + product) >> 16;
+    return sat_add_s64(env, c, res);
+}
+
+static
int16_t vwsmaccsu_8(CPURISCVState *env, uint8_t a, int8_t b, + int16_t c) +{ + int16_t round, res; + int16_t product = (uint16_t)a * (int16_t)b; + + round = (int16_t)fix_data_round(env, (uint64_t)product, 4); + res = (round + product) >> 4; + return sat_sub_s16(env, c, res); +} + +static int32_t vwsmaccsu_16(CPURISCVState *env, uint16_t a, int16_t b, + uint32_t c) +{ + int32_t round, res; + int32_t product = (uint32_t)a * (int32_t)b; + + round = (int32_t)fix_data_round(env, (uint64_t)product, 8); + res = (round + product) >> 8; + return sat_sub_s32(env, c, res); +} + +static int64_t vwsmaccsu_32(CPURISCVState *env, uint32_t a, int32_t b, + int64_t c) +{ + int64_t round, res; + int64_t product = (uint64_t)a * (int64_t)b; + + round = (int64_t)fix_data_round(env, (uint64_t)product, 16); + res = (round + product) >> 16; + return sat_sub_s64(env, c, res); +} + +static int16_t vwsmaccus_8(CPURISCVState *env, int8_t a, uint8_t b, + int16_t c) +{ + int16_t round, res; + int16_t product = (int16_t)a * (uint16_t)b; + + round = (int16_t)fix_data_round(env, (uint64_t)product, 4); + res = (round + product) >> 4; + return sat_sub_s16(env, c, res); +} + +static int32_t vwsmaccus_16(CPURISCVState *env, int16_t a, uint16_t b, + int32_t c) +{ + int32_t round, res; + int32_t product = (int32_t)a * (uint32_t)b; + + round = (int32_t)fix_data_round(env, (uint64_t)product, 8); + res = (round + product) >> 8; + return sat_sub_s32(env, c, res); +} + +static uint64_t vwsmaccus_32(CPURISCVState *env, int32_t a, uint32_t b, + int64_t c) +{ + int64_t round, res; + int64_t product = (int64_t)a * (uint64_t)b; + + round = (int64_t)fix_data_round(env, (uint64_t)product, 16); + res = (round + product) >> 16; + return sat_sub_s64(env, c, res); +} + +static int8_t vssra_8(CPURISCVState *env, int8_t a, uint8_t b) +{ + int16_t round, res; + uint8_t shift = b & 0x7; + + round = (int16_t)fix_data_round(env, (uint64_t)a, shift); + res = (a + round) >> shift; + + return res; +} + +static int16_t vssra_16(CPURISCVState *env, int16_t a, uint16_t b) +{ + int32_t round, res; + uint8_t shift = b & 0xf; + + round = (int32_t)fix_data_round(env, (uint64_t)a, shift); + res = (a + round) >> shift; + return res; +} + +static int32_t vssra_32(CPURISCVState *env, int32_t a, uint32_t b) +{ + int64_t round, res; + uint8_t shift = b & 0x1f; + + round = (int64_t)fix_data_round(env, (uint64_t)a, shift); + res = (a + round) >> shift; + return res; +} + +static int64_t vssra_64(CPURISCVState *env, int64_t a, uint64_t b) +{ + int64_t round, res; + uint8_t shift = b & 0x3f; + + round = (int64_t)fix_data_round(env, (uint64_t)a, shift); + res = (a >> (shift - 1)) + (round >> (shift - 1)); + return res >> 1; +} + +static int8_t vssrai_8(CPURISCVState *env, int8_t a, uint8_t b) +{ + int16_t round, res; + + round = (int16_t)fix_data_round(env, (uint64_t)a, b); + res = (a + round) >> b; + return res; +} + +static int16_t vssrai_16(CPURISCVState *env, int16_t a, uint8_t b) +{ + int32_t round, res; + + round = (int32_t)fix_data_round(env, (uint64_t)a, b); + res = (a + round) >> b; + return res; +} + +static int32_t vssrai_32(CPURISCVState *env, int32_t a, uint8_t b) +{ + int64_t round, res; + + round = (int64_t)fix_data_round(env, (uint64_t)a, b); + res = (a + round) >> b; + return res; +} + +static int64_t vssrai_64(CPURISCVState *env, int64_t a, uint8_t b) +{ + int64_t round, res; + + round = (int64_t)fix_data_round(env, (uint64_t)a, b); + res = (a >> (b - 1)) + (round >> (b - 1)); + return res >> 1; +} + +static int8_t vnclip_16(CPURISCVState *env, int16_t 
a, uint8_t b) +{ + int16_t round, res; + uint8_t shift = b & 0xf; + + round = (int16_t)fix_data_round(env, (uint64_t)a, shift); + res = (a + round) >> shift; + + return saturate_s8(env, res); +} + +static int16_t vnclip_32(CPURISCVState *env, int32_t a, uint16_t b) +{ + int32_t round, res; + uint8_t shift = b & 0x1f; + + round = (int32_t)fix_data_round(env, (uint64_t)a, shift); + res = (a + round) >> shift; + return saturate_s16(env, res); +} + +static int32_t vnclip_64(CPURISCVState *env, int64_t a, uint32_t b) +{ + int64_t round, res; + uint8_t shift = b & 0x3f; + + round = (int64_t)fix_data_round(env, (uint64_t)a, shift); + res = (a + round) >> shift; + + return saturate_s32(env, res); +} + +static int8_t vnclipi_16(CPURISCVState *env, int16_t a, uint8_t b) +{ + int16_t round, res; + + round = (int16_t)fix_data_round(env, (uint64_t)a, b); + res = (a + round) >> b; + + return saturate_s8(env, res); +} + +static int16_t vnclipi_32(CPURISCVState *env, int32_t a, uint8_t b) +{ + int32_t round, res; + + round = (int32_t)fix_data_round(env, (uint64_t)a, b); + res = (a + round) >> b; + + return saturate_s16(env, res); +} + +static int32_t vnclipi_64(CPURISCVState *env, int64_t a, uint8_t b) +{ + int32_t round, res; + + round = (int64_t)fix_data_round(env, (uint64_t)a, b); + res = (a + round) >> b; + + return saturate_s32(env, res); +} + +static uint8_t vnclipu_16(CPURISCVState *env, uint16_t a, uint8_t b) +{ + uint16_t round, res; + uint8_t shift = b & 0xf; + + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift); + res = (a + round) >> shift; + + return saturate_u8(env, res); +} + +static uint16_t vnclipu_32(CPURISCVState *env, uint32_t a, uint16_t b) +{ + uint32_t round, res; + uint8_t shift = b & 0x1f; + + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift); + res = (a + round) >> shift; + + return saturate_u16(env, res); +} + +static uint32_t vnclipu_64(CPURISCVState *env, uint64_t a, uint32_t b) +{ + uint64_t round, res; + uint8_t shift = b & 0x3f; + + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift); + res = (a + round) >> shift; + + return saturate_u32(env, res); +} + +static uint8_t vnclipui_16(CPURISCVState *env, uint16_t a, uint8_t b) +{ + uint16_t round, res; + + round = (uint16_t)fix_data_round(env, (uint64_t)a, b); + res = (a + round) >> b; + + return saturate_u8(env, res); +} + +static uint16_t vnclipui_32(CPURISCVState *env, uint32_t a, uint8_t b) +{ + uint32_t round, res; + + round = (uint32_t)fix_data_round(env, (uint64_t)a, b); + res = (a + round) >> b; + + return saturate_u16(env, res); +} + +static uint32_t vnclipui_64(CPURISCVState *env, uint64_t a, uint8_t b) +{ + uint64_t round, res; + + round = (uint64_t)fix_data_round(env, (uint64_t)a, b); + res = (a + round) >> b; + + return saturate_u32(env, res); +} + +static uint8_t vssrl_8(CPURISCVState *env, uint8_t a, uint8_t b) +{ + uint16_t round, res; + uint8_t shift = b & 0x7; + + round = (uint16_t)fix_data_round(env, (uint64_t)a, shift); + res = (a + round) >> shift; + return res; +} + +static uint16_t vssrl_16(CPURISCVState *env, uint16_t a, uint16_t b) +{ + uint32_t round, res; + uint8_t shift = b & 0xf; + + round = (uint32_t)fix_data_round(env, (uint64_t)a, shift); + res = (a + round) >> shift; + return res; +} + +static uint32_t vssrl_32(CPURISCVState *env, uint32_t a, uint32_t b) +{ + uint64_t round, res; + uint8_t shift = b & 0x1f; + + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift); + res = (a + round) >> shift; + return res; +} + +static uint64_t vssrl_64(CPURISCVState *env, uint64_t a, 
uint64_t b) +{ + uint64_t round, res; + uint8_t shift = b & 0x3f; + + round = (uint64_t)fix_data_round(env, (uint64_t)a, shift); + res = (a >> (shift - 1)) + (round >> (shift - 1)); + return res >> 1; +} + +static uint8_t vssrli_8(CPURISCVState *env, uint8_t a, uint8_t b) +{ + uint16_t round, res; + + round = (uint16_t)fix_data_round(env, (uint64_t)a, b); + res = (a + round) >> b; + return res; +} + +static uint16_t vssrli_16(CPURISCVState *env, uint16_t a, uint8_t b) +{ + uint32_t round, res; + + round = (uint32_t)fix_data_round(env, (uint64_t)a, b); + res = (a + round) >> b; + return res; +} + +static uint32_t vssrli_32(CPURISCVState *env, uint32_t a, uint8_t b) +{ + uint64_t round, res; + + round = (uint64_t)fix_data_round(env, (uint64_t)a, b); + res = (a + round) >> b; + return res; +} + +static uint64_t vssrli_64(CPURISCVState *env, uint64_t a, uint8_t b) +{ + uint64_t round, res; + + round = (uint64_t)fix_data_round(env, (uint64_t)a, b); + res = (a >> (b - 1)) + (round >> (b - 1)); + return res >> 1; +} + +static int8_t vsmul_8(CPURISCVState *env, int8_t a, int8_t b) +{ + int16_t round; + int8_t res; + int16_t product = (int16_t)a * (int16_t)b; + + if (a == MIN_S8 && b == MIN_S8) { + env->vfp.vxsat = 1; + + return MAX_S8; + } + + round = (int16_t)fix_data_round(env, (uint64_t)product, 7); + res = sat_add_s16(env, product, round) >> 7; + return res; +} + + +static int16_t vsmul_16(CPURISCVState *env, int16_t a, int16_t b) +{ + int32_t round; + int16_t res; + int32_t product = (int32_t)a * (int32_t)b; + + if (a == MIN_S16 && b == MIN_S16) { + env->vfp.vxsat = 1; + + return MAX_S16; + } + + round = (int32_t)fix_data_round(env, (uint64_t)product, 15); + res = sat_add_s32(env, product, round) >> 15; + return res; +} + +static int32_t vsmul_32(CPURISCVState *env, int32_t a, int32_t b) +{ + int64_t round; + int32_t res; + int64_t product = (int64_t)a * (int64_t)b; + + if (a == MIN_S32 && b == MIN_S32) { + env->vfp.vxsat = 1; + + return MAX_S32; + } + + round = (int64_t)fix_data_round(env, (uint64_t)product, 31); + res = sat_add_s64(env, product, round) >> 31; + return res; +} + + +static int64_t vsmul_64(CPURISCVState *env, int64_t a, int64_t b) +{ + int64_t res; + uint64_t abs_a = a, abs_b = b; + uint64_t lo_64, hi_64, carry, round; + + if (a == MIN_S64 && b == MIN_S64) { + env->vfp.vxsat = 1; + + return MAX_S64; + } + + if (a < 0) { + abs_a = ~a + 1; + } + if (b < 0) { + abs_b = ~b + 1; + } + + /* first get the whole product in {hi_64, lo_64} */ + uint64_t a_hi = abs_a >> 32; + uint64_t a_lo = (uint32_t)abs_a; + uint64_t b_hi = abs_b >> 32; + uint64_t b_lo = (uint32_t)abs_b; + + /* + * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + + * (a_lo * b_hi) << 32 + a_lo * b_lo + * = {hi_64, lo_64} + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64 + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32 + */ + + lo_64 = abs_a * abs_b; + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + + (uint64_t)(uint32_t)(a_lo * b_hi) + + ((a_lo * b_lo) >> 32)) >> 32; + + hi_64 = a_hi * b_hi + + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + + carry; + + if ((a ^ b) & SIGNBIT64) { + lo_64 = ~lo_64; + hi_64 = ~hi_64; + if (lo_64 == MAX_U64) { + lo_64 = 0; + hi_64 += 1; + } else { + lo_64 += 1; + } + } + + /* set rem and res */ + round = fix_data_round(env, lo_64, 63); + if ((lo_64 + round) < lo_64) { + hi_64 += 1; + 
res = (hi_64 << 1); + } else { + res = (hi_64 << 1) | ((lo_64 + round) >> 63); + } + + return res; +} +static inline int8_t avg_round_s8(CPURISCVState *env, int8_t a, int8_t b) +{ + int16_t round; + int8_t res; + int16_t sum = a + b; + + round = (int16_t)fix_data_round(env, (uint64_t)sum, 1); + res = (sum + round) >> 1; + + return res; +} + +static inline int16_t avg_round_s16(CPURISCVState *env, int16_t a, int16_t b) +{ + int32_t round; + int16_t res; + int32_t sum = a + b; + + round = (int32_t)fix_data_round(env, (uint64_t)sum, 1); + res = (sum + round) >> 1; + + return res; +} + +static inline int32_t avg_round_s32(CPURISCVState *env, int32_t a, int32_t b) +{ + int64_t round; + int32_t res; + int64_t sum = a + b; + + round = (int64_t)fix_data_round(env, (uint64_t)sum, 1); + res = (sum + round) >> 1; + + return res; +} + +static inline int64_t avg_round_s64(CPURISCVState *env, int64_t a, int64_t b) +{ + int64_t rem = (a & 0x1) + (b & 0x1); + int64_t res = (a >> 1) + (b >> 1) + (rem >> 1); + int mod = env->vfp.vxrm; + + if (mod == 0x0) { /* rnu */ + if (rem == 0x1) { + return res + 1; + } + } else if (mod == 0x1) { /* rne */ + if ((rem & 0x1) == 1 && ((res & 0x1) == 1)) { + return res + 1; + } + } else if (mod == 0x3) { /* rod */ + if (((rem & 0x1) >= 0x1) && (res & 0x1) == 0) { + return res + 1; + } + } + return res; +} + +static target_ulong helper_fclass_h(uint64_t frs1) +{ + float16 f = frs1; + bool sign = float16_is_neg(f); + + if (float16_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float16_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float16_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float16_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 
1 << 1 : 1 << 6;
+    }
+}
+
+static inline bool vector_vtype_ill(CPURISCVState *env)
+{
+    if ((env->vfp.vtype >> (sizeof(target_ulong) * 8 - 1)) & 0x1) {
+        return true;
+    }
+    return false;
+}
+
+static inline void vector_vtype_set_ill(CPURISCVState *env)
+{
+    env->vfp.vtype = ((target_ulong)1) << (sizeof(target_ulong) * 8 - 1);
+    return;
+}
+
+static inline int vector_vtype_get_sew(CPURISCVState *env)
+{
+    return (env->vfp.vtype >> 2) & 0x7;
+}
+
+static inline int vector_get_width(CPURISCVState *env)
+{
+    return 8 * (1 << vector_vtype_get_sew(env));
+}
+
+static inline int vector_get_lmul(CPURISCVState *env)
+{
+    return 1 << (env->vfp.vtype & 0x3);
+}
+
+static inline int vector_get_vlmax(CPURISCVState *env)
+{
+    return vector_get_lmul(env) * VLEN / vector_get_width(env);
+}
+
+static inline int vector_elem_mask(CPURISCVState *env, uint32_t vm, int width,
+    int lmul, int index)
+{
+    int mlen = width / lmul;
+    int idx = (index * mlen) / 8;
+    int pos = (index * mlen) % 8;
+
+    return vm || ((env->vfp.vreg[0].u8[idx] >> pos) & 0x1);
+}
+
+static inline bool vector_overlap_vm_common(int lmul, int vm, int rd)
+{
+    if (lmul > 1 && vm == 0 && rd == 0) {
+        return true;
+    }
+    return false;
+}
+
+static inline bool vector_overlap_vm_force(int vm, int rd)
+{
+    if (vm == 0 && rd == 0) {
+        return true;
+    }
+    return false;
+}
+
+static inline bool vector_overlap_carry(int lmul, int rd)
+{
+    if (lmul > 1 && rd == 0) {
+        return true;
+    }
+    return false;
+}
+
+static inline bool vector_overlap_dstgp_srcgp(int rd, int dlen, int rs,
+    int slen)
+{
+    if ((rd >= rs && rd < rs + slen) || (rs >= rd && rs < rd + dlen)) {
+        return true;
+    }
+    return false;
+}
+
+static inline uint64_t vector_get_mask(int start, int end)
+{
+    return ((uint64_t)(~((uint64_t)0))) << (63 - end + start) >> (63 - end);
+}
+
+/* fetch unsigned element by width */
+static inline uint64_t vector_get_iu_elem(CPURISCVState *env, uint32_t width,
+    uint32_t rs2, uint32_t index)
+{
+    uint64_t elem;
+    if (width == 8) {
+        elem = env->vfp.vreg[rs2].u8[index];
+    } else if (width == 16) {
+        elem = env->vfp.vreg[rs2].u16[index];
+    } else if (width == 32) {
+        elem = env->vfp.vreg[rs2].u32[index];
+    } else if (width == 64) {
+        elem = env->vfp.vreg[rs2].u64[index];
+    } else { /* the max of (XLEN, FLEN) is no bigger than 64 */
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return 0;
+    }
+    return elem;
+}
+
+static inline int vector_mask_reg(CPURISCVState *env, uint32_t reg, int width,
+    int lmul, int index)
+{
+    int mlen = width / lmul;
+    int idx = (index * mlen) / 8;
+    int pos = (index * mlen) % 8;
+    return (env->vfp.vreg[reg].u8[idx] >> pos) & 0x1;
+}
+
+static inline void vector_mask_result(CPURISCVState *env, uint32_t reg,
+    int width, int lmul, int index, uint32_t result)
+{
+    int mlen = width / lmul;
+    int idx = (index * mlen) / width;
+    int pos = (index * mlen) % width;
+    uint64_t mask = ~((((uint64_t)1 << mlen) - 1) << pos);
+
+    switch (width) {
+    case 8:
+        env->vfp.vreg[reg].u8[idx] = (env->vfp.vreg[reg].u8[idx] & mask)
+                                                | (result << pos);
+        break;
+    case 16:
+        env->vfp.vreg[reg].u16[idx] = (env->vfp.vreg[reg].u16[idx] & mask)
+                                                | (result << pos);
+        break;
+    case 32:
+        env->vfp.vreg[reg].u32[idx] = (env->vfp.vreg[reg].u32[idx] & mask)
+                                                | (result << pos);
+        break;
+    case 64:
+        env->vfp.vreg[reg].u64[idx] = (env->vfp.vreg[reg].u64[idx] & mask)
+                                                | ((uint64_t)result << pos);
+        break;
+    default:
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        break;
+    }
+
+    return;
+}
+
+/**
+ * deposit16:
+ * @value: initial value to insert bit field into
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ * @fieldval: the value to insert into the bit field
+ *
+ * Deposit @fieldval into the 16 bit @value at the bit field specified
+ * by the @start and @length parameters, and return the modified
+ * @value. Bits of @value outside the bit field are not modified.
+ * Bits of @fieldval above the least significant @length bits are
+ * ignored. The bit field must lie entirely within the 16 bit word.
+ * It is valid to request that all 16 bits are modified (ie @length
+ * 16 and @start 0).
+ *
+ * Returns: the modified @value.
+ */
+static inline uint16_t deposit16(uint16_t value, int start, int length,
+                                 uint16_t fieldval)
+{
+    uint16_t mask;
+    assert(start >= 0 && length > 0 && length <= 16 - start);
+    mask = (~0U >> (16 - length)) << start;
+    return (value & ~mask) | ((fieldval << start) & mask);
+}
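(deposit16 looks like a local copy of the deposit32/deposit64 pattern from include/qemu/bitops.h; assuming that header is acceptable here, a thinner variant would simply wrap the existing helper, e.g.:

static inline uint16_t deposit16(uint16_t value, int start, int length,
                                 uint16_t fieldval)
{
    /* same contract as deposit32, narrowed to a 16 bit word */
    assert(start >= 0 && length > 0 && length <= 16 - start);
    return (uint16_t)deposit32(value, start, length, fieldval);
}

untested sketch, but it would avoid duplicating the mask arithmetic.)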
+static void vector_tail_amo(CPURISCVState *env, int vreg, int index, int width)
+{
+    switch (width) {
+    case 32:
+        env->vfp.vreg[vreg].u32[index] = 0;
+        break;
+    case 64:
+        env->vfp.vreg[vreg].u64[index] = 0;
+        break;
+    default:
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+}
+
+static void vector_tail_common(CPURISCVState *env, int vreg, int index,
+    int width)
+{
+    switch (width) {
+    case 8:
+        env->vfp.vreg[vreg].u8[index] = 0;
+        break;
+    case 16:
+        env->vfp.vreg[vreg].u16[index] = 0;
+        break;
+    case 32:
+        env->vfp.vreg[vreg].u32[index] = 0;
+        break;
+    case 64:
+        env->vfp.vreg[vreg].u64[index] = 0;
+        break;
+    default:
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+}
+
+static void vector_tail_segment(CPURISCVState *env, int vreg, int index,
+    int width, int nf, int lmul)
+{
+    switch (width) {
+    case 8:
+        while (nf >= 0) {
+            env->vfp.vreg[vreg + nf * lmul].u8[index] = 0;
+            nf--;
+        }
+        break;
+    case 16:
+        while (nf >= 0) {
+            env->vfp.vreg[vreg + nf * lmul].u16[index] = 0;
+            nf--;
+        }
+        break;
+    case 32:
+        while (nf >= 0) {
+            env->vfp.vreg[vreg + nf * lmul].u32[index] = 0;
+            nf--;
+        }
+        break;
+    case 64:
+        while (nf >= 0) {
+            env->vfp.vreg[vreg + nf * lmul].u64[index] = 0;
+            nf--;
+        }
+        break;
+    default:
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+}
+
+static void vector_tail_widen(CPURISCVState *env, int vreg, int index,
+    int width)
+{
+    switch (width) {
+    case 8:
+        env->vfp.vreg[vreg].u16[index] = 0;
+        break;
+    case 16:
+        env->vfp.vreg[vreg].u32[index] = 0;
+        break;
+    case 32:
+        env->vfp.vreg[vreg].u64[index] = 0;
+        break;
+    default:
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+}
+
+static void vector_tail_narrow(CPURISCVState *env, int vreg, int index,
+    int width)
+{
+    switch (width) {
+    case 8:
+        env->vfp.vreg[vreg].u8[index] = 0;
+        break;
+    case 16:
+        env->vfp.vreg[vreg].u16[index] = 0;
+        break;
+    case 32:
+        env->vfp.vreg[vreg].u32[index] = 0;
+        break;
+    default:
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+}
+
+static void vector_tail_fcommon(CPURISCVState *env, int vreg, int index,
+    int width)
+{
+    switch (width) {
+    case 16:
+        env->vfp.vreg[vreg].u16[index] = 0;
+        break;
+    case 32:
+        env->vfp.vreg[vreg].u32[index] = 0;
+        break;
+    case 64:
+        env->vfp.vreg[vreg].u64[index] = 0;
+        break;
+    default:
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+}
+
+static void vector_tail_fwiden(CPURISCVState *env, int vreg, int index,
+    int width)
+{
+    switch (width) {
+    case 16:
env->vfp.vreg[vreg].u32[index] = 0; + break; + case 32: + env->vfp.vreg[vreg].u64[index] = 0; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } +} + +static void vector_tail_fnarrow(CPURISCVState *env, int vreg, int index, + int width) +{ + switch (width) { + case 16: + env->vfp.vreg[vreg].u16[index] = 0; + break; + case 32: + env->vfp.vreg[vreg].u32[index] = 0; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } +} +static inline int vector_get_carry(CPURISCVState *env, int width, int lmul, + int index) +{ + int mlen = width / lmul; + int idx = (index * mlen) / 8; + int pos = (index * mlen) % 8; + + return (env->vfp.vreg[0].u8[idx] >> pos) & 0x1; +} + +static inline void vector_get_layout(CPURISCVState *env, int width, int lmul, + int index, int *idx, int *pos) +{ + int mlen = width / lmul; + *idx = (index * mlen) / 8; + *pos = (index * mlen) % 8; +} + +static bool vector_lmul_check_reg(CPURISCVState *env, uint32_t lmul, + uint32_t reg, bool widen) +{ + int legal = widen ? (lmul * 2) : lmul; + + if ((lmul != 1 && lmul != 2 && lmul != 4 && lmul != 8) || + (lmul == 8 && widen)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return false; + } + + if (reg % legal != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return false; + } + return true; +} + +static inline uint64_t u64xu64_lh(uint64_t a, uint64_t b) +{ + uint64_t hi_64, carry; + + + /* first get the whole product in {hi_64, lo_64} */ + uint64_t a_hi = a >> 32; + uint64_t a_lo = (uint32_t)a; + uint64_t b_hi = b >> 32; + uint64_t b_lo = (uint32_t)b; + + /* + * a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + + * (a_lo * b_hi) << 32 + a_lo * b_lo + * = {hi_64, lo_64} + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64 + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32 + */ + + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + + (uint64_t)(uint32_t)(a_lo * b_hi) + + ((a_lo * b_lo) >> 32)) >> 32; + + hi_64 = a_hi * b_hi + + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + + carry; + + return hi_64; +} + + +static inline int64_t s64xu64_lh(int64_t a, uint64_t b) +{ + uint64_t abs_a = a; + uint64_t lo_64, hi_64, carry; + + if (a < 0) { + abs_a = ~a + 1; + } + + /* first get the whole product in {hi_64, lo_64} */ + uint64_t a_hi = abs_a >> 32; + uint64_t a_lo = (uint32_t)abs_a; + uint64_t b_hi = b >> 32; + uint64_t b_lo = (uint32_t)b; + + /* + * abs_a * b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo) + * = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 + + * (a_lo * b_hi) << 32 + a_lo * b_lo + * = {hi_64, lo_64} + * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64 + * = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry + * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + + * (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32 + */ + + lo_64 = abs_a * b; + carry = ((uint64_t)(uint32_t)(a_hi * b_lo) + + (uint64_t)(uint32_t)(a_lo * b_hi) + + ((a_lo * b_lo) >> 32)) >> 32; + + hi_64 = a_hi * b_hi + + ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) + + carry; + if ((a ^ b) & SIGNBIT64) { + lo_64 = ~lo_64; + hi_64 = ~hi_64; + if (lo_64 == MAX_U64) { + lo_64 = 0; + hi_64 += 1; + } else { + lo_64 += 1; + } + } + return hi_64; +} + + +static inline int64_t s64xs64_lh(int64_t a, int64_t b) +{ + uint64_t 
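The open-coded 64x64->128 multiplies here (u64xu64_lh() and friends) re-derive the cross-term and carry arithmetic that util/host-utils already provides via mulu64()/muls64(). A sketch of the same helpers on top of the existing API:

    #include "qemu/host-utils.h"

    static inline uint64_t u64xu64_lh(uint64_t a, uint64_t b)
    {
        uint64_t lo, hi;
        mulu64(&lo, &hi, a, b);    /* unsigned 64 x 64 -> {hi, lo} */
        return hi;
    }

    static inline int64_t s64xs64_lh(int64_t a, int64_t b)
    {
        uint64_t lo, hi;
        muls64(&lo, &hi, a, b);    /* signed 64 x 64 -> {hi, lo} */
        return (int64_t)hi;
    }

Only the mixed-sign s64xu64_lh() has no direct helper; it could still be a negate-and-fix-up around mulu64() rather than a third hand-rolled copy.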
abs_a = a, abs_b = b;
+    uint64_t lo_64, hi_64, carry;
+
+    if (a < 0) {
+        abs_a = ~a + 1;
+    }
+    if (b < 0) {
+        abs_b = ~b + 1;
+    }
+
+    /* first get the whole product in {hi_64, lo_64} */
+    uint64_t a_hi = abs_a >> 32;
+    uint64_t a_lo = (uint32_t)abs_a;
+    uint64_t b_hi = abs_b >> 32;
+    uint64_t b_lo = (uint32_t)abs_b;
+
+    /*
+     * abs_a * abs_b = (a_hi << 32 + a_lo) * (b_hi << 32 + b_lo)
+     *               = (a_hi * b_hi) << 64 + (a_hi * b_lo) << 32 +
+     *                 (a_lo * b_hi) << 32 + a_lo * b_lo
+     *               = {hi_64, lo_64}
+     * hi_64 = ((a_hi * b_lo) << 32 + (a_lo * b_hi) << 32 + (a_lo * b_lo)) >> 64
+     *       = (a_hi * b_lo) >> 32 + (a_lo * b_hi) >> 32 + carry
+     * carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
+     *          (uint64_t)(uint32_t)(a_lo * b_hi) + (a_lo * b_lo) >> 32) >> 32
+     */
+
+    lo_64 = abs_a * abs_b;
+    carry = ((uint64_t)(uint32_t)(a_hi * b_lo) +
+             (uint64_t)(uint32_t)(a_lo * b_hi) +
+             ((a_lo * b_lo) >> 32)) >> 32;
+
+    hi_64 = a_hi * b_hi +
+            ((a_hi * b_lo) >> 32) + ((a_lo * b_hi) >> 32) +
+            carry;
+
+    if ((a ^ b) & SIGNBIT64) {
+        lo_64 = ~lo_64;
+        hi_64 = ~hi_64;
+        if (lo_64 == MAX_U64) {
+            lo_64 = 0;
+            hi_64 += 1;
+        } else {
+            lo_64 += 1;
+        }
+    }
+    return hi_64;
+}
+
+void VECTOR_HELPER(vsetvl)(CPURISCVState *env, uint32_t rs1, uint32_t rs2,
+    uint32_t rd)
+{
+    int sew, max_sew, vlmax, vl;
+
+    if (rs2 == 0) {
+        vector_vtype_set_ill(env);
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    env->vfp.vtype = env->gpr[rs2];
+    sew = vector_get_width(env) / 8;    /* SEW in bytes, as in vsetvli */
+    max_sew = sizeof(target_ulong);
+
+    if (env->misa & RVD) {
+        max_sew = max_sew > 8 ? max_sew : 8;
+    } else if (env->misa & RVF) {
+        max_sew = max_sew > 4 ? max_sew : 4;
+    }
+    if (sew > max_sew) {
+        vector_vtype_set_ill(env);
+        return;
+    }
+
+    vlmax = vector_get_vlmax(env);
+    if (rs1 == 0) {
+        vl = vlmax;
+    } else if (env->gpr[rs1] <= vlmax) {
+        vl = env->gpr[rs1];
+    } else if (env->gpr[rs1] < 2 * vlmax) {
+        vl = (env->gpr[rs1] + 1) / 2;    /* integer ceil(AVL / 2) */
+    } else {
+        vl = vlmax;
+    }
+    env->vfp.vl = vl;
+    env->vfp.vstart = 0;
+    env->gpr[rd] = vl;
+}
+
+void VECTOR_HELPER(vsetvli)(CPURISCVState *env, uint32_t rs1, uint32_t zimm,
+    uint32_t rd)
+{
+    int sew, max_sew, vlmax, vl;
+
+    env->vfp.vtype = zimm;
+    sew = vector_get_width(env) / 8;
+    max_sew = sizeof(target_ulong);
+
+    if (env->misa & RVD) {
+        max_sew = max_sew > 8 ? max_sew : 8;
+    } else if (env->misa & RVF) {
+        max_sew = max_sew > 4 ? max_sew : 4;
+    }
+    if (sew > max_sew) {
+        vector_vtype_set_ill(env);
+        return;
+    }
+
+    vlmax = vector_get_vlmax(env);
+    if (rs1 == 0) {
+        vl = vlmax;
+    } else if (env->gpr[rs1] <= vlmax) {
+        vl = env->gpr[rs1];
+    } else if (env->gpr[rs1] < 2 * vlmax) {
+        vl = (env->gpr[rs1] + 1) / 2;    /* integer ceil(AVL / 2) */
+    } else {
+        vl = vlmax;
+    }
+    env->vfp.vl = vl;
+    env->vfp.vstart = 0;
+    env->gpr[rd] = vl;
+}
+
+/*
+ * vrgather.vv vd, vs2, vs1, vm #
+ * vd[i] = (vs1[i] >= VLMAX) ?
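To make the AVL-to-vl mapping in vsetvl/vsetvli concrete: the rule is the v0.7.1 "even split" policy (take AVL up to VLMAX, split evenly below 2*VLMAX, cap at VLMAX). A minimal standalone check, with VLMAX chosen arbitrarily:

    static int set_vl(int avl, int vlmax)
    {
        if (avl <= vlmax) {
            return avl;
        } else if (avl < 2 * vlmax) {
            return (avl + 1) / 2;   /* ceil(avl / 2): split across two strips */
        }
        return vlmax;
    }

    /* set_vl(16, 16) == 16, set_vl(17, 16) == 9, set_vl(40, 16) == 16 */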
0 : vs2[vs1[i]]; + */ +void VECTOR_HELPER(vrgather_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src, src1; + uint32_t index; + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + if (env->vfp.vstart >= vl) { + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + index = env->vfp.vreg[src1].u8[j]; + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (index >= vlmax) { + env->vfp.vreg[dest].u8[j] = 0; + } else { + src = rs2 + (index / (VLEN / width)); + index = index % (VLEN / width); + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src].u8[index]; + } + } + break; + case 16: + index = env->vfp.vreg[src1].u16[j]; + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (index >= vlmax) { + env->vfp.vreg[dest].u16[j] = 0; + } else { + src = rs2 + (index / (VLEN / width)); + index = index % (VLEN / width); + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src].u16[index]; + } + } + break; + case 32: + index = env->vfp.vreg[src1].u32[j]; + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (index >= vlmax) { + env->vfp.vreg[dest].u32[j] = 0; + } else { + src = rs2 + (index / (VLEN / width)); + index = index % (VLEN / width); + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src].u32[index]; + } + } + break; + case 64: + index = env->vfp.vreg[src1].u64[j]; + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (index >= vlmax) { + env->vfp.vreg[dest].u64[j] = 0; + } else { + src = rs2 + (index / (VLEN / width)); + index = index % (VLEN / width); + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src].u64[index]; + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 
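vrgather.vv above, and nearly every helper that follows, ends with

        return;
        env->vfp.vstart = 0;

so the vstart reset is dead code and vstart is never cleared on normal completion. Presumably the two statements are simply in the wrong order:

        env->vfp.vstart = 0;
        return;

This pattern repeats throughout the file and is worth fixing mechanically.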
0 : vs2[rs1] */ +void VECTOR_HELPER(vrgather_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src; + uint32_t index; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + index = env->gpr[rs1]; + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (index >= vlmax) { + env->vfp.vreg[dest].u8[j] = 0; + } else { + src = rs2 + (index / (VLEN / width)); + index = index % (VLEN / width); + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src].u8[index]; + } + } + break; + case 16: + index = env->gpr[rs1]; + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (index >= vlmax) { + env->vfp.vreg[dest].u16[j] = 0; + } else { + src = rs2 + (index / (VLEN / width)); + index = index % (VLEN / width); + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src].u16[index]; + } + } + break; + case 32: + index = env->gpr[rs1]; + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (index >= vlmax) { + env->vfp.vreg[dest].u32[j] = 0; + } else { + src = rs2 + (index / (VLEN / width)); + index = index % (VLEN / width); + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src].u32[index]; + } + } + break; + case 64: + index = env->gpr[rs1]; + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (index >= vlmax) { + env->vfp.vreg[dest].u64[j] = 0; + } else { + src = rs2 + (index / (VLEN / width)); + index = index % (VLEN / width); + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src].u64[index]; + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 
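Two small points on vrgather.vx: the header comment should read vs2[x[rs1]] rather than vs2[rs1] (the code does index through x[rs1]), and the scalar index is loop-invariant but re-read inside every switch arm. A hypothetical reshaping of one element width, using flat arrays to elide the LMUL striding:

    static void vrgather_vx_u8(uint8_t *vd, const uint8_t *vs2,
                               uint64_t rs1_val, int vl, int vlmax)
    {
        uint64_t index = rs1_val;              /* invariant across elements */
        for (int i = 0; i < vl; i++) {
            vd[i] = (index >= (uint64_t)vlmax) ? 0 : vs2[index];
        }
    }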
0 : vs2[imm] */ +void VECTOR_HELPER(vrgather_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src; + uint32_t index; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + index = rs1; + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (index >= vlmax) { + env->vfp.vreg[dest].u8[j] = 0; + } else { + src = rs2 + (index / (VLEN / width)); + index = index % (VLEN / width); + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src].u8[index]; + } + } + break; + case 16: + index = rs1; + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (index >= vlmax) { + env->vfp.vreg[dest].u16[j] = 0; + } else { + src = rs2 + (index / (VLEN / width)); + index = index % (VLEN / width); + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src].u16[index]; + } + } + break; + case 32: + index = rs1; + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (index >= vlmax) { + env->vfp.vreg[dest].u32[j] = 0; + } else { + src = rs2 + (index / (VLEN / width)); + index = index % (VLEN / width); + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src].u32[index]; + } + } + break; + case 64: + index = rs1; + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (index >= vlmax) { + env->vfp.vreg[dest].u64[j] = 0; + } else { + src = rs2 + (index / (VLEN / width)); + index = index % (VLEN / width); + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src].u64[index]; + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vext_x_v)(CPURISCVState *env, uint32_t rs1, uint32_t rs2, + uint32_t rd) +{ + int width; + uint64_t elem; + target_ulong index = env->gpr[rs1]; + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + width = vector_get_width(env); + + elem = vector_get_iu_elem(env, width, rs2, index); + if (index >= VLEN / width) { /* index is too big */ + env->gpr[rd] = 0; + } else { + env->gpr[rd] = elem; + } + return; + env->vfp.vstart = 0; +} + +/* vfmv.f.s rd, vs2 # rd = vs2[0] (rs1=0) */ +void VECTOR_HELPER(vfmv_f_s)(CPURISCVState *env, uint32_t rs1, uint32_t rs2, + uint32_t rd) +{ + int width, flen; + uint64_t mask; + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + if (env->misa & RVD) { + flen = 8; + } else if (env->misa & RVF) { + flen = 4; + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + width = vector_get_width(env); + mask = (~((uint64_t)0)) << width; + + if (width == 8) { + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s8[0] | mask; + } else if (width == 16) { + env->fpr[rd] = (uint64_t)env->vfp.vreg[rs2].s16[0] | mask; + } else if (width == 32) { + env->fpr[rd] = 
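In vext_x_v above, vector_get_iu_elem() is called before the index >= VLEN / width bounds check, so if that helper indexes the register file directly the out-of-range access happens before its result is discarded. Checking first avoids depending on its behaviour (sketch, using the patch's own names):

    if (index >= VLEN / width) {    /* bounds check before the access */
        env->gpr[rd] = 0;
    } else {
        env->gpr[rd] = vector_get_iu_elem(env, width, rs2, index);
    }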
(uint64_t)env->vfp.vreg[rs2].s32[0] | mask; + } else if (width == 64) { + if (flen == 4) { + env->fpr[rd] = env->vfp.vreg[rs2].s64[0] & 0xffffffff; + } else { + env->fpr[rd] = env->vfp.vreg[rs2].s64[0]; + } + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + return; + env->vfp.vstart = 0; +} + +/* vmv.s.x vd, rs1 # vd[0] = rs1 */ +void VECTOR_HELPER(vmv_s_x)(CPURISCVState *env, uint32_t rs1, uint32_t rs2, + uint32_t rd) +{ + int width; + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart >= env->vfp.vl) { + return; + } + + memset(&env->vfp.vreg[rd].u8[0], 0, VLEN / 8); + width = vector_get_width(env); + + if (width == 8) { + env->vfp.vreg[rd].u8[0] = env->gpr[rs1]; + } else if (width == 16) { + env->vfp.vreg[rd].u16[0] = env->gpr[rs1]; + } else if (width == 32) { + env->vfp.vreg[rd].u32[0] = env->gpr[rs1]; + } else if (width == 64) { + env->vfp.vreg[rd].u64[0] = env->gpr[rs1]; + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + return; + env->vfp.vstart = 0; +} + +/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2 = 0) */ +void VECTOR_HELPER(vfmv_s_f)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, flen; + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + if (env->vfp.vstart >= env->vfp.vl) { + return; + } + if (env->misa & RVD) { + flen = 8; + } else if (env->misa & RVF) { + flen = 4; + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + width = vector_get_width(env); + + if (width == 8) { + env->vfp.vreg[rd].u8[0] = env->fpr[rs1]; + } else if (width == 16) { + env->vfp.vreg[rd].u16[0] = env->fpr[rs1]; + } else if (width == 32) { + env->vfp.vreg[rd].u32[0] = env->fpr[rs1]; + } else if (width == 64) { + if (flen == 4) { /* 1-extended to FLEN bits */ + env->vfp.vreg[rd].u64[0] = (uint64_t)env->fpr[rs1] + | 0xffffffff00000000; + } else { + env->vfp.vreg[rd].u64[0] = env->fpr[rs1]; + } + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + return; + env->vfp.vstart = 0; +} + +/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ +void VECTOR_HELPER(vslideup_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax, offset; + int i, j, dest, src, k; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + offset = env->gpr[rs1]; + + if (offset < env->vfp.vstart) { + offset = env->vfp.vstart; + } + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src = rs2 + ((i - offset) / (VLEN / width)); + j = i % (VLEN / width); + k = (i - offset) % (VLEN / width); + if (i < offset) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src].u8[k]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src].u16[k]; + } + break; + case 32: + if 
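vfmv.f.s and vfmv.s.f one-extend narrower values into the 64-bit FP register (the `| mask` and `| 0xffffffff00000000` above), which is the NaN-boxing convention of setting all upper bits. A standalone illustration of what the boxing produces:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t f32bits = 0x3f800000;                  /* 1.0f */
        uint64_t boxed = (uint64_t)f32bits | (~(uint64_t)0 << 32);
        printf("%016llx\n", (unsigned long long)boxed); /* ffffffff3f800000 */
        return 0;
    }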
(vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src].u32[k]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src].u64[k]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + + env->vfp.vstart = 0; +} + +/* vslideup.vi vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ +void VECTOR_HELPER(vslideup_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax, offset; + int i, j, dest, src, k; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + offset = rs1; + + if (offset < env->vfp.vstart) { + offset = env->vfp.vstart; + } + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src = rs2 + ((i - offset) / (VLEN / width)); + j = i % (VLEN / width); + k = (i - offset) % (VLEN / width); + if (i < offset) { + continue; + } else if (i < vl) { + if (width == 8) { + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src].u8[k]; + } + } else if (width == 16) { + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src].u16[k]; + } + } else if (width == 32) { + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src].u32[k]; + } + } else if (width == 64) { + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src].u64[k]; + } + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ +void VECTOR_HELPER(vslide1up_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src, k; + uint64_t s1; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + s1 = env->gpr[rs1]; + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src = rs2 + ((i - 1) / (VLEN / width)); + j = i % (VLEN / width); + k = (i - 1) % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i == 0 && env->vfp.vstart == 0) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = s1; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = s1; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = s1; + } + 
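The dest/src/j/k bookkeeping in vslideup.{vx,vi} implements vd[i] = vs2[i - offset] with destination elements below the offset left untouched. A flat-array reference version of the net effect:

    static void slideup_ref(uint8_t *vd, const uint8_t *vs2,
                            int offset, int vl)
    {
        for (int i = offset; i < vl; i++) {
            vd[i] = vs2[i - offset];    /* vd[0..offset-1] unchanged */
        }
    }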
break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = s1; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src].u16[k]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src].u32[k]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src].u64[k]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i + rs1] */ +void VECTOR_HELPER(vslidedown_vx)(CPURISCVState *env, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax, offset; + int i, j, dest, src, k; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + offset = env->gpr[rs1]; + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src = rs2 + ((i + offset) / (VLEN / width)); + j = i % (VLEN / width); + k = (i + offset) % (VLEN / width); + if (i < offset) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (i + offset < vlmax) { + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src].u8[k]; + } else { + env->vfp.vreg[dest].u8[j] = 0; + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (i + offset < vlmax) { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src].u16[k]; + } else { + env->vfp.vreg[dest].u16[j] = 0; + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (i + offset < vlmax) { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src].u32[k]; + } else { + env->vfp.vreg[dest].u32[j] = 0; + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (i + offset < vlmax) { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src].u64[k]; + } else { + env->vfp.vreg[dest].u64[j] = 0; + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vslidedown_vi)(CPURISCVState *env, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax, offset; + int i, j, dest, src, k; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + offset = rs1; + + for (i = 
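In both vslidedown helpers the element loop skips i < offset, leaving vd[0..offset-1] untouched. For slidedown those elements are well-defined (vd[i] = vs2[i + offset], with sources past VLMAX reading as zero) and should be written; the skip looks copied from the slideup case, where it is correct. Flat-array reference for comparison:

    static void slidedown_ref(uint8_t *vd, const uint8_t *vs2,
                              int offset, int vl, int vlmax)
    {
        for (int i = 0; i < vl; i++) {
            vd[i] = (i + offset < vlmax) ? vs2[i + offset] : 0;
        }
    }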
0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src = rs2 + ((i + offset) / (VLEN / width)); + j = i % (VLEN / width); + k = (i + offset) % (VLEN / width); + if (i < offset) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (i + offset < vlmax) { + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src].u8[k]; + } else { + env->vfp.vreg[dest].u8[j] = 0; + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (i + offset < vlmax) { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src].u16[k]; + } else { + env->vfp.vreg[dest].u16[j] = 0; + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (i + offset < vlmax) { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src].u32[k]; + } else { + env->vfp.vreg[dest].u32[j] = 0; + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (i + offset < vlmax) { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src].u64[k]; + } else { + env->vfp.vreg[dest].u64[j] = 0; + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vslide1down.vx vd, vs2, rs1, vm # vd[vl - 1]=x[rs1], vd[i] = vs2[i + 1] */ +void VECTOR_HELPER(vslide1down_vx)(CPURISCVState *env, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src, k; + uint64_t s1; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_force(vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + s1 = env->gpr[rs1]; + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src = rs2 + ((i + 1) / (VLEN / width)); + j = i % (VLEN / width); + k = (i + 1) % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i == vl - 1 && i >= env->vfp.vstart) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = s1; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = s1; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = s1; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = s1; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else if (i < vl - 1) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src].u8[k]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src].u16[k]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src].u32[k]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src].u64[k]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* 
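For reference, the net effect of vslide1up/vslide1down above is a shift by one with the scalar entering at one end; the slide1down case, flat arrays again:

    static void slide1down_ref(uint8_t *vd, const uint8_t *vs2,
                               uint8_t scalar, int vl)
    {
        for (int i = 0; i + 1 < vl; i++) {
            vd[i] = vs2[i + 1];
        }
        if (vl > 0) {
            vd[vl - 1] = scalar;   /* x[rs1] lands in the last active element */
        }
    }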
+ * vcompress.vm vd, vs2, vs1 + * Compress into vd elements of vs2 where vs1 is enabled + */ +void VECTOR_HELPER(vcompress_vm)(CPURISCVState *env, uint32_t rs1, uint32_t rs2, + uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src; + uint32_t vd_idx, num = 0; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) + || vector_overlap_dstgp_srcgp(rd, lmul, rs1, 1) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + /* zeroed all elements */ + for (i = 0; i < lmul; i++) { + memset(&env->vfp.vreg[rd + i].u64[0], 0, VLEN / 8); + } + + for (i = 0; i < vlmax; i++) { + dest = rd + (num / (VLEN / width)); + src = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + vd_idx = num % (VLEN / width); + if (i < vl) { + switch (width) { + case 8: + if (vector_mask_reg(env, rs1, width, lmul, i)) { + env->vfp.vreg[dest].u8[vd_idx] = + env->vfp.vreg[src].u8[j]; + num++; + } + break; + case 16: + if (vector_mask_reg(env, rs1, width, lmul, i)) { + env->vfp.vreg[dest].u16[vd_idx] = + env->vfp.vreg[src].u16[j]; + num++; + } + break; + case 32: + if (vector_mask_reg(env, rs1, width, lmul, i)) { + env->vfp.vreg[dest].u32[vd_idx] = + env->vfp.vreg[src].u32[j]; + num++; + } + break; + case 64: + if (vector_mask_reg(env, rs1, width, lmul, i)) { + env->vfp.vreg[dest].u64[vd_idx] = + env->vfp.vreg[src].u64[j]; + num++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] + + env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j] + + env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j] + + env->vfp.vreg[src2].u32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j] + + env->vfp.vreg[src2].u64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } 
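The vcompress_vm helper above interleaves the mask test, dense write index and LMUL striding; the underlying operation is just dense packing of the selected elements. Scalar reference:

    static int compress_ref(uint8_t *vd, const uint8_t *vs2,
                            const uint8_t *mask, int vl)
    {
        int num = 0;
        for (int i = 0; i < vl; i++) {
            if (mask[i]) {
                vd[num++] = vs2[i];    /* pack enabled elements from index 0 */
            }
        }
        return num;    /* number of elements written */
    }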
+ env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] + + env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] + + env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] + + env->vfp.vreg[src2].u32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = + (uint64_t)extend_gpr(env->gpr[rs1]) + + env->vfp.vreg[src2].u64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) + + env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) + + env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) + + env->vfp.vreg[src2].s32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) + + env->vfp.vreg[src2].s64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vredsum.vs vd, vs2, vs1, vm # vd[0] = sum(vs1[0] , vs2[*]) */ +void VECTOR_HELPER(vredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + + int width, lmul, vl, vlmax; + int i, j, src2; + uint64_t sum = 0; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + 
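The .vi forms above use sign_extend(rs1, 5) to turn the 5-bit encoded immediate into simm5. A standalone equivalent with sample values (assumes arithmetic right shift of signed values, as QEMU does elsewhere):

    static int64_t sext5(uint32_t imm)
    {
        return ((int64_t)(imm & 0x1f) << 59) >> 59;
    }

    /* sext5(0x0f) == 15, sext5(0x10) == -16, sext5(0x1f) == -1 */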
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum += env->vfp.vreg[src2].u8[j]; + } + if (i == 0) { + sum += env->vfp.vreg[rs1].u8[0]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u8[0] = sum; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum += env->vfp.vreg[src2].u16[j]; + } + if (i == 0) { + sum += env->vfp.vreg[rs1].u16[0]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u16[0] = sum; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum += env->vfp.vreg[src2].u32[j]; + } + if (i == 0) { + sum += env->vfp.vreg[rs1].u32[0]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u32[0] = sum; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum += env->vfp.vreg[src2].u64[j]; + } + if (i == 0) { + sum += env->vfp.vreg[rs1].u64[0]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u64[0] = sum; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfadd.vv vd, vs2, vs1, vm # Vector-vector */ +void VECTOR_HELPER(vfadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_add( + env->vfp.vreg[src1].f16[j], + env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_add( + env->vfp.vreg[src1].f32[j], + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_add( + env->vfp.vreg[src1].f64[j], + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfadd.vf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vfadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if 
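The vredsum.vs loop above folds the seed load (i == 0), the masked accumulation, and the final store (i == vl - 1) into one pass; per element width, together with the tail zeroing of vd, the net effect is:

    static void redsum_ref_u8(uint8_t *vd, const uint8_t *vs2,
                              const uint8_t *mask, uint8_t seed, int vl)
    {
        uint64_t sum = seed;           /* wide accumulator, as in the patch */
        for (int i = 0; i < vl; i++) {
            if (mask[i]) {
                sum += vs2[i];
            }
        }
        vd[0] = (uint8_t)sum;          /* truncated to SEW on write-back */
    }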
(vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_add( + env->fpr[rs1], + env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_add( + env->fpr[rs1], + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_add( + env->fpr[rs1], + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vredand.vs vd, vs2, vs1, vm # vd[0] = and( vs1[0] , vs2[*] ) */ +void VECTOR_HELPER(vredand_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + uint64_t res = 0; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 8: + if (i == 0) { + res = env->vfp.vreg[rs1].u8[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + res &= env->vfp.vreg[src2].u8[j]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u8[0] = res; + } + break; + case 16: + if (i == 0) { + res = env->vfp.vreg[rs1].u16[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + res &= env->vfp.vreg[src2].u16[j]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u16[0] = res; + } + break; + case 32: + if (i == 0) { + res = env->vfp.vreg[rs1].u32[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + res &= env->vfp.vreg[src2].u32[j]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u32[0] = res; + } + break; + case 64: + if (i == 0) { + res = env->vfp.vreg[rs1].u64[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + res &= env->vfp.vreg[src2].u64[j]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u64[0] = res; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfredsum.vs vd, vs2, vs1, vm # Unordered sum */ +void VECTOR_HELPER(vfredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + float16 sum16 = 0.0f; + float32 sum32 = 0.0f; + float64 sum64 = 
0.0f; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 16: + if (i == 0) { + sum16 = env->vfp.vreg[rs1].f16[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum16 = float16_add(sum16, env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + if (i == vl - 1) { + env->vfp.vreg[rd].f16[0] = sum16; + } + break; + case 32: + if (i == 0) { + sum32 = env->vfp.vreg[rs1].f32[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum32 = float32_add(sum32, env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + if (i == vl - 1) { + env->vfp.vreg[rd].f32[0] = sum32; + } + break; + case 64: + if (i == 0) { + sum64 = env->vfp.vreg[rs1].f64[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum64 = float64_add(sum64, env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + if (i == vl - 1) { + env->vfp.vreg[rd].f64[0] = sum64; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] + - env->vfp.vreg[src1].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j] + - env->vfp.vreg[src1].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j] + - env->vfp.vreg[src1].u32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j] + - env->vfp.vreg[src1].u64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if 
(vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] + - env->gpr[rs1]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j] + - env->gpr[rs1]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j] + - env->gpr[rs1]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j] + - (uint64_t)extend_gpr(env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vredor.vs vd, vs2, vs1, vm # vd[0] = or( vs1[0] , vs2[*] ) */ +void VECTOR_HELPER(vredor_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + uint64_t res = 0; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 8: + if (i == 0) { + res = env->vfp.vreg[rs1].u8[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + res |= env->vfp.vreg[src2].u8[j]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u8[0] = res; + } + break; + case 16: + if (i == 0) { + res = env->vfp.vreg[rs1].u16[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + res |= env->vfp.vreg[src2].u16[j]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u16[0] = res; + } + break; + case 32: + if (i == 0) { + res = env->vfp.vreg[rs1].u32[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + res |= env->vfp.vreg[src2].u32[j]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u32[0] = res; + } + break; + case 64: + if (i == 0) { + res = env->vfp.vreg[rs1].u64[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + res |= env->vfp.vreg[src2].u64[j]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u64[0] = res; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfsub.vv vd, vs2, vs1, vm # Vector-vector */ +void VECTOR_HELPER(vfsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) 
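For SEW=64 the .vx forms above use (uint64_t)extend_gpr(env->gpr[rs1]) rather than the raw register; presumably extend_gpr() sign-extends the XLEN-wide scalar to 64 bits, i.e. on RV32 something like this hypothetical stand-in (the patch's own definition is not visible in this hunk):

    static uint64_t extend_gpr32(uint32_t gpr)
    {
        return (uint64_t)(int64_t)(int32_t)gpr;   /* sign-extend to 64 bits */
    }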
{ + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_sub( + env->vfp.vreg[src2].f16[j], + env->vfp.vreg[src1].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_sub( + env->vfp.vreg[src2].f32[j], + env->vfp.vreg[src1].f32[j], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_sub( + env->vfp.vreg[src2].f64[j], + env->vfp.vreg[src1].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfsub.vf vd, vs2, rs1, vm # Vector-scalar vd[i] = vs2[i] - f[rs1] */ +void VECTOR_HELPER(vfsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_sub( + env->vfp.vreg[src2].f16[j], + env->fpr[rs1], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_sub( + env->vfp.vreg[src2].f32[j], + env->fpr[rs1], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_sub( + env->vfp.vreg[src2].f64[j], + env->fpr[rs1], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vrsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < 
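The scalar forms in this area differ in operand order, which is easy to misread: vfsub.vf keeps the vector on the left, while vrsub reverses it. Per-element reference one-liners (restating the patch's own header comments):

    static float  vfsub_vf_elem(float vs2, float rs1)   { return vs2 - rs1; }
    static int8_t vrsub_vx_elem(int8_t rs1, int8_t vs2) { return rs1 - vs2; }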
vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] + - env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] + - env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] + - env->vfp.vreg[src2].u32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = + (uint64_t)extend_gpr(env->gpr[rs1]) + - env->vfp.vreg[src2].u64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vrsub_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) + - env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) + - env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) + - env->vfp.vreg[src2].s32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) + - env->vfp.vreg[src2].s64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vredxor.vs vd, vs2, vs1, vm # vd[0] = xor( vs1[0] , vs2[*] ) */ +void VECTOR_HELPER(vredxor_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + uint64_t res = 0; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 8: + if (i == 0) { + res = env->vfp.vreg[rs1].u8[0]; + } + if 
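The single-pass reductions (vredsum/vredand/vredor/vredxor above) zero the whole destination register before the element loop and only then read the seed vs1[0] at i == 0. If rd == rs1, which is a legal and common encoding (e.g. vredxor.vs v1, v2, v1), the seed has already been destroyed. Reading the seed before clobbering vd avoids this; flat-array sketch:

    static void redxor_ref_u8(uint8_t *vd, const uint8_t *vs1,
                              const uint8_t *vs2, const uint8_t *mask,
                              int vl, size_t vd_bytes)
    {
        uint8_t res = vs1[0];          /* latch the seed before touching vd */
        for (int i = 0; i < vl; i++) {
            if (mask[i]) {
                res ^= vs2[i];
            }
        }
        memset(vd, 0, vd_bytes);       /* tail zeroing, as in the patch */
        vd[0] = res;
    }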
(vector_elem_mask(env, vm, width, lmul, i)) { + res ^= env->vfp.vreg[src2].u8[j]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u8[0] = res; + } + break; + case 16: + if (i == 0) { + res = env->vfp.vreg[rs1].u16[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + res ^= env->vfp.vreg[src2].u16[j]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u16[0] = res; + } + break; + case 32: + if (i == 0) { + res = env->vfp.vreg[rs1].u32[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + res ^= env->vfp.vreg[src2].u32[j]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u32[0] = res; + } + break; + case 64: + if (i == 0) { + res = env->vfp.vreg[rs1].u64[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + res ^= env->vfp.vreg[src2].u64[j]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u64[0] = res; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfredosum.vs vd, vs2, vs1, vm # Ordered sum */ +void VECTOR_HELPER(vfredosum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + helper_vector_vfredsum_vs(env, vm, rs1, rs2, rd); + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vminu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u8[j] <= + env->vfp.vreg[src2].u8[j]) { + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src1].u8[j]; + } else { + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src2].u8[j]; + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u16[j] <= + env->vfp.vreg[src2].u16[j]) { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src1].u16[j]; + } else { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src2].u16[j]; + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u32[j] <= + env->vfp.vreg[src2].u32[j]) { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src1].u32[j]; + } else { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src2].u32[j]; + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u64[j] <= + env->vfp.vreg[src2].u64[j]) { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src1].u64[j]; + } else { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src2].u64[j]; + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vminu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = 
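
vfredosum.vs is a straight tail call to the unordered-sum helper (plus
another unreachable vstart reset). If vfredsum already accumulates strictly
in element order the alias is harmless, but then a comment saying so would
help; if it doesn't, the ordered sum needs its own loop. For the f32 case
that would look something like this (a sketch reusing the patch's own
indexing scheme and locals, untested):

    float32 sum = env->vfp.vreg[rs1].f32[0];
    for (i = 0; i < vl; i++) {
        int src = rs2 + (i / (VLEN / 32));
        int k = i % (VLEN / 32);
        if (vector_elem_mask(env, vm, 32, lmul, i)) {
            sum = float32_add(sum, env->vfp.vreg[src].f32[k],
                              &env->fp_status);
        }
    }
    env->vfp.vreg[rd].f32[0] = sum;
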
vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint8_t)env->gpr[rs1] <= + env->vfp.vreg[src2].u8[j]) { + env->vfp.vreg[dest].u8[j] = + env->gpr[rs1]; + } else { + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src2].u8[j]; + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint16_t)env->gpr[rs1] <= + env->vfp.vreg[src2].u16[j]) { + env->vfp.vreg[dest].u16[j] = + env->gpr[rs1]; + } else { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src2].u16[j]; + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint32_t)env->gpr[rs1] <= + env->vfp.vreg[src2].u32[j]) { + env->vfp.vreg[dest].u32[j] = + env->gpr[rs1]; + } else { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src2].u32[j]; + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint64_t)extend_gpr(env->gpr[rs1]) <= + env->vfp.vreg[src2].u64[j]) { + env->vfp.vreg[dest].u64[j] = + (uint64_t)extend_gpr(env->gpr[rs1]); + } else { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src2].u64[j]; + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vredminu.vs vd, vs2, vs1, vm # vd[0] = minu( vs1[0] , vs2[*] ) */ +void VECTOR_HELPER(vredminu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + uint64_t minu = 0; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 8: + if (i == 0) { + minu = env->vfp.vreg[rs1].u8[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (minu > env->vfp.vreg[src2].u8[j]) { + minu = env->vfp.vreg[src2].u8[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].u8[0] = minu; + } + break; + case 16: + if (i == 0) { + minu = env->vfp.vreg[rs1].u16[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (minu > env->vfp.vreg[src2].u16[j]) { + minu = env->vfp.vreg[src2].u16[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].u16[0] = minu; + } + break; + case 32: + if (i == 0) { + minu = env->vfp.vreg[rs1].u32[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (minu > env->vfp.vreg[src2].u32[j]) { + minu = env->vfp.vreg[src2].u32[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].u32[0] = minu; + } + break; + case 64: + if (i == 0) { + minu = 
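
All of these reductions zero the whole of vd before walking the elements,
but the width switch can still hit default: and raise an illegal-instruction
exception afterwards -- by which point vd has already been clobbered even
though the instruction never completed. Validating the width up front would
keep the trap free of side effects (sketch):

    width = vector_get_width(env);
    if (width != 8 && width != 16 && width != 32 && width != 64) {
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
        return;                    /* trap before vd is touched */
    }
    for (i = 0; i < VLEN / 64; i++) {
        env->vfp.vreg[rd].u64[i] = 0;
    }
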
env->vfp.vreg[rs1].u64[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (minu > env->vfp.vreg[src2].u64[j]) { + minu = env->vfp.vreg[src2].u64[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].u64[0] = minu; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfmin.vv vd, vs2, vs1, vm # Vector-vector */ +void VECTOR_HELPER(vfmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_minnum( + env->vfp.vreg[src1].f16[j], + env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_minnum( + env->vfp.vreg[src1].f32[j], + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_minnum( + env->vfp.vreg[src1].f64[j], + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f16[j] = 0; + case 32: + env->vfp.vreg[dest].f32[j] = 0; + case 64: + env->vfp.vreg[dest].f64[j] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfmin.vf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vfmin_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_minnum( + env->fpr[rs1], + env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_minnum( + env->fpr[rs1], + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + case 64: + if 
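
The tail-element switch in vfmin.vv has no breaks, so case 16 falls through
32 and 64 into default: and raises an illegal-instruction trap for perfectly
legal tail elements (zeroing f32/f64 lanes that belong to other elements
along the way). The same copy-pasted block appears in vfmin.vf, vfmax.*,
and the whole vfsgnj* family below. vfsub.vv above already has the right
idiom:

    } else {
        vector_tail_fcommon(env, dest, j, width);
    }

which zeroes just the one tail element for whichever width is active.
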
(vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_minnum( + env->fpr[rs1], + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f16[j] = 0; + case 32: + env->vfp.vreg[dest].f32[j] = 0; + case 64: + env->vfp.vreg[dest].f64[j] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmin_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].s8[j] <= + env->vfp.vreg[src2].s8[j]) { + env->vfp.vreg[dest].s8[j] = + env->vfp.vreg[src1].s8[j]; + } else { + env->vfp.vreg[dest].s8[j] = + env->vfp.vreg[src2].s8[j]; + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].s16[j] <= + env->vfp.vreg[src2].s16[j]) { + env->vfp.vreg[dest].s16[j] = + env->vfp.vreg[src1].s16[j]; + } else { + env->vfp.vreg[dest].s16[j] = + env->vfp.vreg[src2].s16[j]; + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].s32[j] <= + env->vfp.vreg[src2].s32[j]) { + env->vfp.vreg[dest].s32[j] = + env->vfp.vreg[src1].s32[j]; + } else { + env->vfp.vreg[dest].s32[j] = + env->vfp.vreg[src2].s32[j]; + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].s64[j] <= + env->vfp.vreg[src2].s64[j]) { + env->vfp.vreg[dest].s64[j] = + env->vfp.vreg[src1].s64[j]; + } else { + env->vfp.vreg[dest].s64[j] = + env->vfp.vreg[src2].s64[j]; + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmin_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((int8_t)env->gpr[rs1] <= + 
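
All the .vf helpers pass env->fpr[rs1] (a uint64_t) straight into the
float16/float32 ops and rely on implicit truncation to the low bits. That
does the right thing for properly NaN-boxed scalars but silently accepts
unboxed values. If unboxing is wanted, a helper along these lines would do
(unbox_f32 is a name invented here, not something this patch or softfloat
provides):

    /* Return the boxed single, or the canonical NaN if the upper
     * 32 bits are not all ones (i.e. the value is not NaN-boxed). */
    static inline float32 unbox_f32(uint64_t v, float_status *s)
    {
        return (v >> 32) == 0xffffffffull ? (float32)v
                                          : float32_default_nan(s);
    }

and correspondingly for f16. Whether the 0.7.1 draft requires this is a
spec question, so take this as a hedge rather than a demand.
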
env->vfp.vreg[src2].s8[j]) { + env->vfp.vreg[dest].s8[j] = + env->gpr[rs1]; + } else { + env->vfp.vreg[dest].s8[j] = + env->vfp.vreg[src2].s8[j]; + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((int16_t)env->gpr[rs1] <= + env->vfp.vreg[src2].s16[j]) { + env->vfp.vreg[dest].s16[j] = + env->gpr[rs1]; + } else { + env->vfp.vreg[dest].s16[j] = + env->vfp.vreg[src2].s16[j]; + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((int32_t)env->gpr[rs1] <= + env->vfp.vreg[src2].s32[j]) { + env->vfp.vreg[dest].s32[j] = + env->gpr[rs1]; + } else { + env->vfp.vreg[dest].s32[j] = + env->vfp.vreg[src2].s32[j]; + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((int64_t)extend_gpr(env->gpr[rs1]) <= + env->vfp.vreg[src2].s64[j]) { + env->vfp.vreg[dest].s64[j] = + (int64_t)extend_gpr(env->gpr[rs1]); + } else { + env->vfp.vreg[dest].s64[j] = + env->vfp.vreg[src2].s64[j]; + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vredmin.vs vd, vs2, vs1, vm # vd[0] = min( vs1[0] , vs2[*] ) */ +void VECTOR_HELPER(vredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + int64_t min = 0; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 8: + if (i == 0) { + min = env->vfp.vreg[rs1].s8[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (min > env->vfp.vreg[src2].s8[j]) { + min = env->vfp.vreg[src2].s8[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].s8[0] = min; + } + break; + case 16: + if (i == 0) { + min = env->vfp.vreg[rs1].s16[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (min > env->vfp.vreg[src2].s16[j]) { + min = env->vfp.vreg[src2].s16[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].s16[0] = min; + } + break; + case 32: + if (i == 0) { + min = env->vfp.vreg[rs1].s32[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (min > env->vfp.vreg[src2].s32[j]) { + min = env->vfp.vreg[src2].s32[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].s32[0] = min; + } + break; + case 64: + if (i == 0) { + min = env->vfp.vreg[rs1].s64[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (min > env->vfp.vreg[src2].s64[j]) { + min = env->vfp.vreg[src2].s64[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].s64[0] = min; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfredmin.vs vd, vs2, vs1, vm # Minimum value */ +void VECTOR_HELPER(vfredmin_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + float16 min16 = 0.0f; + float32 min32 = 0.0f; + float64 min64 = 0.0f; + + lmul = 
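
vmin/vminu/vmax/vmaxu (.vv and .vx) are byte-for-byte clones of one another
apart from the comparison operator and element type, and that duplication
is a big part of why vector_helper.c lands at 26k+ lines. One way to shrink
it without changing behaviour is to generate the per-width cases from a
macro, e.g. (sketch, names invented here):

    /* Expands to one masked min/max case; FIELD is u8/s16/...,
     * OP is <= for min and >= for max. */
    #define VEXT_MINMAX_CASE(FIELD, OP)                            \
        if (vector_elem_mask(env, vm, width, lmul, i)) {           \
            env->vfp.vreg[dest].FIELD[j] =                         \
                (env->vfp.vreg[src1].FIELD[j] OP                   \
                 env->vfp.vreg[src2].FIELD[j])                     \
                    ? env->vfp.vreg[src1].FIELD[j]                 \
                    : env->vfp.vreg[src2].FIELD[j];                \
        }

That would also make the earlier request to split this patch into
reviewable chunks much easier to satisfy.
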
vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 16: + if (i == 0) { + min16 = env->vfp.vreg[rs1].f16[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + min16 = float16_minnum(min16, env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + if (i == vl - 1) { + env->vfp.vreg[rd].f16[0] = min16; + } + break; + case 32: + if (i == 0) { + min32 = env->vfp.vreg[rs1].f32[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + min32 = float32_minnum(min32, env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + if (i == vl - 1) { + env->vfp.vreg[rd].f32[0] = min32; + } + break; + case 64: + if (i == 0) { + min64 = env->vfp.vreg[rs1].f64[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + min64 = float64_minnum(min64, env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + if (i == vl - 1) { + env->vfp.vreg[rd].f64[0] = min64; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmaxu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u8[j] >= + env->vfp.vreg[src2].u8[j]) { + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src1].u8[j]; + } else { + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src2].u8[j]; + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u16[j] >= + env->vfp.vreg[src2].u16[j]) { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src1].u16[j]; + } else { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src2].u16[j]; + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u32[j] >= + env->vfp.vreg[src2].u32[j]) { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src1].u32[j]; + } else { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src2].u32[j]; + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u64[j] >= + env->vfp.vreg[src2].u64[j]) { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src1].u64[j]; + } else { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src2].u64[j]; + } + } + break; + default: + 
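
Nit: float16/float32/float64 are bit-container typedefs, so initialisers
like "min16 = 0.0f" only work because the literal happens to convert to
integer 0. The bit-pattern constructors state the intent:

    float16 min16 = make_float16(0);
    float32 min32 = float32_zero;

(The initial values are dead anyway -- they are overwritten from vs1[0] at
i == 0 -- so even a plain 0 would read better than a float literal.)
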
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vmaxu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint8_t)env->gpr[rs1] >= + env->vfp.vreg[src2].u8[j]) { + env->vfp.vreg[dest].u8[j] = + env->gpr[rs1]; + } else { + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src2].u8[j]; + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint16_t)env->gpr[rs1] >= + env->vfp.vreg[src2].u16[j]) { + env->vfp.vreg[dest].u16[j] = + env->gpr[rs1]; + } else { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src2].u16[j]; + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint32_t)env->gpr[rs1] >= + env->vfp.vreg[src2].u32[j]) { + env->vfp.vreg[dest].u32[j] = + env->gpr[rs1]; + } else { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src2].u32[j]; + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint64_t)extend_gpr(env->gpr[rs1]) >= + env->vfp.vreg[src2].u64[j]) { + env->vfp.vreg[dest].u64[j] = + (uint64_t)extend_gpr(env->gpr[rs1]); + } else { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src2].u64[j]; + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vredmaxu.vs vd, vs2, vs1, vm # vd[0] = maxu( vs1[0] , vs2[*] ) */ +void VECTOR_HELPER(vredmaxu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + uint64_t maxu = 0; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 8: + if (i == 0) { + maxu = env->vfp.vreg[rs1].u8[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (maxu < env->vfp.vreg[src2].u8[j]) { + maxu = env->vfp.vreg[src2].u8[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].u8[0] = maxu; + } + break; + case 16: + if (i == 0) { + maxu = env->vfp.vreg[rs1].u16[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (maxu < env->vfp.vreg[src2].u16[j]) { + maxu = env->vfp.vreg[src2].u16[j]; + } + } + if (i == vl - 1) { 
+ env->vfp.vreg[rd].u16[0] = maxu; + } + break; + case 32: + if (i == 0) { + maxu = env->vfp.vreg[rs1].u32[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (maxu < env->vfp.vreg[src2].u32[j]) { + maxu = env->vfp.vreg[src2].u32[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].u32[0] = maxu; + } + break; + case 64: + if (i == 0) { + maxu = env->vfp.vreg[rs1].u64[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (maxu < env->vfp.vreg[src2].u64[j]) { + maxu = env->vfp.vreg[src2].u64[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].u64[0] = maxu; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/*vfmax.vv vd, vs2, vs1, vm # Vector-vector */ +void VECTOR_HELPER(vfmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_maxnum( + env->vfp.vreg[src1].f16[j], + env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_maxnum( + env->vfp.vreg[src1].f32[j], + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_maxnum( + env->vfp.vreg[src1].f64[j], + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f16[j] = 0; + case 32: + env->vfp.vreg[dest].f32[j] = 0; + case 64: + env->vfp.vreg[dest].f64[j] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfmax.vf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vfmax_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (env->vfp.vstart >= vl) { + return; + } + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, 
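
vfmax.vf does the vstart >= vl early-out before it has validated vtype --
the opposite order from vfmax.vv just above. With vill set, vl is stale, so
the early return can skip the illegal-instruction trap entirely. Sketch of
the consistent ordering:

    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
        return;
    }
    vector_lmul_check_reg(env, lmul, rs2, false);
    vector_lmul_check_reg(env, lmul, rd, false);
    if (env->vfp.vstart >= vl) {   /* only once vtype is known legal */
        return;
    }
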
lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_maxnum( + env->fpr[rs1], + env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_maxnum( + env->fpr[rs1], + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_maxnum( + env->fpr[rs1], + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f16[j] = 0; + case 32: + env->vfp.vreg[dest].f32[j] = 0; + case 64: + env->vfp.vreg[dest].f64[j] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmax_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].s8[j] >= + env->vfp.vreg[src2].s8[j]) { + env->vfp.vreg[dest].s8[j] = + env->vfp.vreg[src1].s8[j]; + } else { + env->vfp.vreg[dest].s8[j] = + env->vfp.vreg[src2].s8[j]; + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].s16[j] >= + env->vfp.vreg[src2].s16[j]) { + env->vfp.vreg[dest].s16[j] = + env->vfp.vreg[src1].s16[j]; + } else { + env->vfp.vreg[dest].s16[j] = + env->vfp.vreg[src2].s16[j]; + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].s32[j] >= + env->vfp.vreg[src2].s32[j]) { + env->vfp.vreg[dest].s32[j] = + env->vfp.vreg[src1].s32[j]; + } else { + env->vfp.vreg[dest].s32[j] = + env->vfp.vreg[src2].s32[j]; + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].s64[j] >= + env->vfp.vreg[src2].s64[j]) { + env->vfp.vreg[dest].s64[j] = + env->vfp.vreg[src1].s64[j]; + } else { + env->vfp.vreg[dest].s64[j] = + env->vfp.vreg[src2].s64[j]; + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmax_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, 
lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((int8_t)env->gpr[rs1] >= + env->vfp.vreg[src2].s8[j]) { + env->vfp.vreg[dest].s8[j] = + env->gpr[rs1]; + } else { + env->vfp.vreg[dest].s8[j] = + env->vfp.vreg[src2].s8[j]; + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((int16_t)env->gpr[rs1] >= + env->vfp.vreg[src2].s16[j]) { + env->vfp.vreg[dest].s16[j] = + env->gpr[rs1]; + } else { + env->vfp.vreg[dest].s16[j] = + env->vfp.vreg[src2].s16[j]; + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((int32_t)env->gpr[rs1] >= + env->vfp.vreg[src2].s32[j]) { + env->vfp.vreg[dest].s32[j] = + env->gpr[rs1]; + } else { + env->vfp.vreg[dest].s32[j] = + env->vfp.vreg[src2].s32[j]; + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((int64_t)extend_gpr(env->gpr[rs1]) >= + env->vfp.vreg[src2].s64[j]) { + env->vfp.vreg[dest].s64[j] = + (int64_t)extend_gpr(env->gpr[rs1]); + } else { + env->vfp.vreg[dest].s64[j] = + env->vfp.vreg[src2].s64[j]; + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vredmax.vs vd, vs2, vs1, vm # vd[0] = max( vs1[0] , vs2[*] ) */ +void VECTOR_HELPER(vredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + int64_t max = 0; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 8: + if (i == 0) { + max = env->vfp.vreg[rs1].s8[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (max < env->vfp.vreg[src2].s8[j]) { + max = env->vfp.vreg[src2].s8[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].s8[0] = max; + } + break; + case 16: + if (i == 0) { + max = env->vfp.vreg[rs1].s16[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (max < env->vfp.vreg[src2].s16[j]) { + max = env->vfp.vreg[src2].s16[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].s16[0] = max; + } + break; + case 32: + if (i == 0) { + max = env->vfp.vreg[rs1].s32[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (max < env->vfp.vreg[src2].s32[j]) { + max = env->vfp.vreg[src2].s32[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].s32[0] = max; + } + break; + case 64: + if (i == 0) { + max = env->vfp.vreg[rs1].s64[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (max < env->vfp.vreg[src2].s64[j]) { + max = env->vfp.vreg[src2].s64[j]; + } + } + if (i == vl - 1) { + env->vfp.vreg[rd].s64[0] = max; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + 
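
A structural note on the reductions: storing the result under
"if (i == vl - 1)" on every iteration, and looping to vlmax when nothing
past vl is ever used, buries the logic. Accumulating to vl and storing once
afterwards reads more directly (s32 sketch, same shape for the other
widths):

    int64_t max = env->vfp.vreg[rs1].s32[0];
    for (i = 0; i < vl; i++) {
        int src = rs2 + (i / (VLEN / 32));
        int k = i % (VLEN / 32);
        if (vector_elem_mask(env, vm, 32, lmul, i)) {
            if (max < env->vfp.vreg[src].s32[k]) {
                max = env->vfp.vreg[src].s32[k];
            }
        }
    }
    env->vfp.vreg[rd].s32[0] = max;
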
env->vfp.vstart = 0; +} + +/* vfredmax.vs vd, vs2, vs1, vm # Maximum value */ +void VECTOR_HELPER(vfredmax_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + float16 max16 = 0.0f; + float32 max32 = 0.0f; + float64 max64 = 0.0f; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 16: + if (i == 0) { + max16 = env->vfp.vreg[rs1].f16[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + max16 = float16_maxnum(max16, env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + if (i == vl - 1) { + env->vfp.vreg[rd].f16[0] = max16; + } + break; + case 32: + if (i == 0) { + max32 = env->vfp.vreg[rs1].f32[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + max32 = float32_maxnum(max32, env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + if (i == vl - 1) { + env->vfp.vreg[rd].f32[0] = max32; + } + break; + case 64: + if (i == 0) { + max64 = env->vfp.vreg[rs1].f64[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + max64 = float64_maxnum(max64, env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + if (i == vl - 1) { + env->vfp.vreg[rd].f64[0] = max64; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfsgnj.vv vd, vs2, vs1, vm # Vector-vector */ +void VECTOR_HELPER(vfsgnj_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = deposit16( + env->vfp.vreg[src1].f16[j], + 0, + 15, + env->vfp.vreg[src2].f16[j]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = deposit32( + env->vfp.vreg[src1].f32[j], + 0, + 31, + env->vfp.vreg[src2].f32[j]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = deposit64( + env->vfp.vreg[src1].f64[j], + 0, + 63, + env->vfp.vreg[src2].f64[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f16[j] = 
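
The deposit-based sign injection is compact and, as far as I can tell,
correct: deposit16(vs1, 0, 15, vs2) takes bits 0..14 (the magnitude) from
vs2 and leaves bit 15 (the sign) from vs1, which is exactly vfsgnj. The
equivalent explicit masking for the f16 lane, for reference:

    uint16_t sign = env->vfp.vreg[src1].f16[j] & 0x8000;
    uint16_t mag  = env->vfp.vreg[src2].f16[j] & 0x7fff;
    env->vfp.vreg[dest].f16[j] = sign | mag;

One question though: bitops.h only defines deposit32/deposit64 as far as I
can see -- where does deposit16 come from?
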
0; + case 32: + env->vfp.vreg[dest].f32[j] = 0; + case 64: + env->vfp.vreg[dest].f64[j] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfsgnj.vf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vfsgnj_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = deposit16( + env->fpr[rs1], + 0, + 15, + env->vfp.vreg[src2].f16[j]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = deposit32( + env->fpr[rs1], + 0, + 31, + env->vfp.vreg[src2].f32[j]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = deposit64( + env->fpr[rs1], + 0, + 63, + env->vfp.vreg[src2].f64[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f16[j] = 0; + case 32: + env->vfp.vreg[dest].f32[j] = 0; + case 64: + env->vfp.vreg[dest].f64[j] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vand_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] + & env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j] + & env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j] + & env->vfp.vreg[src2].u32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j] + & env->vfp.vreg[src2].u64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, 
GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vand_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] + & env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] + & env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] + & env->vfp.vreg[src2].u32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = + (uint64_t)extend_gpr(env->gpr[rs1]) + & env->vfp.vreg[src2].u64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vand_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) + & env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) + & env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) + & env->vfp.vreg[src2].s32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) + & env->vfp.vreg[src2].s64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vfsgnjn.vv vd, vs2, vs1, vm # Vector-vector */ +void VECTOR_HELPER(vfsgnjn_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) || 
vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = deposit16( + ~env->vfp.vreg[src1].f16[j], + 0, + 15, + env->vfp.vreg[src2].f16[j]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = deposit32( + ~env->vfp.vreg[src1].f32[j], + 0, + 31, + env->vfp.vreg[src2].f32[j]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = deposit64( + ~env->vfp.vreg[src1].f64[j], + 0, + 63, + env->vfp.vreg[src2].f64[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f16[j] = 0; + case 32: + env->vfp.vreg[dest].f32[j] = 0; + case 64: + env->vfp.vreg[dest].f64[j] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} +/* vfsgnjn.vf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vfsgnjn_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = deposit16( + ~env->fpr[rs1], + 0, + 15, + env->vfp.vreg[src2].f16[j]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = deposit32( + ~env->fpr[rs1], + 0, + 31, + env->vfp.vreg[src2].f32[j]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = deposit64( + ~env->fpr[rs1], + 0, + 63, + env->vfp.vreg[src2].f64[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f16[j] = 0; + case 32: + env->vfp.vreg[dest].f32[j] = 0; + case 64: + env->vfp.vreg[dest].f64[j] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul 
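
vfsgnjn gets the negated sign by bit-inverting the whole sign operand and
letting deposit*() throw away everything but the top bit. That works, but a
one-line comment (or an explicit sign-bit XOR) would make it obvious the
full-width inversion is intentional:

    /* equivalent, arguably clearer: flip only the sign bit */
    env->vfp.vreg[dest].f16[j] =
        ((env->vfp.vreg[src1].f16[j] ^ 0x8000) & 0x8000) |
        (env->vfp.vreg[src2].f16[j] & 0x7fff);

The same remark applies to the ~env->fpr[rs1] in the .vf variant, where the
inversion also touches the NaN-boxing bits before deposit discards them.
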
= vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] + | env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j] + | env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j] + | env->vfp.vreg[src2].u32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j] + | env->vfp.vreg[src2].u64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] + | env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] + | env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] + | env->vfp.vreg[src2].u32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = + (uint64_t)extend_gpr(env->gpr[rs1]) + | env->vfp.vreg[src2].u64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vor_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, 
false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) + | env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) + | env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) + | env->vfp.vreg[src2].s32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) + | env->vfp.vreg[src2].s64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vfsgnjx.vv vd, vs2, vs1, vm # Vector-vector */ +void VECTOR_HELPER(vfsgnjx_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = deposit16( + env->vfp.vreg[src1].f16[j] ^ + env->vfp.vreg[src2].f16[j], + 0, + 15, + env->vfp.vreg[src2].f16[j]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = deposit32( + env->vfp.vreg[src1].f32[j] ^ + env->vfp.vreg[src2].f32[j], + 0, + 31, + env->vfp.vreg[src2].f32[j]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = deposit64( + env->vfp.vreg[src1].f64[j] ^ + env->vfp.vreg[src2].f64[j], + 0, + 63, + env->vfp.vreg[src2].f64[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f16[j] = 0; + case 32: + env->vfp.vreg[dest].f32[j] = 0; + case 64: + env->vfp.vreg[dest].f64[j] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + + env->vfp.vstart = 0; +} + +/* vfsgnjx.vf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vfsgnjx_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + if 
(env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = deposit16( + env->fpr[rs1] ^ + env->vfp.vreg[src2].f16[j], + 0, + 15, + env->vfp.vreg[src2].f16[j]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = deposit32( + env->fpr[rs1] ^ + env->vfp.vreg[src2].f32[j], + 0, + 31, + env->vfp.vreg[src2].f32[j]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = deposit64( + env->fpr[rs1] ^ + env->vfp.vreg[src2].f64[j], + 0, + 63, + env->vfp.vreg[src2].f64[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f16[j] = 0; + case 32: + env->vfp.vreg[dest].f32[j] = 0; + case 64: + env->vfp.vreg[dest].f64[j] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vxor_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] + ^ env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j] + ^ env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j] + ^ env->vfp.vreg[src2].u32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j] + ^ env->vfp.vreg[src2].u64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vxor_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 
0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] + ^ env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] + ^ env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] + ^ env->vfp.vreg[src2].u32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = + (uint64_t)extend_gpr(env->gpr[rs1]) + ^ env->vfp.vreg[src2].u64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vxor_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = sign_extend(rs1, 5) + ^ env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = sign_extend(rs1, 5) + ^ env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = sign_extend(rs1, 5) + ^ env->vfp.vreg[src2].s32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = sign_extend(rs1, 5) + ^ env->vfp.vreg[src2].s64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vadc_vvm)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax, carry; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j] + + env->vfp.vreg[src2].u8[j] + carry; + 
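+                /*
+                 * The sum wraps modulo 2^8; only the carry-in
+                 * (v0[i].LSB, via vector_get_carry()) is consumed here.
+                 * The matching carry-out is produced by vmadc below by
+                 * doing the add in a wider type and shifting -- roughly,
+                 * with a/b standing for the two u8 operands:
+                 *     uint64_t tmp = a + b + carry;
+                 *     cout = tmp >> width;
+                 */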
break; + case 16: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j] + + env->vfp.vreg[src2].u16[j] + carry; + break; + case 32: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j] + + env->vfp.vreg[src2].u32[j] + carry; + break; + case 64: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j] + + env->vfp.vreg[src2].u64[j] + carry; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vadc_vxm)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax, carry; + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u8[j] = env->gpr[rs1] + + env->vfp.vreg[src2].u8[j] + carry; + break; + case 16: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u16[j] = env->gpr[rs1] + + env->vfp.vreg[src2].u16[j] + carry; + break; + case 32: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u32[j] = env->gpr[rs1] + + env->vfp.vreg[src2].u32[j] + carry; + break; + case 64: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u64[j] = (uint64_t)extend_gpr(env->gpr[rs1]) + + env->vfp.vreg[src2].u64[j] + carry; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vadc_vim)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax, carry; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u8[j] = sign_extend(rs1, 5) + + env->vfp.vreg[src2].u8[j] + carry; + break; + case 16: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u16[j] = sign_extend(rs1, 5) + + env->vfp.vreg[src2].u16[j] + carry; + break; + case 32: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u32[j] = sign_extend(rs1, 5) + + env->vfp.vreg[src2].u32[j] + carry; + break; + case 64: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u64[j] = sign_extend(rs1, 
5) + + env->vfp.vreg[src2].u64[j] + carry; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vmadc_vvm)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, vlmax, carry; + uint64_t tmp; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + + if (vector_vtype_ill(env) + || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) + || (rd == 0)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + carry = vector_get_carry(env, width, lmul, i); + tmp = env->vfp.vreg[src1].u8[j] + + env->vfp.vreg[src2].u8[j] + carry; + tmp = tmp >> width; + + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 16: + carry = vector_get_carry(env, width, lmul, i); + tmp = env->vfp.vreg[src1].u16[j] + + env->vfp.vreg[src2].u16[j] + carry; + tmp = tmp >> width; + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 32: + carry = vector_get_carry(env, width, lmul, i); + tmp = (uint64_t)env->vfp.vreg[src1].u32[j] + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry; + tmp = tmp >> width; + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 64: + carry = vector_get_carry(env, width, lmul, i); + tmp = env->vfp.vreg[src1].u64[j] + + env->vfp.vreg[src2].u64[j] + carry; + + if ((tmp < env->vfp.vreg[src1].u64[j] || + tmp < env->vfp.vreg[src2].u64[j]) + || (env->vfp.vreg[src1].u64[j] == MAX_U64 && + env->vfp.vreg[src2].u64[j] == MAX_U64)) { + tmp = 1; + } else { + tmp = 0; + } + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmadc_vxm)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, vlmax, carry; + uint64_t tmp, extend_rs1; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) + || (rd == 0)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + carry = vector_get_carry(env, width, lmul, i); + tmp = (uint8_t)env->gpr[rs1] + + env->vfp.vreg[src2].u8[j] + carry; + tmp = tmp >> width; + + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 16: + carry = vector_get_carry(env, width, lmul, i); + tmp = (uint16_t)env->gpr[rs1] + + env->vfp.vreg[src2].u16[j] + carry; + tmp = tmp >> width; + vector_mask_result(env, rd, width, 
lmul, i, tmp); + break; + case 32: + carry = vector_get_carry(env, width, lmul, i); + tmp = (uint64_t)((uint32_t)env->gpr[rs1]) + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry; + tmp = tmp >> width; + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 64: + carry = vector_get_carry(env, width, lmul, i); + + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]); + tmp = extend_rs1 + env->vfp.vreg[src2].u64[j] + carry; + if ((tmp < extend_rs1) || + (carry && (env->vfp.vreg[src2].u64[j] == MAX_U64))) { + tmp = 1; + } else { + tmp = 0; + } + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vmadc_vim)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, vlmax, carry; + uint64_t tmp; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + + if (vector_vtype_ill(env) + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) + || (rd == 0)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + carry = vector_get_carry(env, width, lmul, i); + tmp = (uint8_t)sign_extend(rs1, 5) + + env->vfp.vreg[src2].u8[j] + carry; + tmp = tmp >> width; + + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 16: + carry = vector_get_carry(env, width, lmul, i); + tmp = (uint16_t)sign_extend(rs1, 5) + + env->vfp.vreg[src2].u16[j] + carry; + tmp = tmp >> width; + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 32: + carry = vector_get_carry(env, width, lmul, i); + tmp = (uint64_t)((uint32_t)sign_extend(rs1, 5)) + + (uint64_t)env->vfp.vreg[src2].u32[j] + carry; + tmp = tmp >> width; + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 64: + carry = vector_get_carry(env, width, lmul, i); + tmp = (uint64_t)sign_extend(rs1, 5) + + env->vfp.vreg[src2].u64[j] + carry; + + if ((tmp < (uint64_t)sign_extend(rs1, 5) || + tmp < env->vfp.vreg[src2].u64[j]) + || ((uint64_t)sign_extend(rs1, 5) == MAX_U64 && + env->vfp.vreg[src2].u64[j] == MAX_U64)) { + tmp = 1; + } else { + tmp = 0; + } + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsbc_vvm)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax, carry; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + 
vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] + - env->vfp.vreg[src1].u8[j] - carry; + break; + case 16: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j] + - env->vfp.vreg[src1].u16[j] - carry; + break; + case 32: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j] + - env->vfp.vreg[src1].u32[j] - carry; + break; + case 64: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j] + - env->vfp.vreg[src1].u64[j] - carry; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vsbc_vxm)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax, carry; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_carry(lmul, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] + - env->gpr[rs1] - carry; + break; + case 16: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j] + - env->gpr[rs1] - carry; + break; + case 32: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j] + - env->gpr[rs1] - carry; + break; + case 64: + carry = vector_get_carry(env, width, lmul, i); + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j] + - (uint64_t)extend_gpr(env->gpr[rs1]) - carry; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmsbc_vvm)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, vlmax, carry; + uint64_t tmp; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + + if (vector_vtype_ill(env) + || vector_overlap_dstgp_srcgp(rd, 1, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) + || (rd == 0)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + carry = 
vector_get_carry(env, width, lmul, i); + tmp = env->vfp.vreg[src2].u8[j] + - env->vfp.vreg[src1].u8[j] - carry; + tmp = (tmp >> width) & 0x1; + + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 16: + carry = vector_get_carry(env, width, lmul, i); + tmp = env->vfp.vreg[src2].u16[j] + - env->vfp.vreg[src1].u16[j] - carry; + tmp = (tmp >> width) & 0x1; + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 32: + carry = vector_get_carry(env, width, lmul, i); + tmp = (uint64_t)env->vfp.vreg[src2].u32[j] + - (uint64_t)env->vfp.vreg[src1].u32[j] - carry; + tmp = (tmp >> width) & 0x1; + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 64: + carry = vector_get_carry(env, width, lmul, i); + tmp = env->vfp.vreg[src2].u64[j] + - env->vfp.vreg[src1].u64[j] - carry; + + if (((env->vfp.vreg[src1].u64[j] == MAX_U64) && carry) || + env->vfp.vreg[src2].u64[j] < + (env->vfp.vreg[src1].u64[j] + carry)) { + tmp = 1; + } else { + tmp = 0; + } + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmsbc_vxm)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, vlmax, carry; + uint64_t tmp, extend_rs1; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + + if (vector_vtype_ill(env) + || vector_overlap_dstgp_srcgp(rd, 1, rs2, lmul) + || (rd == 0)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + carry = vector_get_carry(env, width, lmul, i); + tmp = env->vfp.vreg[src2].u8[j] + - (uint8_t)env->gpr[rs1] - carry; + tmp = (tmp >> width) & 0x1; + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 16: + carry = vector_get_carry(env, width, lmul, i); + tmp = env->vfp.vreg[src2].u16[j] + - (uint16_t)env->gpr[rs1] - carry; + tmp = (tmp >> width) & 0x1; + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 32: + carry = vector_get_carry(env, width, lmul, i); + tmp = (uint64_t)env->vfp.vreg[src2].u32[j] + - (uint64_t)((uint32_t)env->gpr[rs1]) - carry; + tmp = (tmp >> width) & 0x1; + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + case 64: + carry = vector_get_carry(env, width, lmul, i); + + extend_rs1 = (uint64_t)extend_gpr(env->gpr[rs1]); + tmp = env->vfp.vreg[src2].u64[j] - extend_rs1 - carry; + + if ((tmp > env->vfp.vreg[src2].u64[j]) || + ((extend_rs1 == MAX_U64) && carry)) { + tmp = 1; + } else { + tmp = 0; + } + vector_mask_result(env, rd, width, lmul, i, tmp); + break; + + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +/* vmpopc.m rd, vs2, v0.t # x[rd] = sum_i ( vs2[i].LSB && v0[i].LSB ) */ +void VECTOR_HELPER(vmpopc_m)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) 
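+/*
+ * Unlike most helpers in this file, rd here names a GPR, not a vector
+ * register.  A write with rd == 0 would presumably need to be discarded
+ * (by the caller or a guard here) so that env->gpr[0] stays zero.
+ */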
+{
+    int width, lmul, vl, vlmax;
+    int i;
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (env->vfp.vstart != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+    env->gpr[rd] = 0;
+
+    for (i = 0; i < vlmax; i++) {
+        if (i < vl) {
+            if (vector_mask_reg(env, rs2, width, lmul, i) &&
+                vector_elem_mask(env, vm, width, lmul, i)) {
+                env->gpr[rd]++;
+            }
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+/* vmfirst.m rd, vs2, vm */
+void VECTOR_HELPER(vmfirst_m)(CPURISCVState *env, uint32_t vm,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i;
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (env->vfp.vstart != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    env->gpr[rd] = -1;
+    for (i = 0; i < vlmax; i++) {
+        if (i < vl) {
+            if (vector_mask_reg(env, rs2, width, lmul, i) &&
+                vector_elem_mask(env, vm, width, lmul, i)) {
+                env->gpr[rd] = i;
+                break;
+            }
+        }
+    }
+
+    env->vfp.vstart = 0;
+}
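+
+/*
+ * vmerge/vmv family: with vm == 0, vd[i] = v0[i].LSB ? src1 : vs2[i].
+ * With vm == 1 the same encodings are vmv.v.v/vmv.v.x/vmv.v.i, which
+ * require vs2 to be v0 (rs2 == 0); that is what the "rs2 != 0 raises
+ * illegal" checks below implement.
+ */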
+void VECTOR_HELPER(vmerge_vvm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl, idx, pos;
+    uint32_t lmul, width, src1, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src1 = rs1 + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vm == 0) {
+                    vector_get_layout(env, width, lmul, i, &idx, &pos);
+                    if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
+                    } else {
+                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j];
+                    }
+                } else {
+                    if (rs2 != 0) {
+                        riscv_raise_exception(env,
+                            RISCV_EXCP_ILLEGAL_INST, GETPC());
+                    }
+                    env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src1].u8[j];
+                }
+                break;
+            case 16:
+                if (vm == 0) {
+                    vector_get_layout(env, width, lmul, i, &idx, &pos);
+                    if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+                        env->vfp.vreg[dest].u16[j] =
+                            env->vfp.vreg[src2].u16[j];
+                    } else {
+                        env->vfp.vreg[dest].u16[j] =
+                            env->vfp.vreg[src1].u16[j];
+                    }
+                } else {
+                    if (rs2 != 0) {
+                        riscv_raise_exception(env,
+                            RISCV_EXCP_ILLEGAL_INST, GETPC());
+                    }
+                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src1].u16[j];
+                }
+                break;
+            case 32:
+                if (vm == 0) {
+                    vector_get_layout(env, width, lmul, i, &idx, &pos);
+                    if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+                        env->vfp.vreg[dest].u32[j] =
+                            env->vfp.vreg[src2].u32[j];
+                    } else {
+                        env->vfp.vreg[dest].u32[j] =
+                            env->vfp.vreg[src1].u32[j];
+                    }
+                } else {
+                    if (rs2 != 0) {
+                        riscv_raise_exception(env,
+                            RISCV_EXCP_ILLEGAL_INST, GETPC());
+                    }
+                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src1].u32[j];
+                }
+                break;
+            case 64:
+                if (vm == 0) {
+                    vector_get_layout(env, width, lmul, i, &idx, &pos);
+                    if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+                        env->vfp.vreg[dest].u64[j] =
+                            env->vfp.vreg[src2].u64[j];
+                    } else {
+                        env->vfp.vreg[dest].u64[j] =
+                            env->vfp.vreg[src1].u64[j];
+                    }
+                } else {
+                    if (rs2 != 0) {
+                        riscv_raise_exception(env,
+                            RISCV_EXCP_ILLEGAL_INST, GETPC());
+                    }
+                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src1].u64[j];
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+void VECTOR_HELPER(vmerge_vxm)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl, idx, pos;
+    uint32_t lmul, width, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vm == 0) {
+                    vector_get_layout(env, width, lmul, i, &idx, &pos);
+                    if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
+                    } else {
+                        env->vfp.vreg[dest].u8[j] = env->gpr[rs1];
+                    }
+                } else {
+                    if (rs2 != 0) {
+                        riscv_raise_exception(env,
+                            RISCV_EXCP_ILLEGAL_INST, GETPC());
+                    }
+                    env->vfp.vreg[dest].u8[j] = env->gpr[rs1];
+                }
+                break;
+            case 16:
+                if (vm == 0) {
+                    vector_get_layout(env, width, lmul, i, &idx, &pos);
+                    if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+                        env->vfp.vreg[dest].u16[j] =
+                            env->vfp.vreg[src2].u16[j];
+                    } else {
+                        env->vfp.vreg[dest].u16[j] = env->gpr[rs1];
+                    }
+                } else {
+                    if (rs2 != 0) {
+                        riscv_raise_exception(env,
+                            RISCV_EXCP_ILLEGAL_INST, GETPC());
+                    }
+                    env->vfp.vreg[dest].u16[j] = env->gpr[rs1];
+                }
+                break;
+            case 32:
+                if (vm == 0) {
+                    vector_get_layout(env, width, lmul, i, &idx, &pos);
+                    if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+                        env->vfp.vreg[dest].u32[j] =
+                            env->vfp.vreg[src2].u32[j];
+                    } else {
+                        env->vfp.vreg[dest].u32[j] = env->gpr[rs1];
+                    }
+                } else {
+                    if (rs2 != 0) {
+                        riscv_raise_exception(env,
+                            RISCV_EXCP_ILLEGAL_INST, GETPC());
+                    }
+                    env->vfp.vreg[dest].u32[j] = env->gpr[rs1];
+                }
+                break;
+            case 64:
+                if (vm == 0) {
+                    vector_get_layout(env, width, lmul, i, &idx, &pos);
+                    if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) {
+                        env->vfp.vreg[dest].u64[j] =
+                            env->vfp.vreg[src2].u64[j];
+                    } else {
+                        env->vfp.vreg[dest].u64[j] =
+                            (uint64_t)extend_gpr(env->gpr[rs1]);
+                    }
+                } else {
+                    if (rs2 != 0) {
+                        riscv_raise_exception(env,
+                            RISCV_EXCP_ILLEGAL_INST, GETPC());
+                    }
+                    env->vfp.vreg[dest].u64[j] =
+                        (uint64_t)extend_gpr(env->gpr[rs1]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+void VECTOR_HELPER(vmerge_vim)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl, idx, pos;
+    uint32_t lmul, width, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vm == 0) { + vector_get_layout(env, width, lmul, i, &idx, &pos); + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { + env->vfp.vreg[dest].u8[j] = + env->vfp.vreg[src2].u8[j]; + } else { + env->vfp.vreg[dest].u8[j] = + (uint8_t)sign_extend(rs1, 5); + } + } else { + if (rs2 != 0) { + riscv_raise_exception(env, + RISCV_EXCP_ILLEGAL_INST, GETPC()); + } + env->vfp.vreg[dest].u8[j] = (uint8_t)sign_extend(rs1, 5); + } + break; + case 16: + if (vm == 0) { + vector_get_layout(env, width, lmul, i, &idx, &pos); + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { + env->vfp.vreg[dest].u16[j] = + env->vfp.vreg[src2].u16[j]; + } else { + env->vfp.vreg[dest].u16[j] = + (uint16_t)sign_extend(rs1, 5); + } + } else { + if (rs2 != 0) { + riscv_raise_exception(env, + RISCV_EXCP_ILLEGAL_INST, GETPC()); + } + env->vfp.vreg[dest].u16[j] = (uint16_t)sign_extend(rs1, 5); + } + break; + case 32: + if (vm == 0) { + vector_get_layout(env, width, lmul, i, &idx, &pos); + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { + env->vfp.vreg[dest].u32[j] = + env->vfp.vreg[src2].u32[j]; + } else { + env->vfp.vreg[dest].u32[j] = + (uint32_t)sign_extend(rs1, 5); + } + } else { + if (rs2 != 0) { + riscv_raise_exception(env, + RISCV_EXCP_ILLEGAL_INST, GETPC()); + } + env->vfp.vreg[dest].u32[j] = (uint32_t)sign_extend(rs1, 5); + } + break; + case 64: + if (vm == 0) { + vector_get_layout(env, width, lmul, i, &idx, &pos); + if (((env->vfp.vreg[0].u8[idx] >> pos) & 0x1) == 0) { + env->vfp.vreg[dest].u64[j] = + env->vfp.vreg[src2].u64[j]; + } else { + env->vfp.vreg[dest].u64[j] = + (uint64_t)sign_extend(rs1, 5); + } + } else { + if (rs2 != 0) { + riscv_raise_exception(env, + RISCV_EXCP_ILLEGAL_INST, GETPC()); + } + env->vfp.vreg[dest].u64[j] = (uint64_t)sign_extend(rs1, 5); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vfmerge.vfm vd, vs2, rs1, v0 # vd[i] = v0[i].LSB ? 
f[rs1] : vs2[i] */ +void VECTOR_HELPER(vfmerge_vfm)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* vfmv.v.f vd, rs1 # vd[i] = f[rs1]; */ + if (vm && (rs2 != 0)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = env->fpr[rs1]; + } else { + env->vfp.vreg[dest].f16[j] = env->vfp.vreg[src2].f16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = env->fpr[rs1]; + } else { + env->vfp.vreg[dest].f32[j] = env->vfp.vreg[src2].f32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = env->fpr[rs1]; + } else { + env->vfp.vreg[dest].f64[j] = env->vfp.vreg[src2].f64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vmseq_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u8[j] == + env->vfp.vreg[src2].u8[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u16[j] == + env->vfp.vreg[src2].u16[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u32[j] == + env->vfp.vreg[src2].u32[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u64[j] == + env->vfp.vreg[src2].u64[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + 
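+                    /*
+                     * Defensive path: width is one of 8/16/32/64 once
+                     * vtype has passed the checks above, so tail
+                     * elements of a mask destination normally take the
+                     * "write 0" branch rather than raising here.
+                     */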
riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmseq_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint8_t)env->gpr[rs1] == env->vfp.vreg[src2].u8[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint16_t)env->gpr[rs1] == env->vfp.vreg[src2].u16[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint32_t)env->gpr[rs1] == env->vfp.vreg[src2].u32[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint64_t)extend_gpr(env->gpr[rs1]) == + env->vfp.vreg[src2].u64[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmseq_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint8_t)sign_extend(rs1, 5) + == env->vfp.vreg[src2].u8[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint16_t)sign_extend(rs1, 5) + == env->vfp.vreg[src2].u16[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint32_t)sign_extend(rs1, 5) + == env->vfp.vreg[src2].u32[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 64: + if 
(vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint64_t)sign_extend(rs1, 5) == + env->vfp.vreg[src2].u64[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +/* vmandnot.mm vd, vs2, vs1 # vd = vs2 & ~vs1 */ +void VECTOR_HELPER(vmandnot_mm)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, i, vlmax; + uint32_t tmp; + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + for (i = 0; i < vlmax; i++) { + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + tmp = ~vector_mask_reg(env, rs1, width, lmul, i) & + vector_mask_reg(env, rs2, width, lmul, i); + vector_mask_result(env, rd, width, lmul, i, tmp); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + + env->vfp.vstart = 0; + return; + env->vfp.vstart = 0; +} + +/* vmfeq.vv vd, vs2, vs1, vm # Vector-vector */ +void VECTOR_HELPER(vmfeq_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src1, src2, result; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + result = float16_eq_quiet(env->vfp.vreg[src1].f16[j], + env->vfp.vreg[src2].f16[j], + &env->fp_status); + vector_mask_result(env, rd, width, lmul, i, result); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + result = float32_eq_quiet(env->vfp.vreg[src1].f32[j], + env->vfp.vreg[src2].f32[j], + &env->fp_status); + vector_mask_result(env, rd, width, lmul, i, result); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + result = float64_eq_quiet(env->vfp.vreg[src1].f64[j], + env->vfp.vreg[src2].f64[j], + &env->fp_status); + vector_mask_result(env, rd, width, lmul, i, result); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + case 32: + case 64: + vector_mask_result(env, rd, width, lmul, i, 0); + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vmfeq.vf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vmfeq_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2, result; + + lmul = vector_get_lmul(env); + 
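+    /*
+     * The float*_eq_quiet comparisons only raise float_flag_invalid for
+     * signalling NaNs; a quiet NaN operand simply compares unequal.
+     * That matches the scalar feq.* behaviour, and the exception flags
+     * accumulate in env->fp_status as usual.
+     */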
vl = env->vfp.vl; + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + result = float16_eq_quiet(env->fpr[rs1], + env->vfp.vreg[src2].f16[j], + &env->fp_status); + vector_mask_result(env, rd, width, lmul, i, result); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + result = float32_eq_quiet(env->fpr[rs1], + env->vfp.vreg[src2].f32[j], + &env->fp_status); + vector_mask_result(env, rd, width, lmul, i, result); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + result = float64_eq_quiet(env->fpr[rs1], + env->vfp.vreg[src2].f64[j], + &env->fp_status); + vector_mask_result(env, rd, width, lmul, i, result); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + case 32: + case 64: + vector_mask_result(env, rd, width, lmul, i, 0); + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vmsne_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u8[j] != + env->vfp.vreg[src2].u8[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u16[j] != + env->vfp.vreg[src2].u16[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u32[j] != + env->vfp.vreg[src2].u32[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src1].u64[j] != + env->vfp.vreg[src2].u64[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, 
GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmsne_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint8_t)env->gpr[rs1] != env->vfp.vreg[src2].u8[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint16_t)env->gpr[rs1] != env->vfp.vreg[src2].u16[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint32_t)env->gpr[rs1] != env->vfp.vreg[src2].u32[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint64_t)extend_gpr(env->gpr[rs1]) != + env->vfp.vreg[src2].u64[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmsne_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint8_t)sign_extend(rs1, 5) + != env->vfp.vreg[src2].u8[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint16_t)sign_extend(rs1, 5) + != env->vfp.vreg[src2].u16[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if ((uint32_t)sign_extend(rs1, 5) + != env->vfp.vreg[src2].u32[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if 
((uint64_t)sign_extend(rs1, 5) != + env->vfp.vreg[src2].u64[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +/* vmand.mm vd, vs2, vs1 # vd = vs2 & vs1 */ +void VECTOR_HELPER(vmand_mm)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, i, vlmax; + uint32_t tmp; + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + for (i = 0; i < vlmax; i++) { + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + tmp = vector_mask_reg(env, rs1, width, lmul, i) & + vector_mask_reg(env, rs2, width, lmul, i); + vector_mask_result(env, rd, width, lmul, i, tmp); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + + env->vfp.vstart = 0; + return; + env->vfp.vstart = 0; +} + +/* vmfle.vv vd, vs2, vs1, vm # Vector-vector */ +void VECTOR_HELPER(vmfle_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src1, src2, result; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + result = float16_le(env->vfp.vreg[src2].f16[j], + env->vfp.vreg[src1].f16[j], + &env->fp_status); + vector_mask_result(env, rd, width, lmul, i, result); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + result = float32_le(env->vfp.vreg[src2].f32[j], + env->vfp.vreg[src1].f32[j], + &env->fp_status); + vector_mask_result(env, rd, width, lmul, i, result); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + result = float64_le(env->vfp.vreg[src2].f64[j], + env->vfp.vreg[src1].f64[j], + &env->fp_status); + vector_mask_result(env, rd, width, lmul, i, result); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + case 32: + case 64: + vector_mask_result(env, rd, width, lmul, i, 0); + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vmfle.vf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vmfle_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2, result; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || 
vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + result = float16_le(env->vfp.vreg[src2].f16[j], + env->fpr[rs1], + &env->fp_status); + vector_mask_result(env, rd, width, lmul, i, result); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + result = float32_le(env->vfp.vreg[src2].f32[j], + env->fpr[rs1], + &env->fp_status); + vector_mask_result(env, rd, width, lmul, i, result); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + result = float64_le(env->vfp.vreg[src2].f64[j], + env->fpr[rs1], + &env->fp_status); + vector_mask_result(env, rd, width, lmul, i, result); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + case 32: + case 64: + vector_mask_result(env, rd, width, lmul, i, 0); + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmsltu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src2].u8[j] < + env->vfp.vreg[src1].u8[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src2].u16[j] < + env->vfp.vreg[src1].u16[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src2].u32[j] < + env->vfp.vreg[src1].u32[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src2].u64[j] < + env->vfp.vreg[src1].u64[j]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} +void 
VECTOR_HELPER(vmsltu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src2].u8[j] < (uint8_t)env->gpr[rs1]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src2].u16[j] < (uint16_t)env->gpr[rs1]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src2].u32[j] < (uint32_t)env->gpr[rs1]) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + if (env->vfp.vreg[src2].u64[j] < + (uint64_t)extend_gpr(env->gpr[rs1])) { + vector_mask_result(env, rd, width, lmul, i, 1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + if (width <= 64) { + vector_mask_result(env, rd, width, lmul, i, 0); + } else { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +/* vmor.mm vd, vs2, vs1 # vd = vs2 | vs1 */ +void VECTOR_HELPER(vmor_mm)(CPURISCVState *env, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, i, vlmax; + uint32_t tmp; + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + for (i = 0; i < vlmax; i++) { + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + tmp = vector_mask_reg(env, rs1, width, lmul, i) | + vector_mask_reg(env, rs2, width, lmul, i); + vector_mask_result(env, rd, width, lmul, i, tmp & 0x1); + } else { + vector_mask_result(env, rd, width, lmul, i, 0); + } + } + + env->vfp.vstart = 0; + return; + env->vfp.vstart = 0; +} + +/* vmford.vv vd, vs2, vs1, vm # Vector-vector */ +void VECTOR_HELPER(vmford_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src1, src2, result; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + 
+/*
+ * Float compares share one shape as well; FN is the softfloat predicate
+ * and NEG is "+" to store the result as-is or "!" to store its negation.
+ */
+#define GEN_VECTOR_FCMP_VV(NAME, FN, NEG) \
+void VECTOR_HELPER(NAME)(CPURISCVState *env, uint32_t vm, uint32_t rs1, \
+    uint32_t rs2, uint32_t rd) \
+{ \
+    int width, lmul, vl, vlmax; \
+    int i, j, src1, src2, result; \
+ \
+    lmul = vector_get_lmul(env); \
+    vl = env->vfp.vl; \
+ \
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { \
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+        return; \
+    } \
+ \
+    vector_lmul_check_reg(env, lmul, rs1, false); \
+    vector_lmul_check_reg(env, lmul, rs2, false); \
+ \
+    if (env->vfp.vstart >= vl) { \
+        return; \
+    } \
+ \
+    width = vector_get_width(env); \
+    vlmax = vector_get_vlmax(env); \
+ \
+    for (i = 0; i < vlmax; i++) { \
+        src1 = rs1 + (i / (VLEN / width)); \
+        src2 = rs2 + (i / (VLEN / width)); \
+        j = i % (VLEN / width); \
+        if (i < env->vfp.vstart) { \
+            continue; \
+        } else if (i < vl) { \
+            switch (width) { \
+            case 16: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    result = float16_##FN(env->vfp.vreg[src2].f16[j], \
+                        env->vfp.vreg[src1].f16[j], &env->fp_status); \
+                    vector_mask_result(env, rd, width, lmul, i, NEG result); \
+                } \
+                break; \
+            case 32: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    result = float32_##FN(env->vfp.vreg[src2].f32[j], \
+                        env->vfp.vreg[src1].f32[j], &env->fp_status); \
+                    vector_mask_result(env, rd, width, lmul, i, NEG result); \
+                } \
+                break; \
+            case 64: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    result = float64_##FN(env->vfp.vreg[src2].f64[j], \
+                        env->vfp.vreg[src1].f64[j], &env->fp_status); \
+                    vector_mask_result(env, rd, width, lmul, i, NEG result); \
+                } \
+                break; \
+            default: \
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+                return; \
+            } \
+        } else if (width == 16 || width == 32 || width == 64) { \
+            vector_mask_result(env, rd, width, lmul, i, 0); \
+        } else { \
+            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+            return; \
+        } \
+    } \
+    env->vfp.vstart = 0; \
+}
+
+/* vmford.vv vd, vs2, vs1, vm # Vector-vector, set when both ordered */
+GEN_VECTOR_FCMP_VV(vmford_vv, unordered_quiet, !)
+
+#define GEN_VECTOR_FCMP_VF(NAME, FN, NEG) \
+void VECTOR_HELPER(NAME)(CPURISCVState *env, uint32_t vm, uint32_t rs1, \
+    uint32_t rs2, uint32_t rd) \
+{ \
+    int width, lmul, vl, vlmax; \
+    int i, j, src2, result; \
+ \
+    lmul = vector_get_lmul(env); \
+    vl = env->vfp.vl; \
+ \
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { \
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+        return; \
+    } \
+ \
+    vector_lmul_check_reg(env, lmul, rs2, false); \
+ \
+    if (env->vfp.vstart >= vl) { \
+        return; \
+    } \
+ \
+    width = vector_get_width(env); \
+    vlmax = vector_get_vlmax(env); \
+ \
+    for (i = 0; i < vlmax; i++) { \
+        src2 = rs2 + (i / (VLEN / width)); \
+        j = i % (VLEN / width); \
+        if (i < env->vfp.vstart) { \
+            continue; \
+        } else if (i < vl) { \
+            switch (width) { \
+            case 16: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    result = float16_##FN(env->vfp.vreg[src2].f16[j], \
+                        env->fpr[rs1], &env->fp_status); \
+                    vector_mask_result(env, rd, width, lmul, i, NEG result); \
+                } \
+                break; \
+            case 32: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    result = float32_##FN(env->vfp.vreg[src2].f32[j], \
+                        env->fpr[rs1], &env->fp_status); \
+                    vector_mask_result(env, rd, width, lmul, i, NEG result); \
+                } \
+                break; \
+            case 64: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    result = float64_##FN(env->vfp.vreg[src2].f64[j], \
+                        env->fpr[rs1], &env->fp_status); \
+                    vector_mask_result(env, rd, width, lmul, i, NEG result); \
+                } \
+                break; \
+            default: \
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+                return; \
+            } \
+        } else if (width == 16 || width == 32 || width == 64) { \
+            vector_mask_result(env, rd, width, lmul, i, 0); \
+        } else { \
+            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+            return; \
+        } \
+    } \
+    env->vfp.vstart = 0; \
+}
+
+/* vmford.vf vd, vs2, rs1, vm # Vector-scalar */
+GEN_VECTOR_FCMP_VF(vmford_vf, unordered_quiet, !)
+
+GEN_VECTOR_CMP_VV(vmslt_vv, s, <)
+
+GEN_VECTOR_CMP_VX(vmslt_vx, s, int, <)
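In the _vx forms above, the scalar is reinterpreted at the current element width, so only the low SEW bits of the x-register participate, and the signed variants see those bits as two's complement. For example:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        int64_t gpr = 0x180;            /* stands in for env->gpr[rs1] */
        assert((uint8_t)gpr == 0x80);   /* SEW=8 unsigned view */
        assert((int8_t)gpr == -128);    /* SEW=8 signed view */
        return 0;
    }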
+/* vmflt.vv vd, vs2, vs1, vm # Vector-vector */
+GEN_VECTOR_FCMP_VV(vmflt_vv, lt, +)
+
+/* vmflt.vf vd, vs2, rs1, vm # vector-scalar */
+GEN_VECTOR_FCMP_VF(vmflt_vf, lt, +)
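The choice of softfloat predicate mirrors IEEE 754: vmford.* uses the quiet unordered test (only signaling NaNs raise invalid), while vmflt/vmfle use the signaling lt/le, which raise the invalid flag for any NaN operand and compare false. A scalar model of the signaling behaviour:

    #include <math.h>
    #include <stdbool.h>

    /* Signaling less-than: any NaN operand flags invalid, result 0. */
    static bool flt_lt(double a, double b, bool *invalid)
    {
        if (isnan(a) || isnan(b)) {
            *invalid = true;
            return false;
        }
        return a < b;
    }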
+GEN_VECTOR_CMP_VV(vmsleu_vv, u, <=)
+
+GEN_VECTOR_CMP_VX(vmsleu_vx, u, uint, <=)
+
+#define GEN_VECTOR_CMP_VI(NAME, SFX, TYPE, OP, IMM) \
+void VECTOR_HELPER(NAME)(CPURISCVState *env, uint32_t vm, uint32_t rs1, \
+    uint32_t rs2, uint32_t rd) \
+{ \
+    int i, j, vl; \
+    uint32_t lmul, width, src2, vlmax; \
+ \
+    vl = env->vfp.vl; \
+    lmul = vector_get_lmul(env); \
+    width = vector_get_width(env); \
+    vlmax = vector_get_vlmax(env); \
+ \
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { \
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+        return; \
+    } \
+ \
+    vector_lmul_check_reg(env, lmul, rs2, false); \
+ \
+    for (i = 0; i < vlmax; i++) { \
+        src2 = rs2 + (i / (VLEN / width)); \
+        j = i % (VLEN / width); \
+        if (i < env->vfp.vstart) { \
+            continue; \
+        } else if (i < vl) { \
+            switch (width) { \
+            case 8: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    vector_mask_result(env, rd, width, lmul, i, \
+                        env->vfp.vreg[src2].SFX##8[j] OP \
+                        (TYPE##8##_t)(IMM)); \
+                } \
+                break; \
+            case 16: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    vector_mask_result(env, rd, width, lmul, i, \
+                        env->vfp.vreg[src2].SFX##16[j] OP \
+                        (TYPE##16##_t)(IMM)); \
+                } \
+                break; \
+            case 32: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    vector_mask_result(env, rd, width, lmul, i, \
+                        env->vfp.vreg[src2].SFX##32[j] OP \
+                        (TYPE##32##_t)(IMM)); \
+                } \
+                break; \
+            case 64: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    vector_mask_result(env, rd, width, lmul, i, \
+                        env->vfp.vreg[src2].SFX##64[j] OP \
+                        (TYPE##64##_t)(IMM)); \
+                } \
+                break; \
+            default: \
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+                return; \
+            } \
+        } else if (width <= 64) { \
+            vector_mask_result(env, rd, width, lmul, i, 0); \
+        } else { \
+            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+            return; \
+        } \
+    } \
+    env->vfp.vstart = 0; \
+}
+
+GEN_VECTOR_CMP_VI(vmsleu_vi, u, uint, <=, rs1)
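vmsleu.vi compares against the zero-extended 5-bit immediate, while the signed immediate forms below pass it through sign_extend(rs1, 5). That helper is not in this hunk; a plausible definition with the behaviour assumed here:

    #include <stdint.h>

    /* Assumed shape: replicate bit (n - 1) of x through bit 63. */
    static inline int64_t sign_extend(int64_t x, int n)
    {
        uint64_t m = 1ull << (n - 1);
        return (int64_t)((((uint64_t)x) ^ m) - m);
    }
    /* sign_extend(0x1f, 5) == -1, sign_extend(0x0f, 5) == 15 */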
+/* vmfne.vv vd, vs2, vs1, vm # Vector-vector */
+GEN_VECTOR_FCMP_VV(vmfne_vv, eq_quiet, !)
+
+/* vmfne.vf vd, vs2, rs1, vm # vector-scalar */
+GEN_VECTOR_FCMP_VF(vmfne_vf, eq_quiet, !)
+
+GEN_VECTOR_CMP_VV(vmsle_vv, s, <=)
+
+GEN_VECTOR_CMP_VX(vmsle_vx, s, int, <=)
+
+GEN_VECTOR_CMP_VI(vmsle_vi, s, int, <=, sign_extend(rs1, 5))
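All of these helpers locate elements with the same arithmetic: element i of an LMUL-wide register group lives in register base + i / (VLEN / width), slot i % (VLEN / width). Worked through with concrete numbers (VLEN is a build-time constant; 128 is assumed only for this example):

    #include <stdio.h>

    #define VLEN 128                 /* assumption for the example */

    int main(void)
    {
        int width = 32;              /* SEW in bits */
        int per_reg = VLEN / width;  /* 4 elements per register */
        int i = 9;
        /* element 9 -> register base+2, slot 1 */
        printf("reg +%d, slot %d\n", i / per_reg, i % per_reg);
        return 0;
    }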
+/* vmfgt.vf vd, vs2, rs1, vm # vector-scalar, vd = vs2 > f[rs1] */
+GEN_VECTOR_FCMP_VF(vmfgt_vf, le, !)
+
+GEN_VECTOR_CMP_VX(vmsgtu_vx, u, uint, >)
+
+GEN_VECTOR_CMP_VI(vmsgtu_vi, u, uint, >, rs1)
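The SEW=64 scalar cases go through extend_gpr() instead of a plain cast. That helper is also outside this hunk; presumably it widens the XLEN-sized register value to 64 bits so that an RV32 scalar compares consistently against 64-bit elements, something like:

    #include <stdint.h>

    /* Hypothetical model: sign-extend a 32-bit GPR value to 64 bits
     * (on RV64 the value would already be 64 bits wide). */
    static inline int64_t extend_gpr32(uint32_t x)
    {
        return (int64_t)(int32_t)x;
    }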
+GEN_VECTOR_CMP_VX(vmsgt_vx, s, int, >)
+
+GEN_VECTOR_CMP_VI(vmsgt_vi, s, int, >, sign_extend(rs1, 5))
+
+/* vmfge.vf vd, vs2, rs1, vm # vector-scalar, vd = vs2 >= f[rs1] */
+GEN_VECTOR_FCMP_VF(vmfge_vf, lt, !)
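The vsaddu/vsadd helpers below clamp instead of wrapping. The sat_add_* functions are defined elsewhere in the patch and take env, presumably so they can raise the vxsat fixed-point saturation flag; their expected behaviour is:

    #include <stdint.h>

    /* Sketches with assumed names; the real helpers report saturation
     * through env rather than an out-parameter. */
    static inline uint8_t sat_addu8(uint8_t a, uint8_t b, int *sat)
    {
        uint8_t r = a + b;
        if (r < a) {                 /* unsigned wrap-around */
            *sat = 1;
            r = UINT8_MAX;
        }
        return r;
    }

    static inline int8_t sat_adds8(int8_t a, int8_t b, int *sat)
    {
        int16_t r = (int16_t)a + b;
        if (r > INT8_MAX || r < INT8_MIN) {
            *sat = 1;
            r = (r > 0) ? INT8_MAX : INT8_MIN;
        }
        return (int8_t)r;
    }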
+/*
+ * Saturating adds share one template as well; SFX selects the signed or
+ * unsigned element arrays and the matching sat_add_* helper.
+ */
+#define GEN_VECTOR_SATADD_VV(NAME, SFX) \
+void VECTOR_HELPER(NAME)(CPURISCVState *env, uint32_t vm, uint32_t rs1, \
+    uint32_t rs2, uint32_t rd) \
+{ \
+    int width, lmul, vl, vlmax; \
+    int i, j, dest, src1, src2; \
+ \
+    lmul = vector_get_lmul(env); \
+ \
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { \
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+        return; \
+    } \
+ \
+    vector_lmul_check_reg(env, lmul, rs1, false); \
+    vector_lmul_check_reg(env, lmul, rs2, false); \
+    vector_lmul_check_reg(env, lmul, rd, false); \
+ \
+    vl = env->vfp.vl; \
+    if (env->vfp.vstart >= vl) { \
+        return; \
+    } \
+ \
+    width = vector_get_width(env); \
+    vlmax = vector_get_vlmax(env); \
+ \
+    for (i = 0; i < vlmax; i++) { \
+        dest = rd + (i / (VLEN / width)); \
+        src1 = rs1 + (i / (VLEN / width)); \
+        src2 = rs2 + (i / (VLEN / width)); \
+        j = i % (VLEN / width); \
+        if (i < env->vfp.vstart) { \
+            continue; \
+        } else if (i < vl) { \
+            switch (width) { \
+            case 8: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    env->vfp.vreg[dest].SFX##8[j] = sat_add_##SFX##8(env, \
+                        env->vfp.vreg[src1].SFX##8[j], \
+                        env->vfp.vreg[src2].SFX##8[j]); \
+                } \
+                break; \
+            case 16: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    env->vfp.vreg[dest].SFX##16[j] = sat_add_##SFX##16(env, \
+                        env->vfp.vreg[src1].SFX##16[j], \
+                        env->vfp.vreg[src2].SFX##16[j]); \
+                } \
+                break; \
+            case 32: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    env->vfp.vreg[dest].SFX##32[j] = sat_add_##SFX##32(env, \
+                        env->vfp.vreg[src1].SFX##32[j], \
+                        env->vfp.vreg[src2].SFX##32[j]); \
+                } \
+                break; \
+            case 64: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    env->vfp.vreg[dest].SFX##64[j] = sat_add_##SFX##64(env, \
+                        env->vfp.vreg[src1].SFX##64[j], \
+                        env->vfp.vreg[src2].SFX##64[j]); \
+                } \
+                break; \
+            default: \
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+                return; \
+            } \
+        } else { \
+            vector_tail_common(env, dest, j, width); \
+        } \
+    } \
+    env->vfp.vstart = 0; \
+}
+
+/* vsaddu.vv vd, vs2, vs1, vm # Vector-vector */
+GEN_VECTOR_SATADD_VV(vsaddu_vv, u)
+
+#define GEN_VECTOR_SATADD_VS(NAME, SFX, SCALAR) \
+void VECTOR_HELPER(NAME)(CPURISCVState *env, uint32_t vm, uint32_t rs1, \
+    uint32_t rs2, uint32_t rd) \
+{ \
+    int width, lmul, vl, vlmax; \
+    int i, j, dest, src2; \
+ \
+    lmul = vector_get_lmul(env); \
+ \
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { \
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+        return; \
+    } \
+ \
+    vector_lmul_check_reg(env, lmul, rs2, false); \
+    vector_lmul_check_reg(env, lmul, rd, false); \
+ \
+    vl = env->vfp.vl; \
+    if (env->vfp.vstart >= vl) { \
+        return; \
+    } \
+ \
+    width = vector_get_width(env); \
+    vlmax = vector_get_vlmax(env); \
+ \
+    for (i = 0; i < vlmax; i++) { \
+        dest = rd + (i / (VLEN / width)); \
+        src2 = rs2 + (i / (VLEN / width)); \
+        j = i % (VLEN / width); \
+        if (i < env->vfp.vstart) { \
+            continue; \
+        } else if (i < vl) { \
+            switch (width) { \
+            case 8: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    env->vfp.vreg[dest].SFX##8[j] = sat_add_##SFX##8(env, \
+                        env->vfp.vreg[src2].SFX##8[j], SCALAR); \
+                } \
+                break; \
+            case 16: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    env->vfp.vreg[dest].SFX##16[j] = sat_add_##SFX##16(env, \
+                        env->vfp.vreg[src2].SFX##16[j], SCALAR); \
+                } \
+                break; \
+            case 32: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    env->vfp.vreg[dest].SFX##32[j] = sat_add_##SFX##32(env, \
+                        env->vfp.vreg[src2].SFX##32[j], SCALAR); \
+                } \
+                break; \
+            case 64: \
+                if (vector_elem_mask(env, vm, width, lmul, i)) { \
+                    env->vfp.vreg[dest].SFX##64[j] = sat_add_##SFX##64(env, \
+                        env->vfp.vreg[src2].SFX##64[j], SCALAR); \
+                } \
+                break; \
+            default: \
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); \
+                return; \
+            } \
+        } else { \
+            vector_tail_common(env, dest, j, width); \
+        } \
+    } \
+    env->vfp.vstart = 0; \
+}
+
+/* vsaddu.vx vd, vs2, rs1, vm # vector-scalar */
+GEN_VECTOR_SATADD_VS(vsaddu_vx, u, env->gpr[rs1])
+
+/* vsaddu.vi vd, vs2, imm, vm # vector-immediate */
+GEN_VECTOR_SATADD_VS(vsaddu_vi, u, rs1)
+
+void VECTOR_HELPER(vdivu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src1, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src1 = rs1 + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].u8[j] == 0) {
+                        env->vfp.vreg[dest].u8[j] = MAX_U8;
+                    } else {
+                        env->vfp.vreg[dest].u8[j] =
+                            env->vfp.vreg[src2].u8[j] /
+                            env->vfp.vreg[src1].u8[j];
+                    }
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].u16[j] == 0) {
+                        env->vfp.vreg[dest].u16[j] = MAX_U16;
+                    } else {
+                        env->vfp.vreg[dest].u16[j] =
+                            env->vfp.vreg[src2].u16[j] /
+                            env->vfp.vreg[src1].u16[j];
+                    }
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].u32[j] == 0) {
+                        env->vfp.vreg[dest].u32[j] = MAX_U32;
+                    } else {
+                        env->vfp.vreg[dest].u32[j] =
+                            env->vfp.vreg[src2].u32[j] /
+                            env->vfp.vreg[src1].u32[j];
+                    }
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].u64[j] == 0) {
+                        env->vfp.vreg[dest].u64[j] = MAX_U64;
+                    } else {
+                        env->vfp.vreg[dest].u64[j] =
+                            env->vfp.vreg[src2].u64[j] /
+                            env->vfp.vreg[src1].u64[j];
+                    }
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vdivu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((uint8_t)env->gpr[rs1] == 0) {
+                        env->vfp.vreg[dest].u8[j] = MAX_U8;
+                    } else {
+                        env->vfp.vreg[dest].u8[j] =
+                            env->vfp.vreg[src2].u8[j] /
+                            (uint8_t)env->gpr[rs1];
+                    }
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((uint16_t)env->gpr[rs1] == 0) {
+                        env->vfp.vreg[dest].u16[j] = MAX_U16;
+                    } else {
+                        env->vfp.vreg[dest].u16[j] =
+                            env->vfp.vreg[src2].u16[j] /
+                            (uint16_t)env->gpr[rs1];
+                    }
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((uint32_t)env->gpr[rs1] == 0) {
+                        env->vfp.vreg[dest].u32[j] = MAX_U32;
+                    } else {
+                        env->vfp.vreg[dest].u32[j] =
+                            env->vfp.vreg[src2].u32[j] /
+                            (uint32_t)env->gpr[rs1];
+                    }
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) {
+                        env->vfp.vreg[dest].u64[j] = MAX_U64;
+                    } else {
+                        env->vfp.vreg[dest].u64[j] =
+                            env->vfp.vreg[src2].u64[j] /
+                            (uint64_t)extend_gpr(env->gpr[rs1]);
+                    }
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
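Division never traps: the helpers fold in the RISC-V fixups directly, so an unsigned divide by zero produces all ones, and the signed helpers below additionally pin the one overflowing quotient. As a scalar model:

    #include <stdint.h>

    /* Signed division fixups encoded in vdiv.vv/vdiv.vx below. */
    static inline int8_t div_s8(int8_t a, int8_t b)
    {
        if (b == 0) {
            return -1;               /* x / 0 -> -1 */
        }
        if (a == INT8_MIN && b == -1) {
            return INT8_MIN;         /* quotient overflow -> INT8_MIN */
        }
        return a / b;
    }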
uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_div( + env->vfp.vreg[src2].f16[j], + env->fpr[rs1], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_div( + env->vfp.vreg[src2].f32[j], + env->fpr[rs1], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_div( + env->vfp.vreg[src2].f64[j], + env->fpr[rs1], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vsadd.vv vd, vs2, vs1, vm # Vector-vector */ +void VECTOR_HELPER(vsadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = sat_add_s8(env, + env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = sat_add_s16(env, + env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = sat_add_s32(env, + env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = sat_add_s64(env, + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vsadd.vx vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vsadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + 
lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = sat_add_s8(env, + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = sat_add_s16(env, + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = sat_add_s32(env, + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = sat_add_s64(env, + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vsadd.vi vd, vs2, imm, vm # vector-immediate */ +void VECTOR_HELPER(vsadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = sat_add_s8(env, + env->vfp.vreg[src2].s8[j], sign_extend(rs1, 5)); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = sat_add_s16(env, + env->vfp.vreg[src2].s16[j], sign_extend(rs1, 5)); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = sat_add_s32(env, + env->vfp.vreg[src2].s32[j], sign_extend(rs1, 5)); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = sat_add_s64(env, + env->vfp.vreg[src2].s64[j], sign_extend(rs1, 5)); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) 
+void VECTOR_HELPER(vdiv_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src1, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env)
+        || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src1 = rs1 + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].s8[j] == 0) {
+                        env->vfp.vreg[dest].s8[j] = -1;
+                    } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
+                        (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) {
+                        env->vfp.vreg[dest].s8[j] = MIN_S8;
+                    } else {
+                        env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] /
+                            env->vfp.vreg[src1].s8[j];
+                    }
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].s16[j] == 0) {
+                        env->vfp.vreg[dest].s16[j] = -1;
+                    } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
+                        (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) {
+                        env->vfp.vreg[dest].s16[j] = MIN_S16;
+                    } else {
+                        env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+                            / env->vfp.vreg[src1].s16[j];
+                    }
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].s32[j] == 0) {
+                        env->vfp.vreg[dest].s32[j] = -1;
+                    } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
+                        (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) {
+                        env->vfp.vreg[dest].s32[j] = MIN_S32;
+                    } else {
+                        env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+                            / env->vfp.vreg[src1].s32[j];
+                    }
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].s64[j] == 0) {
+                        env->vfp.vreg[dest].s64[j] = -1;
+                    } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
+                        (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) {
+                        env->vfp.vreg[dest].s64[j] = MIN_S64;
+                    } else {
+                        env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+                            / env->vfp.vreg[src1].s64[j];
+                    }
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
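The divide-by-zero (-1) and MIN/-1 overflow (MIN) results match the
spec's table, but open-coding them per width buries that. One generic
quotient helper per signedness would do, something like (sketch):

  static inline int64_t sdiv_q(int64_t a, int64_t b, int64_t min)
  {
      if (b == 0) {
          return -1;       /* quotient of division by zero is all ones */
      } else if (a == min && b == -1) {
          return min;      /* the overflow case saturates to min */
      }
      return a / b;
  }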
+void VECTOR_HELPER(vdiv_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((int8_t)env->gpr[rs1] == 0) {
+                        env->vfp.vreg[dest].s8[j] = -1;
+                    } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
+                        ((int8_t)env->gpr[rs1] == (int8_t)(-1))) {
+                        env->vfp.vreg[dest].s8[j] = MIN_S8;
+                    } else {
+                        env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] /
+                            (int8_t)env->gpr[rs1];
+                    }
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((int16_t)env->gpr[rs1] == 0) {
+                        env->vfp.vreg[dest].s16[j] = -1;
+                    } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
+                        ((int16_t)env->gpr[rs1] == (int16_t)(-1))) {
+                        env->vfp.vreg[dest].s16[j] = MIN_S16;
+                    } else {
+                        env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+                            / (int16_t)env->gpr[rs1];
+                    }
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((int32_t)env->gpr[rs1] == 0) {
+                        env->vfp.vreg[dest].s32[j] = -1;
+                    } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
+                        ((int32_t)env->gpr[rs1] == (int32_t)(-1))) {
+                        env->vfp.vreg[dest].s32[j] = MIN_S32;
+                    } else {
+                        env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+                            / (int32_t)env->gpr[rs1];
+                    }
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) {
+                        env->vfp.vreg[dest].s64[j] = -1;
+                    } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
+                        ((int64_t)extend_gpr(env->gpr[rs1]) == (int64_t)(-1))) {
+                        env->vfp.vreg[dest].s64[j] = MIN_S64;
+                    } else {
+                        env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+                            / (int64_t)extend_gpr(env->gpr[rs1]);
+                    }
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+/* vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] */
+void VECTOR_HELPER(vfrdiv_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src2;
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f16[j] = float16_div(
+                        env->fpr[rs1],
+                        env->vfp.vreg[src2].f16[j],
+                        &env->fp_status);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f32[j] = float32_div(
+                        env->fpr[rs1],
+                        env->vfp.vreg[src2].f32[j],
+                        &env->fp_status);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f64[j] = float64_div(
+                        env->fpr[rs1],
+                        env->vfp.vreg[src2].f64[j],
+                        &env->fp_status);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_fcommon(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
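Here (and in vfdiv.vf above) env->fpr[rs1] is fed straight into
float16_div/float32_div. On an FLEN=64 implementation the scalar is
NaN-boxed, so the narrower ops presumably need to unbox it first,
otherwise a correctly boxed float32 becomes an arbitrary bit pattern.
Something along these lines (sketch only):

  static inline float32 fpr_to_float32(uint64_t f)
  {
      /* a valid narrower value is boxed with all-ones in the upper bits */
      if ((f & MAKE_64BIT_MASK(32, 32)) == MAKE_64BIT_MASK(32, 32)) {
          return (uint32_t)f;
      }
      return 0x7fc00000;   /* canonical qNaN for an improperly boxed value */
  }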
+/* vssubu.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vssubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src1, src2;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        src1 = rs1 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u8[j] = sat_sub_u8(env,
+                        env->vfp.vreg[src2].u8[j], env->vfp.vreg[src1].u8[j]);
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u16[j] = sat_sub_u16(env,
+                        env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u16[j]);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u32[j] = sat_sub_u32(env,
+                        env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u32[j]);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u64[j] = sat_sub_u64(env,
+                        env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u64[j]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+/* vssubu.vx vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vssubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src2;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u8[j] = sat_sub_u8(env,
+                        env->vfp.vreg[src2].u8[j], env->gpr[rs1]);
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u16[j] = sat_sub_u16(env,
+                        env->vfp.vreg[src2].u16[j], env->gpr[rs1]);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u32[j] = sat_sub_u32(env,
+                        env->vfp.vreg[src2].u32[j], env->gpr[rs1]);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u64[j] = sat_sub_u64(env,
+                        env->vfp.vreg[src2].u64[j], env->gpr[rs1]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
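The 64-bit arm of vdiv.vx above goes through extend_gpr(env->gpr[rs1])
but here vssubu.vx passes env->gpr[rs1] straight through. Whichever is
right, it should be consistent; if extend_gpr exists to get a usable
64-bit value out of an RV32 gpr then the saturating ops want it too:

  env->vfp.vreg[dest].u64[j] = sat_sub_u64(env,
      env->vfp.vreg[src2].u64[j], (uint64_t)extend_gpr(env->gpr[rs1]));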
+void VECTOR_HELPER(vremu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src1, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src1 = rs1 + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].u8[j] == 0) {
+                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
+                    } else {
+                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] %
+                            env->vfp.vreg[src1].u8[j];
+                    }
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].u16[j] == 0) {
+                        env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j];
+                    } else {
+                        env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+                            % env->vfp.vreg[src1].u16[j];
+                    }
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].u32[j] == 0) {
+                        env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j];
+                    } else {
+                        env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+                            % env->vfp.vreg[src1].u32[j];
+                    }
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].u64[j] == 0) {
+                        env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j];
+                    } else {
+                        env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+                            % env->vfp.vreg[src1].u64[j];
+                    }
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vremu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((uint8_t)env->gpr[rs1] == 0) {
+                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j];
+                    } else {
+                        env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] %
+                            (uint8_t)env->gpr[rs1];
+                    }
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((uint16_t)env->gpr[rs1] == 0) {
+                        env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j];
+                    } else {
+                        env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+                            % (uint16_t)env->gpr[rs1];
+                    }
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((uint32_t)env->gpr[rs1] == 0) {
+                        env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j];
+                    } else {
+                        env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+                            % (uint32_t)env->gpr[rs1];
+                    }
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((uint64_t)extend_gpr(env->gpr[rs1]) == 0) {
+                        env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j];
+                    } else {
+                        env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+                            % (uint64_t)extend_gpr(env->gpr[rs1]);
+                    }
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+/* vmsbf.m vd, vs2, vm # set-before-first mask bit */
+void VECTOR_HELPER(vmsbf_m)(CPURISCVState *env, uint32_t vm,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i;
+    bool first_mask_bit = false;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (env->vfp.vstart != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        if (i < vl) {
+            if (vector_elem_mask(env, vm, width, lmul, i)) {
+                if (first_mask_bit) {
+                    vector_mask_result(env, rd, width, lmul, i, 0);
+                    continue;
+                }
+                if (!vector_mask_reg(env, rs2, width, lmul, i)) {
+                    vector_mask_result(env, rd, width, lmul, i, 1);
+                } else {
+                    first_mask_bit = true;
+                    vector_mask_result(env, rd, width, lmul, i, 0);
+                }
+            }
+        } else {
+            vector_mask_result(env, rd, width, lmul, i, 0);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+/* vmsif.m vd, vs2, vm # set-including-first mask bit */
+void VECTOR_HELPER(vmsif_m)(CPURISCVState *env, uint32_t vm,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i;
+    bool first_mask_bit = false;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (env->vfp.vstart != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        if (i < vl) {
+            if (vector_elem_mask(env, vm, width, lmul, i)) {
+                if (first_mask_bit) {
+                    vector_mask_result(env, rd, width, lmul, i, 0);
+                    continue;
+                }
+                if (!vector_mask_reg(env, rs2, width, lmul, i)) {
+                    vector_mask_result(env, rd, width, lmul, i, 1);
+                } else {
+                    first_mask_bit = true;
+                    vector_mask_result(env, rd, width, lmul, i, 1);
+                }
+            }
+        } else {
+            vector_mask_result(env, rd, width, lmul, i, 0);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+/* vmsof.m vd, vs2, vm # set-only-first mask bit */
+void VECTOR_HELPER(vmsof_m)(CPURISCVState *env, uint32_t vm,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i;
+    bool first_mask_bit = false;
+
+    if (vector_vtype_ill(env)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (env->vfp.vstart != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        if (i < vl) {
+            if (vector_elem_mask(env, vm, width, lmul, i)) {
+                if (first_mask_bit) {
+                    vector_mask_result(env, rd, width, lmul, i, 0);
+                    continue;
+                }
+                if (!vector_mask_reg(env, rs2, width, lmul, i)) {
+                    vector_mask_result(env, rd, width, lmul, i, 0);
+                } else {
+                    first_mask_bit = true;
+                    vector_mask_result(env, rd, width, lmul, i, 1);
+                }
+            }
+        } else {
+            vector_mask_result(env, rd, width, lmul, i, 0);
+        }
+    }
+    env->vfp.vstart = 0;
+}
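vmsbf/vmsif/vmsof are identical apart from which bit gets written
before and at the first set element, so one worker would fold all
three (and their duplicated error checking). Sketch:

  /* (before, at): vmsbf = (1, 0), vmsif = (1, 1), vmsof = (0, 1) */
  static void vmsxf_m(CPURISCVState *env, uint32_t vm, uint32_t rs2,
                      uint32_t rd, int before, int at)
  {
      int width = vector_get_width(env);
      int lmul = vector_get_lmul(env);
      int vl = env->vfp.vl;
      bool found = false;
      int i;

      for (i = 0; i < vector_get_vlmax(env); i++) {
          if (i >= vl) {
              vector_mask_result(env, rd, width, lmul, i, 0);
          } else if (vector_elem_mask(env, vm, width, lmul, i)) {
              if (found) {
                  vector_mask_result(env, rd, width, lmul, i, 0);
              } else if (vector_mask_reg(env, rs2, width, lmul, i)) {
                  found = true;
                  vector_mask_result(env, rd, width, lmul, i, at);
              } else {
                  vector_mask_result(env, rd, width, lmul, i, before);
              }
          }
      }
  }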
+/* viota.m v4, v2, v0.t */
+void VECTOR_HELPER(viota_m)(CPURISCVState *env, uint32_t vm, uint32_t rs2,
+    uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest;
+    uint32_t sum = 0;
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+    if (vector_vtype_ill(env)
+        || vector_overlap_vm_force(vm, rd)
+        || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 1)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    if (env->vfp.vstart != 0) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u8[j] = sum;
+                    if (vector_mask_reg(env, rs2, width, lmul, i)) {
+                        sum++;
+                    }
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u16[j] = sum;
+                    if (vector_mask_reg(env, rs2, width, lmul, i)) {
+                        sum++;
+                    }
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u32[j] = sum;
+                    if (vector_mask_reg(env, rs2, width, lmul, i)) {
+                        sum++;
+                    }
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u64[j] = sum;
+                    if (vector_mask_reg(env, rs2, width, lmul, i)) {
+                        sum++;
+                    }
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+/* vid.v vd, vm # Write element ID to destination. */
+void VECTOR_HELPER(vid_v)(CPURISCVState *env, uint32_t vm, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest;
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rd, false);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u8[j] = i;
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u16[j] = i;
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u32[j] = i;
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u64[j] = i;
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
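A worked example in a comment would make viota.m easier to review:
with vl=6, all elements active and vs2 mask bits 0,1,0,1,1,0 the
destination becomes 0,0,1,1,2,3 - each element receives the count of
set bits below it. The running sum also means the vstart != 0 reject
above is load-bearing: this loop cannot be restarted part-way through.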
+/* vssub.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vssub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src1, src2;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        src1 = rs1 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s8[j] = sat_sub_s8(env,
+                        env->vfp.vreg[src2].s8[j], env->vfp.vreg[src1].s8[j]);
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s16[j] = sat_sub_s16(env,
+                        env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].s16[j]);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s32[j] = sat_sub_s32(env,
+                        env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].s32[j]);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s64[j] = sat_sub_s64(env,
+                        env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].s64[j]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+/* vssub.vx vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vssub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src2;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s8[j] = sat_sub_s8(env,
+                        env->vfp.vreg[src2].s8[j], env->gpr[rs1]);
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s16[j] = sat_sub_s16(env,
+                        env->vfp.vreg[src2].s16[j], env->gpr[rs1]);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s32[j] = sat_sub_s32(env,
+                        env->vfp.vreg[src2].s32[j], env->gpr[rs1]);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s64[j] = sat_sub_s64(env,
+                        env->vfp.vreg[src2].s64[j], env->gpr[rs1]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+void VECTOR_HELPER(vrem_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src1, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src1 = rs1 + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].s8[j] == 0) {
+                        env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j];
+                    } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
+                        (env->vfp.vreg[src1].s8[j] == (int8_t)(-1))) {
+                        env->vfp.vreg[dest].s8[j] = 0;
+                    } else {
+                        env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] %
+                            env->vfp.vreg[src1].s8[j];
+                    }
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].s16[j] == 0) {
+                        env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j];
+                    } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
+                        (env->vfp.vreg[src1].s16[j] == (int16_t)(-1))) {
+                        env->vfp.vreg[dest].s16[j] = 0;
+                    } else {
+                        env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+                            % env->vfp.vreg[src1].s16[j];
+                    }
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].s32[j] == 0) {
+                        env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j];
+                    } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
+                        (env->vfp.vreg[src1].s32[j] == (int32_t)(-1))) {
+                        env->vfp.vreg[dest].s32[j] = 0;
+                    } else {
+                        env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+                            % env->vfp.vreg[src1].s32[j];
+                    }
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if (env->vfp.vreg[src1].s64[j] == 0) {
+                        env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j];
+                    } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
+                        (env->vfp.vreg[src1].s64[j] == (int64_t)(-1))) {
+                        env->vfp.vreg[dest].s64[j] = 0;
+                    } else {
+                        env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+                            % env->vfp.vreg[src1].s64[j];
+                    }
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vrem_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((int8_t)env->gpr[rs1] == 0) {
+                        env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j];
+                    } else if ((env->vfp.vreg[src2].s8[j] == MIN_S8) &&
+                        ((int8_t)env->gpr[rs1] == (int8_t)(-1))) {
+                        env->vfp.vreg[dest].s8[j] = 0;
+                    } else {
+                        env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] %
+                            (int8_t)env->gpr[rs1];
+                    }
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((int16_t)env->gpr[rs1] == 0) {
+                        env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j];
+                    } else if ((env->vfp.vreg[src2].s16[j] == MIN_S16) &&
+                        ((int16_t)env->gpr[rs1] == (int16_t)(-1))) {
+                        env->vfp.vreg[dest].s16[j] = 0;
+                    } else {
+                        env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j]
+                            % (int16_t)env->gpr[rs1];
+                    }
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((int32_t)env->gpr[rs1] == 0) {
+                        env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j];
+                    } else if ((env->vfp.vreg[src2].s32[j] == MIN_S32) &&
+                        ((int32_t)env->gpr[rs1] == (int32_t)(-1))) {
+                        env->vfp.vreg[dest].s32[j] = 0;
+                    } else {
+                        env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j]
+                            % (int32_t)env->gpr[rs1];
+                    }
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    if ((int64_t)extend_gpr(env->gpr[rs1]) == 0) {
+                        env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j];
+                    } else if ((env->vfp.vreg[src2].s64[j] == MIN_S64) &&
+                        ((int64_t)extend_gpr(env->gpr[rs1]) == (int64_t)(-1))) {
+                        env->vfp.vreg[dest].s64[j] = 0;
+                    } else {
+                        env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+                            % (int64_t)extend_gpr(env->gpr[rs1]);
+                    }
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
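Same comment as for vdiv: x % 0 => x and the MIN % -1 => 0 overflow
case are both from the spec's table, and could be one helper per
signedness instead of eight copies. Sketch:

  static inline int64_t srem_r(int64_t a, int64_t b, int64_t min)
  {
      if (b == 0) {
          return a;    /* remainder of division by zero is the dividend */
      } else if (a == min && b == -1) {
          return 0;    /* the one case where a % b is UB in C */
      }
      return a % b;
  }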
+/* vaadd.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vaadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src1, src2;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        src1 = rs1 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
+                        env->vfp.vreg[src1].s8[j], env->vfp.vreg[src2].s8[j]);
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
+                        env->vfp.vreg[src1].s16[j], env->vfp.vreg[src2].s16[j]);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
+                        env->vfp.vreg[src1].s32[j], env->vfp.vreg[src2].s32[j]);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
+                        env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+/* vaadd.vx vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vaadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src2;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
+                        env->gpr[rs1], env->vfp.vreg[src2].s8[j]);
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
+                        env->gpr[rs1], env->vfp.vreg[src2].s16[j]);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
+                        env->gpr[rs1], env->vfp.vreg[src2].s32[j]);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
+                        env->gpr[rs1], env->vfp.vreg[src2].s64[j]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
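vaadd is supposed to honour the fixed-point rounding mode; if the
avg_round_s* helpers hard-wire round-to-nearest-up that at least wants
a comment. The usual trick, computed wide to avoid overflow (sketch,
assuming only the vxrm=0 rounding behaviour):

  static inline int8_t avg_round_s8(CPURISCVState *env, int8_t a, int8_t b)
  {
      int16_t sum = (int16_t)a + b;    /* no overflow at 16 bits */
      return (sum + 1) >> 1;           /* round half up */
  }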
+/* vaadd.vi vd, vs2, imm, vm # vector-immediate */
+void VECTOR_HELPER(vaadd_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src2;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    vl = env->vfp.vl;
+
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
+                        rs1, env->vfp.vreg[src2].s8[j]);
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s16[j] = avg_round_s16(env,
+                        rs1, env->vfp.vreg[src2].s16[j]);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s32[j] = avg_round_s32(env,
+                        rs1, env->vfp.vreg[src2].s32[j]);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s64[j] = avg_round_s64(env,
+                        rs1, env->vfp.vreg[src2].s64[j]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
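Unlike vsadd.vi above, vaadd.vi feeds the raw 5-bit rs1 field in
without sign_extend(rs1, 5), so an immediate of -1 would be averaged
as +31. If the immediate is signed for this op too, this presumably
wants:

  env->vfp.vreg[dest].s8[j] = avg_round_s8(env,
      sign_extend(rs1, 5), env->vfp.vreg[src2].s8[j]);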
+void VECTOR_HELPER(vmulhu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src1, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src1 = rs1 + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u8[j] =
+                        ((uint16_t)env->vfp.vreg[src1].u8[j]
+                        * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width;
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u16[j] =
+                        ((uint32_t)env->vfp.vreg[src1].u16[j]
+                        * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width;
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u32[j] =
+                        ((uint64_t)env->vfp.vreg[src1].u32[j]
+                        * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width;
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u64[j] = u64xu64_lh(
+                        env->vfp.vreg[src1].u64[j], env->vfp.vreg[src2].u64[j]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vmulhu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u8[j] =
+                        ((uint16_t)(uint8_t)env->gpr[rs1]
+                        * (uint16_t)env->vfp.vreg[src2].u8[j]) >> width;
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u16[j] =
+                        ((uint32_t)(uint16_t)env->gpr[rs1]
+                        * (uint32_t)env->vfp.vreg[src2].u16[j]) >> width;
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u32[j] =
+                        ((uint64_t)(uint32_t)env->gpr[rs1]
+                        * (uint64_t)env->vfp.vreg[src2].u32[j]) >> width;
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u64[j] = u64xu64_lh(
+                        (uint64_t)extend_gpr(env->gpr[rs1]),
+                        env->vfp.vreg[src2].u64[j]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
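For the 64x64 high halves there are already portable helpers in
include/qemu/host-utils.h, so u64xu64_lh and friends shouldn't be
needed:

  uint64_t lo, hi;
  mulu64(&lo, &hi, a, b);    /* unsigned 128-bit product, hi:lo */
  muls64(&lo, &hi, a, b);    /* signed variant, for vmulh below */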
+/* vfmul.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vfmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src1, src2;
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        src1 = rs1 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f16[j] = float16_mul(
+                        env->vfp.vreg[src1].f16[j],
+                        env->vfp.vreg[src2].f16[j],
+                        &env->fp_status);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f32[j] = float32_mul(
+                        env->vfp.vreg[src1].f32[j],
+                        env->vfp.vreg[src2].f32[j],
+                        &env->fp_status);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f64[j] = float64_mul(
+                        env->vfp.vreg[src1].f64[j],
+                        env->vfp.vreg[src2].f64[j],
+                        &env->fp_status);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_fcommon(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+/* vfmul.vf vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vfmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src2;
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f16[j] = float16_mul(
+                        env->fpr[rs1],
+                        env->vfp.vreg[src2].f16[j],
+                        &env->fp_status);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f32[j] = float32_mul(
+                        env->fpr[rs1],
+                        env->vfp.vreg[src2].f32[j],
+                        &env->fp_status);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f64[j] = float64_mul(
+                        env->fpr[rs1],
+                        env->vfp.vreg[src2].f64[j],
+                        &env->fp_status);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_fcommon(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+void VECTOR_HELPER(vsll_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src1, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src1 = rs1 + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+                        << (env->vfp.vreg[src1].u8[j] & 0x7);
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+                        << (env->vfp.vreg[src1].u16[j] & 0xf);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+                        << (env->vfp.vreg[src1].u32[j] & 0x1f);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+                        << (env->vfp.vreg[src1].u64[j] & 0x3f);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vsll_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+                        << (env->gpr[rs1] & 0x7);
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+                        << (env->gpr[rs1] & 0xf);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+                        << (env->gpr[rs1] & 0x1f);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+                        << ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vsll_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
+                        << (rs1);
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j]
+                        << (rs1);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j]
+                        << (rs1);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j]
+                        << (rs1);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
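The .vv and .vx forms mask the shift amount to the low lg2(SEW) bits
but vsll.vi shifts by the raw immediate, so SEW=8 with imm=9 shifts a
u8 by 9. For consistency (and to avoid leaning on integer promotion)
this presumably wants the same masking as the other forms:

  env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
      << (rs1 & 0x7);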
+void VECTOR_HELPER(vmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src1, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src1 = rs1 + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j]
+                        * env->vfp.vreg[src2].s8[j];
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src1].s16[j]
+                        * env->vfp.vreg[src2].s16[j];
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src1].s32[j]
+                        * env->vfp.vreg[src2].s32[j];
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src1].s64[j]
+                        * env->vfp.vreg[src2].s64[j];
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s8[j] = env->gpr[rs1]
+                        * env->vfp.vreg[src2].s8[j];
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s16[j] = env->gpr[rs1]
+                        * env->vfp.vreg[src2].s16[j];
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s32[j] = env->gpr[rs1]
+                        * env->vfp.vreg[src2].s32[j];
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s64[j] =
+                        (int64_t)extend_gpr(env->gpr[rs1])
+                        * env->vfp.vreg[src2].s64[j];
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+/* vasub.vv vd, vs2, vs1, vm # Vector-vector */
+void VECTOR_HELPER(vasub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src1, src2;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        src1 = rs1 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s8[j] = avg_round_s8(
+                        env,
+                        ~env->vfp.vreg[src1].s8[j] + 1,
+                        env->vfp.vreg[src2].s8[j]);
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s16[j] = avg_round_s16(
+                        env,
+                        ~env->vfp.vreg[src1].s16[j] + 1,
+                        env->vfp.vreg[src2].s16[j]);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s32[j] = avg_round_s32(
+                        env,
+                        ~env->vfp.vreg[src1].s32[j] + 1,
+                        env->vfp.vreg[src2].s32[j]);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s64[j] = avg_round_s64(
+                        env,
+                        ~env->vfp.vreg[src1].s64[j] + 1,
+                        env->vfp.vreg[src2].s64[j]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+/* vasub.vx vd, vs2, rs1, vm # vector-scalar */
+void VECTOR_HELPER(vasub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src2;
+
+    lmul = vector_get_lmul(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    vl = env->vfp.vl;
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s8[j] = avg_round_s8(
+                        env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s8[j]);
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s16[j] = avg_round_s16(
+                        env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s16[j]);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s32[j] = avg_round_s32(
+                        env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s32[j]);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s64[j] = avg_round_s64(
+                        env, ~env->gpr[rs1] + 1, env->vfp.vreg[src2].s64[j]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
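~x + 1 is just -x, and for x == INT8_MIN (etc.) it overflows before
avg_round_s* ever sees the value. Doing the subtract inside the helper
at double width avoids that, e.g. (sketch):

  static inline int8_t avg_round_sub_s8(CPURISCVState *env,
                                        int8_t vs2, int8_t vs1)
  {
      int16_t diff = (int16_t)vs2 - vs1;   /* cannot overflow at 16 bits */
      return (diff + 1) >> 1;
  }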
+void VECTOR_HELPER(vmulhsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src1, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src1 = rs1 + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s8[j] =
+                        ((uint16_t)env->vfp.vreg[src1].u8[j]
+                        * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s16[j] =
+                        ((uint32_t)env->vfp.vreg[src1].u16[j]
+                        * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s32[j] =
+                        ((uint64_t)env->vfp.vreg[src1].u32[j]
+                        * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s64[j] = s64xu64_lh(
+                        env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u64[j]);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+void VECTOR_HELPER(vmulhsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int i, j, vl;
+    uint32_t lmul, width, src2, dest, vlmax;
+
+    vl = env->vfp.vl;
+    lmul = vector_get_lmul(env);
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    for (i = 0; i < vlmax; i++) {
+        src2 = rs2 + (i / (VLEN / width));
+        dest = rd + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 8:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s8[j] =
+                        ((uint16_t)(uint8_t)env->gpr[rs1]
+                        * (int16_t)env->vfp.vreg[src2].s8[j]) >> width;
+                }
+                break;
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s16[j] =
+                        ((uint32_t)(uint16_t)env->gpr[rs1]
+                        * (int32_t)env->vfp.vreg[src2].s16[j]) >> width;
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s32[j] =
+                        ((uint64_t)(uint32_t)env->gpr[rs1]
+                        * (int64_t)env->vfp.vreg[src2].s32[j]) >> width;
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s64[j] = s64xu64_lh(
+                        env->vfp.vreg[src2].s64[j],
+                        (uint64_t)extend_gpr(env->gpr[rs1]));
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
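s64xu64_lh can also come from mulu64 with the standard sign fix-up
rather than a hand-rolled 128-bit multiply. Sketch:

  static inline int64_t mulhsu64(int64_t a, uint64_t b)
  {
      uint64_t lo, hi;

      mulu64(&lo, &hi, (uint64_t)a, b);
      if (a < 0) {
          hi -= b;    /* correct for a having been treated as unsigned */
      }
      return hi;
  }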
+ } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = + ((int16_t)env->vfp.vreg[src1].s8[j] + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = + ((int32_t)env->vfp.vreg[src1].s16[j] + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = + ((int64_t)env->vfp.vreg[src1].s32[j] + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = s64xs64_lh( + env->vfp.vreg[src1].s64[j], env->vfp.vreg[src2].s64[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmulh_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = + ((int16_t)(int8_t)env->gpr[rs1] + * (int16_t)env->vfp.vreg[src2].s8[j]) >> width; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = + ((int32_t)(int16_t)env->gpr[rs1] + * (int32_t)env->vfp.vreg[src2].s16[j]) >> width; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = + ((int64_t)(int32_t)env->gpr[rs1] + * (int64_t)env->vfp.vreg[src2].s32[j]) >> width; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = s64xs64_lh( + (int64_t)extend_gpr(env->gpr[rs1]) + , env->vfp.vreg[src2].s64[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vfrsub.vf vd, vs2, rs1, vm # Scalar-vector vd[i] = f[rs1] - vs2[i] */ +void VECTOR_HELPER(vfrsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, 
lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_sub( + env->fpr[rs1], + env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_sub( + env->fpr[rs1], + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_sub( + env->fpr[rs1], + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] + >> (env->vfp.vreg[src1].u8[j] & 0x7); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j] + >> (env->vfp.vreg[src1].u16[j] & 0xf); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j] + >> (env->vfp.vreg[src1].u32[j] & 0x1f); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j] + >> (env->vfp.vreg[src1].u64[j] & 0x3f); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch 
(width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] + >> (env->gpr[rs1] & 0x7); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j] + >> (env->gpr[rs1] & 0xf); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j] + >> (env->gpr[rs1] & 0x1f); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j] + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j] + >> (rs1); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u16[j] + >> (rs1); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u32[j] + >> (rs1); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = env->vfp.vreg[src2].u64[j] + >> (rs1); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vfmadd.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) + vs2[i] */ +void VECTOR_HELPER(vfmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->vfp.vreg[src1].f16[j], + env->vfp.vreg[dest].f16[j], + env->vfp.vreg[src2].f16[j], + 0, + &env->fp_status); + } + 
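
Going back to the shift immediates above: vsrl_vi (and vsra_vi, plus
vnsrl_vi/vnsra_vi below) apply the 5 bit immediate unmasked as
">> (rs1)". For SEW=8/16 that can shift by more than the element
width, which gives the wrong result - unless I'm misreading the 0.7.1
spec, only the low log2(SEW) bits of the shift amount should be used,
as the .vx forms already do with their "& 0x7" etc. masks. I'd expect
e.g.:

    env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u8[j]
        >> (rs1 & 0x7);

And for the SEW=64 high-half multiplies earlier, is there a reason
s64xs64_lh() can't just be muls64() from "qemu/host-utils.h"?
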
break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->vfp.vreg[src1].f32[j], + env->vfp.vreg[dest].f32[j], + env->vfp.vreg[src2].f32[j], + 0, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->vfp.vreg[src1].f64[j], + env->vfp.vreg[dest].f64[j], + env->vfp.vreg[src2].f64[j], + 0, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfmadd.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) + vs2[i] */ +void VECTOR_HELPER(vfmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->fpr[rs1], + env->vfp.vreg[dest].f16[j], + env->vfp.vreg[src2].f16[j], + 0, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->fpr[rs1], + env->vfp.vreg[dest].f32[j], + env->vfp.vreg[src2].f32[j], + 0, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->fpr[rs1], + env->vfp.vreg[dest].f64[j], + env->vfp.vreg[src2].f64[j], + 0, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] + >> (env->vfp.vreg[src1].s8[j] & 0x7); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j] + >> 
(env->vfp.vreg[src1].s16[j] & 0xf); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j] + >> (env->vfp.vreg[src1].s32[j] & 0x1f); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j] + >> (env->vfp.vreg[src1].s64[j] & 0x3f); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] + >> (env->gpr[rs1] & 0x7); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j] + >> (env->gpr[rs1] & 0xf); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j] + >> (env->gpr[rs1] & 0x1f); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j] + >> ((uint64_t)extend_gpr(env->gpr[rs1]) & 0x3f); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] + >> (rs1); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j] + >> (rs1); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j] + >> (rs1); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j] + >> (rs1); + } + break; + default: + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src1].s8[j] + * env->vfp.vreg[dest].s8[j] + + env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src1].s16[j] + * env->vfp.vreg[dest].s16[j] + + env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src1].s32[j] + * env->vfp.vreg[dest].s32[j] + + env->vfp.vreg[src2].s32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src1].s64[j] + * env->vfp.vreg[dest].s64[j] + + env->vfp.vreg[src2].s64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = env->gpr[rs1] + * env->vfp.vreg[dest].s8[j] + + env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = env->gpr[rs1] + * env->vfp.vreg[dest].s16[j] + + env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = env->gpr[rs1] + * env->vfp.vreg[dest].s32[j] + + env->vfp.vreg[src2].s32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = + (int64_t)extend_gpr(env->gpr[rs1]) + * env->vfp.vreg[dest].s64[j] + + env->vfp.vreg[src2].s64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + 
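
More a comment on the file as a whole, prompted by vmadd: nearly every
integer helper here is the same vstart/vl/tail loop wrapped around a
different one-line expression, which is a big part of why
vector_helper.c balloons to 26k lines. Generating the per-width bodies
with a macro would shrink it dramatically and make review realistic.
A rough, untested sketch of the shape (names invented, needs
<stdint.h>, with masking and tail handling layered on in one shared
place):

    /* instantiate the element loop once per element type */
    #define GEN_VEXT_MADD(NAME, ETYPE)                                \
    static void NAME(ETYPE *vd, const ETYPE *vs1, const ETYPE *vs2,   \
                     int vstart, int vl)                              \
    {                                                                 \
        for (int i = vstart; i < vl; i++) {                           \
            vd[i] = vs1[i] * vd[i] + vs2[i];   /* vmadd */            \
        }                                                             \
    }

    GEN_VEXT_MADD(vmadd_b, int8_t)
    GEN_VEXT_MADD(vmadd_h, int16_t)
    GEN_VEXT_MADD(vmadd_w, int32_t)
    GEN_VEXT_MADD(vmadd_d, int64_t)
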
vector_tail_common(env, dest, j, width); + } + } + + env->vfp.vstart = 0; +} + +/* vfnmadd.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) - vs2[i] */ +void VECTOR_HELPER(vfnmadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->vfp.vreg[src1].f16[j], + env->vfp.vreg[dest].f16[j], + env->vfp.vreg[src2].f16[j], + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->vfp.vreg[src1].f32[j], + env->vfp.vreg[dest].f32[j], + env->vfp.vreg[src2].f32[j], + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->vfp.vreg[src1].f64[j], + env->vfp.vreg[dest].f64[j], + env->vfp.vreg[src2].f64[j], + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfnmadd.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) - vs2[i] */ +void VECTOR_HELPER(vfnmadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->fpr[rs1], + env->vfp.vreg[dest].f16[j], + env->vfp.vreg[src2].f16[j], + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->fpr[rs1], + env->vfp.vreg[dest].f32[j], + env->vfp.vreg[src2].f32[j], + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + case 64: + if 
(vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->fpr[rs1], + env->vfp.vreg[dest].f64[j], + env->vfp.vreg[src2].f64[j], + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vssrl.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i] */ +void VECTOR_HELPER(vssrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = vssrl_8(env, + env->vfp.vreg[src2].u8[j], env->vfp.vreg[src1].u8[j]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = vssrl_16(env, + env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u16[j]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = vssrl_32(env, + env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u32[j]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = vssrl_64(env, + env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u64[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vssrl.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */ +void VECTOR_HELPER(vssrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = vssrl_8(env, + env->vfp.vreg[src2].u8[j], env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = vssrl_16(env, + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, 
width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = vssrl_32(env, + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = vssrl_64(env, + env->vfp.vreg[src2].u64[j], env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vssrl.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */ +void VECTOR_HELPER(vssrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = vssrli_8(env, + env->vfp.vreg[src2].u8[j], rs1); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = vssrli_16(env, + env->vfp.vreg[src2].u16[j], rs1); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = vssrli_32(env, + env->vfp.vreg[src2].u32[j], rs1); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = vssrli_64(env, + env->vfp.vreg[src2].u64[j], rs1); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfmsub.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) - vs2[i] */ +void VECTOR_HELPER(vfmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->vfp.vreg[src1].f16[j], + env->vfp.vreg[dest].f16[j], + env->vfp.vreg[src2].f16[j], + float_muladd_negate_c, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->vfp.vreg[src1].f32[j], + env->vfp.vreg[dest].f32[j], + 
env->vfp.vreg[src2].f32[j], + float_muladd_negate_c, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->vfp.vreg[src1].f64[j], + env->vfp.vreg[dest].f64[j], + env->vfp.vreg[src2].f64[j], + float_muladd_negate_c, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfmsub.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) - vs2[i] */ +void VECTOR_HELPER(vfmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->fpr[rs1], + env->vfp.vreg[dest].f16[j], + env->vfp.vreg[src2].f16[j], + float_muladd_negate_c, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->fpr[rs1], + env->vfp.vreg[dest].f32[j], + env->vfp.vreg[src2].f32[j], + float_muladd_negate_c, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->fpr[rs1], + env->vfp.vreg[dest].f64[j], + env->vfp.vreg[src2].f64[j], + float_muladd_negate_c, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vssra.vv vd, vs2, vs1, vm # vd[i] = ((vs2[i] + round)>>vs1[i]) */ +void VECTOR_HELPER(vssra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = vssra_8(env, + env->vfp.vreg[src2].s8[j], env->vfp.vreg[src1].u8[j]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = 
vssra_16(env, + env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].u16[j]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = vssra_32(env, + env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].u32[j]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = vssra_64(env, + env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u64[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vssra.vx vd, vs2, rs1, vm # vd[i] = ((vs2[i] + round)>>x[rs1]) */ +void VECTOR_HELPER(vssra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = vssra_8(env, + env->vfp.vreg[src2].s8[j], env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = vssra_16(env, + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = vssra_32(env, + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = vssra_64(env, + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vssra.vi vd, vs2, imm, vm # vd[i] = ((vs2[i] + round)>>imm) */ +void VECTOR_HELPER(vssra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = vssrai_8(env, + env->vfp.vreg[src2].s8[j], rs1); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = vssrai_16(env, + env->vfp.vreg[src2].s16[j], rs1); + } + break; + case 32: + if 
(vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = vssrai_32(env, + env->vfp.vreg[src2].s32[j], rs1); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = vssrai_64(env, + env->vfp.vreg[src2].s64[j], rs1); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] + - env->vfp.vreg[src1].s8[j] + * env->vfp.vreg[dest].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j] + - env->vfp.vreg[src1].s16[j] + * env->vfp.vreg[dest].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j] + - env->vfp.vreg[src1].s32[j] + * env->vfp.vreg[dest].s32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j] + - env->vfp.vreg[src1].s64[j] + * env->vfp.vreg[dest].s64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vnmsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s8[j] + - env->gpr[rs1] + * env->vfp.vreg[dest].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s16[j] + - env->gpr[rs1] + * env->vfp.vreg[dest].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s32[j] + - env->gpr[rs1] + * 
env->vfp.vreg[dest].s32[j];
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].s64[j] = env->vfp.vreg[src2].s64[j]
+                        - (int64_t)extend_gpr(env->gpr[rs1])
+                        * env->vfp.vreg[dest].s64[j];
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                break;
+            }
+        } else {
+            vector_tail_common(env, dest, j, width);
+        }
+    }
+    env->vfp.vstart = 0;
+}
+
+/* vfnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + vs2[i] */
+void VECTOR_HELPER(vfnmsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src1, src2;
+
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs1, false);
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        src1 = rs1 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f16[j] = float16_muladd(
+                        env->vfp.vreg[src1].f16[j],
+                        env->vfp.vreg[dest].f16[j],
+                        env->vfp.vreg[src2].f16[j],
+                        float_muladd_negate_product,
+                        &env->fp_status);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f32[j] = float32_muladd(
+                        env->vfp.vreg[src1].f32[j],
+                        env->vfp.vreg[dest].f32[j],
+                        env->vfp.vreg[src2].f32[j],
+                        float_muladd_negate_product,
+                        &env->fp_status);
+                }
+                break;
+            case 64:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f64[j] = float64_muladd(
+                        env->vfp.vreg[src1].f64[j],
+                        env->vfp.vreg[dest].f64[j],
+                        env->vfp.vreg[src2].f64[j],
+                        float_muladd_negate_product,
+                        &env->fp_status);
+                }
+                break;
+            default:
+                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+                return;
+            }
+        } else {
+            vector_tail_fcommon(env, dest, j, width);
+        }
+    }
+    return;
+
+
+    env->vfp.vstart = 0;
+}
+
+/* vfnmsub.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) + vs2[i] */
+void VECTOR_HELPER(vfnmsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1,
+    uint32_t rs2, uint32_t rd)
+{
+    int width, lmul, vl, vlmax;
+    int i, j, dest, src2;
+
+
+    lmul = vector_get_lmul(env);
+    vl = env->vfp.vl;
+
+    if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) {
+        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+        return;
+    }
+
+    vector_lmul_check_reg(env, lmul, rs2, false);
+    vector_lmul_check_reg(env, lmul, rd, false);
+
+    if (env->vfp.vstart >= vl) {
+        return;
+    }
+
+    width = vector_get_width(env);
+    vlmax = vector_get_vlmax(env);
+
+    for (i = 0; i < vlmax; i++) {
+        dest = rd + (i / (VLEN / width));
+        src2 = rs2 + (i / (VLEN / width));
+        j = i % (VLEN / width);
+        if (i < env->vfp.vstart) {
+            continue;
+        } else if (i < vl) {
+            switch (width) {
+            case 16:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+                    env->vfp.vreg[dest].f16[j] = float16_muladd(
+                        env->fpr[rs1],
+                        env->vfp.vreg[dest].f16[j],
+                        env->vfp.vreg[src2].f16[j],
+                        float_muladd_negate_product,
+                        &env->fp_status);
+                }
+                break;
+            case 32:
+                if (vector_elem_mask(env, vm, width, lmul, i)) {
+ env->vfp.vreg[dest].f32[j] = float32_muladd( + env->fpr[rs1], + env->vfp.vreg[dest].f32[j], + env->vfp.vreg[src2].f32[j], + float_muladd_negate_product, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->fpr[rs1], + env->vfp.vreg[dest].f64[j], + env->vfp.vreg[src2].f64[j], + float_muladd_negate_product, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vnsrl_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || + vector_overlap_vm_common(lmul, vm, rd) || + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k] + >> (env->vfp.vreg[src1].u8[j] & 0xf); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k] + >> (env->vfp.vreg[src1].u16[j] & 0x1f); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k] + >> (env->vfp.vreg[src1].u32[j] & 0x3f); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_narrow(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vnsrl_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || + vector_overlap_vm_common(lmul, vm, rd) || + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k] + >> (env->gpr[rs1] & 0xf); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k] + >> (env->gpr[rs1] & 0x1f); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = 
env->vfp.vreg[src2].u64[k] + >> (env->gpr[rs1] & 0x3f); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_narrow(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vnsrl_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || + vector_overlap_vm_common(lmul, vm, rd) || + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[j] = env->vfp.vreg[src2].u16[k] + >> (rs1); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = env->vfp.vreg[src2].u32[k] + >> (rs1); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = env->vfp.vreg[src2].u64[k] + >> (rs1); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_narrow(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */ +void VECTOR_HELPER(vfmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->vfp.vreg[src1].f16[j], + env->vfp.vreg[src2].f16[j], + env->vfp.vreg[dest].f16[j], + 0, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->vfp.vreg[src1].f32[j], + env->vfp.vreg[src2].f32[j], + env->vfp.vreg[dest].f32[j], + 0, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->vfp.vreg[src1].f64[j], + env->vfp.vreg[src2].f64[j], + env->vfp.vreg[dest].f64[j], + 0, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + 
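
Two notes on the FP fused ops: working out the vd/vs operand order
against the negate flags took effort; a summary comment would help,
e.g.:

    /*
     * vf[n]macc/vf[n]msac: multiply vs1 (or f[rs1]) by vs2, vd is
     *                      the addend:  fma(vs1, vs2, vd, flags)
     * vf[n]madd/vf[n]msub: multiply vs1 (or f[rs1]) by vd, vs2 is
     *                      the addend:  fma(vs1, vd, vs2, flags)
     */

Secondly, all the _vf forms feed env->fpr[rs1] straight into the
float16/float32 ops. Doesn't the scalar need to be unboxed from its
NaN-boxed representation (and replaced with a default NaN when
improperly boxed) before use, as for the scalar FP instructions?
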
env->vfp.vstart = 0; +} + +/* vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */ +void VECTOR_HELPER(vfmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->fpr[rs1], + env->vfp.vreg[src2].f16[j], + env->vfp.vreg[dest].f16[j], + 0, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->fpr[rs1], + env->vfp.vreg[src2].f32[j], + env->vfp.vreg[dest].f32[j], + 0, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->fpr[rs1], + env->vfp.vreg[src2].f64[j], + env->vfp.vreg[dest].f64[j], + 0, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vnsra_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || + vector_overlap_vm_common(lmul, vm, rd) || + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k] + >> (env->vfp.vreg[src1].s8[j] & 0xf); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k] + >> (env->vfp.vreg[src1].s16[j] & 0x1f); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k] + >> (env->vfp.vreg[src1].s32[j] & 0x3f); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_narrow(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vnsra_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, 
src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || + vector_overlap_vm_common(lmul, vm, rd) || + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k] + >> (env->gpr[rs1] & 0xf); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k] + >> (env->gpr[rs1] & 0x1f); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k] + >> (env->gpr[rs1] & 0x3f); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_narrow(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vnsra_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || + vector_overlap_vm_common(lmul, vm, rd) || + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] = env->vfp.vreg[src2].s16[k] + >> (rs1); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = env->vfp.vreg[src2].s32[k] + >> (rs1); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = env->vfp.vreg[src2].s64[k] + >> (rs1); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_narrow(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / 
width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] += env->vfp.vreg[src1].s8[j] + * env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] += env->vfp.vreg[src1].s16[j] + * env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] += env->vfp.vreg[src1].s32[j] + * env->vfp.vreg[src2].s32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] += env->vfp.vreg[src1].s64[j] + * env->vfp.vreg[src2].s64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] += env->gpr[rs1] + * env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] += env->gpr[rs1] + * env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] += env->gpr[rs1] + * env->vfp.vreg[src2].s32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] += + (int64_t)extend_gpr(env->gpr[rs1]) + * env->vfp.vreg[src2].s64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vfnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */ +void VECTOR_HELPER(vfnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + 
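
In vmacc.vx the 8/16/32-bit cases multiply by the raw env->gpr[rs1]
while the 64-bit case goes through extend_gpr() - is the implicit
truncation in the narrow cases deliberate? One idiom for all four
widths would avoid the question.

More generally, every helper open-codes the same vstart/mask/tail loop
around a per-width switch. A shared worker would shrink these 26k lines
considerably. A minimal sketch of the shape I mean, reusing your
existing helpers (untested; VECTOR_INT_LOOP is an invented name, and i,
j, dest, vl, vlmax, lmul stay declared by the caller as today):

    #define VECTOR_INT_LOOP(env, vm, rd, width, OP)                  \
        for (i = 0; i < vlmax; i++) {                                \
            dest = rd + (i / (VLEN / width));                        \
            j = i % (VLEN / width);                                  \
            if (i < env->vfp.vstart) {                               \
                continue;                                            \
            } else if (i < vl) {                                     \
                if (vector_elem_mask(env, vm, width, lmul, i)) {     \
                    OP;                                               \
                }                                                    \
            } else {                                                 \
                vector_tail_common(env, dest, j, width);             \
            }                                                        \
        }

Then each case of the switch collapses to a one-line OP argument and
the switch only has to select the element type.
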
env->vfp.vreg[dest].f16[j] = float16_muladd( + env->vfp.vreg[src1].f16[j], + env->vfp.vreg[src2].f16[j], + env->vfp.vreg[dest].f16[j], + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->vfp.vreg[src1].f32[j], + env->vfp.vreg[src2].f32[j], + env->vfp.vreg[dest].f32[j], + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->vfp.vreg[src1].f64[j], + env->vfp.vreg[src2].f64[j], + env->vfp.vreg[dest].f64[j], + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */ +void VECTOR_HELPER(vfnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->fpr[rs1], + env->vfp.vreg[src2].f16[j], + env->vfp.vreg[dest].f16[j], + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->fpr[rs1], + env->vfp.vreg[src2].f32[j], + env->vfp.vreg[dest].f32[j], + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->fpr[rs1], + env->vfp.vreg[src2].f64[j], + env->vfp.vreg[dest].f64[j], + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vnclipu.vv vd, vs2, vs1, vm # vector-vector */ +void VECTOR_HELPER(vnclipu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, k, src1, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) + || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { 
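
All the .vf helpers feed env->fpr[rs1] straight into the half/single
precision ops. With D present the narrower values are NaN-boxed in the
64-bit register, so the f16/f32 cases need to unbox and substitute the
canonical NaN when the boxing is broken, rather than just truncate.
Something like this sketch for the f32 case (assuming the usual boxing
check):

    uint64_t v = env->fpr[rs1];
    float32 s = ((v & MAKE_64BIT_MASK(32, 32)) == MAKE_64BIT_MASK(32, 32))
                ? (float32)v
                : float32_default_nan(&env->fp_status);
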
+ return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / (2 * width)); + k = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[k] = vnclipu_16(env, + env->vfp.vreg[src2].u16[j], env->vfp.vreg[src1].u8[k]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = vnclipu_32(env, + env->vfp.vreg[src2].u32[j], env->vfp.vreg[src1].u16[k]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = vnclipu_64(env, + env->vfp.vreg[src2].u64[j], env->vfp.vreg[src1].u32[k]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_narrow(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vnclipu.vx vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vnclipu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) + || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + j = i % (VLEN / (2 * width)); + k = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[k] = vnclipu_16(env, + env->vfp.vreg[src2].u16[j], env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = vnclipu_32(env, + env->vfp.vreg[src2].u32[j], env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = vnclipu_64(env, + env->vfp.vreg[src2].u64[j], env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_narrow(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + + +/* vnclipu.vi vd, vs2, imm, vm # vector-immediate */ +void VECTOR_HELPER(vnclipu_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) + || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * 
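
The FP and narrowing helpers bail out before the loop when
vstart >= vl, so their tail elements are never touched, while the plain
integer helpers fall through and still run vector_tail_*() over the
tail. Which behaviour does 0.7.1 want? Whichever it is, the two
families should agree.
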
width))); + j = i % (VLEN / (2 * width)); + k = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u8[k] = vnclipui_16(env, + env->vfp.vreg[src2].u16[j], rs1); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = vnclipui_32(env, + env->vfp.vreg[src2].u32[j], rs1); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = vnclipui_64(env, + env->vfp.vreg[src2].u64[j], rs1); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_narrow(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */ +void VECTOR_HELPER(vfmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->vfp.vreg[src1].f16[j], + env->vfp.vreg[src2].f16[j], + env->vfp.vreg[dest].f16[j], + float_muladd_negate_c, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->vfp.vreg[src1].f32[j], + env->vfp.vreg[src2].f32[j], + env->vfp.vreg[dest].f32[j], + float_muladd_negate_c, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->vfp.vreg[src1].f64[j], + env->vfp.vreg[src2].f64[j], + env->vfp.vreg[dest].f64[j], + float_muladd_negate_c, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */ +void VECTOR_HELPER(vfmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { 
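
For the narrowing ops, vector_overlap_dstgp_srcgp(rd, lmul, rs2,
2 * lmul) rejects any overlap between the narrow destination and the
wide source group. My reading is that the destination overlapping the
lower half of the source is legal - if 0.7.1 really forbids it, a
comment with the spec reference would help.
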
+ continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->fpr[rs1], + env->vfp.vreg[src2].f16[j], + env->vfp.vreg[dest].f16[j], + float_muladd_negate_c, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->fpr[rs1], + env->vfp.vreg[src2].f32[j], + env->vfp.vreg[dest].f32[j], + float_muladd_negate_c, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->fpr[rs1], + env->vfp.vreg[src2].f64[j], + env->vfp.vreg[dest].f64[j], + float_muladd_negate_c, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + + env->vfp.vstart = 0; +} + +/* vnclip.vv vd, vs2, vs1, vm # vector-vector */ +void VECTOR_HELPER(vnclip_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, k, src1, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) + || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / (2 * width)); + k = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[k] = vnclip_16(env, + env->vfp.vreg[src2].s16[j], env->vfp.vreg[src1].u8[k]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = vnclip_32(env, + env->vfp.vreg[src2].s32[j], env->vfp.vreg[src1].u16[k]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = vnclip_64(env, + env->vfp.vreg[src2].s64[j], env->vfp.vreg[src1].u32[k]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_narrow(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vnclip.vx vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vnclip_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, k, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) + || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + 
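
vfmacc/vfnmacc/vfmsac/vfnmsac (vv and vf) differ only in the
float_muladd_negate_* flags passed to float*_muladd(). One worker per
operand pattern would fold eight helpers into two thin wrappers,
roughly (a sketch; vfmuladd_vv is an invented name):

    static void vfmuladd_vv(CPURISCVState *env, uint32_t vm, uint32_t rs1,
                            uint32_t rs2, uint32_t rd, int flags)
    {
        /* one copy of today's loop, passing 'flags' to float*_muladd() */
    }

    void VECTOR_HELPER(vfnmacc_vv)(CPURISCVState *env, uint32_t vm,
                                   uint32_t rs1, uint32_t rs2, uint32_t rd)
    {
        vfmuladd_vv(env, vm, rs1, rs2, rd,
                    float_muladd_negate_c | float_muladd_negate_product);
    }
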
j = i % (VLEN / (2 * width)); + k = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[k] = vnclip_16(env, + env->vfp.vreg[src2].s16[j], env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = vnclip_32(env, + env->vfp.vreg[src2].s32[j], env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = vnclip_64(env, + env->vfp.vreg[src2].s64[j], env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_narrow(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vnclip.vi vd, vs2, imm, vm # vector-immediate */ +void VECTOR_HELPER(vnclip_vi)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, k, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul) + || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + j = i % (VLEN / (2 * width)); + k = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[k] = vnclipi_16(env, + env->vfp.vreg[src2].s16[j], rs1); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = vnclipi_32(env, + env->vfp.vreg[src2].s32[j], rs1); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = vnclipi_64(env, + env->vfp.vreg[src2].s64[j], rs1); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_narrow(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] -= env->vfp.vreg[src1].s8[j] + * env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] -= 
env->vfp.vreg[src1].s16[j] + * env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] -= env->vfp.vreg[src1].s32[j] + * env->vfp.vreg[src2].s32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] -= env->vfp.vreg[src1].s64[j] + * env->vfp.vreg[src2].s64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vnmsac_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, vl; + uint32_t lmul, width, src2, dest, vlmax; + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s8[j] -= env->gpr[rs1] + * env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] -= env->gpr[rs1] + * env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] -= env->gpr[rs1] + * env->vfp.vreg[src2].s32[j]; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] -= + (int64_t)extend_gpr(env->gpr[rs1]) + * env->vfp.vreg[src2].s64[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_common(env, dest, j, width); + } + } + env->vfp.vstart = 0; +} + +/* vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */ +void VECTOR_HELPER(vfnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src1, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->vfp.vreg[src1].f16[j], + env->vfp.vreg[src2].f16[j], + env->vfp.vreg[dest].f16[j], + float_muladd_negate_product, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->vfp.vreg[src1].f32[j], + env->vfp.vreg[src2].f32[j], + 
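
vnmsac.vx has the same raw env->gpr[rs1] vs extend_gpr() mix as
vmacc.vx above.
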
env->vfp.vreg[dest].f32[j], + float_muladd_negate_product, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->vfp.vreg[src1].f64[j], + env->vfp.vreg[src2].f64[j], + env->vfp.vreg[dest].f64[j], + float_muladd_negate_product, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */ +void VECTOR_HELPER(vfnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_muladd( + env->fpr[rs1], + env->vfp.vreg[src2].f16[j], + env->vfp.vreg[dest].f16[j], + float_muladd_negate_product, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_muladd( + env->fpr[rs1], + env->vfp.vreg[src2].f32[j], + env->vfp.vreg[dest].f32[j], + float_muladd_negate_product, + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_muladd( + env->fpr[rs1], + env->vfp.vreg[src2].f64[j], + env->vfp.vreg[dest].f64[j], + float_muladd_negate_product, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vwredsumu.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(zero-extend(SEW)) */ +void VECTOR_HELPER(vwredsumu_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + uint64_t sum = 0; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum += env->vfp.vreg[src2].u8[j]; + } + if (i == 0) { + sum += env->vfp.vreg[rs1].u16[0]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u16[0] = sum; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum += 
env->vfp.vreg[src2].u16[j]; + } + if (i == 0) { + sum += env->vfp.vreg[rs1].u32[0]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u32[0] = sum; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum += env->vfp.vreg[src2].u32[j]; + } + if (i == 0) { + sum += env->vfp.vreg[rs1].u64[0]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].u64[0] = sum; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwaddu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) + ) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = + (uint16_t)env->vfp.vreg[src1].u8[j] + + (uint16_t)env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = + (uint32_t)env->vfp.vreg[src1].u16[j] + + (uint32_t)env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] = + (uint64_t)env->vfp.vreg[src1].u32[j] + + (uint64_t)env->vfp.vreg[src2].u32[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwaddu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) + ) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = + (uint16_t)env->vfp.vreg[src2].u8[j] + + (uint16_t)((uint8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = + (uint32_t)env->vfp.vreg[src2].u16[j] + + (uint32_t)((uint16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + 
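
vwredsumu.vs clears vreg[rd] before the element loop and only reads the
vs1 scalar afterwards, so rd == rs1 destroys the initial accumulator.
Reading the scalar first avoids it:

    sum = env->vfp.vreg[rs1].u16[0];   /* before the vreg[rd] clear,
                                          per-width as appropriate */

The same ordering problem is in vwredsum.vs and the vfwredsum variants
below, and hoisting the i == 0 / i == vl - 1 special cases out of the
element loop would make these easier to read.
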
env->vfp.vreg[dest].u64[k] = + (uint64_t)env->vfp.vreg[src2].u32[j] + + (uint64_t)((uint32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +/* vfwadd.vv vd, vs2, vs1, vm # vector-vector */ +void VECTOR_HELPER(vfwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src1, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_add( + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[src1].f16[j], true, + &env->fp_status), + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_add( + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[src1].f32[j], + &env->fp_status), + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fwiden(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfwadd.vf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vfwadd_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_add( + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->fpr[rs1], true, + &env->fp_status), + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + 
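
vfwadd.vf checks vector_vtype_ill() twice - once in the first condition
and again after the lmul checks. Leftover from a rebase?
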
env->vfp.vreg[dest].f64[k] = float64_add( + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->fpr[rs1], &env->fp_status), + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fwiden(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vwredsum.vs vd, vs2, vs1, vm # 2*SEW = 2*SEW + sum(sign-extend(SEW)) */ +void VECTOR_HELPER(vwredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + int64_t sum = 0; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum += (int16_t)env->vfp.vreg[src2].s8[j] << 8 >> 8; + } + if (i == 0) { + sum += env->vfp.vreg[rs1].s16[0]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].s16[0] = sum; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum += (int32_t)env->vfp.vreg[src2].s16[j] << 16 >> 16; + } + if (i == 0) { + sum += env->vfp.vreg[rs1].s32[0]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].s32[0] = sum; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum += (int64_t)env->vfp.vreg[src2].s32[j] << 32 >> 32; + } + if (i == 0) { + sum += env->vfp.vreg[rs1].s64[0]; + } + if (i == vl - 1) { + env->vfp.vreg[rd].s64[0] = sum; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwadd_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = + (int16_t)env->vfp.vreg[src1].s8[j] + + (int16_t)env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = + (int32_t)env->vfp.vreg[src1].s16[j] + + (int32_t)env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, 
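
In vwredsum.vs the (int16_t)env->vfp.vreg[src2].s8[j] << 8 >> 8 dance
is a no-op - the element is already signed, so the cast alone
sign-extends - and left-shifting a negative value is undefined
behaviour in C. Plain

    sum += env->vfp.vreg[src2].s8[j];

does the right thing for all three widths (with the matching element
type).
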
lmul, i)) { + env->vfp.vreg[dest].s64[k] = + (int64_t)env->vfp.vreg[src1].s32[j] + + (int64_t)env->vfp.vreg[src2].s32[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwadd_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) + + (int16_t)((int8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) + + (int32_t)((int16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) + + (int64_t)((int32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +/* vd, vs2, vs1, vm # Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ +void VECTOR_HELPER(vfwredsum_vs)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, src2; + float32 sum32 = 0.0f; + float64 sum64 = 0.0f; + + lmul = vector_get_lmul(env); + vector_lmul_check_reg(env, lmul, rs2, false); + + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vl = env->vfp.vl; + if (vl == 0) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < VLEN / 64; i++) { + env->vfp.vreg[rd].u64[i] = 0; + } + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + + if (i < vl) { + switch (width) { + case 16: + if (i == 0) { + sum32 = env->vfp.vreg[rs1].f32[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum32 = float32_add(sum32, + float16_to_float32(env->vfp.vreg[src2].f16[j], + true, &env->fp_status), + &env->fp_status); + } + if (i == vl - 1) { + env->vfp.vreg[rd].f32[0] = sum32; + } + break; + case 32: + if (i == 0) { + sum64 = env->vfp.vreg[rs1].f64[0]; + } + if (vector_elem_mask(env, vm, width, lmul, i)) { + sum64 = float64_add(sum64, + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + &env->fp_status); + } + if (i == vl - 1) { + env->vfp.vreg[rd].f64[0] = sum64; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + 
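
float32 sum32 = 0.0f only works because float32 is currently a bare
integer typedef, so the float literal quietly converts to 0.
float32_zero / float64_zero say what you mean and keep working if the
struct-wrapped softfloat types are enabled.
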
return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwsubu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) + ) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = + (uint16_t)env->vfp.vreg[src2].u8[j] - + (uint16_t)env->vfp.vreg[src1].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = + (uint32_t)env->vfp.vreg[src2].u16[j] - + (uint32_t)env->vfp.vreg[src1].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] = + (uint64_t)env->vfp.vreg[src2].u32[j] - + (uint64_t)env->vfp.vreg[src1].u32[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwsubu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) + ) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = + (uint16_t)env->vfp.vreg[src2].u8[j] - + (uint16_t)((uint8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = + (uint32_t)env->vfp.vreg[src2].u16[j] - + (uint32_t)((uint16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] = + (uint64_t)env->vfp.vreg[src2].u32[j] - + (uint64_t)((uint32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +/* vfwsub.vv vd, vs2, vs1, vm # vector-vector */ +void VECTOR_HELPER(vfwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, 
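
Every helper re-derives lmul/width/vlmax and redoes the
vtype/overlap/alignment checks at run time. If the relevant vtype
fields were folded into the TB flags these could all be checked once at
translate time in trans_rvv.inc.c, and the helpers would shrink to just
the element loop.
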
dest, src1, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_sub( + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[src1].f16[j], true, + &env->fp_status), + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_sub( + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[src1].f32[j], + &env->fp_status), + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fwiden(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfwsub.vf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vfwsub_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_sub( + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->fpr[rs1], true, + &env->fp_status), + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_sub( + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->fpr[rs1], &env->fp_status), + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fwiden(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vwsub_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = 
vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) + ) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = + (int16_t)env->vfp.vreg[src2].s8[j] - + (int16_t)env->vfp.vreg[src1].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = + (int32_t)env->vfp.vreg[src2].s16[j] - + (int32_t)env->vfp.vreg[src1].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = + (int64_t)env->vfp.vreg[src2].s32[j] - + (int64_t)env->vfp.vreg[src1].s32[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vwsub_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul) + ) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) - + (int16_t)((int8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) - + (int32_t)((int16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) - + (int64_t)((int32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +/* + * vfwredosum.vs vd, vs2, vs1, vm # + * Ordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) + */ +void VECTOR_HELPER(vfwredosum_vs)(CPURISCVState *env, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + helper_vector_vfwredsum_vs(env, vm, rs1, rs2, rd); + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwaddu_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, 
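
vfwredosum.vs tail-calls the unordered helper_vector_vfwredsum_vs. That
is only correct because the current implementation happens to sum
strictly in element order - worth a comment saying so, since anyone
optimising the unordered reduction would silently break the ordered
one. (And the return; before env->vfp.vstart = 0; strikes again.)
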
dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = + (uint16_t)env->vfp.vreg[src1].u8[j] + + (uint16_t)env->vfp.vreg[src2].u16[k]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = + (uint32_t)env->vfp.vreg[src1].u16[j] + + (uint32_t)env->vfp.vreg[src2].u32[k]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] = + (uint64_t)env->vfp.vreg[src1].u32[j] + + (uint64_t)env->vfp.vreg[src2].u64[k]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwaddu_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / (2 * width))); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = + (uint16_t)env->vfp.vreg[src2].u16[k] + + (uint16_t)((uint8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = + (uint32_t)env->vfp.vreg[src2].u32[k] + + (uint32_t)((uint16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] = + (uint64_t)env->vfp.vreg[src2].u64[k] + + (uint64_t)((uint32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +/* vfwadd.wv vd, vs2, vs1, vm # vector-vector */ +void VECTOR_HELPER(vfwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src1, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + 
vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, true); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / (2 * width))); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_add( + env->vfp.vreg[src2].f32[k], + float16_to_float32(env->vfp.vreg[src1].f16[j], true, + &env->fp_status), + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_add( + env->vfp.vreg[src2].f64[k], + float32_to_float64(env->vfp.vreg[src1].f32[j], + &env->fp_status), + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfwadd.wf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vfwadd_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, k, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, true); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / (2 * width))); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_add( + env->vfp.vreg[src2].f32[k], + float16_to_float32(env->fpr[rs1], true, + &env->fp_status), + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_add( + env->vfp.vreg[src2].f64[k], + float32_to_float64(env->fpr[rs1], &env->fp_status), + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwadd_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < 
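
vfwadd.wv/.wf clear their tail with vector_tail_widen() while
vfwadd.vv/.vf above use vector_tail_fwiden(). If those two helpers
differ in behaviour one of the call sites is wrong; if they don't, one
of them can go.
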
env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]) + + (int16_t)env->vfp.vreg[src2].s16[k]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]) + + (int32_t)env->vfp.vreg[src2].s32[k]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]) + + (int64_t)env->vfp.vreg[src2].s64[k]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwadd_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / (2 * width))); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = + (int16_t)env->vfp.vreg[src2].s16[k] + + (int16_t)((int8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = + (int32_t)env->vfp.vreg[src2].s32[k] + + (int32_t)((int16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = + (int64_t)env->vfp.vreg[src2].s64[k] + + (int64_t)((int32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwsubu_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = + (uint16_t)env->vfp.vreg[src2].u16[k] - + (uint16_t)env->vfp.vreg[src1].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = + 
(uint32_t)env->vfp.vreg[src2].u32[k] - + (uint32_t)env->vfp.vreg[src1].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] = + (uint64_t)env->vfp.vreg[src2].u64[k] - + (uint64_t)env->vfp.vreg[src1].u32[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwsubu_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / (2 * width))); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = + (uint16_t)env->vfp.vreg[src2].u16[k] - + (uint16_t)((uint8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = + (uint32_t)env->vfp.vreg[src2].u32[k] - + (uint32_t)((uint16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] = + (uint64_t)env->vfp.vreg[src2].u64[k] - + (uint64_t)((uint32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +/* vfwsub.wv vd, vs2, vs1, vm # vector-vector */ +void VECTOR_HELPER(vfwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src1, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, true); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / (2 * width))); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_sub( + env->vfp.vreg[src2].f32[k], + float16_to_float32(env->vfp.vreg[src1].f16[j], true, + &env->fp_status), + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_sub( + env->vfp.vreg[src2].f64[k], + float32_to_float64(env->vfp.vreg[src1].f32[j], + &env->fp_status), + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + 
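
All of the vfw* helpers in this patch end with:

      return;
      env->vfp.vstart = 0;
  }

so the vstart reset is dead code and vstart is never cleared on the
FP paths (the integer helpers above do clear it). The early

  if (env->vfp.vstart >= vl) {
      return;
  }

has the same problem. I think you want:

      env->vfp.vstart = 0;
      return;

in both places - my reading of 0.7.1 is that vstart is zeroed
whenever the instruction completes, even if it executes no elements,
but worth double checking the spec wording. Also note vfwadd.wv uses
vector_tail_widen for the tail while vfwsub.wv uses
vector_tail_fwiden; presumably they should agree.
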
return; + } + } else { + vector_tail_fwiden(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfwsub.wf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vfwsub_wf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, k, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, true); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / (2 * width))); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_sub( + env->vfp.vreg[src2].f32[k], + float16_to_float32(env->fpr[rs1], true, + &env->fp_status), + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_sub( + env->vfp.vreg[src2].f64[k], + float32_to_float64(env->fpr[rs1], &env->fp_status), + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fwiden(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwsub_wv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = + (int16_t)env->vfp.vreg[src2].s16[k] - + (int16_t)((int8_t)env->vfp.vreg[src1].s8[j]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = + (int32_t)env->vfp.vreg[src2].s32[k] - + (int32_t)((int16_t)env->vfp.vreg[src1].s16[j]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = + (int64_t)env->vfp.vreg[src2].s64[k] - + (int64_t)((int32_t)env->vfp.vreg[src1].s32[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwsub_wx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = 
vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / (2 * width))); + dest = rd + (i / (VLEN / (2 * width))); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = + (int16_t)env->vfp.vreg[src2].s16[k] - + (int16_t)((int8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = + (int32_t)env->vfp.vreg[src2].s32[k] - + (int32_t)((int16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = + (int64_t)env->vfp.vreg[src2].s64[k] - + (int64_t)((int32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwmulu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = + (uint16_t)env->vfp.vreg[src1].u8[j] * + (uint16_t)env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = + (uint32_t)env->vfp.vreg[src1].u16[j] * + (uint32_t)env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] = + (uint64_t)env->vfp.vreg[src1].u32[j] * + (uint64_t)env->vfp.vreg[src2].u32[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vwmulu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + 
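
Minor: width comes from vtype once per instruction, so the default:
riscv_raise_exception(...) inside every element loop is re-checking a
value that cannot change mid-loop. Validating it once up front would
drop a case from each switch, and the vstart skip could fold into the
loop bounds instead of a per-element continue. Rough sketch:

  /* validate once, then start the loop at vstart */
  if (width != 8 && width != 16 && width != 32) {
      riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
  }
  for (i = env->vfp.vstart; i < vlmax; i++) {
      ...
  }
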
vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = + (uint16_t)env->vfp.vreg[src2].u8[j] * + (uint16_t)((uint8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = + (uint32_t)env->vfp.vreg[src2].u16[j] * + (uint32_t)((uint16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] = + (uint64_t)env->vfp.vreg[src2].u32[j] * + (uint64_t)((uint32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +/* vfwmul.vv vd, vs2, vs1, vm # vector-vector */ +void VECTOR_HELPER(vfwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src1, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_mul( + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[src1].f16[j], true, + &env->fp_status), + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_mul( + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[src1].f32[j], + &env->fp_status), + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fwiden(env, dest, k, width); + } + } + return; + + env->vfp.vstart = 0; +} + +/* vfwmul.vf vd, vs2, rs1, vm # vector-scalar */ +void VECTOR_HELPER(vfwmul_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = 
vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_mul( + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->fpr[rs1], true, + &env->fp_status), + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_mul( + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->fpr[rs1], &env->fp_status), + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fwiden(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwmulsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = + (int16_t)env->vfp.vreg[src2].s8[j] * + (uint16_t)env->vfp.vreg[src1].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = + (int32_t)env->vfp.vreg[src2].s16[j] * + (uint32_t)env->vfp.vreg[src1].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = + (int64_t)env->vfp.vreg[src2].s32[j] * + (uint64_t)env->vfp.vreg[src1].u32[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vwmulsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) 
{ + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * + (uint16_t)((uint8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * + (uint32_t)((uint16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * + (uint64_t)((uint32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vwmul_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = + (int16_t)env->vfp.vreg[src1].s8[j] * + (int16_t)env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = + (int32_t)env->vfp.vreg[src1].s16[j] * + (int32_t)env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = + (int64_t)env->vfp.vreg[src1].s32[j] * + (int64_t)env->vfp.vreg[src2].s32[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vwmul_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * + (int16_t)((int8_t)env->gpr[rs1]); + } + break; + case 16: + if 
(vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * + (int32_t)((int16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * + (int64_t)((int32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +/* + * vwsmaccu.vv vd, vs1, vs2, vm # + * vd[i] = clipu((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i]) + */ +void VECTOR_HELPER(vwsmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src1, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env, + env->vfp.vreg[src2].u8[j], + env->vfp.vreg[src1].u8[j], + env->vfp.vreg[dest].u16[k]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env, + env->vfp.vreg[src2].u16[j], + env->vfp.vreg[src1].u16[j], + env->vfp.vreg[dest].u32[k]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env, + env->vfp.vreg[src2].u32[j], + env->vfp.vreg[src1].u32[j], + env->vfp.vreg[dest].u64[k]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* + * vwsmaccu.vx vd, rs1, vs2, vm # + * vd[i] = clipu((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i]) + */ +void VECTOR_HELPER(vwsmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, 
width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = vwsmaccu_8(env, + env->vfp.vreg[src2].u8[j], + env->gpr[rs1], + env->vfp.vreg[dest].u16[k]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = vwsmaccu_16(env, + env->vfp.vreg[src2].u16[j], + env->gpr[rs1], + env->vfp.vreg[dest].u32[k]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] = vwsmaccu_32(env, + env->vfp.vreg[src2].u32[j], + env->gpr[rs1], + env->vfp.vreg[dest].u64[k]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwmaccu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] += + (uint16_t)env->vfp.vreg[src1].u8[j] * + (uint16_t)env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] += + (uint32_t)env->vfp.vreg[src1].u16[j] * + (uint32_t)env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] += + (uint64_t)env->vfp.vreg[src1].u32[j] * + (uint64_t)env->vfp.vreg[src2].u32[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vwmaccu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] += + (uint16_t)env->vfp.vreg[src2].u8[j] * + (uint16_t)((uint8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, 
width, lmul, i)) { + env->vfp.vreg[dest].u32[k] += + (uint32_t)env->vfp.vreg[src2].u16[j] * + (uint32_t)((uint16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] += + (uint64_t)env->vfp.vreg[src2].u32[j] * + (uint64_t)((uint32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +/* vfwmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] */ +void VECTOR_HELPER(vfwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src1, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_muladd( + float16_to_float32(env->vfp.vreg[src1].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[dest].f16[j], true, + &env->fp_status), + 0, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_muladd( + float32_to_float64(env->vfp.vreg[src1].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[dest].f32[j], + &env->fp_status), + 0, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f32[k] = 0; + case 32: + env->vfp.vreg[dest].f64[k] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfwmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i] */ +void VECTOR_HELPER(vfwmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / 
(2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_muladd( + env->fpr[rs1], + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[dest].f16[j], true, + &env->fp_status), + 0, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_muladd( + env->fpr[rs1], + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[dest].f32[j], + &env->fp_status), + 0, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f32[k] = 0; + case 32: + env->vfp.vreg[dest].f64[k] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* + * vwsmacc.vv vd, vs1, vs2, vm # + * vd[i] = clip((+(vs1[i]*vs2[i]+round)>>SEW/2)+vd[i]) + */ +void VECTOR_HELPER(vwsmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src1, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env, + env->vfp.vreg[src2].s8[j], + env->vfp.vreg[src1].s8[j], + env->vfp.vreg[dest].s16[k]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env, + env->vfp.vreg[src2].s16[j], + env->vfp.vreg[src1].s16[j], + env->vfp.vreg[dest].s32[k]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env, + env->vfp.vreg[src2].s32[j], + env->vfp.vreg[src1].s32[j], + env->vfp.vreg[dest].s64[k]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* + * vwsmacc.vx vd, rs1, vs2, vm # + * vd[i] = clip((+(x[rs1]*vs2[i]+round)>>SEW/2)+vd[i]) + */ +void VECTOR_HELPER(vwsmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + 
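
In vfwmacc.vv/.vf (and the vfwnmacc/vfwmsac/vfwnmsac variants below)
the accumulator is read as the *narrow* element and widened:

  float16_to_float32(env->vfp.vreg[dest].f16[j], true, &env->fp_status)

but vd is a 2*SEW destination here, so per your own comment
(vd[i] = +(vs1[i] * vs2[i]) + vd[i]) that should be the wide element,
read directly and at the wide index. For the .vv 16-bit case I would
expect something like:

  case 16:
      if (vector_elem_mask(env, vm, width, lmul, i)) {
          env->vfp.vreg[dest].f32[k] = float32_muladd(
              float16_to_float32(env->vfp.vreg[src1].f16[j], true,
                                 &env->fp_status),
              float16_to_float32(env->vfp.vreg[src2].f16[j], true,
                                 &env->fp_status),
              env->vfp.vreg[dest].f32[k], /* wide accumulator */
              0, &env->fp_status);
      }
      break;

The tail handling in these same helpers is also broken: the switch
that zeroes f32[k]/f64[k] has no breaks, so every tail element falls
through to default: and raises an illegal instruction. The other
vfw* helpers just call vector_tail_fwiden(env, dest, k, width) there,
which looks like what was intended.
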
vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = vwsmacc_8(env, + env->vfp.vreg[src2].s8[j], + env->gpr[rs1], + env->vfp.vreg[dest].s16[k]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = vwsmacc_16(env, + env->vfp.vreg[src2].s16[j], + env->gpr[rs1], + env->vfp.vreg[dest].s32[k]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = vwsmacc_32(env, + env->vfp.vreg[src2].s32[j], + env->gpr[rs1], + env->vfp.vreg[dest].s64[k]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* + * vwsmaccsu.vv vd, vs1, vs2, vm + * # vd[i] = clip(-((signed(vs1[i])*unsigned(vs2[i])+round)>>SEW/2)+vd[i]) + */ +void VECTOR_HELPER(vwsmaccsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src1, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env, + env->vfp.vreg[src2].u8[j], + env->vfp.vreg[src1].s8[j], + env->vfp.vreg[dest].s16[k]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env, + env->vfp.vreg[src2].u16[j], + env->vfp.vreg[src1].s16[j], + env->vfp.vreg[dest].s32[k]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env, + env->vfp.vreg[src2].u32[j], + env->vfp.vreg[src1].s32[j], + env->vfp.vreg[dest].s64[k]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* + * vwsmaccsu.vx vd, rs1, vs2, vm + * # vd[i] = clip(-((signed(x[rs1])*unsigned(vs2[i])+round)>>SEW/2)+vd[i]) + */ +void VECTOR_HELPER(vwsmaccsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul 
= vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = vwsmaccsu_8(env, + env->vfp.vreg[src2].u8[j], + env->gpr[rs1], + env->vfp.vreg[dest].s16[k]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = vwsmaccsu_16(env, + env->vfp.vreg[src2].u16[j], + env->gpr[rs1], + env->vfp.vreg[dest].s32[k]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = vwsmaccsu_32(env, + env->vfp.vreg[src2].u32[j], + env->gpr[rs1], + env->vfp.vreg[dest].s64[k]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* + * vwsmaccus.vx vd, rs1, vs2, vm + * # vd[i] = clip(-((unsigned(x[rs1])*signed(vs2[i])+round)>>SEW/2)+vd[i]) + */ +void VECTOR_HELPER(vwsmaccus_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + vl = env->vfp.vl; + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = vwsmaccus_8(env, + env->vfp.vreg[src2].s8[j], + env->gpr[rs1], + env->vfp.vreg[dest].s16[k]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = vwsmaccus_16(env, + env->vfp.vreg[src2].s16[j], + env->gpr[rs1], + env->vfp.vreg[dest].s32[k]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = vwsmaccus_32(env, + env->vfp.vreg[src2].s32[j], + env->gpr[rs1], + env->vfp.vreg[dest].s64[k]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + + +void VECTOR_HELPER(vwmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width 
= vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] += + (int16_t)env->vfp.vreg[src1].s8[j] + * (int16_t)env->vfp.vreg[src2].s8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] += + (int32_t)env->vfp.vreg[src1].s16[j] * + (int32_t)env->vfp.vreg[src2].s16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] += + (int64_t)env->vfp.vreg[src1].s32[j] * + (int64_t)env->vfp.vreg[src2].s32[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vwmacc_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] += + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * + (int16_t)((int8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] += + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * + (int32_t)((int16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] += + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * + (int64_t)((int32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +/* vfwnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i] */ +void VECTOR_HELPER(vfwnmacc_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src1, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * 
lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_muladd( + float16_to_float32(env->vfp.vreg[src1].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[dest].f16[j], true, + &env->fp_status), + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_muladd( + float32_to_float64(env->vfp.vreg[src1].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[dest].f32[j], + &env->fp_status), + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f32[k] = 0; + case 32: + env->vfp.vreg[dest].f64[k] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfwnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i] */ +void VECTOR_HELPER(vfwnmacc_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_muladd( + env->fpr[rs1], + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[dest].f16[j], true, + &env->fp_status), + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_muladd( + env->fpr[rs1], + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[dest].f32[j], + &env->fp_status), + float_muladd_negate_c | + float_muladd_negate_product, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, 
RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f32[k] = 0; + case 32: + env->vfp.vreg[dest].f64[k] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwmaccsu_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src1, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src1 = rs1 + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] += + (int16_t)env->vfp.vreg[src1].s8[j] + * (uint16_t)env->vfp.vreg[src2].u8[j]; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] += + (int32_t)env->vfp.vreg[src1].s16[j] * + (uint32_t)env->vfp.vreg[src2].u16[j]; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] += + (int64_t)env->vfp.vreg[src1].s32[j] * + (uint64_t)env->vfp.vreg[src2].u32[j]; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vwmaccsu_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] += + (uint16_t)((uint8_t)env->vfp.vreg[src2].u8[j]) * + (int16_t)((int8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] += + (uint32_t)((uint16_t)env->vfp.vreg[src2].u16[j]) * + (int32_t)((int16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] += + (uint64_t)((uint32_t)env->vfp.vreg[src2].u32[j]) * + (int64_t)((int32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + 
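
Related: the .vf forms of the fused helpers pass the scalar straight
through, e.g. vfwnmacc.vf/vfwmacc.vf do

  env->vfp.vreg[dest].f32[k] = float32_muladd(
      env->fpr[rs1],
      ...

whereas vfwadd.wf/vfwsub.wf widen the scalar first with
float16_to_float32(env->fpr[rs1], true, &env->fp_status) (and
float32_to_float64 for the 32-bit case). As written the raw 64-bit
fpr value is implicitly truncated to float32/float16. And if f16/f32
scalars are kept NaN-boxed in fpr[], both forms presumably need to
unbox before converting - I may have missed where that is handled.
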
} + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +/* vfwmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i] */ +void VECTOR_HELPER(vfwmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src1, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_muladd( + float16_to_float32(env->vfp.vreg[src1].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[dest].f16[j], true, + &env->fp_status), + float_muladd_negate_c, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_muladd( + float32_to_float64(env->vfp.vreg[src1].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[dest].f32[j], + &env->fp_status), + float_muladd_negate_c, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f32[k] = 0; + case 32: + env->vfp.vreg[dest].f64[k] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfwmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i] */ +void VECTOR_HELPER(vfwmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_muladd( + env->fpr[rs1], + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[dest].f16[j], true, + &env->fp_status), + 
float_muladd_negate_c, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_muladd( + env->fpr[rs1], + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[dest].f32[j], + &env->fp_status), + float_muladd_negate_c, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f32[k] = 0; + case 32: + env->vfp.vreg[dest].f64[k] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vwmaccus_vx)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl; + uint32_t lmul, width, src2, dest, vlmax; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + for (i = 0; i < vlmax; i++) { + src2 = rs2 + (i / (VLEN / width)); + dest = rd + (i / (VLEN / (2 * width))); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] += + (int16_t)((int8_t)env->vfp.vreg[src2].s8[j]) * + (uint16_t)((uint8_t)env->gpr[rs1]); + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] += + (int32_t)((int16_t)env->vfp.vreg[src2].s16[j]) * + (uint32_t)((uint16_t)env->gpr[rs1]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] += + (int64_t)((int32_t)env->vfp.vreg[src2].s32[j]) * + (uint64_t)((uint32_t)env->gpr[rs1]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_widen(env, dest, k, width); + } + } + env->vfp.vstart = 0; +} + +/* vfwnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i] */ +void VECTOR_HELPER(vfwnmsac_vv)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src1, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs1, lmul) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs1, false); + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + src1 = rs1 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_muladd( + 
float16_to_float32(env->vfp.vreg[src1].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[dest].f16[j], true, + &env->fp_status), + float_muladd_negate_product, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_muladd( + float32_to_float64(env->vfp.vreg[src1].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[dest].f32[j], + &env->fp_status), + float_muladd_negate_product, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f32[k] = 0; + case 32: + env->vfp.vreg[dest].f64[k] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + + env->vfp.vstart = 0; +} + +/* vfwnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i] */ +void VECTOR_HELPER(vfwnmsac_vf)(CPURISCVState *env, uint32_t vm, uint32_t rs1, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float32_muladd( + env->fpr[rs1], + float16_to_float32(env->vfp.vreg[src2].f16[j], true, + &env->fp_status), + float16_to_float32(env->vfp.vreg[dest].f16[j], true, + &env->fp_status), + float_muladd_negate_product, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float64_muladd( + env->fpr[rs1], + float32_to_float64(env->vfp.vreg[src2].f32[j], + &env->fp_status), + float32_to_float64(env->vfp.vreg[dest].f32[j], + &env->fp_status), + float_muladd_negate_product, + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f32[k] = 0; + case 32: + env->vfp.vreg[dest].f64[k] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + + +/* vfsqrt.v vd, vs2, vm # Vector-vector square root */ +void VECTOR_HELPER(vfsqrt_v)(CPURISCVState *env, uint32_t vm, uint32_t rs2, + uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = 
vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = float16_sqrt( + env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = float32_sqrt( + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = float64_sqrt( + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + switch (width) { + case 16: + env->vfp.vreg[dest].f16[j] = 0; + case 32: + env->vfp.vreg[dest].f32[j] = 0; + case 64: + env->vfp.vreg[dest].f64[j] = 0; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + return; + env->vfp.vstart = 0; +} + +/* vfclass.v vd, vs2, vm # Vector-vector */ +void VECTOR_HELPER(vfclass_v)(CPURISCVState *env, uint32_t vm, uint32_t rs2, + uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = helper_fclass_h( + env->vfp.vreg[src2].f16[j]); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = helper_fclass_s( + env->vfp.vreg[src2].f32[j]); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = helper_fclass_d( + env->vfp.vreg[src2].f64[j]); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ +void VECTOR_HELPER(vfcvt_xu_f_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + if (vector_vtype_ill(env)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[j] = float16_to_uint16( + env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[j] = float32_to_uint32( + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[j] = float64_to_uint64( + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ +void VECTOR_HELPER(vfcvt_x_f_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[j] = float16_to_int16( + env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[j] = float32_to_int32( + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[j] = float64_to_int64( + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. 
*/ +void VECTOR_HELPER(vfcvt_f_xu_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = uint16_to_float16( + env->vfp.vreg[src2].u16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = uint32_to_float32( + env->vfp.vreg[src2].u32[j], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = uint64_to_float64( + env->vfp.vreg[src2].u64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ +void VECTOR_HELPER(vfcvt_f_x_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, false); + + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[j] = int16_to_float16( + env->vfp.vreg[src2].s16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[j] = int32_to_float32( + env->vfp.vreg[src2].s32[j], + &env->fp_status); + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[j] = int64_to_float64( + env->vfp.vreg[src2].s64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fcommon(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ +void VECTOR_HELPER(vfwcvt_xu_f_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + 
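
In vfcvt_xu_f_v above, vector_vtype_ill(env) is checked twice; the
standalone check before lmul is even read can simply go. More generally
these single-width conversion helpers are identical apart from one line
per case arm, so they look like good candidates for a macro. A minimal
sketch (VFCVT_CASE is a made-up name, not something in the tree):

    #define VFCVT_CASE(W, DST, FN, SRC)                                 \
    case W:                                                             \
        if (vector_elem_mask(env, vm, width, lmul, i)) {                \
            env->vfp.vreg[dest].DST[j] = FN(env->vfp.vreg[src2].SRC[j], \
                                            &env->fp_status);           \
        }                                                               \
        break;

        switch (width) {
        VFCVT_CASE(16, u16, float16_to_uint16, f16)
        VFCVT_CASE(32, u32, float32_to_uint32, f32)
        VFCVT_CASE(64, u64, float64_to_uint64, f64)
        default:
            riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
            return;
        }

These helpers also all end with "return;" followed by
"env->vfp.vstart = 0;", i.e. the vstart reset is dead code, and I
suspect checkpatch will complain about the runs of blank lines after
the register checks.
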
} + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + + if (lmul > 4) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = float16_to_uint32( + env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u64[k] = float32_to_uint64( + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + } + } else { + vector_tail_fwiden(env, dest, j, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ +void VECTOR_HELPER(vfwcvt_x_f_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (lmul > 4) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = float16_to_int32( + env->vfp.vreg[src2].f16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s64[k] = float32_to_int64( + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fwiden(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ +void VECTOR_HELPER(vfwcvt_f_xu_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (lmul > 4) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 
* width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = uint16_to_float32( + env->vfp.vreg[src2].u16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = uint32_to_float64( + env->vfp.vreg[src2].u32[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fwiden(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ +void VECTOR_HELPER(vfwcvt_f_x_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (lmul > 4) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = int16_to_float32( + env->vfp.vreg[src2].s16[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = int32_to_float64( + env->vfp.vreg[src2].s32[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fwiden(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* + * vfwcvt.f.f.v vd, vs2, vm # + * Convert single-width float to double-width float. 
+ */ +void VECTOR_HELPER(vfwcvt_f_f_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) + || vector_overlap_vm_force(vm, rd) + || vector_overlap_dstgp_srcgp(rd, 2 * lmul, rs2, lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, false); + vector_lmul_check_reg(env, lmul, rd, true); + + if (lmul > 4) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / (2 * width))); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float16_to_float32( + env->vfp.vreg[src2].f16[j], + true, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f64[k] = float32_to_float64( + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fwiden(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ +void VECTOR_HELPER(vfncvt_xu_f_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) || + vector_overlap_vm_common(lmul, vm, rd) || + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + if (lmul > 4) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + k = i % (VLEN / width); + j = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u16[k] = float32_to_uint16( + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].u32[k] = float64_to_uint32( + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fnarrow(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. 
*/ +void VECTOR_HELPER(vfncvt_x_f_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) || + vector_overlap_vm_common(lmul, vm, rd) || + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + if (lmul > 4) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + k = i % (VLEN / width); + j = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s16[k] = float32_to_int16( + env->vfp.vreg[src2].f32[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].s32[k] = float64_to_int32( + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fnarrow(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ +void VECTOR_HELPER(vfncvt_f_xu_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + + if (vector_vtype_ill(env) || + vector_overlap_vm_common(lmul, vm, rd) || + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + if (lmul > 4) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + k = i % (VLEN / width); + j = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[k] = uint32_to_float16( + env->vfp.vreg[src2].u32[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = uint64_to_float32( + env->vfp.vreg[src2].u64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fnarrow(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. 
*/ +void VECTOR_HELPER(vfncvt_f_x_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) || + vector_overlap_vm_common(lmul, vm, rd) || + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + if (lmul > 4) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + k = i % (VLEN / width); + j = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[k] = int32_to_float16( + env->vfp.vreg[src2].s32[j], + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = int64_to_float32( + env->vfp.vreg[src2].s64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fnarrow(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ +void VECTOR_HELPER(vfncvt_f_f_v)(CPURISCVState *env, uint32_t vm, + uint32_t rs2, uint32_t rd) +{ + int width, lmul, vl, vlmax; + int i, j, k, dest, src2; + + lmul = vector_get_lmul(env); + vl = env->vfp.vl; + if (vector_vtype_ill(env) || + vector_overlap_vm_common(lmul, vm, rd) || + vector_overlap_dstgp_srcgp(rd, lmul, rs2, 2 * lmul)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rs2, true); + vector_lmul_check_reg(env, lmul, rd, false); + + if (lmul > 4) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (env->vfp.vstart >= vl) { + return; + } + + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / (2 * width))); + k = i % (VLEN / width); + j = i % (VLEN / (2 * width)); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f16[k] = float32_to_float16( + env->vfp.vreg[src2].f32[j], + true, + &env->fp_status); + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + env->vfp.vreg[dest].f32[k] = float64_to_float32( + env->vfp.vreg[src2].f64[j], + &env->fp_status); + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_fnarrow(env, dest, k, width); + } + } + return; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + 
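
A few things from the convert helpers above: vfwcvt_xu_f_v's tail path
passes j to vector_tail_fwiden() where every other widening convert
passes k (the double-width index), which looks like a typo:

    } else {
        vector_tail_fwiden(env, dest, k, width);
    }

The "lmul > 4" widening check also sits after vector_lmul_check_reg()
has already run; it probably belongs up with the other
illegal-instruction checks. And the vfncvt.xu.f.v header comment says
"Convert float to unsigned integer", which for symmetry with its
siblings should presumably read:

    /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to
       unsigned integer. */
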
return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].u8[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].u16[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].s8[j] = + cpu_ldsb_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend( + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend( + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend( + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlsbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + 
uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k; + env->vfp.vreg[dest + k * lmul].u8[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k; + env->vfp.vreg[dest + k * lmul].u16[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k; + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k; + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k; + env->vfp.vreg[dest + k * lmul].s8[j] = + cpu_ldsb_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k; + env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend( + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k; + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend( + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k; + 
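
cpu_ldsb_data() already returns a sign-extended value, so the
sign_extend(..., 8) wrappers here and in vlb_v are redundant; the
case-8 arms already rely on that:

    env->vfp.vreg[dest + k * lmul].s16[j] =
        cpu_ldsb_data(env, env->gpr[rs1] + read);

The same applies to the sign_extend(..., 16) around cpu_ldsw_data() in
the halfword loads further down.
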
env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend( + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlxbu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, src2; + target_ulong addr; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + env->vfp.vreg[dest + k * lmul].u8[j] = + cpu_ldub_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + env->vfp.vreg[dest + k * lmul].u16[j] = + cpu_ldub_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_ldub_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_ldub_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, src2; + target_ulong addr; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + env->vfp.vreg[dest + k * lmul].s8[j] = + cpu_ldsb_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) 
{ + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend( + cpu_ldsb_data(env, addr), 8); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend( + cpu_ldsb_data(env, addr), 8); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend( + cpu_ldsb_data(env, addr), 8); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlbuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + env->foflag = true; + env->vfp.vl = 0; + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].u8[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].u16[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->foflag = false; + env->vfp.vl = vl; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlbff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; 
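
If I'm reading the fault-only-first variants right, env->vfp.vl is
zeroed and then counted back up as each element completes, so a
trapping load leaves vl equal to the number of elements successfully
loaded (with env->foflag presumably telling the trap path not to report
faults beyond element 0). That is subtle enough to deserve a comment at
the top of each ff helper, something like:

    /*
     * Fault-only-first: clear vl and re-increment it per element, so
     * a trap taken part way through leaves vl holding the count of
     * elements that completed before the fault.
     */
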
+ } + + vector_lmul_check_reg(env, lmul, rd, false); + env->foflag = true; + env->vfp.vl = 0; + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].s8[j] = + cpu_ldsb_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].s16[j] = sign_extend( + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend( + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend( + cpu_ldsb_data(env, env->gpr[rs1] + read), 8); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->foflag = false; + env->vfp.vl = vl; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].u16[j] = + cpu_lduw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_lduw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_lduw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = 
vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].s16[j] = + cpu_ldsw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend( + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend( + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlshu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 2; + env->vfp.vreg[dest + k * lmul].u16[j] = + cpu_lduw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 2; + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_lduw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 2; + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_lduw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || 
vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 2; + env->vfp.vreg[dest + k * lmul].s16[j] = + cpu_ldsw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 2; + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend( + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 2; + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend( + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlxhu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, src2; + target_ulong addr; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 2, width, k); + env->vfp.vreg[dest + k * lmul].u16[j] = + cpu_lduw_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 2, width, k); + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_lduw_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 2, width, k); + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_lduw_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, src2; + target_ulong addr; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + 
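
The unit-stride loads are just the strided loads with a constant
stride: vlhu_v's "read = (i * (nf + 1) + k) * 2" is vlshu_v's
"read = i * stride + k * 2" with stride == (nf + 1) * 2. Factoring the
loop into one stride-taking function would remove half of these
helpers. A sketch, where vext_ldhu_stride() is a hypothetical
refactoring of vlshu_v's body that takes the stride as a parameter:

    void VECTOR_HELPER(vlhu_v)(CPURISCVState *env, uint32_t nf,
                               uint32_t vm, uint32_t rs1, uint32_t rd)
    {
        vext_ldhu_stride(env, nf, vm, rs1, (nf + 1) * 2, rd);
    }
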
+ if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 2, width, k); + env->vfp.vreg[dest + k * lmul].s16[j] = + cpu_ldsw_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 2, width, k); + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend( + cpu_ldsw_data(env, addr), 16); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 2, width, k); + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend( + cpu_ldsw_data(env, addr), 16); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlhuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rd, false); + env->foflag = true; + env->vfp.vl = 0; + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].u16[j] = + cpu_lduw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_lduw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_lduw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->foflag = false; + env->vfp.vl = vl; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlhff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + + lmul = 
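For the fault-only-first forms: cpu_lduw_data will take the normal
fault path on any element, so what actually suppresses the trap for
element indices > 0 and truncates vl instead? Presumably something
checks env->foflag on the fault path; a pointer to where that happens
(or a comment here) would help review. Also vfp.vl is zeroed and
counted back up while the loop runs, so a fault mid-loop leaves the
guest-visible vl in a half-updated state. Is that intended?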
vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rd, false); + env->foflag = true; + env->vfp.vl = 0; + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].s16[j] = + cpu_ldsw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].s32[j] = sign_extend( + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend( + cpu_ldsw_data(env, env->gpr[rs1] + read), 16); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->foflag = false; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 4; + env->vfp.vreg[dest + k * lmul].s32[j] = + cpu_ldl_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 4; + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend( + cpu_ldl_data(env, env->gpr[rs1] + read), 32); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + 
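vlhff_v ends with only "env->foflag = false" and never restores
vfp.vl, while vlhuff_v above does "env->vfp.vl = vl" before clearing
vstart. One of the two must be wrong; which behaviour is intended?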
return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 4; + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_ldl_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 4; + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_ldl_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlswu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 4; + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_ldl_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 4; + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_ldl_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 4; + env->vfp.vreg[dest + k * lmul].s32[j] = + cpu_ldl_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + 
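The signed/unsigned pairs (vlsw_v/vlswu_v, and the vlx* equivalents)
duplicate the whole loop and differ only in a sign_extend on the
widest case. They could share one body with the signedness folded into
a TCGMemOp; see the sketch after vlxw_v below. There are also stray
double blank lines at the top of several of these helpers.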
if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 4; + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend( + cpu_ldl_data(env, env->gpr[rs1] + read), 32); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlxwu_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, src2; + target_ulong addr; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 4, width, k); + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_ldl_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 4, width, k); + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_ldl_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, src2; + target_ulong addr; + + vl = env->vfp.vl; + + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 4, width, k); + env->vfp.vreg[dest + k * lmul].s32[j] = + cpu_ldl_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 4, width, k); + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend( + cpu_ldl_data(env, addr), 32); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + 
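In fact all of these load helpers are the same structure: bounds
checks, then a loop that picks a destination register slice, checks
the mask, loads nf+1 fields and widens into the element. That is why
this file is 26k lines. A sketch of what one common body for the
indexed loads could look like; completely untested, every name below
is invented, and the vstart/tail policy is kept as in your code:

    static uint64_t vext_ld_elem(CPURISCVState *env, target_ulong addr,
                                 TCGMemOp memop)
    {
        /* element size and signedness come from memop */
        switch (memop & MO_SSIZE) {
        case MO_UB: return cpu_ldub_data(env, addr);
        case MO_SB: return (int8_t)cpu_ldub_data(env, addr);
        case MO_UW: return cpu_lduw_data(env, addr);
        case MO_SW: return (int16_t)cpu_lduw_data(env, addr);
        case MO_UL: return cpu_ldl_data(env, addr);
        case MO_SL: return (int32_t)cpu_ldl_data(env, addr);
        case MO_Q:  return cpu_ldq_data(env, addr);
        default:    g_assert_not_reached();
        }
    }

    static void vext_write_elem(CPURISCVState *env, int reg, int idx,
                                int width, uint64_t val)
    {
        /* store an already-extended value into one vreg element */
        switch (width) {
        case 8:  env->vfp.vreg[reg].u8[idx]  = val; break;
        case 16: env->vfp.vreg[reg].u16[idx] = val; break;
        case 32: env->vfp.vreg[reg].u32[idx] = val; break;
        case 64: env->vfp.vreg[reg].u64[idx] = val; break;
        default: g_assert_not_reached();
        }
    }

    /* common body for vlxb/vlxbu/vlxh/vlxhu/vlxw/vlxwu/vlxe */
    static void vext_ldx_common(CPURISCVState *env, uint32_t nf,
                                uint32_t vm, uint32_t rs1, uint32_t rs2,
                                uint32_t rd, TCGMemOp memop)
    {
        int lmul = vector_get_lmul(env);
        int width = vector_get_width(env);
        int msz = 1 << (memop & MO_SIZE);  /* bytes per memory element */
        int i, k;

        /* the usual vtype/lmul/nf checks go here, as in your code */
        for (i = env->vfp.vstart; i < env->vfp.vl; i++) {
            int dest = rd + (i / (VLEN / width));
            int src2 = rs2 + (i / (VLEN / width));
            int j = i % (VLEN / width);

            if (vector_elem_mask(env, vm, width, lmul, i)) {
                for (k = nf; k >= 0; k--) {
                    target_ulong addr =
                        vector_get_index(env, rs1, src2, j, msz, width, k);
                    vext_write_elem(env, dest + k * lmul, j, width,
                                    vext_ld_elem(env, addr, memop));
                }
                env->vfp.vstart++;
            }
        }
        /* tail handling as before */
        env->vfp.vstart = 0;
    }

Then vlxh_v is vext_ldx_common(env, nf, vm, rs1, rs2, rd, MO_TESW),
vlxhu_v passes MO_TEUW, and so on. The unit-stride and strided forms
differ only in how addr is computed, so they can share the two element
helpers as well.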
+void VECTOR_HELPER(vlwuff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rd, false); + env->foflag = true; + env->vfp.vl = 0; + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 4; + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_ldl_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 4; + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_ldl_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->foflag = false; + env->vfp.vl = vl; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlwff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + vector_lmul_check_reg(env, lmul, rd, false); + env->foflag = true; + env->vfp.vl = 0; + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 4; + env->vfp.vreg[dest + k * lmul].s32[j] = + cpu_ldl_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 4; + env->vfp.vreg[dest + k * lmul].s64[j] = sign_extend( + cpu_ldl_data(env, env->gpr[rs1] + read), 32); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->foflag = false; + env->vfp.vl = vl; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vle_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, 
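Another problem with the ff variants: vfp.vl is set to 0 and foflag
set before the loop, but the default: arms raise the illegal
instruction exception directly, which longjmps out and leaves the
guest with vl = 0 and foflag still true. The width check should happen
before any state is touched.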
RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].u8[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].u16[j] = + cpu_lduw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 4; + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_ldl_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 8; + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_ldq_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vlse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k; + env->vfp.vreg[dest + k * lmul].u8[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 2; + env->vfp.vreg[dest + k * lmul].u16[j] = + cpu_lduw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 4; + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_ldl_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * env->gpr[rs2] + k * 8; + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_ldq_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void 
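For the tail elements (vl <= i < vlmax) everything funnels through
vector_tail_segment, and masked-off elements are silently skipped. I
assume that implements the 0.7.1 tail/mask policy, but a comment on
vector_tail_segment saying exactly which policy it implements would
make that checkable.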
VECTOR_HELPER(vlxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, src2; + target_ulong addr; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + env->vfp.vreg[dest + k * lmul].u8[j] = + cpu_ldub_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 2, width, k); + env->vfp.vreg[dest + k * lmul].u16[j] = + cpu_lduw_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 4, width, k); + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_ldl_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 8, width, k); + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_ldq_data(env, addr); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vleff_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, read; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + env->vfp.vl = 0; + env->foflag = true; + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = i * (nf + 1) + k; + env->vfp.vreg[dest + k * lmul].u8[j] = + cpu_ldub_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 2; + env->vfp.vreg[dest + k * lmul].u16[j] = + cpu_lduw_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 4; + env->vfp.vreg[dest + k * lmul].u32[j] = + cpu_ldl_data(env, 
env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + read = (i * (nf + 1) + k) * 8; + env->vfp.vreg[dest + k * lmul].u64[j] = + cpu_ldq_data(env, env->gpr[rs1] + read); + k--; + } + env->vfp.vstart++; + } + env->vfp.vl++; + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } else { + vector_tail_segment(env, dest, j, width, k, lmul); + } + } + env->foflag = false; + env->vfp.vl = vl; + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, wrote; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * (nf + 1) + k; + cpu_stb_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s8[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * (nf + 1) + k; + cpu_stb_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s16[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * (nf + 1) + k; + cpu_stb_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s32[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * (nf + 1) + k; + cpu_stb_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s64[j]); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vssb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, wrote; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * env->gpr[rs2] + k; + cpu_stb_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s8[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = 
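Minor: in the store helpers "dest" actually indexes the data source
register group (the value being stored), which makes these harder to
read than they need to be; "src3" or similar would be clearer. The
narrowing behaviour (vsb_v storing the low byte of 16/32/64-bit
elements) matches my reading of the spec but deserves a comment.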
i * env->gpr[rs2] + k; + cpu_stb_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s16[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * env->gpr[rs2] + k; + cpu_stb_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s32[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * env->gpr[rs2] + k; + cpu_stb_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s64[j]); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, src2; + target_ulong addr; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + cpu_stb_data(env, addr, + env->vfp.vreg[dest + k * lmul].s8[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + cpu_stb_data(env, addr, + env->vfp.vreg[dest + k * lmul].s16[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + cpu_stb_data(env, addr, + env->vfp.vreg[dest + k * lmul].s32[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + cpu_stb_data(env, addr, + env->vfp.vreg[dest + k * lmul].s64[j]); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsuxb_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + return VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd); + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, wrote; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 
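In vsuxb_v:

    return VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd);
    env->vfp.vstart = 0;

ISO C doesn't allow returning an expression from a void function, and
the vstart assignment after the return is dead code. The same pattern
repeats in vsuxh_v, vsuxw_v and vsuxe_v below. Since the unordered
store is implemented identically to the ordered one, the simplest fix
is a plain call:

    void VECTOR_HELPER(vsuxb_v)(CPURISCVState *env, uint32_t nf,
                                uint32_t vm, uint32_t rs1, uint32_t rs2,
                                uint32_t rd)
    {
        VECTOR_HELPER(vsxb_v)(env, nf, vm, rs1, rs2, rd);
    }

or better, point the decoder at the same helper and drop the wrappers
entirely.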
0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = (i * (nf + 1) + k) * 2; + cpu_stw_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s16[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = (i * (nf + 1) + k) * 2; + cpu_stw_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s32[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = (i * (nf + 1) + k) * 2; + cpu_stw_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s64[j]); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vssh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, wrote; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * env->gpr[rs2] + k * 2; + cpu_stw_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s16[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * env->gpr[rs2] + k * 2; + cpu_stw_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s32[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * env->gpr[rs2] + k * 2; + cpu_stw_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s64[j]); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, src2; + target_ulong addr; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 16: + 
if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 2, width, k); + cpu_stw_data(env, addr, + env->vfp.vreg[dest + k * lmul].s16[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 2, width, k); + cpu_stw_data(env, addr, + env->vfp.vreg[dest + k * lmul].s32[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 2, width, k); + cpu_stw_data(env, addr, + env->vfp.vreg[dest + k * lmul].s64[j]); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsuxh_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + return VECTOR_HELPER(vsxh_v)(env, nf, vm, rs1, rs2, rd); + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, wrote; + + vl = env->vfp.vl; + + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = (i * (nf + 1) + k) * 4; + cpu_stl_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s32[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = (i * (nf + 1) + k) * 4; + cpu_stl_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s64[j]); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vssw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, wrote; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * env->gpr[rs2] + k * 4; + cpu_stl_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s32[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, 
vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * env->gpr[rs2] + k * 4; + cpu_stl_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s64[j]); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, src2; + target_ulong addr; + + vl = env->vfp.vl; + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 4, width, k); + cpu_stl_data(env, addr, + env->vfp.vreg[dest + k * lmul].s32[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 4, width, k); + cpu_stl_data(env, addr, + env->vfp.vreg[dest + k * lmul].s64[j]); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsuxw_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + return VECTOR_HELPER(vsxw_v)(env, nf, vm, rs1, rs2, rd); + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, wrote; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * (nf + 1) + k; + cpu_stb_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s8[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = (i * (nf + 1) + k) * 2; + cpu_stw_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s16[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = (i * (nf + 1) + k) * 4; + cpu_stl_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s32[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if 
(vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = (i * (nf + 1) + k) * 8; + cpu_stq_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s64[j]); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsse_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, wrote; + + vl = env->vfp.vl; + + + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * env->gpr[rs2] + k; + cpu_stb_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s8[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * env->gpr[rs2] + k * 2; + cpu_stw_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s16[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * env->gpr[rs2] + k * 4; + cpu_stl_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s32[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + wrote = i * env->gpr[rs2] + k * 8; + cpu_stq_data(env, env->gpr[rs1] + wrote, + env->vfp.vreg[dest + k * lmul].s64[j]); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + int i, j, k, vl, vlmax, lmul, width, dest, src2; + target_ulong addr; + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + + if (vector_vtype_ill(env) || vector_overlap_vm_common(lmul, vm, rd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + if (lmul * (nf + 1) > 32) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, rd, false); + + for (i = 0; i < vlmax; i++) { + dest = rd + (i / (VLEN / width)); + src2 = rs2 + (i / (VLEN / width)); + j = i % (VLEN / width); + k = nf; + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 8: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 1, width, k); + cpu_stb_data(env, addr, + env->vfp.vreg[dest + k * lmul].s8[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 16: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 2, width, k); + cpu_stw_data(env, addr, + env->vfp.vreg[dest + k * 
lmul].s16[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 4, width, k); + cpu_stl_data(env, addr, + env->vfp.vreg[dest + k * lmul].s32[j]); + k--; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + while (k >= 0) { + addr = vector_get_index(env, rs1, src2, j, 8, width, k); + cpu_stq_data(env, addr, + env->vfp.vreg[dest + k * lmul].s64[j]); + k--; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vsuxe_v)(CPURISCVState *env, uint32_t nf, uint32_t vm, + uint32_t rs1, uint32_t rs2, uint32_t rd) +{ + return VECTOR_HELPER(vsxe_v)(env, nf, vm, rs1, rs2, rd); + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamoswapw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_xchgl_le(env, addr, + env->vfp.vreg[src3].s32[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_xchgl_le(env, addr, + env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr, + env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = (int64_t)(int32_t)helper_atomic_xchgl_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + + +void VECTOR_HELPER(vamoswapd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TEQ; +#endif + + vl = 
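s/writen/written/ throughout the AMO helpers, and presumably the
comment means vs3 (the vector destination), not rd.

Bigger question: the softmmu helper_atomic_* functions compute their
fault return address with GETPC(), which here will point into
vector_helper.c rather than into generated code. If one of these
accesses faults, can cpu_restore_state still unwind correctly?

Also the CONFIG_SOFTMMU split is repeated at every call site. It could
be hidden once per operation, something like (untested):

    static int32_t vamo_xchgl(CPURISCVState *env, target_ulong addr,
                              int32_t val)
    {
    #ifdef CONFIG_SOFTMMU
        TCGMemOpIdx oi = make_memop_idx(MO_ALIGN | MO_TEUL,
                                        cpu_mmu_index(env, false));
        return helper_atomic_xchgl_le(env, addr, val, oi);
    #else
        return helper_atomic_xchgl_le(env, addr, val);
    #endif
    }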
env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 64 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_xchgq_le(env, addr, + env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_xchgq_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamoaddw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_addl_le(env, addr, + env->vfp.vreg[src3].s32[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_addl_le(env, addr, + env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = (int64_t)(int32_t)helper_atomic_fetch_addl_le(env, + addr, env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = (int64_t)(int32_t)helper_atomic_fetch_addl_le(env, + addr, env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + 
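The vamo*w_v/vamo*d_v helpers are textually identical apart from the
atomic callback and the memop. Once the softmmu split is wrapped as
above, the whole family could be generated. Sketch only, with
vext_amo_common being a hypothetical shared body along the lines of
the load sketch earlier:

    #define GEN_VEXT_AMO(NAME, DO_OP, MEMOP)                           \
    void VECTOR_HELPER(NAME)(CPURISCVState *env, uint32_t wd,          \
                             uint32_t vm, uint32_t rs1, uint32_t vs2,  \
                             uint32_t vs3)                             \
    {                                                                  \
        vext_amo_common(env, wd, vm, rs1, vs2, vs3, MEMOP, DO_OP);     \
    }

    GEN_VEXT_AMO(vamoswapw_v, vamo_xchgl, MO_TESL)
    GEN_VEXT_AMO(vamoaddw_v,  vamo_addl,  MO_TESL)
    /* ... */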
env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} +void VECTOR_HELPER(vamoaddd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TEQ; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 64 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_addq_le(env, addr, + env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_addq_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamoxorw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 32: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int32_t tmp; + idx = (target_long)env->vfp.vreg[src2].s32[j]; + addr = idx + env->gpr[rs1]; +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_xorl_le(env, addr, + env->vfp.vreg[src3].s32[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_xorl_le(env, addr, + env->vfp.vreg[src3].s32[j]); +#endif + if (wd) { + 
env->vfp.vreg[src3].s32[j] = tmp; + } + env->vfp.vstart++; + } + break; + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env, + addr, env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = (int64_t)(int32_t)helper_atomic_fetch_xorl_le(env, + addr, env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + + +void VECTOR_HELPER(vamoxord_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 64 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / width)); + src3 = vs3 + (i / (VLEN / width)); + j = i % (VLEN / width); + if (i < env->vfp.vstart) { + continue; + } else if (i < vl) { + switch (width) { + case 64: + if (vector_elem_mask(env, vm, width, lmul, i)) { + int64_t tmp; + idx = (target_long)env->vfp.vreg[src2].s64[j]; + addr = idx + env->gpr[rs1]; + +#ifdef CONFIG_SOFTMMU + tmp = helper_atomic_fetch_xorq_le(env, addr, + env->vfp.vreg[src3].s64[j], + make_memop_idx(memop & ~MO_SIGN, mem_idx)); +#else + tmp = helper_atomic_fetch_xorq_le(env, addr, + env->vfp.vreg[src3].s64[j]); +#endif + if (wd) { + env->vfp.vreg[src3].s64[j] = tmp; + } + env->vfp.vstart++; + } + break; + default: + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + break; + } + } else { + vector_tail_amo(env, src3, j, width); + } + } + env->vfp.vstart = 0; +} + +void VECTOR_HELPER(vamoandw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm, + uint32_t rs1, uint32_t vs2, uint32_t vs3) +{ + int i, j, vl; + target_long idx; + uint32_t lmul, width, src2, src3, vlmax; + target_ulong addr; +#ifdef CONFIG_SOFTMMU + int mem_idx = cpu_mmu_index(env, false); + TCGMemOp memop = MO_ALIGN | MO_TESL; +#endif + + vl = env->vfp.vl; + lmul = vector_get_lmul(env); + width = vector_get_width(env); + vlmax = vector_get_vlmax(env); + /* MEM <= SEW <= XLEN */ + if (width < 32 || (width > sizeof(target_ulong) * 8)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + /* if wd, rd is writen the old value */ + if (vector_vtype_ill(env) || + (vector_overlap_vm_common(lmul, vm, vs3) && wd)) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + return; + } + + vector_lmul_check_reg(env, lmul, vs2, false); + vector_lmul_check_reg(env, lmul, vs3, false); + + for (i = 0; i < vlmax; i++) { + src2 = vs2 + (i / (VLEN / 
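vamoxord_v sets

    TCGMemOp memop = MO_ALIGN | MO_TESL;

but this is the 64-bit op: every other *d_v helper uses MO_TEQ, and
the size in the TCGMemOpIdx handed to helper_atomic_fetch_xorq_le will
be wrong. Looks like a copy/paste slip from vamoxorw_v.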
> +
> +void VECTOR_HELPER(vamoandw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> +    uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> +    int i, j, vl;
> +    target_long idx;
> +    uint32_t lmul, width, src2, src3, vlmax;
> +    target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> +    int mem_idx = cpu_mmu_index(env, false);
> +    TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +    /* MEM <= SEW <= XLEN */
> +    if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    /* if wd is set, rd is written with the old value */
> +    if (vector_vtype_ill(env) ||
> +        (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, vs2, false);
> +    vector_lmul_check_reg(env, lmul, vs3, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src2 = vs2 + (i / (VLEN / width));
> +        src3 = vs3 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    int32_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s32[j];
> +                    addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_andl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_andl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s32[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    int64_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s64[j];
> +                    addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = (int64_t)(int32_t)helper_atomic_fetch_andl_le(env,
> +                        addr, env->vfp.vreg[src3].s64[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = (int64_t)(int32_t)helper_atomic_fetch_andl_le(env,
> +                        addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s64[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_amo(env, src3, j, width);
> +        }
> +    }
> +
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoandd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> +    uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> +    int i, j, vl;
> +    target_long idx;
> +    uint32_t lmul, width, src2, src3, vlmax;
> +    target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> +    int mem_idx = cpu_mmu_index(env, false);
> +    TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +    /* MEM <= SEW <= XLEN */
> +    if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    /* if wd is set, rd is written with the old value */
> +    if (vector_vtype_ill(env) ||
> +        (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, vs2, false);
> +    vector_lmul_check_reg(env, lmul, vs3, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src2 = vs2 + (i / (VLEN / width));
> +        src3 = vs3 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    int64_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s64[j];
> +                    addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_andq_le(env, addr,
> +                        env->vfp.vreg[src3].s64[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_andq_le(env, addr,
> +                        env->vfp.vreg[src3].s64[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s64[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_amo(env, src3, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
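Every atomic access is written twice, with the #ifdef CONFIG_SOFTMMU arm
differing from the user-mode arm only in the trailing memop-index
argument. A small local macro would halve these bodies; hypothetical
sketch (VAMO_ATOMIC is my name, and it relies on memop and mem_idx being
in scope, as they are in these helpers):

    #ifdef CONFIG_SOFTMMU
    /* softmmu: the atomic helpers take an extra TCGMemOpIdx argument */
    #define VAMO_ATOMIC(FN, env, addr, val) \
        FN(env, addr, val, make_memop_idx(memop & ~MO_SIGN, mem_idx))
    #else
    /* linux-user: same call without the memop index */
    #define VAMO_ATOMIC(FN, env, addr, val) FN(env, addr, val)
    #endif

so that e.g. the 32-bit arm of vamoandw_v becomes a single

    tmp = VAMO_ATOMIC(helper_atomic_fetch_andl_le, env, addr,
                      env->vfp.vreg[src3].s32[j]);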
> +
> +void VECTOR_HELPER(vamoorw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> +    uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> +    int i, j, vl;
> +    target_long idx;
> +    uint32_t lmul, width, src2, src3, vlmax;
> +    target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> +    int mem_idx = cpu_mmu_index(env, false);
> +    TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +    /* MEM <= SEW <= XLEN */
> +    if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    /* if wd is set, rd is written with the old value */
> +    if (vector_vtype_ill(env) ||
> +        (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, vs2, false);
> +    vector_lmul_check_reg(env, lmul, vs3, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src2 = vs2 + (i / (VLEN / width));
> +        src3 = vs3 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    int32_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s32[j];
> +                    addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_orl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_orl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s32[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    int64_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s64[j];
> +                    addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = (int64_t)(int32_t)helper_atomic_fetch_orl_le(env,
> +                        addr, env->vfp.vreg[src3].s64[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = (int64_t)(int32_t)helper_atomic_fetch_orl_le(env,
> +                        addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s64[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_amo(env, src3, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamoord_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> +    uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> +    int i, j, vl;
> +    target_long idx;
> +    uint32_t lmul, width, src2, src3, vlmax;
> +    target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> +    int mem_idx = cpu_mmu_index(env, false);
> +    TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +    /* MEM <= SEW <= XLEN */
> +    if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    /* if wd is set, rd is written with the old value */
> +    if (vector_vtype_ill(env) ||
> +        (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, vs2, false);
> +    vector_lmul_check_reg(env, lmul, vs3, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src2 = vs2 + (i / (VLEN / width));
> +        src3 = vs3 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    int64_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s64[j];
> +                    addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_orq_le(env, addr,
> +                        env->vfp.vreg[src3].s64[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_orq_le(env, addr,
> +                        env->vfp.vreg[src3].s64[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s64[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_amo(env, src3, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamominw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> +    uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> +    int i, j, vl;
> +    target_long idx;
> +    uint32_t lmul, width, src2, src3, vlmax;
> +    target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> +    int mem_idx = cpu_mmu_index(env, false);
> +    TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +    /* MEM <= SEW <= XLEN */
> +    if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    /* if wd is set, rd is written with the old value */
> +    if (vector_vtype_ill(env) ||
> +        (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, vs2, false);
> +    vector_lmul_check_reg(env, lmul, vs3, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src2 = vs2 + (i / (VLEN / width));
> +        src3 = vs3 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    int32_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s32[j];
> +                    addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_sminl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_sminl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s32[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    int64_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s64[j];
> +                    addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env,
> +                        addr, env->vfp.vreg[src3].s64[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = (int64_t)(int32_t)helper_atomic_fetch_sminl_le(env,
> +                        addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s64[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_amo(env, src3, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
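For all of the *d helpers, the guard width < 64 || width >
sizeof(target_ulong) * 8 only lets width == 64 through on a 64-bit
target, so the switch below it has a single live arm and the default case
is unreachable; on riscv32 the condition is always true and the helper
unconditionally raises an illegal instruction. If that is the intent, a
direct test would say it more clearly (sketch, behaviourally equivalent
to the guard above):

    /* d-form vector AMOs only exist for SEW == 64 and need XLEN >= 64 */
    if (width != 64 || width > sizeof(target_ulong) * 8) {
        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
        return;
    }

and the body could then drop the switch entirely.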
> +
> +
> +void VECTOR_HELPER(vamomind_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> +    uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> +    int i, j, vl;
> +    target_long idx;
> +    uint32_t lmul, width, src2, src3, vlmax;
> +    target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> +    int mem_idx = cpu_mmu_index(env, false);
> +    TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +    /* MEM <= SEW <= XLEN */
> +    if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    /* if wd is set, rd is written with the old value */
> +    if (vector_vtype_ill(env) ||
> +        (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, vs2, false);
> +    vector_lmul_check_reg(env, lmul, vs3, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src2 = vs2 + (i / (VLEN / width));
> +        src3 = vs3 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    int64_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s64[j];
> +                    addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_sminq_le(env, addr,
> +                        env->vfp.vreg[src3].s64[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_sminq_le(env, addr,
> +                        env->vfp.vreg[src3].s64[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s64[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_amo(env, src3, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamomaxw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> +    uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> +    int i, j, vl;
> +    target_long idx;
> +    uint32_t lmul, width, src2, src3, vlmax;
> +    target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> +    int mem_idx = cpu_mmu_index(env, false);
> +    TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +    /* MEM <= SEW <= XLEN */
> +    if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    /* if wd is set, rd is written with the old value */
> +    if (vector_vtype_ill(env) ||
> +        (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, vs2, false);
> +    vector_lmul_check_reg(env, lmul, vs3, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src2 = vs2 + (i / (VLEN / width));
> +        src3 = vs3 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    int32_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s32[j];
> +                    addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_smaxl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_smaxl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s32[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    int64_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s64[j];
> +                    addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env,
> +                        addr, env->vfp.vreg[src3].s64[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = (int64_t)(int32_t)helper_atomic_fetch_smaxl_le(env,
> +                        addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s64[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_amo(env, src3, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +
> +void VECTOR_HELPER(vamomaxd_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> +    uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> +    int i, j, vl;
> +    target_long idx;
> +    uint32_t lmul, width, src2, src3, vlmax;
> +    target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> +    int mem_idx = cpu_mmu_index(env, false);
> +    TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +    /* MEM <= SEW <= XLEN */
> +    if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    /* if wd is set, rd is written with the old value */
> +    if (vector_vtype_ill(env) ||
> +        (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, vs2, false);
> +    vector_lmul_check_reg(env, lmul, vs3, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src2 = vs2 + (i / (VLEN / width));
> +        src3 = vs3 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    int64_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s64[j];
> +                    addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_smaxq_le(env, addr,
> +                        env->vfp.vreg[src3].s64[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_smaxq_le(env, addr,
> +                        env->vfp.vreg[src3].s64[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s64[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_amo(env, src3, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +void VECTOR_HELPER(vamominuw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> +    uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> +    int i, j, vl;
> +    target_long idx;
> +    uint32_t lmul, width, src2, src3, vlmax;
> +    target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> +    int mem_idx = cpu_mmu_index(env, false);
> +    TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +    /* MEM <= SEW <= XLEN */
> +    if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    /* if wd is set, rd is written with the old value */
> +    if (vector_vtype_ill(env) ||
> +        (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, vs2, false);
> +    vector_lmul_check_reg(env, lmul, vs3, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src2 = vs2 + (i / (VLEN / width));
> +        src3 = vs3 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    uint32_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s32[j];
> +                    addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_uminl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_uminl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s32[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    uint64_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s64[j];
> +                    addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le(
> +                        env, addr, env->vfp.vreg[src3].s64[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = (int64_t)(int32_t)helper_atomic_fetch_uminl_le(
> +                        env, addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s64[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_amo(env, src3, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
> +
> +void VECTOR_HELPER(vamominud_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> +    uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> +    int i, j, vl;
> +    target_long idx;
> +    uint32_t lmul, width, src2, src3, vlmax;
> +    target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> +    int mem_idx = cpu_mmu_index(env, false);
> +    TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +    /* MEM <= SEW <= XLEN */
> +    if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    /* if wd is set, rd is written with the old value */
> +    if (vector_vtype_ill(env) ||
> +        (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, vs2, false);
> +    vector_lmul_check_reg(env, lmul, vs3, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src2 = vs2 + (i / (VLEN / width));
> +        src3 = vs3 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    uint32_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s32[j];
> +                    addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_uminl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_uminl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s32[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    uint64_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s64[j];
> +                    addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_uminq_le(
> +                        env, addr, env->vfp.vreg[src3].s64[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_uminq_le(env, addr,
> +                        env->vfp.vreg[src3].s64[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s64[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_amo(env, src3, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
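vamominud_v looks like it was copied from vamominuw_v without being fully
converted: it keeps MO_TESL where the other d helpers use MO_TEQ, and it
carries a case 32 arm that the width < 64 guard above makes unreachable.
I think the intended shape is (sketch, following vamomaxud_v below):

    TCGMemOp memop = MO_ALIGN | MO_TEQ;   /* 64-bit op, not MO_TESL */

with the dead case 32 arm deleted, leaving case 64 calling
helper_atomic_fetch_uminq_le() as the only live path.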
> +
> +void VECTOR_HELPER(vamomaxuw_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> +    uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> +    int i, j, vl;
> +    target_long idx;
> +    uint32_t lmul, width, src2, src3, vlmax;
> +    target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> +    int mem_idx = cpu_mmu_index(env, false);
> +    TCGMemOp memop = MO_ALIGN | MO_TESL;
> +#endif
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +    /* MEM <= SEW <= XLEN */
> +    if (width < 32 || (width > sizeof(target_ulong) * 8)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    /* if wd is set, rd is written with the old value */
> +    if (vector_vtype_ill(env) ||
> +        (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, vs2, false);
> +    vector_lmul_check_reg(env, lmul, vs3, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src2 = vs2 + (i / (VLEN / width));
> +        src3 = vs3 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 32:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    uint32_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s32[j];
> +                    addr = idx + env->gpr[rs1];
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_umaxl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_umaxl_le(env, addr,
> +                        env->vfp.vreg[src3].s32[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s32[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    uint64_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s64[j];
> +                    addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le(
> +                        env, addr, env->vfp.vreg[src3].s64[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = (int64_t)(int32_t)helper_atomic_fetch_umaxl_le(
> +                        env, addr, env->vfp.vreg[src3].s64[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s64[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_amo(env, src3, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +void VECTOR_HELPER(vamomaxud_v)(CPURISCVState *env, uint32_t wd, uint32_t vm,
> +    uint32_t rs1, uint32_t vs2, uint32_t vs3)
> +{
> +    int i, j, vl;
> +    target_long idx;
> +    uint32_t lmul, width, src2, src3, vlmax;
> +    target_ulong addr;
> +#ifdef CONFIG_SOFTMMU
> +    int mem_idx = cpu_mmu_index(env, false);
> +    TCGMemOp memop = MO_ALIGN | MO_TEQ;
> +#endif
> +
> +    vl = env->vfp.vl;
> +    lmul = vector_get_lmul(env);
> +    width = vector_get_width(env);
> +    vlmax = vector_get_vlmax(env);
> +    /* MEM <= SEW <= XLEN */
> +    if (width < 64 || (width > sizeof(target_ulong) * 8)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +    /* if wd is set, rd is written with the old value */
> +    if (vector_vtype_ill(env) ||
> +        (vector_overlap_vm_common(lmul, vm, vs3) && wd)) {
> +        riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +        return;
> +    }
> +
> +    vector_lmul_check_reg(env, lmul, vs2, false);
> +    vector_lmul_check_reg(env, lmul, vs3, false);
> +
> +    for (i = 0; i < vlmax; i++) {
> +        src2 = vs2 + (i / (VLEN / width));
> +        src3 = vs3 + (i / (VLEN / width));
> +        j = i % (VLEN / width);
> +        if (i < env->vfp.vstart) {
> +            continue;
> +        } else if (i < vl) {
> +            switch (width) {
> +            case 64:
> +                if (vector_elem_mask(env, vm, width, lmul, i)) {
> +                    uint64_t tmp;
> +                    idx = (target_long)env->vfp.vreg[src2].s64[j];
> +                    addr = idx + env->gpr[rs1];
> +
> +#ifdef CONFIG_SOFTMMU
> +                    tmp = helper_atomic_fetch_umaxq_le(
> +                        env, addr, env->vfp.vreg[src3].s64[j],
> +                        make_memop_idx(memop & ~MO_SIGN, mem_idx));
> +#else
> +                    tmp = helper_atomic_fetch_umaxq_le(env, addr,
> +                        env->vfp.vreg[src3].s64[j]);
> +#endif
> +                    if (wd) {
> +                        env->vfp.vreg[src3].s64[j] = tmp;
> +                    }
> +                    env->vfp.vstart++;
> +                }
> +                break;
> +            default:
> +                riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
> +                break;
> +            }
> +        } else {
> +            vector_tail_amo(env, src3, j, width);
> +        }
> +    }
> +    env->vfp.vstart = 0;
> +}
> +
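One more general question on these loops: env->vfp.vstart++ is only
executed for active (unmasked) elements, but the restart check at the top
of the loop compares vstart against the element index i. If an earlier
element in the body was masked off and a later atomic access faults, the
saved vstart is smaller than the faulting index, so an already-completed
atomic update is re-executed on restart (a fetch-add would be applied
twice). Should the loop track the element index instead? Something like
(sketch of what I would expect, assuming precise restart is the intent):

    } else if (i < vl) {
        if (vector_elem_mask(env, vm, width, lmul, i)) {
            /* the atomic access that may fault goes here */
        }
        env->vfp.vstart = i + 1;   /* resume after this element,
                                      masked or not */
    }

or am I missing where vstart is kept in sync with i?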