@@ -2223,3 +2223,16 @@ DEF_HELPER_5(th_vfwcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(th_vfwcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(th_vfwcvt_f_f_v_h, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(th_vfwcvt_f_f_v_w, void, ptr, ptr, ptr, env, i32)
+/*
+ * XTheadVector narrowing floating-point/integer type-convert helpers.
+ * The float-to-integer conversions (xu_f, x_f) are declared for _b, _h and
+ * _w; the conversions that produce a float (f_xu, f_x, f_f) only for _h and _w.
+ */
+DEF_HELPER_5(th_vfncvt_xu_f_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(th_vfncvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(th_vfncvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(th_vfncvt_x_f_v_b, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(th_vfncvt_x_f_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(th_vfncvt_x_f_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(th_vfncvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(th_vfncvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(th_vfncvt_f_x_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(th_vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(th_vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32)
+DEF_HELPER_5(th_vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32)
@@ -2285,17 +2285,101 @@ GEN_OPFV_WIDEN_TRANS_TH(th_vfwcvt_xu_f_v)
GEN_OPFV_WIDEN_TRANS_TH(th_vfwcvt_x_f_v)
GEN_OPFV_WIDEN_TRANS_TH(th_vfwcvt_f_f_v)
+/* Narrowing Floating-Point/Integer Type-Convert Instructions */
+
+/*
+ * Legality check for the narrowing converts instantiated through
+ * GEN_OPFV_NARROW_TRANS_TH (vfncvt.f.xu.v, vfncvt.f.x.v, vfncvt.f.f.v).
+ *
+ * If the current SEW does not correspond to a supported IEEE floating-point
+ * type, an illegal instruction exception is raised
+ *
+ * Returns true only when every sub-check below passes. rd is checked with a
+ * group size of 1 << lmul and rs2 with 2 << lmul, and lmul must be below 0x3.
+ * s->sew must be 1 or 2: the caller indexes a two-entry {_h, _w} helper
+ * table with s->sew - 1, so 0 and anything >= 3 have to be rejected here.
+ * NOTE(review): the `true` passed to th_check_reg() for rs2 presumably marks
+ * it as the double-width operand -- confirm against th_check_reg().
+ */
+static bool opfv_narrow_check_th(DisasContext *s, arg_rmr *a)
+{
+ return (require_xtheadvector(s) &&
+ vext_check_isa_ill(s) &&
+ th_check_overlap_mask(s, a->rd, a->vm, false) &&
+ th_check_reg(s, a->rd, false) &&
+ th_check_reg(s, a->rs2, true) &&
+ th_check_overlap_group(a->rd, 1 << s->lmul, a->rs2,
+ 2 << s->lmul) &&
+ (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0));
+}
+/*
+ * Legality check for the narrowing float-to-integer converts instantiated
+ * through GEN_OPXFV_NARROW_TRANS_TH (vfncvt.xu.f.v, vfncvt.x.f.v).
+ *
+ * Performs the same sub-checks as opfv_narrow_check_th() except that
+ * s->sew == 0 is accepted: the caller's helper table has a _b entry at
+ * index 0, so only s->sew >= 3 has to be rejected.
+ */
+static bool opxfv_narrow_check_th(DisasContext *s, arg_rmr *a)
+{
+ return (require_xtheadvector(s) &&
+ vext_check_isa_ill(s) &&
+ th_check_overlap_mask(s, a->rd, a->vm, false) &&
+ th_check_reg(s, a->rd, false) &&
+ th_check_reg(s, a->rs2, true) &&
+ th_check_overlap_group(a->rd, 1 << s->lmul, a->rs2,
+ 2 << s->lmul) &&
+ (s->lmul < 0x3) && (s->sew < 0x3));
+}
+/*
+ * Define trans_<NAME>() for a narrowing float-to-integer convert.
+ *
+ * When opxfv_narrow_check_th() accepts the instruction, the generated
+ * function calls gen_set_rm(s, RISCV_FRM_DYN), packs MLEN, VM and LMUL into
+ * a VDATA_TH descriptor word and emits tcg_gen_gvec_3_ptr() on the register
+ * offsets of rd, vector register 0 (presumably the mask register) and rs2.
+ * The helper is selected by s->sew from the {_b, _h, _w} table; the check
+ * guarantees s->sew < 3, so the index is always in range. The function then
+ * calls finalize_rvv_inst() and returns true. If the check fails it returns
+ * false without generating the helper call.
+ */
+#define GEN_OPXFV_NARROW_TRANS_TH(NAME) \
+static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
+{ \
+ if (opxfv_narrow_check_th(s, a)) { \
+ uint32_t data = 0; \
+ static gen_helper_gvec_3_ptr * const fns[3] = { \
+ gen_helper_##NAME##_b, \
+ gen_helper_##NAME##_h, \
+ gen_helper_##NAME##_w, \
+ }; \
+ gen_set_rm(s, RISCV_FRM_DYN); \
+ \
+ data = FIELD_DP32(data, VDATA_TH, MLEN, s->mlen); \
+ data = FIELD_DP32(data, VDATA_TH, VM, a->vm); \
+ data = FIELD_DP32(data, VDATA_TH, LMUL, s->lmul); \
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \
+ vreg_ofs(s, 0), \
+ vreg_ofs(s, a->rs2), tcg_env, \
+ s->cfg_ptr->vlenb, \
+ s->cfg_ptr->vlenb, data, \
+ fns[s->sew]); \
+ finalize_rvv_inst(s); \
+ return true; \
+ } \
+ return false; \
+}
+
+GEN_OPXFV_NARROW_TRANS_TH(th_vfncvt_xu_f_v)
+GEN_OPXFV_NARROW_TRANS_TH(th_vfncvt_x_f_v)
+/*
+ * Define trans_<NAME>() for a narrowing convert that has only _h and _w
+ * helpers (instantiated below for the f_xu, f_x and f_f forms).
+ *
+ * Same code generation as GEN_OPXFV_NARROW_TRANS_TH, but gated by
+ * opfv_narrow_check_th(), which restricts s->sew to 1 or 2. The helper table
+ * therefore has only two entries and is indexed with s->sew - 1; the check
+ * is what keeps that index from underflowing or running past the table.
+ */
+#define GEN_OPFV_NARROW_TRANS_TH(NAME) \
+static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
+{ \
+ if (opfv_narrow_check_th(s, a)) { \
+ uint32_t data = 0; \
+ static gen_helper_gvec_3_ptr * const fns[2] = { \
+ gen_helper_##NAME##_h, \
+ gen_helper_##NAME##_w, \
+ }; \
+ gen_set_rm(s, RISCV_FRM_DYN); \
+ \
+ data = FIELD_DP32(data, VDATA_TH, MLEN, s->mlen); \
+ data = FIELD_DP32(data, VDATA_TH, VM, a->vm); \
+ data = FIELD_DP32(data, VDATA_TH, LMUL, s->lmul); \
+ tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \
+ vreg_ofs(s, 0), \
+ vreg_ofs(s, a->rs2), tcg_env, \
+ s->cfg_ptr->vlenb, \
+ s->cfg_ptr->vlenb, data, \
+ fns[s->sew - 1]); \
+ finalize_rvv_inst(s); \
+ return true; \
+ } \
+ return false; \
+}
+GEN_OPFV_NARROW_TRANS_TH(th_vfncvt_f_xu_v)
+GEN_OPFV_NARROW_TRANS_TH(th_vfncvt_f_x_v)
+GEN_OPFV_NARROW_TRANS_TH(th_vfncvt_f_f_v)
+
#define TH_TRANS_STUB(NAME) \
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
{ \
return require_xtheadvector(s); \
}
-TH_TRANS_STUB(th_vfncvt_xu_f_v)
-TH_TRANS_STUB(th_vfncvt_x_f_v)
-TH_TRANS_STUB(th_vfncvt_f_xu_v)
-TH_TRANS_STUB(th_vfncvt_f_x_v)
-TH_TRANS_STUB(th_vfncvt_f_f_v)
TH_TRANS_STUB(th_vredsum_vs)
TH_TRANS_STUB(th_vredand_vs)
TH_TRANS_STUB(th_vredor_vs)
@@ -4315,10 +4315,7 @@ RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
/* Narrowing Floating-Point/Integer Type-Convert Instructions */
-/* (TD, T2, TX2) */
-#define NOP_UU_B uint8_t, uint16_t, uint32_t
-#define NOP_UU_H uint16_t, uint32_t, uint32_t
-#define NOP_UU_W uint32_t, uint64_t, uint64_t
+
/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
@@ -135,6 +135,9 @@ void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
#define WOP_UU_B uint16_t, uint8_t, uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
+#define NOP_UU_B uint8_t, uint16_t, uint32_t /* narrowing: (TD, T2, TX2) */
+#define NOP_UU_H uint16_t, uint32_t, uint32_t
+#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* (TD, T1, T2, TX1, TX2) */
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
@@ -3282,3 +3282,44 @@ THCALL(TH_OPFVV1, th_vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
THCALL(TH_OPFVV1, th_vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_TH_V_ENV(th_vfwcvt_f_f_v_h, 2, 4, clearl_th)
GEN_TH_V_ENV(th_vfwcvt_f_f_v_w, 4, 8, clearq_th)
+
+/* Narrowing Floating-Point/Integer Type-Convert Instructions */
+
+/*
+ * vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer.
+ * Per-width conversions: float16 -> uint8, float32 -> uint16,
+ * float64 -> uint32.
+ */
+THCALL(TH_OPFVV1, th_vfncvt_xu_f_v_b, NOP_UU_B, H1, H2, float16_to_uint8)
+THCALL(TH_OPFVV1, th_vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
+THCALL(TH_OPFVV1, th_vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
+GEN_TH_V_ENV(th_vfncvt_xu_f_v_b, 1, 1, clearb_th)
+GEN_TH_V_ENV(th_vfncvt_xu_f_v_h, 2, 2, clearh_th)
+GEN_TH_V_ENV(th_vfncvt_xu_f_v_w, 4, 4, clearl_th)
+
+/*
+ * vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer.
+ * Per-width conversions: float16 -> int8, float32 -> int16, float64 -> int32.
+ */
+THCALL(TH_OPFVV1, th_vfncvt_x_f_v_b, NOP_UU_B, H1, H2, float16_to_int8)
+THCALL(TH_OPFVV1, th_vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
+THCALL(TH_OPFVV1, th_vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
+GEN_TH_V_ENV(th_vfncvt_x_f_v_b, 1, 1, clearb_th)
+GEN_TH_V_ENV(th_vfncvt_x_f_v_h, 2, 2, clearh_th)
+GEN_TH_V_ENV(th_vfncvt_x_f_v_w, 4, 4, clearl_th)
+
+/*
+ * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
+ * Per-width conversions: uint32 -> float16, uint64 -> float32. There is no
+ * _b variant.
+ */
+THCALL(TH_OPFVV1, th_vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
+THCALL(TH_OPFVV1, th_vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
+GEN_TH_V_ENV(th_vfncvt_f_xu_v_h, 2, 2, clearh_th)
+GEN_TH_V_ENV(th_vfncvt_f_xu_v_w, 4, 4, clearl_th)
+
+/*
+ * vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float.
+ * Per-width conversions: int32 -> float16, int64 -> float32. There is no
+ * _b variant.
+ */
+THCALL(TH_OPFVV1, th_vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
+THCALL(TH_OPFVV1, th_vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
+GEN_TH_V_ENV(th_vfncvt_f_x_v_h, 2, 2, clearh_th)
+GEN_TH_V_ENV(th_vfncvt_f_x_v_w, 4, 4, clearl_th)
+
+/*
+ * vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float.
+ *
+ * vfncvtffv16() wraps float32_to_float16() so that it takes only
+ * (value, float_status *) and can be passed to THCALL in the same position
+ * as float64_to_float32 below. It always passes `true` as the middle
+ * argument.
+ * NOTE(review): that argument is presumably the IEEE half-precision
+ * selector -- confirm against the softfloat declaration.
+ */
+static uint16_t vfncvtffv16(uint32_t a, float_status *s)
+{
+ return float32_to_float16(a, true, s);
+}
+
+THCALL(TH_OPFVV1, th_vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16)
+THCALL(TH_OPFVV1, th_vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32)
+GEN_TH_V_ENV(th_vfncvt_f_f_v_h, 2, 2, clearh_th)
+GEN_TH_V_ENV(th_vfncvt_f_f_v_w, 4, 4, clearl_th)
Compared to RVV1.0, XTheadVector lacks the .rtz and .rod instruction
variants, which specify the rounding mode in the instruction itself.
Apart from the absence of these variants, the instructions have the same
function as in RVV1.0. Overall, only the general differences between
XTheadVector and RVV1.0 apply.

Signed-off-by: Huang Tao <eric.huang@linux.alibaba.com>
---
 target/riscv/helper.h                         | 13 +++
 .../riscv/insn_trans/trans_xtheadvector.c.inc | 94 ++++++++++++++++++-
 target/riscv/vector_helper.c                  |  5 +-
 target/riscv/vector_internals.h               |  3 +
 target/riscv/xtheadvector_helper.c            | 41 ++++++++
 5 files changed, 147 insertions(+), 9 deletions(-)