Message ID | 20200817084955.28793-37-frank.chang@sifive.com |
---|---|
State | New |
Headers | show |
Series | support vector extension v1.0 | expand |
On 8/17/20 1:49 AM, frank.chang@sifive.com wrote: > static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) > { > if (require_rvv(s) && > + has_ext(s, RVF) && > vext_check_isa_ill(s) && > require_align(a->rd, 1 << s->lmul) && > (s->sew != 0)) { > + TCGv_i64 t1 = tcg_temp_local_new_i64(); > + /* NaN-box f[rs1] */ > + do_nanbox(s, t1, cpu_fpr[a->rs1]); Don't you need to check s->sew == MO_64 ? has_ext(s, RVD) : has_ext(s, RVF)? It might be worth folding that into its own helper function, which also incorporates the s->sew != 0 check. E.g. static bool require_rvf(DisasContext *s) { switch (s->sew) { case MO_16: case MO_32: return has_ext(s, RVF); case MO_64: return has_ext(s, RVD); default: return false; } } > + TCGv_i64 t1 = tcg_temp_local_new_i64(); > + /* NaN-box f[rs1] */ > + do_nanbox(s, t1, cpu_fpr[a->rs1]); > + > if (s->vl_eq_vlmax) { > tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd), > - MAXSZ(s), MAXSZ(s), cpu_fpr[a->rs1]); > + MAXSZ(s), MAXSZ(s), t1); > mark_vs_dirty(s); > } else { > TCGv_ptr dest; Recall that local temps get written to the stack at branches. You should avoid the local temp by computing do_nanbox on both arms of this IF. In the else branch, do_nanbox should be after the brcond. r~
On 8/29/20 1:00 PM, Richard Henderson wrote: > static bool require_rvf(DisasContext *s) > { > switch (s->sew) { > case MO_16: > case MO_32: > return has_ext(s, RVF); > case MO_64: > return has_ext(s, RVD); > default: > return false; > } > } Oh, and check mstatus_fs here too, which is missing in trans_vfmv_v_f, btw. r~
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index 92d34be5a99..7a12b89dc13 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -2689,12 +2689,17 @@ GEN_OPFVF_TRANS(vfmerge_vfm, opfvf_check) static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) { if (require_rvv(s) && + has_ext(s, RVF) && vext_check_isa_ill(s) && require_align(a->rd, 1 << s->lmul) && (s->sew != 0)) { + TCGv_i64 t1 = tcg_temp_local_new_i64(); + /* NaN-box f[rs1] */ + do_nanbox(s, t1, cpu_fpr[a->rs1]); + if (s->vl_eq_vlmax) { tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd), - MAXSZ(s), MAXSZ(s), cpu_fpr[a->rs1]); + MAXSZ(s), MAXSZ(s), t1); mark_vs_dirty(s); } else { TCGv_ptr dest; @@ -2711,13 +2716,15 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) dest = tcg_temp_new_ptr(); desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data)); tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd)); - fns[s->sew - 1](dest, cpu_fpr[a->rs1], cpu_env, desc); + + fns[s->sew - 1](dest, t1, cpu_env, desc); tcg_temp_free_ptr(dest); tcg_temp_free_i32(desc); mark_vs_dirty(s); gen_set_label(over); } + tcg_temp_free_i64(t1); return true; } return false;