Message ID | 1B2CDBE7-339F-425A-8130-824EB2B23F56@nokia.com |
---|---|
State | New |
Headers | show |
On Wed, Oct 21, 2009 at 12:17 PM, <Juha.Riihimaki@nokia.com> wrote: > Add support for neon vld1.64 instruction. > > From: Riku Voipio <riku.voipio@iki.fi> > Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com> > --- > diff --git a/target-arm/translate.c b/target-arm/translate.c > index 3ea9d51..d027572 100644 > --- a/target-arm/translate.c > +++ b/target-arm/translate.c > @@ -795,6 +795,12 @@ static inline TCGv gen_ld32(TCGv addr, int index) > tcg_gen_qemu_ld32u(tmp, addr, index); > return tmp; > } > +static inline TCGv_i64 gen_ld64(TCGv addr, int index) > +{ > + TCGv_i64 tmp = tcg_temp_new_i64(); > + tcg_gen_qemu_ld64(tmp, addr, index); > + return tmp; > +} > static inline void gen_st8(TCGv val, TCGv addr, int index) > { > tcg_gen_qemu_st8(val, addr, index); > @@ -810,6 +816,11 @@ static inline void gen_st32(TCGv val, TCGv addr, > int index) > tcg_gen_qemu_st32(val, addr, index); > dead_tmp(val); > } > +static inline void gen_st64(TCGv_i64 val, TCGv addr, int index) > +{ > + tcg_gen_qemu_st64(val, addr, index); > + tcg_temp_free_i64(val); > +} > > static inline void gen_set_pc_im(uint32_t val) > { > @@ -3690,6 +3701,7 @@ static int disas_neon_ls_insn(CPUState * env, > DisasContext *s, uint32_t insn) > TCGv addr; > TCGv tmp; > TCGv tmp2; > + TCGv_i64 tmp64; > > if (!vfp_enabled(env)) > return 1; > @@ -3702,7 +3714,7 @@ static int disas_neon_ls_insn(CPUState * env, > DisasContext *s, uint32_t insn) > /* Load store all elements. */ > op = (insn >> 8) & 0xf; > size = (insn >> 6) & 3; > - if (op > 10 || size == 3) > + if (op > 10) This is wrong: a size of 3 is limited to vld1.64 and vst1.64 which you don't enforce here. Apart from that, the rest looks OK. Laurent > return 1; > nregs = neon_ls_element_type[op].nregs; > interleave = neon_ls_element_type[op].interleave; > @@ -3716,61 +3728,74 @@ static int disas_neon_ls_insn(CPUState * env, > DisasContext *s, uint32_t insn) > load_reg_var(s, addr, rn); > tcg_gen_addi_i32(addr, addr, 1 << size); > } > - for (pass = 0; pass < 2; pass++) { > - if (size == 2) { > - if (load) { > - tmp = gen_ld32(addr, IS_USER(s)); > - neon_store_reg(rd, pass, tmp); > - } else { > - tmp = neon_load_reg(rd, pass); > - gen_st32(tmp, addr, IS_USER(s)); > - } > - tcg_gen_addi_i32(addr, addr, stride); > - } else if (size == 1) { > - if (load) { > - tmp = gen_ld16u(addr, IS_USER(s)); > - tcg_gen_addi_i32(addr, addr, stride); > - tmp2 = gen_ld16u(addr, IS_USER(s)); > - tcg_gen_addi_i32(addr, addr, stride); > - gen_bfi(tmp, tmp, tmp2, 16, 0xffff); > - dead_tmp(tmp2); > - neon_store_reg(rd, pass, tmp); > - } else { > - tmp = neon_load_reg(rd, pass); > - tmp2 = new_tmp(); > - tcg_gen_shri_i32(tmp2, tmp, 16); > - gen_st16(tmp, addr, IS_USER(s)); > - tcg_gen_addi_i32(addr, addr, stride); > - gen_st16(tmp2, addr, IS_USER(s)); > + if (size == 3) { > + if (load) { > + tmp64 = gen_ld64(addr, IS_USER(s)); > + neon_store_reg64(tmp64, rd); > + tcg_temp_free_i64(tmp64); > + } else { > + tmp64 = tcg_temp_new_i64(); > + neon_load_reg64(tmp64, rd); > + gen_st64(tmp64, addr, IS_USER(s)); > + } > + tcg_gen_addi_i32(addr, addr, stride); > + } else { > + for (pass = 0; pass < 2; pass++) { > + if (size == 2) { > + if (load) { > + tmp = gen_ld32(addr, IS_USER(s)); > + neon_store_reg(rd, pass, tmp); > + } else { > + tmp = neon_load_reg(rd, pass); > + gen_st32(tmp, addr, IS_USER(s)); > + } > tcg_gen_addi_i32(addr, addr, stride); > - } > - } else /* size == 0 */ { > - if (load) { > - TCGV_UNUSED(tmp2); > - for (n = 0; n < 4; n++) { > - tmp = gen_ld8u(addr, IS_USER(s)); > + } else if (size == 1) { > + if (load) { > + tmp = gen_ld16u(addr, IS_USER(s)); > + tcg_gen_addi_i32(addr, addr, stride); > + tmp2 = gen_ld16u(addr, IS_USER(s)); > + tcg_gen_addi_i32(addr, addr, stride); > + gen_bfi(tmp, tmp, tmp2, 16, 0xffff); > + dead_tmp(tmp2); > + neon_store_reg(rd, pass, tmp); > + } else { > + tmp = neon_load_reg(rd, pass); > + tmp2 = new_tmp(); > + tcg_gen_shri_i32(tmp2, tmp, 16); > + gen_st16(tmp, addr, IS_USER(s)); > + tcg_gen_addi_i32(addr, addr, stride); > + gen_st16(tmp2, addr, IS_USER(s)); > tcg_gen_addi_i32(addr, addr, stride); > - if (n == 0) { > - tmp2 = tmp; > - } else { > - gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff); > - dead_tmp(tmp); > - } > } > - neon_store_reg(rd, pass, tmp2); > - } else { > - tmp2 = neon_load_reg(rd, pass); > - for (n = 0; n < 4; n++) { > - tmp = new_tmp(); > - if (n == 0) { > - tcg_gen_mov_i32(tmp, tmp2); > - } else { > - tcg_gen_shri_i32(tmp, tmp2, n * 8); > + } else /* size == 0 */ { > + if (load) { > + TCGV_UNUSED(tmp2); > + for (n = 0; n < 4; n++) { > + tmp = gen_ld8u(addr, IS_USER(s)); > + tcg_gen_addi_i32(addr, addr, stride); > + if (n == 0) { > + tmp2 = tmp; > + } else { > + gen_bfi(tmp2, tmp2, tmp, n * 8, > 0xff); > + dead_tmp(tmp); > + } > } > - gen_st8(tmp, addr, IS_USER(s)); > - tcg_gen_addi_i32(addr, addr, stride); > + neon_store_reg(rd, pass, tmp2); > + } else { > + tmp2 = neon_load_reg(rd, pass); > + for (n = 0; n < 4; n++) { > + tmp = new_tmp(); > + if (n == 0) { > + tcg_gen_mov_i32(tmp, tmp2); > + } else { > + tcg_gen_shri_i32(tmp, tmp2, n * 8); > + } > + gen_st8(tmp, addr, IS_USER(s)); > + tcg_gen_addi_i32(addr, addr, stride); > + } > + dead_tmp(tmp2); > } > - dead_tmp(tmp2); > } > } > } >
On Oct 22, 2009, at 11:39, ext Laurent Desnogues wrote: > On Wed, Oct 21, 2009 at 12:17 PM, <Juha.Riihimaki@nokia.com> wrote: >> Add support for neon vld1.64 instruction. >> >> From: Riku Voipio <riku.voipio@iki.fi> >> Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com> >> --- >> diff --git a/target-arm/translate.c b/target-arm/translate.c >> index 3ea9d51..d027572 100644 >> --- a/target-arm/translate.c >> +++ b/target-arm/translate.c >> @@ -795,6 +795,12 @@ static inline TCGv gen_ld32(TCGv addr, int >> index) >> tcg_gen_qemu_ld32u(tmp, addr, index); >> return tmp; >> } >> +static inline TCGv_i64 gen_ld64(TCGv addr, int index) >> +{ >> + TCGv_i64 tmp = tcg_temp_new_i64(); >> + tcg_gen_qemu_ld64(tmp, addr, index); >> + return tmp; >> +} >> static inline void gen_st8(TCGv val, TCGv addr, int index) >> { >> tcg_gen_qemu_st8(val, addr, index); >> @@ -810,6 +816,11 @@ static inline void gen_st32(TCGv val, TCGv addr, >> int index) >> tcg_gen_qemu_st32(val, addr, index); >> dead_tmp(val); >> } >> +static inline void gen_st64(TCGv_i64 val, TCGv addr, int index) >> +{ >> + tcg_gen_qemu_st64(val, addr, index); >> + tcg_temp_free_i64(val); >> +} >> >> static inline void gen_set_pc_im(uint32_t val) >> { >> @@ -3690,6 +3701,7 @@ static int disas_neon_ls_insn(CPUState * env, >> DisasContext *s, uint32_t insn) >> TCGv addr; >> TCGv tmp; >> TCGv tmp2; >> + TCGv_i64 tmp64; >> >> if (!vfp_enabled(env)) >> return 1; >> @@ -3702,7 +3714,7 @@ static int disas_neon_ls_insn(CPUState * env, >> DisasContext *s, uint32_t insn) >> /* Load store all elements. */ >> op = (insn >> 8) & 0xf; >> size = (insn >> 6) & 3; >> - if (op > 10 || size == 3) >> + if (op > 10) > > This is wrong: a size of 3 is limited to vld1.64 and vst1.64 which > you don't enforce here. > > Apart from that, the rest looks OK. Thanks, and you're right of course. I'll add a check that will return 1 if size equals 3 and interleave or spacing is not 1. Cheers, Juha
diff --git a/target-arm/translate.c b/target-arm/translate.c index 3ea9d51..d027572 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -795,6 +795,12 @@ static inline TCGv gen_ld32(TCGv addr, int index) tcg_gen_qemu_ld32u(tmp, addr, index); return tmp; } +static inline TCGv_i64 gen_ld64(TCGv addr, int index) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld64(tmp, addr, index); + return tmp; +} static inline void gen_st8(TCGv val, TCGv addr, int index) { tcg_gen_qemu_st8(val, addr, index); @@ -810,6 +816,11 @@ static inline void gen_st32(TCGv val, TCGv addr, int index) tcg_gen_qemu_st32(val, addr, index); dead_tmp(val); } +static inline void gen_st64(TCGv_i64 val, TCGv addr, int index) +{ + tcg_gen_qemu_st64(val, addr, index); + tcg_temp_free_i64(val); +} static inline void gen_set_pc_im(uint32_t val)