Message ID | 1386612744-1013-5-git-send-email-peter.maydell@linaro.org |
---|---|
State | New |
Headers | show |
On 12/09/2013 10:12 AM, Peter Maydell wrote: > From: Alex Bennée <alex.bennee@linaro.org> > > This adds support for the load/store forms using a register offset. > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > Signed-off-by: Peter Maydell <peter.maydell@linaro.org> > --- > target-arm/translate-a64.c | 141 ++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 140 insertions(+), 1 deletion(-) > > diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c > index ea3abc3..64f98a7 100644 > --- a/target-arm/translate-a64.c > +++ b/target-arm/translate-a64.c > @@ -467,6 +467,54 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) > tcg_temp_free_i64(tmphi); > } > > +/* > + * This utility function is for doing register extension with an > + * optional shift. You will likely want to pass a temporary for the > + * destination register. See DecodeRegExtend() in the ARM ARM. > + */ > +static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, > + int option, unsigned int shift) > +{ > + int extsize = extract32(option, 0, 2); > + bool is_signed = extract32(option, 2, 1); > + > + if (is_signed) { > + switch (extsize) { > + case 0: > + tcg_gen_ext8s_i64(tcg_out, tcg_in); > + break; > + case 1: > + tcg_gen_ext16s_i64(tcg_out, tcg_in); > + break; > + case 2: > + tcg_gen_ext32s_i64(tcg_out, tcg_in); > + break; > + case 3: > + tcg_gen_mov_i64(tcg_out, tcg_in); > + break; > + } > + } else { > + switch (extsize) { > + case 0: > + tcg_gen_ext8u_i64(tcg_out, tcg_in); > + break; > + case 1: > + tcg_gen_ext16u_i64(tcg_out, tcg_in); > + break; > + case 2: > + tcg_gen_ext32u_i64(tcg_out, tcg_in); > + break; > + case 3: > + tcg_gen_mov_i64(tcg_out, tcg_in); > + break; > + } > + } > + > + if (shift) { > + tcg_gen_shli_i64(tcg_out, tcg_out, shift); > + } > +} > + > static inline void gen_check_sp_alignment(DisasContext *s) > { > /* The AArch64 architecture mandates that (if enabled via PSTATE > @@ -1048,6 +1096,93 @@ static void 
disas_ldst_pair(DisasContext *s, uint32_t insn) > } > } > > + > + > +/* > + * C3.3.10 Load/store (register offset) > + * > + * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0 > + * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ > + * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt | > + * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ > + * > + * For non-vector: > + * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit > + * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 > + * For vector: > + * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated > + * opc<0>: 0 -> store, 1 -> load > + * V: 1 -> vector/simd > + * opt: extend encoding (see DecodeRegExtend) > + * S: if S=1 then scale (essentially index by sizeof(size)) > + * Rt: register to transfer into/out of > + * Rn: address register or SP for base > + * Rm: offset register or ZR for offset > + */ > +static void handle_ldst_reg_roffset(DisasContext *s, uint32_t insn) > +{ > + int rt = extract32(insn, 0, 5); > + int rn = extract32(insn, 5, 5); > + int shift = extract32(insn, 12, 1); > + int rm = extract32(insn, 16, 5); > + int opc = extract32(insn, 22, 2); > + int opt = extract32(insn, 13, 3); > + int size = extract32(insn, 30, 2); > + bool is_signed = false; > + bool is_store = false; > + bool is_extended = false; > + bool is_vector = extract32(insn, 26, 1); > + > + TCGv_i64 tcg_rm; > + TCGv_i64 tcg_addr; > + > + if (extract32(opt, 1, 1) == 0) { > + unallocated_encoding(s); > + return; > + } > + > + if (is_vector) { > + size |= (opc & 2) << 1; > + if (size > 4) { > + unallocated_encoding(s); > + } > + is_store = ((opc & 1) == 0); > + } else { > + if (size == 3 && opc == 2) { > + /* PRFM - prefetch */ > + return; > + } > + is_store = (opc == 0); > + is_signed = opc & (1<<1); > + is_extended = (size < 3) && (opc & 1); > + } > + > + if (rn == 31) { > + gen_check_sp_alignment(s); > + } > + tcg_addr = read_cpu_reg_sp(s, 
rn, 1); > + > + tcg_rm = read_cpu_reg(s, rm, 1); > + ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0); > + > + tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm); > + > + if (is_vector) { > + if (is_store) { > + do_fp_st(s, rt, tcg_addr, size); > + } else { > + do_fp_ld(s, rt, tcg_addr, size); > + } > + } else { > + TCGv_i64 tcg_rt = cpu_reg(s, rt); > + if (is_store) { > + do_gpr_st(s, tcg_rt, tcg_addr, size); > + } else { > + do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended); > + } > + } > +} I wonder if it would be better to merge this function with the one from the previous patch. There are only 3-4 lines that are different. r~
rth@twiddle.net writes: > On 12/09/2013 10:12 AM, Peter Maydell wrote: >> From: Alex Bennée <alex.bennee@linaro.org> >> >> This adds support for the load/store forms using a register offset. >> <snip> >> +/* >> + * C3.3.10 Load/store (register offset) >> + * >> + * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0 >> + * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ >> + * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt | >> + * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ >> + * >> + * For non-vector: >> + * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit >> + * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 >> + * For vector: >> + * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated >> + * opc<0>: 0 -> store, 1 -> load >> + * V: 1 -> vector/simd >> + * opt: extend encoding (see DecodeRegExtend) >> + * S: if S=1 then scale (essentially index by sizeof(size)) >> + * Rt: register to transfer into/out of >> + * Rn: address register or SP for base >> + * Rm: offset register or ZR for offset >> + */ >> +static void handle_ldst_reg_roffset(DisasContext *s, uint32_t insn) >> +{ >> + int rt = extract32(insn, 0, 5); >> + int rn = extract32(insn, 5, 5); >> + int shift = extract32(insn, 12, 1); >> + int rm = extract32(insn, 16, 5); >> + int opc = extract32(insn, 22, 2); >> + int opt = extract32(insn, 13, 3); >> + int size = extract32(insn, 30, 2); >> + bool is_signed = false; >> + bool is_store = false; >> + bool is_extended = false; >> + bool is_vector = extract32(insn, 26, 1); >> + >> + TCGv_i64 tcg_rm; >> + TCGv_i64 tcg_addr; >> + >> + if (extract32(opt, 1, 1) == 0) { >> + unallocated_encoding(s); >> + return; >> + } >> + >> + if (is_vector) { >> + size |= (opc & 2) << 1; >> + if (size > 4) { >> + unallocated_encoding(s); >> + } >> + is_store = ((opc & 1) == 0); >> + } else { >> + if (size == 3 && opc == 2) { >> + /* PRFM - prefetch */ >> + return; >> + } >> + 
is_store = (opc == 0); >> + is_signed = opc & (1<<1); >> + is_extended = (size < 3) && (opc & 1); >> + } >> + >> + if (rn == 31) { >> + gen_check_sp_alignment(s); >> + } >> + tcg_addr = read_cpu_reg_sp(s, rn, 1); >> + >> + tcg_rm = read_cpu_reg(s, rm, 1); >> + ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0); >> + >> + tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm); >> + >> + if (is_vector) { >> + if (is_store) { >> + do_fp_st(s, rt, tcg_addr, size); >> + } else { >> + do_fp_ld(s, rt, tcg_addr, size); >> + } >> + } else { >> + TCGv_i64 tcg_rt = cpu_reg(s, rt); >> + if (is_store) { >> + do_gpr_st(s, tcg_rt, tcg_addr, size); >> + } else { >> + do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended); >> + } >> + } >> +} > > I wonder if it would be better to merge this function with the one from the > previous patch. There are only 3-4 lines that are different. I was trying to avoid having it jump through special cases when decoding the instruction where Rm, opt, S clash with the imm field in the other form. But certainly there is an argument for the rest of the common code to be shared (size/opc decoding and final tcg_addr based load/store). However my preference unless there is a strong objection would be to clean that up in later patches. For one thing the more instructions each patch handles the longer it takes to run the instruction validation on the rather slow models to have good coverage of the decoder! > > > r~
On 12/10/2013 06:16 AM, Alex Bennée wrote: > However my preference unless there is a strong objection would be to > clean that up in later patches. For one thing the more instructions each > patch handles the longer it takes to run the instruction validation on > the rather slow models to have good coverage of the decoder! That'd be ok by me. r~
rth@twiddle.net writes: > On 12/10/2013 06:16 AM, Alex Bennée wrote: >> However my preference unless there is a strong objection would be to >> clean that up in later patches. For one thing the more instructions each >> patch handles the longer it takes to run the instruction validation on >> the rather slow models to have good coverage of the decoder! > > That'd be ok by me. I had a play trying to see what pulling out the common parts of decode and unallocated handling based on the ARM ARM pseudo-code into a separate function would look like. Unfortunately what I ended up with was a horrible function full of pass-by-reference parameters and a not particularly cleaner or shorter call-sites in each handler. I did briefly consider if I could construct a macro which would make for less duplicated code but suspect that won't help in the long run. This is certainly something I think that requires more thought. In the meantime I've addressed your other review comments and Peter should be pushing a new set of patches soon. Cheers, -- Alex Bennée QEMU/KVM Hacker for Linaro
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index ea3abc3..64f98a7 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -467,6 +467,54 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) tcg_temp_free_i64(tmphi); } +/* + * This utility function is for doing register extension with an + * optional shift. You will likely want to pass a temporary for the + * destination register. See DecodeRegExtend() in the ARM ARM. + */ +static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, + int option, unsigned int shift) +{ + int extsize = extract32(option, 0, 2); + bool is_signed = extract32(option, 2, 1); + + if (is_signed) { + switch (extsize) { + case 0: + tcg_gen_ext8s_i64(tcg_out, tcg_in); + break; + case 1: + tcg_gen_ext16s_i64(tcg_out, tcg_in); + break; + case 2: + tcg_gen_ext32s_i64(tcg_out, tcg_in); + break; + case 3: + tcg_gen_mov_i64(tcg_out, tcg_in); + break; + } + } else { + switch (extsize) { + case 0: + tcg_gen_ext8u_i64(tcg_out, tcg_in); + break; + case 1: + tcg_gen_ext16u_i64(tcg_out, tcg_in); + break; + case 2: + tcg_gen_ext32u_i64(tcg_out, tcg_in); + break; + case 3: + tcg_gen_mov_i64(tcg_out, tcg_in); + break; + } + } + + if (shift) { + tcg_gen_shli_i64(tcg_out, tcg_out, shift); + } +} + static inline void gen_check_sp_alignment(DisasContext *s) { /* The AArch64 architecture mandates that (if enabled via PSTATE @@ -1048,6 +1096,93 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) } } + + +/* + * C3.3.10 Load/store (register offset) + * + * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0 + * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ + * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt | + * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ + * + * For non-vector: + * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit + * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 + * 
For vector: + * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated + * opc<0>: 0 -> store, 1 -> load + * V: 1 -> vector/simd + * opt: extend encoding (see DecodeRegExtend) + * S: if S=1 then scale (essentially index by sizeof(size)) + * Rt: register to transfer into/out of + * Rn: address register or SP for base + * Rm: offset register or ZR for offset + */ +static void handle_ldst_reg_roffset(DisasContext *s, uint32_t insn) +{ + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int shift = extract32(insn, 12, 1); + int rm = extract32(insn, 16, 5); + int opc = extract32(insn, 22, 2); + int opt = extract32(insn, 13, 3); + int size = extract32(insn, 30, 2); + bool is_signed = false; + bool is_store = false; + bool is_extended = false; + bool is_vector = extract32(insn, 26, 1); + + TCGv_i64 tcg_rm; + TCGv_i64 tcg_addr; + + if (extract32(opt, 1, 1) == 0) { + unallocated_encoding(s); + return; + } + + if (is_vector) { + size |= (opc & 2) << 1; + if (size > 4) { + unallocated_encoding(s); + } + is_store = ((opc & 1) == 0); + } else { + if (size == 3 && opc == 2) { + /* PRFM - prefetch */ + return; + } + is_store = (opc == 0); + is_signed = opc & (1<<1); + is_extended = (size < 3) && (opc & 1); + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + tcg_addr = read_cpu_reg_sp(s, rn, 1); + + tcg_rm = read_cpu_reg(s, rm, 1); + ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? 
size : 0); + + tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm); + + if (is_vector) { + if (is_store) { + do_fp_st(s, rt, tcg_addr, size); + } else { + do_fp_ld(s, rt, tcg_addr, size); + } + } else { + TCGv_i64 tcg_rt = cpu_reg(s, rt); + if (is_store) { + do_gpr_st(s, tcg_rt, tcg_addr, size); + } else { + do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended); + } + } +} + /* * C3.3.13 Load/store (unsigned immediate) * @@ -1125,7 +1260,11 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn) { switch (extract32(insn, 24, 2)) { case 0: - unsupported_encoding(s, insn); + if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) { + handle_ldst_reg_roffset(s, insn); + } else { + unsupported_encoding(s, insn); + } break; case 1: handle_ldst_reg_unsigned_imm(s, insn);