Message ID | BANLkTikW9woHakiWXExjdB6kkHat-R3JRA@mail.gmail.com |
---|---|
State | New |
Headers | show |
On Sat, Apr 09, 2011 at 04:05:45PM +0500, Khansa Butt wrote: > From 7fd3ef1360b78ad85848e54e36d97ab50d77e6a6 Mon Sep 17 00:00:00 2001 > From: Ehsan-ul-Haq & Khansa Butt <khansa@kics.edu.pk> > Date: Sat, 9 Apr 2011 11:09:18 +0500 > Subject: [PATCH 2/2] Support for Cavium-Octeon specific instruction > > > Signed-off-by: Ehsan-ul-Haq, Abdul Qadeer, Abdul Waheed, Khansa Butt < > khansa@kics.edu.pk> > --- > host-utils.c | 14 ++ > host-utils.h | 1 + > linux-user/elfload.c | 5 + > linux-user/main.c | 5 + > linux-user/qemu.h | 1 + > linux-user/syscall.c | 5 + > target-mips/cpu.h | 15 ++ > target-mips/helper.h | 5 + > target-mips/op_helper.c | 70 ++++++ > target-mips/translate.c | 556 > ++++++++++++++++++++++++++++++++++++++++++++++- > 10 files changed, 666 insertions(+), 11 deletions(-) > > diff --git a/host-utils.c b/host-utils.c > index dc96123..3073ef8 100644 > --- a/host-utils.c > +++ b/host-utils.c > @@ -102,4 +102,18 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t > a, int64_t b) > a, b, *phigh, *plow); > #endif > } > + > #endif /* !defined(__x86_64__) */ > +void addc(uint64_t res [], uint64_t a, int i) > +{ > + uint64_t c = res[i]; > + for (;i < 4;i++) { > + res[i] = c + a; > + if (res[i] < a) { > + c = 1; > + a = res[i+1]; > + } > + else > + break; > + } > +} Is it really something we want in host-utils? It is something very specific and in any case it violates coding style. > diff --git a/host-utils.h b/host-utils.h > index 0ddc176..172c4fd 100644 > --- a/host-utils.h > +++ b/host-utils.h > @@ -46,6 +46,7 @@ static inline void muls64(uint64_t *plow, uint64_t *phigh, > void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b); > void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b); > #endif > +void addc(uint64_t res [], uint64_t a, int i); > > /* Binary search for leading zeros. 
*/ > > diff --git a/linux-user/elfload.c b/linux-user/elfload.c > index 2832a33..9399e44 100644 > --- a/linux-user/elfload.c > +++ b/linux-user/elfload.c > @@ -1662,6 +1662,11 @@ int load_elf_binary(struct linux_binprm * bprm, > struct target_pt_regs * regs, > when we load the interpreter. */ > elf_ex = *(struct elfhdr *)bprm->buf; > > +#if defined(TARGET_MIPS64) > + if ((elf_ex.e_flags & EF_MIPS_MARCH) == E_MIPS_MACH_OCTEON) { > + info->elf_arch = 1; > + } > +#endif > bprm->p = copy_elf_strings(1, &bprm->filename, bprm->page, bprm->p); > bprm->p = copy_elf_strings(bprm->envc,bprm->envp,bprm->page,bprm->p); > bprm->p = copy_elf_strings(bprm->argc,bprm->argv,bprm->page,bprm->p); > diff --git a/linux-user/main.c b/linux-user/main.c > index a7f4955..acf9cac 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -3348,6 +3348,11 @@ int main(int argc, char **argv, char **envp) > if (regs->cp0_epc & 1) { > env->hflags |= MIPS_HFLAG_M16; > } > +#if defined(TARGET_MIPS64) > + if (info->elf_arch) { > + env->TARGET_OCTEON = 1; > + } > +#endif > } > #elif defined(TARGET_SH4) > { > diff --git a/linux-user/qemu.h b/linux-user/qemu.h > index 250814d..adef428 100644 > --- a/linux-user/qemu.h > +++ b/linux-user/qemu.h > @@ -51,6 +51,7 @@ struct image_info { > abi_ulong arg_start; > abi_ulong arg_end; > int personality; > + int elf_arch; > #ifdef CONFIG_USE_FDPIC > abi_ulong loadmap_addr; > uint16_t nsegs; > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > index bb0999d..d5da0ee 100644 > --- a/linux-user/syscall.c > +++ b/linux-user/syscall.c > @@ -7320,6 +7320,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long > arg1, > case TARGET_NR_set_thread_area: > #if defined(TARGET_MIPS) > ((CPUMIPSState *) cpu_env)->tls_value = arg1; > + if (((CPUMIPSState *) cpu_env)->TARGET_OCTEON) { > + /* tls entry is moved to k0 so that this can be used later > + * currently this thing is tested only for Octeon */ > + ((CPUMIPSState *) cpu_env)->active_tc.gpr[26] = arg1; > + 
} > ret = 0; > break; > #elif defined(TARGET_CRIS) All the above code is actually specific to linux-user. It would be better to put them in a separate patch. > diff --git a/target-mips/cpu.h b/target-mips/cpu.h > index 0b98d10..8ef8afb 100644 > --- a/target-mips/cpu.h > +++ b/target-mips/cpu.h > @@ -5,6 +5,11 @@ > > #define ELF_MACHINE EM_MIPS > > +/* These flags are used to check whether given > + * user mode binary is octeon specific or not */ > +#define EF_MIPS_MARCH 0xFF0000 > +#define E_MIPS_MACH_OCTEON 0x8B0000 > + > #define CPUState struct CPUMIPSState > > #include "config.h" > @@ -171,6 +176,15 @@ struct TCState { > target_ulong CP0_TCSchedule; > target_ulong CP0_TCScheFBack; > int32_t CP0_Debug_tcstatus; > + /* Multiplier registers for Octeon */ > + target_ulong MPL0; > + target_ulong MPL1; > + target_ulong MPL2; > + target_ulong P0; > + target_ulong P1; > + target_ulong P2; > + /* Octeon specific Coprocessor 0 register */ > + target_ulong cvmctl; > }; > > typedef struct CPUMIPSState CPUMIPSState; > @@ -178,6 +192,7 @@ struct CPUMIPSState { > TCState active_tc; > CPUMIPSFPUContext active_fpu; > > + int TARGET_OCTEON; We don't want a specific boolean value for octeons. What we want is a new insn_flags for all octeon specific code. Have a look at mips-defs.h to see how it's done. 
> uint32_t current_tc; > uint32_t current_fpu; > > diff --git a/target-mips/helper.h b/target-mips/helper.h > index 297ab64..409c08d 100644 > --- a/target-mips/helper.h > +++ b/target-mips/helper.h > @@ -8,7 +8,12 @@ DEF_HELPER_3(ldl, tl, tl, tl, int) > DEF_HELPER_3(ldr, tl, tl, tl, int) > DEF_HELPER_3(sdl, void, tl, tl, int) > DEF_HELPER_3(sdr, void, tl, tl, int) > +DEF_HELPER_3(v3mulu, void, tl, tl, int) > +DEF_HELPER_3(vmulu, void, tl, tl, int) > +DEF_HELPER_2(dpop, void, tl, i32) > #endif > +DEF_HELPER_2(pop, void, tl, i32) > + > DEF_HELPER_3(lwl, tl, tl, tl, int) > DEF_HELPER_3(lwr, tl, tl, tl, int) > DEF_HELPER_3(swl, void, tl, tl, int) > diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c > index bd16ce3..de793ba 100644 > --- a/target-mips/op_helper.c > +++ b/target-mips/op_helper.c > @@ -266,7 +266,77 @@ void helper_dmultu (target_ulong arg1, target_ulong > arg2) > { > mulu64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2); > } > +void helper_v3mulu (target_ulong arg1, target_ulong arg2, int rd) > +{ > + uint64_t hi, lo, res[4]; > + int i; > + for (i = 0;i < 4; i++) { > + res[i] = 0; > + } > + mulu64(&res[0], &res[1], env->active_tc.MPL0, arg1); > + mulu64(&lo, &hi, env->active_tc.MPL1, arg1); > + res[1] = res[1] + lo; > + if (res[1] < lo) > + res[2]++; > + res[2] = res[2] + hi; > + if (res[2] < hi) > + res[3]++; > + mulu64(&lo, &hi, env->active_tc.MPL2, arg1); > + res[2] = res[2] + lo; > + if (res[2] < lo) > + res[3]++; > + res[3] = res[3] + hi; > + addc(res, arg2, 0); > + addc(res, env->active_tc.P0, 0); > + addc(res, env->active_tc.P1, 1); > + addc(res, env->active_tc.P2, 2); > + env->active_tc.gpr[rd] = res[0]; > + env->active_tc.P0 = res[1]; > + env->active_tc.P1 = res[2]; > + env->active_tc.P2 = res[3]; > +} Please watch the CODING_STYLE (braces, indentation) > +void helper_vmulu (target_ulong arg1, target_ulong arg2, int rd) > +{ > + uint64_t hi, lo; > + mulu64(&lo, &hi, env->active_tc.MPL0, arg1); > + lo = lo + arg2; > + if 
(lo < arg2) > + hi++; > + lo = lo + env->active_tc.P0; > + if (lo < env->active_tc.P0) > + hi++; > + env->active_tc.gpr[rd] = lo; > + env->active_tc.P0 = hi; > +} Please watch the CODING_STYLE (braces, indentation) > +void helper_dpop (target_ulong arg, uint32_t rd) > +{ > + uint32_t rem, ones = 0; > + while (arg >= 1) { > + arg = arg/2; > + rem = arg % 2; > + if(rem == 1) > + ones++; > + if(arg == 1) > + ones++; > + } Please use ctpop64 from host-utils.h instead > + env->active_tc.gpr[rd] = ones; It's better to return a value instead of directly writing the registers, as further optimizations can be done on the helper. > +} > #endif > +void helper_pop (target_ulong arg, uint32_t rd) > +{ > + uint32_t rem, i, ones = 0; > + i = 0; > + while(i <= 31) { > + arg = arg/2; > + rem = arg % 2; > + if(rem == 1) > + ones++; > + if(arg == 1) > + ones++; > + i++; > + } > + env->active_tc.gpr[rd] = ones; Same with ctpop32. > +} > > #ifndef CONFIG_USER_ONLY > > diff --git a/target-mips/translate.c b/target-mips/translate.c > index 63c2563..1394e18 100644 > --- a/target-mips/translate.c > +++ b/target-mips/translate.c > @@ -36,6 +36,14 @@ > #define GEN_HELPER 1 > #include "helper.h" > > +#if defined(TARGET_MIPS64) > +/* Macros for setting values of cvmctl registers */ > +#define FUSE_START_BIT(cvmctl) (cvmctl | 0x80000000) > +#define KASUMI(cvmctl) (cvmctl | 0x20000000) > +#define IPPCI(cvmctl) (cvmctl | 0x380) > +#define IPTI(cvmctl) (cvmctl | 0x70) > +#endif > + > //#define MIPS_DEBUG_DISAS > //#define MIPS_DEBUG_SIGN_EXTENSIONS > > @@ -70,6 +78,11 @@ enum { > OPC_JAL = (0x03 << 26), > OPC_JALS = OPC_JAL | 0x5, > OPC_BEQ = (0x04 << 26), /* Unconditional if rs = rt = 0 (B) */ > + /* Cavium Specific */ > + OPC_BBIT1 = (0x3a << 26), /* jump on bit set, cavium specific */ > + OPC_BBIT132 = (0x3e << 26), /* jump on bit set(for upper 32 bits) */ > + OPC_BBIT0 = (0x32 << 26), /* jump on bit clear, cavium specific */ > + OPC_BBIT032 = (0x36 << 26), /* jump on bit clear(for upper 32 
bits) > */ > OPC_BEQL = (0x14 << 26), > OPC_BNE = (0x05 << 26), > OPC_BNEL = (0x15 << 26), > @@ -265,6 +278,31 @@ enum { > OPC_MADD = 0x00 | OPC_SPECIAL2, > OPC_MADDU = 0x01 | OPC_SPECIAL2, > OPC_MUL = 0x02 | OPC_SPECIAL2, > + /* Cavium Specific Instructions */ > + OPC_BADDU = 0x28 | OPC_SPECIAL2, > + OPC_DMUL = 0x03 | OPC_SPECIAL2, > + OPC_EXTS = 0x3a | OPC_SPECIAL2, > + OPC_EXTS32 = 0x3b | OPC_SPECIAL2, > + OPC_CINS = 0x32 | OPC_SPECIAL2, > + OPC_CINS32 = 0x33 | OPC_SPECIAL2, > + OPC_SEQI = 0x2e | OPC_SPECIAL2, > + OPC_SNEI = 0x2f | OPC_SPECIAL2, > + OPC_MTM0 = 0x08 | OPC_SPECIAL2, > + OPC_MTM1 = 0x0c | OPC_SPECIAL2, > + OPC_MTM2 = 0x0d | OPC_SPECIAL2, > + OPC_MTP0 = 0x09 | OPC_SPECIAL2, > + OPC_MTP1 = 0x0a | OPC_SPECIAL2, > + OPC_MTP2 = 0x0b | OPC_SPECIAL2, > + OPC_V3MULU = 0x11 | OPC_SPECIAL2, > + OPC_VMM0 = 0x10 | OPC_SPECIAL2, > + OPC_VMULU = 0x0f | OPC_SPECIAL2, > + OPC_POP = 0X2C | OPC_SPECIAL2, > + OPC_DPOP = 0X2D | OPC_SPECIAL2, > + OPC_SEQ = 0x2a | OPC_SPECIAL2, > + OPC_SNE = 0x2b | OPC_SPECIAL2, > + OPC_SAA = 0x18 | OPC_SPECIAL2, > + OPC_SAAD = 0x19 | OPC_SPECIAL2, > +/**************************************/ > OPC_MSUB = 0x04 | OPC_SPECIAL2, > OPC_MSUBU = 0x05 | OPC_SPECIAL2, > /* Loongson 2F */ > @@ -483,7 +521,7 @@ enum { > static TCGv_ptr cpu_env; > static TCGv cpu_gpr[32], cpu_PC; > static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC], > cpu_ACX[MIPS_DSP_ACC]; > -static TCGv cpu_dspctrl, btarget, bcond; > +static TCGv cpu_dspctrl, btarget, bcond, mpl0, mpl1, mpl2, p0, p1, p2; > static TCGv_i32 hflags; > static TCGv_i32 fpu_fcr0, fpu_fcr31; > > @@ -847,6 +885,13 @@ static inline void check_mips_64(DisasContext *ctx) > if (unlikely(!(ctx->hflags & MIPS_HFLAG_64))) > generate_exception(ctx, EXCP_RI); > } > +/* This code generates a "reserved instruction" exception if Octeon > + instructions are not enabled. 
*/ > +static inline void check_octeon(DisasContext *ctx, CPUState *env) > +{ > + if (!env->TARGET_OCTEON) > + generate_exception(ctx, EXCP_RI); > +} You might want to replace calls to check_octeon(ctx, env) by check_insn( env, ctx, INSN_OCTEON); > /* Define small wrappers for gen_load_fpr* so that we have a uniform > calling interface for 32 and 64-bit FPRs. No sense in changing > @@ -1419,7 +1464,33 @@ static void gen_arith_imm (CPUState *env, > DisasContext *ctx, uint32_t opc, > (void)opn; /* avoid a compiler warning */ > MIPS_DEBUG("%s %s, %s, " TARGET_FMT_lx, opn, regnames[rt], > regnames[rs], uimm); > } > - > +#if defined(TARGET_MIPS64) > +/* set on equal immidiate/seton not equal immidiate */ > +static void gen_set_imm(CPUState *env, uint32_t opc, int rt, int rs, > int16_t imm) > +{ > + target_ulong uimm; > + TCGv t0, t1; > + const char *opn = "imm set"; > + uimm = (uint16_t)imm; > + t0 = tcg_temp_new(); > + t1 = tcg_temp_new(); > + switch (opc) { > + case OPC_SEQI: > + tcg_gen_xori_tl(cpu_gpr[rt], cpu_gpr[rs], uimm); > + gen_load_gpr(t0, rt); Doing so just load cpu_gpr[rt] into t0. Is it really what you want to do here? > + tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_gpr[rt], t0, 1); If you want to access cpu_gpr[rt] and cpu_gpr[rs] directly, you should handle the fact that rt or rs can be 0. I don't know exactly what the instruction is supposed to do, but this looks plainly wrong. > + opn = "seqi"; > + break; > + case OPC_SNEI: > + tcg_gen_xori_tl(cpu_gpr[rt], cpu_gpr[rs], uimm); > + gen_load_gpr(t0, rt); > + gen_load_gpr(t1, 0); > + tcg_gen_setcond_tl(TCG_COND_LTU, cpu_gpr[rt], t1, t0); > + opn = "snei"; > + break; Ditto. 
> + } > +} > +#endif > /* Logic with immediate operand */ > static void gen_logic_imm (CPUState *env, uint32_t opc, int rt, int rs, > int16_t imm) > { > @@ -1583,7 +1654,86 @@ static void gen_shift_imm(CPUState *env, DisasContext > *ctx, uint32_t opc, > MIPS_DEBUG("%s %s, %s, " TARGET_FMT_lx, opn, regnames[rt], > regnames[rs], uimm); > tcg_temp_free(t0); > } > +#if defined(TARGET_MIPS64) > +static void gen_LMI (CPUMIPSState *env, DisasContext *ctx, uint32_t opc, > + int rs, int rt, int rd) > +{ > + const char *opn = "LMI"; > + TCGv t0, t1; > + t0 = tcg_temp_new(); > + t1 = tcg_temp_new(); > + int nomul = env->active_tc.cvmctl & 0x8000000; > + if (!nomul) { > + switch (opc) { > + case OPC_MTM0: > + tcg_gen_mov_tl(mpl0, cpu_gpr[rs]); You should probably handle the case where rs = 0. Or use gen_load_gpr() > + tcg_gen_movi_tl(p0, 0); > + tcg_gen_movi_tl(p1, 0); > + tcg_gen_movi_tl(p2, 0); > + opn = "mtm0"; > + break; > + case OPC_MTM1: > + tcg_gen_mov_tl(mpl1, cpu_gpr[rs]); Ditto here and for the cases below. 
> + tcg_gen_movi_tl(p0, 0); > + tcg_gen_movi_tl(p1, 0); > + tcg_gen_movi_tl(p2, 0); > + opn = "mtm1"; > + break; > + case OPC_MTM2: > + tcg_gen_mov_tl(mpl2, cpu_gpr[rs]); > + tcg_gen_movi_tl(p0, 0); > + tcg_gen_movi_tl(p1, 0); > + tcg_gen_movi_tl(p2, 0); > + opn = "mtm2"; > + break; > + case OPC_MTP0: > + tcg_gen_mov_tl(p0, cpu_gpr[rs]); > + opn = "mtp0"; > + break; > + case OPC_MTP1: > + tcg_gen_mov_tl(p1, cpu_gpr[rs]); > + opn = "mtp1"; > + break; > + case OPC_MTP2: > + tcg_gen_mov_tl(p2, cpu_gpr[rs]); > + opn = "mtp2"; > + break; > + case OPC_VMM0: > + gen_load_gpr(t1, rs); > + gen_helper_dmultu(t1, mpl0); > + gen_load_gpr(t0, rt); > + tcg_gen_add_tl(t0, t0, cpu_LO[0]); > + tcg_gen_add_tl(t0, t0, p0); > + gen_store_gpr(t0, rd); > + tcg_gen_mov_tl(mpl0, cpu_gpr[rd]); > + tcg_gen_movi_tl(p0, 0); > + tcg_gen_movi_tl(p1, 0); > + tcg_gen_movi_tl(p2, 0); > + opn = "vmm0"; > + break; > + case OPC_VMULU: > + gen_load_gpr(t0, rs); > + gen_load_gpr(t1, rt); > + gen_helper_2i(vmulu, t0, t1, rd); > + opn = "vmulu"; > + break; > + case OPC_V3MULU: > + gen_load_gpr(t0, rs); > + gen_load_gpr(t1, rt); > + gen_helper_2i(v3mulu, t0, t1, rd); > + opn = "v3mulu"; > + break; > + } > + > + } else { > + generate_exception(ctx, EXCP_RI); > + } > + tcg_temp_free(t0); > + tcg_temp_free(t1); > +} > + > > +#endif > /* Arithmetic */ > static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc, > int rd, int rs, int rt) > @@ -1637,6 +1787,25 @@ static void gen_arith (CPUState *env, DisasContext > *ctx, uint32_t opc, > } > opn = "addu"; > break; > + case OPC_BADDU: > + { > + TCGv t0 = tcg_temp_new(); > + TCGv t1 = tcg_temp_new(); > + TCGv t2 = tcg_temp_new(); > + gen_load_gpr(t1, rs); > + gen_load_gpr(t2, rt); > + tcg_gen_andi_tl(t1, t1, 0xff); > + tcg_gen_andi_tl(t2, t2, 0xff); > + tcg_gen_add_tl(t0, t1, t2); > + tcg_gen_andi_tl(t0, t0, 0xff); This looks like overly complicated, you can just do the addition and then do ext8u. You can also use only two registers. 
> + gen_store_gpr(t0, rd); > + tcg_temp_free(t0); > + tcg_temp_free(t1); > + tcg_temp_free(t2); > + } > + > + opn = "baddu"; > + break; > case OPC_SUB: > { > TCGv t0 = tcg_temp_local_new(); > @@ -2013,7 +2182,70 @@ static void gen_HILO (DisasContext *ctx, uint32_t > opc, int reg) > (void)opn; /* avoid a compiler warning */ > MIPS_DEBUG("%s %s", opn, regnames[reg]); > } > +#if defined(TARGET_MIPS64) > +static void gen_seqsne (DisasContext *ctx, uint32_t opc, > + int rd, int rs, int rt) > +{ > + const char *opn = "seq/sne"; > + TCGv t0, t1; > + t0 = tcg_temp_new(); > + t1 = tcg_temp_new(); > + switch (opc) { > + case OPC_SEQ: > + tcg_gen_xor_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]); > + gen_load_gpr(t0, rd); Doing so just load cpu_gpr[rd] into t0. Is it really what you want to do here? > + tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_gpr[rd], t0, 1); > + opn = "seq"; > + break; > + case OPC_SNE: > + tcg_gen_xor_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]); > + gen_load_gpr(t0, rd); Ditto. > + gen_load_gpr(t1, 0); > + tcg_gen_setcond_tl(TCG_COND_LTU, cpu_gpr[rd], t1, t0); > + opn = "sne"; > + break; > + default: > + MIPS_INVAL(opn); > + generate_exception(ctx, EXCP_RI); > + goto out; > + } > +out: > + tcg_temp_free(t0); > + tcg_temp_free(t1); > + > +} > + > +static void gen_saa (CPUState *env, DisasContext *ctx, uint32_t opc, > + int rt, int base) > +{ > + const char *opn = "saa"; > + TCGv t0, t1, temp; > + t0 = tcg_temp_new(); > + t1 = tcg_temp_new(); > + temp = tcg_temp_new(); > + gen_load_gpr(t1, rt); > + gen_base_offset_addr(ctx, t0, base, 0); > + switch (opc) { > + case OPC_SAA: > + save_cpu_state(ctx, 1); > + op_ld_lw(temp, t0, ctx); > + tcg_gen_add_tl(temp, temp, t1); > + op_st_sw(temp, t0, ctx); > + opn = "saa"; > + break; > + case OPC_SAAD: > + save_cpu_state(ctx, 0); > + op_ld_ld(temp, t0, ctx); > + tcg_gen_add_tl(temp, temp, t1); > + op_st_sd(temp, t0, ctx); > + opn = "saad"; > + break; > + } You should add a comment explaining that the operation should be atomic. 
That will be something to do when emulating SMP systems. > + tcg_temp_free(t0); > + tcg_temp_free(t1); > +} > +#endif > static void gen_muldiv (DisasContext *ctx, uint32_t opc, > int rs, int rt) > { > @@ -2149,6 +2381,10 @@ static void gen_muldiv (DisasContext *ctx, uint32_t > opc, > gen_helper_dmult(t0, t1); > opn = "dmult"; > break; > + case OPC_DMUL: > + gen_helper_dmult(t0, t1); > + opn = "dmul"; > + break; While this is correct, you probably don't want to use an helper, but a simple tcg multiplication. > case OPC_DMULTU: > gen_helper_dmultu(t0, t1); > opn = "dmultu"; > @@ -2368,7 +2604,26 @@ static void gen_cl (DisasContext *ctx, uint32_t opc, > MIPS_DEBUG("%s %s, %s", opn, regnames[rd], regnames[rs]); > tcg_temp_free(t0); > } > - > +#if defined(TARGET_MIPS64) > +static void gen_pop_count (DisasContext *ctx, uint32_t opc, int rd, int rs) > +{ > + const char *opn = "pop"; > + TCGv t0; > + t0 = tcg_temp_new(); > + gen_load_gpr(t0, rs); > + switch (opc) { > + case OPC_DPOP: > + gen_helper_1i(dpop, t0, rd); > + opn = "dpop"; > + break; > + case OPC_POP: > + gen_helper_1i(pop, t0, rd); > + opn = "pop"; > + break; > + } > + tcg_temp_free(t0); > +} > +#endif > /* Godson integer instructions */ > static void gen_loongson_integer (DisasContext *ctx, uint32_t opc, > int rd, int rs, int rt) > @@ -2705,6 +2960,7 @@ static void gen_compute_branch (DisasContext *ctx, > uint32_t opc, > target_ulong btgt = -1; > int blink = 0; > int bcond_compute = 0; > + target_ulong maskb; /* Used in BBIT0 and BBIT1 */ > TCGv t0 = tcg_temp_new(); > TCGv t1 = tcg_temp_new(); > > @@ -2730,6 +2986,39 @@ static void gen_compute_branch (DisasContext *ctx, > uint32_t opc, > } > btgt = ctx->pc + insn_bytes + offset; > break; > + case OPC_BBIT1: > + gen_load_gpr(t0, rs); > + gen_load_gpr(t1, 0); There is no need to load t1 here, as it is not used. 
> + maskb = 1ULL << rt; > + tcg_gen_andi_tl(t0, t0, maskb); > + bcond_compute = 1; > + btgt = ctx->pc + insn_bytes + offset; > + break; > + case OPC_BBIT132: > + gen_load_gpr(t0, rs); > + gen_load_gpr(t1, 0); Ditto. > + maskb = 1ULL << (rt + 32); > + tcg_gen_andi_tl(t0, t0, maskb); > + bcond_compute = 1; > + btgt = ctx->pc + insn_bytes + offset; > + break; > + case OPC_BBIT0: > + gen_load_gpr(t0, rs); > + gen_load_gpr(t1, 0); Ditto. > + maskb = 1ULL << rt; > + tcg_gen_andi_tl(t0, t0, maskb); > + bcond_compute = 1; > + btgt = ctx->pc + insn_bytes + offset; > + break; > + case OPC_BBIT032: > + gen_load_gpr(t0, rs); > + gen_load_gpr(t1, 0); Ditto. > + maskb = 1ULL << (rt + 32); > + tcg_gen_andi_tl(t0, t0, maskb); > + bcond_compute = 1; > + btgt = ctx->pc + insn_bytes + offset; > + break; > + These 4 instructions have a lot in common; it's probably possible to implement them with the same code, instead of repeating very similar code four times. > case OPC_BGEZ: > case OPC_BGEZAL: > case OPC_BGEZALS: > @@ -2888,6 +3177,18 @@ static void gen_compute_branch (DisasContext *ctx, > uint32_t opc, > MIPS_DEBUG("bne %s, %s, " TARGET_FMT_lx, > regnames[rs], regnames[rt], btgt); > goto not_likely; > + case OPC_BBIT1: > + tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1); > + goto not_likely; > + case OPC_BBIT132: > + tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1); > + goto not_likely; > + case OPC_BBIT0: > + tcg_gen_setcond_tl(TCG_COND_EQ, bcond, t0, t1); > + goto not_likely; > + case OPC_BBIT032: > + tcg_gen_setcond_tl(TCG_COND_EQ, bcond, t0, t1); > + goto not_likely; > case OPC_BNEL: > tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1); > MIPS_DEBUG("bnel %s, %s, " TARGET_FMT_lx, > @@ -2983,7 +3284,42 @@ static void gen_compute_branch (DisasContext *ctx, > uint32_t opc, > tcg_temp_free(t0); > tcg_temp_free(t1); > } > +/* For cavium specific extract instructions */ > +#if defined(TARGET_MIPS64) > +static void gen_exts (CPUState *env,DisasContext *ctx, uint32_t opc, int > rt, > + int rs,
int lsb, int msb) > +{ > + TCGv t0 = tcg_temp_new(); > + TCGv t1 = tcg_temp_new(); > + target_ulong mask; > + gen_load_gpr(t1, rs); > + switch (opc) { > + case OPC_EXTS: > + tcg_gen_shri_tl(t0, t1, lsb); > + tcg_gen_andi_tl(t0, t0, (1ULL << (msb + 1)) - 1); > + /* To sign extened the remaining bits according to > + the msb of the bit field */ > + mask = 1ULL << msb; > + tcg_gen_andi_tl(t1, t0, mask); > + tcg_gen_addi_tl(t1, t1, -1); > + tcg_gen_orc_tl(t0, t0, t1); To sign extend a value, you can use tcg_gen_ext32s_tl() > + gen_store_gpr(t0, rt); > + break; > + case OPC_EXTS32: > + tcg_gen_shri_tl(t0, t1, lsb + 32); > + tcg_gen_andi_tl(t0, t0, (1ULL << (msb + 1)) - 1); > + mask = 1ULL << msb; > + tcg_gen_andi_tl(t1, t0, mask); > + tcg_gen_addi_tl(t1, t1, -1); > + tcg_gen_orc_tl(t0, t0, t1); Ditto > + gen_store_gpr(t0, rt); > + break; > > + } > + tcg_temp_free(t0); > + tcg_temp_free(t1); > +} > +#endif > /* special3 bitfield operations */ > static void gen_bitops (DisasContext *ctx, uint32_t opc, int rt, > int rs, int lsb, int msb) > @@ -3063,6 +3399,22 @@ static void gen_bitops (DisasContext *ctx, uint32_t > opc, int rt, > tcg_gen_andi_tl(t1, t1, mask); > tcg_gen_or_tl(t0, t0, t1); > break; > + case OPC_CINS: > + mask = (1ULL << (msb+1))-1; > + gen_load_gpr(t0, rt); > + tcg_gen_andi_tl(t0, t0, 0); > + tcg_gen_andi_tl(t1, t1, mask); > + tcg_gen_shli_tl(t1, t1, lsb); > + tcg_gen_or_tl(t0, t0, t1); > + break; > + case OPC_CINS32: > + mask = (1ULL << (msb+1))-1; > + gen_load_gpr(t0, rt); > + tcg_gen_andi_tl(t0, t0, 0); > + tcg_gen_andi_tl(t1, t1, mask); > + tcg_gen_shli_tl(t1, t1, (lsb+32)); > + tcg_gen_or_tl(t0, t0, t1); > + break; > #endif > default: > fail: > @@ -11609,8 +11961,8 @@ static void decode_opc (CPUState *env, DisasContext > *ctx, int *is_branch) > int32_t offset; > int rs, rt, rd, sa; > uint32_t op, op1, op2; > - int16_t imm; > - > + int16_t imm, imm10; > + int TARGET_OCTEON = env->TARGET_OCTEON; > /* make sure instructions are on a word boundary */ > if 
(ctx->pc & 0x3) { > env->CP0_BadVAddr = ctx->pc; > @@ -11638,6 +11990,9 @@ static void decode_opc (CPUState *env, DisasContext > *ctx, int *is_branch) > rd = (ctx->opcode >> 11) & 0x1f; > sa = (ctx->opcode >> 6) & 0x1f; > imm = (int16_t)ctx->opcode; > + /* 10 bit Immediate value For SEQI,SNEI */ > + imm10 = (ctx->opcode >> 6) & 0x3ff; > + > switch (op) { > case OPC_SPECIAL: > op1 = MASK_SPECIAL(ctx->opcode); > @@ -11863,6 +12218,84 @@ static void decode_opc (CPUState *env, DisasContext > *ctx, int *is_branch) > case OPC_MUL: > gen_arith(env, ctx, op1, rd, rs, rt); > break; > +#if defined(TARGET_MIPS64) > + > + case OPC_DMUL: > + check_insn(env, ctx, ISA_MIPS3); > + check_mips_64(ctx); > + check_octeon(ctx, env); You don't need to check for both MIPS3 and Octeon. > + gen_muldiv(ctx, op1, rs, rt); > + tcg_gen_mov_tl(cpu_gpr[rd], cpu_LO[0]); > + break; > + case OPC_CINS: > + check_insn(env, ctx, ISA_MIPS64R2); > + check_mips_64(ctx); > + check_octeon(ctx, env); Same here with ISA_MIPS64R2. 
> + gen_bitops(ctx, op1, rt, rs, sa, rd); > + break; > + case OPC_CINS32: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_bitops(ctx, op1, rt, rs, sa, rd); > + break; > + case OPC_MTM0: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_LMI(env, ctx, op1, rs, rt, rd); > + break; > + case OPC_MTM1: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_LMI(env, ctx, op1, rs, rt, rd); > + break; > + case OPC_MTM2: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_LMI(env, ctx, op1, rs, rt, rd); > + break; > + case OPC_MTP0: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_LMI(env, ctx, op1, rs, rt, rd); > + break; > + case OPC_MTP1: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_LMI(env, ctx, op1, rs, rt, rd); > + break; > + case OPC_MTP2: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_LMI(env, ctx, op1, rs, rt, rd); > + break; > + case OPC_VMULU: > + check_mips_64(ctx); > + gen_LMI(env, ctx, op1, rs, rt, rd); > + break; All the LMI calls can be grouped together. 
> + case OPC_BADDU: > + check_octeon(ctx, env); > + gen_arith(env, ctx, op1, rd, rs, rt); > + break; > + case OPC_EXTS: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_exts(env, ctx, op1, rt, rs, sa, rd); > + break; > + case OPC_EXTS32: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_exts(env, ctx, op1, rt, rs, sa, rd); > + break; > + case OPC_SAA: > + check_octeon(ctx, env); > + gen_saa(env, ctx, op1, rt, rs); > + break; > + case OPC_SAAD: > + check_octeon(ctx, env); > + check_mips_64(ctx); > + gen_saa(env, ctx, op1, rt, rs); > + break; > +#endif > case OPC_CLO: > case OPC_CLZ: > check_insn(env, ctx, ISA_MIPS32); > @@ -11882,13 +12315,25 @@ static void decode_opc (CPUState *env, > DisasContext *ctx, int *is_branch) > break; > case OPC_DIV_G_2F: > case OPC_DIVU_G_2F: > - case OPC_MULT_G_2F: > case OPC_MULTU_G_2F: > case OPC_MOD_G_2F: > case OPC_MODU_G_2F: > check_insn(env, ctx, INSN_LOONGSON2F); > gen_loongson_integer(ctx, op1, rd, rs, rt); > break; > + case OPC_MULT_G_2F: > + if (!TARGET_OCTEON) { > + check_insn(env, ctx, INSN_LOONGSON2F); > + gen_loongson_integer(ctx, op1, rd, rs, rt); > + } else { > +#if defined(TARGET_MIPS64) > + /* Cavium Specific vmm0 */ > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_LMI(env, ctx, op1, rs, rt, rd); > +#endif > + } > + break; > #if defined(TARGET_MIPS64) > case OPC_DCLO: > case OPC_DCLZ: > @@ -11896,7 +12341,6 @@ static void decode_opc (CPUState *env, DisasContext > *ctx, int *is_branch) > check_mips_64(ctx); > gen_cl(ctx, op1, rd, rs); > break; > - case OPC_DMULT_G_2F: > case OPC_DMULTU_G_2F: > case OPC_DDIV_G_2F: > case OPC_DDIVU_G_2F: > @@ -11905,6 +12349,46 @@ static void decode_opc (CPUState *env, DisasContext > *ctx, int *is_branch) > check_insn(env, ctx, INSN_LOONGSON2F); > gen_loongson_integer(ctx, op1, rd, rs, rt); > break; > + case OPC_DMULT_G_2F: > + if (!TARGET_OCTEON) { > + check_insn(env, ctx, INSN_LOONGSON2F); > + gen_loongson_integer(ctx, op1, rd, rs, rt); > + } else { > + 
/* Cavium Specific instruction v3mulu */ > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_LMI(env, ctx, op1, rs, rt, rd); > + } > + break; > + case OPC_SEQ: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_seqsne(ctx, op1, rd, rs, rt); > + break; > + case OPC_SNE: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_seqsne(ctx, op1, rd, rs, rt); > + break; > + case OPC_SEQI: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_set_imm(env, op1, rt, rs, imm10); > + break; > + case OPC_SNEI: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_set_imm(env, op1, rt, rs, imm10); > + break; > + case OPC_POP: > + check_octeon(ctx, env); > + gen_pop_count(ctx, op1, rd, rs); > + break; > + case OPC_DPOP: > + check_mips_64(ctx); > + check_octeon(ctx, env); > + gen_pop_count(ctx, op1, rd, rs); > + break; > #endif > default: /* Invalid */ > MIPS_INVAL("special2"); > @@ -12196,10 +12680,32 @@ static void decode_opc (CPUState *env, > DisasContext *ctx, int *is_branch) > break; > > /* COP2. */ > - case OPC_LWC2: > - case OPC_LDC2: > - case OPC_SWC2: > - case OPC_SDC2: > + /* Conflicting opcodes with Cavium specific branch instructions > + if TARGET_OCTEON is set these opcodes will belong to Cavium */ > + case OPC_LWC2: /* BBIT0 */ > + if (TARGET_OCTEON) { > + gen_compute_branch(ctx, op, 4, rs, rt, imm << 2); > + *is_branch = 1; > + break; > + } > + case OPC_LDC2: /* BBIT032 */ > + if (TARGET_OCTEON) { > + gen_compute_branch(ctx, op, 4, rs, rt, imm << 2); > + *is_branch = 1; > + break; > + } > + case OPC_SWC2: /* BBIT1 */ > + if (TARGET_OCTEON) { > + gen_compute_branch(ctx, op, 4, rs, rt, imm << 2); > + *is_branch = 1; > + break; > + } > + case OPC_SDC2: /* BBIT132 */ > + if (TARGET_OCTEON) { > + gen_compute_branch(ctx, op, 4, rs, rt, imm << 2); > + *is_branch = 1; > + break; > + } All of these can be grouped together. > case OPC_CP2: > /* COP2: Not implemented. 
*/ > generate_exception_err(ctx, EXCP_CpU, 2); > @@ -12588,6 +13094,18 @@ static void mips_tcg_init(void) > cpu_dspctrl = tcg_global_mem_new(TCG_AREG0, > offsetof(CPUState, > active_tc.DSPControl), > "DSPControl"); > + mpl0 = tcg_global_mem_new(TCG_AREG0, > + offsetof(CPUState, active_tc.MPL0), "MPL0"); > + mpl1 = tcg_global_mem_new(TCG_AREG0, > + offsetof(CPUState, active_tc.MPL1), "MPL1"); > + mpl2 = tcg_global_mem_new(TCG_AREG0, > + offsetof(CPUState, active_tc.MPL2), "MPL2"); > + p0 = tcg_global_mem_new(TCG_AREG0, > + offsetof(CPUState, active_tc.P0), "P0"); > + p1 = tcg_global_mem_new(TCG_AREG0, > + offsetof(CPUState, active_tc.P1), "P1"); > + p2 = tcg_global_mem_new(TCG_AREG0, > + offsetof(CPUState, active_tc.P2), "P2"); > bcond = tcg_global_mem_new(TCG_AREG0, > offsetof(CPUState, bcond), "bcond"); > btarget = tcg_global_mem_new(TCG_AREG0, > @@ -12611,6 +13129,18 @@ static void mips_tcg_init(void) > > #include "translate_init.c" > > +#if defined(TARGET_MIPS64) > + > +static void set_cvmctl_register(CPUMIPSState *env) > +{ > + env->active_tc.cvmctl = env->active_tc.cvmctl > + ^ env->active_tc.cvmctl; > + env->active_tc.cvmctl = FUSE_START_BIT(env->active_tc.cvmctl); > + env->active_tc.cvmctl = KASUMI(env->active_tc.cvmctl); > + env->active_tc.cvmctl = IPPCI(env->active_tc.cvmctl); > + env->active_tc.cvmctl = IPTI(env->active_tc.cvmctl); > +} > +#endif > CPUMIPSState *cpu_mips_init (const char *cpu_model) > { > CPUMIPSState *env; > @@ -12623,6 +13153,10 @@ CPUMIPSState *cpu_mips_init (const char *cpu_model) > env->cpu_model = def; > env->cpu_model_str = cpu_model; > > +#if defined(TARGET_MIPS64) > + /* Function for setting cvmctl register */ > + set_cvmctl_register(env); > +#endif > cpu_exec_init(env); > #ifndef CONFIG_USER_ONLY > mmu_init(env, def); > -- > 1.7.3.4
We ported MIPS64 r2 user mode emulation. When a binary is given to qemu-mips64, our code first checks whether it is an Octeon binary; if so, it enables the Octeon-specific instructions for it. The following code snippet does this job: > > > diff --git a/linux-user/elfload.c b/linux-user/elfload.c > > index 2832a33..9399e44 100644 > > --- a/linux-user/elfload.c > > +++ b/linux-user/elfload.c > > @@ -1662,6 +1662,11 @@ int load_elf_binary(struct linux_binprm * bprm, > > struct target_pt_regs * regs, > > when we load the interpreter. */ > > elf_ex = *(struct elfhdr *)bprm->buf; > > > > +#if defined(TARGET_MIPS64) > > + if ((elf_ex.e_flags & EF_MIPS_MARCH) == E_MIPS_MACH_OCTEON) { > > + info->elf_arch = 1; > > + } > > +#endif > > +++ b/linux-user/main.c > > @@ -3348,6 +3348,11 @@ int main(int argc, char **argv, char **envp) > > if (regs->cp0_epc & 1) { > > env->hflags |= MIPS_HFLAG_M16; > > } > > +#if defined(TARGET_MIPS64) > > + if (info->elf_arch) { > > + env->insn_flags |= INSN_OCTEON; > > + } > > +#endif > > } > where we put elf_arch in image_info, and INSN_OCTEON is defined in target-mips/mips-defs.h as follows: #define INSN_LOONGSON2E 0x20000000 #define INSN_LOONGSON2F 0x40000000 #define INSN_VR54XX 0x80000000 +#define INSN_OCTEON 0x10000000 Is this solution acceptable to you?
On Wed, Apr 13, 2011 at 12:26:01PM +0500, Khansa Butt wrote: > We ported MIPS64 r2 user mode emulation. When a binary is given to > qemu-mips64, our code first check whether it is Octeon binary or not if yes > it enable Octeon specific Instructions for. The following code snippet do > this job: > > > > > > diff --git a/linux-user/elfload.c b/linux-user/elfload.c > > > index 2832a33..9399e44 100644 > > > --- a/linux-user/elfload.c > > > +++ b/linux-user/elfload.c > > > @@ -1662,6 +1662,11 @@ int load_elf_binary(struct linux_binprm * bprm, > > > struct target_pt_regs * regs, > > > when we load the interpreter. */ > > > elf_ex = *(struct elfhdr *)bprm->buf; > > > > > > +#if defined(TARGET_MIPS64) > > > + if ((elf_ex.e_flags & EF_MIPS_MARCH) == E_MIPS_MACH_OCTEON) { > > > + info->elf_arch = 1; > > > + } > > > +#endif > > > > > > +++ b/linux-user/main.c > > > @@ -3348,6 +3348,11 @@ int main(int argc, char **argv, char **envp) > > > if (regs->cp0_epc & 1) { > > > env->hflags |= MIPS_HFLAG_M16; > > > } > > > +#if defined(TARGET_MIPS64) > > > + if (info->elf_arch) { > > > + env->insn_flags |= INSN_OCTEON; > > > + } > > > +#endif > > > } > > > > where we put elf_arch in image_info I am not really sure about the name info->elf_arch: it is something specific to Octeon, but the name doesn't reflect that. Also, we probably want a generic framework for switching from one CPU to another. Note that in any case you can run qemu-mips64 with -cpu octeon if you define an Octeon CPU in translate_init.c. > and INSN_OCTEON is in target_mips/mips-defs.h as follows > #define INSN_LOONGSON2E 0x20000000 > #define INSN_LOONGSON2F 0x40000000 > #define INSN_VR54XX 0x80000000 > +#define INSN_OCTEON 0x10000000 > > Is this solution acceptable for you? This part is clearly the way to go.
diff --git a/host-utils.c b/host-utils.c index dc96123..3073ef8 100644 --- a/host-utils.c +++ b/host-utils.c @@ -102,4 +102,18 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b) a, b, *phigh, *plow); #endif } + #endif /* !defined(__x86_64__) */ +void addc(uint64_t res [], uint64_t a, int i) +{ + uint64_t c = res[i]; + for (;i < 4;i++) { + res[i] = c + a; + if (res[i] < a) { + c = 1; + a = res[i+1]; + } + else + break; + } +} diff --git a/host-utils.h b/host-utils.h index 0ddc176..172c4fd 100644 --- a/host-utils.h +++ b/host-utils.h @@ -46,6 +46,7 @@ static inline void muls64(uint64_t *plow, uint64_t *phigh, void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b); void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b); #endif +void addc(uint64_t res [], uint64_t a, int i); /* Binary search for leading zeros. */ diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 2832a33..9399e44 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1662,6 +1662,11 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, when we load the interpreter. 
*/ elf_ex = *(struct elfhdr *)bprm->buf; +#if defined(TARGET_MIPS64) + if ((elf_ex.e_flags & EF_MIPS_MARCH) == E_MIPS_MACH_OCTEON) { + info->elf_arch = 1; + } +#endif bprm->p = copy_elf_strings(1, &bprm->filename, bprm->page, bprm->p); bprm->p = copy_elf_strings(bprm->envc,bprm->envp,bprm->page,bprm->p); bprm->p = copy_elf_strings(bprm->argc,bprm->argv,bprm->page,bprm->p); diff --git a/linux-user/main.c b/linux-user/main.c index a7f4955..acf9cac 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -3348,6 +3348,11 @@ int main(int argc, char **argv, char **envp) if (regs->cp0_epc & 1) { env->hflags |= MIPS_HFLAG_M16; } +#if defined(TARGET_MIPS64) + if (info->elf_arch) { + env->TARGET_OCTEON = 1; + } +#endif } #elif defined(TARGET_SH4) { diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 250814d..adef428 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -51,6 +51,7 @@ struct image_info { abi_ulong arg_start; abi_ulong arg_end; int personality; + int elf_arch; #ifdef CONFIG_USE_FDPIC abi_ulong loadmap_addr; uint16_t nsegs; diff --git a/linux-user/syscall.c b/linux-user/syscall.c index bb0999d..d5da0ee 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7320,6 +7320,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, case TARGET_NR_set_thread_area: #if defined(TARGET_MIPS) ((CPUMIPSState *) cpu_env)->tls_value = arg1; + if (((CPUMIPSState *) cpu_env)->TARGET_OCTEON) { + /* tls entry is moved to k0 so that this can be used later + * currently this thing is tested only for Octeon */ + ((CPUMIPSState *) cpu_env)->active_tc.gpr[26] = arg1; + } ret = 0; break; #elif defined(TARGET_CRIS) diff --git a/target-mips/cpu.h b/target-mips/cpu.h index 0b98d10..8ef8afb 100644 --- a/target-mips/cpu.h +++ b/target-mips/cpu.h @@ -5,6 +5,11 @@ #define ELF_MACHINE EM_MIPS +/* These flags are used to check whether given + * user mode binary is octeon specific or not */ +#define EF_MIPS_MARCH 0xFF0000 +#define E_MIPS_MACH_OCTEON 0x8B0000 + 
#define CPUState struct CPUMIPSState #include "config.h" @@ -171,6 +176,15 @@ struct TCState { target_ulong CP0_TCSchedule; target_ulong CP0_TCScheFBack; int32_t CP0_Debug_tcstatus; + /* Multiplier registers for Octeon */ + target_ulong MPL0; + target_ulong MPL1; + target_ulong MPL2; + target_ulong P0; + target_ulong P1; + target_ulong P2; + /* Octeon specific Coprocessor 0 register */ + target_ulong cvmctl; }; typedef struct CPUMIPSState CPUMIPSState; @@ -178,6 +192,7 @@ struct CPUMIPSState { TCState active_tc; CPUMIPSFPUContext active_fpu; + int TARGET_OCTEON; uint32_t current_tc; uint32_t current_fpu; diff --git a/target-mips/helper.h b/target-mips/helper.h index 297ab64..409c08d 100644 --- a/target-mips/helper.h +++ b/target-mips/helper.h @@ -8,7 +8,12 @@ DEF_HELPER_3(ldl, tl, tl, tl, int) DEF_HELPER_3(ldr, tl, tl, tl, int) DEF_HELPER_3(sdl, void, tl, tl, int) DEF_HELPER_3(sdr, void, tl, tl, int) +DEF_HELPER_3(v3mulu, void, tl, tl, int) +DEF_HELPER_3(vmulu, void, tl, tl, int) +DEF_HELPER_2(dpop, void, tl, i32) #endif +DEF_HELPER_2(pop, void, tl, i32) + DEF_HELPER_3(lwl, tl, tl, tl, int) DEF_HELPER_3(lwr, tl, tl, tl, int) DEF_HELPER_3(swl, void, tl, tl, int) diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c index bd16ce3..de793ba 100644 --- a/target-mips/op_helper.c +++ b/target-mips/op_helper.c @@ -266,7 +266,77 @@ void helper_dmultu (target_ulong arg1, target_ulong arg2) { mulu64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2); } +void helper_v3mulu (target_ulong arg1, target_ulong arg2, int rd) +{ + uint64_t hi, lo, res[4]; + int i; + for (i = 0;i < 4; i++) { + res[i] = 0; + } + mulu64(&res[0], &res[1], env->active_tc.MPL0, arg1); + mulu64(&lo, &hi, env->active_tc.MPL1, arg1); + res[1] = res[1] + lo; + if (res[1] < lo) + res[2]++; + res[2] = res[2] + hi; + if (res[2] < hi) + res[3]++; + mulu64(&lo, &hi, env->active_tc.MPL2, arg1); + res[2] = res[2] + lo; + if (res[2] < lo) + res[3]++; + res[3] = res[3] + hi; + addc(res, arg2, 0); + 
addc(res, env->active_tc.P0, 0); + addc(res, env->active_tc.P1, 1); + addc(res, env->active_tc.P2, 2); + env->active_tc.gpr[rd] = res[0]; + env->active_tc.P0 = res[1]; + env->active_tc.P1 = res[2]; + env->active_tc.P2 = res[3]; +} +void helper_vmulu (target_ulong arg1, target_ulong arg2, int rd) +{ + uint64_t hi, lo; + mulu64(&lo, &hi, env->active_tc.MPL0, arg1); + lo = lo + arg2; + if (lo < arg2) + hi++; + lo = lo + env->active_tc.P0; + if (lo < env->active_tc.P0) + hi++; + env->active_tc.gpr[rd] = lo; + env->active_tc.P0 = hi; +} +void helper_dpop (target_ulong arg, uint32_t rd) +{ + uint32_t rem, ones = 0; + while (arg >= 1) { + arg = arg/2; + rem = arg % 2; + if(rem == 1) + ones++; + if(arg == 1) + ones++; + } + env->active_tc.gpr[rd] = ones; +} #endif +void helper_pop (target_ulong arg, uint32_t rd) +{ + uint32_t rem, i, ones = 0; + i = 0; + while(i <= 31) { + arg = arg/2; + rem = arg % 2; + if(rem == 1) + ones++; + if(arg == 1) + ones++; + i++; + } + env->active_tc.gpr[rd] = ones; +} #ifndef CONFIG_USER_ONLY diff --git a/target-mips/translate.c b/target-mips/translate.c index 63c2563..1394e18 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -36,6 +36,14 @@ #define GEN_HELPER 1 #include "helper.h" +#if defined(TARGET_MIPS64) +/* Macros for setting values of cvmctl registers */ +#define FUSE_START_BIT(cvmctl) (cvmctl | 0x80000000) +#define KASUMI(cvmctl) (cvmctl | 0x20000000) +#define IPPCI(cvmctl) (cvmctl | 0x380) +#define IPTI(cvmctl) (cvmctl | 0x70) +#endif + //#define MIPS_DEBUG_DISAS //#define MIPS_DEBUG_SIGN_EXTENSIONS @@ -70,6 +78,11 @@ enum { OPC_JAL = (0x03 << 26), OPC_JALS = OPC_JAL | 0x5, OPC_BEQ = (0x04 << 26), /* Unconditional if rs = rt = 0 (B) */ + /* Cavium Specific */ + OPC_BBIT1 = (0x3a << 26), /* jump on bit set, cavium specific */ + OPC_BBIT132 = (0x3e << 26), /* jump on bit set(for upper 32 bits) */ + OPC_BBIT0 = (0x32 << 26), /* jump on bit clear, cavium specific */ + OPC_BBIT032 = (0x36 << 26), /* jump on bit 
clear(for upper 32 bits) */ OPC_BEQL = (0x14 << 26), OPC_BNE = (0x05 << 26),