diff mbox

[3/3] target-mips:Support for Cavium specific instructions

Message ID 1309857568-2861-4-git-send-email-khansa@kics.edu.pk
State New
Headers show

Commit Message

Khansa Butt July 5, 2011, 9:19 a.m. UTC
From: Ehsan-ul-Haq, Abdul Qadeer, Abdul Waheed, Khansa Butt <khansa@kics.edu.pk>


Signed-off-by: Khansa Butt <khansa@kics.edu.pk>
---
 host-utils.c            |    1 +
 target-mips/cpu.h       |    7 +
 target-mips/helper.h    |    5 +
 target-mips/op_helper.c |   67 +++++++
 target-mips/translate.c |  443 ++++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 514 insertions(+), 9 deletions(-)

Comments

Peter Maydell Aug. 4, 2011, 11:22 a.m. UTC | #1
On 5 July 2011 10:19,  <khansa@kics.edu.pk> wrote:
> ---
>  host-utils.c            |    1 +
>  target-mips/cpu.h       |    7 +
>  target-mips/helper.h    |    5 +
>  target-mips/op_helper.c |   67 +++++++
>  target-mips/translate.c |  443 ++++++++++++++++++++++++++++++++++++++++++++++-
>  5 files changed, 514 insertions(+), 9 deletions(-)

Don't you also need to add support for the new instructions
to the disassembler in mips-dis.c ?

> diff --git a/host-utils.c b/host-utils.c
> index dc96123..1128698 100644
> --- a/host-utils.c
> +++ b/host-utils.c
> @@ -102,4 +102,5 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
>            a, b, *phigh, *plow);
>  #endif
>  }
> +
>  #endif /* !defined(__x86_64__) */

Stray random whitespace change to an unrelated file:
please drop this from the patch.

> diff --git a/target-mips/cpu.h b/target-mips/cpu.h
> index b0ac4da..8e75e9b 100644
> --- a/target-mips/cpu.h
> +++ b/target-mips/cpu.h
> @@ -171,6 +171,13 @@ struct TCState {
>     target_ulong CP0_TCSchedule;
>     target_ulong CP0_TCScheFBack;
>     int32_t CP0_Debug_tcstatus;
> +    /* Multiplier registers for Octeon */
> +    target_ulong MPL0;
> +    target_ulong MPL1;
> +    target_ulong MPL2;
> +    target_ulong P0;
> +    target_ulong P1;
> +    target_ulong P2;
>  };

If you add new fields to the CPU struct then you must also
add code to save/restore them in target-mips/machine.c.

> diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
> index 6b966b1..a1893d1 100644
> --- a/target-mips/op_helper.c
> +++ b/target-mips/op_helper.c
> @@ -266,7 +266,74 @@ void helper_dmultu (target_ulong arg1, target_ulong arg2)
>  {
>     mulu64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2);
>  }
> +static void addc(uint64_t res[], uint64_t a, int i)

Can you leave blank lines between function definitions, please?
(here and also in a few places below)

> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index eb108bc..b480665 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -69,6 +69,11 @@ enum {
>     OPC_JAL      = (0x03 << 26),
>     OPC_JALS     = OPC_JAL | 0x5,
>     OPC_BEQ      = (0x04 << 26),  /* Unconditional if rs = rt = 0 (B) */
> +    /* Cavium Specific */
> +    OPC_BBIT1    = (0x3a << 26),  /* jump on bit set, cavium specific */
> +    OPC_BBIT132  = (0x3e << 26),  /* jump on bit set(for upper 32 bits) */

Space before the '(' in the comment, please.

> +    OPC_BBIT0    = (0x32 << 26),  /* jump on bit clear, cavium specific */
> +    OPC_BBIT032  = (0x36 << 26),  /* jump on bit clear(for upper 32 bits) */

Ditto.

> @@ -482,7 +512,7 @@ enum {
>  static TCGv_ptr cpu_env;
>  static TCGv cpu_gpr[32], cpu_PC;
>  static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC], cpu_ACX[MIPS_DSP_ACC];
> -static TCGv cpu_dspctrl, btarget, bcond;
> +static TCGv cpu_dspctrl, btarget, bcond, mpl0, mpl1, mpl2, p0, p1, p2;

p0/p1/p2 are awful names for global variables -- far too short.
mpl0/mpl1/mpl2 aren't a great deal better.

Also it looks like at least some of these aren't really used very
often -- you should consider just doing loads/stores to env+offset
the way we do for most other cpu fields.

> +/* set on equal/not equal immidiate */
> +static void gen_set_imm(CPUState *env, uint32_t opc,
> +                        int rt, int rs, int16_t imm)
> +{
> +    target_ulong uimm = (target_long)imm;
> +    TCGv t0;
> +    const char *opn = "imm set";
> +    if (rt == 0) {
> +        /* If no destination, treat it as a NOP. */
> +        MIPS_DEBUG("NOP");
> +        return;
> +    }
> +    t0 = tcg_temp_new();
> +    gen_load_gpr(t0, rs);
> +    switch (opc) {
> +    case OPC_SEQI:
> +        tcg_gen_xori_tl(t0, t0, uimm);
> +        tcg_gen_setcondi_tl(TCG_COND_LT, cpu_gpr[rt], t0, 1);
> +        opn = "seqi";
> +        break;
> +    case OPC_SNEI:
> +        tcg_gen_xori_tl(t0, t0, uimm);
> +        tcg_gen_setcondi_tl(TCG_COND_GT, cpu_gpr[rt], t0, 0);
> +        opn = "snei";
> +        break;
> +    }
> +    tcg_temp_free(t0);
> +}

The tcg_gen_xori_tl() is the same in both cases of the switch, you
could pull it out of the switch.

> @@ -1636,6 +1881,30 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
>         }
>         opn = "addu";
>         break;
> +    case OPC_BADDU:
> +        {
> +            TCGv t0 = tcg_temp_new();
> +            TCGv t1 = tcg_temp_new();
> +            gen_load_gpr(t0, rs);
> +            gen_load_gpr(t1, rt);
> +            tcg_gen_add_tl(t0, t1, t0);
> +            tcg_gen_ext8u_tl(t0, t0);
> +            gen_store_gpr(t0, rd);
> +            tcg_temp_free(t0);
> +            tcg_temp_free(t1);
> +        }
> +       opn = "baddu";
> +       break;

These should go inside the braces, please [ditto for other cases below]

> +    case OPC_DMUL:
> +        {
> +            TCGv t0 = tcg_temp_new();
> +            TCGv t1 = tcg_temp_new();
> +            gen_load_gpr(t0, rs);
> +            gen_load_gpr(t1, rt);
> +            tcg_gen_mul_i64(cpu_gpr[rd], t0, t1);
> +        }
> +            opn = "dmul";
> +            break;

Missing tcg_temp_free()s ?


>     case OPC_SUB:
>         {
>             TCGv t0 = tcg_temp_local_new();
> @@ -2704,6 +2973,7 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
>     target_ulong btgt = -1;
>     int blink = 0;
>     int bcond_compute = 0;
> +    target_ulong maskb; /* Used in BBIT0 and BBIT1 */

Add braces to the relevant cases and use variables local to that
restricted scope -- there's no need for this to be visible for the
whole function.

> +        case OPC_BBIT1:
> +        case OPC_BBIT132:
> +            tcg_gen_setcondi_tl(TCG_COND_NE, bcond, t0, 0);
> +            goto not_likely;
> +            case OPC_BBIT0:
> +            case OPC_BBIT032:
> +            tcg_gen_setcondi_tl(TCG_COND_EQ, bcond, t0, 0);
> +            goto not_likely;

Indentation on the OPC_BBIT0, OPC_BBIT032 cases is wrong.

> @@ -11637,6 +11946,9 @@ static void decode_opc (CPUState *env, DisasContext *ctx, int *is_branch)
>     rd = (ctx->opcode >> 11) & 0x1f;
>     sa = (ctx->opcode >> 6) & 0x1f;
>     imm = (int16_t)ctx->opcode;
> +    /* 10 bit Immediate value For SEQI,SNEI */
> +    imm10 = (ctx->opcode >> 6) & 0x3ff;
> +

Only used inside a single case, so just make it a variable scoped
only to that case and decode it from opcode at that point.

> @@ -11862,6 +12174,58 @@ static void decode_opc (CPUState *env, DisasContext *ctx, int *is_branch)
>         case OPC_MUL:
>             gen_arith(env, ctx, op1, rd, rs, rt);
>             break;
> +#if defined(TARGET_MIPS64)
> +
> +        case OPC_DMUL:

This blank line is unnecessary.

> @@ -11881,13 +12245,24 @@ static void decode_opc (CPUState *env, DisasContext *ctx, int *is_branch)
>             break;
>         case OPC_DIV_G_2F:
>         case OPC_DIVU_G_2F:
> -        case OPC_MULT_G_2F:
>         case OPC_MULTU_G_2F:
>         case OPC_MOD_G_2F:
>         case OPC_MODU_G_2F:
>             check_insn(env, ctx, INSN_LOONGSON2F);
>             gen_loongson_integer(ctx, op1, rd, rs, rt);
>             break;
> +        case OPC_MULT_G_2F:
> +            if (!(env->insn_flags & CPU_OCTEON)) {
> +                check_insn(env, ctx, INSN_LOONGSON2F);
> +                gen_loongson_integer(ctx, op1, rd, rs, rt);
> +            } else {
> +#if defined(TARGET_MIPS64)
> +                /* Cavium Specific vmm0 */
> +                check_mips_64(ctx);
> +                gen_LMI(env, ctx, op1, rs, rt, rd);
> +#endif
> +            }
> +            break;

You could rearrange this to:
  case OPC_MULT_G_2F:
#if defined(TARGET_MIPS64)
     if (OCTEON) {
         check_mips_64(ctx);
         gen_LMI(...);
     }
#endif
     /* Otherwise fall through, this is also a Loongson insn */
  case OPC_DIV_G_2F:
  case OPC_DIVU_G_2F:
  [etc]
         check_insn(env, ctx, INSN_LOONGSON2F);
         gen_loongson_integer(ctx, op1, rd, rs, rt);

...which lets you avoid duplicating the loongson code.
Ditto in the DMULT/DDIV case.

-- PMM
Khansa Butt Aug. 13, 2011, 8:52 a.m. UTC | #2
On Thu, Aug 4, 2011 at 4:22 PM, Peter Maydell <peter.maydell@linaro.org> wrote:

> On 5 July 2011 10:19,  <khansa@kics.edu.pk> wrote:
> > ---
> >  host-utils.c            |    1 +
> >  target-mips/cpu.h       |    7 +
> >  target-mips/helper.h    |    5 +
> >  target-mips/op_helper.c |   67 +++++++
> >  target-mips/translate.c |  443
> ++++++++++++++++++++++++++++++++++++++++++++++-
> >  5 files changed, 514 insertions(+), 9 deletions(-)
>
> Don't you also need to add support for the new instructions
> to the disassembler in mips-dis.c ?
>
>

The ISA for the Cavium Networks Octeon processor consists of MIPS64r2 plus
Cavium-specific instructions. There are 27 such user-mode instructions, which
we have implemented. Some of these instructions conflict with MIPS and
Loongson instructions. For example, the branch-on-bit-clear/set instructions
(four instructions) use the major opcodes of the MIPS COP2 instructions
(e.g. LWC2), and V3MULU and VMM0 have the same opcode and function fields as
two of Loongson's instructions. To detect the correct instruction during
disassembly, can I add a CPU-specific flag to DisasContext, so that I can
pass it to log_target_disas() in disas.c and set some of the top 16 bits of
disassemble_info's flags? On the basis of that flag I can pick the correct
instruction in print_insn_mips() in mips-dis.c. In future this flag could be
used for other vendor-specific instructions as well.

Please guide me in this regard. Writing a separate print function for Cavium
would not be suitable, because Cavium includes all of the MIPS64r2
instructions, so there would be a lot of repetition.

Thanks.
diff mbox

Patch

diff --git a/host-utils.c b/host-utils.c
index dc96123..1128698 100644
--- a/host-utils.c
+++ b/host-utils.c
@@ -102,4 +102,5 @@  void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
            a, b, *phigh, *plow);
 #endif
 }
+
 #endif /* !defined(__x86_64__) */
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index b0ac4da..8e75e9b 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -171,6 +171,13 @@  struct TCState {
     target_ulong CP0_TCSchedule;
     target_ulong CP0_TCScheFBack;
     int32_t CP0_Debug_tcstatus;
+    /* Multiplier registers for Octeon */
+    target_ulong MPL0;
+    target_ulong MPL1;
+    target_ulong MPL2;
+    target_ulong P0;
+    target_ulong P1;
+    target_ulong P2;
 };
 
 typedef struct CPUMIPSState CPUMIPSState;
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 297ab64..e892d39 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -8,7 +8,12 @@  DEF_HELPER_3(ldl, tl, tl, tl, int)
 DEF_HELPER_3(ldr, tl, tl, tl, int)
 DEF_HELPER_3(sdl, void, tl, tl, int)
 DEF_HELPER_3(sdr, void, tl, tl, int)
+DEF_HELPER_2(v3mulu, tl, tl, tl)
+DEF_HELPER_2(vmulu, tl, tl, tl)
+DEF_HELPER_1(dpop, tl, tl)
 #endif
+DEF_HELPER_1(pop, tl, tl);
+
 DEF_HELPER_3(lwl, tl, tl, tl, int)
 DEF_HELPER_3(lwr, tl, tl, tl, int)
 DEF_HELPER_3(swl, void, tl, tl, int)
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 6b966b1..a1893d1 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -266,7 +266,74 @@  void helper_dmultu (target_ulong arg1, target_ulong arg2)
 {
     mulu64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2);
 }
+static void addc(uint64_t res[], uint64_t a, int i)
+{
+    uint64_t c = res[i];
+    for (; i < 4; i++) {
+        res[i] = c + a;
+        if (res[i] < a) {
+            c = 1;
+            a = res[i+1];
+        } else
+              break;
+    }
+}
+target_ulong helper_v3mulu(target_ulong arg1, target_ulong arg2)
+{
+    uint64_t hi, lo, res[4];
+    int i;
+    for (i = 0; i < 4; i++) {
+        res[i] = 0;
+    }
+    mulu64(&res[0], &res[1], env->active_tc.MPL0, arg1);
+    mulu64(&lo, &hi, env->active_tc.MPL1, arg1);
+    res[1] = res[1] + lo;
+    if (res[1] < lo) {
+        res[2]++;
+    }
+    res[2] = res[2] + hi;
+    if (res[2] < hi) {
+        res[3]++;
+    }
+    mulu64(&lo, &hi, env->active_tc.MPL2, arg1);
+    res[2] = res[2] + lo;
+    if (res[2] < lo) {
+        res[3]++;
+    }
+    res[3] = res[3] + hi;
+    addc(res, arg2, 0);
+    addc(res, env->active_tc.P0, 0);
+    addc(res, env->active_tc.P1, 1);
+    addc(res, env->active_tc.P2, 2);
+    env->active_tc.P0 = res[1];
+    env->active_tc.P1 = res[2];
+    env->active_tc.P2 = res[3];
+    return res[0];
+}
+target_ulong helper_vmulu(target_ulong arg1, target_ulong arg2)
+{
+    uint64_t hi, lo;
+    mulu64(&lo, &hi, env->active_tc.MPL0, arg1);
+    lo = lo + arg2;
+    if (lo < arg2) {
+        hi++;
+    }
+    lo = lo + env->active_tc.P0;
+    if (lo < env->active_tc.P0) {
+        hi++;
+    }
+    env->active_tc.P0 = hi;
+    return lo;
+}
+target_ulong helper_dpop(target_ulong arg)
+{
+    return ctpop64(arg);
+}
 #endif
+target_ulong helper_pop(target_ulong arg)
+{
+    return ctpop32((uint32_t)arg);
+}
 
 #ifndef CONFIG_USER_ONLY
 
diff --git a/target-mips/translate.c b/target-mips/translate.c
index eb108bc..b480665 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -69,6 +69,11 @@  enum {
     OPC_JAL      = (0x03 << 26),
     OPC_JALS     = OPC_JAL | 0x5,
     OPC_BEQ      = (0x04 << 26),  /* Unconditional if rs = rt = 0 (B) */
+    /* Cavium Specific */
+    OPC_BBIT1    = (0x3a << 26),  /* jump on bit set, cavium specific */
+    OPC_BBIT132  = (0x3e << 26),  /* jump on bit set(for upper 32 bits) */
+    OPC_BBIT0    = (0x32 << 26),  /* jump on bit clear, cavium specific */
+    OPC_BBIT032  = (0x36 << 26),  /* jump on bit clear(for upper 32 bits) */
     OPC_BEQL     = (0x14 << 26),
     OPC_BNE      = (0x05 << 26),
     OPC_BNEL     = (0x15 << 26),
@@ -264,6 +269,31 @@  enum {
     OPC_MADD     = 0x00 | OPC_SPECIAL2,
     OPC_MADDU    = 0x01 | OPC_SPECIAL2,
     OPC_MUL      = 0x02 | OPC_SPECIAL2,
+    /* Cavium Specific Instructions */
+    OPC_BADDU    = 0x28 | OPC_SPECIAL2,
+    OPC_DMUL     = 0x03 | OPC_SPECIAL2,
+    OPC_EXTS     = 0x3a | OPC_SPECIAL2,
+    OPC_EXTS32   = 0x3b | OPC_SPECIAL2,
+    OPC_CINS     = 0x32 | OPC_SPECIAL2,
+    OPC_CINS32   = 0x33 | OPC_SPECIAL2,
+    OPC_SEQI     = 0x2e | OPC_SPECIAL2,
+    OPC_SNEI     = 0x2f | OPC_SPECIAL2,
+    OPC_MTM0     = 0x08 | OPC_SPECIAL2,
+    OPC_MTM1     = 0x0c | OPC_SPECIAL2,
+    OPC_MTM2     = 0x0d | OPC_SPECIAL2,
+    OPC_MTP0     = 0x09 | OPC_SPECIAL2,
+    OPC_MTP1     = 0x0a | OPC_SPECIAL2,
+    OPC_MTP2     = 0x0b | OPC_SPECIAL2,
+    OPC_V3MULU   = 0x11 | OPC_SPECIAL2,
+    OPC_VMM0     = 0x10 | OPC_SPECIAL2,
+    OPC_VMULU    = 0x0f | OPC_SPECIAL2,
+    OPC_POP      = 0X2C | OPC_SPECIAL2,
+    OPC_DPOP     = 0X2D | OPC_SPECIAL2,
+    OPC_SEQ      = 0x2a | OPC_SPECIAL2,
+    OPC_SNE      = 0x2b | OPC_SPECIAL2,
+    OPC_SAA      = 0x18 | OPC_SPECIAL2,
+    OPC_SAAD     = 0x19 | OPC_SPECIAL2,
+/**************************************/
     OPC_MSUB     = 0x04 | OPC_SPECIAL2,
     OPC_MSUBU    = 0x05 | OPC_SPECIAL2,
     /* Loongson 2F */
@@ -482,7 +512,7 @@  enum {
 static TCGv_ptr cpu_env;
 static TCGv cpu_gpr[32], cpu_PC;
 static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC], cpu_ACX[MIPS_DSP_ACC];
-static TCGv cpu_dspctrl, btarget, bcond;
+static TCGv cpu_dspctrl, btarget, bcond, mpl0, mpl1, mpl2, p0, p1, p2;
 static TCGv_i32 hflags;
 static TCGv_i32 fpu_fcr0, fpu_fcr31;
 
@@ -1418,7 +1448,222 @@  static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
     (void)opn; /* avoid a compiler warning */
     MIPS_DEBUG("%s %s, %s, " TARGET_FMT_lx, opn, regnames[rt], regnames[rs], uimm);
 }
+#if defined(TARGET_MIPS64)
+/* set on equal/not equal immidiate */
+static void gen_set_imm(CPUState *env, uint32_t opc,
+                        int rt, int rs, int16_t imm)
+{
+    target_ulong uimm = (target_long)imm;
+    TCGv t0;
+    const char *opn = "imm set";
+    if (rt == 0) {
+        /* If no destination, treat it as a NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+    t0 = tcg_temp_new();
+    gen_load_gpr(t0, rs);
+    switch (opc) {
+    case OPC_SEQI:
+        tcg_gen_xori_tl(t0, t0, uimm);
+        tcg_gen_setcondi_tl(TCG_COND_LT, cpu_gpr[rt], t0, 1);
+        opn = "seqi";
+        break;
+    case OPC_SNEI:
+        tcg_gen_xori_tl(t0, t0, uimm);
+        tcg_gen_setcondi_tl(TCG_COND_GT, cpu_gpr[rt], t0, 0);
+        opn = "snei";
+        break;
+    }
+    tcg_temp_free(t0);
+}
+/* Cavium specific Large Multiply Instructions */
+static void gen_LMI(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+                     int rs, int rt, int rd)
+{
+    const char *opn = "LMI";
+    TCGv t0, t1;
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    gen_load_gpr(t0, rs);
+    gen_load_gpr(t1, rt);
+    switch (opc) {
+    case OPC_MTM0:
+        tcg_gen_mov_tl(mpl0, t0);
+        tcg_gen_movi_tl(p0, 0);
+        tcg_gen_movi_tl(p1, 0);
+        tcg_gen_movi_tl(p2, 0);
+        opn = "mtm0";
+        break;
+    case OPC_MTM1:
+        tcg_gen_mov_tl(mpl1, t0);
+        tcg_gen_movi_tl(p0, 0);
+        tcg_gen_movi_tl(p1, 0);
+        tcg_gen_movi_tl(p2, 0);
+        opn = "mtm1";
+        break;
+    case OPC_MTM2:
+        tcg_gen_mov_tl(mpl2, t0);
+        tcg_gen_movi_tl(p0, 0);
+        tcg_gen_movi_tl(p1, 0);
+        tcg_gen_movi_tl(p2, 0);
+        opn = "mtm2";
+        break;
+    case OPC_MTP0:
+        tcg_gen_mov_tl(p0, t0);
+        opn = "mtp0";
+        break;
+    case OPC_MTP1:
+        tcg_gen_mov_tl(p1, t0);
+        opn = "mtp1";
+        break;
+    case OPC_MTP2:
+        tcg_gen_mov_tl(p2, t0);
+        opn = "mtp2";
+        break;
+    case OPC_VMM0:
+        tcg_gen_mul_i64(t0, t0, mpl0);
+        tcg_gen_add_tl(t1, t1, t0);
+        tcg_gen_add_tl(t1, t1, p0);
+        gen_store_gpr(t1, rd);
+        tcg_gen_mov_tl(mpl0, t1);
+        tcg_gen_movi_tl(p0, 0);
+        tcg_gen_movi_tl(p1, 0);
+        tcg_gen_movi_tl(p2, 0);
+        opn = "vmm0";
+        break;
+    case OPC_VMULU:
+        gen_helper_vmulu(t0, t0, t1);
+        gen_store_gpr(t0, rd);
+        opn = "vmulu";
+        break;
+    case OPC_V3MULU:
+        gen_load_gpr(t0, rs);
+        gen_load_gpr(t1, rt);
+        gen_helper_v3mulu(t0, t0, t1);
+        gen_store_gpr(t0, rd);
+        opn = "v3mulu";
+        break;
+    }
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+/* set if equal/not equal */
+static void gen_set(DisasContext *ctx, uint32_t opc,
+                        int rd, int rs, int rt)
+{
+    const char *opn = "seq/sne";
+    TCGv t0, t1;
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    gen_load_gpr(t0, rs);
+    gen_load_gpr(t1, rt);
+    switch (opc) {
+    case OPC_SEQ:
+        tcg_gen_xor_tl(t0, t0, t1);
+        tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_gpr[rd], t0, 1);
+        opn = "seq";
+        break;
+    case OPC_SNE:
+        tcg_gen_xor_tl(t0, t0, t1);
+        tcg_gen_setcondi_tl(TCG_COND_GTU, cpu_gpr[rd], t0, 0);
+        opn = "sne";
+        break;
+    default:
+        MIPS_INVAL(opn);
+        generate_exception(ctx, EXCP_RI);
+    }
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+
+}
+/* Store atomic add */
+/* FIXME: something else should be done for emulating SMP system. */
+static void gen_saa(CPUState *env, DisasContext *ctx, uint32_t opc,
+                     int rt, int base)
+{
+    const char *opn = "saa";
+    TCGv t0, t1, temp;
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    temp = tcg_temp_new();
+    gen_load_gpr(t1, rt);
+    gen_base_offset_addr(ctx, t0, base, 0);
+    switch (opc) {
+    case OPC_SAA:
+        save_cpu_state(ctx, 1);
+        op_ld_lw(temp, t0, ctx);
+        tcg_gen_add_tl(temp, temp, t1);
+        op_st_sw(temp, t0, ctx);
+        opn = "saa";
+        break;
+    case OPC_SAAD:
+        save_cpu_state(ctx, 0);
+        op_ld_ld(temp, t0, ctx);
+        tcg_gen_add_tl(temp, temp, t1);
+        op_st_sd(temp, t0, ctx);
+        opn = "saad";
+        break;
+    }
 
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+static void gen_pop_count(DisasContext *ctx, uint32_t opc, int rd, int rs)
+{
+    const char *opn = "pop";
+        TCGv t0;
+        t0 = tcg_temp_new();
+        gen_load_gpr(t0, rs);
+        switch (opc) {
+        case OPC_DPOP:
+            gen_helper_dpop(t0, t0);
+            gen_store_gpr(t0, rd);
+            opn = "dpop";
+            break;
+        case OPC_POP:
+            gen_helper_pop(t0, t0);
+            gen_store_gpr(t0, rd);
+            opn = "pop";
+            break;
+        }
+        tcg_temp_free(t0);
+}
+/* Cavium specific extract instructions */
+static void gen_exts(CPUState *env, DisasContext *ctx, uint32_t opc, int rt,
+                      int rs, int lsb, int msb)
+{
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
+    target_ulong mask;
+    gen_load_gpr(t1, rs);
+    switch (opc) {
+    case OPC_EXTS:
+        tcg_gen_shri_tl(t0, t1, lsb);
+        tcg_gen_andi_tl(t0, t0, (1ULL << (msb + 1)) - 1);
+        /* To sign extened the remaining bits according to
+           the msb of the bit field */
+        mask = 1ULL << msb;
+        tcg_gen_andi_tl(t1, t0, mask);
+        tcg_gen_addi_tl(t1, t1, -1);
+        tcg_gen_orc_tl(t0, t0, t1);
+        gen_store_gpr(t0, rt);
+        break;
+    case OPC_EXTS32:
+        tcg_gen_shri_tl(t0, t1, lsb + 32);
+        tcg_gen_andi_tl(t0, t0, (1ULL << (msb + 1)) - 1);
+        mask = 1ULL << msb;
+        tcg_gen_andi_tl(t1, t0, mask);
+        tcg_gen_addi_tl(t1, t1, -1);
+        tcg_gen_orc_tl(t0, t0, t1);
+        gen_store_gpr(t0, rt);
+        break;
+
+    }
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+#endif
 /* Logic with immediate operand */
 static void gen_logic_imm (CPUState *env, uint32_t opc, int rt, int rs, int16_t imm)
 {
@@ -1636,6 +1881,30 @@  static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
         }
         opn = "addu";
         break;
+    case OPC_BADDU:
+        {
+            TCGv t0 = tcg_temp_new();
+            TCGv t1 = tcg_temp_new();
+            gen_load_gpr(t0, rs);
+            gen_load_gpr(t1, rt);
+            tcg_gen_add_tl(t0, t1, t0);
+            tcg_gen_ext8u_tl(t0, t0);
+            gen_store_gpr(t0, rd);
+            tcg_temp_free(t0);
+            tcg_temp_free(t1);
+        }
+       opn = "baddu";
+       break;
+    case OPC_DMUL:
+        {
+            TCGv t0 = tcg_temp_new();
+            TCGv t1 = tcg_temp_new();
+            gen_load_gpr(t0, rs);
+            gen_load_gpr(t1, rt);
+            tcg_gen_mul_i64(cpu_gpr[rd], t0, t1);
+        }
+            opn = "dmul";
+            break;
     case OPC_SUB:
         {
             TCGv t0 = tcg_temp_local_new();
@@ -2704,6 +2973,7 @@  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
     target_ulong btgt = -1;
     int blink = 0;
     int bcond_compute = 0;
+    target_ulong maskb; /* Used in BBIT0 and BBIT1 */
     TCGv t0 = tcg_temp_new();
     TCGv t1 = tcg_temp_new();
 
@@ -2729,6 +2999,22 @@  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
         }
         btgt = ctx->pc + insn_bytes + offset;
         break;
+    case OPC_BBIT0:
+    case OPC_BBIT1:
+        gen_load_gpr(t0, rs);
+        maskb = 1ULL << rt;
+        tcg_gen_andi_tl(t0, t0, maskb);
+        bcond_compute = 1;
+        btgt = ctx->pc + insn_bytes + offset;
+        break;
+    case OPC_BBIT032:
+    case OPC_BBIT132:
+        gen_load_gpr(t0, rs);
+        maskb = 1ULL << (rt + 32);
+        tcg_gen_andi_tl(t0, t0, maskb);
+        bcond_compute = 1;
+        btgt = ctx->pc + insn_bytes + offset;
+        break;
     case OPC_BGEZ:
     case OPC_BGEZAL:
     case OPC_BGEZALS:
@@ -2887,6 +3173,14 @@  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
             MIPS_DEBUG("bne %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto not_likely;
+        case OPC_BBIT1:
+        case OPC_BBIT132:
+            tcg_gen_setcondi_tl(TCG_COND_NE, bcond, t0, 0);
+            goto not_likely;
+            case OPC_BBIT0:
+            case OPC_BBIT032:
+            tcg_gen_setcondi_tl(TCG_COND_EQ, bcond, t0, 0);
+            goto not_likely;
         case OPC_BNEL:
             tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1);
             MIPS_DEBUG("bnel %s, %s, " TARGET_FMT_lx,
@@ -3062,6 +3356,22 @@  static void gen_bitops (DisasContext *ctx, uint32_t opc, int rt,
         tcg_gen_andi_tl(t1, t1, mask);
         tcg_gen_or_tl(t0, t0, t1);
         break;
+    case OPC_CINS:
+        mask =  (1ULL << (msb+1))-1;
+        gen_load_gpr(t0, rt);
+        tcg_gen_andi_tl(t0, t0, 0);
+        tcg_gen_andi_tl(t1, t1, mask);
+        tcg_gen_shli_tl(t1, t1, lsb);
+        tcg_gen_or_tl(t0, t0, t1);
+        break;
+    case OPC_CINS32:
+        mask =  (1ULL << (msb+1))-1;
+        gen_load_gpr(t0, rt);
+        tcg_gen_andi_tl(t0, t0, 0);
+        tcg_gen_andi_tl(t1, t1, mask);
+        tcg_gen_shli_tl(t1, t1, (lsb+32));
+        tcg_gen_or_tl(t0, t0, t1);
+        break;
 #endif
     default:
 fail:
@@ -11608,8 +11918,7 @@  static void decode_opc (CPUState *env, DisasContext *ctx, int *is_branch)
     int32_t offset;
     int rs, rt, rd, sa;
     uint32_t op, op1, op2;
-    int16_t imm;
-
+    int16_t imm, imm10;
     /* make sure instructions are on a word boundary */
     if (ctx->pc & 0x3) {
         env->CP0_BadVAddr = ctx->pc;
@@ -11637,6 +11946,9 @@  static void decode_opc (CPUState *env, DisasContext *ctx, int *is_branch)
     rd = (ctx->opcode >> 11) & 0x1f;
     sa = (ctx->opcode >> 6) & 0x1f;
     imm = (int16_t)ctx->opcode;
+    /* 10 bit Immediate value For SEQI,SNEI */
+    imm10 = (ctx->opcode >> 6) & 0x3ff;
+
     switch (op) {
     case OPC_SPECIAL:
         op1 = MASK_SPECIAL(ctx->opcode);
@@ -11862,6 +12174,58 @@  static void decode_opc (CPUState *env, DisasContext *ctx, int *is_branch)
         case OPC_MUL:
             gen_arith(env, ctx, op1, rd, rs, rt);
             break;
+#if defined(TARGET_MIPS64)
+
+        case OPC_DMUL:
+            check_mips_64(ctx);
+            check_insn(env, ctx, INSN_OCTEON);
+            gen_arith(env, ctx, op1, rd, rs, rt);
+            break;
+        case OPC_CINS:
+            check_mips_64(ctx);
+            check_insn(env, ctx, INSN_OCTEON);
+            gen_bitops(ctx, op1, rt, rs, sa, rd);
+            break;
+        case OPC_CINS32:
+            check_mips_64(ctx);
+            check_insn(env, ctx, INSN_OCTEON);
+            gen_bitops(ctx, op1, rt, rs, sa, rd);
+            break;
+        case OPC_MTM0:
+        case OPC_MTM1:
+        case OPC_MTM2:
+        case OPC_MTP0:
+        case OPC_MTP1:
+        case OPC_MTP2:
+        case OPC_VMULU:
+            check_mips_64(ctx);
+            check_insn(env, ctx, INSN_OCTEON);
+            gen_LMI(env, ctx, op1, rs, rt, rd);
+            break;
+        case OPC_BADDU:
+            check_insn(env, ctx, INSN_OCTEON);
+            gen_arith(env, ctx, op1, rd, rs, rt);
+            break;
+        case OPC_EXTS:
+            check_mips_64(ctx);
+            check_insn(env, ctx, INSN_OCTEON);
+            gen_exts(env, ctx, op1, rt, rs, sa, rd);
+            break;
+        case OPC_EXTS32:
+            check_mips_64(ctx);
+            check_insn(env, ctx, INSN_OCTEON);
+            gen_exts(env, ctx, op1, rt, rs, sa, rd);
+            break;
+        case OPC_SAA:
+            check_insn(env, ctx, INSN_OCTEON);
+            gen_saa(env, ctx, op1, rt, rs);
+            break;
+        case OPC_SAAD:
+            check_insn(env, ctx, INSN_OCTEON);
+            check_mips_64(ctx);
+            gen_saa(env, ctx, op1, rt, rs);
+            break;
+#endif
         case OPC_CLO:
         case OPC_CLZ:
             check_insn(env, ctx, ISA_MIPS32);
@@ -11881,13 +12245,24 @@  static void decode_opc (CPUState *env, DisasContext *ctx, int *is_branch)
             break;
         case OPC_DIV_G_2F:
         case OPC_DIVU_G_2F:
-        case OPC_MULT_G_2F:
         case OPC_MULTU_G_2F:
         case OPC_MOD_G_2F:
         case OPC_MODU_G_2F:
             check_insn(env, ctx, INSN_LOONGSON2F);
             gen_loongson_integer(ctx, op1, rd, rs, rt);
             break;
+        case OPC_MULT_G_2F:
+            if (!(env->insn_flags & CPU_OCTEON)) {
+                check_insn(env, ctx, INSN_LOONGSON2F);
+                gen_loongson_integer(ctx, op1, rd, rs, rt);
+            } else {
+#if defined(TARGET_MIPS64)
+                /* Cavium Specific vmm0 */
+                check_mips_64(ctx);
+                gen_LMI(env, ctx, op1, rs, rt, rd);
+#endif
+            }
+            break;
 #if defined(TARGET_MIPS64)
         case OPC_DCLO:
         case OPC_DCLZ:
@@ -11895,7 +12270,6 @@  static void decode_opc (CPUState *env, DisasContext *ctx, int *is_branch)
             check_mips_64(ctx);
             gen_cl(ctx, op1, rd, rs);
             break;
-        case OPC_DMULT_G_2F:
         case OPC_DMULTU_G_2F:
         case OPC_DDIV_G_2F:
         case OPC_DDIVU_G_2F:
@@ -11904,6 +12278,37 @@  static void decode_opc (CPUState *env, DisasContext *ctx, int *is_branch)
             check_insn(env, ctx, INSN_LOONGSON2F);
             gen_loongson_integer(ctx, op1, rd, rs, rt);
             break;
+        case OPC_DMULT_G_2F:
+            if (!(env->insn_flags & CPU_OCTEON)) {
+                check_insn(env, ctx, INSN_LOONGSON2F);
+                gen_loongson_integer(ctx, op1, rd, rs, rt);
+            } else {
+                /* Cavium Specific instruction v3mulu */
+                check_mips_64(ctx);
+                gen_LMI(env, ctx, op1, rs, rt, rd);
+            }
+            break;
+        case OPC_SEQ:
+        case OPC_SNE:
+            check_mips_64(ctx);
+            check_insn(env, ctx, INSN_OCTEON);
+            gen_set(ctx, op1, rd, rs, rt);
+            break;
+        case OPC_SEQI:
+        case OPC_SNEI:
+            check_mips_64(ctx);
+            check_insn(env, ctx, INSN_OCTEON);
+            gen_set_imm(env, op1, rt, rs, imm10);
+            break;
+        case OPC_POP:
+            check_insn(env, ctx, INSN_OCTEON);
+            gen_pop_count(ctx, op1, rd, rs);
+            break;
+        case OPC_DPOP:
+            check_mips_64(ctx);
+            check_insn(env, ctx, INSN_OCTEON);
+            gen_pop_count(ctx, op1, rd, rs);
+            break;
 #endif
         default:            /* Invalid */
             MIPS_INVAL("special2");
@@ -12195,10 +12600,18 @@  static void decode_opc (CPUState *env, DisasContext *ctx, int *is_branch)
         break;
 
     /* COP2.  */
-    case OPC_LWC2:
-    case OPC_LDC2:
-    case OPC_SWC2:
-    case OPC_SDC2:
+    /* Conflicting opcodes with Cavium specific branch instructions
+       if cpu_model is set to Octeon these opcodes will
+       belong to Octeon processor */
+    case OPC_LWC2: /* BBIT0 */
+    case OPC_LDC2: /* BBIT032 */
+    case OPC_SWC2: /* BBIT1 */
+    case OPC_SDC2: /* BBIT132 */
+        if (env->insn_flags & CPU_OCTEON) {
+            gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
+            *is_branch = 1;
+            break;
+        }
     case OPC_CP2:
         /* COP2: Not implemented. */
         generate_exception_err(ctx, EXCP_CpU, 2);
@@ -12587,6 +13000,18 @@  static void mips_tcg_init(void)
     cpu_dspctrl = tcg_global_mem_new(TCG_AREG0,
                                      offsetof(CPUState, active_tc.DSPControl),
                                      "DSPControl");
+    mpl0 = tcg_global_mem_new(TCG_AREG0,
+                              offsetof(CPUState, active_tc.MPL0), "MPL0");
+    mpl1 = tcg_global_mem_new(TCG_AREG0,
+                              offsetof(CPUState, active_tc.MPL1), "MPL1");
+    mpl2 = tcg_global_mem_new(TCG_AREG0,
+                              offsetof(CPUState, active_tc.MPL2), "MPL2");
+    p0 = tcg_global_mem_new(TCG_AREG0,
+                            offsetof(CPUState, active_tc.P0), "P0");
+    p1 = tcg_global_mem_new(TCG_AREG0,
+                            offsetof(CPUState, active_tc.P1), "P1");
+    p2 = tcg_global_mem_new(TCG_AREG0,
+                            offsetof(CPUState, active_tc.P2), "P2");
     bcond = tcg_global_mem_new(TCG_AREG0,
                                offsetof(CPUState, bcond), "bcond");
     btarget = tcg_global_mem_new(TCG_AREG0,