Patchwork [v7,08/14] target-mips-ase-dsp: Add multiply instructions

login
register
mail settings
Submitter Jia Liu
Date Aug. 28, 2012, 6:36 a.m.
Message ID <1346135785-12119-9-git-send-email-proljc@gmail.com>
Download mbox | patch
Permalink /patch/180353/
State New
Headers show

Comments

Jia Liu - Aug. 28, 2012, 6:36 a.m.
Add MIPS ASE DSP Multiply instructions.

Signed-off-by: Jia Liu <proljc@gmail.com>
---
 target-mips/dsp_helper.c | 2024 ++++++++++++++++++++++++++++++++++++++++++++++
 target-mips/helper.h     |   91 +++
 target-mips/translate.c  |  503 ++++++++++++
 3 files changed, 2618 insertions(+)
Aurelien Jarno - Sept. 6, 2012, 9:11 a.m.
On Tue, Aug 28, 2012 at 02:36:19PM +0800, Jia Liu wrote:
> Add MIPS ASE DSP Multiply instructions.
> 
> Signed-off-by: Jia Liu <proljc@gmail.com>
> ---
>  target-mips/dsp_helper.c | 2024 ++++++++++++++++++++++++++++++++++++++++++++++
>  target-mips/helper.h     |   91 +++
>  target-mips/translate.c  |  503 ++++++++++++
>  3 files changed, 2618 insertions(+)
> 
> diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c
> index 8e7bccb..ff51d2f 100644
> --- a/target-mips/dsp_helper.c
> +++ b/target-mips/dsp_helper.c
> @@ -4187,6 +4187,2030 @@ target_ulong helper_shrav_r_pw(target_ulong rt, target_ulong sa)
>  }
>  #endif
>  
> +/** DSP Multiply Sub-class insns **/
> +target_ulong helper_muleu_s_ph_qbl(CPUMIPSState *env,
> +                                   target_ulong rs, target_ulong rt)
> +{
> +    uint8_t rs3, rs2;
> +    uint16_t tempB, tempA, rth, rtl;
> +    uint32_t temp;
> +    uint32_t rd;
> +
> +    rs3 = (rs & MIPSDSP_Q3) >> 24;
> +    rs2 = (rs & MIPSDSP_Q2) >> 16;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +    tempB = mipsdsp_mul_u8_u16(env, rs3, rth);
> +    tempA = mipsdsp_mul_u8_u16(env, rs2, rtl);
> +    temp = ((uint32_t)tempB << 16) | ((uint32_t)tempA & MIPSDSP_LO);
> +    rd = temp;
> +    return (target_long)(int32_t)rd;
> +}

Why use the variable rd here? It is only a copy of temp, so temp could be cast and returned directly.

> +target_ulong helper_muleu_s_ph_qbr(CPUMIPSState *env,
> +                                   target_ulong rs, target_ulong rt)
> +{
> +    uint8_t  rs1, rs0;
> +    uint16_t tempB, tempA;
> +    uint16_t rth,   rtl;
> +    uint32_t temp;
> +    uint32_t rd;
> +
> +    rs1 = (rs & MIPSDSP_Q1) >>  8;
> +    rs0 =  rs & MIPSDSP_Q0;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +
> +    tempB = mipsdsp_mul_u8_u16(env, rs1, rth);
> +    tempA = mipsdsp_mul_u8_u16(env, rs0, rtl);
> +    temp = ((uint32_t)tempB << 16) | ((uint32_t)tempA & MIPSDSP_LO);
> +    rd = temp;
> +    return (target_long)(int32_t)rd;
> +}

Ditto: the intermediate variable rd looks unnecessary in this function as well.

> +#if defined(TARGET_MIPS64)
> +target_ulong helper_muleu_s_qh_obl(CPUMIPSState *env,
> +                                   target_ulong rs, target_ulong rt)
> +{
> +    uint8_t rs3, rs2, rs1, rs0;
> +    uint16_t rt3, rt2, rt1, rt0;
> +    uint16_t tempD, tempC, tempB, tempA;
> +    uint64_t temp;
> +
> +    rs3 = (rs >> 56) & MIPSDSP_Q0;
> +    rs2 = (rs >> 48) & MIPSDSP_Q0;
> +    rs1 = (rs >> 40) & MIPSDSP_Q0;
> +    rs0 = (rs >> 32) & MIPSDSP_Q0;
> +    rt3 = (rt >> 48) & MIPSDSP_LO;
> +    rt2 = (rt >> 32) & MIPSDSP_LO;
> +    rt1 = (rt >> 16) & MIPSDSP_LO;
> +    rt0 = rt & MIPSDSP_LO;
> +
> +    tempD = mipsdsp_mul_u8_u16(env, rs3, rt3);
> +    tempC = mipsdsp_mul_u8_u16(env, rs2, rt2);
> +    tempB = mipsdsp_mul_u8_u16(env, rs1, rt1);
> +    tempA = mipsdsp_mul_u8_u16(env, rs0, rt0);
> +
> +    temp = ((uint64_t)tempD << 48) | ((uint64_t)tempC << 32) |
> +           ((uint64_t)tempB << 16) | (uint64_t)tempA;
> +    return temp;
> +}
> +
> +target_ulong helper_muleu_s_qh_obr(CPUMIPSState *env,
> +                                   target_ulong rs, target_ulong rt)
> +{
> +    uint8_t rs3, rs2, rs1, rs0;
> +    uint16_t rt3, rt2, rt1, rt0;
> +    uint16_t tempD, tempC, tempB, tempA;
> +    uint64_t temp;
> +
> +    rs3 = (rs >> 24) & MIPSDSP_Q0;
> +    rs2 = (rs >> 16) & MIPSDSP_Q0;
> +    rs1 = (rs >> 8) & MIPSDSP_Q0;
> +    rs0 = rs & MIPSDSP_Q0;
> +    rt3 = (rt >> 48) & MIPSDSP_LO;
> +    rt2 = (rt >> 32) & MIPSDSP_LO;
> +    rt1 = (rt >> 16) & MIPSDSP_LO;
> +    rt0 = rt & MIPSDSP_LO;
> +
> +    tempD = mipsdsp_mul_u8_u16(env, rs3, rt3);
> +    tempC = mipsdsp_mul_u8_u16(env, rs2, rt2);
> +    tempB = mipsdsp_mul_u8_u16(env, rs1, rt1);
> +    tempA = mipsdsp_mul_u8_u16(env, rs0, rt0);
> +
> +    temp = ((uint64_t)tempD << 48) | ((uint64_t)tempC << 32) |
> +           ((uint64_t)tempB << 16) | (uint64_t)tempA;
> +
> +    return temp;
> +}
> +#endif
> +
> +target_ulong helper_mulq_rs_ph(CPUMIPSState *env,
> +                               target_ulong rs, target_ulong rt)
> +{
> +    int16_t tempB, tempA, rsh, rsl, rth, rtl;
> +    int32_t temp;
> +    uint32_t rd;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +
> +    tempB = mipsdsp_rndq15_mul_q15_q15(env, rsh, rth);
> +    tempA = mipsdsp_rndq15_mul_q15_q15(env, rsl, rtl);
> +    temp = ((uint32_t)tempB << 16) | ((uint32_t)tempA & MIPSDSP_LO);
> +    rd = temp;
> +
> +    return (target_long)(int32_t)rd;
> +}

Same here: rd is just a copy of temp and could be dropped.

> +#if defined(TARGET_MIPS64)
> +target_ulong helper_mulq_rs_qh(CPUMIPSState *env,
> +                               target_ulong rs, target_ulong rt)
> +{
> +    uint16_t rs3, rs2, rs1, rs0;
> +    uint16_t rt3, rt2, rt1, rt0;
> +    uint16_t tempD, tempC, tempB, tempA;
> +    uint64_t temp;
> +
> +    rs3 = (rs >> 48) & MIPSDSP_LO;
> +    rs2 = (rs >> 32) & MIPSDSP_LO;
> +    rs1 = (rs >> 16) & MIPSDSP_LO;
> +    rs0 = rs & MIPSDSP_LO;
> +    rt3 = (rt >> 48) & MIPSDSP_LO;
> +    rt2 = (rt >> 32) & MIPSDSP_LO;
> +    rt1 = (rt >> 16) & MIPSDSP_LO;
> +    rt0 = rt & MIPSDSP_LO;
> +
> +    tempD = mipsdsp_rndq15_mul_q15_q15(env, rs3, rt3);
> +    tempC = mipsdsp_rndq15_mul_q15_q15(env, rs2, rt2);
> +    tempB = mipsdsp_rndq15_mul_q15_q15(env, rs1, rt1);
> +    tempA = mipsdsp_rndq15_mul_q15_q15(env, rs0, rt0);
> +
> +    temp = ((uint64_t)tempD << 48) | ((uint64_t)tempC << 32) |
> +           ((uint64_t)tempB << 16) | (uint64_t)tempA;
> +    return temp;
> +}
> +#endif
> +
> +target_ulong helper_muleq_s_w_phl(CPUMIPSState *env,
> +                                  target_ulong rs, target_ulong rt)
> +{
> +    int16_t rsh, rth;
> +    int32_t temp;
> +    uint32_t rd;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    temp = mipsdsp_mul_q15_q15_overflowflag21(env, rsh, rth);
> +    rd = temp;
> +
> +    return (target_long)(int32_t)rd;
> +}
> +
> +target_ulong helper_muleq_s_w_phr(CPUMIPSState *env,
> +                                  target_ulong rs, target_ulong rt)
> +{
> +    int16_t rsl, rtl;
> +    int32_t temp;
> +    uint32_t rd;
> +
> +    rsl = rs & MIPSDSP_LO;
> +    rtl = rt & MIPSDSP_LO;
> +    temp = mipsdsp_mul_q15_q15_overflowflag21(env, rsl, rtl);
> +    rd = temp;
> +
> +    return (target_long)(int32_t)rd;
> +}
> +
> +#if defined(TARGET_MIPS64)
> +target_ulong helper_muleq_s_pw_qhl(CPUMIPSState *env,
> +                                   target_ulong rs, target_ulong rt)
> +{
> +    uint16_t rsB, rsA;
> +    uint16_t rtB, rtA;
> +    uint32_t tempB, tempA;
> +    uint64_t temp;
> +
> +    rsB = (rs >> 48) & MIPSDSP_LO;
> +    rsA = (rs >> 32) & MIPSDSP_LO;
> +    rtB = (rt >> 48) & MIPSDSP_LO;
> +    rtA = (rt >> 32) & MIPSDSP_LO;
> +
> +    tempB = mipsdsp_mul_q15_q15(env, 5, rsB, rtB);
> +    tempA = mipsdsp_mul_q15_q15(env, 5, rsA, rtA);
> +
> +    temp = ((uint64_t)tempB << 32) | (uint64_t)tempA;
> +
> +    return temp;
> +}
> +
> +target_ulong helper_muleq_s_pw_qhr(CPUMIPSState *env,
> +                                   target_ulong rs, target_ulong rt)
> +{
> +    uint16_t rsB, rsA;
> +    uint16_t rtB, rtA;
> +    uint32_t tempB, tempA;
> +    uint64_t temp;
> +
> +    rsB = (rs >> 16) & MIPSDSP_LO;
> +    rsA = rs & MIPSDSP_LO;
> +    rtB = (rt >> 16) & MIPSDSP_LO;
> +    rtA = rt & MIPSDSP_LO;
> +
> +    tempB = mipsdsp_mul_q15_q15(env, 5, rsB, rtB);
> +    tempA = mipsdsp_mul_q15_q15(env, 5, rsA, rtA);
> +
> +    temp = ((uint64_t)tempB << 32) | (uint64_t)tempA;
> +
> +    return temp;
> +}
> +#endif
> +
> +void helper_dpau_h_qbl(CPUMIPSState *env,
> +                       uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    uint8_t rs3, rs2;
> +    uint8_t rt3, rt2;
> +    uint16_t tempB, tempA;
> +    uint64_t tempC, tempBL, tempAL, dotp;
> +
> +    rs3 = (rs & MIPSDSP_Q3) >> 24;
> +    rt3 = (rt & MIPSDSP_Q3) >> 24;
> +    rs2 = (rs & MIPSDSP_Q2) >> 16;
> +    rt2 = (rt & MIPSDSP_Q2) >> 16;
> +    tempB = mipsdsp_mul_u8_u8(rs3, rt3);
> +    tempA = mipsdsp_mul_u8_u8(rs2, rt2);
> +    tempBL = tempB;
> +    tempAL = tempA;
> +    dotp = tempBL + tempAL;
> +    tempC = (((uint64_t)env->active_tc.HI[ac] << 32) |
> +             ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO)) + dotp;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
> +}
> +
> +void helper_dpau_h_qbr(CPUMIPSState *env,
> +                       uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    uint8_t rs1, rs0;
> +    uint8_t rt1, rt0;
> +    uint16_t tempB, tempA;
> +    uint64_t tempC, tempBL, tempAL, dotp;
> +
> +    rs1 = (rs & MIPSDSP_Q1) >> 8;
> +    rt1 = (rt & MIPSDSP_Q1) >> 8;
> +    rs0 = (rs & MIPSDSP_Q0);
> +    rt0 = (rt & MIPSDSP_Q0);
> +    tempB = mipsdsp_mul_u8_u8(rs1, rt1);
> +    tempA = mipsdsp_mul_u8_u8(rs0, rt0);
> +    tempBL = tempB;
> +    tempAL = tempA;
> +    dotp = tempBL + tempAL;
> +    tempC = (((uint64_t)env->active_tc.HI[ac] << 32) |
> +             ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO)) + dotp;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
> +}
> +
> +#if defined(TARGET_MIPS64)
> +void helper_dpau_h_obl(CPUMIPSState *env,
> +                       target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    uint8_t rs7, rs6, rs5, rs4;
> +    uint8_t rt7, rt6, rt5, rt4;
> +    uint16_t tempD, tempC, tempB, tempA;
> +    uint64_t temp[2];
> +    uint64_t acc[2];
> +    uint64_t temp_sum;
> +
> +    temp[0] = 0;
> +    temp[1] = 0;
> +
> +    rs7 = (rs >> 56) & MIPSDSP_Q0;
> +    rs6 = (rs >> 48) & MIPSDSP_Q0;
> +    rs5 = (rs >> 40) & MIPSDSP_Q0;
> +    rs4 = (rs >> 32) & MIPSDSP_Q0;
> +    rt7 = (rt >> 56) & MIPSDSP_Q0;
> +    rt6 = (rt >> 48) & MIPSDSP_Q0;
> +    rt5 = (rt >> 40) & MIPSDSP_Q0;
> +    rt4 = (rt >> 32) & MIPSDSP_Q0;
> +
> +    tempD = mipsdsp_mul_u8_u8(rs7, rt7);
> +    tempC = mipsdsp_mul_u8_u8(rs6, rt6);
> +    tempB = mipsdsp_mul_u8_u8(rs5, rt5);
> +    tempA = mipsdsp_mul_u8_u8(rs4, rt4);
> +
> +    temp[0] = (uint64_t)tempD + (uint64_t)tempC +
> +      (uint64_t)tempB + (uint64_t)tempA;
> +
> +    acc[0] = env->active_tc.LO[ac];
> +    acc[1] = env->active_tc.HI[ac];
> +
> +    temp_sum = acc[0] + temp[0];
> +
> +    if (temp_sum < acc[0] && temp_sum < temp[0]) {
> +        temp[1] += 1;
> +    }
> +    temp[0] = temp_sum;
> +    temp[1] += acc[1];
> +
> +    env->active_tc.HI[ac] = temp[1];
> +    env->active_tc.LO[ac] = temp[0];
> +}
> +
> +void helper_dpau_h_obr(CPUMIPSState *env,
> +                       target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    uint8_t rs3, rs2, rs1, rs0;
> +    uint8_t rt3, rt2, rt1, rt0;
> +    uint16_t tempD, tempC, tempB, tempA;
> +    uint64_t temp[2];
> +    uint64_t acc[2];
> +    uint64_t temp_sum;
> +
> +    temp[0] = 0;
> +    temp[1] = 0;
> +
> +    rs3 = (rs >> 24) & MIPSDSP_Q0;
> +    rs2 = (rs >> 16) & MIPSDSP_Q0;
> +    rs1 = (rs >> 8) & MIPSDSP_Q0;
> +    rs0 = rs & MIPSDSP_Q0;
> +    rt3 = (rt >> 24) & MIPSDSP_Q0;
> +    rt2 = (rt >> 16) & MIPSDSP_Q0;
> +    rt1 = (rt >> 8) & MIPSDSP_Q0;
> +    rt0 = rt & MIPSDSP_Q0;
> +
> +    tempD = mipsdsp_mul_u8_u8(rs3, rt3);
> +    tempC = mipsdsp_mul_u8_u8(rs2, rt2);
> +    tempB = mipsdsp_mul_u8_u8(rs1, rt1);
> +    tempA = mipsdsp_mul_u8_u8(rs0, rt0);
> +
> +    temp[0] = (uint64_t)tempD + (uint64_t)tempC +
> +              (uint64_t)tempB + (uint64_t)tempA;
> +
> +    acc[0] = env->active_tc.LO[ac];
> +    acc[1] = env->active_tc.HI[ac];
> +
> +    temp_sum = acc[0] + temp[0];
> +
> +    if (temp_sum < acc[0] && temp_sum < temp[0]) {
> +        temp[1] += 1;
> +    }
> +    temp[0] = temp_sum;
> +    temp[1] += acc[1];
> +
> +    env->active_tc.HI[ac] = temp[1];
> +    env->active_tc.LO[ac] = temp[0];
> +}
> +#endif
> +
> +void helper_dpsu_h_qbl(CPUMIPSState *env,
> +                       uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    uint8_t  rs3, rs2, rt3, rt2;
> +    uint16_t tempB,  tempA;
> +    uint64_t dotp, tempBL, tempAL, tempC;
> +
> +    rs3 = (rs & MIPSDSP_Q3) >> 24;
> +    rs2 = (rs & MIPSDSP_Q2) >> 16;
> +    rt3 = (rt & MIPSDSP_Q3) >> 24;
> +    rt2 = (rt & MIPSDSP_Q2) >> 16;
> +
> +    tempB = mipsdsp_mul_u8_u8(rs3, rt3);
> +    tempA = mipsdsp_mul_u8_u8(rs2, rt2);
> +    tempBL = tempB & 0xFFFF;
> +    tempAL = tempA & 0xFFFF;
> +
> +    dotp   = tempBL + tempAL;
> +    tempC  = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +             ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    tempC -= dotp;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
> +}
> +
> +void helper_dpsu_h_qbr(CPUMIPSState *env,
> +                       uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    uint8_t  rs1, rs0, rt1, rt0;
> +    uint16_t tempB,  tempA;
> +    uint64_t dotp, tempBL, tempAL, tempC;
> +
> +    rs1 = (rs & MIPSDSP_Q1) >> 8;
> +    rs0 = (rs & MIPSDSP_Q0);
> +    rt1 = (rt & MIPSDSP_Q1) >> 8;
> +    rt0 = (rt & MIPSDSP_Q0);
> +
> +    tempB = mipsdsp_mul_u8_u8(rs1, rt1);
> +    tempA = mipsdsp_mul_u8_u8(rs0, rt0);
> +    tempBL = tempB & 0xFFFF;
> +    tempAL = tempA & 0xFFFF;
> +
> +    dotp   = tempBL + tempAL;
> +    tempC  = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +             ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    tempC -= dotp;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
> +}
> +
> +#if defined(TARGET_MIPS64)
> +void helper_dpsu_h_obl(CPUMIPSState *env,
> +                       target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    uint8_t rs7, rs6, rs5, rs4;
> +    uint8_t rt7, rt6, rt5, rt4;
> +    uint16_t tempD, tempC, tempB, tempA;
> +    uint64_t acc[2];
> +    uint64_t temp[2];
> +    uint64_t temp_sum;
> +
> +    temp[0] = 0;
> +    temp[1] = 0;
> +
> +    rs7 = (rs >> 56) & MIPSDSP_Q0;
> +    rs6 = (rs >> 48) & MIPSDSP_Q0;
> +    rs5 = (rs >> 40) & MIPSDSP_Q0;
> +    rs4 = (rs >> 32) & MIPSDSP_Q0;
> +    rt7 = (rt >> 56) & MIPSDSP_Q0;
> +    rt6 = (rt >> 48) & MIPSDSP_Q0;
> +    rt5 = (rt >> 40) & MIPSDSP_Q0;
> +    rt4 = (rt >> 32) & MIPSDSP_Q0;
> +
> +    tempD = mipsdsp_mul_u8_u8(rs7, rt7);
> +    tempC = mipsdsp_mul_u8_u8(rs6, rt6);
> +    tempB = mipsdsp_mul_u8_u8(rs5, rt5);
> +    tempA = mipsdsp_mul_u8_u8(rs4, rt4);
> +
> +    temp[0] = (uint64_t)tempD + (uint64_t)tempC +
> +              (uint64_t)tempB + (uint64_t)tempA;
> +
> +    acc[0] = env->active_tc.LO[ac];
> +    acc[1] = env->active_tc.HI[ac];
> +
> +    temp_sum = acc[0] - temp[0];
> +    if (temp_sum > acc[0]) {
> +        acc[1] -= 1;
> +    }
> +    acc[0] = temp_sum;
> +    acc[1] -= temp[1];
> +
> +    env->active_tc.HI[ac] = acc[1];
> +    env->active_tc.LO[ac] = acc[0];
> +}
> +
> +void helper_dpsu_h_obr(CPUMIPSState *env,
> +                       target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    uint8_t rs3, rs2, rs1, rs0;
> +    uint8_t rt3, rt2, rt1, rt0;
> +    uint16_t tempD, tempC, tempB, tempA;
> +    uint64_t acc[2];
> +    uint64_t temp[2];
> +    uint64_t temp_sum;
> +
> +    temp[0] = 0;
> +    temp[1] = 0;
> +
> +    rs3 = (rs >> 24) & MIPSDSP_Q0;
> +    rs2 = (rs >> 16) & MIPSDSP_Q0;
> +    rs1 = (rs >> 8) & MIPSDSP_Q0;
> +    rs0 = rs & MIPSDSP_Q0;
> +    rt3 = (rt >> 24) & MIPSDSP_Q0;
> +    rt2 = (rt >> 16) & MIPSDSP_Q0;
> +    rt1 = (rt >> 8) & MIPSDSP_Q0;
> +    rt0 = rt & MIPSDSP_Q0;
> +
> +    tempD = mipsdsp_mul_u8_u8(rs3, rt3);
> +    tempC = mipsdsp_mul_u8_u8(rs2, rt2);
> +    tempB = mipsdsp_mul_u8_u8(rs1, rt1);
> +    tempA = mipsdsp_mul_u8_u8(rs0, rt0);
> +
> +    temp[0] = (uint64_t)tempD + (uint64_t)tempC +
> +              (uint64_t)tempB + (uint64_t)tempA;
> +
> +    acc[0] = env->active_tc.LO[ac];
> +    acc[1] = env->active_tc.HI[ac];
> +
> +    temp_sum = acc[0] - temp[0];
> +    if (temp_sum > acc[0]) {
> +        acc[1] -= 1;
> +    }
> +    acc[0] = temp_sum;
> +    acc[1] -= temp[1];
> +
> +    env->active_tc.HI[ac] = acc[1];
> +    env->active_tc.LO[ac] = acc[0];
> +}
> +#endif
> +
> +void helper_dpa_w_ph(CPUMIPSState *env,
> +                     uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    uint16_t rsh, rsl, rth, rtl;
> +    int32_t  tempA, tempB;
> +    int64_t  acc, tempAL, tempBL;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +
> +    tempB = (int32_t)rsh * (int32_t)rth;
> +    tempA = (int32_t)rsl * (int32_t)rtl;
> +    tempBL = tempB;
> +    tempAL = tempA;
> +
> +    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    acc += tempBL + tempAL;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
> +}
> +
> +#if defined(TARGET_MIPS64)
> +void helper_dpa_w_qh(CPUMIPSState *env,
> +                     target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int32_t rs3, rs2, rs1, rs0;
> +    int32_t rt3, rt2, rt1, rt0;
> +    int32_t tempD, tempC, tempB, tempA;
> +    int64_t acc[2];
> +    int64_t temp[2];
> +    int64_t temp_sum;
> +
> +    rs3 = (rs >> 48) & MIPSDSP_LO;
> +    rs2 = (rs >> 32) & MIPSDSP_LO;
> +    rs1 = (rs >> 16) & MIPSDSP_LO;
> +    rs0 = rs & MIPSDSP_LO;
> +
> +    rt3 = (rt >> 48) & MIPSDSP_LO;
> +    rt2 = (rt >> 32) & MIPSDSP_LO;
> +    rt1 = (rt >> 16) & MIPSDSP_LO;
> +    rt0 = rt & MIPSDSP_LO;
> +
> +    tempD = rs3 * rt3;
> +    tempC = rs2 * rt2;
> +    tempB = rs1 * rt1;
> +    tempA = rs0 * rt0;
> +
> +    temp[0] = (int64_t)tempD + (int64_t)tempC +
> +              (int64_t)tempB + (int64_t)tempA;
> +    temp[0] = (int64_t)(temp[0] << 31) >> 31;
> +
> +    if (((temp[0] >> 63) & 0x01) == 0) {
> +        temp[1] = 0;
> +    } else {
> +        temp[1] = 0xFFFFFFFFFFFFFFFFull;
> +    }
> +
> +    acc[1] = env->active_tc.HI[ac];
> +    acc[0] = env->active_tc.LO[ac];
> +
> +    temp_sum = acc[0] + temp[0];
> +    if (((uint64_t)temp_sum < (uint64_t)acc[0]) &&
> +       ((uint64_t)temp_sum < (uint64_t)temp[0])) {
> +        temp[1] += 1;
> +    }
> +    temp[0] = temp_sum;
> +    temp[1] += acc[1];
> +
> +    env->active_tc.HI[ac] = temp[1];
> +    env->active_tc.LO[ac] = temp[0];
> +}
> +#endif
> +
> +void helper_dpax_w_ph(CPUMIPSState *env,
> +                      uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    uint16_t rsh, rsl, rth, rtl;
> +    int32_t tempB, tempA;
> +    int64_t acc, dotp, tempBL, tempAL;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +
> +    tempB  = (uint32_t)rsh * (uint32_t)rth;
> +    tempA  = (uint32_t)rsl * (uint32_t)rtl;
> +    tempBL = tempB;
> +    tempAL = tempA;
> +    dotp =  tempBL + tempAL;
> +    acc  =  ((uint64_t)env->active_tc.HI[ac] << 32) |
> +            ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    acc  += dotp;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
> +}
> +
> +void helper_dpaq_s_w_ph(CPUMIPSState *env,
> +                        uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    int16_t rsh, rsl, rth, rtl;
> +    int32_t tempB, tempA;
> +    int64_t acc, dotp, tempBL, tempAL;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +
> +    tempB = mipsdsp_mul_q15_q15(env, ac, rsh, rth);
> +    tempA = mipsdsp_mul_q15_q15(env, ac, rsl, rtl);
> +    tempBL = tempB;
> +    tempAL = tempA;
> +    dotp = tempBL + tempAL;
> +    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    acc += dotp;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
> +}
> +
> +#if defined(TARGET_MIPS64)
> +void helper_dpaq_s_w_qh(CPUMIPSState *env,
> +                        target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int32_t rs3, rs2, rs1, rs0;
> +    int32_t rt3, rt2, rt1, rt0;
> +    int32_t tempD, tempC, tempB, tempA;
> +    int64_t temp[2];
> +    int64_t acc[2];
> +    int64_t temp_sum;
> +
> +    rs3 = (rs >> 48) & MIPSDSP_LO;
> +    rs2 = (rs >> 32) & MIPSDSP_LO;
> +    rs1 = (rs >> 16) & MIPSDSP_LO;
> +    rs0 = rs & MIPSDSP_LO;
> +    rt3 = (rt >> 48) & MIPSDSP_LO;
> +    rt2 = (rt >> 32) & MIPSDSP_LO;
> +    rt1 = (rt >> 16) & MIPSDSP_LO;
> +    rt0 = rt & MIPSDSP_LO;
> +
> +    tempD = mipsdsp_mul_q15_q15(env, ac, rs3, rt3);
> +    tempC = mipsdsp_mul_q15_q15(env, ac, rs2, rt2);
> +    tempB = mipsdsp_mul_q15_q15(env, ac, rs1, rt1);
> +    tempA = mipsdsp_mul_q15_q15(env, ac, rs0, rt0);
> +
> +    temp[0] = (int64_t)tempD + (int64_t)tempC +
> +              (int64_t)tempB + (int64_t)tempA;
> +    if (((temp[0] >> 63) & 0x01) == 0) {
> +        temp[1] = 0x00;
> +    } else {
> +        temp[1] = 0xFFFFFFFFFFFFFFFFull;
> +    }
> +
> +    acc[0] = env->active_tc.LO[ac];
> +    acc[1] = env->active_tc.HI[ac];
> +
> +    temp_sum = temp[0] + acc[0];
> +    if ((temp_sum < temp[0]) && (temp_sum < acc[0])) {
> +        temp[1] += 1;
> +    }
> +    temp[0] = temp_sum;
> +    temp[1] += acc[1];
> +
> +    env->active_tc.HI[ac] = temp[1];
> +    env->active_tc.LO[ac] = temp[0];
> +}
> +#endif
> +
> +void helper_dpaqx_s_w_ph(CPUMIPSState *env,
> +                         uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    uint16_t rsh, rsl, rth, rtl;
> +    int32_t tempB, tempA;
> +    int64_t acc, dotp, tempBL, tempAL;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +
> +    tempB = mipsdsp_mul_q15_q15(env, ac, rsh, rtl);
> +    tempA = mipsdsp_mul_q15_q15(env, ac, rsl, rth);
> +    tempBL = tempB;
> +    tempAL = tempA;
> +    dotp = tempBL + tempAL;
> +    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    acc += dotp;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
> +}
> +
> +void helper_dpaqx_sa_w_ph(CPUMIPSState *env,
> +                          uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    int16_t rsh, rsl, rth, rtl;
> +    int32_t tempB, tempA, tempC62_31, tempC63;
> +    int64_t acc, dotp, tempBL, tempAL, tempC;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +
> +    tempB = mipsdsp_mul_q15_q15(env, ac, rsh, rtl);
> +    tempA = mipsdsp_mul_q15_q15(env, ac, rsl, rth);
> +    tempBL = tempB;
> +    tempAL = tempA;
> +    dotp = tempBL + tempAL;
> +    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    tempC = acc + dotp;
> +    tempC63 = (tempC >> 63) & 0x01;
> +    tempC62_31 = (tempC >> 31) & 0xFFFFFFFF;
> +
> +    if ((tempC63 == 0) && (tempC62_31 != 0x00000000)) {
> +        tempC = 0x7FFFFFFF;
> +        set_DSPControl_overflow_flag(env, 1, 16 + ac);
> +    }
> +
> +    if ((tempC63 == 1) && (tempC62_31 != 0xFFFFFFFF)) {
> +        tempC = 0xFFFFFFFF80000000ull;
> +        set_DSPControl_overflow_flag(env, 1, 16 + ac);
> +    }
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
> +}
> +
> +void helper_dps_w_ph(CPUMIPSState *env,
> +                     uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    uint16_t rsh, rsl, rth, rtl;
> +    int32_t tempB, tempA;
> +    int64_t acc, dotp, tempBL, tempAL;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +
> +    tempB  = (int32_t)rsh * (int32_t)rth;
> +    tempA  = (int32_t)rsl * (int32_t)rtl;
> +    tempBL = tempB;
> +    tempAL = tempA;
> +    dotp =  tempBL + tempAL;
> +    acc  =  ((uint64_t)env->active_tc.HI[ac] << 32) |
> +            ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    acc  -= dotp;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
> +}
> +
> +#if defined(TARGET_MIPS64)
> +void helper_dps_w_qh(CPUMIPSState *env,
> +                     target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int16_t rs3, rs2, rs1, rs0;
> +    int16_t rt3, rt2, rt1, rt0;
> +    int32_t tempD, tempC, tempB, tempA;
> +    int64_t acc[2];
> +    int64_t temp[2];
> +    int64_t temp_sum;
> +
> +    rs3 = (rs >> 48) & MIPSDSP_LO;
> +    rs2 = (rs >> 32) & MIPSDSP_LO;
> +    rs1 = (rs >> 16) & MIPSDSP_LO;
> +    rs0 = rs & MIPSDSP_LO;
> +    rt3 = (rt >> 48) & MIPSDSP_LO;
> +    rt2 = (rt >> 32) & MIPSDSP_LO;
> +    rt1 = (rt >> 16) & MIPSDSP_LO;
> +    rt0 = rt & MIPSDSP_LO;
> +
> +    tempD = (int32_t)rs3 * (int32_t)rt3;
> +    tempC = (int32_t)rs2 * (int32_t)rt2;
> +    tempB = (int32_t)rs1 * (int32_t)rt1;
> +    tempA = (int32_t)rs0 * (int32_t)rt0;
> +
> +    temp[0] = (int64_t)tempD + (int64_t)tempC +
> +              (int64_t)tempB + (int64_t)tempA;
> +
> +    temp[0] = (int64_t)(temp[0] << 31) >> 31;
> +    if (((temp[0] >> 32) & 0x01) == 0) {
> +        temp[1] = 0x00;
> +    } else {
> +        temp[1] = 0xFFFFFFFFFFFFFFFFull;
> +    }
> +
> +    acc[0] = env->active_tc.LO[ac];
> +    acc[1] = env->active_tc.HI[ac];
> +
> +    temp_sum = acc[0] - temp[0];
> +    if ((uint64_t)temp_sum > (uint64_t)acc[0]) {
> +        acc[1] -= 1;
> +    }
> +    acc[0] = temp_sum;
> +    acc[1] -= temp[1];
> +
> +    env->active_tc.HI[ac] = acc[1];
> +    env->active_tc.LO[ac] = acc[0];
> +}
> +#endif
> +
> +void helper_dpsx_w_ph(CPUMIPSState *env,
> +                      uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    uint16_t rsh, rsl, rth, rtl;
> +    int32_t  tempB,  tempA;
> +    int64_t acc, dotp, tempBL, tempAL;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +
> +    tempB = (int32_t)rsh * (int32_t)rtl;
> +    tempA = (int32_t)rsl * (int32_t)rth;
> +    tempBL = tempB;
> +    tempAL = tempA;
> +    dotp = tempBL + tempAL;
> +
> +    acc  = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +           ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    acc -= dotp;
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
> +}
> +
> +void helper_dpsq_s_w_ph(CPUMIPSState *env,
> +                        uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    int16_t rsh, rsl, rth, rtl;
> +    int32_t tempB, tempA;
> +    int64_t acc, dotp, tempBL, tempAL;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +
> +    tempB = mipsdsp_mul_q15_q15(env, ac, rsh, rth);
> +    tempA = mipsdsp_mul_q15_q15(env, ac, rsl, rtl);
> +    tempBL = tempB;
> +    tempAL = tempA;
> +    dotp = tempBL + tempAL;
> +    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +           ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    acc -= dotp;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
> +}
> +
> +#if defined(TARGET_MIPS64)
> +void helper_dpsq_s_w_qh(CPUMIPSState *env,
> +                        target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int16_t rs3, rs2, rs1, rs0;
> +    int16_t rt3, rt2, rt1, rt0;
> +    int32_t tempD, tempC, tempB, tempA;
> +    int64_t acc[2];
> +    int64_t temp[2];
> +    int64_t temp_sum;
> +
> +    temp[0] = 0;
> +    temp[1] = 0;
> +
> +    rs3 = (rs >> 48) & MIPSDSP_LO;
> +    rs2 = (rs >> 32) & MIPSDSP_LO;
> +    rs1 = (rs >> 16) & MIPSDSP_LO;
> +    rs0 = rs & MIPSDSP_LO;
> +    rt3 = (rt >> 48) & MIPSDSP_LO;
> +    rt2 = (rt >> 32) & MIPSDSP_LO;
> +    rt1 = (rt >> 16) & MIPSDSP_LO;
> +    rt0 = rt & MIPSDSP_LO;
> +
> +    tempD = mipsdsp_mul_q15_q15(env, ac, rs3, rt3);
> +    tempC = mipsdsp_mul_q15_q15(env, ac, rs2, rt2);
> +    tempB = mipsdsp_mul_q15_q15(env, ac, rs1, rt1);
> +    tempA = mipsdsp_mul_q15_q15(env, ac, rs0, rt0);
> +
> +    temp[0] = (int64_t)tempD + (int64_t)tempC +
> +              (int64_t)tempB + (int64_t)tempA;
> +    if (((temp[0] >> 63) & 0x01) == 0) {
> +        temp[1] = 0x00;
> +    } else {
> +        temp[1] = 0xFFFFFFFFFFFFFFFFull;
> +    }
> +
> +    acc[0] = env->active_tc.LO[ac];
> +    acc[1] = env->active_tc.HI[ac];
> +
> +    temp_sum = acc[0] - temp[0];
> +    if ((uint64_t)temp_sum > (uint64_t)acc[0]) {
> +        acc[1] -= 1;
> +    }
> +    acc[0] = temp_sum;
> +    acc[1] -= temp[1];
> +
> +    env->active_tc.HI[ac] = acc[1];
> +    env->active_tc.LO[ac] = acc[0];
> +}
> +#endif
> +
> +void helper_dpsqx_s_w_ph(CPUMIPSState *env,
> +                         uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    int16_t rsh, rsl, rth, rtl;
> +    int32_t tempB, tempA;
> +    int64_t dotp, tempC, tempBL, tempAL;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +
> +    tempB = mipsdsp_mul_q15_q15(env, ac, rsh, rtl);
> +    tempA = mipsdsp_mul_q15_q15(env, ac, rsl, rth);
> +    tempBL = tempB;
> +    tempAL = tempA;
> +    dotp = tempBL + tempAL;
> +    tempC = (((uint64_t)env->active_tc.HI[ac] << 32) |
> +            ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO)) - dotp;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
> +}
> +
> +/*
> + * Cross dot product with subtraction and saturation: rs.high * rt.low plus
> + * rs.low * rt.high is subtracted from HI[ac]:LO[ac]; if the result does not
> + * fit in a signed 32-bit value it is clamped and the overflow flag for the
> + * accumulator (bit 16 + ac) is raised.
> + */
> +void helper_dpsqx_sa_w_ph(CPUMIPSState *env,
> +                          uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    int16_t rs_hi, rs_lo, rt_hi, rt_lo;
> +    int32_t prod_hl, prod_lh, sign_bit, upper_bits;
> +    int64_t cross_sum, result;
> +
> +    rs_hi = (rs & MIPSDSP_HI) >> 16;
> +    rs_lo =  rs & MIPSDSP_LO;
> +    rt_hi = (rt & MIPSDSP_HI) >> 16;
> +    rt_lo =  rt & MIPSDSP_LO;
> +
> +    prod_hl = mipsdsp_mul_q15_q15(env, ac, rs_hi, rt_lo);
> +    prod_lh = mipsdsp_mul_q15_q15(env, ac, rs_lo, rt_hi);
> +    cross_sum = (int64_t)prod_hl + (int64_t)prod_lh;
> +
> +    result  = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +              ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    result -= cross_sum;
> +
> +    /* Bit 63 is the sign; bits 62..31 must all equal it to fit in 32 bits. */
> +    sign_bit   = (result >> 63) & 0x01;
> +    upper_bits = (result >> 31) & 0xFFFFFFFF;
> +
> +    if (sign_bit == 0) {
> +        if (upper_bits != 0) {
> +            /* Positive overflow: clamp to the largest 32-bit value. */
> +            result = 0x7FFFFFFF;
> +            set_DSPControl_overflow_flag(env, 1, 16 + ac);
> +        }
> +    } else if (upper_bits != 0xFFFFFFFF) {
> +        /* Negative overflow: clamp to the smallest 32-bit value. */
> +        result = 0xFFFFFFFF80000000ull;
> +        set_DSPControl_overflow_flag(env, 1, 16 + ac);
> +    }
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((result & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(result & MIPSDSP_LLO);
> +}
> +
> +/*
> + * Multiply the matching halfwords of rs and rt with mipsdsp_mul_q15_q15,
> + * take (high product - low product) and add that difference to the 64-bit
> + * value held in HI[ac]:LO[ac].
> + */
> +void helper_mulsaq_s_w_ph(CPUMIPSState *env,
> +                          uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    int16_t rs_hi, rs_lo, rt_hi, rt_lo;
> +    int32_t prod_hi, prod_lo;
> +    int64_t diff, acc;
> +
> +    rs_hi = (rs & MIPSDSP_HI) >> 16;
> +    rs_lo =  rs & MIPSDSP_LO;
> +    rt_hi = (rt & MIPSDSP_HI) >> 16;
> +    rt_lo =  rt & MIPSDSP_LO;
> +
> +    prod_hi = mipsdsp_mul_q15_q15(env, ac, rs_hi, rt_hi);
> +    prod_lo = mipsdsp_mul_q15_q15(env, ac, rs_lo, rt_lo);
> +    diff = (int64_t)prod_hi - (int64_t)prod_lo;
> +
> +    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    diff = diff + acc;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((diff & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(diff & MIPSDSP_LLO);
> +}
> +
> +#if defined(TARGET_MIPS64)
> +/*
> + * Multiply the four halfword pairs of rs and rt with mipsdsp_mul_q15_q15,
> + * form (p3 - p2) + (p1 - p0) and add the sign-extended result to the
> + * 128-bit accumulator HI[ac]:LO[ac].
> + *
> + * The combination of four 32-bit products needs up to 34 bits, so it is
> + * computed in 64-bit arithmetic; doing it in 32-bit int (as before) could
> + * overflow before the value was widened.
> + */
> +void helper_mulsaq_s_w_qh(CPUMIPSState *env,
> +                          target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int16_t rs3, rs2, rs1, rs0;
> +    int16_t rt3, rt2, rt1, rt0;
> +    int32_t tempD, tempC, tempB, tempA;
> +    int64_t dotp;
> +    uint64_t add_lo, add_hi;
> +    uint64_t acc_lo, acc_hi, sum_lo;
> +
> +    rs3 = (rs >> 48) & MIPSDSP_LO;
> +    rs2 = (rs >> 32) & MIPSDSP_LO;
> +    rs1 = (rs >> 16) & MIPSDSP_LO;
> +    rs0 = rs & MIPSDSP_LO;
> +    rt3 = (rt >> 48) & MIPSDSP_LO;
> +    rt2 = (rt >> 32) & MIPSDSP_LO;
> +    rt1 = (rt >> 16) & MIPSDSP_LO;
> +    rt0 = rt & MIPSDSP_LO;
> +
> +    tempD = mipsdsp_mul_q15_q15(env, ac, rs3, rt3);
> +    tempC = mipsdsp_mul_q15_q15(env, ac, rs2, rt2);
> +    tempB = mipsdsp_mul_q15_q15(env, ac, rs1, rt1);
> +    tempA = mipsdsp_mul_q15_q15(env, ac, rs0, rt0);
> +
> +    /* Widen each product first so the sum cannot overflow. */
> +    dotp = ((int64_t)tempD - (int64_t)tempC) +
> +           ((int64_t)tempB - (int64_t)tempA);
> +
> +    /* 128-bit sign extension of dotp. */
> +    add_lo = (uint64_t)dotp;
> +    add_hi = (dotp < 0) ? 0xFFFFFFFFFFFFFFFFull : 0x00;
> +
> +    acc_lo = env->active_tc.LO[ac];
> +    acc_hi = env->active_tc.HI[ac];
> +
> +    /* Unsigned low-word addition; wrap-around means carry into HI. */
> +    sum_lo = acc_lo + add_lo;
> +    if (sum_lo < acc_lo) {
> +        acc_hi += 1;
> +    }
> +    acc_hi += add_hi;
> +
> +    env->active_tc.HI[ac] = acc_hi;
> +    env->active_tc.LO[ac] = sum_lo;
> +}
> +#endif
> +
> +/*
> + * Add the product returned by mipsdsp_mul_q31_q31(rs, rt) to the 64-bit
> + * value held in HI[ac]:LO[ac], saturating to the signed 64-bit range.
> + * On saturation the overflow flag for the accumulator (bit 16 + ac) is set.
> + *
> + * Overflow of a signed addition happens only when both operands have the
> + * same sign and the sum has the opposite one.  The previous code used the
> + * carry out of bit 63 as "bit 64" of the result, which flagged harmless
> + * mixed-sign additions (e.g. -1 + 2) as overflow.
> + */
> +void helper_dpaq_sa_l_w(CPUMIPSState *env,
> +                        uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    int64_t dotp, acc;
> +    uint64_t temp;
> +
> +    dotp = mipsdsp_mul_q31_q31(env, ac, rs, rt);
> +    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +
> +    /* Wrapping (unsigned) addition; the overflow is detected separately. */
> +    temp = (uint64_t)acc + (uint64_t)dotp;
> +
> +    if ((((uint64_t)acc ^ temp) & ((uint64_t)dotp ^ temp)) >> 63) {
> +        /* Both operands share acc's sign, so clamp in that direction. */
> +        if (acc < 0) {
> +            temp = 0x8000000000000000ull;
> +        } else {
> +            temp = 0x7FFFFFFFFFFFFFFFull;
> +        }
> +
> +        set_DSPControl_overflow_flag(env, 1, 16 + ac);
> +    }
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((temp & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(temp & MIPSDSP_LLO);
> +}
> +
> +#if defined(TARGET_MIPS64)
> +/*
> + * Multiply the two word pairs of rs and rt with mipsdsp_mul_q31_q31, add
> + * the two products as a 128-bit value and hand that to
> + * mipsdsp_sat64_acc_add_q63, whose output is written to HI[ac]:LO[ac].
> + */
> +void helper_dpaq_sa_l_pw(CPUMIPSState *env,
> +                         target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int32_t rs_hi, rs_lo;
> +    int32_t rt_hi, rt_lo;
> +    int64_t prod_hi[2], prod_lo[2];
> +    int64_t dotp[2];
> +    int64_t acc[2];
> +    int64_t low_sum;
> +
> +    rs_hi = (rs >> 32) & MIPSDSP_LLO;
> +    rs_lo = rs & MIPSDSP_LLO;
> +    rt_hi = (rt >> 32) & MIPSDSP_LLO;
> +    rt_lo = rt & MIPSDSP_LLO;
> +
> +    prod_hi[0] = mipsdsp_mul_q31_q31(env, ac, rs_hi, rt_hi);
> +    prod_lo[0] = mipsdsp_mul_q31_q31(env, ac, rs_lo, rt_lo);
> +
> +    /* Element [1] of each product is the sign extension of element [0]. */
> +    prod_hi[1] = (((prod_hi[0] >> 63) & 0x01) == 0) ?
> +                 0x00 : 0xFFFFFFFFFFFFFFFFull;
> +    prod_lo[1] = (((prod_lo[0] >> 63) & 0x01) == 0) ?
> +                 0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    /* 128-bit addition: low words first, then high words plus carry. */
> +    low_sum = prod_hi[0] + prod_lo[0];
> +    dotp[0] = low_sum;
> +    dotp[1] = 0;
> +    if (((uint64_t)low_sum < (uint64_t)prod_hi[0]) &&
> +        ((uint64_t)low_sum < (uint64_t)prod_lo[0])) {
> +        dotp[1] += 1;
> +    }
> +    dotp[1] += prod_hi[1] + prod_lo[1];
> +
> +    mipsdsp_sat64_acc_add_q63(env, acc, ac, dotp);
> +
> +    env->active_tc.HI[ac] = acc[1];
> +    env->active_tc.LO[ac] = acc[0];
> +}
> +#endif
> +
> +/*
> + * Subtract the product returned by mipsdsp_mul_q31_q31(rs, rt) from the
> + * 64-bit value held in HI[ac]:LO[ac], saturating to the signed 64-bit
> + * range.  On saturation the overflow flag (bit 16 + ac) is set.
> + *
> + * Overflow of a signed subtraction happens only when the operands have
> + * different signs and the result's sign differs from the minuend's.  The
> + * previous bit-64 reconstruction reported overflow for ordinary same-sign
> + * subtractions (e.g. 5 - 3).
> + */
> +void helper_dpsq_sa_l_w(CPUMIPSState *env,
> +                        uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    int64_t dotp, acc;
> +    uint64_t temp;
> +
> +    dotp = mipsdsp_mul_q31_q31(env, ac, rs, rt);
> +    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +
> +    /* Wrapping (unsigned) subtraction; overflow is detected separately. */
> +    temp = (uint64_t)acc - (uint64_t)dotp;
> +
> +    if ((((uint64_t)acc ^ (uint64_t)dotp) & ((uint64_t)acc ^ temp)) >> 63) {
> +        /* The true result lies beyond the range on acc's side. */
> +        if (acc < 0) {
> +            temp = 0x8000000000000000ull;
> +        } else {
> +            temp = 0x7FFFFFFFFFFFFFFFull;
> +        }
> +        set_DSPControl_overflow_flag(env, 1, ac + 16);
> +    }
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((temp & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(temp & MIPSDSP_LLO);
> +}
> +
> +#if defined(TARGET_MIPS64)
> +/*
> + * Multiply the two word pairs of rs and rt with mipsdsp_mul_q31_q31, add
> + * the two products as a 128-bit value and hand that to
> + * mipsdsp_sat64_acc_sub_q63, whose output is written to HI[ac]:LO[ac].
> + *
> + * The products are 64-bit values, so their sign lives in bit 63; testing
> + * bit 31 (as before) produced a wrong sign extension.
> + */
> +void helper_dpsq_sa_l_pw(CPUMIPSState *env,
> +                         target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int32_t rs1, rs0;
> +    int32_t rt1, rt0;
> +    int64_t tempB[2], tempA[2];
> +    int64_t temp[2];
> +    int64_t acc[2];
> +    uint64_t temp_sum;
> +
> +    temp[0] = 0x00;
> +    temp[1] = 0x00;
> +
> +    rs1 = (rs >> 32) & MIPSDSP_LLO;
> +    rs0 = rs & MIPSDSP_LLO;
> +    rt1 = (rt >> 32) & MIPSDSP_LLO;
> +    rt0 = rt & MIPSDSP_LLO;
> +
> +    tempB[0] = mipsdsp_mul_q31_q31(env, ac, rs1, rt1);
> +    tempA[0] = mipsdsp_mul_q31_q31(env, ac, rs0, rt0);
> +
> +    /* Sign-extend each 64-bit product into its upper word. */
> +    if (((tempB[0] >> 63) & 0x01) == 0) {
> +        tempB[1] = 0x00;
> +    } else {
> +        tempB[1] = 0xFFFFFFFFFFFFFFFFull;
> +    }
> +
> +    if (((tempA[0] >> 63) & 0x01) == 0) {
> +        tempA[1] = 0x00;
> +    } else {
> +        tempA[1] = 0xFFFFFFFFFFFFFFFFull;
> +    }
> +
> +    /* 128-bit addition; the low words are added unsigned to get the carry. */
> +    temp_sum = (uint64_t)tempB[0] + (uint64_t)tempA[0];
> +    if (temp_sum < (uint64_t)tempB[0]) {
> +        temp[1] += 1;
> +    }
> +    temp[0] = temp_sum;
> +    temp[1] += tempA[1] + tempB[1];
> +
> +    mipsdsp_sat64_acc_sub_q63(env, acc, ac, temp);
> +
> +    env->active_tc.HI[ac] = acc[1];
> +    env->active_tc.LO[ac] = acc[0];
> +}
> +
> +/*
> + * Multiply the two word pairs of rs and rt with mipsdsp_mul_q31_q31, form
> + * (high product - low product) as a 65-bit value, sign-extend it and add
> + * it to the 128-bit accumulator HI[ac]:LO[ac].
> + */
> +void helper_mulsaq_s_l_pw(CPUMIPSState *env,
> +                          target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int32_t rs_hi, rs_lo;
> +    int32_t rt_hi, rt_lo;
> +    int64_t prod_hi[2], prod_lo[2];
> +    int64_t diff[2];
> +    int64_t acc[2];
> +    int64_t word;
> +
> +    rs_hi = (rs >> 32) & MIPSDSP_LLO;
> +    rs_lo = rs & MIPSDSP_LLO;
> +    rt_hi = (rt >> 32) & MIPSDSP_LLO;
> +    rt_lo = rt & MIPSDSP_LLO;
> +
> +    prod_hi[0] = mipsdsp_mul_q31_q31(env, ac, rs_hi, rt_hi);
> +    prod_lo[0] = mipsdsp_mul_q31_q31(env, ac, rs_lo, rt_lo);
> +
> +    /* Element [1] of each product is the sign extension of element [0]. */
> +    prod_hi[1] = (((prod_hi[0] >> 63) & 0x01) == 0) ?
> +                 0x00 : 0xFFFFFFFFFFFFFFFFull;
> +    prod_lo[1] = (((prod_lo[0] >> 63) & 0x01) == 0) ?
> +                 0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    acc[0] = env->active_tc.LO[ac];
> +    acc[1] = env->active_tc.HI[ac];
> +
> +    /* Low-word subtraction; a larger unsigned result signals a borrow. */
> +    word = prod_hi[0] - prod_lo[0];
> +    if ((uint64_t)word > (uint64_t)prod_hi[0]) {
> +        prod_hi[1] -= 1;
> +    }
> +    diff[0] = word;
> +    diff[1] = prod_hi[1] - prod_lo[1];
> +
> +    /* Bit 64 of the difference is its sign: replicate it over the word. */
> +    diff[1] = ((diff[1] & 0x01) == 0) ? 0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    /* 128-bit accumulate with carry from the low word. */
> +    word = acc[0] + diff[0];
> +    if (((uint64_t)word < (uint64_t)acc[0]) &&
> +        ((uint64_t)word < (uint64_t)diff[0])) {
> +        acc[1] += 1;
> +    }
> +    acc[0] = word;
> +    acc[1] += diff[1];
> +
> +    env->active_tc.HI[ac] = acc[1];
> +    env->active_tc.LO[ac] = acc[0];
> +}
> +#endif
> +
> +/*
> + * Multiply the high halfwords of rs and rt with mipsdsp_mul_q15_q15 and
> + * add the product to the 64-bit value held in HI[ac]:LO[ac].
> + */
> +void helper_maq_s_w_phl(CPUMIPSState *env,
> +                        uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    int16_t rs_hi, rt_hi;
> +    int32_t product;
> +    int64_t acc, sum;
> +
> +    rs_hi = (rs & MIPSDSP_HI) >> 16;
> +    rt_hi = (rt & MIPSDSP_HI) >> 16;
> +    product = mipsdsp_mul_q15_q15(env, ac, rs_hi, rt_hi);
> +
> +    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    sum = (int64_t)product + acc;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((sum & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(sum & MIPSDSP_LLO);
> +}
> +
> +/*
> + * Multiply the low halfwords of rs and rt with mipsdsp_mul_q15_q15 and
> + * add the product to the 64-bit value held in HI[ac]:LO[ac].
> + */
> +void helper_maq_s_w_phr(CPUMIPSState *env,
> +                        uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    int16_t rs_lo, rt_lo;
> +    int32_t product;
> +    int64_t acc, sum;
> +
> +    rs_lo = rs & MIPSDSP_LO;
> +    rt_lo = rt & MIPSDSP_LO;
> +    product = mipsdsp_mul_q15_q15(env, ac, rs_lo, rt_lo);
> +
> +    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    sum = (int64_t)product + acc;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((sum & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(sum & MIPSDSP_LLO);
> +}
> +
> +/*
> + * Multiply the high halfwords of rs and rt with mipsdsp_mul_q15_q15, pass
> + * the product through mipsdsp_sat32_acc_q31 and store the sign-extended
> + * 32-bit result in HI[ac]:LO[ac].
> + */
> +void helper_maq_sa_w_phl(CPUMIPSState *env, uint32_t ac,
> +                         target_ulong rs, target_ulong rt)
> +{
> +    int16_t rs_hi, rt_hi;
> +    int32_t value;
> +    int64_t wide;
> +
> +    rs_hi = (rs & MIPSDSP_HI) >> 16;
> +    rt_hi = (rt & MIPSDSP_HI) >> 16;
> +
> +    value = mipsdsp_mul_q15_q15(env, ac, rs_hi, rt_hi);
> +    value = mipsdsp_sat32_acc_q31(env, ac, value);
> +
> +    /* Widening to 64 bits replicates the sign into the HI half. */
> +    wide = value;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((wide & MIPSDSP_LHI) >>
> +                                                   32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(wide & MIPSDSP_LLO);
> +}
> +
> +/*
> + * Multiply the low halfwords of rs and rt with mipsdsp_mul_q15_q15, pass
> + * the product through mipsdsp_sat32_acc_q31 and store the sign-extended
> + * 32-bit result in HI[ac]:LO[ac].
> + */
> +void helper_maq_sa_w_phr(CPUMIPSState *env, uint32_t ac,
> +                         target_ulong rs, target_ulong rt)
> +{
> +    int16_t rs_lo, rt_lo;
> +    int32_t value;
> +    int64_t wide;
> +
> +    rs_lo = rs & MIPSDSP_LO;
> +    rt_lo = rt & MIPSDSP_LO;
> +
> +    value = mipsdsp_mul_q15_q15(env, ac, rs_lo, rt_lo);
> +    value = mipsdsp_sat32_acc_q31(env, ac, value);
> +
> +    /* Widening to 64 bits replicates the sign into the HI half. */
> +    wide = value;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((wide & MIPSDSP_LHI) >>
> +                                                   32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(wide & MIPSDSP_LLO);
> +}
> +
> +/*
> + * Multiply the two halfword pairs of rs and rt element-wise with
> + * mipsdsp_mul_i16_i16 and pack the low 16 bits of each product:
> + *     result = tempB(15..0) || tempA(15..0)
> + *
> + * The packed 32-bit value is sign-extended to the register width, the same
> + * way helper_mulq_s_ph and the other helpers returning a 32-bit result do;
> + * on a 32-bit target this makes no difference.
> + */
> +target_ulong helper_mul_ph(CPUMIPSState *env, target_ulong rs, target_ulong rt)
> +{
> +    int16_t rsh, rsl, rth, rtl;
> +    int32_t tempB, tempA;
> +    uint32_t rd;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +    tempB = mipsdsp_mul_i16_i16(env, rsh, rth);
> +    tempA = mipsdsp_mul_i16_i16(env, rsl, rtl);
> +
> +    /* Shift as unsigned so that a set bit 15 cannot overflow a signed int. */
> +    rd = ((uint32_t)(tempB & MIPSDSP_LO) << 16) |
> +         (uint32_t)(tempA & MIPSDSP_LO);
> +
> +    return (target_long)(int32_t)rd;
> +}
> +
> +/*
> + * Multiply the two halfword pairs of rs and rt element-wise with
> + * mipsdsp_sat16_mul_i16_i16 and pack the low 16 bits of each product:
> + *     result = tempB(15..0) || tempA(15..0)
> + *
> + * The packed 32-bit value is sign-extended to the register width, the same
> + * way helper_mulq_s_ph and the other helpers returning a 32-bit result do;
> + * on a 32-bit target this makes no difference.
> + */
> +target_ulong helper_mul_s_ph(CPUMIPSState *env,
> +                             target_ulong rs, target_ulong rt)
> +{
> +    int16_t  rsh, rsl, rth, rtl;
> +    int32_t  tempB, tempA;
> +    uint32_t rd;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +    tempB = mipsdsp_sat16_mul_i16_i16(env, rsh, rth);
> +    tempA = mipsdsp_sat16_mul_i16_i16(env, rsl, rtl);
> +
> +    /* Shift as unsigned so that a set bit 15 cannot overflow a signed int. */
> +    rd = ((uint32_t)(tempB & MIPSDSP_LO) << 16) |
> +         (uint32_t)(tempA & MIPSDSP_LO);
> +
> +    return (target_long)(int32_t)rd;
> +}
> +
> +/*
> + * Multiply the two halfword pairs of rs and rt element-wise with
> + * mipsdsp_sat16_mul_q15_q15, pack the low 16 bits of each product into one
> + * 32-bit word and return it sign-extended.
> + */
> +target_ulong helper_mulq_s_ph(CPUMIPSState *env,
> +                              target_ulong rs, target_ulong rt)
> +{
> +    int16_t rs_hi, rs_lo, rt_hi, rt_lo;
> +    int32_t prod_hi, prod_lo;
> +
> +    rs_hi = (rs & MIPSDSP_HI) >> 16;
> +    rs_lo =  rs & MIPSDSP_LO;
> +    rt_hi = (rt & MIPSDSP_HI) >> 16;
> +    rt_lo =  rt & MIPSDSP_LO;
> +
> +    prod_hi = mipsdsp_sat16_mul_q15_q15(env, rs_hi, rt_hi);
> +    prod_lo = mipsdsp_sat16_mul_q15_q15(env, rs_lo, rt_lo);
> +
> +    return (target_long)(int32_t)(((prod_hi & MIPSDSP_LO) << 16) |
> +                                  (prod_lo & MIPSDSP_LO));
> +}
> +
> +/*
> + * Fractional multiply of the low 32 bits of rs and rt: the signed product
> + * is doubled and its upper 32 bits are returned sign-extended.  The one
> + * case that cannot be represented, 0x80000000 * 0x80000000, yields
> + * 0x7FFFFFFF and sets overflow flag bit 21.
> + *
> + * Only the low 32 bits of each register are examined, so the saturation
> + * test also works when target_ulong is 64 bits wide and holds a
> + * sign-extended value, and the operands are multiplied as signed numbers.
> + */
> +target_ulong helper_mulq_s_w(CPUMIPSState *env,
> +                             target_ulong rs, target_ulong rt)
> +{
> +    uint32_t rs_t, rt_t;
> +    int32_t tempI;
> +    int64_t tempL;
> +
> +    rs_t = rs & MIPSDSP_LLO;
> +    rt_t = rt & MIPSDSP_LLO;
> +
> +    if ((rs_t == 0x80000000) && (rt_t == 0x80000000)) {
> +        tempL = 0x7FFFFFFF00000000ull;
> +        set_DSPControl_overflow_flag(env, 1, 21);
> +    } else {
> +        /* "* 2" instead of "<< 1": the product may be negative. */
> +        tempL = (int64_t)(int32_t)rs_t * (int64_t)(int32_t)rt_t * 2;
> +    }
> +    tempI = (tempL & MIPSDSP_LHI) >> 32;
> +
> +    return (target_long)tempI;
> +}
> +
> +/*
> + * Rounding fractional multiply of the low 32 bits of rs and rt: the signed
> + * product is doubled, 0x80000000 is added for rounding and the upper 32
> + * bits are returned sign-extended.  0x80000000 * 0x80000000 cannot be
> + * represented; it yields 0x7FFFFFFF and sets overflow flag bit 21.
> + *
> + * The operands are converted to int32_t before multiplying; multiplying
> + * them as uint32_t (as before) gave an unsigned product.
> + */
> +target_ulong helper_mulq_rs_w(CPUMIPSState *env,
> +                              target_ulong rs, target_ulong rt)
> +{
> +    uint32_t rs_t, rt_t;
> +    int32_t tempI;
> +    int64_t tempL;
> +
> +    rs_t = rs & MIPSDSP_LLO;
> +    rt_t = rt & MIPSDSP_LLO;
> +
> +    if ((rs_t == 0x80000000) && (rt_t == 0x80000000)) {
> +        tempL = 0x7FFFFFFF00000000ull;
> +        set_DSPControl_overflow_flag(env, 1, 21);
> +    } else {
> +        /* "* 2" instead of "<< 1": the product may be negative. */
> +        tempL  = (int64_t)(int32_t)rs_t * (int64_t)(int32_t)rt_t * 2;
> +        tempL += 0x80000000ull;
> +    }
> +    tempI = (tempL & MIPSDSP_LHI) >> 32;
> +
> +    return (target_long)tempI;
> +}
> +
> +/*
> + * Multiply the matching signed halfwords of rs and rt, take
> + * (high product - low product) and add that difference to the 64-bit value
> + * held in HI[ac]:LO[ac].
> + *
> + * The halfwords are held in int16_t so that widening them to int32_t
> + * sign-extends; with uint16_t the products were computed unsigned.
> + */
> +void helper_mulsa_w_ph(CPUMIPSState *env,
> +                       uint32_t ac, target_ulong rs, target_ulong rt)
> +{
> +    int16_t rsh, rsl, rth, rtl;
> +    int32_t tempB, tempA;
> +    int64_t dotp, acc;
> +
> +    rsh = (rs & MIPSDSP_HI) >> 16;
> +    rsl =  rs & MIPSDSP_LO;
> +    rth = (rt & MIPSDSP_HI) >> 16;
> +    rtl =  rt & MIPSDSP_LO;
> +
> +    /* 16 x 16 signed products always fit in 32 bits. */
> +    tempB = (int32_t)rsh * (int32_t)rth;
> +    tempA = (int32_t)rsl * (int32_t)rtl;
> +
> +    dotp = (int64_t)tempB - (int64_t)tempA;
> +    /* Assemble the accumulator unsigned to avoid shifting a negative. */
> +    acc  = ((uint64_t)env->active_tc.HI[ac] << 32) |
> +           ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
> +    acc = acc + dotp;
> +
> +    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
> +    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
> +}
> +
> +#if defined(TARGET_MIPS64)
> +/*
> + * Multiply bits 63..48 of rs and rt with mipsdsp_mul_q15_q15 and add the
> + * sign-extended product to the 128-bit accumulator HI[ac]:LO[ac].
> + */
> +void helper_maq_s_w_qhll(CPUMIPSState *env,
> +                         target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int16_t rs_half, rt_half;
> +    int32_t product;
> +    int64_t addend_lo, addend_hi;
> +    int64_t acc_lo, acc_hi;
> +    int64_t sum_lo;
> +
> +    rs_half = (rs >> 48) & MIPSDSP_LO;
> +    rt_half = (rt >> 48) & MIPSDSP_LO;
> +    product = mipsdsp_mul_q15_q15(env, ac, rs_half, rt_half);
> +
> +    /* Widen the product to 128 bits: low word plus its sign extension. */
> +    addend_lo = (int64_t)product;
> +    addend_hi = (((addend_lo >> 63) & 0x01) == 0) ?
> +                0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    acc_lo = env->active_tc.LO[ac];
> +    acc_hi = env->active_tc.HI[ac];
> +
> +    /* Add the low words; an unsigned wrap-around is a carry into HI. */
> +    sum_lo = acc_lo + addend_lo;
> +    if (((uint64_t)sum_lo < (uint64_t)acc_lo) &&
> +        ((uint64_t)sum_lo < (uint64_t)addend_lo)) {
> +        acc_hi += 1;
> +    }
> +    acc_hi += addend_hi;
> +
> +    env->active_tc.HI[ac] = acc_hi;
> +    env->active_tc.LO[ac] = sum_lo;
> +}
> +
> +/*
> + * Multiply bits 47..32 of rs and rt with mipsdsp_mul_q15_q15 and add the
> + * sign-extended product to the 128-bit accumulator HI[ac]:LO[ac].
> + */
> +void helper_maq_s_w_qhlr(CPUMIPSState *env,
> +                         target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int16_t rs_half, rt_half;
> +    int32_t product;
> +    int64_t addend_lo, addend_hi;
> +    int64_t acc_lo, acc_hi;
> +    int64_t sum_lo;
> +
> +    rs_half = (rs >> 32) & MIPSDSP_LO;
> +    rt_half = (rt >> 32) & MIPSDSP_LO;
> +    product = mipsdsp_mul_q15_q15(env, ac, rs_half, rt_half);
> +
> +    /* Widen the product to 128 bits: low word plus its sign extension. */
> +    addend_lo = (int64_t)product;
> +    addend_hi = (((addend_lo >> 63) & 0x01) == 0) ?
> +                0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    acc_lo = env->active_tc.LO[ac];
> +    acc_hi = env->active_tc.HI[ac];
> +
> +    /* Add the low words; an unsigned wrap-around is a carry into HI. */
> +    sum_lo = acc_lo + addend_lo;
> +    if (((uint64_t)sum_lo < (uint64_t)acc_lo) &&
> +        ((uint64_t)sum_lo < (uint64_t)addend_lo)) {
> +        acc_hi += 1;
> +    }
> +    acc_hi += addend_hi;
> +
> +    env->active_tc.HI[ac] = acc_hi;
> +    env->active_tc.LO[ac] = sum_lo;
> +}
> +
> +/*
> + * Multiply bits 31..16 of rs and rt with mipsdsp_mul_q15_q15 and add the
> + * sign-extended product to the 128-bit accumulator HI[ac]:LO[ac].
> + */
> +void helper_maq_s_w_qhrl(CPUMIPSState *env,
> +                         target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int16_t rs_half, rt_half;
> +    int32_t product;
> +    int64_t addend_lo, addend_hi;
> +    int64_t acc_lo, acc_hi;
> +    int64_t sum_lo;
> +
> +    rs_half = (rs >> 16) & MIPSDSP_LO;
> +    rt_half = (rt >> 16) & MIPSDSP_LO;
> +    product = mipsdsp_mul_q15_q15(env, ac, rs_half, rt_half);
> +
> +    /* Widen the product to 128 bits: low word plus its sign extension. */
> +    addend_lo = (int64_t)product;
> +    addend_hi = (((addend_lo >> 63) & 0x01) == 0) ?
> +                0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    acc_lo = env->active_tc.LO[ac];
> +    acc_hi = env->active_tc.HI[ac];
> +
> +    /* Add the low words; an unsigned wrap-around is a carry into HI. */
> +    sum_lo = acc_lo + addend_lo;
> +    if (((uint64_t)sum_lo < (uint64_t)acc_lo) &&
> +        ((uint64_t)sum_lo < (uint64_t)addend_lo)) {
> +        acc_hi += 1;
> +    }
> +    acc_hi += addend_hi;
> +
> +    env->active_tc.HI[ac] = acc_hi;
> +    env->active_tc.LO[ac] = sum_lo;
> +}
> +
> +/*
> + * Multiply bits 15..0 of rs and rt with mipsdsp_mul_q15_q15 and add the
> + * sign-extended product to the 128-bit accumulator HI[ac]:LO[ac].
> + */
> +void helper_maq_s_w_qhrr(CPUMIPSState *env,
> +                         target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int16_t rs_half, rt_half;
> +    int32_t product;
> +    int64_t addend_lo, addend_hi;
> +    int64_t acc_lo, acc_hi;
> +    int64_t sum_lo;
> +
> +    rs_half = rs & MIPSDSP_LO;
> +    rt_half = rt & MIPSDSP_LO;
> +    product = mipsdsp_mul_q15_q15(env, ac, rs_half, rt_half);
> +
> +    /* Widen the product to 128 bits: low word plus its sign extension. */
> +    addend_lo = (int64_t)product;
> +    addend_hi = (((addend_lo >> 63) & 0x01) == 0) ?
> +                0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    acc_lo = env->active_tc.LO[ac];
> +    acc_hi = env->active_tc.HI[ac];
> +
> +    /* Add the low words; an unsigned wrap-around is a carry into HI. */
> +    sum_lo = acc_lo + addend_lo;
> +    if (((uint64_t)sum_lo < (uint64_t)acc_lo) &&
> +        ((uint64_t)sum_lo < (uint64_t)addend_lo)) {
> +        acc_hi += 1;
> +    }
> +    acc_hi += addend_hi;
> +
> +    env->active_tc.HI[ac] = acc_hi;
> +    env->active_tc.LO[ac] = sum_lo;
> +}
> +
> +/*
> + * Multiply bits 63..48 of rs and rt with mipsdsp_mul_q15_q15, pass the
> + * product through mipsdsp_sat32_acc_q31 and store the result, sign-extended
> + * to 128 bits, in HI[ac]:LO[ac].
> + */
> +void helper_maq_sa_w_qhll(CPUMIPSState *env,
> +                          target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int16_t rs_half, rt_half;
> +    int32_t value;
> +    int64_t lo, hi;
> +
> +    rs_half = (rs >> 48) & MIPSDSP_LO;
> +    rt_half = (rt >> 48) & MIPSDSP_LO;
> +    value = mipsdsp_mul_q15_q15(env, ac, rs_half, rt_half);
> +    value = mipsdsp_sat32_acc_q31(env, ac, value);
> +
> +    /* HI receives the sign extension of the 64-bit LO word. */
> +    lo = (int64_t)(int32_t)value;
> +    hi = (((lo >> 63) & 0x01) == 0) ? 0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    env->active_tc.HI[ac] = hi;
> +    env->active_tc.LO[ac] = lo;
> +}
> +
> +/*
> + * Multiply bits 47..32 of rs and rt with mipsdsp_mul_q15_q15, pass the
> + * product through mipsdsp_sat32_acc_q31 and store the result, sign-extended
> + * to 128 bits, in HI[ac]:LO[ac].
> + */
> +void helper_maq_sa_w_qhlr(CPUMIPSState *env,
> +                          target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int16_t rs_half, rt_half;
> +    int32_t value;
> +    int64_t lo, hi;
> +
> +    rs_half = (rs >> 32) & MIPSDSP_LO;
> +    rt_half = (rt >> 32) & MIPSDSP_LO;
> +    value = mipsdsp_mul_q15_q15(env, ac, rs_half, rt_half);
> +    value = mipsdsp_sat32_acc_q31(env, ac, value);
> +
> +    /* HI receives the sign extension of the 64-bit LO word. */
> +    lo = (int64_t)value;
> +    hi = (((lo >> 63) & 0x01) == 0) ? 0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    env->active_tc.HI[ac] = hi;
> +    env->active_tc.LO[ac] = lo;
> +}
> +
> +/*
> + * Multiply bits 31..16 of rs and rt with mipsdsp_mul_q15_q15, pass the
> + * product through mipsdsp_sat32_acc_q31 and store the result, sign-extended
> + * to 128 bits, in HI[ac]:LO[ac].
> + */
> +void helper_maq_sa_w_qhrl(CPUMIPSState *env,
> +                          target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int16_t rs_half, rt_half;
> +    int32_t value;
> +    int64_t lo, hi;
> +
> +    rs_half = (rs >> 16) & MIPSDSP_LO;
> +    rt_half = (rt >> 16) & MIPSDSP_LO;
> +    value = mipsdsp_mul_q15_q15(env, ac, rs_half, rt_half);
> +    value = mipsdsp_sat32_acc_q31(env, ac, value);
> +
> +    /* HI receives the sign extension of the 64-bit LO word. */
> +    lo = (int64_t)value;
> +    hi = (((lo >> 63) & 0x01) == 0) ? 0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    env->active_tc.HI[ac] = hi;
> +    env->active_tc.LO[ac] = lo;
> +}
> +
> +/*
> + * Multiply bits 15..0 of rs and rt with mipsdsp_mul_q15_q15, pass the
> + * product through mipsdsp_sat32_acc_q31 and store the result, sign-extended
> + * to 128 bits, in HI[ac]:LO[ac].
> + */
> +void helper_maq_sa_w_qhrr(CPUMIPSState *env,
> +                          target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int16_t rs_half, rt_half;
> +    int32_t value;
> +    int64_t lo, hi;
> +
> +    rs_half = rs & MIPSDSP_LO;
> +    rt_half = rt & MIPSDSP_LO;
> +    value = mipsdsp_mul_q15_q15(env, ac, rs_half, rt_half);
> +    value = mipsdsp_sat32_acc_q31(env, ac, value);
> +
> +    /* HI receives the sign extension of the 64-bit LO word. */
> +    lo = (int64_t)value;
> +    hi = (((lo >> 63) & 0x01) == 0) ? 0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    env->active_tc.HI[ac] = hi;
> +    env->active_tc.LO[ac] = lo;
> +}
> +
> +/*
> + * Multiply the upper words (bits 63..32) of rs and rt with
> + * mipsdsp_mul_q31_q31 and add the sign-extended 64-bit product to the
> + * 128-bit accumulator HI[ac]:LO[ac].
> + */
> +void helper_maq_s_l_pwl(CPUMIPSState *env,
> +                        target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int32_t rs_word, rt_word;
> +    int64_t addend_lo, addend_hi;
> +    int64_t acc_lo, acc_hi;
> +    int64_t sum_lo;
> +
> +    rs_word = (rs >> 32) & MIPSDSP_LLO;
> +    rt_word = (rt >> 32) & MIPSDSP_LLO;
> +
> +    addend_lo = mipsdsp_mul_q31_q31(env, ac, rs_word, rt_word);
> +    addend_hi = (((addend_lo >> 63) & 0x01) == 0) ?
> +                0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    acc_lo = env->active_tc.LO[ac];
> +    acc_hi = env->active_tc.HI[ac];
> +
> +    /* Add the low words; an unsigned wrap-around is a carry into HI. */
> +    sum_lo = acc_lo + addend_lo;
> +    if (((uint64_t)sum_lo < (uint64_t)addend_lo) &&
> +        ((uint64_t)sum_lo < (uint64_t)acc_lo)) {
> +        acc_hi += 1;
> +    }
> +    acc_hi += addend_hi;
> +
> +    env->active_tc.HI[ac] = acc_hi;
> +    env->active_tc.LO[ac] = sum_lo;
> +}
> +
> +/*
> + * Multiply the lower words (bits 31..0) of rs and rt with
> + * mipsdsp_mul_q31_q31 and add the sign-extended 64-bit product to the
> + * 128-bit accumulator HI[ac]:LO[ac].
> + */
> +void helper_maq_s_l_pwr(CPUMIPSState *env,
> +                        target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int32_t rs_word, rt_word;
> +    int64_t addend_lo, addend_hi;
> +    int64_t acc_lo, acc_hi;
> +    int64_t sum_lo;
> +
> +    rs_word = rs & MIPSDSP_LLO;
> +    rt_word = rt & MIPSDSP_LLO;
> +
> +    addend_lo = mipsdsp_mul_q31_q31(env, ac, rs_word, rt_word);
> +    addend_hi = (((addend_lo >> 63) & 0x01) == 0) ?
> +                0x00 : 0xFFFFFFFFFFFFFFFFull;
> +
> +    acc_lo = env->active_tc.LO[ac];
> +    acc_hi = env->active_tc.HI[ac];
> +
> +    /* Add the low words; an unsigned wrap-around is a carry into HI. */
> +    sum_lo = acc_lo + addend_lo;
> +    if (((uint64_t)sum_lo < (uint64_t)addend_lo) &&
> +        ((uint64_t)sum_lo < (uint64_t)acc_lo)) {
> +        acc_hi += 1;
> +    }
> +    acc_hi += addend_hi;
> +
> +    env->active_tc.HI[ac] = acc_hi;
> +    env->active_tc.LO[ac] = sum_lo;
> +}
> +
> +/*
> + * Signed multiply-add on word pairs: computes rs.hi * rt.hi and
> + * rs.lo * rt.lo as full 64-bit signed products and adds both, sign-extended
> + * to 128 bits, to the accumulator HI[ac]:LO[ac].
> + *
> + * The products are formed in int64_t; multiplying in int32_t (as before)
> + * overflowed and kept only the low 32 bits.  All multi-word additions are
> + * done on unsigned values so that wrap-around is well defined.
> + */
> +void helper_dmadd(CPUMIPSState *env,
> +                  target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int32_t rs1, rs0;
> +    int32_t rt1, rt0;
> +    int64_t tempB, tempA;
> +    uint64_t tempBL[2], tempAL[2];
> +    uint64_t acc[2];
> +    uint64_t temp[2];
> +    uint64_t temp_sum;
> +
> +    temp[0] = 0x00;
> +    temp[1] = 0x00;
> +
> +    rs1 = (rs >> 32) & MIPSDSP_LLO;
> +    rs0 = rs & MIPSDSP_LLO;
> +    rt1 = (rt >> 32) & MIPSDSP_LLO;
> +    rt0 = rt & MIPSDSP_LLO;
> +
> +    /* Widen before multiplying: 32 x 32 signed always fits in 64 bits. */
> +    tempB = (int64_t)rs1 * (int64_t)rt1;
> +    tempA = (int64_t)rs0 * (int64_t)rt0;
> +
> +    /* 128-bit sign extension of each product. */
> +    tempBL[0] = (uint64_t)tempB;
> +    tempBL[1] = (tempB < 0) ? 0xFFFFFFFFFFFFFFFFull : 0x0;
> +    tempAL[0] = (uint64_t)tempA;
> +    tempAL[1] = (tempA < 0) ? 0xFFFFFFFFFFFFFFFFull : 0x0;
> +
> +    acc[1] = env->active_tc.HI[ac];
> +    acc[0] = env->active_tc.LO[ac];
> +
> +    /* temp = tempB + tempA, with carry from the low word. */
> +    temp_sum = tempBL[0] + tempAL[0];
> +    if (temp_sum < tempBL[0]) {
> +        temp[1] += 1;
> +    }
> +    temp[0] = temp_sum;
> +    temp[1] += tempBL[1] + tempAL[1];
> +
> +    /* temp += acc, with carry from the low word. */
> +    temp_sum = temp[0] + acc[0];
> +    if (temp_sum < temp[0]) {
> +        temp[1] += 1;
> +    }
> +    temp[0] = temp_sum;
> +    temp[1] += acc[1];
> +
> +    env->active_tc.HI[ac] = temp[1];
> +    env->active_tc.LO[ac] = temp[0];
> +}
> +
> +/*
> + * Unsigned multiply-add on word pairs: the two products returned by
> + * mipsdsp_mul_u32_u32 (rs.hi * rt.hi and rs.lo * rt.lo) are added,
> + * zero-extended, to the 128-bit accumulator HI[ac]:LO[ac].
> + */
> +void helper_dmaddu(CPUMIPSState *env,
> +                   target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    uint32_t rs_hi, rs_lo;
> +    uint32_t rt_hi, rt_lo;
> +    uint64_t prod_hi, prod_lo;
> +    uint64_t acc_lo, acc_hi;
> +    uint64_t res_lo, res_hi;
> +    uint64_t sum;
> +
> +    rs_hi = (rs >> 32) & MIPSDSP_LLO;
> +    rs_lo = rs & MIPSDSP_LLO;
> +    rt_hi = (rt >> 32) & MIPSDSP_LLO;
> +    rt_lo = rt & MIPSDSP_LLO;
> +
> +    prod_hi = mipsdsp_mul_u32_u32(env, rs_hi, rt_hi);
> +    prod_lo = mipsdsp_mul_u32_u32(env, rs_lo, rt_lo);
> +
> +    acc_hi = env->active_tc.HI[ac];
> +    acc_lo = env->active_tc.LO[ac];
> +
> +    /* res = prod_hi + prod_lo; the upper words of both products are 0. */
> +    res_hi = 0x00;
> +    sum = prod_hi + prod_lo;
> +    if ((sum < prod_hi) && (sum < prod_lo)) {
> +        res_hi += 1;
> +    }
> +    res_lo = sum;
> +
> +    /* res += acc, with carry from the low word. */
> +    sum = res_lo + acc_lo;
> +    if ((sum < res_lo) && (sum < acc_lo)) {
> +        res_hi += 1;
> +    }
> +    res_lo = sum;
> +    res_hi += acc_hi;
> +
> +    env->active_tc.HI[ac] = res_hi;
> +    env->active_tc.LO[ac] = res_lo;
> +}
> +
> +/*
> + * Signed multiply-subtract on word pairs: computes rs.hi * rt.hi and
> + * rs.lo * rt.lo as full 64-bit signed products and subtracts both,
> + * sign-extended to 128 bits, from the accumulator HI[ac]:LO[ac].
> + *
> + * The products are formed in int64_t; multiplying in int32_t (as before)
> + * overflowed and kept only the low 32 bits.  All multi-word subtractions
> + * are done on unsigned values so that wrap-around is well defined.
> + */
> +void helper_dmsub(CPUMIPSState *env,
> +                  target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    int32_t rs1, rs0;
> +    int32_t rt1, rt0;
> +    int64_t tempB, tempA;
> +    uint64_t tempBL[2], tempAL[2];
> +    uint64_t acc[2];
> +    uint64_t temp[2];
> +    uint64_t temp_sum;
> +
> +    temp[0] = 0x00;
> +    temp[1] = 0x00;
> +
> +    rs1 = (rs >> 32) & MIPSDSP_LLO;
> +    rs0 = rs & MIPSDSP_LLO;
> +    rt1 = (rt >> 32) & MIPSDSP_LLO;
> +    rt0 = rt & MIPSDSP_LLO;
> +
> +    /* Widen before multiplying: 32 x 32 signed always fits in 64 bits. */
> +    tempB = (int64_t)rs1 * (int64_t)rt1;
> +    tempA = (int64_t)rs0 * (int64_t)rt0;
> +
> +    /* 128-bit sign extension of each product. */
> +    tempBL[0] = (uint64_t)tempB;
> +    tempBL[1] = (tempB < 0) ? 0xFFFFFFFFFFFFFFFFull : 0x0;
> +    tempAL[0] = (uint64_t)tempA;
> +    tempAL[1] = (tempA < 0) ? 0xFFFFFFFFFFFFFFFFull : 0x0;
> +
> +    acc[1] = env->active_tc.HI[ac];
> +    acc[0] = env->active_tc.LO[ac];
> +
> +    /* temp = acc - tempB; a larger unsigned result signals a borrow. */
> +    temp_sum = acc[0] - tempBL[0];
> +    if (temp_sum > acc[0]) {
> +        temp[1] -= 1;
> +    }
> +    temp[0] = temp_sum;
> +    temp[1] += acc[1] - tempBL[1];
> +
> +    /* temp -= tempA, again propagating the borrow. */
> +    temp_sum = temp[0] - tempAL[0];
> +    if (temp_sum > temp[0]) {
> +        temp[1] -= 1;
> +    }
> +    temp[0] = temp_sum;
> +    temp[1] -= tempAL[1];
> +
> +    env->active_tc.HI[ac] = temp[1];
> +    env->active_tc.LO[ac] = temp[0];
> +}
> +
> +/*
> + * Unsigned multiply-subtract on word pairs: the two products returned by
> + * mipsdsp_mul_u32_u32 (rs.hi * rt.hi and rs.lo * rt.lo) are subtracted,
> + * zero-extended, from the 128-bit accumulator HI[ac]:LO[ac].
> + */
> +void helper_dmsubu(CPUMIPSState *env,
> +                   target_ulong rs, target_ulong rt, uint32_t ac)
> +{
> +    uint32_t rs_hi, rs_lo;
> +    uint32_t rt_hi, rt_lo;
> +    uint64_t prod_hi, prod_lo;
> +    uint64_t acc_lo, acc_hi;
> +    uint64_t res_lo, res_hi;
> +    uint64_t diff;
> +
> +    rs_hi = (rs >> 32) & MIPSDSP_LLO;
> +    rs_lo = rs & MIPSDSP_LLO;
> +    rt_hi = (rt >> 32) & MIPSDSP_LLO;
> +    rt_lo = rt & MIPSDSP_LLO;
> +
> +    prod_hi = mipsdsp_mul_u32_u32(env, rs_hi, rt_hi);
> +    prod_lo = mipsdsp_mul_u32_u32(env, rs_lo, rt_lo);
> +
> +    acc_hi = env->active_tc.HI[ac];
> +    acc_lo = env->active_tc.LO[ac];
> +
> +    /* res = acc - prod_hi; a larger unsigned result signals a borrow. */
> +    res_hi = 0x00;
> +    diff = acc_lo - prod_hi;
> +    if (diff > acc_lo) {
> +        res_hi -= 1;
> +    }
> +    res_lo = diff;
> +    res_hi += acc_hi;
> +
> +    /* res -= prod_lo, again propagating the borrow. */
> +    diff = res_lo - prod_lo;
> +    if (diff > res_lo) {
> +        res_hi -= 1;
> +    }
> +    res_lo = diff;
> +
> +    env->active_tc.HI[ac] = res_hi;
> +    env->active_tc.LO[ac] = res_lo;
> +}
> +#endif
> +
>  #undef MIPSDSP_LHI
>  #undef MIPSDSP_LLO
>  #undef MIPSDSP_HI
> diff --git a/target-mips/helper.h b/target-mips/helper.h
> index 7046380..69bcb6c 100644
> --- a/target-mips/helper.h
> +++ b/target-mips/helper.h
> @@ -491,4 +491,95 @@ DEF_HELPER_FLAGS_2(shra_r_pw, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl, i32)
>  DEF_HELPER_FLAGS_2(shrav_r_pw, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl, tl)
>  #endif
>  
> +/* DSP Multiply Sub-class insns */
> +DEF_HELPER_FLAGS_3(muleu_s_ph_qbl, 0, tl, env, tl, tl)
> +DEF_HELPER_FLAGS_3(muleu_s_ph_qbr, 0, tl, env, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_3(muleu_s_qh_obl, 0, tl, env, tl, tl)
> +DEF_HELPER_FLAGS_3(muleu_s_qh_obr, 0, tl, env, tl, tl)
> +#endif
> +DEF_HELPER_FLAGS_3(mulq_rs_ph, 0, tl, env, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_3(mulq_rs_qh, 0, tl, env, tl, tl)
> +#endif
> +DEF_HELPER_FLAGS_3(muleq_s_w_phl, 0, tl, env, tl, tl)
> +DEF_HELPER_FLAGS_3(muleq_s_w_phr, 0, tl, env, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_3(muleq_s_pw_qhl, 0, tl, env, tl, tl)
> +DEF_HELPER_FLAGS_3(muleq_s_pw_qhr, 0, tl, env, tl, tl)
> +#endif
> +DEF_HELPER_FLAGS_4(dpau_h_qbl, 0, void, env, i32, tl, tl)
> +DEF_HELPER_FLAGS_4(dpau_h_qbr, 0, void, env, i32, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_4(dpau_h_obl, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(dpau_h_obr, 0, void, env, tl, tl, i32)
> +#endif
> +DEF_HELPER_FLAGS_4(dpsu_h_qbl, 0, void, env, i32, tl, tl)
> +DEF_HELPER_FLAGS_4(dpsu_h_qbr, 0, void, env, i32, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_4(dpsu_h_obl, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(dpsu_h_obr, 0, void, env, tl, tl, i32)
> +#endif
> +DEF_HELPER_FLAGS_4(dpa_w_ph, 0, void, env, i32, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_4(dpa_w_qh, 0, void, env, tl, tl, i32)
> +#endif
> +DEF_HELPER_FLAGS_4(dpax_w_ph, 0, void, env, i32, tl, tl)
> +DEF_HELPER_FLAGS_4(dpaq_s_w_ph, 0, void, env, i32, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_4(dpaq_s_w_qh, 0, void, env, tl, tl, i32)
> +#endif
> +DEF_HELPER_FLAGS_4(dpaqx_s_w_ph, 0, void, env, i32, tl, tl)
> +DEF_HELPER_FLAGS_4(dpaqx_sa_w_ph, 0, void, env, i32, tl, tl)
> +DEF_HELPER_FLAGS_4(dps_w_ph, 0, void, env, i32, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_4(dps_w_qh, 0, void, env, tl, tl, i32)
> +#endif
> +DEF_HELPER_FLAGS_4(dpsx_w_ph, 0, void, env, i32, tl, tl)
> +DEF_HELPER_FLAGS_4(dpsq_s_w_ph, 0, void, env, i32, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_4(dpsq_s_w_qh, 0, void, env, tl, tl, i32)
> +#endif
> +DEF_HELPER_FLAGS_4(dpsqx_s_w_ph, 0, void, env, i32, tl, tl)
> +DEF_HELPER_FLAGS_4(dpsqx_sa_w_ph, 0, void, env, i32, tl, tl)
> +DEF_HELPER_FLAGS_4(mulsaq_s_w_ph, 0, void, env, i32, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_4(mulsaq_s_w_qh, 0, void, env, tl, tl, i32)
> +#endif
> +DEF_HELPER_FLAGS_4(dpaq_sa_l_w, 0, void, env, i32, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_4(dpaq_sa_l_pw, 0, void, env, tl, tl, i32)
> +#endif
> +DEF_HELPER_FLAGS_4(dpsq_sa_l_w, 0, void, env, i32, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_4(dpsq_sa_l_pw, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(mulsaq_s_l_pw, 0, void, env, tl, tl, i32)
> +#endif
> +DEF_HELPER_FLAGS_4(maq_s_w_phl, 0, void, env, i32, tl, tl)
> +DEF_HELPER_FLAGS_4(maq_s_w_phr, 0, void, env, i32, tl, tl)
> +DEF_HELPER_FLAGS_4(maq_sa_w_phl, 0, void, env, i32, tl, tl)
> +DEF_HELPER_FLAGS_4(maq_sa_w_phr, 0, void, env, i32, tl, tl)
> +DEF_HELPER_FLAGS_3(mul_ph, 0, tl, env, tl, tl)
> +DEF_HELPER_FLAGS_3(mul_s_ph, 0, tl, env, tl, tl)
> +DEF_HELPER_FLAGS_3(mulq_s_ph, 0, tl, env, tl, tl)
> +DEF_HELPER_FLAGS_3(mulq_s_w, 0, tl, env, tl, tl)
> +DEF_HELPER_FLAGS_3(mulq_rs_w, 0, tl, env, tl, tl)
> +DEF_HELPER_FLAGS_4(mulsa_w_ph, 0, void, env, i32, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_4(maq_s_w_qhll, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(maq_s_w_qhlr, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(maq_s_w_qhrl, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(maq_s_w_qhrr, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(maq_sa_w_qhll, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(maq_sa_w_qhlr, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(maq_sa_w_qhrl, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(maq_sa_w_qhrr, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(maq_s_l_pwl, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(maq_s_l_pwr, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(dmadd, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(dmaddu, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(dmsub, 0, void, env, tl, tl, i32)
> +DEF_HELPER_FLAGS_4(dmsubu, 0, void, env, tl, tl, i32)
> +#endif
> +
>  #include "def-helper.h"
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index d87dbee..88c20cb 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -402,6 +402,13 @@ enum {
>      OPC_ADDWC          = (0x11 << 6) | OPC_ADDU_QB_DSP,
>      OPC_MODSUB         = (0x12 << 6) | OPC_ADDU_QB_DSP,
>      OPC_RADDU_W_QB     = (0x14 << 6) | OPC_ADDU_QB_DSP,
> +    /* MIPS DSP Multiply Sub-class insns */
> +    OPC_MULEU_S_PH_QBL = (0x06 << 6) | OPC_ADDU_QB_DSP,
> +    OPC_MULEU_S_PH_QBR = (0x07 << 6) | OPC_ADDU_QB_DSP,
> +    OPC_MULQ_RS_PH     = (0x1F << 6) | OPC_ADDU_QB_DSP,
> +    OPC_MULEQ_S_W_PHL  = (0x1C << 6) | OPC_ADDU_QB_DSP,
> +    OPC_MULEQ_S_W_PHR  = (0x1D << 6) | OPC_ADDU_QB_DSP,
> +    OPC_MULQ_S_PH      = (0x1E << 6) | OPC_ADDU_QB_DSP,
>  };
>  
>  #define OPC_ADDUH_QB_DSP OPC_MULT_G_2E
> @@ -420,6 +427,11 @@ enum {
>      OPC_SUBQH_R_PH = (0x0B << 6) | OPC_ADDUH_QB_DSP,
>      OPC_SUBQH_W    = (0x11 << 6) | OPC_ADDUH_QB_DSP,
>      OPC_SUBQH_R_W  = (0x13 << 6) | OPC_ADDUH_QB_DSP,
> +    /* MIPS DSP Multiply Sub-class insns */
> +    OPC_MUL_PH     = (0x0C << 6) | OPC_ADDUH_QB_DSP,
> +    OPC_MUL_S_PH   = (0x0E << 6) | OPC_ADDUH_QB_DSP,
> +    OPC_MULQ_S_W   = (0x16 << 6) | OPC_ADDUH_QB_DSP,
> +    OPC_MULQ_RS_W  = (0x17 << 6) | OPC_ADDUH_QB_DSP,
>  };
>  
>  #define MASK_ABSQ_S_PH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> @@ -451,6 +463,7 @@ enum {
>      OPC_PRECRQ_RS_PH_W   = (0x15 << 6) | OPC_CMPU_EQ_QB_DSP,
>      OPC_PRECRQU_S_QB_PH  = (0x0F << 6) | OPC_CMPU_EQ_QB_DSP,
>  };
> +
>  #define MASK_SHLL_QB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
>  enum {
>      /* MIPS DSP GPR-Based Shift Sub-class */
> @@ -478,6 +491,33 @@ enum {
>      OPC_SHRAV_R_W  = (0x17 << 6) | OPC_SHLL_QB_DSP,
>  };
>  
> +#define MASK_DPA_W_PH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> +enum {
> +    /* MIPS DSP Multiply Sub-class insns */
> +    OPC_DPAU_H_QBL    = (0x03 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPAU_H_QBR    = (0x07 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPSU_H_QBL    = (0x0B << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPSU_H_QBR    = (0x0F << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPA_W_PH      = (0x00 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPAX_W_PH     = (0x08 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPAQ_S_W_PH   = (0x04 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPAQX_S_W_PH  = (0x18 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPAQX_SA_W_PH = (0x1A << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPS_W_PH      = (0x01 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPSX_W_PH          = (0x09 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPSQ_S_W_PH   = (0x05 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPSQX_S_W_PH  = (0x19 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPSQX_SA_W_PH = (0x1B << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_MULSAQ_S_W_PH = (0x06 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPAQ_SA_L_W   = (0x0C << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_DPSQ_SA_L_W   = (0x0D << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_MAQ_S_W_PHL   = (0x14 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_MAQ_S_W_PHR   = (0x16 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_MAQ_SA_W_PHL  = (0x10 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_MAQ_SA_W_PHR  = (0x12 << 6) | OPC_DPA_W_PH_DSP,
> +    OPC_MULSA_W_PH    = (0x02 << 6) | OPC_DPA_W_PH_DSP,
> +};
> +
>  #if defined(TARGET_MIPS64)
>  #define MASK_ABSQ_S_QH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
>  enum {
> @@ -505,6 +545,12 @@ enum {
>  #if defined(TARGET_MIPS64)
>  #define MASK_ADDU_OB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
>  enum {
> +    /* MIPS DSP Multiply Sub-class insns */
> +    OPC_MULEQ_S_PW_QHL = (0x1C << 6) | OPC_ADDU_OB_DSP,
> +    OPC_MULEQ_S_PW_QHR = (0x1D << 6) | OPC_ADDU_OB_DSP,
> +    OPC_MULEU_S_QH_OBL = (0x06 << 6) | OPC_ADDU_OB_DSP,
> +    OPC_MULEU_S_QH_OBR = (0x07 << 6) | OPC_ADDU_OB_DSP,
> +    OPC_MULQ_RS_QH     = (0x1F << 6) | OPC_ADDU_OB_DSP,
>      /* MIPS DSP Arithmetic Sub-class */
>      OPC_RADDU_L_OB     = (0x14 << 6) | OPC_ADDU_OB_DSP,
>      OPC_SUBQ_PW        = (0x13 << 6) | OPC_ADDU_OB_DSP,
> @@ -546,6 +592,39 @@ enum {
>  #endif
>  
>  #if defined(TARGET_MIPS64)
> +#define MASK_DPAQ_W_QH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> +enum {
> +    /* MIPS DSP Multiply Sub-class insns */
> +    OPC_DMADD         = (0x19 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DMADDU        = (0x1D << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DMSUB         = (0x1B << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DMSUBU        = (0x1F << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DPA_W_QH      = (0x00 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DPAQ_S_W_QH   = (0x04 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DPAQ_SA_L_PW  = (0x0C << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DPAU_H_OBL    = (0x03 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DPAU_H_OBR    = (0x07 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DPS_W_QH      = (0x01 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DPSQ_S_W_QH   = (0x05 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DPSQ_SA_L_PW  = (0x0D << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DPSU_H_OBL    = (0x0B << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_DPSU_H_OBR    = (0x0F << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_MAQ_S_L_PWL   = (0x1C << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_MAQ_S_L_PWR   = (0x1E << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_MAQ_S_W_QHLL  = (0x14 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_MAQ_SA_W_QHLL = (0x10 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_MAQ_S_W_QHLR  = (0x15 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_MAQ_SA_W_QHLR = (0x11 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_MAQ_S_W_QHRL  = (0x16 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_MAQ_SA_W_QHRL = (0x12 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_MAQ_S_W_QHRR  = (0x17 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_MAQ_SA_W_QHRR = (0x13 << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_MULSAQ_S_L_PW = (0x0E << 6) | OPC_DPAQ_W_QH_DSP,
> +    OPC_MULSAQ_S_W_QH = (0x06 << 6) | OPC_DPAQ_W_QH_DSP,
> +};
> +#endif
> +
> +#if defined(TARGET_MIPS64)
>  #define MASK_SHLL_OB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
>  enum {
>      /* MIPS DSP GPR-Based Shift Sub-class */
> @@ -12465,6 +12544,22 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
>                  case  OPC_SUBQH_R_W:
>                      gen_helper_subqh_r_w(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
>                      break;
> +                case  OPC_MUL_PH:
> +                    gen_helper_mul_ph(cpu_gpr[rd], cpu_env,
> +                                      cpu_gpr[rs], cpu_gpr[rt]);
> +                    break;
> +                case  OPC_MUL_S_PH:
> +                    gen_helper_mul_s_ph(cpu_gpr[rd], cpu_env,
> +                                        cpu_gpr[rs], cpu_gpr[rt]);
> +                    break;
> +                case OPC_MULQ_S_W:
> +                    gen_helper_mulq_s_w(cpu_gpr[rd], cpu_env,
> +                                        cpu_gpr[rs], cpu_gpr[rt]);
> +                    break;
> +                case OPC_MULQ_RS_W:
> +                    gen_helper_mulq_rs_w(cpu_gpr[rd], cpu_env,
> +                                         cpu_gpr[rs], cpu_gpr[rt]);
> +                    break;
>                  default:
>                      MIPS_INVAL("MASK ADDUH.QB");
>                      generate_exception(ctx, EXCP_RI);
> @@ -12676,6 +12771,36 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
>                  check_dsp(ctx);
>                  gen_helper_raddu_w_qb(cpu_gpr[rd], cpu_gpr[rs]);
>                  break;
> +            case OPC_MULEU_S_PH_QBL:
> +                check_dsp(ctx);
> +                gen_helper_muleu_s_ph_qbl(cpu_gpr[rd], cpu_env,
> +                                          cpu_gpr[rs], cpu_gpr[rt]);
> +                break;
> +            case OPC_MULEU_S_PH_QBR:
> +                check_dsp(ctx);
> +                gen_helper_muleu_s_ph_qbr(cpu_gpr[rd], cpu_env,
> +                                          cpu_gpr[rs], cpu_gpr[rt]);
> +                break;
> +            case OPC_MULQ_RS_PH:
> +                check_dsp(ctx);
> +                gen_helper_mulq_rs_ph(cpu_gpr[rd], cpu_env,
> +                                      cpu_gpr[rs], cpu_gpr[rt]);
> +                break;
> +            case OPC_MULEQ_S_W_PHL:
> +                check_dsp(ctx);
> +                gen_helper_muleq_s_w_phl(cpu_gpr[rd], cpu_env,
> +                                         cpu_gpr[rs], cpu_gpr[rt]);
> +                break;
> +            case OPC_MULEQ_S_W_PHR:
> +                check_dsp(ctx);
> +                gen_helper_muleq_s_w_phr(cpu_gpr[rd], cpu_env,
> +                                         cpu_gpr[rs], cpu_gpr[rt]);
> +                break;
> +            case OPC_MULQ_S_PH:
> +                check_dspr2(ctx);
> +                gen_helper_mulq_s_ph(cpu_gpr[rd], cpu_env,
> +                                     cpu_gpr[rs], cpu_gpr[rt]);
> +                break;
>              default:            /* Invalid */
>                  MIPS_INVAL("MASK ADDU.QB");
>                  generate_exception(ctx, EXCP_RI);
> @@ -12842,6 +12967,213 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
>                  tcg_temp_free_i32(temp_rs);
>                  break;
>              }
> +        case OPC_DPA_W_PH_DSP:
> +            op2 = MASK_DPA_W_PH(ctx->opcode);
> +            switch (op2) {
> +            case OPC_DPAU_H_QBL:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpau_h_qbl(cpu_env, temp_rd,
> +                                          cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPAU_H_QBR:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpau_h_qbr(cpu_env, temp_rd,
> +                                          cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPSU_H_QBL:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpsu_h_qbl(cpu_env, temp_rd,
> +                                          cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPSU_H_QBR:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpsu_h_qbr(cpu_env, temp_rd,
> +                                          cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPA_W_PH:
> +                check_dspr2(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpa_w_ph(cpu_env, temp_rd,
> +                                        cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPAX_W_PH:
> +                check_dspr2(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpax_w_ph(cpu_env, temp_rd,
> +                                         cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPAQ_S_W_PH:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpaq_s_w_ph(cpu_env, temp_rd,
> +                                           cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPAQX_S_W_PH:
> +                check_dspr2(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpaqx_s_w_ph(cpu_env, temp_rd,
> +                                            cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPAQX_SA_W_PH:
> +                check_dspr2(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpaqx_sa_w_ph(cpu_env, temp_rd,
> +                                             cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPS_W_PH:
> +                check_dspr2(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dps_w_ph(cpu_env, temp_rd,
> +                                        cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPSX_W_PH:
> +                check_dspr2(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpsx_w_ph(cpu_env, temp_rd,
> +                                         cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPSQ_S_W_PH:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpsq_s_w_ph(cpu_env, temp_rd,
> +                                           cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPSQX_S_W_PH:
> +                check_dspr2(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpsqx_s_w_ph(cpu_env, temp_rd,
> +                                            cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPSQX_SA_W_PH:
> +                check_dspr2(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpsqx_sa_w_ph(cpu_env, temp_rd,
> +                                             cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_MULSAQ_S_W_PH:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_mulsaq_s_w_ph(cpu_env, temp_rd,
> +                                             cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPAQ_SA_L_W:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpaq_sa_l_w(cpu_env, temp_rd,
> +                                           cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_DPSQ_SA_L_W:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_dpsq_sa_l_w(cpu_env, temp_rd,
> +                                           cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_MAQ_S_W_PHL:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_maq_s_w_phl(cpu_env, temp_rd,
> +                                           cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_MAQ_S_W_PHR:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_maq_s_w_phr(cpu_env, temp_rd,
> +                                           cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_MAQ_SA_W_PHL:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_maq_sa_w_phl(cpu_env, temp_rd,
> +                                            cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_MAQ_SA_W_PHR:
> +                check_dsp(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_maq_sa_w_phr(cpu_env, temp_rd,
> +                                            cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            case OPC_MULSA_W_PH:
> +                check_dspr2(ctx);
> +                {
> +                    TCGv_i32 temp_rd = tcg_const_i32(rd);
> +                    gen_helper_mulsa_w_ph(cpu_env, temp_rd,
> +                                          cpu_gpr[rs], cpu_gpr[rt]);
> +                    tcg_temp_free_i32(temp_rd);
> +                    break;
> +                }
> +            default:            /* Invalid */
> +                MIPS_INVAL("MASK DPAW.PH");
> +                generate_exception(ctx, EXCP_RI);
> +                break;
> +            }
> +            break;
>  #if defined(TARGET_MIPS64)
>          case OPC_DEXTM ... OPC_DEXT:
>          case OPC_DINSM ... OPC_DINS:
> @@ -12945,6 +13277,31 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
>          case OPC_ADDU_OB_DSP:
>              op2 = MASK_ADDU_OB(ctx->opcode);
>              switch (op2) {
> +            case OPC_MULEQ_S_PW_QHL:
> +                check_dsp(ctx);
> +                gen_helper_muleq_s_pw_qhl(cpu_gpr[rd], cpu_env,
> +                                          cpu_gpr[rs], cpu_gpr[rt]);
> +                break;
> +            case OPC_MULEQ_S_PW_QHR:
> +                check_dsp(ctx);
> +                gen_helper_muleq_s_pw_qhr(cpu_gpr[rd], cpu_env,
> +                                          cpu_gpr[rs], cpu_gpr[rt]);
> +                break;
> +            case OPC_MULEU_S_QH_OBL:
> +                check_dsp(ctx);
> +                gen_helper_muleu_s_qh_obl(cpu_gpr[rd],
> +                                          cpu_env, cpu_gpr[rs], cpu_gpr[rt]);
> +                break;
> +            case OPC_MULEU_S_QH_OBR:
> +                check_dsp(ctx);
> +                gen_helper_muleu_s_qh_obr(cpu_gpr[rd],
> +                                          cpu_env, cpu_gpr[rs], cpu_gpr[rt]);
> +                break;
> +            case OPC_MULQ_RS_QH:
> +                check_dsp(ctx);
> +                gen_helper_mulq_rs_qh(cpu_gpr[rd], cpu_env,
> +                                      cpu_gpr[rs], cpu_gpr[rt]);
> +                break;
>              case OPC_RADDU_L_OB:
>                  check_dsp(ctx);
>                  gen_helper_raddu_l_ob(cpu_gpr[rd], cpu_gpr[rs]);
> @@ -13112,6 +13469,152 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
>              break;
>  #endif
>  #if defined(TARGET_MIPS64)
> +        case OPC_DPAQ_W_QH_DSP:
> +            {
> +                int ac = rd & 0x03;
> +                TCGv_i32 ac_v = tcg_const_i32(ac);
> +
> +                op2 = MASK_DPAQ_W_QH(ctx->opcode);
> +
> +                switch (op2) {
> +                case OPC_DMADD:
> +                    check_dsp(ctx);
> +                    gen_helper_dmadd(cpu_env, cpu_gpr[rs], cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DMADDU:
> +                    check_dsp(ctx);
> +                    gen_helper_dmaddu(cpu_env, cpu_gpr[rs],
> +                                      cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DMSUB:
> +                    check_dsp(ctx);
> +                    gen_helper_dmsub(cpu_env, cpu_gpr[rs], cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DMSUBU:
> +                    check_dsp(ctx);
> +                    gen_helper_dmsubu(cpu_env, cpu_gpr[rs],
> +                                      cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DPA_W_QH:
> +                    check_dspr2(ctx);
> +                    gen_helper_dpa_w_qh(cpu_env, cpu_gpr[rs],
> +                                        cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DPAQ_S_W_QH:
> +                    check_dsp(ctx);
> +                    gen_helper_dpaq_s_w_qh(cpu_env, cpu_gpr[rs],
> +                                           cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DPAQ_SA_L_PW:
> +                    check_dsp(ctx);
> +                    gen_helper_dpaq_sa_l_pw(cpu_env, cpu_gpr[rs],
> +                                            cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DPAU_H_OBL:
> +                    check_dsp(ctx);
> +                    gen_helper_dpau_h_obl(cpu_env, cpu_gpr[rs],
> +                                          cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DPAU_H_OBR:
> +                    check_dsp(ctx);
> +                    gen_helper_dpau_h_obr(cpu_env, cpu_gpr[rs],
> +                                          cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DPS_W_QH:
> +                    check_dspr2(ctx);
> +                    gen_helper_dps_w_qh(cpu_env, cpu_gpr[rs],
> +                                        cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DPSQ_S_W_QH:
> +                    check_dsp(ctx);
> +                    gen_helper_dpsq_s_w_qh(cpu_env, cpu_gpr[rs],
> +                                           cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DPSQ_SA_L_PW:
> +                    check_dsp(ctx);
> +                    gen_helper_dpsq_sa_l_pw(cpu_env, cpu_gpr[rs],
> +                                            cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DPSU_H_OBL:
> +                    check_dsp(ctx);
> +                    gen_helper_dpsu_h_obl(cpu_env, cpu_gpr[rs],
> +                                          cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_DPSU_H_OBR:
> +                    check_dsp(ctx);
> +                    gen_helper_dpsu_h_obr(cpu_env, cpu_gpr[rs],
> +                                          cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_MAQ_S_L_PWL:
> +                    check_dsp(ctx);
> +                    gen_helper_maq_s_l_pwl(cpu_env, cpu_gpr[rs],
> +                                           cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_MAQ_S_L_PWR:
> +                    check_dsp(ctx);
> +                    gen_helper_maq_s_l_pwr(cpu_env, cpu_gpr[rs],
> +                                           cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_MAQ_S_W_QHLL:
> +                    check_dsp(ctx);
> +                    gen_helper_maq_s_w_qhll(cpu_env, cpu_gpr[rs],
> +                                            cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_MAQ_SA_W_QHLL:
> +                    check_dsp(ctx);
> +                    gen_helper_maq_sa_w_qhll(cpu_env, cpu_gpr[rs],
> +                                             cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_MAQ_S_W_QHLR:
> +                    check_dsp(ctx);
> +                    gen_helper_maq_s_w_qhlr(cpu_env, cpu_gpr[rs],
> +                                            cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_MAQ_SA_W_QHLR:
> +                    check_dsp(ctx);
> +                    gen_helper_maq_sa_w_qhlr(cpu_env, cpu_gpr[rs],
> +                                             cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_MAQ_S_W_QHRL:
> +                    check_dsp(ctx);
> +                    gen_helper_maq_s_w_qhrl(cpu_env, cpu_gpr[rs],
> +                                            cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_MAQ_SA_W_QHRL:
> +                    check_dsp(ctx);
> +                    gen_helper_maq_sa_w_qhrl(cpu_env, cpu_gpr[rs],
> +                                             cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_MAQ_S_W_QHRR:
> +                    check_dsp(ctx);
> +                    gen_helper_maq_s_w_qhrr(cpu_env, cpu_gpr[rs],
> +                                            cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_MAQ_SA_W_QHRR:
> +                    check_dsp(ctx);
> +                    gen_helper_maq_sa_w_qhrr(cpu_env, cpu_gpr[rs],
> +                                             cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_MULSAQ_S_L_PW:
> +                    check_dsp(ctx);
> +                    gen_helper_mulsaq_s_l_pw(cpu_env, cpu_gpr[rs],
> +                                             cpu_gpr[rt], ac_v);
> +                    break;
> +                case OPC_MULSAQ_S_W_QH:
> +                    check_dsp(ctx);
> +                    gen_helper_mulsaq_s_w_qh(cpu_env, cpu_gpr[rs],
> +                                             cpu_gpr[rt], ac_v);
> +                    break;
> +                default:            /* Invalid */
> +                    MIPS_INVAL("MASK DPAQ.W.QH");
> +                    generate_exception(ctx, EXCP_RI);
> +                    break;
> +                }
> +                tcg_temp_free_i32(ac_v);
> +                break;
> +            }
> +#endif
> +#if defined(TARGET_MIPS64)
>          case OPC_SHLL_OB_DSP:
>              op2 = MASK_SHLL_OB(ctx->opcode);
>              switch (op2) {
> -- 
> 1.7.9.5
> 
>

Patch

diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c
index 8e7bccb..ff51d2f 100644
--- a/target-mips/dsp_helper.c
+++ b/target-mips/dsp_helper.c
@@ -4187,6 +4187,2030 @@  target_ulong helper_shrav_r_pw(target_ulong rt, target_ulong sa)
 }
 #endif
 
+/** DSP Multiply Sub-class insns **/
+/*
+ * Multiply the two bytes of rs selected by MIPSDSP_Q3 and MIPSDSP_Q2 by the
+ * upper and lower 16-bit halves of rt respectively, via
+ * mipsdsp_mul_u8_u16(), and pack the two 16-bit products into one word.
+ *
+ * Returns the packed 32-bit word sign-extended to target_long.
+ */
+target_ulong helper_muleu_s_ph_qbl(CPUMIPSState *env,
+                                   target_ulong rs, target_ulong rt)
+{
+    uint8_t rs3, rs2;
+    uint16_t tempB, tempA, rth, rtl;
+    uint32_t temp;
+
+    rs3 = (rs & MIPSDSP_Q3) >> 24;
+    rs2 = (rs & MIPSDSP_Q2) >> 16;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+    tempB = mipsdsp_mul_u8_u16(env, rs3, rth);
+    tempA = mipsdsp_mul_u8_u16(env, rs2, rtl);
+    /* tempB goes to bits 31..16, tempA to bits 15..0. */
+    temp = ((uint32_t)tempB << 16) | ((uint32_t)tempA & MIPSDSP_LO);
+
+    /* The packed word is the result; no extra copy is needed. */
+    return (target_long)(int32_t)temp;
+}
+
+/*
+ * Multiply the two bytes of rs selected by MIPSDSP_Q1 and MIPSDSP_Q0 by the
+ * upper and lower 16-bit halves of rt respectively, via
+ * mipsdsp_mul_u8_u16(), and pack the two 16-bit products into one word.
+ *
+ * Returns the packed 32-bit word sign-extended to target_long.
+ */
+target_ulong helper_muleu_s_ph_qbr(CPUMIPSState *env,
+                                   target_ulong rs, target_ulong rt)
+{
+    uint8_t  rs1, rs0;
+    uint16_t tempB, tempA;
+    uint16_t rth,   rtl;
+    uint32_t temp;
+
+    rs1 = (rs & MIPSDSP_Q1) >>  8;
+    rs0 =  rs & MIPSDSP_Q0;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    tempB = mipsdsp_mul_u8_u16(env, rs1, rth);
+    tempA = mipsdsp_mul_u8_u16(env, rs0, rtl);
+    /* tempB goes to bits 31..16, tempA to bits 15..0. */
+    temp = ((uint32_t)tempB << 16) | ((uint32_t)tempA & MIPSDSP_LO);
+
+    /* The packed word is the result; no extra copy is needed. */
+    return (target_long)(int32_t)temp;
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Multiply the four bytes in bits 63..32 of rs by the four 16-bit lanes of
+ * rt (highest byte with highest lane) via mipsdsp_mul_u8_u16() and return
+ * the four 16-bit results packed in the same lane order.
+ */
+target_ulong helper_muleu_s_qh_obl(CPUMIPSState *env,
+                                   target_ulong rs, target_ulong rt)
+{
+    uint8_t rs3, rs2, rs1, rs0;
+    uint16_t rt3, rt2, rt1, rt0;
+    uint16_t tempD, tempC, tempB, tempA;
+    uint64_t temp;
+
+    /* Upper four bytes of rs; the uint8_t destinations keep 8 bits each. */
+    rs3 = (rs >> 56) & MIPSDSP_Q0;
+    rs2 = (rs >> 48) & MIPSDSP_Q0;
+    rs1 = (rs >> 40) & MIPSDSP_Q0;
+    rs0 = (rs >> 32) & MIPSDSP_Q0;
+    /* All four 16-bit lanes of rt. */
+    rt3 = (rt >> 48) & MIPSDSP_LO;
+    rt2 = (rt >> 32) & MIPSDSP_LO;
+    rt1 = (rt >> 16) & MIPSDSP_LO;
+    rt0 = rt & MIPSDSP_LO;
+
+    tempD = mipsdsp_mul_u8_u16(env, rs3, rt3);
+    tempC = mipsdsp_mul_u8_u16(env, rs2, rt2);
+    tempB = mipsdsp_mul_u8_u16(env, rs1, rt1);
+    tempA = mipsdsp_mul_u8_u16(env, rs0, rt0);
+
+    /* Repack: D in bits 63..48 down to A in bits 15..0. */
+    temp = ((uint64_t)tempD << 48) | ((uint64_t)tempC << 32) |
+           ((uint64_t)tempB << 16) | (uint64_t)tempA;
+    return temp;
+}
+
+/*
+ * Multiply the four bytes in bits 31..0 of rs by the four 16-bit lanes of
+ * rt (highest byte with highest lane) via mipsdsp_mul_u8_u16() and return
+ * the four 16-bit results packed in the same lane order.
+ */
+target_ulong helper_muleu_s_qh_obr(CPUMIPSState *env,
+                                   target_ulong rs, target_ulong rt)
+{
+    uint8_t rs3, rs2, rs1, rs0;
+    uint16_t rt3, rt2, rt1, rt0;
+    uint16_t tempD, tempC, tempB, tempA;
+    uint64_t temp;
+
+    /* Lower four bytes of rs; the uint8_t destinations keep 8 bits each. */
+    rs3 = (rs >> 24) & MIPSDSP_Q0;
+    rs2 = (rs >> 16) & MIPSDSP_Q0;
+    rs1 = (rs >> 8) & MIPSDSP_Q0;
+    rs0 = rs & MIPSDSP_Q0;
+    /* All four 16-bit lanes of rt. */
+    rt3 = (rt >> 48) & MIPSDSP_LO;
+    rt2 = (rt >> 32) & MIPSDSP_LO;
+    rt1 = (rt >> 16) & MIPSDSP_LO;
+    rt0 = rt & MIPSDSP_LO;
+
+    tempD = mipsdsp_mul_u8_u16(env, rs3, rt3);
+    tempC = mipsdsp_mul_u8_u16(env, rs2, rt2);
+    tempB = mipsdsp_mul_u8_u16(env, rs1, rt1);
+    tempA = mipsdsp_mul_u8_u16(env, rs0, rt0);
+
+    /* Repack: D in bits 63..48 down to A in bits 15..0. */
+    temp = ((uint64_t)tempD << 48) | ((uint64_t)tempC << 32) |
+           ((uint64_t)tempB << 16) | (uint64_t)tempA;
+
+    return temp;
+}
+#endif
+
+/*
+ * Multiply the upper halfwords and the lower halfwords of rs and rt
+ * pairwise via mipsdsp_rndq15_mul_q15_q15() and pack the two 16-bit
+ * results into one word (upper product in bits 31..16).
+ *
+ * Returns the packed 32-bit word sign-extended to target_long.
+ */
+target_ulong helper_mulq_rs_ph(CPUMIPSState *env,
+                               target_ulong rs, target_ulong rt)
+{
+    int16_t tempB, tempA, rsh, rsl, rth, rtl;
+    int32_t temp;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    tempB = mipsdsp_rndq15_mul_q15_q15(env, rsh, rth);
+    tempA = mipsdsp_rndq15_mul_q15_q15(env, rsl, rtl);
+    /* The mask drops the sign-extension bits of tempA above bit 15. */
+    temp = ((uint32_t)tempB << 16) | ((uint32_t)tempA & MIPSDSP_LO);
+
+    /* temp is already a signed 32-bit value; no extra copy is needed. */
+    return (target_long)temp;
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Multiply the four 16-bit lanes of rs by the matching lanes of rt via
+ * mipsdsp_rndq15_mul_q15_q15() and return the four 16-bit results packed
+ * in the same lane order.
+ */
+target_ulong helper_mulq_rs_qh(CPUMIPSState *env,
+                               target_ulong rs, target_ulong rt)
+{
+    uint16_t rs3, rs2, rs1, rs0;
+    uint16_t rt3, rt2, rt1, rt0;
+    uint16_t tempD, tempC, tempB, tempA;
+    uint64_t temp;
+
+    /* Split both operands into four 16-bit lanes, lane 3 = bits 63..48. */
+    rs3 = (rs >> 48) & MIPSDSP_LO;
+    rs2 = (rs >> 32) & MIPSDSP_LO;
+    rs1 = (rs >> 16) & MIPSDSP_LO;
+    rs0 = rs & MIPSDSP_LO;
+    rt3 = (rt >> 48) & MIPSDSP_LO;
+    rt2 = (rt >> 32) & MIPSDSP_LO;
+    rt1 = (rt >> 16) & MIPSDSP_LO;
+    rt0 = rt & MIPSDSP_LO;
+
+    tempD = mipsdsp_rndq15_mul_q15_q15(env, rs3, rt3);
+    tempC = mipsdsp_rndq15_mul_q15_q15(env, rs2, rt2);
+    tempB = mipsdsp_rndq15_mul_q15_q15(env, rs1, rt1);
+    tempA = mipsdsp_rndq15_mul_q15_q15(env, rs0, rt0);
+
+    /* Repack: D in bits 63..48 down to A in bits 15..0. */
+    temp = ((uint64_t)tempD << 48) | ((uint64_t)tempC << 32) |
+           ((uint64_t)tempB << 16) | (uint64_t)tempA;
+    return temp;
+}
+#endif
+
+/*
+ * Multiply the upper halfwords (bits 31..16) of rs and rt via
+ * mipsdsp_mul_q15_q15_overflowflag21().
+ *
+ * Returns the 32-bit product sign-extended to target_long.
+ */
+target_ulong helper_muleq_s_w_phl(CPUMIPSState *env,
+                                  target_ulong rs, target_ulong rt)
+{
+    int16_t rsh, rth;
+    int32_t temp;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    temp = mipsdsp_mul_q15_q15_overflowflag21(env, rsh, rth);
+
+    /* temp is already a signed 32-bit value; no extra copy is needed. */
+    return (target_long)temp;
+}
+
+/*
+ * Multiply the lower halfwords (bits 15..0) of rs and rt via
+ * mipsdsp_mul_q15_q15_overflowflag21().
+ *
+ * Returns the 32-bit product sign-extended to target_long.
+ */
+target_ulong helper_muleq_s_w_phr(CPUMIPSState *env,
+                                  target_ulong rs, target_ulong rt)
+{
+    int16_t rsl, rtl;
+    int32_t temp;
+
+    rsl = rs & MIPSDSP_LO;
+    rtl = rt & MIPSDSP_LO;
+    temp = mipsdsp_mul_q15_q15_overflowflag21(env, rsl, rtl);
+
+    /* temp is already a signed 32-bit value; no extra copy is needed. */
+    return (target_long)temp;
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Multiply the two upper 16-bit lanes of rs (bits 63..48 and 47..32) by the
+ * matching lanes of rt via mipsdsp_mul_q15_q15() and return the two 32-bit
+ * products packed into one 64-bit value (upper-lane product in bits 63..32).
+ */
+target_ulong helper_muleq_s_pw_qhl(CPUMIPSState *env,
+                                   target_ulong rs, target_ulong rt)
+{
+    uint16_t rsB, rsA;
+    uint16_t rtB, rtA;
+    uint32_t tempB, tempA;
+    uint64_t temp;
+
+    rsB = (rs >> 48) & MIPSDSP_LO;
+    rsA = (rs >> 32) & MIPSDSP_LO;
+    rtB = (rt >> 48) & MIPSDSP_LO;
+    rtA = (rt >> 32) & MIPSDSP_LO;
+
+    /*
+     * NOTE(review): other helpers pass an accumulator index in this
+     * position; the constant 5 presumably selects a fixed overflow flag
+     * (16 + 5 = 21) -- confirm against mipsdsp_mul_q15_q15().
+     */
+    tempB = mipsdsp_mul_q15_q15(env, 5, rsB, rtB);
+    tempA = mipsdsp_mul_q15_q15(env, 5, rsA, rtA);
+
+    temp = ((uint64_t)tempB << 32) | (uint64_t)tempA;
+
+    return temp;
+}
+
+/*
+ * Multiply the two lower 16-bit lanes of rs (bits 31..16 and 15..0) by the
+ * matching lanes of rt via mipsdsp_mul_q15_q15() and return the two 32-bit
+ * products packed into one 64-bit value (upper-lane product in bits 63..32).
+ */
+target_ulong helper_muleq_s_pw_qhr(CPUMIPSState *env,
+                                   target_ulong rs, target_ulong rt)
+{
+    uint16_t rsB, rsA;
+    uint16_t rtB, rtA;
+    uint32_t tempB, tempA;
+    uint64_t temp;
+
+    rsB = (rs >> 16) & MIPSDSP_LO;
+    rsA = rs & MIPSDSP_LO;
+    rtB = (rt >> 16) & MIPSDSP_LO;
+    rtA = rt & MIPSDSP_LO;
+
+    /*
+     * NOTE(review): other helpers pass an accumulator index in this
+     * position; the constant 5 presumably selects a fixed overflow flag
+     * (16 + 5 = 21) -- confirm against mipsdsp_mul_q15_q15().
+     */
+    tempB = mipsdsp_mul_q15_q15(env, 5, rsB, rtB);
+    tempA = mipsdsp_mul_q15_q15(env, 5, rsA, rtA);
+
+    temp = ((uint64_t)tempB << 32) | (uint64_t)tempA;
+
+    return temp;
+}
+#endif
+
+/*
+ * Add the sum of two byte products, formed from the bytes of rs and rt
+ * selected by MIPSDSP_Q3 and MIPSDSP_Q2 via mipsdsp_mul_u8_u8(), to the
+ * 64-bit accumulator built from HI[ac] (upper 32 bits) and LO[ac] (lower
+ * 32 bits).  The result is written back as two 32-bit halves, each
+ * sign-extended to target_long.
+ */
+void helper_dpau_h_qbl(CPUMIPSState *env,
+                       uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    uint8_t rs3, rs2;
+    uint8_t rt3, rt2;
+    uint16_t tempB, tempA;
+    uint64_t tempC, tempBL, tempAL, dotp;
+
+    rs3 = (rs & MIPSDSP_Q3) >> 24;
+    rt3 = (rt & MIPSDSP_Q3) >> 24;
+    rs2 = (rs & MIPSDSP_Q2) >> 16;
+    rt2 = (rt & MIPSDSP_Q2) >> 16;
+    tempB = mipsdsp_mul_u8_u8(rs3, rt3);
+    tempA = mipsdsp_mul_u8_u8(rs2, rt2);
+    /* Zero-extend the 16-bit products before summing. */
+    tempBL = tempB;
+    tempAL = tempA;
+    dotp = tempBL + tempAL;
+    /* Accumulator value (HI:LO) plus the dot product, modulo 2^64. */
+    tempC = (((uint64_t)env->active_tc.HI[ac] << 32) |
+             ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO)) + dotp;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
+}
+
+/*
+ * Add the sum of two byte products, formed from the bytes of rs and rt
+ * selected by MIPSDSP_Q1 and MIPSDSP_Q0 via mipsdsp_mul_u8_u8(), to the
+ * 64-bit accumulator built from HI[ac] (upper 32 bits) and LO[ac] (lower
+ * 32 bits).  The result is written back as two 32-bit halves, each
+ * sign-extended to target_long.
+ */
+void helper_dpau_h_qbr(CPUMIPSState *env,
+                       uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    uint8_t rs1, rs0;
+    uint8_t rt1, rt0;
+    uint16_t tempB, tempA;
+    uint64_t tempC, tempBL, tempAL, dotp;
+
+    rs1 = (rs & MIPSDSP_Q1) >> 8;
+    rt1 = (rt & MIPSDSP_Q1) >> 8;
+    rs0 = (rs & MIPSDSP_Q0);
+    rt0 = (rt & MIPSDSP_Q0);
+    tempB = mipsdsp_mul_u8_u8(rs1, rt1);
+    tempA = mipsdsp_mul_u8_u8(rs0, rt0);
+    /* Zero-extend the 16-bit products before summing. */
+    tempBL = tempB;
+    tempAL = tempA;
+    dotp = tempBL + tempAL;
+    /* Accumulator value (HI:LO) plus the dot product, modulo 2^64. */
+    tempC = (((uint64_t)env->active_tc.HI[ac] << 32) |
+             ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO)) + dotp;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Add the sum of four byte products, formed from the bytes in bits 63..32
+ * of rs and rt via mipsdsp_mul_u8_u8(), to the 128-bit accumulator held in
+ * HI[ac] (upper 64 bits) and LO[ac] (lower 64 bits).
+ */
+void helper_dpau_h_obl(CPUMIPSState *env,
+                       target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    uint8_t rs7, rs6, rs5, rs4;
+    uint8_t rt7, rt6, rt5, rt4;
+    uint16_t tempD, tempC, tempB, tempA;
+    uint64_t temp[2];
+    uint64_t acc[2];
+    uint64_t temp_sum;
+
+    /* temp[0] is overwritten below; temp[1] starts as "no carry". */
+    temp[0] = 0;
+    temp[1] = 0;
+
+    rs7 = (rs >> 56) & MIPSDSP_Q0;
+    rs6 = (rs >> 48) & MIPSDSP_Q0;
+    rs5 = (rs >> 40) & MIPSDSP_Q0;
+    rs4 = (rs >> 32) & MIPSDSP_Q0;
+    rt7 = (rt >> 56) & MIPSDSP_Q0;
+    rt6 = (rt >> 48) & MIPSDSP_Q0;
+    rt5 = (rt >> 40) & MIPSDSP_Q0;
+    rt4 = (rt >> 32) & MIPSDSP_Q0;
+
+    tempD = mipsdsp_mul_u8_u8(rs7, rt7);
+    tempC = mipsdsp_mul_u8_u8(rs6, rt6);
+    tempB = mipsdsp_mul_u8_u8(rs5, rt5);
+    tempA = mipsdsp_mul_u8_u8(rs4, rt4);
+
+    /* Zero-extended sum of the four 16-bit products. */
+    temp[0] = (uint64_t)tempD + (uint64_t)tempC +
+      (uint64_t)tempB + (uint64_t)tempA;
+
+    acc[0] = env->active_tc.LO[ac];
+    acc[1] = env->active_tc.HI[ac];
+
+    temp_sum = acc[0] + temp[0];
+
+    /* An unsigned sum smaller than its addends wrapped: carry into HI. */
+    if (temp_sum < acc[0] && temp_sum < temp[0]) {
+        temp[1] += 1;
+    }
+    temp[0] = temp_sum;
+    temp[1] += acc[1];
+
+    env->active_tc.HI[ac] = temp[1];
+    env->active_tc.LO[ac] = temp[0];
+}
+
+/*
+ * Add the sum of four byte products, formed from the bytes in bits 31..0
+ * of rs and rt via mipsdsp_mul_u8_u8(), to the 128-bit accumulator held in
+ * HI[ac] (upper 64 bits) and LO[ac] (lower 64 bits).
+ */
+void helper_dpau_h_obr(CPUMIPSState *env,
+                       target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    uint8_t rs3, rs2, rs1, rs0;
+    uint8_t rt3, rt2, rt1, rt0;
+    uint16_t tempD, tempC, tempB, tempA;
+    uint64_t temp[2];
+    uint64_t acc[2];
+    uint64_t temp_sum;
+
+    /* temp[0] is overwritten below; temp[1] starts as "no carry". */
+    temp[0] = 0;
+    temp[1] = 0;
+
+    rs3 = (rs >> 24) & MIPSDSP_Q0;
+    rs2 = (rs >> 16) & MIPSDSP_Q0;
+    rs1 = (rs >> 8) & MIPSDSP_Q0;
+    rs0 = rs & MIPSDSP_Q0;
+    rt3 = (rt >> 24) & MIPSDSP_Q0;
+    rt2 = (rt >> 16) & MIPSDSP_Q0;
+    rt1 = (rt >> 8) & MIPSDSP_Q0;
+    rt0 = rt & MIPSDSP_Q0;
+
+    tempD = mipsdsp_mul_u8_u8(rs3, rt3);
+    tempC = mipsdsp_mul_u8_u8(rs2, rt2);
+    tempB = mipsdsp_mul_u8_u8(rs1, rt1);
+    tempA = mipsdsp_mul_u8_u8(rs0, rt0);
+
+    /* Zero-extended sum of the four 16-bit products. */
+    temp[0] = (uint64_t)tempD + (uint64_t)tempC +
+              (uint64_t)tempB + (uint64_t)tempA;
+
+    acc[0] = env->active_tc.LO[ac];
+    acc[1] = env->active_tc.HI[ac];
+
+    temp_sum = acc[0] + temp[0];
+
+    /* An unsigned sum smaller than its addends wrapped: carry into HI. */
+    if (temp_sum < acc[0] && temp_sum < temp[0]) {
+        temp[1] += 1;
+    }
+    temp[0] = temp_sum;
+    temp[1] += acc[1];
+
+    env->active_tc.HI[ac] = temp[1];
+    env->active_tc.LO[ac] = temp[0];
+}
+#endif
+
+/*
+ * Subtract the sum of two byte products, formed from the bytes of rs and
+ * rt selected by MIPSDSP_Q3 and MIPSDSP_Q2 via mipsdsp_mul_u8_u8(), from
+ * the 64-bit accumulator built from HI[ac] (upper 32 bits) and LO[ac]
+ * (lower 32 bits).  The result is written back as two 32-bit halves, each
+ * sign-extended to target_long.
+ */
+void helper_dpsu_h_qbl(CPUMIPSState *env,
+                       uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    uint8_t  rs3, rs2, rt3, rt2;
+    uint16_t tempB,  tempA;
+    uint64_t dotp, tempBL, tempAL, tempC;
+
+    rs3 = (rs & MIPSDSP_Q3) >> 24;
+    rs2 = (rs & MIPSDSP_Q2) >> 16;
+    rt3 = (rt & MIPSDSP_Q3) >> 24;
+    rt2 = (rt & MIPSDSP_Q2) >> 16;
+
+    tempB = mipsdsp_mul_u8_u8(rs3, rt3);
+    tempA = mipsdsp_mul_u8_u8(rs2, rt2);
+    /* Zero-extend; the masks are no-ops on uint16_t values. */
+    tempBL = tempB & 0xFFFF;
+    tempAL = tempA & 0xFFFF;
+
+    dotp   = tempBL + tempAL;
+    tempC  = ((uint64_t)env->active_tc.HI[ac] << 32) |
+             ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    /* Unsigned subtraction, i.e. modulo 2^64. */
+    tempC -= dotp;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
+}
+
+/*
+ * Subtract the sum of two byte products, formed from the bytes of rs and
+ * rt selected by MIPSDSP_Q1 and MIPSDSP_Q0 via mipsdsp_mul_u8_u8(), from
+ * the 64-bit accumulator built from HI[ac] (upper 32 bits) and LO[ac]
+ * (lower 32 bits).  The result is written back as two 32-bit halves, each
+ * sign-extended to target_long.
+ */
+void helper_dpsu_h_qbr(CPUMIPSState *env,
+                       uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    uint8_t  rs1, rs0, rt1, rt0;
+    uint16_t tempB,  tempA;
+    uint64_t dotp, tempBL, tempAL, tempC;
+
+    rs1 = (rs & MIPSDSP_Q1) >> 8;
+    rs0 = (rs & MIPSDSP_Q0);
+    rt1 = (rt & MIPSDSP_Q1) >> 8;
+    rt0 = (rt & MIPSDSP_Q0);
+
+    tempB = mipsdsp_mul_u8_u8(rs1, rt1);
+    tempA = mipsdsp_mul_u8_u8(rs0, rt0);
+    /* Zero-extend; the masks are no-ops on uint16_t values. */
+    tempBL = tempB & 0xFFFF;
+    tempAL = tempA & 0xFFFF;
+
+    dotp   = tempBL + tempAL;
+    tempC  = ((uint64_t)env->active_tc.HI[ac] << 32) |
+             ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    /* Unsigned subtraction, i.e. modulo 2^64. */
+    tempC -= dotp;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Subtract the sum of four byte products, formed from the bytes in bits
+ * 63..32 of rs and rt via mipsdsp_mul_u8_u8(), from the 128-bit
+ * accumulator held in HI[ac] (upper 64 bits) and LO[ac] (lower 64 bits).
+ */
+void helper_dpsu_h_obl(CPUMIPSState *env,
+                       target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    uint8_t rs7, rs6, rs5, rs4;
+    uint8_t rt7, rt6, rt5, rt4;
+    uint16_t tempD, tempC, tempB, tempA;
+    uint64_t acc[2];
+    uint64_t temp[2];
+    uint64_t temp_sum;
+
+    /* temp[1] stays 0: the unsigned dot product has no upper 64 bits. */
+    temp[0] = 0;
+    temp[1] = 0;
+
+    rs7 = (rs >> 56) & MIPSDSP_Q0;
+    rs6 = (rs >> 48) & MIPSDSP_Q0;
+    rs5 = (rs >> 40) & MIPSDSP_Q0;
+    rs4 = (rs >> 32) & MIPSDSP_Q0;
+    rt7 = (rt >> 56) & MIPSDSP_Q0;
+    rt6 = (rt >> 48) & MIPSDSP_Q0;
+    rt5 = (rt >> 40) & MIPSDSP_Q0;
+    rt4 = (rt >> 32) & MIPSDSP_Q0;
+
+    tempD = mipsdsp_mul_u8_u8(rs7, rt7);
+    tempC = mipsdsp_mul_u8_u8(rs6, rt6);
+    tempB = mipsdsp_mul_u8_u8(rs5, rt5);
+    tempA = mipsdsp_mul_u8_u8(rs4, rt4);
+
+    /* Zero-extended sum of the four 16-bit products. */
+    temp[0] = (uint64_t)tempD + (uint64_t)tempC +
+              (uint64_t)tempB + (uint64_t)tempA;
+
+    acc[0] = env->active_tc.LO[ac];
+    acc[1] = env->active_tc.HI[ac];
+
+    temp_sum = acc[0] - temp[0];
+    /* An unsigned difference larger than the minuend wrapped: borrow. */
+    if (temp_sum > acc[0]) {
+        acc[1] -= 1;
+    }
+    acc[0] = temp_sum;
+    acc[1] -= temp[1];
+
+    env->active_tc.HI[ac] = acc[1];
+    env->active_tc.LO[ac] = acc[0];
+}
+
+/*
+ * Subtract the sum of four byte products, formed from the bytes in bits
+ * 31..0 of rs and rt via mipsdsp_mul_u8_u8(), from the 128-bit accumulator
+ * held in HI[ac] (upper 64 bits) and LO[ac] (lower 64 bits).
+ */
+void helper_dpsu_h_obr(CPUMIPSState *env,
+                       target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    uint8_t rs3, rs2, rs1, rs0;
+    uint8_t rt3, rt2, rt1, rt0;
+    uint16_t tempD, tempC, tempB, tempA;
+    uint64_t acc[2];
+    uint64_t temp[2];
+    uint64_t temp_sum;
+
+    /* temp[1] stays 0: the unsigned dot product has no upper 64 bits. */
+    temp[0] = 0;
+    temp[1] = 0;
+
+    rs3 = (rs >> 24) & MIPSDSP_Q0;
+    rs2 = (rs >> 16) & MIPSDSP_Q0;
+    rs1 = (rs >> 8) & MIPSDSP_Q0;
+    rs0 = rs & MIPSDSP_Q0;
+    rt3 = (rt >> 24) & MIPSDSP_Q0;
+    rt2 = (rt >> 16) & MIPSDSP_Q0;
+    rt1 = (rt >> 8) & MIPSDSP_Q0;
+    rt0 = rt & MIPSDSP_Q0;
+
+    tempD = mipsdsp_mul_u8_u8(rs3, rt3);
+    tempC = mipsdsp_mul_u8_u8(rs2, rt2);
+    tempB = mipsdsp_mul_u8_u8(rs1, rt1);
+    tempA = mipsdsp_mul_u8_u8(rs0, rt0);
+
+    /* Zero-extended sum of the four 16-bit products. */
+    temp[0] = (uint64_t)tempD + (uint64_t)tempC +
+              (uint64_t)tempB + (uint64_t)tempA;
+
+    acc[0] = env->active_tc.LO[ac];
+    acc[1] = env->active_tc.HI[ac];
+
+    temp_sum = acc[0] - temp[0];
+    /* An unsigned difference larger than the minuend wrapped: borrow. */
+    if (temp_sum > acc[0]) {
+        acc[1] -= 1;
+    }
+    acc[0] = temp_sum;
+    acc[1] -= temp[1];
+
+    env->active_tc.HI[ac] = acc[1];
+    env->active_tc.LO[ac] = acc[0];
+}
+#endif
+
+/*
+ * Dot product with accumulate on signed halfwords: adds
+ * rs.hi * rt.hi + rs.lo * rt.lo to the 64-bit accumulator built from
+ * HI[ac] (upper 32 bits) and LO[ac] (lower 32 bits).
+ *
+ * ac     - accumulator index into env->active_tc.HI[] / LO[]
+ * rs, rt - source registers; only bits 31..0 are used
+ *
+ * The result is written back as two 32-bit halves, each sign-extended to
+ * target_long.
+ */
+void helper_dpa_w_ph(CPUMIPSState *env,
+                     uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    /*
+     * The halfwords must be signed: with uint16_t the int32_t casts below
+     * would zero-extend, giving unsigned products that can also overflow
+     * int32_t (0xFFFF * 0xFFFF).
+     */
+    int16_t rsh, rsl, rth, rtl;
+    int32_t  tempA, tempB;
+    int64_t  acc, tempAL, tempBL;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    tempB = (int32_t)rsh * (int32_t)rth;
+    tempA = (int32_t)rsl * (int32_t)rtl;
+    /* Sign-extend the products to 64 bits before summing. */
+    tempBL = tempB;
+    tempAL = tempA;
+
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    acc += tempBL + tempAL;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Dot product with accumulate on four signed halfword lanes: adds the sum
+ * of the four lane-wise products of rs and rt to the 128-bit accumulator
+ * held in HI[ac] (upper 64 bits) and LO[ac] (lower 64 bits).
+ *
+ * rs, rt - source registers, each split into four 16-bit lanes
+ * ac     - accumulator index into env->active_tc.HI[] / LO[]
+ */
+void helper_dpa_w_qh(CPUMIPSState *env,
+                     target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    /*
+     * Signed 16-bit lanes, as in helper_dps_w_qh(): zero-extended lanes
+     * would give unsigned products that can overflow int32_t.
+     */
+    int16_t rs3, rs2, rs1, rs0;
+    int16_t rt3, rt2, rt1, rt0;
+    int32_t tempD, tempC, tempB, tempA;
+    int64_t acc[2];
+    int64_t temp[2];
+    int64_t temp_sum;
+
+    rs3 = (rs >> 48) & MIPSDSP_LO;
+    rs2 = (rs >> 32) & MIPSDSP_LO;
+    rs1 = (rs >> 16) & MIPSDSP_LO;
+    rs0 = rs & MIPSDSP_LO;
+
+    rt3 = (rt >> 48) & MIPSDSP_LO;
+    rt2 = (rt >> 32) & MIPSDSP_LO;
+    rt1 = (rt >> 16) & MIPSDSP_LO;
+    rt0 = rt & MIPSDSP_LO;
+
+    tempD = (int32_t)rs3 * (int32_t)rt3;
+    tempC = (int32_t)rs2 * (int32_t)rt2;
+    tempB = (int32_t)rs1 * (int32_t)rt1;
+    tempA = (int32_t)rs0 * (int32_t)rt0;
+
+    /*
+     * The 64-bit sum is exact, so no re-sign-extension is needed; the old
+     * "(temp[0] << 31) >> 31" shifted a signed value left and could
+     * truncate the sum.
+     */
+    temp[0] = (int64_t)tempD + (int64_t)tempC +
+              (int64_t)tempB + (int64_t)tempA;
+
+    /* Sign-extend the dot product to 128 bits. */
+    if (temp[0] >= 0) {
+        temp[1] = 0;
+    } else {
+        temp[1] = -1;
+    }
+
+    acc[1] = env->active_tc.HI[ac];
+    acc[0] = env->active_tc.LO[ac];
+
+    /* Add the low words with unsigned wrap-around, then detect the carry. */
+    temp_sum = (uint64_t)acc[0] + (uint64_t)temp[0];
+    if (((uint64_t)temp_sum < (uint64_t)acc[0]) &&
+       ((uint64_t)temp_sum < (uint64_t)temp[0])) {
+        temp[1] += 1;
+    }
+    temp[0] = temp_sum;
+    temp[1] += acc[1];
+
+    env->active_tc.HI[ac] = temp[1];
+    env->active_tc.LO[ac] = temp[0];
+}
+#endif
+
+/*
+ * Cross dot product with accumulate on signed halfwords: adds
+ * rs.hi * rt.lo + rs.lo * rt.hi to the 64-bit accumulator built from
+ * HI[ac] (upper 32 bits) and LO[ac] (lower 32 bits).
+ *
+ * ac     - accumulator index into env->active_tc.HI[] / LO[]
+ * rs, rt - source registers; only bits 31..0 are used
+ *
+ * The result is written back as two 32-bit halves, each sign-extended to
+ * target_long.
+ */
+void helper_dpax_w_ph(CPUMIPSState *env,
+                      uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    /* Signed halfwords so that the int32_t casts below sign-extend. */
+    int16_t rsh, rsl, rth, rtl;
+    int32_t tempB, tempA;
+    int64_t acc, dotp, tempBL, tempAL;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    /*
+     * Crossed operands (hi * lo, lo * hi), matching helper_dpsx_w_ph();
+     * the uncrossed form duplicated helper_dpa_w_ph().
+     */
+    tempB  = (int32_t)rsh * (int32_t)rtl;
+    tempA  = (int32_t)rsl * (int32_t)rth;
+    tempBL = tempB;
+    tempAL = tempA;
+    dotp =  tempBL + tempAL;
+    acc  =  ((uint64_t)env->active_tc.HI[ac] << 32) |
+            ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    acc  += dotp;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
+}
+
+/*
+ * Add the sum of two halfword products (rs.hi with rt.hi, rs.lo with
+ * rt.lo), each computed by mipsdsp_mul_q15_q15(), to the 64-bit
+ * accumulator built from HI[ac] (upper 32 bits) and LO[ac] (lower 32
+ * bits).  The result is written back as two 32-bit halves, each
+ * sign-extended to target_long.
+ */
+void helper_dpaq_s_w_ph(CPUMIPSState *env,
+                        uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    int16_t rsh, rsl, rth, rtl;
+    int32_t tempB, tempA;
+    int64_t acc, dotp, tempBL, tempAL;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    tempB = mipsdsp_mul_q15_q15(env, ac, rsh, rth);
+    tempA = mipsdsp_mul_q15_q15(env, ac, rsl, rtl);
+    /* Sign-extend the 32-bit products to 64 bits before summing. */
+    tempBL = tempB;
+    tempAL = tempA;
+    dotp = tempBL + tempAL;
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    acc += dotp;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Add the sum of four lane-wise halfword products of rs and rt, each
+ * computed by mipsdsp_mul_q15_q15(), to the 128-bit accumulator held in
+ * HI[ac] (upper 64 bits) and LO[ac] (lower 64 bits).
+ *
+ * rs, rt - source registers, each split into four 16-bit lanes
+ * ac     - accumulator index into env->active_tc.HI[] / LO[]
+ */
+void helper_dpaq_s_w_qh(CPUMIPSState *env,
+                        target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int32_t rs3, rs2, rs1, rs0;
+    int32_t rt3, rt2, rt1, rt0;
+    int32_t tempD, tempC, tempB, tempA;
+    int64_t temp[2];
+    int64_t acc[2];
+    int64_t temp_sum;
+
+    rs3 = (rs >> 48) & MIPSDSP_LO;
+    rs2 = (rs >> 32) & MIPSDSP_LO;
+    rs1 = (rs >> 16) & MIPSDSP_LO;
+    rs0 = rs & MIPSDSP_LO;
+    rt3 = (rt >> 48) & MIPSDSP_LO;
+    rt2 = (rt >> 32) & MIPSDSP_LO;
+    rt1 = (rt >> 16) & MIPSDSP_LO;
+    rt0 = rt & MIPSDSP_LO;
+
+    tempD = mipsdsp_mul_q15_q15(env, ac, rs3, rt3);
+    tempC = mipsdsp_mul_q15_q15(env, ac, rs2, rt2);
+    tempB = mipsdsp_mul_q15_q15(env, ac, rs1, rt1);
+    tempA = mipsdsp_mul_q15_q15(env, ac, rs0, rt0);
+
+    temp[0] = (int64_t)tempD + (int64_t)tempC +
+              (int64_t)tempB + (int64_t)tempA;
+    /* Sign-extend the dot product to 128 bits. */
+    if (((temp[0] >> 63) & 0x01) == 0) {
+        temp[1] = 0x00;
+    } else {
+        temp[1] = 0xFFFFFFFFFFFFFFFFull;
+    }
+
+    acc[0] = env->active_tc.LO[ac];
+    acc[1] = env->active_tc.HI[ac];
+
+    /*
+     * Add the low words with unsigned wrap-around.  The carry test must be
+     * unsigned as well: a signed comparison misses the carry for inputs
+     * such as LO = -1 plus a dot product of 1.
+     */
+    temp_sum = (uint64_t)temp[0] + (uint64_t)acc[0];
+    if (((uint64_t)temp_sum < (uint64_t)temp[0]) &&
+        ((uint64_t)temp_sum < (uint64_t)acc[0])) {
+        temp[1] += 1;
+    }
+    temp[0] = temp_sum;
+    temp[1] += acc[1];
+
+    env->active_tc.HI[ac] = temp[1];
+    env->active_tc.LO[ac] = temp[0];
+}
+#endif
+
+/*
+ * Add the sum of two crossed halfword products (rs.hi with rt.lo, rs.lo
+ * with rt.hi), each computed by mipsdsp_mul_q15_q15(), to the 64-bit
+ * accumulator built from HI[ac] (upper 32 bits) and LO[ac] (lower 32
+ * bits).  The result is written back as two 32-bit halves, each
+ * sign-extended to target_long.
+ */
+void helper_dpaqx_s_w_ph(CPUMIPSState *env,
+                         uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    /*
+     * NOTE(review): helper_dpaqx_sa_w_ph() declares these as int16_t;
+     * whether the difference matters depends on the parameter types of
+     * mipsdsp_mul_q15_q15() -- confirm.
+     */
+    uint16_t rsh, rsl, rth, rtl;
+    int32_t tempB, tempA;
+    int64_t acc, dotp, tempBL, tempAL;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    /* Crossed operands: high half with low half and vice versa. */
+    tempB = mipsdsp_mul_q15_q15(env, ac, rsh, rtl);
+    tempA = mipsdsp_mul_q15_q15(env, ac, rsl, rth);
+    tempBL = tempB;
+    tempAL = tempA;
+    dotp = tempBL + tempAL;
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    acc += dotp;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
+}
+
+/*
+ * Like helper_dpaqx_s_w_ph(), but the 64-bit sum is saturated to the range
+ * of a signed 32-bit value before being written back to HI[ac]/LO[ac].
+ * On saturation, set_DSPControl_overflow_flag(env, 1, 16 + ac) is called.
+ */
+void helper_dpaqx_sa_w_ph(CPUMIPSState *env,
+                          uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    int16_t rsh, rsl, rth, rtl;
+    int32_t tempB, tempA, tempC62_31, tempC63;
+    int64_t acc, dotp, tempBL, tempAL, tempC;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    /* Crossed operands: high half with low half and vice versa. */
+    tempB = mipsdsp_mul_q15_q15(env, ac, rsh, rtl);
+    tempA = mipsdsp_mul_q15_q15(env, ac, rsl, rth);
+    tempBL = tempB;
+    tempAL = tempA;
+    dotp = tempBL + tempAL;
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    tempC = acc + dotp;
+    /* Sign bit and bits 62..31, sampled before any saturation below. */
+    tempC63 = (tempC >> 63) & 0x01;
+    tempC62_31 = (tempC >> 31) & 0xFFFFFFFF;
+
+    /* Non-negative but does not fit in 31 bits: clamp to INT32_MAX. */
+    if ((tempC63 == 0) && (tempC62_31 != 0x00000000)) {
+        tempC = 0x7FFFFFFF;
+        set_DSPControl_overflow_flag(env, 1, 16 + ac);
+    }
+
+    /* Negative but bits 62..31 are not all ones: clamp to INT32_MIN. */
+    if ((tempC63 == 1) && (tempC62_31 != 0xFFFFFFFF)) {
+        tempC = 0xFFFFFFFF80000000ull;
+        set_DSPControl_overflow_flag(env, 1, 16 + ac);
+    }
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
+}
+
+/*
+ * Dot product with subtract on signed halfwords: subtracts
+ * rs.hi * rt.hi + rs.lo * rt.lo from the 64-bit accumulator built from
+ * HI[ac] (upper 32 bits) and LO[ac] (lower 32 bits).
+ *
+ * ac     - accumulator index into env->active_tc.HI[] / LO[]
+ * rs, rt - source registers; only bits 31..0 are used
+ *
+ * The result is written back as two 32-bit halves, each sign-extended to
+ * target_long.
+ */
+void helper_dps_w_ph(CPUMIPSState *env,
+                     uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    /*
+     * The halfwords must be signed: with uint16_t the int32_t casts below
+     * would zero-extend, giving unsigned products that can also overflow
+     * int32_t (0xFFFF * 0xFFFF).
+     */
+    int16_t rsh, rsl, rth, rtl;
+    int32_t tempB, tempA;
+    int64_t acc, dotp, tempBL, tempAL;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    tempB  = (int32_t)rsh * (int32_t)rth;
+    tempA  = (int32_t)rsl * (int32_t)rtl;
+    /* Sign-extend the products to 64 bits before summing. */
+    tempBL = tempB;
+    tempAL = tempA;
+    dotp =  tempBL + tempAL;
+    acc  =  ((uint64_t)env->active_tc.HI[ac] << 32) |
+            ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    acc  -= dotp;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Dot product with subtract on four signed halfword lanes: subtracts the
+ * sum of the four lane-wise products of rs and rt from the 128-bit
+ * accumulator held in HI[ac] (upper 64 bits) and LO[ac] (lower 64 bits).
+ *
+ * rs, rt - source registers, each split into four 16-bit lanes
+ * ac     - accumulator index into env->active_tc.HI[] / LO[]
+ */
+void helper_dps_w_qh(CPUMIPSState *env,
+                     target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int16_t rs3, rs2, rs1, rs0;
+    int16_t rt3, rt2, rt1, rt0;
+    int32_t tempD, tempC, tempB, tempA;
+    int64_t acc[2];
+    int64_t temp[2];
+    int64_t temp_sum;
+
+    rs3 = (rs >> 48) & MIPSDSP_LO;
+    rs2 = (rs >> 32) & MIPSDSP_LO;
+    rs1 = (rs >> 16) & MIPSDSP_LO;
+    rs0 = rs & MIPSDSP_LO;
+    rt3 = (rt >> 48) & MIPSDSP_LO;
+    rt2 = (rt >> 32) & MIPSDSP_LO;
+    rt1 = (rt >> 16) & MIPSDSP_LO;
+    rt0 = rt & MIPSDSP_LO;
+
+    tempD = (int32_t)rs3 * (int32_t)rt3;
+    tempC = (int32_t)rs2 * (int32_t)rt2;
+    tempB = (int32_t)rs1 * (int32_t)rt1;
+    tempA = (int32_t)rs0 * (int32_t)rt0;
+
+    /*
+     * The 64-bit sum is exact, so no re-sign-extension is needed.  The old
+     * "(temp[0] << 31) >> 31" shifted a signed value left (undefined for
+     * negative sums) and turned the maximum sum 4 * 2^30 into -2^32.
+     */
+    temp[0] = (int64_t)tempD + (int64_t)tempC +
+              (int64_t)tempB + (int64_t)tempA;
+
+    /* Sign-extend the dot product to 128 bits. */
+    if (temp[0] >= 0) {
+        temp[1] = 0x00;
+    } else {
+        temp[1] = -1;
+    }
+
+    acc[0] = env->active_tc.LO[ac];
+    acc[1] = env->active_tc.HI[ac];
+
+    /* Subtract the low words with unsigned wrap-around, then borrow. */
+    temp_sum = (uint64_t)acc[0] - (uint64_t)temp[0];
+    if ((uint64_t)temp_sum > (uint64_t)acc[0]) {
+        acc[1] -= 1;
+    }
+    acc[0] = temp_sum;
+    acc[1] -= temp[1];
+
+    env->active_tc.HI[ac] = acc[1];
+    env->active_tc.LO[ac] = acc[0];
+}
+#endif
+
+/*
+ * Cross dot product with subtract on signed halfwords: subtracts
+ * rs.hi * rt.lo + rs.lo * rt.hi from the 64-bit accumulator built from
+ * HI[ac] (upper 32 bits) and LO[ac] (lower 32 bits).
+ *
+ * ac     - accumulator index into env->active_tc.HI[] / LO[]
+ * rs, rt - source registers; only bits 31..0 are used
+ *
+ * The result is written back as two 32-bit halves, each sign-extended to
+ * target_long.
+ */
+void helper_dpsx_w_ph(CPUMIPSState *env,
+                      uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    /*
+     * The halfwords must be signed: with uint16_t the int32_t casts below
+     * would zero-extend, giving unsigned products that can also overflow
+     * int32_t (0xFFFF * 0xFFFF).
+     */
+    int16_t rsh, rsl, rth, rtl;
+    int32_t  tempB,  tempA;
+    int64_t acc, dotp, tempBL, tempAL;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    /* Crossed operands: high half with low half and vice versa. */
+    tempB = (int32_t)rsh * (int32_t)rtl;
+    tempA = (int32_t)rsl * (int32_t)rth;
+    tempBL = tempB;
+    tempAL = tempA;
+    dotp = tempBL + tempAL;
+
+    acc  = ((uint64_t)env->active_tc.HI[ac] << 32) |
+           ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    acc -= dotp;
+    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
+}
+
+/*
+ * Subtract the sum of two halfword products (rs.hi with rt.hi, rs.lo with
+ * rt.lo), each computed by mipsdsp_mul_q15_q15(), from the 64-bit
+ * accumulator built from HI[ac] (upper 32 bits) and LO[ac] (lower 32
+ * bits).  The result is written back as two 32-bit halves, each
+ * sign-extended to target_long.
+ */
+void helper_dpsq_s_w_ph(CPUMIPSState *env,
+                        uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    int16_t rsh, rsl, rth, rtl;
+    int32_t tempB, tempA;
+    int64_t acc, dotp, tempBL, tempAL;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    tempB = mipsdsp_mul_q15_q15(env, ac, rsh, rth);
+    tempA = mipsdsp_mul_q15_q15(env, ac, rsl, rtl);
+    /* Sign-extend the 32-bit products to 64 bits before summing. */
+    tempBL = tempB;
+    tempAL = tempA;
+    dotp = tempBL + tempAL;
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
+           ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    acc -= dotp;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Subtract the sum of four lane-wise halfword products of rs and rt, each
+ * computed by mipsdsp_mul_q15_q15(), from the 128-bit accumulator held in
+ * HI[ac] (upper 64 bits) and LO[ac] (lower 64 bits).
+ */
+void helper_dpsq_s_w_qh(CPUMIPSState *env,
+                        target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int16_t rs3, rs2, rs1, rs0;
+    int16_t rt3, rt2, rt1, rt0;
+    int32_t tempD, tempC, tempB, tempA;
+    int64_t acc[2];
+    int64_t temp[2];
+    int64_t temp_sum;
+
+    /* Both elements are overwritten below before being read. */
+    temp[0] = 0;
+    temp[1] = 0;
+
+    rs3 = (rs >> 48) & MIPSDSP_LO;
+    rs2 = (rs >> 32) & MIPSDSP_LO;
+    rs1 = (rs >> 16) & MIPSDSP_LO;
+    rs0 = rs & MIPSDSP_LO;
+    rt3 = (rt >> 48) & MIPSDSP_LO;
+    rt2 = (rt >> 32) & MIPSDSP_LO;
+    rt1 = (rt >> 16) & MIPSDSP_LO;
+    rt0 = rt & MIPSDSP_LO;
+
+    tempD = mipsdsp_mul_q15_q15(env, ac, rs3, rt3);
+    tempC = mipsdsp_mul_q15_q15(env, ac, rs2, rt2);
+    tempB = mipsdsp_mul_q15_q15(env, ac, rs1, rt1);
+    tempA = mipsdsp_mul_q15_q15(env, ac, rs0, rt0);
+
+    temp[0] = (int64_t)tempD + (int64_t)tempC +
+              (int64_t)tempB + (int64_t)tempA;
+    /* Sign-extend the dot product to 128 bits. */
+    if (((temp[0] >> 63) & 0x01) == 0) {
+        temp[1] = 0x00;
+    } else {
+        temp[1] = 0xFFFFFFFFFFFFFFFFull;
+    }
+
+    acc[0] = env->active_tc.LO[ac];
+    acc[1] = env->active_tc.HI[ac];
+
+    temp_sum = acc[0] - temp[0];
+    /* An unsigned difference larger than the minuend wrapped: borrow. */
+    if ((uint64_t)temp_sum > (uint64_t)acc[0]) {
+        acc[1] -= 1;
+    }
+    acc[0] = temp_sum;
+    acc[1] -= temp[1];
+
+    env->active_tc.HI[ac] = acc[1];
+    env->active_tc.LO[ac] = acc[0];
+}
+#endif
+
+/*
+ * Subtract the sum of two crossed halfword products (rs.hi with rt.lo,
+ * rs.lo with rt.hi), each computed by mipsdsp_mul_q15_q15(), from the
+ * 64-bit accumulator built from HI[ac] (upper 32 bits) and LO[ac] (lower
+ * 32 bits).  The result is written back as two 32-bit halves, each
+ * sign-extended to target_long.
+ */
+void helper_dpsqx_s_w_ph(CPUMIPSState *env,
+                         uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    int16_t rsh, rsl, rth, rtl;
+    int32_t tempB, tempA;
+    int64_t dotp, tempC, tempBL, tempAL;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    /* Crossed operands: high half with low half and vice versa. */
+    tempB = mipsdsp_mul_q15_q15(env, ac, rsh, rtl);
+    tempA = mipsdsp_mul_q15_q15(env, ac, rsl, rth);
+    tempBL = tempB;
+    tempAL = tempA;
+    dotp = tempBL + tempAL;
+    /* The accumulator is assembled as uint64_t, so this wraps modulo 2^64. */
+    tempC = (((uint64_t)env->active_tc.HI[ac] << 32) |
+            ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO)) - dotp;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
+}
+
+/*
+ * Like helper_dpsqx_s_w_ph(), but the 64-bit difference is saturated to
+ * the range of a signed 32-bit value before being written back to
+ * HI[ac]/LO[ac].  On saturation,
+ * set_DSPControl_overflow_flag(env, 1, 16 + ac) is called.
+ */
+void helper_dpsqx_sa_w_ph(CPUMIPSState *env,
+                          uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    int16_t rsh, rsl, rth, rtl;
+    int32_t tempB, tempA, tempC63, tempC62_31;
+    int64_t dotp, tempBL, tempAL, tempC;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+    /* Crossed operands: high half with low half and vice versa. */
+    tempB = mipsdsp_mul_q15_q15(env, ac, rsh, rtl);
+    tempA = mipsdsp_mul_q15_q15(env, ac, rsl, rth);
+
+    tempBL = tempB;
+    tempAL = tempA;
+    dotp   = tempBL + tempAL;
+    tempC  = ((uint64_t)env->active_tc.HI[ac] << 32) |
+             ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    tempC -= dotp;
+
+    /* Sign bit and bits 62..31, sampled before any saturation below. */
+    tempC63 = (tempC >> 63) & 0x01;
+    tempC62_31 = (tempC >> 31) & 0xFFFFFFFF;
+
+    /* Non-negative but does not fit in 31 bits: clamp to INT32_MAX. */
+    if ((tempC63 == 0) && (tempC62_31 != 0)) {
+        tempC = 0x7FFFFFFF;
+        set_DSPControl_overflow_flag(env, 1, 16 + ac);
+    }
+
+    /* Negative but bits 62..31 are not all ones: clamp to INT32_MIN. */
+    if ((tempC63 == 1) && (tempC62_31 != 0xFFFFFFFF)) {
+        tempC = 0xFFFFFFFF80000000ull;
+        set_DSPControl_overflow_flag(env, 1, 16 + ac);
+    }
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((tempC & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(tempC & MIPSDSP_LLO);
+}
+
+/*
+ * Compute the two halfword products rs.hi with rt.hi and rs.lo with rt.lo
+ * via mipsdsp_mul_q15_q15(), and add their difference (high product minus
+ * low product) to the 64-bit accumulator built from HI[ac] (upper 32
+ * bits) and LO[ac] (lower 32 bits).  The result is written back as two
+ * 32-bit halves, each sign-extended to target_long.
+ */
+void helper_mulsaq_s_w_ph(CPUMIPSState *env,
+                          uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    int16_t rsh, rsl, rth, rtl;
+    int32_t tempB, tempA;
+    int64_t tempBL, tempAL, acc, dotp;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    tempB = mipsdsp_mul_q15_q15(env, ac, rsh, rth);
+    tempA = mipsdsp_mul_q15_q15(env, ac, rsl, rtl);
+    /* Sign-extend to 64 bits so the difference cannot overflow. */
+    tempBL = tempB;
+    tempAL = tempA;
+    dotp = tempBL - tempAL;
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    dotp = dotp + acc;
+    env->active_tc.HI[ac] = (target_long)(int32_t)((dotp & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(dotp & MIPSDSP_LLO);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Compute the four lane-wise halfword products D, C, B, A of rs and rt
+ * (lane 3 down to lane 0) via mipsdsp_mul_q15_q15(), and add
+ * (D - C) + (B - A) to the 128-bit accumulator held in HI[ac] (upper 64
+ * bits) and LO[ac] (lower 64 bits).
+ *
+ * rs, rt - source registers, each split into four 16-bit lanes
+ * ac     - accumulator index into env->active_tc.HI[] / LO[]
+ */
+void helper_mulsaq_s_w_qh(CPUMIPSState *env,
+                          target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int16_t rs3, rs2, rs1, rs0;
+    int16_t rt3, rt2, rt1, rt0;
+    int32_t tempD, tempC, tempB, tempA;
+    int64_t acc[2];
+    int64_t temp[2];
+    int64_t temp_sum;
+
+    rs3 = (rs >> 48) & MIPSDSP_LO;
+    rs2 = (rs >> 32) & MIPSDSP_LO;
+    rs1 = (rs >> 16) & MIPSDSP_LO;
+    rs0 = rs & MIPSDSP_LO;
+    rt3 = (rt >> 48) & MIPSDSP_LO;
+    rt2 = (rt >> 32) & MIPSDSP_LO;
+    rt1 = (rt >> 16) & MIPSDSP_LO;
+    rt0 = rt & MIPSDSP_LO;
+
+    tempD = mipsdsp_mul_q15_q15(env, ac, rs3, rt3);
+    tempC = mipsdsp_mul_q15_q15(env, ac, rs2, rt2);
+    tempB = mipsdsp_mul_q15_q15(env, ac, rs1, rt1);
+    tempA = mipsdsp_mul_q15_q15(env, ac, rs0, rt0);
+
+    /*
+     * Do the arithmetic in 64 bits: differences and sums of 32-bit
+     * products need up to 34 bits, so 32-bit arithmetic could overflow.
+     * The exact result needs no further sign-extension.
+     */
+    temp[0] = ((int64_t)tempD - (int64_t)tempC) +
+              ((int64_t)tempB - (int64_t)tempA);
+    /* Sign-extend the dot product to 128 bits. */
+    if (temp[0] >= 0) {
+        temp[1] = 0x00;
+    } else {
+        temp[1] = -1;
+    }
+
+    acc[0] = env->active_tc.LO[ac];
+    acc[1] = env->active_tc.HI[ac];
+
+    /* Add the low words with unsigned wrap-around, then detect the carry. */
+    temp_sum = (uint64_t)acc[0] + (uint64_t)temp[0];
+    if (((uint64_t)temp_sum < (uint64_t)acc[0]) &&
+       ((uint64_t)temp_sum < (uint64_t)temp[0])) {
+        acc[1] += 1;
+    }
+    acc[0] = temp_sum;
+    acc[1] += temp[1];
+
+    env->active_tc.HI[ac] = acc[1];
+    env->active_tc.LO[ac] = acc[0];
+}
+#endif
+
+/*
+ * Add the product returned by mipsdsp_mul_q31_q31() for rs and rt to the
+ * 64-bit accumulator built from HI[ac] (upper 32 bits) and LO[ac] (lower
+ * 32 bits), saturating the sum to the signed 64-bit range.
+ *
+ * ac     - accumulator index into env->active_tc.HI[] / LO[]
+ * rs, rt - source registers, passed unchanged to mipsdsp_mul_q31_q31()
+ *
+ * On saturation, set_DSPControl_overflow_flag(env, 1, 16 + ac) is called.
+ * The result is written back as two 32-bit halves, each sign-extended to
+ * target_long.
+ */
+void helper_dpaq_sa_l_w(CPUMIPSState *env,
+                        uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    int32_t acc_neg, dotp_neg, sum_neg;
+    int64_t dotp, acc;
+    uint64_t temp;
+
+    dotp = mipsdsp_mul_q31_q31(env, ac, rs, rt);
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+
+    /* Unsigned addition wraps instead of overflowing a signed type. */
+    temp = (uint64_t)acc + (uint64_t)dotp;
+
+    acc_neg  = acc < 0;
+    dotp_neg = dotp < 0;
+    sum_neg  = (temp >> 63) & 0x01;
+
+    /*
+     * A signed addition overflows only when both addends have the same
+     * sign and the sum has the other one.  Addends of different signs can
+     * never overflow; the previous bit-64 computation saturated in that
+     * case too (e.g. -1 + 1).
+     */
+    if ((acc_neg == dotp_neg) && (sum_neg != acc_neg)) {
+        if (acc_neg) {
+            temp = 0x8000000000000000ull;
+        } else {
+            temp = 0x7FFFFFFFFFFFFFFFull;
+        }
+
+        set_DSPControl_overflow_flag(env, 1, 16 + ac);
+    }
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((temp & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(temp & MIPSDSP_LLO);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * DPAQ_SA.L.PW helper: multiply the upper and the lower 32-bit words of
+ * rs and rt with mipsdsp_mul_q31_q31(), add the two products as a 128-bit
+ * value and pass that to mipsdsp_sat64_acc_add_q63() together with the
+ * accumulator index.  The two words it fills in are written back to
+ * LO[ac] (word 0) and HI[ac] (word 1).
+ */
+void helper_dpaq_sa_l_pw(CPUMIPSState *env,
+                         target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int32_t rs_hi = (rs >> 32) & MIPSDSP_LLO;
+    int32_t rs_lo = rs & MIPSDSP_LLO;
+    int32_t rt_hi = (rt >> 32) & MIPSDSP_LLO;
+    int32_t rt_lo = rt & MIPSDSP_LLO;
+    int64_t prod_hi, prod_lo;
+    int64_t dotp[2];
+    int64_t result[2];
+
+    prod_hi = mipsdsp_mul_q31_q31(env, ac, rs_hi, rt_hi);
+    prod_lo = mipsdsp_mul_q31_q31(env, ac, rs_lo, rt_lo);
+
+    /* Upper word: sum of the two sign extensions (0 or -1 each) ... */
+    dotp[1] = ((prod_hi < 0) ? -1 : 0) + ((prod_lo < 0) ? -1 : 0);
+
+    /* ... plus the carry out of the low-word addition. */
+    dotp[0] = prod_hi + prod_lo;
+    if (((uint64_t)dotp[0] < (uint64_t)prod_hi) &&
+        ((uint64_t)dotp[0] < (uint64_t)prod_lo)) {
+        dotp[1] += 1;
+    }
+
+    mipsdsp_sat64_acc_add_q63(env, result, ac, dotp);
+
+    env->active_tc.HI[ac] = result[1];
+    env->active_tc.LO[ac] = result[0];
+}
+#endif
+
+/*
+ * DPSQ_SA.L.W helper: subtract the product returned by
+ * mipsdsp_mul_q31_q31() from the 64-bit accumulator formed from the low
+ * 32 bits of HI[ac] and LO[ac], saturating the difference to the signed
+ * 64-bit range.
+ *
+ * On saturation the result becomes 0x8000000000000000 (negative overflow)
+ * or 0x7FFFFFFFFFFFFFFF (positive overflow) and DSPControl overflow flag
+ * bit 16 + ac is set.  Overflow can only happen when the accumulator and
+ * the product have opposite signs; in particular 0 - 0 never saturates.
+ */
+void helper_dpsq_sa_l_w(CPUMIPSState *env,
+                        uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    int64_t dotp, acc;
+    uint64_t temp;
+
+    dotp = mipsdsp_mul_q31_q31(env, ac, rs, rt);
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+
+    /* Wrapping subtract; the test below recovers the signed meaning. */
+    temp = (uint64_t)acc - (uint64_t)dotp;
+
+    /*
+     * Signed overflow happened iff the operands differ in sign and the
+     * sign of the difference is not the sign of the accumulator.
+     */
+    if ((((uint64_t)acc ^ (uint64_t)dotp) & ((uint64_t)acc ^ temp)) >> 63) {
+        if (acc < 0) {
+            temp = 0x8000000000000000ull;
+        } else {
+            temp = 0x7FFFFFFFFFFFFFFFull;
+        }
+        set_DSPControl_overflow_flag(env, 1, ac + 16);
+    }
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((temp & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(temp & MIPSDSP_LLO);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * DPSQ_SA.L.PW helper: multiply the upper and the lower 32-bit words of
+ * rs and rt with mipsdsp_mul_q31_q31(), add the two products as a 128-bit
+ * value and pass that to mipsdsp_sat64_acc_sub_q63() together with the
+ * accumulator index.  The two words it fills in are written back to
+ * LO[ac] (word 0) and HI[ac] (word 1).
+ *
+ * Each product is a 64-bit quantity, so its sign is bit 63 and that is
+ * the bit used to sign-extend it to 128 bits, exactly as
+ * helper_dpaq_sa_l_pw() does.  Extending from bit 31 would produce a
+ * wrong upper word for most products.
+ */
+void helper_dpsq_sa_l_pw(CPUMIPSState *env,
+                         target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int32_t rs1, rs0;
+    int32_t rt1, rt0;
+    int64_t tempB[2], tempA[2];
+    int64_t temp[2];
+    int64_t acc[2];
+    int64_t temp_sum;
+
+    temp[0] = 0x00;
+    temp[1] = 0x00;
+
+    rs1 = (rs >> 32) & MIPSDSP_LLO;
+    rs0 = rs & MIPSDSP_LLO;
+    rt1 = (rt >> 32) & MIPSDSP_LLO;
+    rt0 = rt & MIPSDSP_LLO;
+
+    tempB[0] = mipsdsp_mul_q31_q31(env, ac, rs1, rt1);
+    tempA[0] = mipsdsp_mul_q31_q31(env, ac, rs0, rt0);
+
+    /* Sign-extend both 64-bit products to 128 bits. */
+    tempB[1] = (tempB[0] < 0) ? -1 : 0;
+    tempA[1] = (tempA[0] < 0) ? -1 : 0;
+
+    temp_sum = tempB[0] + tempA[0];
+    /* An unsigned wrap of the low word is a carry into the high word. */
+    if (((uint64_t)temp_sum < (uint64_t)tempB[0]) &&
+       ((uint64_t)temp_sum < (uint64_t)tempA[0])) {
+        temp[1] += 1;
+    }
+    temp[0] = temp_sum;
+    temp[1] += tempA[1] + tempB[1];
+
+    mipsdsp_sat64_acc_sub_q63(env, acc, ac, temp);
+
+    env->active_tc.HI[ac] = acc[1];
+    env->active_tc.LO[ac] = acc[0];
+}
+
+/*
+ * MULSAQ_S.L.PW helper: multiply the upper and the lower 32-bit words of
+ * rs and rt with mipsdsp_mul_q31_q31(), subtract the lower product from
+ * the upper one and add the difference to the 128-bit value held in
+ * HI[ac]:LO[ac].
+ */
+void helper_mulsaq_s_l_pw(CPUMIPSState *env,
+                          target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int32_t rs_hi = (rs >> 32) & MIPSDSP_LLO;
+    int32_t rs_lo = rs & MIPSDSP_LLO;
+    int32_t rt_hi = (rt >> 32) & MIPSDSP_LLO;
+    int32_t rt_lo = rt & MIPSDSP_LLO;
+    int64_t prod_hi, prod_lo;
+    int64_t diff_lo, diff_hi;
+    int64_t acc_lo, acc_hi;
+    int64_t sum_lo;
+
+    prod_hi = mipsdsp_mul_q31_q31(env, ac, rs_hi, rt_hi);
+    prod_lo = mipsdsp_mul_q31_q31(env, ac, rs_lo, rt_lo);
+
+    acc_lo = env->active_tc.LO[ac];
+    acc_hi = env->active_tc.HI[ac];
+
+    /* 128-bit subtraction of the sign-extended products. */
+    diff_hi = ((prod_hi < 0) ? -1 : 0) - ((prod_lo < 0) ? -1 : 0);
+    diff_lo = prod_hi - prod_lo;
+    if ((uint64_t)diff_lo > (uint64_t)prod_hi) {
+        /* The low word borrowed from the high word. */
+        diff_hi -= 1;
+    }
+
+    /* Replicate bit 0 of the upper word (bit 64 overall) through it. */
+    diff_hi = (diff_hi & 0x01) ? -1 : 0;
+
+    /* 128-bit accumulate. */
+    sum_lo = acc_lo + diff_lo;
+    if (((uint64_t)sum_lo < (uint64_t)acc_lo) &&
+        ((uint64_t)sum_lo < (uint64_t)diff_lo)) {
+        acc_hi += 1;
+    }
+    acc_hi += diff_hi;
+
+    env->active_tc.HI[ac] = acc_hi;
+    env->active_tc.LO[ac] = sum_lo;
+}
+#endif
+
+/*
+ * MAQ_S.W.PHL helper: multiply bits 31..16 of rs and rt with
+ * mipsdsp_mul_q15_q15() and add the product to the 64-bit accumulator
+ * built from the low 32 bits of HI[ac] and LO[ac].  Both halves are
+ * stored back sign-extended from 32 bits.
+ */
+void helper_maq_s_w_phl(CPUMIPSState *env,
+                        uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    int16_t rs_hi = (rs & MIPSDSP_HI) >> 16;
+    int16_t rt_hi = (rt & MIPSDSP_HI) >> 16;
+    int32_t product;
+    int64_t acc, sum;
+
+    product = mipsdsp_mul_q15_q15(env, ac, rs_hi, rt_hi);
+
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    sum = (int64_t)product + acc;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((sum & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(sum & MIPSDSP_LLO);
+}
+
+/*
+ * MAQ_S.W.PHR helper: multiply bits 15..0 of rs and rt with
+ * mipsdsp_mul_q15_q15() and add the product to the 64-bit accumulator
+ * built from the low 32 bits of HI[ac] and LO[ac].  Both halves are
+ * stored back sign-extended from 32 bits.
+ */
+void helper_maq_s_w_phr(CPUMIPSState *env,
+                        uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    int16_t rs_lo = rs & MIPSDSP_LO;
+    int16_t rt_lo = rt & MIPSDSP_LO;
+    int32_t product;
+    int64_t acc, sum;
+
+    product = mipsdsp_mul_q15_q15(env, ac, rs_lo, rt_lo);
+
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    sum = (int64_t)product + acc;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((sum & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(sum & MIPSDSP_LLO);
+}
+
+/*
+ * MAQ_SA.W.PHL helper: multiply bits 31..16 of rs and rt with
+ * mipsdsp_mul_q15_q15(), run the product through
+ * mipsdsp_sat32_acc_q31() for accumulator ac, and store the resulting
+ * 32-bit value sign-extended across HI[ac]:LO[ac].
+ */
+void helper_maq_sa_w_phl(CPUMIPSState *env, uint32_t ac,
+                         target_ulong rs, target_ulong rt)
+{
+    int16_t rs_hi = (rs & MIPSDSP_HI) >> 16;
+    int16_t rt_hi = (rt & MIPSDSP_HI) >> 16;
+    int32_t product, saturated;
+    int64_t wide;
+
+    product = mipsdsp_mul_q15_q15(env, ac, rs_hi, rt_hi);
+    saturated = mipsdsp_sat32_acc_q31(env, ac, product);
+    wide = saturated;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((wide & MIPSDSP_LHI) >>
+                                                   32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(wide & MIPSDSP_LLO);
+}
+
+/*
+ * MAQ_SA.W.PHR helper: multiply bits 15..0 of rs and rt with
+ * mipsdsp_mul_q15_q15(), run the product through
+ * mipsdsp_sat32_acc_q31() for accumulator ac, and store the resulting
+ * 32-bit value sign-extended across HI[ac]:LO[ac].
+ */
+void helper_maq_sa_w_phr(CPUMIPSState *env, uint32_t ac,
+                         target_ulong rs, target_ulong rt)
+{
+    int16_t rs_lo = rs & MIPSDSP_LO;
+    int16_t rt_lo = rt & MIPSDSP_LO;
+    int32_t product, saturated;
+    int64_t wide;
+
+    product = mipsdsp_mul_q15_q15(env, ac, rs_lo, rt_lo);
+    saturated = mipsdsp_sat32_acc_q31(env, ac, product);
+    wide = saturated;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((wide & MIPSDSP_LHI) >>
+                                                   32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(wide & MIPSDSP_LLO);
+}
+
+/*
+ * MUL.PH helper: multiply the two halfword pairs of rs and rt with
+ * mipsdsp_mul_i16_i16() and pack the low 16 bits of each product into a
+ * 32-bit word (upper pair in bits 31..16, lower pair in bits 15..0).
+ *
+ * The manual writes the result as GPR[rd](..0) <- tempB(15..0) ||
+ * tempA(15..0).  The packed word is returned sign-extended from bit 31,
+ * the same way helper_mulq_s_ph() returns its 32-bit result, so on a
+ * 64-bit target the upper register half is not simply zeroed.
+ */
+target_ulong helper_mul_ph(CPUMIPSState *env, target_ulong rs, target_ulong rt)
+{
+    int16_t rsh, rsl, rth, rtl;
+    int32_t tempB, tempA;
+    uint32_t rd;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+    tempB = mipsdsp_mul_i16_i16(env, rsh, rth);
+    tempA = mipsdsp_mul_i16_i16(env, rsl, rtl);
+
+    /* Shift as unsigned so bit 15 of tempB can move into bit 31 safely. */
+    rd = ((uint32_t)(tempB & MIPSDSP_LO) << 16) |
+         (uint32_t)(tempA & MIPSDSP_LO);
+
+    return (target_long)(int32_t)rd;
+}
+
+/*
+ * MUL_S.PH helper: multiply the two halfword pairs of rs and rt with
+ * mipsdsp_sat16_mul_i16_i16() and pack the low 16 bits of each result
+ * into a 32-bit word (upper pair in bits 31..16, lower pair in bits
+ * 15..0).
+ *
+ * The manual writes the result as GPR[rd](..0) <- tempB(15..0) ||
+ * tempA(15..0).  The packed word is returned sign-extended from bit 31,
+ * the same way helper_mulq_s_ph() returns its 32-bit result, so on a
+ * 64-bit target the upper register half is not simply zeroed.
+ */
+target_ulong helper_mul_s_ph(CPUMIPSState *env,
+                             target_ulong rs, target_ulong rt)
+{
+    int16_t  rsh, rsl, rth, rtl;
+    int32_t  tempB, tempA;
+    uint32_t rd;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+    tempB = mipsdsp_sat16_mul_i16_i16(env, rsh, rth);
+    tempA = mipsdsp_sat16_mul_i16_i16(env, rsl, rtl);
+
+    /* Shift as unsigned so bit 15 of tempB can move into bit 31 safely. */
+    rd = ((uint32_t)(tempB & MIPSDSP_LO) << 16) |
+         (uint32_t)(tempA & MIPSDSP_LO);
+
+    return (target_long)(int32_t)rd;
+}
+
+/*
+ * MULQ_S.PH helper: multiply the two halfword pairs of rs and rt with
+ * mipsdsp_sat16_mul_q15_q15() and pack the low 16 bits of each result
+ * into a 32-bit word (upper pair in bits 31..16, lower pair in bits
+ * 15..0).  The word is returned sign-extended from bit 31.
+ */
+target_ulong helper_mulq_s_ph(CPUMIPSState *env,
+                              target_ulong rs, target_ulong rt)
+{
+    int16_t rs_hi = (rs & MIPSDSP_HI) >> 16;
+    int16_t rs_lo = rs & MIPSDSP_LO;
+    int16_t rt_hi = (rt & MIPSDSP_HI) >> 16;
+    int16_t rt_lo = rt & MIPSDSP_LO;
+    int32_t prod_hi, prod_lo;
+    int32_t packed;
+
+    prod_hi = mipsdsp_sat16_mul_q15_q15(env, rs_hi, rt_hi);
+    prod_lo = mipsdsp_sat16_mul_q15_q15(env, rs_lo, rt_lo);
+
+    packed = ((prod_hi & MIPSDSP_LO) << 16) | (prod_lo & MIPSDSP_LO);
+
+    return (target_long)packed;
+}
+
+/*
+ * MULQ_S.W helper: signed fractional multiply of the low 32 bits of rs
+ * and rt.  The doubled 64-bit product is truncated to its upper 32 bits
+ * and returned sign-extended from bit 31.
+ *
+ * When both operands are 0x80000000 the result is forced to 0x7FFFFFFF
+ * and DSPControl overflow flag bit 21 is set.
+ *
+ * Only the low 32 bits of each register are examined, and they are
+ * treated as signed values; using the target_ulong registers directly
+ * would make the multiply unsigned on 32-bit targets and would miss the
+ * 0x80000000 case when the register holds a sign-extended value.
+ */
+target_ulong helper_mulq_s_w(CPUMIPSState *env,
+                             target_ulong rs, target_ulong rt)
+{
+    uint32_t rs_t, rt_t;
+    int32_t tempI;
+    int64_t tempL;
+
+    rs_t = rs & MIPSDSP_LLO;
+    rt_t = rt & MIPSDSP_LLO;
+
+    if ((rs_t == 0x80000000) && (rt_t == 0x80000000)) {
+        tempL = 0x7FFFFFFF00000000ull;
+        set_DSPControl_overflow_flag(env, 1, 21);
+    } else {
+        /*
+         * With the min * min case excluded the product magnitude is
+         * below 2^62, so doubling it cannot overflow int64_t.
+         */
+        tempL = ((int64_t)(int32_t)rs_t * (int64_t)(int32_t)rt_t) * 2;
+    }
+    tempI = (tempL & MIPSDSP_LHI) >> 32;
+
+    return (target_long)tempI;
+}
+
+/*
+ * MULQ_RS.W helper: signed fractional multiply of the low 32 bits of rs
+ * and rt with rounding.  0x80000000 is added to the doubled 64-bit
+ * product before its upper 32 bits are taken; the result is returned
+ * sign-extended from bit 31.
+ *
+ * When both operands are 0x80000000 the result is forced to 0x7FFFFFFF
+ * (no rounding applied) and DSPControl overflow flag bit 21 is set.
+ *
+ * The operands are reinterpreted as int32_t before widening: widening
+ * the uint32_t values directly would zero-extend them and compute an
+ * unsigned product.
+ */
+target_ulong helper_mulq_rs_w(CPUMIPSState *env,
+                              target_ulong rs, target_ulong rt)
+{
+    uint32_t rs_t, rt_t;
+    int32_t tempI;
+    int64_t tempL;
+
+    rs_t = rs & MIPSDSP_LLO;
+    rt_t = rt & MIPSDSP_LLO;
+
+    if ((rs_t == 0x80000000) && (rt_t == 0x80000000)) {
+        tempL = 0x7FFFFFFF00000000ull;
+        set_DSPControl_overflow_flag(env, 1, 21);
+    } else {
+        /*
+         * With the min * min case excluded the product magnitude is
+         * below 2^62, so neither the doubling nor the rounding add can
+         * overflow int64_t.
+         */
+        tempL  = ((int64_t)(int32_t)rs_t * (int64_t)(int32_t)rt_t) * 2;
+        tempL += 0x80000000ull;
+    }
+    tempI = (tempL & MIPSDSP_LHI) >> 32;
+
+    return (target_long)tempI;
+}
+
+/*
+ * MULSA.W.PH helper: multiply the two halfword pairs of rs and rt as
+ * signed 16-bit integers, subtract the lower product from the upper one
+ * and add the difference to the 64-bit accumulator built from the low
+ * 32 bits of HI[ac] and LO[ac].  Both halves are stored back
+ * sign-extended from 32 bits.
+ *
+ * The halfwords are held in int16_t so the multiplies are signed and
+ * each product fits in int32_t; with uint16_t the product of two large
+ * halfwords would overflow int32_t.
+ */
+void helper_mulsa_w_ph(CPUMIPSState *env,
+                       uint32_t ac, target_ulong rs, target_ulong rt)
+{
+    int16_t rsh, rsl, rth, rtl;
+    int32_t tempB, tempA;
+    int64_t dotp, acc;
+
+    rsh = (rs & MIPSDSP_HI) >> 16;
+    rsl =  rs & MIPSDSP_LO;
+    rth = (rt & MIPSDSP_HI) >> 16;
+    rtl =  rt & MIPSDSP_LO;
+
+    tempB = (int32_t)rsh * (int32_t)rth;
+    tempA = (int32_t)rsl * (int32_t)rtl;
+
+    dotp = (int64_t)tempB - (int64_t)tempA;
+    /* Assemble the accumulator as unsigned to avoid signed left shifts. */
+    acc  = ((uint64_t)env->active_tc.HI[ac] << 32) |
+           ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    acc = acc + dotp;
+
+    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * MAQ_S.W.QHLL helper: multiply bits 63..48 of rs and rt with
+ * mipsdsp_mul_q15_q15(), sign-extend the 32-bit product to 128 bits and
+ * add it to the 128-bit value held in HI[ac]:LO[ac].
+ */
+void helper_maq_s_w_qhll(CPUMIPSState *env,
+                         target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int16_t rs_hw = (rs >> 48) & MIPSDSP_LO;
+    int16_t rt_hw = (rt >> 48) & MIPSDSP_LO;
+    int32_t product;
+    int64_t add_lo, add_hi;
+    int64_t acc_lo, acc_hi;
+    int64_t sum_lo;
+
+    product = mipsdsp_mul_q15_q15(env, ac, rs_hw, rt_hw);
+
+    add_lo = (int64_t)product;
+    add_hi = (add_lo < 0) ? -1 : 0;
+
+    acc_lo = env->active_tc.LO[ac];
+    acc_hi = env->active_tc.HI[ac];
+
+    sum_lo = acc_lo + add_lo;
+    /* An unsigned wrap of the low word is a carry into the high word. */
+    if (((uint64_t)sum_lo < (uint64_t)acc_lo) &&
+        ((uint64_t)sum_lo < (uint64_t)add_lo)) {
+        acc_hi += 1;
+    }
+    acc_hi += add_hi;
+
+    env->active_tc.HI[ac] = acc_hi;
+    env->active_tc.LO[ac] = sum_lo;
+}
+
+/*
+ * MAQ_S.W.QHLR helper: multiply bits 47..32 of rs and rt with
+ * mipsdsp_mul_q15_q15(), sign-extend the 32-bit product to 128 bits and
+ * add it to the 128-bit value held in HI[ac]:LO[ac].
+ */
+void helper_maq_s_w_qhlr(CPUMIPSState *env,
+                         target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int16_t rs_hw = (rs >> 32) & MIPSDSP_LO;
+    int16_t rt_hw = (rt >> 32) & MIPSDSP_LO;
+    int32_t product;
+    int64_t add_lo, add_hi;
+    int64_t acc_lo, acc_hi;
+    int64_t sum_lo;
+
+    product = mipsdsp_mul_q15_q15(env, ac, rs_hw, rt_hw);
+
+    add_lo = (int64_t)product;
+    add_hi = (add_lo < 0) ? -1 : 0;
+
+    acc_lo = env->active_tc.LO[ac];
+    acc_hi = env->active_tc.HI[ac];
+
+    sum_lo = acc_lo + add_lo;
+    /* An unsigned wrap of the low word is a carry into the high word. */
+    if (((uint64_t)sum_lo < (uint64_t)acc_lo) &&
+        ((uint64_t)sum_lo < (uint64_t)add_lo)) {
+        acc_hi += 1;
+    }
+    acc_hi += add_hi;
+
+    env->active_tc.HI[ac] = acc_hi;
+    env->active_tc.LO[ac] = sum_lo;
+}
+
+/*
+ * MAQ_S.W.QHRL helper: multiply bits 31..16 of rs and rt with
+ * mipsdsp_mul_q15_q15(), sign-extend the 32-bit product to 128 bits and
+ * add it to the 128-bit value held in HI[ac]:LO[ac].
+ */
+void helper_maq_s_w_qhrl(CPUMIPSState *env,
+                         target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int16_t rs_hw = (rs >> 16) & MIPSDSP_LO;
+    int16_t rt_hw = (rt >> 16) & MIPSDSP_LO;
+    int32_t product;
+    int64_t add_lo, add_hi;
+    int64_t acc_lo, acc_hi;
+    int64_t sum_lo;
+
+    product = mipsdsp_mul_q15_q15(env, ac, rs_hw, rt_hw);
+
+    add_lo = (int64_t)product;
+    add_hi = (add_lo < 0) ? -1 : 0;
+
+    acc_lo = env->active_tc.LO[ac];
+    acc_hi = env->active_tc.HI[ac];
+
+    sum_lo = acc_lo + add_lo;
+    /* An unsigned wrap of the low word is a carry into the high word. */
+    if (((uint64_t)sum_lo < (uint64_t)acc_lo) &&
+        ((uint64_t)sum_lo < (uint64_t)add_lo)) {
+        acc_hi += 1;
+    }
+    acc_hi += add_hi;
+
+    env->active_tc.HI[ac] = acc_hi;
+    env->active_tc.LO[ac] = sum_lo;
+}
+
+/*
+ * MAQ_S.W.QHRR helper: multiply bits 15..0 of rs and rt with
+ * mipsdsp_mul_q15_q15(), sign-extend the 32-bit product to 128 bits and
+ * add it to the 128-bit value held in HI[ac]:LO[ac].
+ */
+void helper_maq_s_w_qhrr(CPUMIPSState *env,
+                         target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int16_t rs_hw = rs & MIPSDSP_LO;
+    int16_t rt_hw = rt & MIPSDSP_LO;
+    int32_t product;
+    int64_t add_lo, add_hi;
+    int64_t acc_lo, acc_hi;
+    int64_t sum_lo;
+
+    product = mipsdsp_mul_q15_q15(env, ac, rs_hw, rt_hw);
+
+    add_lo = (int64_t)product;
+    add_hi = (add_lo < 0) ? -1 : 0;
+
+    acc_lo = env->active_tc.LO[ac];
+    acc_hi = env->active_tc.HI[ac];
+
+    sum_lo = acc_lo + add_lo;
+    /* An unsigned wrap of the low word is a carry into the high word. */
+    if (((uint64_t)sum_lo < (uint64_t)acc_lo) &&
+        ((uint64_t)sum_lo < (uint64_t)add_lo)) {
+        acc_hi += 1;
+    }
+    acc_hi += add_hi;
+
+    env->active_tc.HI[ac] = acc_hi;
+    env->active_tc.LO[ac] = sum_lo;
+}
+
+/*
+ * MAQ_SA.W.QHLL helper: multiply bits 63..48 of rs and rt with
+ * mipsdsp_mul_q15_q15(), run the product through
+ * mipsdsp_sat32_acc_q31() for accumulator ac, and store the resulting
+ * 32-bit value sign-extended to 128 bits in HI[ac]:LO[ac].
+ */
+void helper_maq_sa_w_qhll(CPUMIPSState *env,
+                          target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int16_t rs_hw = (rs >> 48) & MIPSDSP_LO;
+    int16_t rt_hw = (rt >> 48) & MIPSDSP_LO;
+    int32_t product, saturated;
+    int64_t lo, hi;
+
+    product = mipsdsp_mul_q15_q15(env, ac, rs_hw, rt_hw);
+    saturated = mipsdsp_sat32_acc_q31(env, ac, product);
+
+    lo = (int64_t)saturated;
+    hi = (lo < 0) ? -1 : 0;
+
+    env->active_tc.HI[ac] = hi;
+    env->active_tc.LO[ac] = lo;
+}
+
+/*
+ * MAQ_SA.W.QHLR helper: multiply bits 47..32 of rs and rt with
+ * mipsdsp_mul_q15_q15(), run the product through
+ * mipsdsp_sat32_acc_q31() for accumulator ac, and store the resulting
+ * 32-bit value sign-extended to 128 bits in HI[ac]:LO[ac].
+ */
+void helper_maq_sa_w_qhlr(CPUMIPSState *env,
+                          target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int16_t rs_hw = (rs >> 32) & MIPSDSP_LO;
+    int16_t rt_hw = (rt >> 32) & MIPSDSP_LO;
+    int32_t product, saturated;
+    int64_t lo, hi;
+
+    product = mipsdsp_mul_q15_q15(env, ac, rs_hw, rt_hw);
+    saturated = mipsdsp_sat32_acc_q31(env, ac, product);
+
+    lo = (int64_t)saturated;
+    hi = (lo < 0) ? -1 : 0;
+
+    env->active_tc.HI[ac] = hi;
+    env->active_tc.LO[ac] = lo;
+}
+
+/*
+ * MAQ_SA.W.QHRL helper: multiply bits 31..16 of rs and rt with
+ * mipsdsp_mul_q15_q15(), run the product through
+ * mipsdsp_sat32_acc_q31() for accumulator ac, and store the resulting
+ * 32-bit value sign-extended to 128 bits in HI[ac]:LO[ac].
+ */
+void helper_maq_sa_w_qhrl(CPUMIPSState *env,
+                          target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int16_t rs_hw = (rs >> 16) & MIPSDSP_LO;
+    int16_t rt_hw = (rt >> 16) & MIPSDSP_LO;
+    int32_t product, saturated;
+    int64_t lo, hi;
+
+    product = mipsdsp_mul_q15_q15(env, ac, rs_hw, rt_hw);
+    saturated = mipsdsp_sat32_acc_q31(env, ac, product);
+
+    lo = (int64_t)saturated;
+    hi = (lo < 0) ? -1 : 0;
+
+    env->active_tc.HI[ac] = hi;
+    env->active_tc.LO[ac] = lo;
+}
+
+/*
+ * MAQ_SA.W.QHRR helper: multiply bits 15..0 of rs and rt with
+ * mipsdsp_mul_q15_q15(), run the product through
+ * mipsdsp_sat32_acc_q31() for accumulator ac, and store the resulting
+ * 32-bit value sign-extended to 128 bits in HI[ac]:LO[ac].
+ */
+void helper_maq_sa_w_qhrr(CPUMIPSState *env,
+                          target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int16_t rs_hw = rs & MIPSDSP_LO;
+    int16_t rt_hw = rt & MIPSDSP_LO;
+    int32_t product, saturated;
+    int64_t lo, hi;
+
+    product = mipsdsp_mul_q15_q15(env, ac, rs_hw, rt_hw);
+    saturated = mipsdsp_sat32_acc_q31(env, ac, product);
+
+    lo = (int64_t)saturated;
+    hi = (lo < 0) ? -1 : 0;
+
+    env->active_tc.HI[ac] = hi;
+    env->active_tc.LO[ac] = lo;
+}
+
+/*
+ * MAQ_S.L.PWL helper: multiply bits 63..32 of rs and rt with
+ * mipsdsp_mul_q31_q31(), sign-extend the 64-bit product to 128 bits and
+ * add it to the 128-bit value held in HI[ac]:LO[ac].
+ */
+void helper_maq_s_l_pwl(CPUMIPSState *env,
+                        target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int32_t rs_w = (rs >> 32) & MIPSDSP_LLO;
+    int32_t rt_w = (rt >> 32) & MIPSDSP_LLO;
+    int64_t add_lo, add_hi;
+    int64_t acc_lo, acc_hi;
+    int64_t sum_lo;
+
+    add_lo = mipsdsp_mul_q31_q31(env, ac, rs_w, rt_w);
+    add_hi = (add_lo < 0) ? -1 : 0;
+
+    acc_lo = env->active_tc.LO[ac];
+    acc_hi = env->active_tc.HI[ac];
+
+    sum_lo = acc_lo + add_lo;
+    /* An unsigned wrap of the low word is a carry into the high word. */
+    if (((uint64_t)sum_lo < (uint64_t)add_lo) &&
+        ((uint64_t)sum_lo < (uint64_t)acc_lo)) {
+        acc_hi += 1;
+    }
+    acc_hi += add_hi;
+
+    env->active_tc.HI[ac] = acc_hi;
+    env->active_tc.LO[ac] = sum_lo;
+}
+
+/*
+ * MAQ_S.L.PWR helper: multiply bits 31..0 of rs and rt with
+ * mipsdsp_mul_q31_q31(), sign-extend the 64-bit product to 128 bits and
+ * add it to the 128-bit value held in HI[ac]:LO[ac].
+ */
+void helper_maq_s_l_pwr(CPUMIPSState *env,
+                        target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int32_t rs_w = rs & MIPSDSP_LLO;
+    int32_t rt_w = rt & MIPSDSP_LLO;
+    int64_t add_lo, add_hi;
+    int64_t acc_lo, acc_hi;
+    int64_t sum_lo;
+
+    add_lo = mipsdsp_mul_q31_q31(env, ac, rs_w, rt_w);
+    add_hi = (add_lo < 0) ? -1 : 0;
+
+    acc_lo = env->active_tc.LO[ac];
+    acc_hi = env->active_tc.HI[ac];
+
+    sum_lo = acc_lo + add_lo;
+    /* An unsigned wrap of the low word is a carry into the high word. */
+    if (((uint64_t)sum_lo < (uint64_t)add_lo) &&
+        ((uint64_t)sum_lo < (uint64_t)acc_lo)) {
+        acc_hi += 1;
+    }
+    acc_hi += add_hi;
+
+    env->active_tc.HI[ac] = acc_hi;
+    env->active_tc.LO[ac] = sum_lo;
+}
+
+/*
+ * DMADD helper: multiply the upper and the lower 32-bit words of rs and
+ * rt as signed integers, add both products and add that sum to the
+ * 128-bit value held in HI[ac]:LO[ac].
+ *
+ * The products are formed in int64_t.  Multiplying two int32_t values in
+ * int32_t would overflow for most inputs and discard the upper half of
+ * each product before it reaches the accumulator.
+ */
+void helper_dmadd(CPUMIPSState *env,
+                  target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int32_t rs1, rs0;
+    int32_t rt1, rt0;
+    int64_t tempBL[2], tempAL[2];
+    int64_t acc[2];
+    int64_t temp[2];
+    int64_t temp_sum;
+
+    temp[0] = 0x00;
+    temp[1] = 0x00;
+
+    rs1 = (rs >> 32) & MIPSDSP_LLO;
+    rs0 = rs & MIPSDSP_LLO;
+    rt1 = (rt >> 32) & MIPSDSP_LLO;
+    rt0 = rt & MIPSDSP_LLO;
+
+    /* Full 32x32 -> 64-bit signed products. */
+    tempBL[0] = (int64_t)rs1 * (int64_t)rt1;
+    tempAL[0] = (int64_t)rs0 * (int64_t)rt0;
+
+    /* Sign-extend both products to 128 bits. */
+    tempBL[1] = (tempBL[0] < 0) ? -1 : 0;
+    tempAL[1] = (tempAL[0] < 0) ? -1 : 0;
+
+    acc[1] = env->active_tc.HI[ac];
+    acc[0] = env->active_tc.LO[ac];
+
+    /* temp = tempBL + tempAL, carrying from the low into the high word. */
+    temp_sum = tempBL[0] + tempAL[0];
+    if (((uint64_t)temp_sum < (uint64_t)tempBL[0]) &&
+       ((uint64_t)temp_sum < (uint64_t)tempAL[0])) {
+        temp[1] += 1;
+    }
+    temp[0] = temp_sum;
+    temp[1] += tempBL[1] + tempAL[1];
+
+    /* temp += acc, again with carry propagation. */
+    temp_sum = temp[0] + acc[0];
+    if (((uint64_t)temp_sum < (uint64_t)temp[0]) &&
+       ((uint64_t)temp_sum < (uint64_t)acc[0])) {
+        temp[1] += 1;
+    }
+    temp[0] = temp_sum;
+    temp[1] += acc[1];
+
+    env->active_tc.HI[ac] = temp[1];
+    env->active_tc.LO[ac] = temp[0];
+}
+
+/*
+ * DMADDU helper: multiply the upper and the lower 32-bit words of rs and
+ * rt with mipsdsp_mul_u32_u32(), add both products and add that sum to
+ * the 128-bit value held in HI[ac]:LO[ac].  All arithmetic is unsigned.
+ */
+void helper_dmaddu(CPUMIPSState *env,
+                   target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    uint32_t rs_hi = (rs >> 32) & MIPSDSP_LLO;
+    uint32_t rs_lo = rs & MIPSDSP_LLO;
+    uint32_t rt_hi = (rt >> 32) & MIPSDSP_LLO;
+    uint32_t rt_lo = rt & MIPSDSP_LLO;
+    uint64_t prod_hi, prod_lo;
+    uint64_t acc_lo, acc_hi;
+    uint64_t dot_lo;
+    uint64_t res_lo, res_hi;
+
+    prod_hi = mipsdsp_mul_u32_u32(env, rs_hi, rt_hi);
+    prod_lo = mipsdsp_mul_u32_u32(env, rs_lo, rt_lo);
+
+    acc_hi = env->active_tc.HI[ac];
+    acc_lo = env->active_tc.LO[ac];
+
+    /* The products contribute nothing but carries to the upper word. */
+    res_hi = 0;
+
+    dot_lo = prod_hi + prod_lo;
+    if ((dot_lo < prod_hi) && (dot_lo < prod_lo)) {
+        res_hi += 1;
+    }
+
+    res_lo = dot_lo + acc_lo;
+    if ((res_lo < dot_lo) && (res_lo < acc_lo)) {
+        res_hi += 1;
+    }
+    res_hi += acc_hi;
+
+    env->active_tc.HI[ac] = res_hi;
+    env->active_tc.LO[ac] = res_lo;
+}
+
+/*
+ * DMSUB helper: multiply the upper and the lower 32-bit words of rs and
+ * rt as signed integers and subtract both products from the 128-bit
+ * value held in HI[ac]:LO[ac].
+ *
+ * The products are formed in int64_t.  Multiplying two int32_t values in
+ * int32_t would overflow for most inputs and discard the upper half of
+ * each product before it reaches the accumulator.
+ */
+void helper_dmsub(CPUMIPSState *env,
+                  target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    int32_t rs1, rs0;
+    int32_t rt1, rt0;
+    int64_t tempBL[2], tempAL[2];
+    int64_t acc[2];
+    int64_t temp[2];
+    int64_t temp_sum;
+
+    temp[0] = 0x00;
+    temp[1] = 0x00;
+
+    rs1 = (rs >> 32) & MIPSDSP_LLO;
+    rs0 = rs & MIPSDSP_LLO;
+    rt1 = (rt >> 32) & MIPSDSP_LLO;
+    rt0 = rt & MIPSDSP_LLO;
+
+    /* Full 32x32 -> 64-bit signed products. */
+    tempBL[0] = (int64_t)rs1 * (int64_t)rt1;
+    tempAL[0] = (int64_t)rs0 * (int64_t)rt0;
+
+    /* Sign-extend both products to 128 bits. */
+    tempBL[1] = (tempBL[0] < 0) ? -1 : 0;
+    tempAL[1] = (tempAL[0] < 0) ? -1 : 0;
+
+    acc[1] = env->active_tc.HI[ac];
+    acc[0] = env->active_tc.LO[ac];
+
+    /* temp = acc - tempBL, borrowing from the high word when needed. */
+    temp_sum = acc[0] - tempBL[0];
+    if ((uint64_t)temp_sum > (uint64_t)acc[0]) {
+        temp[1] -= 1;
+    }
+    temp[0] = temp_sum;
+    temp[1] += acc[1] - tempBL[1];
+
+    /* temp -= tempAL, again with borrow propagation. */
+    temp_sum = temp[0] - tempAL[0];
+    if ((uint64_t)temp_sum > (uint64_t)temp[0]) {
+        temp[1] -= 1;
+    }
+    temp[0] = temp_sum;
+    temp[1] -= tempAL[1];
+
+    env->active_tc.HI[ac] = temp[1];
+    env->active_tc.LO[ac] = temp[0];
+}
+
+/*
+ * DMSUBU helper: multiply the upper and the lower 32-bit words of rs and
+ * rt with mipsdsp_mul_u32_u32() and subtract both products from the
+ * 128-bit value held in HI[ac]:LO[ac].  All arithmetic is unsigned.
+ */
+void helper_dmsubu(CPUMIPSState *env,
+                   target_ulong rs, target_ulong rt, uint32_t ac)
+{
+    uint32_t rs_hi = (rs >> 32) & MIPSDSP_LLO;
+    uint32_t rs_lo = rs & MIPSDSP_LLO;
+    uint32_t rt_hi = (rt >> 32) & MIPSDSP_LLO;
+    uint32_t rt_lo = rt & MIPSDSP_LLO;
+    uint64_t prod_hi, prod_lo;
+    uint64_t acc_lo, acc_hi;
+    uint64_t diff_lo;
+    uint64_t res_lo, res_hi;
+
+    prod_hi = mipsdsp_mul_u32_u32(env, rs_hi, rt_hi);
+    prod_lo = mipsdsp_mul_u32_u32(env, rs_lo, rt_lo);
+
+    acc_hi = env->active_tc.HI[ac];
+    acc_lo = env->active_tc.LO[ac];
+
+    /* The products only affect the upper word through borrows. */
+    res_hi = acc_hi;
+
+    diff_lo = acc_lo - prod_hi;
+    if (diff_lo > acc_lo) {
+        res_hi -= 1;
+    }
+
+    res_lo = diff_lo - prod_lo;
+    if (res_lo > diff_lo) {
+        res_hi -= 1;
+    }
+
+    env->active_tc.HI[ac] = res_hi;
+    env->active_tc.LO[ac] = res_lo;
+}
+#endif
+
 #undef MIPSDSP_LHI
 #undef MIPSDSP_LLO
 #undef MIPSDSP_HI
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 7046380..69bcb6c 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -491,4 +491,95 @@  DEF_HELPER_FLAGS_2(shra_r_pw, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl, i32)
 DEF_HELPER_FLAGS_2(shrav_r_pw, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl, tl)
 #endif
 
+/*
+ * DSP Multiply Sub-class insns
+ *
+ * Every helper takes env as its first argument and is declared with
+ * flags 0.  Helpers declared with a tl return value take two tl operands.
+ * Helpers declared void take two tl operands plus an i32: the i32 comes
+ * right after env in the unconditional declarations and last in the
+ * declarations guarded by TARGET_MIPS64.
+ */
+DEF_HELPER_FLAGS_3(muleu_s_ph_qbl, 0, tl, env, tl, tl)
+DEF_HELPER_FLAGS_3(muleu_s_ph_qbr, 0, tl, env, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(muleu_s_qh_obl, 0, tl, env, tl, tl)
+DEF_HELPER_FLAGS_3(muleu_s_qh_obr, 0, tl, env, tl, tl)
+#endif
+DEF_HELPER_FLAGS_3(mulq_rs_ph, 0, tl, env, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(mulq_rs_qh, 0, tl, env, tl, tl)
+#endif
+DEF_HELPER_FLAGS_3(muleq_s_w_phl, 0, tl, env, tl, tl)
+DEF_HELPER_FLAGS_3(muleq_s_w_phr, 0, tl, env, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(muleq_s_pw_qhl, 0, tl, env, tl, tl)
+DEF_HELPER_FLAGS_3(muleq_s_pw_qhr, 0, tl, env, tl, tl)
+#endif
+DEF_HELPER_FLAGS_4(dpau_h_qbl, 0, void, env, i32, tl, tl)
+DEF_HELPER_FLAGS_4(dpau_h_qbr, 0, void, env, i32, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpau_h_obl, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(dpau_h_obr, 0, void, env, tl, tl, i32)
+#endif
+DEF_HELPER_FLAGS_4(dpsu_h_qbl, 0, void, env, i32, tl, tl)
+DEF_HELPER_FLAGS_4(dpsu_h_qbr, 0, void, env, i32, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpsu_h_obl, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(dpsu_h_obr, 0, void, env, tl, tl, i32)
+#endif
+DEF_HELPER_FLAGS_4(dpa_w_ph, 0, void, env, i32, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpa_w_qh, 0, void, env, tl, tl, i32)
+#endif
+DEF_HELPER_FLAGS_4(dpax_w_ph, 0, void, env, i32, tl, tl)
+DEF_HELPER_FLAGS_4(dpaq_s_w_ph, 0, void, env, i32, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpaq_s_w_qh, 0, void, env, tl, tl, i32)
+#endif
+DEF_HELPER_FLAGS_4(dpaqx_s_w_ph, 0, void, env, i32, tl, tl)
+DEF_HELPER_FLAGS_4(dpaqx_sa_w_ph, 0, void, env, i32, tl, tl)
+DEF_HELPER_FLAGS_4(dps_w_ph, 0, void, env, i32, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dps_w_qh, 0, void, env, tl, tl, i32)
+#endif
+DEF_HELPER_FLAGS_4(dpsx_w_ph, 0, void, env, i32, tl, tl)
+DEF_HELPER_FLAGS_4(dpsq_s_w_ph, 0, void, env, i32, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpsq_s_w_qh, 0, void, env, tl, tl, i32)
+#endif
+DEF_HELPER_FLAGS_4(dpsqx_s_w_ph, 0, void, env, i32, tl, tl)
+DEF_HELPER_FLAGS_4(dpsqx_sa_w_ph, 0, void, env, i32, tl, tl)
+DEF_HELPER_FLAGS_4(mulsaq_s_w_ph, 0, void, env, i32, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(mulsaq_s_w_qh, 0, void, env, tl, tl, i32)
+#endif
+DEF_HELPER_FLAGS_4(dpaq_sa_l_w, 0, void, env, i32, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpaq_sa_l_pw, 0, void, env, tl, tl, i32)
+#endif
+DEF_HELPER_FLAGS_4(dpsq_sa_l_w, 0, void, env, i32, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpsq_sa_l_pw, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(mulsaq_s_l_pw, 0, void, env, tl, tl, i32)
+#endif
+DEF_HELPER_FLAGS_4(maq_s_w_phl, 0, void, env, i32, tl, tl)
+DEF_HELPER_FLAGS_4(maq_s_w_phr, 0, void, env, i32, tl, tl)
+DEF_HELPER_FLAGS_4(maq_sa_w_phl, 0, void, env, i32, tl, tl)
+DEF_HELPER_FLAGS_4(maq_sa_w_phr, 0, void, env, i32, tl, tl)
+DEF_HELPER_FLAGS_3(mul_ph, 0, tl, env, tl, tl)
+DEF_HELPER_FLAGS_3(mul_s_ph, 0, tl, env, tl, tl)
+DEF_HELPER_FLAGS_3(mulq_s_ph, 0, tl, env, tl, tl)
+DEF_HELPER_FLAGS_3(mulq_s_w, 0, tl, env, tl, tl)
+DEF_HELPER_FLAGS_3(mulq_rs_w, 0, tl, env, tl, tl)
+DEF_HELPER_FLAGS_4(mulsa_w_ph, 0, void, env, i32, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(maq_s_w_qhll, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(maq_s_w_qhlr, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(maq_s_w_qhrl, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(maq_s_w_qhrr, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(maq_sa_w_qhll, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(maq_sa_w_qhlr, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(maq_sa_w_qhrl, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(maq_sa_w_qhrr, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(maq_s_l_pwl, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(maq_s_l_pwr, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(dmadd, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(dmaddu, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(dmsub, 0, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(dmsubu, 0, void, env, tl, tl, i32)
+#endif
+
 #include "def-helper.h"
diff --git a/target-mips/translate.c b/target-mips/translate.c
index d87dbee..88c20cb 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -402,6 +402,13 @@  enum {
     OPC_ADDWC          = (0x11 << 6) | OPC_ADDU_QB_DSP,
     OPC_MODSUB         = (0x12 << 6) | OPC_ADDU_QB_DSP,
     OPC_RADDU_W_QB     = (0x14 << 6) | OPC_ADDU_QB_DSP,
+    /* MIPS DSP Multiply Sub-class insns */
+    OPC_MULEU_S_PH_QBL = (0x06 << 6) | OPC_ADDU_QB_DSP,
+    OPC_MULEU_S_PH_QBR = (0x07 << 6) | OPC_ADDU_QB_DSP,
+    OPC_MULQ_RS_PH     = (0x1F << 6) | OPC_ADDU_QB_DSP,
+    OPC_MULEQ_S_W_PHL  = (0x1C << 6) | OPC_ADDU_QB_DSP,
+    OPC_MULEQ_S_W_PHR  = (0x1D << 6) | OPC_ADDU_QB_DSP,
+    OPC_MULQ_S_PH      = (0x1E << 6) | OPC_ADDU_QB_DSP,
 };
 
 #define OPC_ADDUH_QB_DSP OPC_MULT_G_2E
@@ -420,6 +427,11 @@  enum {
     OPC_SUBQH_R_PH = (0x0B << 6) | OPC_ADDUH_QB_DSP,
     OPC_SUBQH_W    = (0x11 << 6) | OPC_ADDUH_QB_DSP,
     OPC_SUBQH_R_W  = (0x13 << 6) | OPC_ADDUH_QB_DSP,
+    /* MIPS DSP Multiply Sub-class insns */
+    OPC_MUL_PH     = (0x0C << 6) | OPC_ADDUH_QB_DSP,
+    OPC_MUL_S_PH   = (0x0E << 6) | OPC_ADDUH_QB_DSP,
+    OPC_MULQ_S_W   = (0x16 << 6) | OPC_ADDUH_QB_DSP,
+    OPC_MULQ_RS_W  = (0x17 << 6) | OPC_ADDUH_QB_DSP,
 };
 
 #define MASK_ABSQ_S_PH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
@@ -451,6 +463,7 @@  enum {
     OPC_PRECRQ_RS_PH_W   = (0x15 << 6) | OPC_CMPU_EQ_QB_DSP,
     OPC_PRECRQU_S_QB_PH  = (0x0F << 6) | OPC_CMPU_EQ_QB_DSP,
 };
+
 #define MASK_SHLL_QB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
 enum {
     /* MIPS DSP GPR-Based Shift Sub-class */
@@ -478,6 +491,33 @@  enum {
     OPC_SHRAV_R_W  = (0x17 << 6) | OPC_SHLL_QB_DSP,
 };
 
+#define MASK_DPA_W_PH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Multiply Sub-class insns, ascending by the bits 10..6 value */
+    OPC_DPA_W_PH      = (0x00 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPS_W_PH      = (0x01 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_MULSA_W_PH    = (0x02 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAU_H_QBL    = (0x03 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAQ_S_W_PH   = (0x04 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSQ_S_W_PH   = (0x05 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_MULSAQ_S_W_PH = (0x06 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAU_H_QBR    = (0x07 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAX_W_PH     = (0x08 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSX_W_PH     = (0x09 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSU_H_QBL    = (0x0B << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAQ_SA_L_W   = (0x0C << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSQ_SA_L_W   = (0x0D << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSU_H_QBR    = (0x0F << 6) | OPC_DPA_W_PH_DSP,
+    OPC_MAQ_SA_W_PHL  = (0x10 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_MAQ_SA_W_PHR  = (0x12 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_MAQ_S_W_PHL   = (0x14 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_MAQ_S_W_PHR   = (0x16 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAQX_S_W_PH  = (0x18 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSQX_S_W_PH  = (0x19 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAQX_SA_W_PH = (0x1A << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSQX_SA_W_PH = (0x1B << 6) | OPC_DPA_W_PH_DSP,
+};
+
 #if defined(TARGET_MIPS64)
 #define MASK_ABSQ_S_QH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
 enum {
@@ -505,6 +545,12 @@  enum {
 #if defined(TARGET_MIPS64)
 #define MASK_ADDU_OB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
 enum {
+    /* MIPS DSP Multiply Sub-class insns */
+    OPC_MULEQ_S_PW_QHL = (0x1C << 6) | OPC_ADDU_OB_DSP,
+    OPC_MULEQ_S_PW_QHR = (0x1D << 6) | OPC_ADDU_OB_DSP,
+    OPC_MULEU_S_QH_OBL = (0x06 << 6) | OPC_ADDU_OB_DSP,
+    OPC_MULEU_S_QH_OBR = (0x07 << 6) | OPC_ADDU_OB_DSP,
+    OPC_MULQ_RS_QH     = (0x1F << 6) | OPC_ADDU_OB_DSP,
     /* MIPS DSP Arithmetic Sub-class */
     OPC_RADDU_L_OB     = (0x14 << 6) | OPC_ADDU_OB_DSP,
     OPC_SUBQ_PW        = (0x13 << 6) | OPC_ADDU_OB_DSP,
@@ -546,6 +592,39 @@  enum {
 #endif
 
 #if defined(TARGET_MIPS64)
+#define MASK_DPAQ_W_QH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Multiply Sub-class insns, ascending by the bits 10..6 value */
+    OPC_DPA_W_QH      = (0x00 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPS_W_QH      = (0x01 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPAU_H_OBL    = (0x03 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPAQ_S_W_QH   = (0x04 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPSQ_S_W_QH   = (0x05 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MULSAQ_S_W_QH = (0x06 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPAU_H_OBR    = (0x07 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPSU_H_OBL    = (0x0B << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPAQ_SA_L_PW  = (0x0C << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPSQ_SA_L_PW  = (0x0D << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MULSAQ_S_L_PW = (0x0E << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPSU_H_OBR    = (0x0F << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_SA_W_QHLL = (0x10 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_SA_W_QHLR = (0x11 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_SA_W_QHRL = (0x12 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_SA_W_QHRR = (0x13 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_S_W_QHLL  = (0x14 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_S_W_QHLR  = (0x15 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_S_W_QHRL  = (0x16 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_S_W_QHRR  = (0x17 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DMADD         = (0x19 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DMSUB         = (0x1B << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_S_L_PWL   = (0x1C << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DMADDU        = (0x1D << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_S_L_PWR   = (0x1E << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DMSUBU        = (0x1F << 6) | OPC_DPAQ_W_QH_DSP,
+};
+#endif
+
+#if defined(TARGET_MIPS64)
 #define MASK_SHLL_OB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
 enum {
     /* MIPS DSP GPR-Based Shift Sub-class */
@@ -12465,6 +12544,22 @@  static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
                 case  OPC_SUBQH_R_W:
                     gen_helper_subqh_r_w(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
                     break;
+                case  OPC_MUL_PH:
+                    gen_helper_mul_ph(cpu_gpr[rd], cpu_env,
+                                      cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case  OPC_MUL_S_PH:
+                    gen_helper_mul_s_ph(cpu_gpr[rd], cpu_env,
+                                        cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_MULQ_S_W:
+                    gen_helper_mulq_s_w(cpu_gpr[rd], cpu_env,
+                                        cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_MULQ_RS_W:
+                    gen_helper_mulq_rs_w(cpu_gpr[rd], cpu_env,
+                                         cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
                 default:
                     MIPS_INVAL("MASK ADDUH.QB");
                     generate_exception(ctx, EXCP_RI);
@@ -12676,6 +12771,36 @@  static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
                 check_dsp(ctx);
                 gen_helper_raddu_w_qb(cpu_gpr[rd], cpu_gpr[rs]);
                 break;
+            case OPC_MULEU_S_PH_QBL:
+                check_dsp(ctx);
+                gen_helper_muleu_s_ph_qbl(cpu_gpr[rd], cpu_env,
+                                          cpu_gpr[rs], cpu_gpr[rt]);
+                break;
+            case OPC_MULEU_S_PH_QBR:
+                check_dsp(ctx);
+                gen_helper_muleu_s_ph_qbr(cpu_gpr[rd], cpu_env,
+                                          cpu_gpr[rs], cpu_gpr[rt]);
+                break;
+            case OPC_MULQ_RS_PH:
+                check_dsp(ctx);
+                gen_helper_mulq_rs_ph(cpu_gpr[rd], cpu_env,
+                                      cpu_gpr[rs], cpu_gpr[rt]);
+                break;
+            case OPC_MULEQ_S_W_PHL:
+                check_dsp(ctx);
+                gen_helper_muleq_s_w_phl(cpu_gpr[rd], cpu_env,
+                                         cpu_gpr[rs], cpu_gpr[rt]);
+                break;
+            case OPC_MULEQ_S_W_PHR:
+                check_dsp(ctx);
+                gen_helper_muleq_s_w_phr(cpu_gpr[rd], cpu_env,
+                                         cpu_gpr[rs], cpu_gpr[rt]);
+                break;
+            case OPC_MULQ_S_PH:
+                check_dspr2(ctx);
+                gen_helper_mulq_s_ph(cpu_gpr[rd], cpu_env,
+                                     cpu_gpr[rs], cpu_gpr[rt]);
+                break;
             default:            /* Invalid */
                 MIPS_INVAL("MASK ADDU.QB");
                 generate_exception(ctx, EXCP_RI);
@@ -12842,6 +12967,135 @@  static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
                 tcg_temp_free_i32(temp_rs);
                 break;
             }
+        case OPC_DPA_W_PH_DSP:
+            {
+                /*
+                 * Pass only the two low bits of rd to the helpers, exactly
+                 * as OPC_DPAQ_W_QH_DSP does with "ac = rd & 0x03", instead
+                 * of the unmasked field.
+                 */
+                TCGv_i32 ac_v = tcg_const_i32(rd & 0x03);
+
+                op2 = MASK_DPA_W_PH(ctx->opcode);
+                switch (op2) {
+                case OPC_DPAU_H_QBL:
+                    check_dsp(ctx);
+                    gen_helper_dpau_h_qbl(cpu_env, ac_v,
+                                          cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPAU_H_QBR:
+                    check_dsp(ctx);
+                    gen_helper_dpau_h_qbr(cpu_env, ac_v,
+                                          cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPSU_H_QBL:
+                    check_dsp(ctx);
+                    gen_helper_dpsu_h_qbl(cpu_env, ac_v,
+                                          cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPSU_H_QBR:
+                    check_dsp(ctx);
+                    gen_helper_dpsu_h_qbr(cpu_env, ac_v,
+                                          cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPA_W_PH:
+                    check_dspr2(ctx);
+                    gen_helper_dpa_w_ph(cpu_env, ac_v,
+                                        cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPAX_W_PH:
+                    check_dspr2(ctx);
+                    gen_helper_dpax_w_ph(cpu_env, ac_v,
+                                         cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPAQ_S_W_PH:
+                    check_dsp(ctx);
+                    gen_helper_dpaq_s_w_ph(cpu_env, ac_v,
+                                           cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPAQX_S_W_PH:
+                    check_dspr2(ctx);
+                    gen_helper_dpaqx_s_w_ph(cpu_env, ac_v,
+                                            cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPAQX_SA_W_PH:
+                    check_dspr2(ctx);
+                    gen_helper_dpaqx_sa_w_ph(cpu_env, ac_v,
+                                             cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPS_W_PH:
+                    check_dspr2(ctx);
+                    gen_helper_dps_w_ph(cpu_env, ac_v,
+                                        cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPSX_W_PH:
+                    check_dspr2(ctx);
+                    gen_helper_dpsx_w_ph(cpu_env, ac_v,
+                                         cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPSQ_S_W_PH:
+                    check_dsp(ctx);
+                    gen_helper_dpsq_s_w_ph(cpu_env, ac_v,
+                                           cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPSQX_S_W_PH:
+                    check_dspr2(ctx);
+                    gen_helper_dpsqx_s_w_ph(cpu_env, ac_v,
+                                            cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPSQX_SA_W_PH:
+                    check_dspr2(ctx);
+                    gen_helper_dpsqx_sa_w_ph(cpu_env, ac_v,
+                                             cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_MULSAQ_S_W_PH:
+                    check_dsp(ctx);
+                    gen_helper_mulsaq_s_w_ph(cpu_env, ac_v,
+                                             cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPAQ_SA_L_W:
+                    check_dsp(ctx);
+                    gen_helper_dpaq_sa_l_w(cpu_env, ac_v,
+                                           cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_DPSQ_SA_L_W:
+                    check_dsp(ctx);
+                    gen_helper_dpsq_sa_l_w(cpu_env, ac_v,
+                                           cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_MAQ_S_W_PHL:
+                    check_dsp(ctx);
+                    gen_helper_maq_s_w_phl(cpu_env, ac_v,
+                                           cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_MAQ_S_W_PHR:
+                    check_dsp(ctx);
+                    gen_helper_maq_s_w_phr(cpu_env, ac_v,
+                                           cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_MAQ_SA_W_PHL:
+                    check_dsp(ctx);
+                    gen_helper_maq_sa_w_phl(cpu_env, ac_v,
+                                            cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_MAQ_SA_W_PHR:
+                    check_dsp(ctx);
+                    gen_helper_maq_sa_w_phr(cpu_env, ac_v,
+                                            cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                case OPC_MULSA_W_PH:
+                    check_dspr2(ctx);
+                    gen_helper_mulsa_w_ph(cpu_env, ac_v,
+                                          cpu_gpr[rs], cpu_gpr[rt]);
+                    break;
+                default:            /* Invalid */
+                    MIPS_INVAL("MASK DPAW.PH");
+                    generate_exception(ctx, EXCP_RI);
+                    break;
+                }
+                tcg_temp_free_i32(ac_v);
+                break;
+            }
 #if defined(TARGET_MIPS64)
         case OPC_DEXTM ... OPC_DEXT:
         case OPC_DINSM ... OPC_DINS:
@@ -12945,6 +13277,31 @@  static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
         case OPC_ADDU_OB_DSP:
             op2 = MASK_ADDU_OB(ctx->opcode);
             switch (op2) {
+            case OPC_MULEQ_S_PW_QHL:
+                check_dsp(ctx);
+                gen_helper_muleq_s_pw_qhl(cpu_gpr[rd], cpu_env,
+                                          cpu_gpr[rs], cpu_gpr[rt]);
+                break;
+            case OPC_MULEQ_S_PW_QHR:
+                check_dsp(ctx);
+                gen_helper_muleq_s_pw_qhr(cpu_gpr[rd], cpu_env,
+                                          cpu_gpr[rs], cpu_gpr[rt]);
+                break;
+            case OPC_MULEU_S_QH_OBL:
+                check_dsp(ctx);
+                gen_helper_muleu_s_qh_obl(cpu_gpr[rd],
+                                          cpu_env, cpu_gpr[rs], cpu_gpr[rt]);
+                break;
+            case OPC_MULEU_S_QH_OBR:
+                check_dsp(ctx);
+                gen_helper_muleu_s_qh_obr(cpu_gpr[rd],
+                                          cpu_env, cpu_gpr[rs], cpu_gpr[rt]);
+                break;
+            case OPC_MULQ_RS_QH:
+                check_dsp(ctx);
+                gen_helper_mulq_rs_qh(cpu_gpr[rd], cpu_env,
+                                      cpu_gpr[rs], cpu_gpr[rt]);
+                break;
             case OPC_RADDU_L_OB:
                 check_dsp(ctx);
                 gen_helper_raddu_l_ob(cpu_gpr[rd], cpu_gpr[rs]);
@@ -13112,6 +13469,152 @@  static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
             break;
 #endif
 #if defined(TARGET_MIPS64)
+        case OPC_DPAQ_W_QH_DSP:
+            {
+                /* Helpers receive just the two low bits of rd. */
+                TCGv_i32 acc = tcg_const_i32(rd & 0x03);
+
+                op2 = MASK_DPAQ_W_QH(ctx->opcode);
+
+                switch (op2) {
+                case OPC_DMADD:
+                    check_dsp(ctx);
+                    gen_helper_dmadd(cpu_env, cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DMADDU:
+                    check_dsp(ctx);
+                    gen_helper_dmaddu(cpu_env,
+                                      cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DMSUB:
+                    check_dsp(ctx);
+                    gen_helper_dmsub(cpu_env, cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DMSUBU:
+                    check_dsp(ctx);
+                    gen_helper_dmsubu(cpu_env,
+                                      cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DPA_W_QH:
+                    check_dspr2(ctx);
+                    gen_helper_dpa_w_qh(cpu_env,
+                                        cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DPAQ_S_W_QH:
+                    check_dsp(ctx);
+                    gen_helper_dpaq_s_w_qh(cpu_env,
+                                           cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DPAQ_SA_L_PW:
+                    check_dsp(ctx);
+                    gen_helper_dpaq_sa_l_pw(cpu_env,
+                                            cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DPAU_H_OBL:
+                    check_dsp(ctx);
+                    gen_helper_dpau_h_obl(cpu_env,
+                                          cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DPAU_H_OBR:
+                    check_dsp(ctx);
+                    gen_helper_dpau_h_obr(cpu_env,
+                                          cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DPS_W_QH:
+                    check_dspr2(ctx);
+                    gen_helper_dps_w_qh(cpu_env,
+                                        cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DPSQ_S_W_QH:
+                    check_dsp(ctx);
+                    gen_helper_dpsq_s_w_qh(cpu_env,
+                                           cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DPSQ_SA_L_PW:
+                    check_dsp(ctx);
+                    gen_helper_dpsq_sa_l_pw(cpu_env,
+                                            cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DPSU_H_OBL:
+                    check_dsp(ctx);
+                    gen_helper_dpsu_h_obl(cpu_env,
+                                          cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_DPSU_H_OBR:
+                    check_dsp(ctx);
+                    gen_helper_dpsu_h_obr(cpu_env,
+                                          cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_MAQ_S_L_PWL:
+                    check_dsp(ctx);
+                    gen_helper_maq_s_l_pwl(cpu_env,
+                                           cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_MAQ_S_L_PWR:
+                    check_dsp(ctx);
+                    gen_helper_maq_s_l_pwr(cpu_env,
+                                           cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_MAQ_S_W_QHLL:
+                    check_dsp(ctx);
+                    gen_helper_maq_s_w_qhll(cpu_env,
+                                            cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_MAQ_SA_W_QHLL:
+                    check_dsp(ctx);
+                    gen_helper_maq_sa_w_qhll(cpu_env,
+                                             cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_MAQ_S_W_QHLR:
+                    check_dsp(ctx);
+                    gen_helper_maq_s_w_qhlr(cpu_env,
+                                            cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_MAQ_SA_W_QHLR:
+                    check_dsp(ctx);
+                    gen_helper_maq_sa_w_qhlr(cpu_env,
+                                             cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_MAQ_S_W_QHRL:
+                    check_dsp(ctx);
+                    gen_helper_maq_s_w_qhrl(cpu_env,
+                                            cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_MAQ_SA_W_QHRL:
+                    check_dsp(ctx);
+                    gen_helper_maq_sa_w_qhrl(cpu_env,
+                                             cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_MAQ_S_W_QHRR:
+                    check_dsp(ctx);
+                    gen_helper_maq_s_w_qhrr(cpu_env,
+                                            cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_MAQ_SA_W_QHRR:
+                    check_dsp(ctx);
+                    gen_helper_maq_sa_w_qhrr(cpu_env,
+                                             cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_MULSAQ_S_L_PW:
+                    check_dsp(ctx);
+                    gen_helper_mulsaq_s_l_pw(cpu_env,
+                                             cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                case OPC_MULSAQ_S_W_QH:
+                    check_dsp(ctx);
+                    gen_helper_mulsaq_s_w_qh(cpu_env,
+                                             cpu_gpr[rs], cpu_gpr[rt], acc);
+                    break;
+                default:            /* Unrecognised op2 */
+                    MIPS_INVAL("MASK DPAQ.W.QH");
+                    generate_exception(ctx, EXCP_RI);
+                    break;
+                }
+                tcg_temp_free_i32(acc);
+                break;
+            }
+#endif
+#if defined(TARGET_MIPS64)
         case OPC_SHLL_OB_DSP:
             op2 = MASK_SHLL_OB(ctx->opcode);
             switch (op2) {