diff mbox

[for-2.5,26/30] m68k: add mull/divl

Message ID 1439151229-27747-27-git-send-email-laurent@vivier.eu
State New
Headers show

Commit Message

Laurent Vivier Aug. 9, 2015, 8:13 p.m. UTC
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
---
 target-m68k/cpu.h       |   3 +
 target-m68k/helper.h    |   6 ++
 target-m68k/op_helper.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++
 target-m68k/qregs.def   |   1 +
 target-m68k/translate.c |  65 ++++++++++++++++++----
 5 files changed, 208 insertions(+), 10 deletions(-)

Comments

Richard Henderson Aug. 12, 2015, 6:36 p.m. UTC | #1
On 08/09/2015 01:13 PM, Laurent Vivier wrote:
> Signed-off-by: Laurent Vivier <laurent@vivier.eu>
> ---
>  target-m68k/cpu.h       |   3 +
>  target-m68k/helper.h    |   6 ++
>  target-m68k/op_helper.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++
>  target-m68k/qregs.def   |   1 +
>  target-m68k/translate.c |  65 ++++++++++++++++++----
>  5 files changed, 208 insertions(+), 10 deletions(-)
> 
> diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h
> index 6d1a140..a261680 100644
> --- a/target-m68k/cpu.h
> +++ b/target-m68k/cpu.h
> @@ -90,6 +90,9 @@ typedef struct CPUM68KState {
>      uint32_t div1;
>      uint32_t div2;
>  
> +    /* Upper 32 bits of a 64bit operand for quad MUL/DIV.  */
> +    uint32_t quadh;

This is a poor choice, IMO.

> +void HELPER(divu64)(CPUM68KState *env)
> +{
> +    uint32_t num;
> +    uint32_t den;
> +    uint64_t quot;
> +    uint32_t rem;
> +    uint32_t flags;
> +    uint64_t quad;
> +
> +    num = env->div1;
> +    den = env->div2;
> +    /* ??? This needs to make sure the throwing location is accurate.  */
> +    if (den == 0) {
> +        raise_exception(env, EXCP_DIV0);
> +    }
> +    quad = num | ((uint64_t)env->quadh << 32);

Pass in the numerator as a 64-bit argument, and avoid the quadh input.

> +    quot = quad / den;
> +    rem = quad % den;
> +    if (quot > 0xffffffffULL) {
> +        flags = (env->cc_dest & ~CCF_C) | CCF_V;
> +    } else {
> +        flags = 0;
> +        if (quot == 0) {
> +            flags |= CCF_Z;
> +        } else if ((int32_t)quot < 0) {
> +            flags |= CCF_N;
> +        }
> +        env->div1 = quot;
> +        env->quadh = rem;

Return a 64-bit result with the quot/rem packed into the high/low.

> +void HELPER(divs64)(CPUM68KState *env)

Similarly.

> +    if ((quot & 0xffffffff80000000ULL) &&
> +        (quot & 0xffffffff80000000ULL) != 0xffffffff80000000ULL) {

Possibly more concisely written as

  !((quot >> 31) == 0 || (quot >> 31) == -1)

> +uint32_t HELPER(mulu32_cc)(CPUM68KState *env, uint32_t op1, uint32_t op2)
> +uint32_t HELPER(muls32_cc)(CPUM68KState *env, uint32_t op1, uint32_t op2)
> +uint32_t HELPER(mulu64)(CPUM68KState *env, uint32_t op1, uint32_t op2)
> +uint32_t HELPER(muls64)(CPUM68KState *env, uint32_t op1, uint32_t op2)

It's much easier to do all of the multiplication inline now.  I suppose the
opcodes involved didn't exist when this patch was written.  See below.

It's probably better to split this patch in two as well, one part for division
and the other for multiplication.

> @@ -1122,8 +1122,27 @@ DISAS_INSN(divl)
>      uint16_t ext;
>  
>      ext = read_im16(env, s);
> -    if (ext & 0x87f8) {
> -        gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
> +    if (ext & 0x400) {
> +        if (!m68k_feature(s->env, M68K_FEATURE_QUAD_MULDIV)) {
> +            gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
> +            return;
> +        }
> +        num = DREG(ext, 12);
> +        reg = DREG(ext, 0);
> +        tcg_gen_mov_i32(QREG_DIV1, num);
> +        tcg_gen_mov_i32(QREG_QUADH, reg);

This then becomes

           TCGv_i64 t64 = tcg_temp_new_i64();
           tcg_gen_concat_i32_i64(t64, num, reg);

> +        SRC_EA(env, den, OS_LONG, 0, NULL);
> +        tcg_gen_mov_i32(QREG_DIV2, den);
> +        if (ext & 0x0800) {
> +            gen_helper_divs64(cpu_env);
> +        } else {
> +            gen_helper_divu64(cpu_env);

               gen_helper_divu64(t64, cpu_env, t64, den);

> +        }

           TCGv_i32 q = tcg_temp_new();
           TCGv_i32 r = tcg_temp_new();
           tcg_gen_extr_i64_i32(q, r, t64);
           tcg_temp_free_i64(t64);

> +        tcg_gen_mov_i32(num, QREG_DIV1);
> +        if (!TCGV_EQUAL(num, reg)) {
> +            tcg_gen_mov_i32(reg, QREG_QUADH);
> +        }

Depending on how common this is, or isn't, it's probably cleaner to trust the
tcg optimizer to delete the dead code:

	   /* If Dq and Dr are the same, the quotient is returned;
              therefore we set Dq last.  */
           tcg_gen_mov_i32(reg, r);
           tcg_gen_mov_i32(num, q);

> @@ -1887,21 +1908,45 @@ DISAS_INSN(mull)
>      TCGv reg;
>      TCGv src1;
>      TCGv dest;
> +    TCGv regh;
>  
>      /* The upper 32 bits of the product are discarded, so
>         muls.l and mulu.l are functionally equivalent.  */
>      ext = read_im16(env, s);
> -    if (ext & 0x87ff) {
> -        gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
> +    if (ext & 0x400) {
> +        if (!m68k_feature(s->env, M68K_FEATURE_QUAD_MULDIV)) {
> +            gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
> +            return;
> +        }
> +        reg = DREG(ext, 12);
> +        regh = DREG(ext, 0);
> +        SRC_EA(env, src1, OS_LONG, 0, NULL);
> +        dest = tcg_temp_new();
> +        if (ext & 0x800) {
> +            gen_helper_muls64(dest, cpu_env, src1, reg);

	tcg_gen_muls2_i32(reg, dest, reg, src1);
	set_cc_op(s, CC_OP_MULQ);
	tcg_gen_mov_i32(QREG_CC_DEST, dest);
	tcg_gen_mov_i32(QREG_CC_SRC, reg);
	tcg_gen_mov_i32(regh, dest);

where MULQ sets N = (CC_DEST < 0) and Z = ((CC_DEST | CC_SRC) == 0).

> +        } else {
> +            gen_helper_mulu64(dest, cpu_env, src1, reg);

Similarly with tcg_gen_mulu2_i32.

> -    tcg_gen_mul_i32(dest, src1, reg);
> -    tcg_gen_mov_i32(reg, dest);
> -    /* Unlike m68k, coldfire always clears the overflow bit.  */
> +    if (m68k_feature(s->env, M68K_FEATURE_M68000)) {
> +        if (ext & 0x800) {
> +            gen_helper_muls32_cc(dest, cpu_env, src1, reg);

	tcg_gen_muls2_i32(reg, QREG_CC_DEST, reg, src1);
	set_cc_op(s, CC_OP_MULQ_V);
	tcg_gen_mov_i32(QREG_CC_SRC, reg);

where MULQ_V is like MULQ, except that it also sets
V = (DEST != (SRC >> 31)).

> +        } else {
> +            gen_helper_mulu32_cc(dest, cpu_env, src1, reg);

	tcg_gen_mulu2_i32(reg, QREG_CC_DEST, reg, src1);
	set_cc_op(s, CC_OP_MULQ);
	tcg_gen_mov_i32(QREG_CC_SRC, reg);


r~
diff mbox

Patch

diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h
index 6d1a140..a261680 100644
--- a/target-m68k/cpu.h
+++ b/target-m68k/cpu.h
@@ -90,6 +90,9 @@  typedef struct CPUM68KState {
     uint32_t div1;
     uint32_t div2;
 
+    /* Upper 32 bits of a 64bit operand for quad MUL/DIV.  */
+    uint32_t quadh;
+
     /* MMU status.  */
     struct {
         uint32_t ar;
diff --git a/target-m68k/helper.h b/target-m68k/helper.h
index 9b54ebf..de4d84d 100644
--- a/target-m68k/helper.h
+++ b/target-m68k/helper.h
@@ -3,6 +3,12 @@  DEF_HELPER_1(ff1, i32, i32)
 DEF_HELPER_2(sats, i32, i32, i32)
 DEF_HELPER_2(divu, void, env, i32)
 DEF_HELPER_2(divs, void, env, i32)
+DEF_HELPER_1(divu64, void, env)
+DEF_HELPER_1(divs64, void, env)
+DEF_HELPER_3(mulu32_cc, i32, env, i32, i32)
+DEF_HELPER_3(muls32_cc, i32, env, i32, i32)
+DEF_HELPER_3(mulu64, i32, env, i32, i32)
+DEF_HELPER_3(muls64, i32, env, i32, i32)
 DEF_HELPER_3(addx_cc, i32, env, i32, i32)
 DEF_HELPER_3(subx_cc, i32, env, i32, i32)
 DEF_HELPER_3(shl_cc, i32, env, i32, i32)
diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c
index 71641bf..8ecbf8a 100644
--- a/target-m68k/op_helper.c
+++ b/target-m68k/op_helper.c
@@ -244,3 +244,146 @@  void HELPER(divs)(CPUM68KState *env, uint32_t word)
     env->div2 = rem;
     env->cc_dest = flags;
 }
+
+/* Unsigned 64-by-32 divide of quadh:div1 by div2.  Without overflow, div1
+   gets the quotient and quadh the remainder; cc_dest gets the flags.  */
+void HELPER(divu64)(CPUM68KState *env)
+{
+    uint32_t divisor = env->div2;
+    uint64_t dividend;
+    uint64_t q;
+    uint32_t cc;
+
+    /* ??? This needs to make sure the throwing location is accurate.  */
+    if (divisor == 0) {
+        raise_exception(env, EXCP_DIV0);
+    }
+    dividend = ((uint64_t)env->quadh << 32) | env->div1;
+    q = dividend / divisor;
+    if (q >> 32) {
+        /* Quotient overflows 32 bits: keep the operands, flag V.  */
+        env->cc_dest = (env->cc_dest & ~CCF_C) | CCF_V;
+        return;
+    }
+
+    if (q == 0) {
+        cc = CCF_Z;
+    } else if (q & 0x80000000u) {
+        cc = CCF_N;
+    } else {
+        cc = 0;
+    }
+    env->div1 = q;
+    env->quadh = dividend % divisor;
+    env->cc_dest = cc;
+}
+
+/* Signed 64-by-32 divide of quadh:div1 by div2.  Without overflow, div1
+   gets the quotient and quadh the remainder; cc_dest gets the flags.  */
+void HELPER(divs64)(CPUM68KState *env)
+{
+    int32_t den = env->div2;
+    int64_t quad;
+    int64_t quot;
+    uint32_t flags;
+
+    if (den == 0) {
+        raise_exception(env, EXCP_DIV0);
+    }
+    /* Build the dividend unsigned; a signed shift into bit 63 is UB.  */
+    quad = (int64_t)(((uint64_t)env->quadh << 32) | env->div1);
+    /* INT64_MIN / -1 is undefined (it traps on x86 hosts); the quotient
+       cannot fit anyway, so substitute a value that takes the overflow path.  */
+    quot = (den == -1 && quad == INT64_MIN) ? INT64_MAX : quad / den;
+
+    if (quot < INT32_MIN || quot > INT32_MAX) {
+        /* Quotient does not fit in 32 bits: operands are left untouched.  */
+        flags = (env->cc_dest & ~CCF_C) | CCF_V;
+    } else {
+        flags = 0;
+        if (quot == 0) {
+            flags |= CCF_Z;
+        } else if (quot < 0) {
+            flags |= CCF_N;
+        }
+        env->div1 = quot;
+        env->quadh = quad % den;
+    }
+    env->cc_dest = flags;
+}
+
+/* Unsigned 32x32 multiply: returns the low word, with N/Z/V in cc_dest.
+   The operands are widened first so the high word can signal overflow.  */
+uint32_t HELPER(mulu32_cc)(CPUM68KState *env, uint32_t op1, uint32_t op2)
+{
+    uint64_t res = (uint64_t)op1 * op2;
+    uint32_t flags = 0;
+
+    if (res >> 32) {
+        flags |= CCF_V;
+    }
+    if ((uint32_t)res == 0) {
+        flags |= CCF_Z;
+    }
+    if ((int32_t)res < 0) {
+        flags |= CCF_N;
+    }
+    env->cc_dest = flags;
+    return res;
+}
+
+/* Signed 32x32 multiply: returns the low word, with N/Z/V in cc_dest.
+   The operands are widened first; an int multiply would overflow (UB).  */
+uint32_t HELPER(muls32_cc)(CPUM68KState *env, uint32_t op1, uint32_t op2)
+{
+    int64_t res = (int64_t)(int32_t)op1 * (int32_t)op2;
+    uint32_t flags = 0;
+
+    if (res != (int64_t)(int32_t)res) {
+        flags |= CCF_V;
+    }
+    if ((uint32_t)res == 0) {
+        flags |= CCF_Z;
+    }
+    if ((int32_t)res < 0) {
+        flags |= CCF_N;
+    }
+    env->cc_dest = flags;
+    return res;
+}
+
+/* Unsigned 32x32->64 multiply: returns the low word, stores the high word
+   in quadh and N/Z (of the whole 64-bit product) in cc_dest.  */
+uint32_t HELPER(mulu64)(CPUM68KState *env, uint32_t op1, uint32_t op2)
+{
+    uint64_t product = (uint64_t)op1 * op2;
+    uint32_t cc = 0;
+
+    if (product == 0) {
+        cc = CCF_Z;
+    } else if (product >> 63) {
+        cc = CCF_N;
+    }
+    env->quadh = product >> 32;
+    env->cc_dest = cc;
+
+    return (uint32_t)product;
+}
+
+/* Signed 32x32->64 multiply: returns the low word, stores the high word
+   in quadh and N/Z (of the whole 64-bit product) in cc_dest.  */
+uint32_t HELPER(muls64)(CPUM68KState *env, uint32_t op1, uint32_t op2)
+{
+    int64_t product = (int64_t)(int32_t)op1 * (int32_t)op2;
+    uint32_t cc = 0;
+
+    if (product == 0) {
+        cc = CCF_Z;
+    } else if (product < 0) {
+        cc = CCF_N;
+    }
+    env->quadh = (uint64_t)product >> 32;
+    env->cc_dest = cc;
+
+    return (uint32_t)product;
+}
diff --git a/target-m68k/qregs.def b/target-m68k/qregs.def
index 204663e..aba6c9a 100644
--- a/target-m68k/qregs.def
+++ b/target-m68k/qregs.def
@@ -7,5 +7,6 @@  DEFO32(CC_SRC, cc_src)
 DEFO32(CC_X, cc_x)
 DEFO32(DIV1, div1)
 DEFO32(DIV2, div2)
+DEFO32(QUADH, quadh)
 DEFO32(MACSR, macsr)
 DEFO32(MAC_MASK, mac_mask)
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 67527fe..ad11457 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -1122,8 +1122,27 @@  DISAS_INSN(divl)
     uint16_t ext;
 
     ext = read_im16(env, s);
-    if (ext & 0x87f8) {
-        gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
+    if (ext & 0x400) {
+        if (!m68k_feature(s->env, M68K_FEATURE_QUAD_MULDIV)) {
+            gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
+            return;
+        }
+        num = DREG(ext, 12);
+        reg = DREG(ext, 0);
+        tcg_gen_mov_i32(QREG_DIV1, num);
+        tcg_gen_mov_i32(QREG_QUADH, reg);
+        SRC_EA(env, den, OS_LONG, 0, NULL);
+        tcg_gen_mov_i32(QREG_DIV2, den);
+        if (ext & 0x0800) {
+            gen_helper_divs64(cpu_env);
+        } else {
+            gen_helper_divu64(cpu_env);
+        }
+        tcg_gen_mov_i32(num, QREG_DIV1);
+        if (!TCGV_EQUAL(num, reg)) {
+            tcg_gen_mov_i32(reg, QREG_QUADH);
+        }
+        set_cc_op(s, CC_OP_FLAGS);
         return;
     }
     num = DREG(ext, 12);
@@ -1136,10 +1155,12 @@  DISAS_INSN(divl)
     } else {
         gen_helper_divu(cpu_env, tcg_const_i32(0));
     }
-    if ((ext & 7) == ((ext >> 12) & 7)) {
+    if (TCGV_EQUAL(num, reg) ||
+        m68k_feature(s->env, M68K_FEATURE_LONG_MULDIV)) {
         /* div */
-        tcg_gen_mov_i32 (reg, QREG_DIV1);
-    } else {
+        tcg_gen_mov_i32(num, QREG_DIV1);
+    }
+    if (!TCGV_EQUAL(num, reg)) {
         /* rem */
         tcg_gen_mov_i32 (reg, QREG_DIV2);
     }
@@ -1887,21 +1908,45 @@  DISAS_INSN(mull)
     TCGv reg;
     TCGv src1;
     TCGv dest;
+    TCGv regh;
 
     /* The upper 32 bits of the product are discarded, so
        muls.l and mulu.l are functionally equivalent.  */
     ext = read_im16(env, s);
-    if (ext & 0x87ff) {
-        gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
+    if (ext & 0x400) {
+        if (!m68k_feature(s->env, M68K_FEATURE_QUAD_MULDIV)) {
+            gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
+            return;
+        }
+        reg = DREG(ext, 12);
+        regh = DREG(ext, 0);
+        SRC_EA(env, src1, OS_LONG, 0, NULL);
+        dest = tcg_temp_new();
+        if (ext & 0x800) {
+            gen_helper_muls64(dest, cpu_env, src1, reg);
+        } else {
+            gen_helper_mulu64(dest, cpu_env, src1, reg);
+        }
+        tcg_gen_mov_i32(reg, dest);
+        tcg_gen_mov_i32(regh, QREG_QUADH);
+        set_cc_op(s, CC_OP_FLAGS);
         return;
     }
     reg = DREG(ext, 12);
     SRC_EA(env, src1, OS_LONG, 0, NULL);
     dest = tcg_temp_new();
-    tcg_gen_mul_i32(dest, src1, reg);
-    tcg_gen_mov_i32(reg, dest);
-    /* Unlike m68k, coldfire always clears the overflow bit.  */
+    if (m68k_feature(s->env, M68K_FEATURE_M68000)) {
+        if (ext & 0x800) {
+            gen_helper_muls32_cc(dest, cpu_env, src1, reg);
+        } else {
+            gen_helper_mulu32_cc(dest, cpu_env, src1, reg);
+        }
+        set_cc_op(s, CC_OP_FLAGS);
+    } else {
+        tcg_gen_mul_i32(dest, src1, reg);
+    }
     gen_logic_cc(s, dest, OS_LONG);
+    tcg_gen_mov_i32(reg, dest);
 }
 
 DISAS_INSN(link)