diff mbox

target-m68k: add rol/ror/roxl/roxr instructions

Message ID 1478712603-18286-1-git-send-email-laurent@vivier.eu
State New
Headers show

Commit Message

Laurent Vivier Nov. 9, 2016, 5:30 p.m. UTC
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
---
 target-m68k/translate.c | 414 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 414 insertions(+)

Comments

Richard Henderson Nov. 9, 2016, 6:47 p.m. UTC | #1
On 11/09/2016 06:30 PM, Laurent Vivier wrote:
> +        /* create [src:X:..] */
> +
> +        tcg_gen_deposit_i32(t0, QREG_CC_X, src, 1, size);
> +        tcg_gen_shli_i32(t0, t0, 31 - size);
> +
> +        /* rotate */
> +
> +        tcg_gen_rotl_i32(t0, t0, shift);
> +
> +        /* result is [src:..:src:X] */
> +
> +        tcg_gen_andi_i32(X, t0, 1);
> +        tcg_gen_shri_i32(t0, t0, 1);

I don't see how this is supposed to work.  If you form [src:x:...], and rotate 
by 0, then X gets garbage.  Of course, you're actually forming [0:src:x].  But 
for a rol of 2, the lsb of src gets 0's instead of the msb of src.

If you want to use a 32-bit rotate here, you have to (1) reduce the rotate by 
modulo size + 1 and (2) form [src:...:src:x].  E.g.

     tcg_gen_deposit_i32(t0, QREG_CC_X, src, 32 - size, size);
     tcg_gen_deposit_i32(t0, t0, src, 1, size);


r~
Richard Henderson Nov. 9, 2016, 7:39 p.m. UTC | #2
On 11/09/2016 07:47 PM, Richard Henderson wrote:
> On 11/09/2016 06:30 PM, Laurent Vivier wrote:
>> +        /* create [src:X:..] */
>> +
>> +        tcg_gen_deposit_i32(t0, QREG_CC_X, src, 1, size);
>> +        tcg_gen_shli_i32(t0, t0, 31 - size);
>> +
>> +        /* rotate */
>> +
>> +        tcg_gen_rotl_i32(t0, t0, shift);
>> +
>> +        /* result is [src:..:src:X] */
>> +
>> +        tcg_gen_andi_i32(X, t0, 1);
>> +        tcg_gen_shri_i32(t0, t0, 1);
>
> I don't see how this is supposed to work.  If you form [src:x:...], and rotate
> by 0, then X gets garbage.  Of course, you're actually forming [0:src:x].  But
> for a rol of 2, the lsb of src gets 0's instead of the msb of src.
>
> If you want to use a 32-bit rotate here, you have to (1) reduce the rotate by
> modulo size + 1 and (2) form [src:...:src:x].  E.g.
>
>     tcg_gen_deposit_i32(t0, QREG_CC_X, src, 32 - size, size);
>     tcg_gen_deposit_i32(t0, t0, src, 1, size);

Feh, actually that only works for rot8, since you need 2*size+1 bits available 
for this.  Why not just use shifts.  Something like

   int shl = shift % 17;
   int shr = shl ^ (size - 1);
   int shx = shl ? shl - 1 : 16;

   res32 = (data << shl) | (data >> shr) | (x << shx);
   x = (res32 >> 16) & 1;


r~
Laurent Vivier Nov. 9, 2016, 8:22 p.m. UTC | #3
Le 09/11/2016 à 20:39, Richard Henderson a écrit :
> On 11/09/2016 07:47 PM, Richard Henderson wrote:
>> On 11/09/2016 06:30 PM, Laurent Vivier wrote:
>>> +        /* create [src:X:..] */
>>> +
>>> +        tcg_gen_deposit_i32(t0, QREG_CC_X, src, 1, size);
>>> +        tcg_gen_shli_i32(t0, t0, 31 - size);
>>> +
>>> +        /* rotate */
>>> +
>>> +        tcg_gen_rotl_i32(t0, t0, shift);
>>> +
>>> +        /* result is [src:..:src:X] */
>>> +
>>> +        tcg_gen_andi_i32(X, t0, 1);
>>> +        tcg_gen_shri_i32(t0, t0, 1);
>>
>> I don't see how this is supposed to work.  If you form [src:x:...],
>> and rotate
>> by 0, then X gets garbage.  Of course, you're actually forming

The result is ignored in the case of a rotate by 0 (see movcond in
rotate_reg()).

>> [0:src:x].  But
>> for a rol of 2, the lsb of src gets 0's instead of the msb of src.

for a rol of 2 on an 8bit value [12345678]:

    tcg_gen_deposit_i32(t0, QREG_CC_X, src, 1, size);
    tcg_gen_shli_i32(t0, t0, 31 - size);

t0 = [12345678x00000000000000000000000]

    tcg_gen_rotl_i32(t0, t0, shift);

t0 = [345678x0000000000000000000000012]

    tcg_gen_andi_i32(X, t0, 1);

X = 2

    tcg_gen_shri_i32(t0, t0, 1);

t0 = [0345678x000000000000000000000001]

    tcg_gen_shri_i32(t1, t0, 31 - size);

t1 = [000000000000000000000000345678x0]

    tcg_gen_or_i32(dest, t0, t1);

dest = [0345678x0000000000000000345678x1]

->     we keep only 8 bits: [345678x1]

Where am I wrong?

> Feh, actually that only works for rot8, since you need 2*size+1 bits
> available for this.  Why not just use shifts.  Something like
> 
>   int shl = shift % 17;
>   int shr = shl ^ (size - 1);
>   int shx = shl ? shl - 1 : 16;
> 
>   res32 = (data << shl) | (data >> shr) | (x << shx);
>   x = (res32 >> 16) & 1;


If you think it is better, I can do like that.

Thanks,
Laurent
Richard Henderson Nov. 10, 2016, 1:05 p.m. UTC | #4
On 11/09/2016 09:22 PM, Laurent Vivier wrote:
> Le 09/11/2016 à 20:39, Richard Henderson a écrit :
>> On 11/09/2016 07:47 PM, Richard Henderson wrote:
>>> On 11/09/2016 06:30 PM, Laurent Vivier wrote:
>>>> +        /* create [src:X:..] */
>>>> +
>>>> +        tcg_gen_deposit_i32(t0, QREG_CC_X, src, 1, size);
>>>> +        tcg_gen_shli_i32(t0, t0, 31 - size);
>>>> +
>>>> +        /* rotate */
>>>> +
>>>> +        tcg_gen_rotl_i32(t0, t0, shift);
>>>> +
>>>> +        /* result is [src:..:src:X] */
>>>> +
>>>> +        tcg_gen_andi_i32(X, t0, 1);
>>>> +        tcg_gen_shri_i32(t0, t0, 1);
>>>
>>> I don't see how this is supposed to work.  If you form [src:x:...],
>>> and rotate
>>> by 0, then X gets garbage.  Of course, you're actually forming
>
> The result is ignored in the case of a rotate by 0 (see movcond in
> rotate_reg()).
>
>>> [0:src:x].  But
>>> for a rol of 2, the lsb of src gets 0's instead of the msb of src.
>
> for a rol of 2 on an 8bit value [12345678]:
>
>     tcg_gen_deposit_i32(t0, QREG_CC_X, src, 1, size);
>     tcg_gen_shli_i32(t0, t0, 31 - size);
>
> t0 = [12345678x00000000000000000000000]
>
>     tcg_gen_rotl_i32(t0, t0, shift);
>
> t0 = [345678x0000000000000000000000012]
>
>     tcg_gen_andi_i32(X, t0, 1);
>
> X = 2
>
>     tcg_gen_shri_i32(t0, t0, 1);
>
> t0 = [0345678x000000000000000000000001]
>
>     tcg_gen_shri_i32(t1, t0, 31 - size);
>
> t1 = [000000000000000000000000345678x0]
>
>     tcg_gen_or_i32(dest, t0, t1);
>
> dest = [0345678x0000000000000000345678x1]
>
> ->     we keep only 8 bits: [345678x1]
>
> Where am I wrong?

You aren't.  I simply misread this.  But still we must only perform this rotate 
modulo size+1.


r~
diff mbox

Patch

diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index a17ff01..9686a24 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -3080,6 +3080,413 @@  DISAS_INSN(shift_mem)
     set_cc_op(s, CC_OP_FLAGS);
 }
 
+/*
+ * Emit code for a plain rotate (ROL/ROR) of 'size' bits (8, 16 or 32)
+ * of 'reg' by the run-time count 'shift', and compute the CCR flags:
+ * N/Z from the sign-extended result, C from the last bit rotated out
+ * (the lsb of the result for a left rotate, the msb for a right one),
+ * V always cleared.  X is deliberately left untouched.
+ */
+static void rotate(TCGv reg, TCGv shift, int left, int size)
+{
+    switch (size) {
+    case 8:
+        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
+        tcg_gen_ext8u_i32(reg, reg);
+        tcg_gen_muli_i32(reg, reg, 0x01010101);
+        goto do_long;
+    case 16:
+        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
+        tcg_gen_deposit_i32(reg, reg, reg, 16, 16);
+        goto do_long;
+    do_long:
+    default:
+        if (left) {
+            tcg_gen_rotl_i32(reg, reg, shift);
+        } else {
+            tcg_gen_rotr_i32(reg, reg, shift);
+        }
+    }
+
+    /* compute flags */
+
+    switch (size) {
+    case 8:
+        /* Sign-extend so N and Z are computed on the 8-bit result.  */
+        tcg_gen_ext8s_i32(reg, reg);
+        break;
+    case 16:
+        /* Sign-extend so N and Z are computed on the 16-bit result.  */
+        tcg_gen_ext16s_i32(reg, reg);
+        break;
+    default:
+        break;
+    }
+
+    /* QREG_CC_X is not affected */
+
+    tcg_gen_mov_i32(QREG_CC_N, reg);
+    tcg_gen_mov_i32(QREG_CC_Z, reg);
+
+    /* C is the last bit rotated out of the operand.  */
+    if (left) {
+        tcg_gen_andi_i32(QREG_CC_C, reg, 1);
+    } else {
+        tcg_gen_shri_i32(QREG_CC_C, reg, 31);
+    }
+
+    tcg_gen_movi_i32(QREG_CC_V, 0); /* always cleared */
+}
+
+/*
+ * Set the CCR flags for a rotate-through-X (ROXL/ROXR) result:
+ * N and Z from the sign-extended 'reg' (which is sign-extended in
+ * place for sizes 8 and 16), X and C both set to the rotated-out bit
+ * 'X', and V cleared.
+ */
+static void rotate_x_flags(TCGv reg, TCGv X, int size)
+{
+    switch (size) {
+    case 8:
+        tcg_gen_ext8s_i32(reg, reg);
+        break;
+    case 16:
+        tcg_gen_ext16s_i32(reg, reg);
+        break;
+    default:
+        break;
+    }
+    tcg_gen_mov_i32(QREG_CC_N, reg);
+    tcg_gen_mov_i32(QREG_CC_Z, reg);
+    tcg_gen_mov_i32(QREG_CC_X, X);
+    /* For ROX, C always mirrors X.  */
+    tcg_gen_mov_i32(QREG_CC_C, X);
+    tcg_gen_movi_i32(QREG_CC_V, 0);
+}
+
+/*
+ * Emit code for an 8/16-bit rotate through the X bit (ROXL/ROXR).
+ * The (size + 1)-bit value [src:X] is placed in the top bits of a
+ * 32-bit temporary and rotated there.
+ *
+ * NOTE(review): 'shift' must not exceed size + 1 bits worth of
+ * rotation -- callers either pass an immediate count <= 8 or reduce
+ * the count modulo (size + 1) (see rotate8_reg/rotate16_reg); the
+ * shift == 0 result is discarded by the register callers via movcond.
+ *
+ * Returns a new temporary holding the new X bit; the caller frees it.
+ */
+static TCGv rotate_x(TCGv dest, TCGv src, TCGv shift, int left, int size)
+{
+    TCGv X, t0, t1;
+
+    X = tcg_temp_new();
+    t0 = tcg_temp_new();
+    if (left) {
+        /* create [src:X:..] */
+
+        tcg_gen_deposit_i32(t0, QREG_CC_X, src, 1, size);
+        tcg_gen_shli_i32(t0, t0, 31 - size);
+
+        /* rotate */
+
+        tcg_gen_rotl_i32(t0, t0, shift);
+
+        /* result is [src:..:src:X] */
+
+        tcg_gen_andi_i32(X, t0, 1);
+        tcg_gen_shri_i32(t0, t0, 1);
+    } else {
+        /* create [..:X:src] */
+
+        tcg_gen_deposit_i32(t0, src, QREG_CC_X, size, 1);
+
+        /* rotate */
+
+        tcg_gen_rotr_i32(t0, t0, shift);
+
+        /* result is value: [X:src:..:src] */
+
+        tcg_gen_shri_i32(X, t0, 31);
+    }
+
+    /* extract result: fold the two copies of the rotated value together */
+
+    t1 = tcg_temp_new();
+    tcg_gen_shri_i32(t1, t0, 31 - size);
+    tcg_gen_or_i32(dest, t0, t1);
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
+
+    return X;
+}
+
+/*
+ * Emit code for a 32-bit rotate through the X bit (ROXL/ROXR).
+ * The 33-bit value [src:X] does not fit in 32 bits, so the rotate is
+ * done in a 64-bit temporary.
+ *
+ * NOTE(review): 'shift' must already be reduced modulo 33 by the
+ * caller (see rotate_reg()); the shift == 0 result is discarded there
+ * via movcond.
+ *
+ * Returns a new temporary holding the new X bit; the caller frees it.
+ */
+static TCGv rotate32_x(TCGv dest, TCGv src, TCGv shift, int left)
+{
+    TCGv_i64 t0, shift64;
+    TCGv X, lo, hi;
+
+    shift64 = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(shift64, shift);
+
+    t0 = tcg_temp_new_i64();
+
+    X = tcg_temp_new();
+    lo = tcg_temp_new();
+    hi = tcg_temp_new();
+
+    if (left) {
+        /* create [src:X:..] */
+
+        tcg_gen_shli_i32(lo, QREG_CC_X, 31);
+        tcg_gen_concat_i32_i64(t0, lo, src);
+
+        /* rotate */
+
+        tcg_gen_rotl_i64(t0, t0, shift64);
+        tcg_temp_free_i64(shift64);
+
+        /* result is [src:..:src:X] */
+
+        tcg_gen_extr_i64_i32(lo, hi, t0);
+        tcg_gen_andi_i32(X, lo, 1);
+
+        tcg_gen_shri_i32(lo, lo, 1);
+    } else {
+        /* create [..:X:src] */
+
+        tcg_gen_concat_i32_i64(t0, src, QREG_CC_X);
+
+        tcg_gen_rotr_i64(t0, t0, shift64);
+        tcg_temp_free_i64(shift64);
+
+        /* result is value: [X:src:..:src] */
+
+        tcg_gen_extr_i64_i32(lo, hi, t0);
+
+        /* extract X */
+
+        tcg_gen_shri_i32(X, hi, 31);
+
+        /* extract result */
+
+        tcg_gen_shli_i32(hi, hi, 1);
+    }
+    /* fold the two halves of the rotated value together */
+    tcg_gen_or_i32(dest, lo, hi);
+    tcg_temp_free(hi);
+    tcg_temp_free(lo);
+    tcg_temp_free_i64(t0);
+
+    return X;
+}
+
+/* rol.l/ror.l/roxl.l/roxr.l #count,Dn -- long rotate, immediate count */
+DISAS_INSN(rotate_im)
+{
+    TCGv shift;
+    int tmp;
+    int left = (insn & 0x100);
+
+    /* The 3-bit immediate count encodes 8 as 0.  */
+    tmp = (insn >> 9) & 7;
+    if (tmp == 0) {
+        tmp = 8;
+    }
+
+    shift = tcg_const_i32(tmp);
+    /* Bit 3 set selects rol/ror; clear selects roxl/roxr.  */
+    if (insn & 8) {
+        rotate(DREG(insn, 0), shift, left, 32);
+    } else {
+        /* count <= 8 < 33, so no modulo-33 reduction is needed here */
+        TCGv X = rotate32_x(DREG(insn, 0), DREG(insn, 0), shift, left);
+        rotate_x_flags(DREG(insn, 0), X, 32);
+        tcg_temp_free(X);
+    }
+    tcg_temp_free(shift);
+
+    set_cc_op(s, CC_OP_FLAGS);
+}
+
+/* rol.b/ror.b/roxl.b/roxr.b #count,Dn -- byte rotate, immediate count */
+DISAS_INSN(rotate8_im)
+{
+    int left = (insn & 0x100);
+    TCGv reg;
+    TCGv shift;
+    int tmp;
+
+    reg = gen_extend(DREG(insn, 0), OS_BYTE, 0);
+
+    /* The 3-bit immediate count encodes 8 as 0.  */
+    tmp = (insn >> 9) & 7;
+    if (tmp == 0) {
+        tmp = 8;
+    }
+
+    shift = tcg_const_i32(tmp);
+    /* Bit 3 set selects rol/ror; clear selects roxl/roxr.  */
+    if (insn & 8) {
+        rotate(reg, shift, left, 8);
+    } else {
+        TCGv X = rotate_x(reg, reg, shift, left, 8);
+        rotate_x_flags(reg, X, 8);
+        tcg_temp_free(X);
+    }
+    /* Free the count temp, as rotate_im does.  */
+    tcg_temp_free(shift);
+    gen_partset_reg(OS_BYTE, DREG(insn, 0), reg);
+    set_cc_op(s, CC_OP_FLAGS);
+}
+
+/* rol.w/ror.w/roxl.w/roxr.w #count,Dn -- word rotate, immediate count */
+DISAS_INSN(rotate16_im)
+{
+    int left = (insn & 0x100);
+    TCGv reg;
+    TCGv shift;
+    int tmp;
+
+    reg = gen_extend(DREG(insn, 0), OS_WORD, 0);
+    /* The 3-bit immediate count encodes 8 as 0.  */
+    tmp = (insn >> 9) & 7;
+    if (tmp == 0) {
+        tmp = 8;
+    }
+
+    shift = tcg_const_i32(tmp);
+    /* Bit 3 set selects rol/ror; clear selects roxl/roxr.  */
+    if (insn & 8) {
+        rotate(reg, shift, left, 16);
+    } else {
+        TCGv X = rotate_x(reg, reg, shift, left, 16);
+        /* This is a word operation: flags use size 16, not 8.  */
+        rotate_x_flags(reg, X, 16);
+        tcg_temp_free(X);
+    }
+    /* Free the count temp, as rotate_im does.  */
+    tcg_temp_free(shift);
+    gen_partset_reg(OS_WORD, DREG(insn, 0), reg);
+    set_cc_op(s, CC_OP_FLAGS);
+}
+
+/* rol.l/ror.l/roxl.l/roxr.l Dm,Dn -- long rotate, register count (mod 64) */
+DISAS_INSN(rotate_reg)
+{
+    TCGv reg;
+    TCGv src;
+    TCGv t0, t1;
+    int left = (insn & 0x100);
+
+    reg = DREG(insn, 0);
+    src = DREG(insn, 9);
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new_i32();
+    /* Bit 3 set selects rol/ror; clear selects roxl/roxr.  */
+    if (insn & 8) {
+        /* t0 = raw count in [0..63], t1 = rotate amount mod 32 */
+        tcg_gen_andi_i32(t0, src, 63);
+        tcg_gen_andi_i32(t1, src, 31);
+        rotate(reg, t1, left, 32);
+        /* if shift == 0, clear C */
+        /* rotate() just cleared V, so QREG_CC_V serves as the zero */
+        tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_C,
+                            t0, QREG_CC_V /* 0 */,
+                            QREG_CC_V /* 0 */, QREG_CC_C);
+    } else {
+        TCGv X, zero, res;
+        /* shift in [0..63] */
+        tcg_gen_andi_i32(t0, src, 63);
+        /* modulo 33: rotate32_x rotates a 33-bit quantity [X:src] */
+        tcg_gen_movi_i32(t1, 33);
+        tcg_gen_remu_i32(t1, t0, t1);
+        res = tcg_temp_new();
+        X = rotate32_x(res, DREG(insn, 0), t1, left);
+        /* if shift == 0, register and X are not affected */
+        zero = tcg_const_i32(0);
+        tcg_gen_movcond_i32(TCG_COND_EQ, X,
+                            t0, zero,
+                            QREG_CC_X, X);
+        tcg_gen_movcond_i32(TCG_COND_EQ, DREG(insn, 0),
+                            t0, zero,
+                            DREG(insn, 0), res);
+        tcg_temp_free(res);
+        tcg_temp_free(zero);
+        rotate_x_flags(DREG(insn, 0), X, 32);
+        tcg_temp_free(X);
+    }
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
+    set_cc_op(s, CC_OP_FLAGS);
+}
+
+/* rol.b/ror.b/roxl.b/roxr.b Dm,Dn -- byte rotate, register count (mod 64) */
+DISAS_INSN(rotate8_reg)
+{
+    TCGv reg;
+    TCGv src;
+    TCGv t0, t1;
+    int left = (insn & 0x100);
+
+    reg = gen_extend(DREG(insn, 0), OS_BYTE, 0);
+    src = DREG(insn, 9);
+    t0 = tcg_temp_new_i32();
+    t1 = tcg_temp_new_i32();
+    /* Bit 3 set selects rol/ror; clear selects roxl/roxr.  */
+    if (insn & 8) {
+        /* t0 = raw count in [0..63], t1 = rotate amount mod 8 */
+        tcg_gen_andi_i32(t0, src, 63);
+        tcg_gen_andi_i32(t1, src, 7);
+        rotate(reg, t1, left, 8);
+        /* if shift == 0, clear C */
+        /* rotate() just cleared V, so QREG_CC_V serves as the zero */
+        tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_C,
+                            t0, QREG_CC_V /* 0 */,
+                            QREG_CC_V /* 0 */, QREG_CC_C);
+    } else {
+        TCGv X, res, zero;
+        /* shift in [0..63] */
+        tcg_gen_andi_i32(t0, src, 63);
+        /* modulo 9: rotate_x rotates a 9-bit quantity [src:X] */
+        tcg_gen_movi_i32(t1, 9);
+        tcg_gen_remu_i32(t1, t0, t1);
+        res = tcg_temp_new();
+        X = rotate_x(res, reg, t1, left, 8);
+        /* if shift == 0, register and X are not affected */
+        zero = tcg_const_i32(0);
+        tcg_gen_movcond_i32(TCG_COND_EQ, X,
+                            t0, zero,
+                            QREG_CC_X, X);
+        tcg_gen_movcond_i32(TCG_COND_EQ, reg,
+                            t0, zero,
+                            reg, res);
+        tcg_temp_free(res);
+        tcg_temp_free(zero);
+        rotate_x_flags(reg, X, 8);
+        tcg_temp_free(X);
+    }
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
+    gen_partset_reg(OS_BYTE, DREG(insn, 0), reg);
+    set_cc_op(s, CC_OP_FLAGS);
+}
+
+/* rol.w/ror.w/roxl.w/roxr.w Dm,Dn -- word rotate, register count (mod 64) */
+DISAS_INSN(rotate16_reg)
+{
+    TCGv reg;
+    TCGv src;
+    TCGv t0, t1;
+    int left = (insn & 0x100);
+
+    reg = gen_extend(DREG(insn, 0), OS_WORD, 0);
+    src = DREG(insn, 9);
+    t0 = tcg_temp_new_i32();
+    t1 = tcg_temp_new_i32();
+    /* Bit 3 set selects rol/ror; clear selects roxl/roxr.  */
+    if (insn & 8) {
+        tcg_gen_andi_i32(t0, src, 63);
+        tcg_gen_andi_i32(t1, src, 15);
+        rotate(reg, t1, left, 16);
+        /* if shift == 0, clear C */
+        tcg_gen_movcond_i32(TCG_COND_EQ, QREG_CC_C,
+                            t0, QREG_CC_V /* 0 */,
+                            QREG_CC_V /* 0 */, QREG_CC_C);
+    } else {
+        TCGv X, res, zero;
+        /* shift in [0..63] */
+        tcg_gen_andi_i32(t0, src, 63);
+        /* modulo 17: reuse t1 as in rotate8_reg; reassigning it with
+         * tcg_const_i32() would leak the temp allocated above and
+         * free the constant in its place.  */
+        tcg_gen_movi_i32(t1, 17);
+        tcg_gen_remu_i32(t1, t0, t1);
+        res = tcg_temp_new();
+        X = rotate_x(res, reg, t1, left, 16);
+        /* if shift == 0, register and X are not affected */
+        zero = tcg_const_i32(0);
+        tcg_gen_movcond_i32(TCG_COND_EQ, X,
+                            t0, zero,
+                            QREG_CC_X, X);
+        tcg_gen_movcond_i32(TCG_COND_EQ, reg,
+                            t0, zero,
+                            reg, res);
+        tcg_temp_free(res);
+        tcg_temp_free(zero);
+        rotate_x_flags(reg, X, 16);
+        tcg_temp_free(X);
+    }
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
+    gen_partset_reg(OS_WORD, DREG(insn, 0), reg);
+    set_cc_op(s, CC_OP_FLAGS);
+}
+
+/*
+ * rol.w/ror.w/roxl.w/roxr.w <ea> -- memory rotates operate on a word
+ * and always rotate by exactly one bit.
+ */
+DISAS_INSN(rotate_mem)
+{
+    TCGv src;
+    TCGv addr;
+    TCGv shift;
+    int left = (insn & 0x100);
+
+    SRC_EA(env, src, OS_WORD, 0, &addr);
+
+    shift = tcg_const_i32(1);
+    /* In the memory form (registered as e4c0/fcc0) bit 9 selects
+     * rol/ror vs roxl/roxr; bit 3 is part of the effective-address
+     * field here, unlike the register forms.  */
+    if (insn & 0x0200) {
+        rotate(src, shift, left, 16);
+    } else {
+        TCGv X = rotate_x(src, src, shift, left, 16);
+        rotate_x_flags(src, X, 16);
+        tcg_temp_free(X);
+    }
+    tcg_temp_free(shift);
+    DEST_EA(env, insn, OS_WORD, src, &addr);
+    set_cc_op(s, CC_OP_FLAGS);
+}
+
 static void bitfield_param(uint16_t ext, TCGv *offset, TCGv *width, TCGv *mask)
 {
     TCGv tmp;
@@ -4492,6 +4899,13 @@  void register_m68k_insns (CPUM68KState *env)
     INSN(shift16_reg, e060, f0f0, M68000);
     INSN(shift_reg, e0a0, f0f0, M68000);
     INSN(shift_mem, e0c0, fcc0, M68000);
+    INSN(rotate_im, e090, f0f0, M68000);
+    INSN(rotate8_im, e010, f0f0, M68000);
+    INSN(rotate16_im, e050, f0f0, M68000);
+    INSN(rotate_reg, e0b0, f0f0, M68000);
+    INSN(rotate8_reg, e030, f0f0, M68000);
+    INSN(rotate16_reg, e070, f0f0, M68000);
+    INSN(rotate_mem, e4c0, fcc0, M68000);
     INSN(bitfield_mem,e8c0, f8c0, BITFIELD);
     INSN(bitfield_reg,e8c0, f8f8, BITFIELD);
     INSN(undef_fpu, f000, f000, CF_ISA_A);