diff mbox

[4/5] target-tricore: Add instructions of BIT opcode format

Message ID 1411829891-24866-5-git-send-email-kbastian@mail.uni-paderborn.de
State New
Headers show

Commit Message

Bastian Koppelmann Sept. 27, 2014, 2:58 p.m. UTC
Add instructions of BIT opcode format.
Add microcode generator functions gen_bit_1op/gen_bit_2op, which apply one or
two logical operations to single register bits and produce the result in bit 0.

Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
---
 target-tricore/translate.c | 349 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 349 insertions(+)

Comments

Richard Henderson Sept. 28, 2014, 5:22 a.m. UTC | #1
On 09/27/2014 07:58 AM, Bastian Koppelmann wrote:
> +/* D[c] = D[c][0] op1 (D[a][pos1] op2 D[b][pos2]);*/
> +static inline void gen_bit_2op(TCGv ret, TCGv r1, TCGv r2, TCGv r3,
> +                               int pos1, int pos2,
> +                               void(*op1)(TCGv, TCGv, TCGv),
> +                               void(*op2)(TCGv, TCGv, TCGv))
> +{
> +    TCGv temp1, temp2, temp3;
> +
> +    temp1 = tcg_temp_new();
> +    temp2 = tcg_temp_new();
> +    temp3 = tcg_temp_new();
> +
> +    tcg_gen_andi_tl(temp3, r3, 0x1);
> +
> +    tcg_gen_andi_tl(temp2, r2 , (0x1u << pos2));
> +    tcg_gen_shri_tl(temp2, temp2, pos2);
> +
> +    tcg_gen_andi_tl(temp1, r1, (0x1u << pos1));
> +    tcg_gen_shri_tl(temp1, temp1, pos1);
> +
> +    (*op1)(temp1, temp1, temp2);
> +    (*op2)(ret , temp3, temp1);

This incorrectly clobbers bits 1:31 of ret.  You want

  shri tmp2, r2, pos2
  shri tmp1, r1, pos1
  op1(tmp1, tmp1, tmp2)
  op2(tmp1, r3, tmp1)
  deposit ret, ret, tmp1, 0, 1


> +    TCGv temp1, temp2;
> +
> +    temp1 = tcg_temp_new();
> +    temp2 = tcg_temp_new();
> +
> +    tcg_gen_andi_tl(temp2, r2, (0x1u << pos2));
> +    tcg_gen_shri_tl(temp2, temp2, pos2);
> +
> +    tcg_gen_andi_tl(temp1, r1, (0x1u << pos1));
> +    tcg_gen_shri_tl(temp1, temp1, pos1);
> +
> +    (*op1)(ret, temp1, temp2);

This one, though, does get to set the whole register.  That said, I think you
should *not* mask the two inputs, but instead mask the one output.  That saves
one operation, and allows NOR to not need a special case.

> +    case OPC2_32_BIT_AND_NOR_T:
> +        gen_bit_2op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r3],
> +                    pos1, pos2, &tcg_gen_or_tl, &tcg_gen_andc_tl);
> +        break;

Without trying to take into account the properties of the tcg backend, this
seems less than ideal.  Yes, it's correct, but so is

	tcg_gen_nor_tl, tcg_gen_and_tl

which matches the name of the instruction.

If the tcg backend is sparc or ppc, it's more efficient too, since nor is
actually present in the isa.  But of course, arm and even haswell x86 have andc
but don't have nor.  This is stuff that a normal compiler optimizer could sort
out, but we don't have that for tcg.

I'd be willing to accept something conditionalized on TCG_TARGET_HAS_nor_i32,
if you like, but otherwise just match the instruction.

> +static void decode_bit_insert(CPUTriCoreState *env, DisasContext *ctx)

It's probably better to implement this with one right-shift and one deposit.
Certainly would be easier to read and follow.

> +    case OPC2_32_BIT_XNOR_T:
> +        gen_bit_1op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
> +                    pos1, pos2, &tcg_gen_xor_tl);

tcg_gen_eqv_tl

> +    case OPC2_32_BIT_OR_NOR_T:
> +        gen_bit_2op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r3],
> +                    pos1, pos2, &tcg_gen_or_tl, &tcg_gen_orc_tl);
> +        break;

Again, probably better with nor + or, or conditionalization.

> +    case OPC2_32_BIT_SH_XNOR_T:
> +        gen_bit_1op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2],
> +                    pos1, pos2, &tcg_gen_xor_tl);
> +        tcg_gen_not_tl(temp, temp);

Again, eqv.


r~
Bastian Koppelmann Sept. 28, 2014, 12:05 p.m. UTC | #2
On 09/28/2014 06:22 AM, Richard Henderson wrote:
>> +    case OPC2_32_BIT_XNOR_T:
>> +        gen_bit_1op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
>> +                    pos1, pos2, &tcg_gen_xor_tl);
> tcg_gen_eqv_tl
>
I often don't use the optimal tcg-frontend operation, since the 
documentation I mostly use is 
http://wiki.qemu.org/Documentation/TCG/frontend-ops, which is outdated. 
That said, I'm willing to update the documentation to include all the 
latest tcg-ops. Richard, would you be willing to review those changes?

Thanks,

Bastian
Peter Maydell Sept. 28, 2014, 1:27 p.m. UTC | #3
On 28 September 2014 13:05, Bastian Koppelmann
<kbastian@mail.uni-paderborn.de> wrote:
> I often don't use the optimal tcg-frontend operation, since the
> documentation I mostly use is
> http://wiki.qemu.org/Documentation/TCG/frontend-ops, which is outdated.

Probably better to read (and update if necessary) tcg/README.

-- PMM
diff mbox

Patch

diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 871c3cd..34375a9 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -427,6 +427,56 @@  static inline void gen_subs(TCGv ret, TCGv r1, TCGv r2)
     gen_helper_sub_ssov(ret, cpu_env, r1, r2);
 }
 
+/* ret = D[c][0] op2 (D[a][pos1] op1 D[b][pos2]); only bit 0 is meaningful */
+static inline void gen_bit_2op(TCGv ret, TCGv r1, TCGv r2, TCGv r3,
+                               int pos1, int pos2,
+                               void(*op1)(TCGv, TCGv, TCGv),
+                               void(*op2)(TCGv, TCGv, TCGv))
+{
+    TCGv temp1, temp2, temp3;
+
+    temp1 = tcg_temp_new();
+    temp2 = tcg_temp_new();
+    temp3 = tcg_temp_new();
+
+    tcg_gen_andi_tl(temp3, r3, 0x1);
+
+    tcg_gen_andi_tl(temp2, r2 , (0x1u << pos2));
+    tcg_gen_shri_tl(temp2, temp2, pos2);
+
+    tcg_gen_andi_tl(temp1, r1, (0x1u << pos1));
+    tcg_gen_shri_tl(temp1, temp1, pos1);
+
+    (*op1)(temp1, temp1, temp2);
+    (*op2)(ret , temp3, temp1);
+
+    tcg_temp_free(temp1);
+    tcg_temp_free(temp2);
+    tcg_temp_free(temp3);
+}
+
+/* result = D[a][pos1] op1 D[b][pos2]; */
+static inline void gen_bit_1op(TCGv ret, TCGv r1, TCGv r2,
+                               int pos1, int pos2,
+                               void(*op1)(TCGv, TCGv, TCGv))
+{
+    TCGv temp1, temp2;
+
+    temp1 = tcg_temp_new();
+    temp2 = tcg_temp_new();
+
+    tcg_gen_andi_tl(temp2, r2, (0x1u << pos2));
+    tcg_gen_shri_tl(temp2, temp2, pos2);
+
+    tcg_gen_andi_tl(temp1, r1, (0x1u << pos1));
+    tcg_gen_shri_tl(temp1, temp1, pos1);
+
+    (*op1)(ret, temp1, temp2);
+
+    tcg_temp_free(temp1);
+    tcg_temp_free(temp2);
+}
+
 /* helpers for generating program flow micro-ops */
 
 static inline void gen_save_pc(target_ulong pc)
@@ -1347,6 +1397,283 @@  static void decode_abs_storeb_h(CPUTriCoreState *env, DisasContext *ctx)
     tcg_temp_free(temp);
 }
 
+/* Bit-format */
+
+static void decode_bit_andacc(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    int r1, r2, r3;
+    int pos1, pos2;
+    TCGv temp;
+
+    r1 = MASK_OP_BIT_S1(ctx->opcode);
+    r2 = MASK_OP_BIT_S2(ctx->opcode);
+    r3 = MASK_OP_BIT_D(ctx->opcode);
+    pos1 = MASK_OP_BIT_POS1(ctx->opcode);
+    pos2 = MASK_OP_BIT_POS2(ctx->opcode);
+    op2 = MASK_OP_BIT_OP2(ctx->opcode);
+
+    temp = tcg_temp_new();
+
+    switch (op2) {
+    case OPC2_32_BIT_AND_AND_T:
+        gen_bit_2op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r3],
+                    pos1, pos2, &tcg_gen_and_tl, &tcg_gen_and_tl);
+        break;
+    case OPC2_32_BIT_AND_ANDN_T:
+        gen_bit_2op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r3],
+                    pos1, pos2, &tcg_gen_andc_tl, &tcg_gen_and_tl);
+        break;
+    case OPC2_32_BIT_AND_NOR_T:
+        gen_bit_2op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r3],
+                    pos1, pos2, &tcg_gen_or_tl, &tcg_gen_andc_tl);
+        break;
+    case OPC2_32_BIT_AND_OR_T:
+        gen_bit_2op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r3],
+                    pos1, pos2, &tcg_gen_or_tl, &tcg_gen_and_tl);
+        break;
+    }
+    tcg_gen_andi_tl(temp, temp, 0x1);
+    tcg_gen_andi_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], 0xfffffffe);
+    tcg_gen_add_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], temp);
+    tcg_temp_free(temp);
+}
+
+static void decode_bit_logical_t(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    int r1, r2, r3;
+    int pos1, pos2;
+    r1 = MASK_OP_BIT_S1(ctx->opcode);
+    r2 = MASK_OP_BIT_S2(ctx->opcode);
+    r3 = MASK_OP_BIT_D(ctx->opcode);
+    pos1 = MASK_OP_BIT_POS1(ctx->opcode);
+    pos2 = MASK_OP_BIT_POS2(ctx->opcode);
+    op2 = MASK_OP_BIT_OP2(ctx->opcode);
+
+    switch (op2) {
+    case OPC2_32_BIT_AND_T:
+        gen_bit_1op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_and_tl);
+        break;
+    case OPC2_32_BIT_ANDN_T:
+        gen_bit_1op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_andc_tl);
+        break;
+    case OPC2_32_BIT_NOR_T:
+        gen_bit_1op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_nor_tl);
+        tcg_gen_andi_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], 0x1);
+        break;
+    case OPC2_32_BIT_OR_T:
+        gen_bit_1op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_or_tl);
+        break;
+    }
+}
+
+static void decode_bit_insert(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    int r1, r2, r3;
+    int pos1, pos2;
+    TCGv temp, temp2;
+    op2 = MASK_OP_BIT_OP2(ctx->opcode);
+    r1 = MASK_OP_BIT_S1(ctx->opcode);
+    r2 = MASK_OP_BIT_S2(ctx->opcode);
+    r3 = MASK_OP_BIT_D(ctx->opcode);
+    pos1 = MASK_OP_BIT_POS1(ctx->opcode);
+    pos2 = MASK_OP_BIT_POS2(ctx->opcode);
+
+    /* D[c] = {D[a][31:(pos1+1)], D[b][pos2], D[a][(pos1-1):0]}; */
+
+    temp = tcg_temp_new();
+    temp2 = tcg_temp_new();
+
+    /* temp2 = {D[a][31:(pos1+1)], 0} */
+    tcg_gen_andi_tl(temp2, cpu_gpr_d[r1],
+                   ((1 << (31 - pos1)) - 1) << (pos1 + 1));
+    /* temp = D[b][pos2] */;
+    tcg_gen_andi_tl(temp, cpu_gpr_d[r2], (1 << pos2));
+
+    if (op2 == OPC2_32_BIT_INSN_T) {
+        tcg_gen_not_tl(temp, temp);
+        tcg_gen_andi_tl(temp, temp, 1 << pos2);
+    }
+    /* temp2 = {D[a][31:(pos1+1)], D[b][pos2], 0} */
+    tcg_gen_shri_tl(temp, temp, pos2);
+    tcg_gen_shli_tl(temp, temp, pos1);
+    tcg_gen_add_tl(temp2, temp2, temp);
+
+    tcg_gen_andi_tl(temp, cpu_gpr_d[r1], (1 << pos1) - 1);
+    tcg_gen_add_tl(cpu_gpr_d[r3], temp2, temp);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
+static void decode_bit_logical_t2(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+
+    int r1, r2, r3;
+    int pos1, pos2;
+
+    op2 = MASK_OP_BIT_OP2(ctx->opcode);
+    r1 = MASK_OP_BIT_S1(ctx->opcode);
+    r2 = MASK_OP_BIT_S2(ctx->opcode);
+    r3 = MASK_OP_BIT_D(ctx->opcode);
+    pos1 = MASK_OP_BIT_POS1(ctx->opcode);
+    pos2 = MASK_OP_BIT_POS2(ctx->opcode);
+
+    switch (op2) {
+    case OPC2_32_BIT_NAND_T:
+        gen_bit_1op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_nand_tl);
+        tcg_gen_andi_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], 0x1);
+        break;
+    case OPC2_32_BIT_ORN_T:
+        gen_bit_1op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_orc_tl);
+        tcg_gen_andi_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], 0x1);
+        break;
+    case OPC2_32_BIT_XNOR_T:
+        gen_bit_1op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_xor_tl);
+        tcg_gen_not_tl(cpu_gpr_d[r3], cpu_gpr_d[r3]);
+        tcg_gen_andi_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], 0x1);
+        break;
+    case OPC2_32_BIT_XOR_T:
+        gen_bit_1op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_xor_tl);
+        break;
+    }
+}
+
+static void decode_bit_orand(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+
+    int r1, r2, r3;
+    int pos1, pos2;
+    TCGv temp;
+
+    op2 = MASK_OP_BIT_OP2(ctx->opcode);
+    r1 = MASK_OP_BIT_S1(ctx->opcode);
+    r2 = MASK_OP_BIT_S2(ctx->opcode);
+    r3 = MASK_OP_BIT_D(ctx->opcode);
+    pos1 = MASK_OP_BIT_POS1(ctx->opcode);
+    pos2 = MASK_OP_BIT_POS2(ctx->opcode);
+
+    temp = tcg_temp_new();
+
+    switch (op2) {
+    case OPC2_32_BIT_OR_AND_T:
+        gen_bit_2op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r3],
+                    pos1, pos2, &tcg_gen_and_tl, &tcg_gen_or_tl);
+        break;
+    case OPC2_32_BIT_OR_ANDN_T:
+        gen_bit_2op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r3],
+                    pos1, pos2, &tcg_gen_andc_tl, &tcg_gen_or_tl);
+        break;
+    case OPC2_32_BIT_OR_NOR_T:
+        gen_bit_2op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r3],
+                    pos1, pos2, &tcg_gen_or_tl, &tcg_gen_orc_tl);
+        break;
+    case OPC2_32_BIT_OR_OR_T:
+        gen_bit_2op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2], cpu_gpr_d[r3],
+                    pos1, pos2, &tcg_gen_or_tl, &tcg_gen_or_tl);
+        break;
+    }
+    tcg_gen_andi_tl(temp, temp, 0x1);
+    tcg_gen_andi_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], 0xfffffffe);
+    tcg_gen_add_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], temp);
+    tcg_temp_free(temp);
+}
+
+static void decode_bit_sh_logic1(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    int r1, r2, r3;
+    int pos1, pos2;
+    TCGv temp;
+
+    op2 = MASK_OP_BIT_OP2(ctx->opcode);
+    r1 = MASK_OP_BIT_S1(ctx->opcode);
+    r2 = MASK_OP_BIT_S2(ctx->opcode);
+    r3 = MASK_OP_BIT_D(ctx->opcode);
+    pos1 = MASK_OP_BIT_POS1(ctx->opcode);
+    pos2 = MASK_OP_BIT_POS2(ctx->opcode);
+
+    temp = tcg_temp_new();
+
+    switch (op2) {
+    case OPC2_32_BIT_SH_AND_T:
+        gen_bit_1op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_and_tl);
+        break;
+    case OPC2_32_BIT_SH_ANDN_T:
+        gen_bit_1op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_andc_tl);
+        break;
+    case OPC2_32_BIT_SH_NOR_T:
+        gen_bit_1op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_nor_tl);
+        tcg_gen_andi_tl(temp, temp, 0x1);
+        break;
+    case OPC2_32_BIT_SH_OR_T:
+        gen_bit_1op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_or_tl);
+        break;
+    }
+    tcg_gen_shli_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], 1);
+    tcg_gen_add_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], temp);
+    tcg_temp_free(temp);
+}
+
+static void decode_bit_sh_logic2(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    int r1, r2, r3;
+    int pos1, pos2;
+    TCGv temp;
+
+    op2 = MASK_OP_BIT_OP2(ctx->opcode);
+    r1 = MASK_OP_BIT_S1(ctx->opcode);
+    r2 = MASK_OP_BIT_S2(ctx->opcode);
+    r3 = MASK_OP_BIT_D(ctx->opcode);
+    pos1 = MASK_OP_BIT_POS1(ctx->opcode);
+    pos2 = MASK_OP_BIT_POS2(ctx->opcode);
+
+    temp = tcg_temp_new();
+
+    switch (op2) {
+    case OPC2_32_BIT_SH_NAND_T:
+        gen_bit_1op(temp, cpu_gpr_d[r1] , cpu_gpr_d[r2] ,
+                    pos1, pos2, &tcg_gen_nand_tl);
+        tcg_gen_andi_tl(temp, temp, 0x1);
+        break;
+    case OPC2_32_BIT_SH_ORN_T:
+        gen_bit_1op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_orc_tl);
+        tcg_gen_andi_tl(temp, temp, 0x1);
+        break;
+    case OPC2_32_BIT_SH_XNOR_T:
+        gen_bit_1op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_xor_tl);
+        tcg_gen_not_tl(temp, temp);
+        tcg_gen_andi_tl(temp, temp, 0x1);
+        break;
+    case OPC2_32_BIT_SH_XOR_T:
+        gen_bit_1op(temp, cpu_gpr_d[r1], cpu_gpr_d[r2],
+                    pos1, pos2, &tcg_gen_xor_tl);
+        break;
+    }
+    tcg_gen_shli_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], 1);
+    tcg_gen_add_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], temp);
+    tcg_temp_free(temp);
+}
+
 static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
 {
     int op1;
@@ -1430,6 +1757,28 @@  static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
         address = MASK_OP_B_DISP24(ctx->opcode);
         gen_compute_branch(ctx, op1, 0, 0, 0, address);
         break;
+/* Bit-format */
+    case OPCM_32_BIT_ANDACC:
+        decode_bit_andacc(env, ctx);
+        break;
+    case OPCM_32_BIT_LOGICAL_T1:
+        decode_bit_logical_t(env, ctx);
+        break;
+    case OPCM_32_BIT_INSERT:
+        decode_bit_insert(env, ctx);
+        break;
+    case OPCM_32_BIT_LOGICAL_T2:
+        decode_bit_logical_t2(env, ctx);
+        break;
+    case OPCM_32_BIT_ORAND:
+        decode_bit_orand(env, ctx);
+        break;
+    case OPCM_32_BIT_SH_LOGIC1:
+        decode_bit_sh_logic1(env, ctx);
+        break;
+    case OPCM_32_BIT_SH_LOGIC2:
+        decode_bit_sh_logic2(env, ctx);
+        break;
     }
 }