[v3,06/15] target-tricore: Add instructions of SRC opcode format

Message ID 1407173932-969-7-git-send-email-kbastian@mail.uni-paderborn.de

Commit Message

Bastian Koppelmann Aug. 4, 2014, 5:38 p.m. UTC
Add instructions of SRC opcode format.
Add micro-op generator functions for add, conditional add/sub, and the shifts (gen_shi/gen_shaci).

Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
---
v2 -> v3:
    - Remove helper_shac, gen_shac
    - Remove len parameter of gen_shaci
    - Change gen_shaci to a special case.
    - Add gen_calc_psw_* functions to generate the calculation of PSW bits.
    - Add gen_add_i32 micro-op generator, that handles PSW bits.
    - Replace ADD instructions with gen_add_i32 for PSW bit calculation.
    - Change OP_COND to handle PSW bits.
    - MOV_A: Remove sign extended loading of const4
    - gen_shi: Remove wrong documentation

 target-tricore/helper.h    |  16 +++
 target-tricore/translate.c | 244 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 260 insertions(+)

--
2.0.4

Comments

Richard Henderson Aug. 4, 2014, 6:35 p.m. UTC | #1
On 08/04/2014 07:38 AM, Bastian Koppelmann wrote:
> +static inline void gen_calc_psw_sv_i32(TCGv ret, TCGv arg)
> +{
> +    tcg_gen_xor_tl(ret, ret, arg);
> +}

Not exclusive or, inclusive or.  And there's really no need for a helper for this.
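
For illustration, a minimal sketch of the fixed accumulation inlined at the
call site, assuming PSW_SV keeps the sticky bit in the same position as
PSW_V (it folds down to a single inclusive or, which is why no helper is
needed):

	/* sticky overflow: SV |= V */
	tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V);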

> +static inline void gen_calc_psw_av_i32(TCGv ret, TCGv arg)
> +{
> +    TCGv temp = tcg_temp_new();
> +    tcg_gen_muli_tl(temp, arg, 2);

Strength reduce to tcg_gen_add_tl(temp, arg, arg).

> +    tcg_gen_xor_tl(temp, arg, temp);
> +    tcg_gen_andi_tl(ret, temp, 0x80000000);

No need for the andi if you do as I suggested and only consider the high bit
when reading the value from PSW_AV.
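
Putting both suggestions together, a sketch of the reduced AV computation,
assuming arg does not alias cpu_PSW_AV and that readers only ever test
bit 31 of the register:

	/* AV = arg ^ (arg << 1), valid in the high bit only */
	tcg_gen_add_tl(cpu_PSW_AV, arg, arg);        /* arg << 1 */
	tcg_gen_xor_tl(cpu_PSW_AV, arg, cpu_PSW_AV);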

> +static inline void gen_calc_psw_sav_i32(TCGv ret, TCGv arg)
> +{
> +    tcg_gen_xor_tl(ret, ret, arg);
> +}

Again, inclusive or.

> +static inline void gen_add_i32(TCGv ret, TCGv r1, TCGv r2)

I strongly suggest that you name this something else, because you've gone and
confused yourself: this only applies to adds in the data registers.

> +    TCGv t0 = tcg_temp_new_i32();
> +    /* Addition and set V/SV bits */
> +    tcg_gen_movi_tl(t0, 0);
> +    tcg_gen_add2_tl(ret, cpu_PSW_V, r1, t0, r2, t0);

This computation is not overflow, but carry.  As I said, see e.g. the ARM port
where we properly compute overflow as

	R = A + B
	VF = (R ^ A) & ~(A ^ B)

i.e.

	tcg_gen_xor_tl(VF, R, A)
	tcg_gen_xor_tl(tmp, A, B)
	tcg_gen_andc_tl(VF, VF, tmp)

considering only the most significant bit as the overflow.
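
A sketch of what the whole generator might look like under that scheme (the
name gen_add_d and the scratch result are assumptions, the latter so that
ret may alias r1 or r2):

	static inline void gen_add_d(TCGv ret, TCGv r1, TCGv r2)
	{
	    TCGv t0 = tcg_temp_new();
	    TCGv result = tcg_temp_new();

	    tcg_gen_add_tl(result, r1, r2);
	    /* V = (result ^ r1) & ~(r1 ^ r2); high bit is the overflow */
	    tcg_gen_xor_tl(cpu_PSW_V, result, r1);
	    tcg_gen_xor_tl(t0, r1, r2);
	    tcg_gen_andc_tl(cpu_PSW_V, cpu_PSW_V, t0);
	    /* SV |= V (sticky) */
	    tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V);
	    /* AV = result ^ (result << 1), high bit; SAV |= AV */
	    tcg_gen_add_tl(cpu_PSW_AV, result, result);
	    tcg_gen_xor_tl(cpu_PSW_AV, result, cpu_PSW_AV);
	    tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV);

	    tcg_gen_mov_tl(ret, result);
	    tcg_temp_free(result);
	    tcg_temp_free(t0);
	}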

> +#define OP_COND(insn)\
> +static inline void gen_cond_##insn(int cond, TCGv r1, TCGv r2, TCGv r3, \
> +                                   TCGv r4)                             \
> +{                                                                       \
> +    TCGv temp = tcg_temp_new();                                         \
> +    TCGv temp2 = tcg_temp_new();                                        \
> +    TCGv t0 = tcg_const_i32(0);                                         \
> +                                                                        \
> +    tcg_gen_##insn ## 2_tl(temp, temp2, r1, t0, r2, t0);                \
> +    tcg_gen_movcond_tl(cond, r3, r4, t0, temp, r3);                     \
> +    /* Set PSW_V conditional */                                         \
> +    tcg_gen_movcond_tl(cond, cpu_PSW_V, r4, t0, temp2, cpu_PSW_V);      \
> +    /* Set PSW_SV conditional */                                        \
> +    gen_calc_psw_sv_i32(temp2, cpu_PSW_SV);                             \
> +    tcg_gen_movcond_tl(cond, cpu_PSW_SV, r4, t0, temp2, cpu_PSW_SV);    \
> +    /* calc AV bit */                                                   \
> +    gen_calc_psw_av_i32(temp2, temp);                                   \
> +    tcg_gen_movcond_tl(cond, cpu_PSW_AV, r4, t0, temp2, cpu_PSW_AV);    \
> +    /* calc SAV bit */                                                  \
> +    gen_calc_psw_sav_i32(temp2, cpu_PSW_SAV);                           \
> +    tcg_gen_movcond_tl(cond, cpu_PSW_SAV, r4, t0, temp2, cpu_PSW_SAV);  \
> +                                                                        \
> +    tcg_temp_free(t0);                                                  \
> +    tcg_temp_free(temp);                                                \
> +    tcg_temp_free(temp2);                                               \
> +}                                                                       \
> +                                                                        \
> +static inline void gen_condi_##insn(int cond, TCGv r1, int32_t r2,      \
> +                                    TCGv r3, TCGv r4)                   \
> +{                                                                       \
> +    TCGv temp = tcg_const_i32(r2);                                      \
> +    gen_cond_##insn(cond, r1, temp, r3, r4);                            \
> +    tcg_temp_free(temp);                                                \
> +}
> +
> +OP_COND(add)
> +OP_COND(sub)

BTW, this macro substitution isn't going to work well as is, since there are
different overflow computations for addition and subtraction.
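
For reference, the subtraction side flips the operand-sign condition; a
sketch with placeholder operands (R = A - B, tmp a scratch register):

	/* V = (R ^ A) & (A ^ B); overflow only when the operands
	   have different signs */
	tcg_gen_sub_tl(R, A, B);
	tcg_gen_xor_tl(cpu_PSW_V, R, A);
	tcg_gen_xor_tl(tmp, A, B);
	tcg_gen_and_tl(cpu_PSW_V, cpu_PSW_V, tmp);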

> +static void gen_shaci(TCGv ret, TCGv r1, int32_t shift_count)
> +{
> +    uint32_t msk, msk_start;
> +    TCGv_i64 temp = tcg_temp_new_i64();
> +    TCGv_i64 result = tcg_temp_new_i64();
> +    TCGv_i64 t_0 = tcg_const_i64(0);
> +    TCGv_i64 t_1 = tcg_const_i64(1);
> +    TCGv_i64 t_max = tcg_const_i64(0x7FFFFFFF);
> +    TCGv_i64 t_min = tcg_const_i64(-(0x80000000L));
> +
> +    if (shift_count == 0) {
> +        /* Clear PSW.C */
> +        tcg_gen_movi_tl(cpu_PSW_C, 0);
> +        tcg_gen_mov_tl(ret, r1);
> +    } else if (shift_count > 0) {
> +        tcg_gen_ext_i32_i64(temp, r1);
> +        tcg_gen_shli_i64(result, temp, shift_count);
> +        /* calc carry */
> +        msk_start = 32 - shift_count;
> +        msk = ((1 << shift_count) - 1) << msk_start;
> +        tcg_gen_andi_tl(cpu_PSW_C, r1, msk);

You don't need a 64-bit shift here.
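
A 32-bit sketch of the positive case, assuming the V computation is handled
separately (carry mask unchanged; C is computed first since ret may alias r1):

	tcg_gen_andi_tl(cpu_PSW_C, r1, msk);
	tcg_gen_shli_tl(ret, r1, shift_count);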

> +    } else {
> +        tcg_gen_ext_i32_i64(temp, r1);
> +        tcg_gen_sari_i64(result, temp, -(shift_count));
> +        /* calc carry */
> +        msk = (1 << (shift_count - 1)) - 1;
> +        tcg_gen_andi_tl(cpu_PSW_C, r1, msk);
> +    }

Likewise, although that does mean you need to handle the special case of -32.
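
A 32-bit sketch of the negative case with -32 folded in (sari only takes
counts 0..31, and the carry mask for -32 would overflow a 32-bit shift; note
the mask uses -shift_count, since shift_count itself is negative here):

	if (shift_count == -32) {
	    tcg_gen_mov_tl(cpu_PSW_C, r1);          /* every bit shifts out */
	    tcg_gen_sari_tl(ret, r1, 31);           /* result is the sign */
	} else {
	    msk = (1u << -shift_count) - 1;
	    tcg_gen_andi_tl(cpu_PSW_C, r1, msk);    /* bits shifted out low */
	    tcg_gen_sari_tl(ret, r1, -shift_count);
	}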

> +    /* calc v/sv bits only if shift happened and write back 64bit result*/
> +    if (shift_count != 0) {
> +        /* v/sv */
> +        tcg_gen_movcond_i64(TCG_COND_GT, temp, result, t_max, t_1, t_0);
> +        tcg_gen_movcond_i64(TCG_COND_LT, temp, result, t_min, t_1, temp);
> +        tcg_gen_trunc_i64_i32(cpu_PSW_V, temp);
> +
> +        gen_calc_psw_sv_i32(cpu_PSW_SV, cpu_PSW_V);
> +        /* write back result */
> +        tcg_gen_trunc_i64_i32(ret, result);
> +    }

Note that right shifts can't overflow, since the magnitude always reduces.

I suppose using the 64-bit shift result is a reasonable way to compute left
shift overflow on a 64-bit host.  It seems like there's an easier way to
compute this that would be better for 32-bit hosts though...

One way is to adjust the comparisons to apply prior to the shift.  That is,

	R >= 0x7fff_ffff
	= (A << C) >= 0x7fff_ffff
	= A >= (0x7fff_ffff >> C)

Also, using 2 setcond and 1 or is more efficient than 2 movcond with those
constants on most hosts.
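
A sketch of that pre-shift test for the left-shift path, all in 32 bits
(temp/temp2 as 32-bit scratch registers; the constant handling is an
assumption and only holds for shift_count >= 1):

	/* overflow iff r1 << shift_count leaves the signed 32-bit range */
	tcg_gen_setcondi_tl(TCG_COND_GT, temp, r1,
	                    0x7fffffff >> shift_count);
	tcg_gen_setcondi_tl(TCG_COND_LT, temp2, r1,
	                    -(int32_t)(0x80000000u >> shift_count));
	tcg_gen_or_tl(cpu_PSW_V, temp, temp2);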

> +    case OPC1_16_SRC_ADD_A:
> +        gen_addi_i32(cpu_gpr_a[r1], cpu_gpr_a[r1], const4);
> +        break;

No PSW computation here.
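
i.e. the address-register add can stay a plain TCG add; a sketch:

	case OPC1_16_SRC_ADD_A:
	    tcg_gen_addi_tl(cpu_gpr_a[r1], cpu_gpr_a[r1], const4);
	    break;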


r~

Patch

diff --git a/target-tricore/helper.h b/target-tricore/helper.h
index e69de29..5884240 100644
--- a/target-tricore/helper.h
+++ b/target-tricore/helper.h
@@ -0,0 +1,16 @@ 
+/*
+ *  Copyright (c) 2012-2014 Bastian Koppelmann C-Lab/University Paderborn
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 0d30c51..d1e6669 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -27,6 +27,7 @@ 
 #include "exec/helper-gen.h"

 #include "tricore-opcodes.h"
+
 /*
  * TCG registers
  */
@@ -102,8 +103,251 @@  void tricore_cpu_dump_state(CPUState *cs, FILE *f,

 }

+/*
+ * Functions to generate micro-ops
+ */
+
+/* Functions for calculating PSW status bits */
+
+static inline void gen_calc_psw_sv_i32(TCGv ret, TCGv arg)
+{
+    tcg_gen_xor_tl(ret, ret, arg);
+}
+
+static inline void gen_calc_psw_av_i32(TCGv ret, TCGv arg)
+{
+    TCGv temp = tcg_temp_new();
+    tcg_gen_muli_tl(temp, arg, 2);
+    tcg_gen_xor_tl(temp, arg, temp);
+    tcg_gen_andi_tl(ret, temp, 0x80000000);
+    tcg_temp_free(temp);
+}
+
+static inline void gen_calc_psw_sav_i32(TCGv ret, TCGv arg)
+{
+    tcg_gen_xor_tl(ret, ret, arg);
+}
+
+/* Functions for arithmetic instructions  */
+
+static inline void gen_add_i32(TCGv ret, TCGv r1, TCGv r2)
+{
+    TCGv t0 = tcg_temp_new_i32();
+    /* Addition and set V/SV bits */
+    tcg_gen_movi_tl(t0, 0);
+    tcg_gen_add2_tl(ret, cpu_PSW_V, r1, t0, r2, t0);
+    gen_calc_psw_sv_i32(cpu_PSW_SV, cpu_PSW_V);
+    /* Calc AV/SAV bits */
+    gen_calc_psw_av_i32(cpu_PSW_AV, ret);
+    gen_calc_psw_sav_i32(cpu_PSW_SAV, cpu_PSW_AV);
+    tcg_temp_free(t0);
+}
+
+static inline void gen_addi_i32(TCGv ret, TCGv r1, target_ulong r2)
+{
+    TCGv temp = tcg_const_i32(r2);
+    gen_add_i32(ret, r1, temp);
+    tcg_temp_free(temp);
+}
+
+#define OP_COND(insn)\
+static inline void gen_cond_##insn(int cond, TCGv r1, TCGv r2, TCGv r3, \
+                                   TCGv r4)                             \
+{                                                                       \
+    TCGv temp = tcg_temp_new();                                         \
+    TCGv temp2 = tcg_temp_new();                                        \
+    TCGv t0 = tcg_const_i32(0);                                         \
+                                                                        \
+    tcg_gen_##insn ## 2_tl(temp, temp2, r1, t0, r2, t0);                \
+    tcg_gen_movcond_tl(cond, r3, r4, t0, temp, r3);                     \
+    /* Set PSW_V conditional */                                         \
+    tcg_gen_movcond_tl(cond, cpu_PSW_V, r4, t0, temp2, cpu_PSW_V);      \
+    /* Set PSW_SV conditional */                                        \
+    gen_calc_psw_sv_i32(temp2, cpu_PSW_SV);                             \
+    tcg_gen_movcond_tl(cond, cpu_PSW_SV, r4, t0, temp2, cpu_PSW_SV);    \
+    /* calc AV bit */                                                   \
+    gen_calc_psw_av_i32(temp2, temp);                                   \
+    tcg_gen_movcond_tl(cond, cpu_PSW_AV, r4, t0, temp2, cpu_PSW_AV);    \
+    /* calc SAV bit */                                                  \
+    gen_calc_psw_sav_i32(temp2, cpu_PSW_SAV);                           \
+    tcg_gen_movcond_tl(cond, cpu_PSW_SAV, r4, t0, temp2, cpu_PSW_SAV);  \
+                                                                        \
+    tcg_temp_free(t0);                                                  \
+    tcg_temp_free(temp);                                                \
+    tcg_temp_free(temp2);                                               \
+}                                                                       \
+                                                                        \
+static inline void gen_condi_##insn(int cond, TCGv r1, int32_t r2,      \
+                                    TCGv r3, TCGv r4)                   \
+{                                                                       \
+    TCGv temp = tcg_const_i32(r2);                                      \
+    gen_cond_##insn(cond, r1, temp, r3, r4);                            \
+    tcg_temp_free(temp);                                                \
+}
+
+OP_COND(add)
+OP_COND(sub)
+
+static void gen_shi(TCGv ret, TCGv r1, int32_t shift_count)
+{
+    if (shift_count == -32) {
+        tcg_gen_movi_tl(ret, 0);
+    } else if (shift_count >= 0) {
+        tcg_gen_shli_tl(ret, r1, shift_count);
+    } else {
+        tcg_gen_shri_tl(ret, r1, (-shift_count));
+    }
+}
+
+static void gen_shaci(TCGv ret, TCGv r1, int32_t shift_count)
+{
+    uint32_t msk, msk_start;
+    TCGv_i64 temp = tcg_temp_new_i64();
+    TCGv_i64 result = tcg_temp_new_i64();
+    TCGv_i64 t_0 = tcg_const_i64(0);
+    TCGv_i64 t_1 = tcg_const_i64(1);
+    TCGv_i64 t_max = tcg_const_i64(0x7FFFFFFF);
+    TCGv_i64 t_min = tcg_const_i64(-(0x80000000L));
+
+    if (shift_count == 0) {
+        /* Clear PSW.C */
+        tcg_gen_movi_tl(cpu_PSW_C, 0);
+        tcg_gen_mov_tl(ret, r1);
+    } else if (shift_count > 0) {
+        tcg_gen_ext_i32_i64(temp, r1);
+        tcg_gen_shli_i64(result, temp, shift_count);
+        /* calc carry */
+        msk_start = 32 - shift_count;
+        msk = ((1 << shift_count) - 1) << msk_start;
+        tcg_gen_andi_tl(cpu_PSW_C, r1, msk);
+    } else {
+        tcg_gen_ext_i32_i64(temp, r1);
+        tcg_gen_sari_i64(result, temp, -(shift_count));
+        /* calc carry */
+        msk = (1 << (shift_count - 1)) - 1;
+        tcg_gen_andi_tl(cpu_PSW_C, r1, msk);
+    }
+    /* calc v/sv bits only if shift happened and write back 64bit result*/
+    if (shift_count != 0) {
+        /* v/sv */
+        tcg_gen_movcond_i64(TCG_COND_GT, temp, result, t_max, t_1, t_0);
+        tcg_gen_movcond_i64(TCG_COND_LT, temp, result, t_min, t_1, temp);
+        tcg_gen_trunc_i64_i32(cpu_PSW_V, temp);
+
+        gen_calc_psw_sv_i32(cpu_PSW_SV, cpu_PSW_V);
+        /* write back result */
+        tcg_gen_trunc_i64_i32(ret, result);
+    }
+    /* calc av overflow bit */
+    gen_calc_psw_av_i32(cpu_PSW_AV, ret);
+    /* calc sav overflow bit */
+    gen_calc_psw_sav_i32(cpu_PSW_SAV, cpu_PSW_AV);
+
+    tcg_temp_free_i64(temp);
+    tcg_temp_free_i64(result);
+    tcg_temp_free_i64(t_0);
+    tcg_temp_free_i64(t_1);
+    tcg_temp_free_i64(t_max);
+    tcg_temp_free_i64(t_min);
+}
+
+/*
+ * Functions for decoding instructions
+ */
+
+static void decode_src_opc(DisasContext *ctx, int op1)
+{
+    int r1;
+    int32_t const4;
+    TCGv temp, temp2;
+
+    r1 = MASK_OP_SRC_S1D(ctx->opcode);
+    const4 = MASK_OP_SRC_CONST4_SEXT(ctx->opcode);
+
+    switch (op1) {
+    case OPC1_16_SRC_ADD:
+        gen_addi_i32(cpu_gpr_d[r1], cpu_gpr_d[r1], const4);
+        break;
+    case OPC1_16_SRC_ADD_A15:
+        gen_addi_i32(cpu_gpr_d[15], cpu_gpr_d[r1], const4);
+        break;
+    case OPC1_16_SRC_ADD_15A:
+        gen_addi_i32(cpu_gpr_d[r1], cpu_gpr_d[15], const4);
+        break;
+    case OPC1_16_SRC_ADD_A:
+        gen_addi_i32(cpu_gpr_a[r1], cpu_gpr_a[r1], const4);
+        break;
+    case OPC1_16_SRC_CADD:
+        gen_condi_add(TCG_COND_NE, cpu_gpr_d[r1], const4, cpu_gpr_d[r1],
+                      cpu_gpr_d[15]);
+        break;
+    case OPC1_16_SRC_CADDN:
+        gen_condi_add(TCG_COND_EQ, cpu_gpr_d[r1], const4, cpu_gpr_d[r1],
+                      cpu_gpr_d[15]);
+        break;
+    case OPC1_16_SRC_CMOV:
+        temp = tcg_const_tl(0);
+        temp2 = tcg_const_tl(const4);
+        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_gpr_d[r1], cpu_gpr_d[15], temp,
+                           temp2, cpu_gpr_d[r1]);
+        tcg_temp_free(temp);
+        tcg_temp_free(temp2);
+        break;
+    case OPC1_16_SRC_CMOVN:
+        temp = tcg_const_tl(0);
+        temp2 = tcg_const_tl(const4);
+        tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr_d[r1], cpu_gpr_d[15], temp,
+                           temp2, cpu_gpr_d[r1]);
+        tcg_temp_free(temp);
+        tcg_temp_free(temp2);
+        break;
+    case OPC1_16_SRC_EQ:
+        tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_gpr_d[15], cpu_gpr_d[r1],
+                            const4);
+        break;
+    case OPC1_16_SRC_LT:
+        tcg_gen_setcondi_tl(TCG_COND_LT, cpu_gpr_d[15], cpu_gpr_d[r1],
+                            const4);
+        break;
+    case OPC1_16_SRC_MOV:
+        tcg_gen_movi_tl(cpu_gpr_d[r1], const4);
+        break;
+    case OPC1_16_SRC_MOV_A:
+        tcg_gen_movi_tl(cpu_gpr_a[r1], const4);
+        break;
+    case OPC1_16_SRC_SH:
+        gen_shi(cpu_gpr_d[r1], cpu_gpr_d[r1], const4);
+        break;
+    case OPC1_16_SRC_SHA:
+        gen_shaci(cpu_gpr_d[r1], cpu_gpr_d[r1], const4);
+        break;
+    }
+}
+
 static void decode_16Bit_opc(CPUTRICOREState *env, DisasContext *ctx)
 {
+    int op1;
+
+    op1 = MASK_OP_MAJOR(ctx->opcode);
+
+    switch (op1) {
+    case OPC1_16_SRC_ADD:
+    case OPC1_16_SRC_ADD_A15:
+    case OPC1_16_SRC_ADD_15A:
+    case OPC1_16_SRC_ADD_A:
+    case OPC1_16_SRC_CADD:
+    case OPC1_16_SRC_CADDN:
+    case OPC1_16_SRC_CMOV:
+    case OPC1_16_SRC_CMOVN:
+    case OPC1_16_SRC_EQ:
+    case OPC1_16_SRC_LT:
+    case OPC1_16_SRC_MOV:
+    case OPC1_16_SRC_MOV_A:
+    case OPC1_16_SRC_SH:
+    case OPC1_16_SRC_SHA:
+        decode_src_opc(ctx, op1);
+        break;
+    }
 }

 static void decode_32Bit_opc(CPUTRICOREState *env, DisasContext *ctx)