diff mbox

[5/5] target-tricore: Add instructions of BO opcode format

Message ID 1411829891-24866-6-git-send-email-kbastian@mail.uni-paderborn.de
State New
Headers show

Commit Message

Bastian Koppelmann Sept. 27, 2014, 2:58 p.m. UTC
Add instructions of BO opcode format.
Add microcode generator functions gen_swap, gen_ldmst.
Add helpers for loading/storing byte, halfword, upper halfword, word, and dword in circular and bit-reverse addressing modes.
Add sign extended bitmask for BO_OFF10 field.

Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
---
 target-tricore/helper.h          |  42 +++
 target-tricore/op_helper.c       | 483 ++++++++++++++++++++++++++++++++
 target-tricore/translate.c       | 582 +++++++++++++++++++++++++++++++++++++++
 target-tricore/tricore-opcodes.h |   2 +
 4 files changed, 1109 insertions(+)

Comments

Richard Henderson Sept. 28, 2014, 6:17 a.m. UTC | #1
On 09/27/2014 07:58 AM, Bastian Koppelmann wrote:
> +/* ld circ */
> +DEF_HELPER_4(ld_b_circ, void, env, i32, i32, int)

Avoid, whenever possible, performing memory operations within helpers.  Doing
that complicates the generation of correct code for exact exceptions from the
memory operation.  I'm not saying it can't be done, but what you're doing right
now certainly isn't correct, and it's easier to do the memory operation with
tcg ops.

So now we need to find an efficient way to do this.  In both cases, I think one
helper each for bit-reverse and circular modes should suffice.

E.g.

	tcg_gen_ext16u_tl(temp, A[b+1])
	tcg_gen_add_tl(ea, A[b], temp)
	tcg_gen_qemu_ld_tl(ret, ea, mmu_idx, MO_UB)
	gen_helper_br_update(A[b+1], A[b+1])

and then

/*
 * Bit-reverse index update: the low 16 bits of reg are the index, the high
 * 16 bits the increment.  Both are bit-reversed, added, and the sum is
 * reversed back to form the new index, which replaces the old one in reg.
 */
uint32_t helper_br_update(uint32_t reg)
{
    const uint32_t cur = reg & 0xffff;
    const uint32_t step = reg >> 16;
    const uint32_t next = reverse16(reverse16(cur) + reverse16(step));

    return (reg - cur) + next;
}

/*
 * Circular index update: the low 16 bits of reg are the index, the high
 * 16 bits the buffer length.  off is added to the index; a negative result
 * wraps upwards by one length, a non-negative one is reduced modulo the
 * length.  The new index replaces the old one in reg.
 */
uint32_t helper_circ_update(uint32_t reg, uint32_t off)
{
    const uint32_t cur = reg & 0xffff;
    const uint32_t size = reg >> 16;
    int32_t next = cur + off;

    if (next >= 0) {
        next %= size;
    } else {
        next += size;
    }
    return (reg - cur) + next;
}


> +void helper_ld_w_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
> +{
> +    CIRC_BR_DEFINES(r2)
> +
> +    uint32_t ea0 = env->gpr_a[r2] + index;
> +    uint32_t ea2 = env->gpr_a[r2] + (index + 2 % length_incr);
> +
> +    uint32_t hw_ea2 = cpu_lduw_data(env, ea2) << 16;
> +    uint32_t lw_ea0 = cpu_lduw_data(env, ea0);
> +
> +    env->gpr_d[r1] = hw_ea2 | lw_ea0;
> +
> +    CIRC_CALC_INDEX(r2, off10, length_incr);
> +}

I know that the volume 2 pseudo code for LD.W says two halfword loads, but the
volume 1 text, section 2.5.5 says that the buffer end must be aligned to the
size of the access.  As I read it, two memory operations should not be required.


r~
diff mbox

Patch

diff --git a/target-tricore/helper.h b/target-tricore/helper.h
index fbabbd5..ee8c9a7 100644
--- a/target-tricore/helper.h
+++ b/target-tricore/helper.h
@@ -27,3 +27,45 @@  DEF_HELPER_2(ldlcx, void, env, i32)
 DEF_HELPER_2(lducx, void, env, i32)
 DEF_HELPER_2(stlcx, void, env, i32)
 DEF_HELPER_2(stucx, void, env, i32)
+/* Circular addressing, loads (and the LDMST read-modify-write):
+ * arguments are (env, r1, r2, off10); r1 and r2 are register numbers */
+DEF_HELPER_4(ld_b_circ, void, env, i32, i32, int)
+DEF_HELPER_4(ld_bu_circ, void, env, i32, i32, int)
+DEF_HELPER_4(ld_h_circ, void, env, i32, i32, int)
+DEF_HELPER_4(ld_hu_circ, void, env, i32, i32, int)
+DEF_HELPER_4(ld_q_circ, void, env, i32, i32, int)
+DEF_HELPER_4(ldmst_circ, void, env, i32, i32, int)
+DEF_HELPER_4(ld_a_circ, void, env, i32, i32, int)
+DEF_HELPER_4(ld_w_circ, void, env, i32, i32, int)
+DEF_HELPER_4(ld_d_circ, void, env, i32, i32, int)
+DEF_HELPER_4(ld_da_circ, void, env, i32, i32, int)
+/* Circular addressing, stores and SWAP: arguments are (env, r1, r2, off10);
+ * empty_circ only updates the index and takes (env, r2, off10) */
+DEF_HELPER_4(st_a_circ, void, env, i32, i32, int)
+DEF_HELPER_4(st_b_circ, void, env, i32, i32, int)
+DEF_HELPER_4(st_d_circ, void, env, i32, i32, int)
+DEF_HELPER_4(st_da_circ, void, env, i32, i32, int)
+DEF_HELPER_4(st_h_circ, void, env, i32, i32, int)
+DEF_HELPER_4(st_q_circ, void, env, i32, i32, int)
+DEF_HELPER_4(st_w_circ, void, env, i32, i32, int)
+DEF_HELPER_4(swap_circ, void, env, i32, i32, int)
+DEF_HELPER_3(empty_circ, void, env, i32, int)
+/* Bit-reverse addressing, loads (and the LDMST read-modify-write):
+ * arguments are (env, r1, r2) */
+DEF_HELPER_3(ld_b_br, void, env, i32, i32)
+DEF_HELPER_3(ld_bu_br, void, env, i32, i32)
+DEF_HELPER_3(ld_h_br, void, env, i32, i32)
+DEF_HELPER_3(ld_hu_br, void, env, i32, i32)
+DEF_HELPER_3(ld_q_br, void, env, i32, i32)
+DEF_HELPER_3(ldmst_br, void, env, i32, i32)
+DEF_HELPER_3(ld_a_br, void, env, i32, i32)
+DEF_HELPER_3(ld_w_br, void, env, i32, i32)
+DEF_HELPER_3(ld_d_br, void, env, i32, i32)
+DEF_HELPER_3(ld_da_br, void, env, i32, i32)
+/* Bit-reverse addressing, stores and SWAP: arguments are (env, r1, r2);
+ * empty_br only updates the index and takes (env, r2) */
+DEF_HELPER_3(st_a_br, void, env, i32, i32)
+DEF_HELPER_3(st_b_br, void, env, i32, i32)
+DEF_HELPER_3(st_d_br, void, env, i32, i32)
+DEF_HELPER_3(st_da_br, void, env, i32, i32)
+DEF_HELPER_3(st_h_br, void, env, i32, i32)
+DEF_HELPER_3(st_q_br, void, env, i32, i32)
+DEF_HELPER_3(st_w_br, void, env, i32, i32)
+DEF_HELPER_3(swap_br, void, env, i32, i32)
+DEF_HELPER_2(empty_br, void, env, i32)
diff --git a/target-tricore/op_helper.c b/target-tricore/op_helper.c
index 7a33afd..c965a46 100644
--- a/target-tricore/op_helper.c
+++ b/target-tricore/op_helper.c
@@ -20,6 +20,489 @@ 
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 
+/* Addressing mode helpers */
+
+/*
+ * Declares two locals taken from address register A[reg+1]:
+ *   length_incr - bits [31:16]; used as the buffer length by the circular
+ *                 helpers and as the increment by the bit-reverse helpers
+ *   index       - bits [15:0]
+ */
+#define CIRC_BR_DEFINES(reg)                                             \
+    uint32_t length_incr = (env->gpr_a[(reg) + 1] & 0xffff0000) >> 16;   \
+    uint32_t index = env->gpr_a[(reg) + 1] & 0xffff;
+
+/*
+ * Reverse the bit order of a 16-bit value.
+ * Each byte is bit-reversed on its own with the 64-bit multiply / mask /
+ * modulus trick, and the two reversed bytes then trade places.
+ */
+static uint16_t reverse16(uint16_t val)
+{
+    uint8_t high = (uint8_t)(val >> 8);
+    uint8_t low  = (uint8_t)(val & 0xff);
+
+    uint16_t rh, rl;
+
+    /* the reversed high byte becomes the new low byte and vice versa */
+    rl = (uint16_t)((high * 0x0202020202ULL & 0x010884422010ULL) % 1023);
+    rh = (uint16_t)((low * 0x0202020202ULL & 0x010884422010ULL) % 1023);
+
+    return (rh << 8) | rl;
+}
+
+/*
+ * Bit-reverse index update: reverses the bits of index and incr, adds them,
+ * reverses the sum back and writes {incr, new index} to A[reg+1].
+ */
+#define BR_CALC_INDEX(reg, index, incr) do {                            \
+    int32_t br_next = reverse16(reverse16(index) + reverse16(incr));    \
+    env->gpr_a[(reg) + 1] = ((incr) << 16) | br_next;                   \
+} while (0)
+
+/*
+ * Circular index update.  Relies on the local `index` declared by
+ * CIRC_BR_DEFINES: adds the signed offset `off` to it, wraps the result
+ * with respect to the buffer length `len` and writes {len, new index}
+ * back to A[reg+1].
+ *
+ * A length of zero would make the modulo divide by zero on the host, so
+ * the index is forced to 0 in that case.
+ * NOTE(review): confirm what the architecture specifies for length == 0.
+ */
+#define CIRC_CALC_INDEX(reg, off, len) do {              \
+    int32_t new_index = index + (off);                   \
+    if (new_index < 0) {                                 \
+        new_index = new_index + (len);                   \
+    } else if ((len) != 0) {                             \
+        new_index = new_index % (len);                   \
+    } else {                                             \
+        new_index = 0;                                   \
+    }                                                    \
+    env->gpr_a[(reg) + 1] = ((len) << 16) | new_index;   \
+} while (0)
+
+/*
+ * Load one byte with cpu_ldsb_data() from A[r2] + index into D[r1], then
+ * advance the circular index in A[r2+1] by off10.
+ */
+void helper_ld_b_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_d[r1] = cpu_ldsb_data(env, addr);
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Load one byte with cpu_ldub_data() from A[r2] + index into D[r1], then
+ * advance the circular index in A[r2+1] by off10.
+ */
+void helper_ld_bu_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2,
+                       int off10)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_d[r1] = cpu_ldub_data(env, addr);
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Load a halfword with cpu_ldsw_data() from A[r2] + index into D[r1], then
+ * advance the circular index in A[r2+1] by off10.
+ */
+void helper_ld_h_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_d[r1] = cpu_ldsw_data(env, addr);
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Load a halfword with cpu_lduw_data() from A[r2] + index into D[r1], then
+ * advance the circular index in A[r2+1] by off10.
+ */
+void helper_ld_hu_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2,
+                       int off10)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_d[r1] = cpu_lduw_data(env, addr);
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Load a halfword from A[r2] + index and place it in the upper 16 bits of
+ * D[r1] (lower 16 bits zero), then advance the circular index by off10.
+ */
+void helper_ld_q_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_d[r1] = cpu_lduw_data(env, addr) << 16;
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Masked read-modify-write of the word at A[r2] + index:
+ *   M(EA, word) = (M(EA, word) & ~D[r1+1]) | (D[r1] & D[r1+1])
+ * followed by advancing the circular index in A[r2+1] by off10.
+ */
+void helper_ldmst_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2,
+                       int off10)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+    uint32_t merged;
+
+    /* bits selected by the mask D[r1+1] come from D[r1], the rest from
+       memory */
+    merged = (cpu_ldl_data(env, addr) & ~(env->gpr_d[r1 + 1])) |
+             (env->gpr_d[r1] & env->gpr_d[r1 + 1]);
+    cpu_stl_data(env, addr, merged);
+
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Load a word with cpu_ldl_data() from A[r2] + index into address register
+ * A[r1], then advance the circular index in A[r2+1] by off10.
+ */
+void helper_ld_a_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_a[r1] = cpu_ldl_data(env, addr);
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Load a word into D[r1] from the circular buffer at A[r2] as two
+ * halfwords: the low half from offset index, the high half from offset
+ * (index + 2) wrapped to the buffer length.  Then advance the circular
+ * index in A[r2+1] by off10.
+ */
+void helper_ld_w_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+
+    uint32_t ea0 = env->gpr_a[r2] + index;
+    /* the sum must be wrapped; `index + 2 % length_incr` would apply the
+       modulo to the constant only */
+    uint32_t ea2 = env->gpr_a[r2] + ((index + 2) % length_incr);
+
+    uint32_t hw_ea2 = cpu_lduw_data(env, ea2) << 16;
+    uint32_t lw_ea0 = cpu_lduw_data(env, ea0);
+
+    env->gpr_d[r1] = hw_ea2 | lw_ea0;
+
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Load a doubleword into the pair D[r1+1]:D[r1] from the circular buffer at
+ * A[r2] as four halfwords at offsets index, index+2, index+4 and index+6,
+ * each wrapped to the buffer length.  The two lowest halfwords form D[r1],
+ * the two highest D[r1+1], matching helper_st_d_circ and helper_ld_d_br.
+ * Then advance the circular index in A[r2+1] by off10.
+ */
+void helper_ld_d_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+
+    /* wrap the whole sum, not just the constant */
+    uint32_t ea0 = env->gpr_a[r2] + index;
+    uint32_t ea2 = env->gpr_a[r2] + ((index + 2) % length_incr);
+    uint32_t ea4 = env->gpr_a[r2] + ((index + 4) % length_incr);
+    uint32_t ea6 = env->gpr_a[r2] + ((index + 6) % length_incr);
+
+    uint32_t hw_ea6 = cpu_lduw_data(env, ea6) << 16;
+    uint32_t lw_ea4 = cpu_lduw_data(env, ea4);
+    uint32_t hw_ea2 = cpu_lduw_data(env, ea2) << 16;
+    uint32_t lw_ea0 = cpu_lduw_data(env, ea0);
+
+    /* lower word goes to the even register, upper word to the odd one */
+    env->gpr_d[r1] = hw_ea2 | lw_ea0;
+    env->gpr_d[r1+1] = hw_ea6 | lw_ea4;
+
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Load two words into the address register pair A[r1+1]:A[r1] from the
+ * circular buffer at A[r2]: A[r1] from offset index, A[r1+1] from offset
+ * (index + 4) wrapped to the buffer length (same register order as
+ * helper_ld_da_br).  Then advance the circular index in A[r2+1] by off10.
+ */
+void helper_ld_da_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2,
+                       int off10)
+{
+    CIRC_BR_DEFINES(r2)
+
+    uint32_t ea0 = env->gpr_a[r2] + index;
+    /* wrap the whole sum, not just the constant */
+    uint32_t ea4 = env->gpr_a[r2] + ((index + 4) % length_incr);
+
+    /* address registers are 32 bits wide, so full word loads are needed */
+    env->gpr_a[r1] = cpu_ldl_data(env, ea0);
+    env->gpr_a[r1+1] = cpu_ldl_data(env, ea4);
+
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Store address register A[r1] as a word at A[r2] + index (as
+ * helper_st_a_br does), then advance the circular index in A[r2+1] by
+ * off10.
+ */
+void helper_st_a_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+
+    uint32_t ea0 = env->gpr_a[r2] + index;
+    /* a byte store would drop the upper 24 bits of the address */
+    cpu_stl_data(env, ea0, env->gpr_a[r1]);
+
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Store D[r1] with cpu_stb_data() at A[r2] + index, then advance the
+ * circular index in A[r2+1] by off10.
+ */
+void helper_st_b_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    cpu_stb_data(env, addr, env->gpr_d[r1]);
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Store the pair D[r1+1]:D[r1] into the circular buffer at A[r2] as four
+ * halfwords at offsets index, index+2, index+4 and index+6, each wrapped
+ * to the buffer length.  Then advance the circular index in A[r2+1] by
+ * off10.
+ */
+void helper_st_d_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+
+    /* wrap the whole sum, not just the constant */
+    uint32_t ea0 = env->gpr_a[r2] + index;
+    uint32_t ea2 = env->gpr_a[r2] + ((index + 2) % length_incr);
+    uint32_t ea4 = env->gpr_a[r2] + ((index + 4) % length_incr);
+    uint32_t ea6 = env->gpr_a[r2] + ((index + 6) % length_incr);
+
+    cpu_stw_data(env, ea0, env->gpr_d[r1]);
+    cpu_stw_data(env, ea2, env->gpr_d[r1] >> 16);
+    cpu_stw_data(env, ea4, env->gpr_d[r1+1]);
+    cpu_stw_data(env, ea6, env->gpr_d[r1+1]  >> 16);
+
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Store the address register pair A[r1+1]:A[r1] into the circular buffer at
+ * A[r2]: A[r1] at offset index, A[r1+1] at offset (index + 4) wrapped to
+ * the buffer length.  Then advance the circular index in A[r2+1] by off10.
+ */
+void helper_st_da_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2,
+                       int off10)
+{
+    CIRC_BR_DEFINES(r2)
+
+    uint32_t ea0 = env->gpr_a[r2] + index;
+    /* wrap the whole sum, not just the constant */
+    uint32_t ea4 = env->gpr_a[r2] + ((index + 4) % length_incr);
+
+    /* the source is the address register file, as in helper_st_da_br */
+    cpu_stl_data(env, ea0, env->gpr_a[r1]);
+    cpu_stl_data(env, ea4, env->gpr_a[r1+1]);
+
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Store D[r1] with cpu_stw_data() at A[r2] + index, then advance the
+ * circular index in A[r2+1] by off10.
+ */
+void helper_st_h_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    cpu_stw_data(env, addr, env->gpr_d[r1]);
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Store the upper 16 bits of D[r1] with cpu_stw_data() at A[r2] + index,
+ * then advance the circular index in A[r2+1] by off10.
+ */
+void helper_st_q_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    cpu_stw_data(env, addr, env->gpr_d[r1] >> 16);
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Store D[r1] into the circular buffer at A[r2] as two halfwords: the low
+ * half at offset index, the high half at offset (index + 2) wrapped to the
+ * buffer length.  Then advance the circular index in A[r2+1] by off10.
+ */
+void helper_st_w_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+
+    uint32_t ea0 = env->gpr_a[r2] + index;
+    /* wrap the whole sum, not just the constant */
+    uint32_t ea2 = env->gpr_a[r2] + ((index + 2) % length_incr);
+
+    cpu_stw_data(env, ea0, env->gpr_d[r1]);
+    cpu_stw_data(env, ea2, env->gpr_d[r1] >> 16);
+
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Exchange D[r1] with the word at A[r2] + index (load, then store, then
+ * register write), then advance the circular index in A[r2+1] by off10.
+ */
+void helper_swap_circ(CPUTriCoreState *env, uint32_t r1, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+    uint32_t old_mem = cpu_ldl_data(env, addr);
+
+    cpu_stl_data(env, addr, env->gpr_d[r1]);
+    env->gpr_d[r1] = old_mem;
+
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * For cache reads/writes: performs no memory access and only advances the
+ * circular index in A[r2+1] by off10.
+ */
+void helper_empty_circ(CPUTriCoreState *env, uint32_t r2, int off10)
+{
+    CIRC_BR_DEFINES(r2)
+    CIRC_CALC_INDEX(r2, off10, length_incr);
+}
+
+/*
+ * Bit reverse helpers
+ */
+
+/*
+ * Load one byte with cpu_ldsb_data() from A[r2] + index into D[r1], then
+ * apply the bit-reverse index update to A[r2+1].
+ */
+void helper_ld_b_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_d[r1] = cpu_ldsb_data(env, addr);
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Load one byte with cpu_ldub_data() from A[r2] + index into D[r1], then
+ * apply the bit-reverse index update to A[r2+1].
+ */
+void helper_ld_bu_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_d[r1] = cpu_ldub_data(env, addr);
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Load a halfword with cpu_ldsw_data() from A[r2] + index into D[r1], then
+ * apply the bit-reverse index update to A[r2+1].
+ */
+void helper_ld_h_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_d[r1] = cpu_ldsw_data(env, addr);
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Load a halfword with cpu_lduw_data() from A[r2] + index into D[r1], then
+ * apply the bit-reverse index update to A[r2+1].
+ */
+void helper_ld_hu_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_d[r1] = cpu_lduw_data(env, addr);
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Load a halfword from A[r2] + index and place it in the upper 16 bits of
+ * D[r1] (lower 16 bits zero), then apply the bit-reverse index update to
+ * A[r2+1].
+ */
+void helper_ld_q_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_d[r1] = cpu_lduw_data(env, addr) << 16;
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Masked read-modify-write of the word at A[r2] + index:
+ *   M(EA, word) = (M(EA, word) & ~D[r1+1]) | (D[r1] & D[r1+1])
+ * followed by the bit-reverse index update of A[r2+1].
+ */
+void helper_ldmst_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+    uint32_t merged;
+
+    /* bits selected by the mask D[r1+1] come from D[r1], the rest from
+       memory */
+    merged = (cpu_ldl_data(env, addr) & ~(env->gpr_d[r1 + 1])) |
+             (env->gpr_d[r1] & env->gpr_d[r1 + 1]);
+    cpu_stl_data(env, addr, merged);
+
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Load a word with cpu_ldl_data() from A[r2] + index into address register
+ * A[r1], then apply the bit-reverse index update to A[r2+1].
+ */
+void helper_ld_a_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_a[r1] = cpu_ldl_data(env, addr);
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Load a word with cpu_ldl_data() from A[r2] + index into D[r1], then
+ * apply the bit-reverse index update to A[r2+1].
+ */
+void helper_ld_w_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    env->gpr_d[r1] = cpu_ldl_data(env, addr);
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Load 64 bits with cpu_ldq_data() from A[r2] + index; the low 32 bits go
+ * to D[r1], the high 32 bits to D[r1+1].  Then apply the bit-reverse index
+ * update to A[r2+1].
+ */
+void helper_ld_d_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+    uint64_t qword = cpu_ldq_data(env, addr);
+
+    env->gpr_d[r1] = (uint32_t)qword;
+    env->gpr_d[r1 + 1] = (uint32_t)(qword >> 32);
+
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Load 64 bits with cpu_ldq_data() from A[r2] + index; the low 32 bits go
+ * to A[r1], the high 32 bits to A[r1+1].  Then apply the bit-reverse index
+ * update to A[r2+1].
+ */
+void helper_ld_da_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+    uint64_t qword = cpu_ldq_data(env, addr);
+
+    env->gpr_a[r1] = (uint32_t)qword;
+    env->gpr_a[r1 + 1] = (uint32_t)(qword >> 32);
+
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Store address register A[r1] with cpu_stl_data() at A[r2] + index, then
+ * apply the bit-reverse index update to A[r2+1].
+ */
+void helper_st_a_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    cpu_stl_data(env, addr, env->gpr_a[r1]);
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Store D[r1] with cpu_stb_data() at A[r2] + index, then apply the
+ * bit-reverse index update to A[r2+1].
+ */
+void helper_st_b_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+
+    uint32_t ea = env->gpr_a[r2] + index;
+    /* the byte comes from the data register file, as in helper_st_b_circ */
+    cpu_stb_data(env, ea, env->gpr_d[r1]);
+
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Store the pair D[r1+1]:D[r1] as 64 bits with cpu_stq_data() at
+ * A[r2] + index (D[r1] in the low half), then apply the bit-reverse index
+ * update to A[r2+1].
+ */
+void helper_st_d_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+    uint64_t qword = ((uint64_t)(env->gpr_d[r1 + 1]) << 32) | (env->gpr_d[r1]);
+
+    cpu_stq_data(env, addr, qword);
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Store the pair A[r1+1]:A[r1] as 64 bits with cpu_stq_data() at
+ * A[r2] + index (A[r1] in the low half), then apply the bit-reverse index
+ * update to A[r2+1].
+ */
+void helper_st_da_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+    uint64_t qword = ((uint64_t)(env->gpr_a[r1 + 1]) << 32) | (env->gpr_a[r1]);
+
+    cpu_stq_data(env, addr, qword);
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Store D[r1] with cpu_stw_data() at A[r2] + index, then apply the
+ * bit-reverse index update to A[r2+1].
+ */
+void helper_st_h_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    cpu_stw_data(env, addr, env->gpr_d[r1]);
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Store the upper 16 bits of D[r1] with cpu_stw_data() at A[r2] + index,
+ * then apply the bit-reverse index update to A[r2+1].
+ */
+void helper_st_q_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    cpu_stw_data(env, addr, env->gpr_d[r1] >> 16);
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Store D[r1] with cpu_stl_data() at A[r2] + index, then apply the
+ * bit-reverse index update to A[r2+1].
+ */
+void helper_st_w_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+
+    cpu_stl_data(env, addr, env->gpr_d[r1]);
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * Exchange D[r1] with the word at A[r2] + index (load, then store, then
+ * register write), then apply the bit-reverse index update to A[r2+1].
+ */
+void helper_swap_br(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    uint32_t addr = env->gpr_a[r2] + index;
+    uint32_t old_mem = cpu_ldl_data(env, addr);
+
+    cpu_stl_data(env, addr, env->gpr_d[r1]);
+    env->gpr_d[r1] = old_mem;
+
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
+/*
+ * For cache reads/writes: performs no memory access and only applies the
+ * bit-reverse index update to A[r2+1].
+ */
+void helper_empty_br(CPUTriCoreState *env, uint32_t r2)
+{
+    CIRC_BR_DEFINES(r2)
+    BR_CALC_INDEX(r2, index, length_incr);
+}
+
 #define SSOV(env, ret, arg, len) do {               \
     int64_t max_pos = INT##len ##_MAX;              \
     int64_t max_neg = INT##len ##_MIN;              \
diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 34375a9..2bb7309 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -149,6 +149,15 @@  static void gen_st_2regs_64(TCGv rh, TCGv rl, TCGv address, DisasContext *ctx)
     tcg_temp_free_i64(temp);
 }
 
+/* Emit a gen_st_2regs_64() store of the pair rh/rl at address base + con. */
+static void gen_offset_st_2regs(TCGv rh, TCGv rl, TCGv base, int16_t con,
+                                DisasContext *ctx)
+{
+    TCGv addr = tcg_temp_new();
+
+    tcg_gen_addi_tl(addr, base, con);
+    gen_st_2regs_64(rh, rl, addr, ctx);
+    tcg_temp_free(addr);
+}
+
 static void gen_ld_2regs_64(TCGv rh, TCGv rl, TCGv address, DisasContext *ctx)
 {
     TCGv_i64 temp = tcg_temp_new_i64();
@@ -162,6 +171,15 @@  static void gen_ld_2regs_64(TCGv rh, TCGv rl, TCGv address, DisasContext *ctx)
     tcg_temp_free_i64(temp);
 }
 
+/* Emit a gen_ld_2regs_64() load of the pair rh/rl from address base + con. */
+static void gen_offset_ld_2regs(TCGv rh, TCGv rl, TCGv base, int16_t con,
+                                DisasContext *ctx)
+{
+    TCGv addr = tcg_temp_new();
+
+    tcg_gen_addi_tl(addr, base, con);
+    gen_ld_2regs_64(rh, rl, addr, ctx);
+    tcg_temp_free(addr);
+}
+
 /* M(EA, word) = (M(EA, word) & ~E[a][63:32]) | (E[a][31:0] & E[a][63:32]); */
 static void gen_ldmst(DisasContext *ctx, int ereg, TCGv ea)
 {
@@ -1674,6 +1692,551 @@  static void decode_bit_sh_logic2(CPUTriCoreState *env, DisasContext *ctx)
     tcg_temp_free(temp);
 }
 
+/* BO-format */
+
+
+/*
+ * Decode the BO-format cache and store instructions that use short-offset,
+ * post-increment or pre-increment addressing.
+ *
+ * r1 selects the source register (pair), r2 the base address register A[r2]
+ * and off10 is the sign-extended 10-bit offset:
+ *   SHORTOFF: access at A[r2] + off10, A[r2] unchanged
+ *   POSTINC:  access at A[r2], then A[r2] += off10
+ *   PREINC:   A[r2] += off10, then access at A[r2]
+ * The cache instructions perform no access here; only the address register
+ * update is generated.
+ */
+static void decode_bo_addrmode_post_pre_base(CPUTriCoreState *env,
+                                             DisasContext *ctx)
+{
+    uint32_t op2;
+    int32_t off10;
+    int32_t r1, r2;
+    TCGv temp;
+
+    r1 = MASK_OP_BO_S1D(ctx->opcode);
+    r2  = MASK_OP_BO_S2(ctx->opcode);
+    off10 = MASK_OP_BO_OFF10_SEXT(ctx->opcode);
+    op2 = MASK_OP_BO_OP2(ctx->opcode);
+
+    switch (op2) {
+    case OPC2_32_BO_CACHEA_WI_SHORTOFF:
+    case OPC2_32_BO_CACHEA_W_SHORTOFF:
+    case OPC2_32_BO_CACHEA_I_SHORTOFF:
+        /* instruction to access the cache */
+        break;
+    case OPC2_32_BO_CACHEA_WI_POSTINC:
+    case OPC2_32_BO_CACHEA_W_POSTINC:
+    case OPC2_32_BO_CACHEA_I_POSTINC:
+        /* instruction to access the cache, but we still need to handle
+           the addressing mode: the base is the address register A[r2] */
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_CACHEA_WI_PREINC:
+    case OPC2_32_BO_CACHEA_W_PREINC:
+    case OPC2_32_BO_CACHEA_I_PREINC:
+        /* instruction to access the cache, but we still need to handle
+           the addressing mode: the base is the address register A[r2] */
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_CACHEI_WI_SHORTOFF:
+    case OPC2_32_BO_CACHEI_W_SHORTOFF:
+        /* TODO: Raise illegal opcode trap,
+                 if tricore_feature(TRICORE_FEATURE_13) */
+        break;
+    case OPC2_32_BO_CACHEI_W_POSTINC:
+    case OPC2_32_BO_CACHEI_WI_POSTINC:
+        if (!tricore_feature(env, TRICORE_FEATURE_13)) {
+            tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        } /* TODO: else raise illegal opcode trap */
+        break;
+    case OPC2_32_BO_CACHEI_W_PREINC:
+    case OPC2_32_BO_CACHEI_WI_PREINC:
+        if (!tricore_feature(env, TRICORE_FEATURE_13)) {
+            tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        } /* TODO: else raise illegal opcode trap */
+        break;
+    case OPC2_32_BO_ST_A_SHORTOFF:
+        gen_offset_st(ctx, cpu_gpr_a[r1], cpu_gpr_a[r2], off10, MO_LESL);
+        break;
+    case OPC2_32_BO_ST_A_POSTINC:
+        tcg_gen_qemu_st_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LESL);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_ST_A_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_gen_qemu_st_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LESL);
+        break;
+    case OPC2_32_BO_ST_B_SHORTOFF:
+        gen_offset_st(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_UB);
+        break;
+    case OPC2_32_BO_ST_B_POSTINC:
+        tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_UB);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_ST_B_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_UB);
+        break;
+    case OPC2_32_BO_ST_D_SHORTOFF:
+        gen_offset_st_2regs(cpu_gpr_d[r1+1], cpu_gpr_d[r1], cpu_gpr_a[r2],
+                            off10, ctx);
+        break;
+    case OPC2_32_BO_ST_D_POSTINC:
+        gen_st_2regs_64(cpu_gpr_d[r1+1], cpu_gpr_d[r1], cpu_gpr_a[r2], ctx);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_ST_D_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        gen_st_2regs_64(cpu_gpr_d[r1+1], cpu_gpr_d[r1], cpu_gpr_a[r2], ctx);
+        break;
+    case OPC2_32_BO_ST_DA_SHORTOFF:
+        gen_offset_st_2regs(cpu_gpr_a[r1+1], cpu_gpr_a[r1], cpu_gpr_a[r2],
+                            off10, ctx);
+        break;
+    case OPC2_32_BO_ST_DA_POSTINC:
+        gen_st_2regs_64(cpu_gpr_a[r1+1], cpu_gpr_a[r1], cpu_gpr_a[r2], ctx);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_ST_DA_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        gen_st_2regs_64(cpu_gpr_a[r1+1], cpu_gpr_a[r1], cpu_gpr_a[r2], ctx);
+        break;
+    case OPC2_32_BO_ST_H_SHORTOFF:
+        gen_offset_st(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUW);
+        break;
+    case OPC2_32_BO_ST_H_POSTINC:
+        tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUW);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_ST_H_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUW);
+        break;
+    case OPC2_32_BO_ST_Q_SHORTOFF:
+        /* ST.Q stores the upper halfword of D[r1] */
+        temp = tcg_temp_new();
+        tcg_gen_shri_tl(temp, cpu_gpr_d[r1], 16);
+        gen_offset_st(ctx, temp, cpu_gpr_a[r2], off10, MO_LEUW);
+        tcg_temp_free(temp);
+        break;
+    case OPC2_32_BO_ST_Q_POSTINC:
+        /* store at the unmodified base, then increment */
+        temp = tcg_temp_new();
+        tcg_gen_shri_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_qemu_st_tl(temp, cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUW);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_temp_free(temp);
+        break;
+    case OPC2_32_BO_ST_Q_PREINC:
+        /* increment once, then store at the updated base */
+        temp = tcg_temp_new();
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_gen_shri_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_qemu_st_tl(temp, cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUW);
+        tcg_temp_free(temp);
+        break;
+    case OPC2_32_BO_ST_W_SHORTOFF:
+        gen_offset_st(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUL);
+        break;
+    case OPC2_32_BO_ST_W_POSTINC:
+        tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUL);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_ST_W_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_gen_qemu_st_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUL);
+        break;
+    default:
+        /* unknown op2: no code is generated */
+        break;
+    }
+}
+
+/*
+ * Decode the BO-format cache and store instructions that use bit-reverse or
+ * circular addressing.  Each one is dispatched to its helper; the register
+ * numbers r1 and r2 and the offset off10 are passed as constants.  The
+ * cache variants only call the index-update helpers.
+ */
+static void decode_bo_addrmode_bitreverse_circular(CPUTriCoreState *env,
+                                                   DisasContext *ctx)
+{
+    uint32_t op2;
+    uint32_t off10;
+    int32_t r1, r2;
+    TCGv t_r1, t_r2, t_off;
+
+    r1 = MASK_OP_BO_S1D(ctx->opcode);
+    r2  = MASK_OP_BO_S2(ctx->opcode);
+    off10 = MASK_OP_BO_OFF10_SEXT(ctx->opcode);
+    op2 = MASK_OP_BO_OP2(ctx->opcode);
+
+    /* helper arguments must be TCG values, not host integers */
+    t_r1 = tcg_const_i32(r1);
+    t_r2 = tcg_const_i32(r2);
+    t_off = tcg_const_i32(off10);
+
+    switch (op2) {
+    case OPC2_32_BO_CACHEA_WI_BR:
+    case OPC2_32_BO_CACHEA_W_BR:
+    case OPC2_32_BO_CACHEA_I_BR:
+        gen_helper_empty_br(cpu_env, t_r2);
+        break;
+    case OPC2_32_BO_CACHEA_WI_CIRC:
+    case OPC2_32_BO_CACHEA_W_CIRC:
+    case OPC2_32_BO_CACHEA_I_CIRC:
+        gen_helper_empty_circ(cpu_env, t_r2, t_off);
+        break;
+    case OPC2_32_BO_ST_A_BR:
+        gen_helper_st_a_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_ST_A_CIRC:
+        gen_helper_st_a_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_ST_B_BR:
+        gen_helper_st_b_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_ST_B_CIRC:
+        gen_helper_st_b_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_ST_D_BR:
+        gen_helper_st_d_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_ST_D_CIRC:
+        gen_helper_st_d_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_ST_DA_BR:
+        gen_helper_st_da_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_ST_DA_CIRC:
+        gen_helper_st_da_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_ST_H_BR:
+        gen_helper_st_h_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_ST_H_CIRC:
+        gen_helper_st_h_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_ST_Q_BR:
+        gen_helper_st_q_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_ST_Q_CIRC:
+        gen_helper_st_q_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_ST_W_BR:
+        gen_helper_st_w_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_ST_W_CIRC:
+        gen_helper_st_w_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    }
+
+    tcg_temp_free(t_r1);
+    tcg_temp_free(t_r2);
+    tcg_temp_free(t_off);
+}
+
+/*
+ * Decode the BO-format load instructions that use short-offset,
+ * post-increment or pre-increment addressing.
+ *
+ * r1 selects the destination register (pair), r2 the base address register
+ * A[r2] and off10 is the sign-extended 10-bit offset:
+ *   SHORTOFF: load from A[r2] + off10, A[r2] unchanged
+ *   POSTINC:  load from A[r2], then A[r2] += off10
+ *   PREINC:   A[r2] += off10, then load from A[r2]
+ */
+static void decode_bo_addrmode_ld_post_pre_base(CPUTriCoreState *env,
+                                                DisasContext *ctx)
+{
+    uint32_t op2;
+    int32_t off10;
+    int32_t r1, r2;
+
+    r1 = MASK_OP_BO_S1D(ctx->opcode);
+    r2  = MASK_OP_BO_S2(ctx->opcode);
+    off10 = MASK_OP_BO_OFF10_SEXT(ctx->opcode);
+    op2 = MASK_OP_BO_OP2(ctx->opcode);
+
+    switch (op2) {
+    case OPC2_32_BO_LD_A_SHORTOFF:
+        gen_offset_ld(ctx, cpu_gpr_a[r1], cpu_gpr_a[r2], off10, MO_LEUL);
+        break;
+    case OPC2_32_BO_LD_A_POSTINC:
+        tcg_gen_qemu_ld_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUL);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_LD_A_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_gen_qemu_ld_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUL);
+        break;
+    case OPC2_32_BO_LD_B_SHORTOFF:
+        gen_offset_ld(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_SB);
+        break;
+    case OPC2_32_BO_LD_B_POSTINC:
+        tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_SB);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_LD_B_PREINC:
+        /* pre-increment: update the base first, then load from it */
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_SB);
+        break;
+    case OPC2_32_BO_LD_BU_SHORTOFF:
+        gen_offset_ld(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_UB);
+        break;
+    case OPC2_32_BO_LD_BU_POSTINC:
+        tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_UB);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_LD_BU_PREINC:
+        /* LD.BU is the unsigned byte load, like its other two variants */
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_UB);
+        break;
+    case OPC2_32_BO_LD_D_SHORTOFF:
+        gen_offset_ld_2regs(cpu_gpr_d[r1+1], cpu_gpr_d[r1], cpu_gpr_a[r2],
+                            off10, ctx);
+        break;
+    case OPC2_32_BO_LD_D_POSTINC:
+        gen_ld_2regs_64(cpu_gpr_d[r1+1], cpu_gpr_d[r1], cpu_gpr_a[r2], ctx);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_LD_D_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        gen_ld_2regs_64(cpu_gpr_d[r1+1], cpu_gpr_d[r1], cpu_gpr_a[r2], ctx);
+        break;
+    case OPC2_32_BO_LD_DA_SHORTOFF:
+        gen_offset_ld_2regs(cpu_gpr_a[r1+1], cpu_gpr_a[r1], cpu_gpr_a[r2],
+                            off10, ctx);
+        break;
+    case OPC2_32_BO_LD_DA_POSTINC:
+        gen_ld_2regs_64(cpu_gpr_a[r1+1], cpu_gpr_a[r1], cpu_gpr_a[r2], ctx);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_LD_DA_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        gen_ld_2regs_64(cpu_gpr_a[r1+1], cpu_gpr_a[r1], cpu_gpr_a[r2], ctx);
+        break;
+    case OPC2_32_BO_LD_H_SHORTOFF:
+        gen_offset_ld(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LESW);
+        break;
+    case OPC2_32_BO_LD_H_POSTINC:
+        tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LESW);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_LD_H_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LESW);
+        break;
+    case OPC2_32_BO_LD_HU_SHORTOFF:
+        gen_offset_ld(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUW);
+        break;
+    case OPC2_32_BO_LD_HU_POSTINC:
+        tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUW);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_LD_HU_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUW);
+        break;
+    case OPC2_32_BO_LD_Q_SHORTOFF:
+        /* LD.Q places the loaded halfword in the upper half of D[r1] */
+        gen_offset_ld(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUW);
+        tcg_gen_shli_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 16);
+        break;
+    case OPC2_32_BO_LD_Q_POSTINC:
+        tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUW);
+        tcg_gen_shli_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 16);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_LD_Q_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUW);
+        tcg_gen_shli_tl(cpu_gpr_d[r1], cpu_gpr_d[r1], 16);
+        break;
+    case OPC2_32_BO_LD_W_SHORTOFF:
+        gen_offset_ld(ctx, cpu_gpr_d[r1], cpu_gpr_a[r2], off10, MO_LEUL);
+        break;
+    case OPC2_32_BO_LD_W_POSTINC:
+        tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUL);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_LD_W_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        tcg_gen_qemu_ld_tl(cpu_gpr_d[r1], cpu_gpr_a[r2], ctx->mem_idx,
+                           MO_LEUL);
+        break;
+    default:
+        /* unknown op2: no code is generated */
+        break;
+    }
+}
+
+/*
+ * BO format loads (LD.A through LD.W) in bit-reverse and circular addressing
+ * mode.  Every opcode is emitted as a helper call that takes the register
+ * numbers r1 and r2 as constants; circular variants also pass off10.
+ */
+static void decode_bo_addrmode_ld_bitreverse_circular(CPUTriCoreState *env,
+                                                      DisasContext *ctx)
+{
+    uint32_t op2 = MASK_OP_BO_OP2(ctx->opcode);
+    uint32_t off10 = MASK_OP_BO_OFF10_SEXT(ctx->opcode);
+    int r1 = MASK_OP_BO_S1D(ctx->opcode);
+    int r2 = MASK_OP_BO_S2(ctx->opcode);
+    TCGv t_r1 = tcg_const_i32(r1);
+    TCGv t_r2 = tcg_const_i32(r2);
+    TCGv t_off = tcg_const_i32(off10);
+
+    switch (op2) {
+    /* bit-reverse forms: helper(env, r1, r2) */
+    case OPC2_32_BO_LD_A_BR:
+        gen_helper_ld_a_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_LD_B_BR:
+        gen_helper_ld_b_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_LD_BU_BR:
+        gen_helper_ld_bu_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_LD_D_BR:
+        gen_helper_ld_d_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_LD_DA_BR:
+        gen_helper_ld_da_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_LD_H_BR:
+        gen_helper_ld_h_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_LD_HU_BR:
+        gen_helper_ld_hu_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_LD_Q_BR:
+        gen_helper_ld_q_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_LD_W_BR:
+        gen_helper_ld_w_br(cpu_env, t_r1, t_r2);
+        break;
+    /* circular forms: helper(env, r1, r2, off10) */
+    case OPC2_32_BO_LD_A_CIRC:
+        gen_helper_ld_a_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_LD_B_CIRC:
+        gen_helper_ld_b_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_LD_BU_CIRC:
+        gen_helper_ld_bu_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_LD_D_CIRC:
+        gen_helper_ld_d_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_LD_DA_CIRC:
+        gen_helper_ld_da_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_LD_H_CIRC:
+        gen_helper_ld_h_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_LD_HU_CIRC:
+        gen_helper_ld_hu_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_LD_Q_CIRC:
+        gen_helper_ld_q_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_LD_W_CIRC:
+        gen_helper_ld_w_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    }
+    tcg_temp_free(t_r1);
+    tcg_temp_free(t_r2);
+    tcg_temp_free(t_off);
+}
+
+/*
+ * Decode BO-format LDLCX, LDUCX, STLCX, STUCX and LEA (short offset only) and
+ * LDMST and SWAP.W (short offset, post-increment and pre-increment).
+ */
+static void decode_bo_addrmode_stctx_post_pre_base(CPUTriCoreState *env,
+                                                   DisasContext *ctx)
+{
+    uint32_t op2;
+    int32_t off10;  /* sign-extended 10 bit offset */
+    int r1, r2;
+    TCGv temp;
+
+    r1 = MASK_OP_BO_S1D(ctx->opcode);
+    r2 = MASK_OP_BO_S2(ctx->opcode);
+    off10 = MASK_OP_BO_OFF10_SEXT(ctx->opcode);
+    op2 = MASK_OP_BO_OP2(ctx->opcode);
+
+    /* scratch holding A[r2] + off10 for the cases that need a temporary */
+    temp = tcg_temp_new();
+    switch (op2) {
+    case OPC2_32_BO_LDLCX_SHORTOFF:
+        tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10);
+        gen_helper_ldlcx(cpu_env, temp);
+        break;
+    case OPC2_32_BO_LDMST_SHORTOFF:
+        tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10);
+        gen_ldmst(ctx, r1, temp);
+        break;
+    case OPC2_32_BO_LDMST_POSTINC:
+        gen_ldmst(ctx, r1, cpu_gpr_a[r2]);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_LDMST_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        gen_ldmst(ctx, r1, cpu_gpr_a[r2]);
+        break;
+    case OPC2_32_BO_LDUCX_SHORTOFF:
+        tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10);
+        gen_helper_lducx(cpu_env, temp);
+        break;
+    case OPC2_32_BO_LEA_SHORTOFF:
+        tcg_gen_addi_tl(cpu_gpr_a[r1], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_STLCX_SHORTOFF:
+        tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10);
+        gen_helper_stlcx(cpu_env, temp);
+        break;
+    case OPC2_32_BO_STUCX_SHORTOFF:
+        tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10);
+        gen_helper_stucx(cpu_env, temp);
+        break;
+    case OPC2_32_BO_SWAP_W_SHORTOFF:
+        tcg_gen_addi_tl(temp, cpu_gpr_a[r2], off10);
+        gen_swap(ctx, r1, temp);
+        break;
+    case OPC2_32_BO_SWAP_W_POSTINC:
+        gen_swap(ctx, r1, cpu_gpr_a[r2]);
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        break;
+    case OPC2_32_BO_SWAP_W_PREINC:
+        tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r2], off10);
+        gen_swap(ctx, r1, cpu_gpr_a[r2]);
+        break;
+    }
+    tcg_temp_free(temp);
+}
+
+/*
+ * BO format LDMST and SWAP.W in bit-reverse and circular addressing mode.
+ * Both are emitted as helper calls that take the register numbers r1 and r2
+ * as constants; the circular variants also pass off10.
+ */
+static void decode_bo_addrmode_ldmst_bitreverse_circular(CPUTriCoreState *env,
+                                                         DisasContext *ctx)
+{
+    uint32_t op2 = MASK_OP_BO_OP2(ctx->opcode);
+    uint32_t off10 = MASK_OP_BO_OFF10_SEXT(ctx->opcode);
+    int r1 = MASK_OP_BO_S1D(ctx->opcode);
+    int r2 = MASK_OP_BO_S2(ctx->opcode);
+    TCGv t_r1 = tcg_const_i32(r1);
+    TCGv t_r2 = tcg_const_i32(r2);
+    TCGv t_off = tcg_const_i32(off10);
+
+    switch (op2) {
+    /* bit-reverse forms: helper(env, r1, r2) */
+    case OPC2_32_BO_LDMST_BR:
+        gen_helper_ldmst_br(cpu_env, t_r1, t_r2);
+        break;
+    case OPC2_32_BO_SWAP_W_BR:
+        gen_helper_swap_br(cpu_env, t_r1, t_r2);
+        break;
+    /* circular forms: helper(env, r1, r2, off10) */
+    case OPC2_32_BO_LDMST_CIRC:
+        gen_helper_ldmst_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    case OPC2_32_BO_SWAP_W_CIRC:
+        gen_helper_swap_circ(cpu_env, t_r1, t_r2, t_off);
+        break;
+    }
+    tcg_temp_free(t_r1);
+    tcg_temp_free(t_r2);
+    tcg_temp_free(t_off);
+}
+
 static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
 {
     int op1;
@@ -1779,6 +2342,25 @@  static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
     case OPCM_32_BIT_SH_LOGIC2:
         decode_bit_sh_logic2(env, ctx);
         break;
+    /* BO Format */
+    case OPCM_32_BO_ADDRMODE_POST_PRE_BASE:
+        decode_bo_addrmode_post_pre_base(env, ctx);
+        break;
+    case OPCM_32_BO_ADDRMODE_BITREVERSE_CIRCULAR:
+        decode_bo_addrmode_bitreverse_circular(env, ctx);
+        break;
+    case OPCM_32_BO_ADDRMODE_LD_POST_PRE_BASE:
+        decode_bo_addrmode_ld_post_pre_base(env, ctx);
+        break;
+    case OPCM_32_BO_ADDRMODE_LD_BITREVERSE_CIRCULAR:
+        decode_bo_addrmode_ld_bitreverse_circular(env, ctx);
+        break;
+    case OPCM_32_BO_ADDRMODE_STCTX_POST_PRE_BASE:
+        decode_bo_addrmode_stctx_post_pre_base(env, ctx);
+        break;
+    case OPCM_32_BO_ADDRMODE_LDMST_BITREVERSE_CIRCULAR:
+        decode_bo_addrmode_ldmst_bitreverse_circular(env, ctx);
+        break;
     }
 }
 
diff --git a/target-tricore/tricore-opcodes.h b/target-tricore/tricore-opcodes.h
index 342414f..7e6f33b 100644
--- a/target-tricore/tricore-opcodes.h
+++ b/target-tricore/tricore-opcodes.h
@@ -105,6 +105,8 @@ 
 /* BO Format */
 #define MASK_OP_BO_OFF10(op)   (MASK_BITS_SHIFT(op, 16, 21) + \
                                (MASK_BITS_SHIFT(op, 28, 31) << 6))
+#define MASK_OP_BO_OFF10_SEXT(op) /* sext(op[31:28]) : op[21:16] */ \
+    (MASK_BITS_SHIFT(op, 16, 21) + (MASK_BITS_SHIFT_SEXT(op, 28, 31) << 6))
 #define MASK_OP_BO_OP2(op)     MASK_BITS_SHIFT(op, 22, 27)
 #define MASK_OP_BO_S2(op)      MASK_BITS_SHIFT(op, 12, 15)
 #define MASK_OP_BO_S1D(op)     MASK_BITS_SHIFT(op, 8, 11)