diff mbox

[2/5] Blackfin: initial port

Message ID 1371453383-11484-1-git-send-email-vapier@gentoo.org
State New
Headers show

Commit Message

Mike Frysinger June 17, 2013, 7:16 a.m. UTC
This is the core Blackfin support.  While most things work that gcc will
generate, there are notable things missing at this point:
 - many dsp/alu/mac insns not supported
 - no saturation support
 - many astat flags not updated
 - probably other stuff
Details as to what is missing "by design" vs "not done due to laziness"
can be sorted out in the Blackfin README/TODO files.

FLAT and FDPIC ELFs however seem to work nicely, as do random samplings of
apps from a typical build.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 MAINTAINERS                    |    5 +
 configure                      |    4 +
 cpu-exec.c                     |    5 +-
 gdbstub.c                      |  103 ++
 include/elf.h                  |    6 +
 qapi-schema.json               |    9 +-
 scripts/qemu-binfmt-conf.sh    |    4 +
 target-bfin/Makefile.objs      |    3 +
 target-bfin/README             |   32 +
 target-bfin/TODO               |   25 +
 target-bfin/bfin-sim.c         | 3666 ++++++++++++++++++++++++++++++++++++++++
 target-bfin/bfin-tdep.h        |   94 ++
 target-bfin/cpu-qom.h          |   61 +
 target-bfin/cpu.c              |   55 +
 target-bfin/cpu.h              |  236 +++
 target-bfin/helper.c           |   37 +
 target-bfin/helper.h           |   23 +
 target-bfin/linux-fixed-code.h |   23 +
 target-bfin/op_helper.c        |  229 +++
 target-bfin/translate.c        | 1347 +++++++++++++++
 20 files changed, 5962 insertions(+), 5 deletions(-)
 create mode 100644 target-bfin/Makefile.objs
 create mode 100644 target-bfin/README
 create mode 100644 target-bfin/TODO
 create mode 100644 target-bfin/bfin-sim.c
 create mode 100644 target-bfin/bfin-tdep.h
 create mode 100644 target-bfin/cpu-qom.h
 create mode 100644 target-bfin/cpu.c
 create mode 100644 target-bfin/cpu.h
 create mode 100644 target-bfin/helper.c
 create mode 100644 target-bfin/helper.h
 create mode 100644 target-bfin/linux-fixed-code.h
 create mode 100644 target-bfin/op_helper.c
 create mode 100644 target-bfin/translate.c

Comments

Richard Henderson June 25, 2013, 9:23 p.m. UTC | #1
> diff --git a/target-bfin/bfin-sim.c b/target-bfin/bfin-sim.c

Why this separate file from translate.c?

> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <inttypes.h>

Certainly you shouldn't need these, since this isn't a separately
compiled object -- you're included from translate.c.

> +static void
> +unhandled_instruction(DisasContext *dc, const char *insn)
> +{
> +    fprintf(stderr, "unhandled insn: %s\n", insn);

Use LOG_UNIMP.

> +#define HOST_LONG_WORD_SIZE (sizeof(long) * 8)

You mean TCG_TARGET_REG_BITS?

> +static TCGv
> +get_allreg(DisasContext *dc, int grp, int reg)
> +{
> +    TCGv *ret = cpu_regs[(grp << 3) | reg];
> +    if (ret) {
> +       return *ret;
> +    }
> +    abort();
> +    illegal_instruction(dc);
> +}

Well, which is it?  abort or illegal_instruction.  And come to that, how is
abort any better than SEGV from dereferencing the null?  Certainly the latter
will generate a faster translator...

> +decode_multfunc_tl(DisasContext *dc, int h0, int h1, int src0, int src1,
> +                   int mmod, int MM, TCGv psat)
> +{
> +    TCGv s0, s1, val;
> +
> +    s0 = tcg_temp_local_new();

You'll really want to avoid local temps and branches, if at all possible.  For
some of the more complex stuff that you're open-coding, you may be better off
with helper functions instead.

> +        l = gen_new_label();
> +        endl = gen_new_label();
> +
> +        tcg_gen_brcondi_tl(TCG_COND_NE, val, 0x40000000, l);
> +        if (mmod == M_W32) {
> +            tcg_gen_movi_tl(val, 0x7fffffff);
> +        } else {
> +            tcg_gen_movi_tl(val, 0x80000000);
> +        }
> +        tcg_gen_movi_tl(psat, 1);
> +        tcg_gen_br(endl);
> +
> +        gen_set_label(l);
> +        tcg_gen_shli_tl(val, val, 1);
> +
> +        gen_set_label(endl);

Certainly possible here with 2 movcond, or 1 movcond, 1 setcond + 1 or.

> +    l = gen_new_label();
> +    tcg_gen_brcondi_tl(TCG_COND_EQ, psat, 0, l);
> +    tcg_gen_ext32u_i64(val1, val1);
> +    gen_set_label(l);

movcond again.

> +static void
> +saturate_s32(TCGv_i64 val, TCGv overflow)

I shall now stop mentioning movcond.  I sense there are many locations to come.

> +    } else if (prgfunc == 11 && poprnd < 6) {
> +        /* TESTSET (Preg{poprnd}); */
> +        TCGv tmp = tcg_temp_new();
> +        tcg_gen_qemu_ld8u(tmp, cpu_preg[poprnd], dc->mem_idx);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_cc, tmp, 0);
> +        tcg_gen_ori_tl(tmp, tmp, 0x80);
> +        tcg_gen_qemu_st8(tmp, cpu_preg[poprnd], dc->mem_idx);
> +        tcg_temp_free(tmp);

I'll note that this is fine for system code, but for user code ought to be
atomic.  There are a bunch of really bad examples in the tree, and no real
good solutions atm.

> +    /* Can't push/pop reserved registers */
> +    /*if (reg_is_reserved(grp, reg))
> +        illegal_instruction(dc);*/

No commented out code like this.

> +    /* Everything here needs to be aligned, so check once */
> +    gen_align_check(dc, cpu_spreg, 4, false);

You ought not need to generate explicit alignment checks.  Yes, we don't do
that correctly for user-mode, but we do for system mode.

My hope is that user mode eventually has the option of using the system mode
page tables too -- there are just too many things that don't work correctly
when host and target page sizes don't match, or the host and target don't have
the same unaligned access characteristics.

> +        } else if (grp == 4 && (reg == 0 || reg == 2)) {
> +            /* Pop A#.X */
> +            tmp = tcg_temp_new();
> +            tcg_gen_qemu_ld32u(tmp, cpu_spreg, dc->mem_idx);
> +            tcg_gen_andi_tl(tmp, tmp, 0xff);
> +            tmp64 = tcg_temp_new_i64();
> +            tcg_gen_extu_i32_i64(tmp64, tmp);
> +            tcg_temp_free(tmp);
> +
> +            tcg_gen_andi_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], 0xffffffff);
> +            tcg_gen_shli_i64(tmp64, tmp64, 32);
> +            tcg_gen_or_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], tmp64);
> +            tcg_temp_free_i64(tmp64);

Drop the andi with 0xff and use

tcg_gen_deposit_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], tmp64, 32, 8)

> +        } else if (grp == 4 && (reg == 1 || reg == 3)) {
> +            /* Pop A#.W */
> +            tcg_gen_andi_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], 0xff00000000);
> +            tmp = tcg_temp_new();
> +            tcg_gen_qemu_ld32u(tmp, cpu_spreg, dc->mem_idx);
> +            tmp64 = tcg_temp_new_i64();
> +            tcg_gen_extu_i32_i64(tmp64, tmp);
> +            tcg_temp_free(tmp);
> +            tcg_gen_or_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], tmp64);
> +            tcg_temp_free_i64(tmp64);

And then this one becomes deposit(areg, areg, tmp64, 0, 32).

> +        } else if (grp == 4 && (reg == 0 || reg == 2)) {
> +            /* Push A#.X */
> +            tmp64 = tcg_temp_new_i64();
> +            tcg_gen_shri_i64(tmp64, cpu_areg[reg >> 1], 32);
> +            tmp = tcg_temp_new();
> +            tcg_gen_trunc_i64_i32(tmp, tmp64);
> +            tcg_temp_free_i64(tmp64);
> +            tcg_gen_andi_tl(tmp, tmp, 0xff);

Do we ever allow the high 24 bits to be non-zero?  Is this andi actually redundant?

> +    if (W == 1) {
> +        /* [--SP] = ({d}R7:imm{dr}, {p}P5:imm{pr}); */
> +        if (d) {
> +            for (i = dr; i < 8; i++) {
> +                tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
> +                tcg_gen_qemu_st32(cpu_dreg[i], cpu_spreg, dc->mem_idx);
> +            }
> +        }

What's the cpu exception effect of the second store causing a page fault?
Normally one needs to do the address increment in a temporary and only update
the real SP register at the end, so that the instruction can be restarted.

> +    /* CC = CC; is invalid.  */
> +    if (cbit == 5)
> +        illegal_instruction(dc);

Please handle all checkpatch.pl style errors.

> +    if (opc == 0) {
> +        /* CC = ! BITTST (Dreg{dst}, imm{uimm}); */
> +        tmp = tcg_temp_new();
> +        tcg_gen_movi_tl(tmp, 1 << uimm);
> +        tcg_gen_and_tl(tmp, tmp, cpu_dreg[dst]);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_cc, tmp, 0);
> +        tcg_temp_free(tmp);
> +    } else if (opc == 1) {
> +        /* CC = BITTST (Dreg{dst}, imm{uimm}); */
> +        tmp = tcg_temp_new();
> +        tcg_gen_movi_tl(tmp, 1 << uimm);
> +        tcg_gen_and_tl(tmp, tmp, cpu_dreg[dst]);
> +        tcg_gen_setcondi_tl(TCG_COND_NE, cpu_cc, tmp, 0);
> +        tcg_temp_free(tmp);

You're writing

	(x & (1 << I)) != 0

whereas the alternative

	(x >> I) & 1

does not require the setcond, and will be faster on most hosts.

> +    if (aop == 1 && W == 0 && idx == ptr) {
> +        /* Dreg_lo{reg} = W[Preg{ptr}]; */
> +        tmp = tcg_temp_local_new();
> +        tcg_gen_andi_tl(cpu_dreg[reg], cpu_dreg[reg], 0xffff0000);
> +        gen_aligned_qemu_ld16u(dc, tmp, cpu_preg[ptr]);
> +        tcg_gen_or_tl(cpu_dreg[reg], cpu_dreg[reg], tmp);
> +        tcg_temp_free(tmp);

Deposit again.  Lots of instances in this function.

> +        /* LINK imm{framesize}; */
> +        int size = uimm16s4(framesize);
> +        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
> +        tcg_gen_qemu_st32(cpu_rets, cpu_spreg, dc->mem_idx);
> +        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
> +        tcg_gen_qemu_st32(cpu_fpreg, cpu_spreg, dc->mem_idx);
> +        tcg_gen_mov_tl(cpu_fpreg, cpu_spreg);
> +        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, size);
> +    } else if (framesize == 0) {
> +        /* UNLINK; */
> +        /* Restore SP from FP.  */
> +        tcg_gen_mov_tl(cpu_spreg, cpu_fpreg);
> +        tcg_gen_qemu_ld32u(cpu_fpreg, cpu_spreg, dc->mem_idx);
> +        tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);
> +        tcg_gen_qemu_ld32u(cpu_rets, cpu_spreg, dc->mem_idx);
> +        tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);

Similarly to push/pop multiple wrt intermediate SP.

> +    if ((aop == 0 || aop == 2) && aopcde == 9 && HL == 0 && s == 0) {
> +        int a = aop >> 1;
> +        /* Areg_lo{a} = Dreg_lo{src0}; */
> +        tcg_gen_andi_i64(cpu_areg[a], cpu_areg[a], ~0xffff);
> +        tmp64 = tcg_temp_new_i64();
> +        tcg_gen_extu_i32_i64(tmp64, cpu_dreg[src0]);
> +        tcg_gen_andi_i64(tmp64, tmp64, 0xffff);
> +        tcg_gen_or_i64(cpu_areg[a], cpu_areg[a], tmp64);
> +        tcg_temp_free_i64(tmp64);

More deposits in this function.  I'll stop mentioning them, but pretty much
every place you touch aregs can use this.

> +#include "linux-fixed-code.h"
> +
> +static uint32_t bfin_lduw_code(DisasContext *dc, target_ulong pc)
> +{
> +#ifdef CONFIG_USER_ONLY
> +    /* Intercept jump to the magic kernel page */
> +    if (((dc->env->personality & 0xff/*PER_MASK*/) == 0/*PER_LINUX*/) &&
> +        (pc & 0xFFFFFF00) == 0x400) {
> +        uint32_t off = pc - 0x400;
> +        if (off < sizeof(bfin_linux_fixed_code)) {
> +            return ((uint16_t)bfin_linux_fixed_code[off + 1] << 8) |
> +                   bfin_linux_fixed_code[off];
> +        }
> +    }
> +#endif

Surely this memory setup belongs in linux-user/.

> +/* Interpret a single Blackfin insn; breaks up parallel insns */
> +static void
> +interp_insn_bfin(DisasContext *dc)
> +{
> +    _interp_insn_bfin(dc, dc->pc);

I'd prefer a suffix like "1" rather than a prefix of "_".

> +typedef struct CPUBfinState {
> +    CPU_COMMON

COMMON should come last, or just about.
Certainly the cpu registers should come first, for most
efficient translation access on the host.

> +static inline void bfin_astat_write(CPUArchState *env, uint32_t astat)
> +{
> +    unsigned int i;
> +    for (i = 0; i < 32; ++i)
> +        env->astat[i] = !!(astat & (1 << i));

 = (astat >> i) & 1

> +typedef void (*hwloop_callback)(struct DisasContext *dc, int loop);
> +
> +typedef struct DisasContext {
> +    CPUArchState *env;
> +    struct TranslationBlock *tb;
> +    /* The current PC we're decoding (could be middle of parallel insn) */
> +    target_ulong pc;
> +    /* Length of current insn (2/4/8) */
> +    target_ulong insn_len;
> +
> +    /* For delayed ASTAT handling */
> +    enum astat_ops astat_op;
> +
> +    /* For hardware lop processing */
> +    hwloop_callback hwloop_callback;
> +    void *hwloop_data;
> +
> +    /* Was a DISALGNEXCPT used in this parallel insn ? */
> +    int disalgnexcpt;
> +
> +    int is_jmp;
> +    int mem_idx;
> +} DisasContext;

Really, this type should be private to translate.c.

> +static inline void cpu_get_tb_cpu_state(CPUArchState *env, target_ulong *pc,
> +                                        target_ulong *cs_base, int *flags)
> +{
> +    *pc = cpu_get_pc(env);
> +    *cs_base = 0;
> +    *flags = env->astat[ASTAT_RND_MOD];
> +}

You'll probably be better off with a bit that notes whether the loop registers
are active, or something, so that you don't have to always generate code that
handles them.

> +DEF_HELPER_3(raise_exception, void, env, i32, i32)

Lots of these can use better settings for flags.  Here, the only side effect is
to raise an exception, which leads to reading the globals.  So TCG_CALL_NO_WG.

> +DEF_HELPER_5(memalign, void, env, i32, i32, i32, i32)
> +
> +DEF_HELPER_4(dbga_l, void, env, i32, i32, i32)
> +DEF_HELPER_4(dbga_h, void, env, i32, i32, i32)

Likewise.

> +/* Count the number of bits set to 1 in the 32bit value */
> +uint32_t HELPER(ones)(uint32_t val)
> +{
> +    uint32_t i;
> +    uint32_t ret;
> +
> +    ret = 0;
> +    for (i = 0; i < 32; ++i)
> +        ret += !!(val & (1 << i));

ctpop32.

> +/* Count number of leading bits that match the sign bit */
> +uint32_t HELPER(signbits)(uint32_t val, uint32_t size)
...
> +/* Count number of leading bits that match the sign bit */
> +uint32_t HELPER(signbits_64)(uint64_t val, uint32_t size)

Surely we can make some use of clz here.  But I guess for now this is ok.

> +static void gen_goto_tb(DisasContext *dc, int tb_num, TCGv dest)
> +{
> +/*
> +    TranslationBlock *tb;
> +    tb = dc->tb;
> +
> +    if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
> +        tcg_gen_goto_tb(tb_num);
> +        tcg_gen_mov_tl(cpu_pc, dest);
> +        tcg_gen_exit_tb((long)tb + tb_num);
> +    } else */{
> +        gen_astat_update(dc, false);
> +        tcg_gen_mov_tl(cpu_pc, dest);
> +        tcg_gen_exit_tb(0);
> +    }

Why the astat update here, when you have it on almost no other exits from the tb?

> +        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
> +            tcg_gen_debug_insn_start(dc->pc);

CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT


r~
Mike Frysinger June 25, 2013, 11:14 p.m. UTC | #2
On Tuesday 25 June 2013 17:23:57 Richard Henderson wrote:

whee, got a review! :)  i've snipped items that were obvious in the "i'll go 
do this" category rather than just copying & pasting "OK" many times.  got a 
long flight coming up soon, so hopefully i can tackle the majority of this work 
then.

> > diff --git a/target-bfin/bfin-sim.c b/target-bfin/bfin-sim.c
> 
> Why this separate file from translate.c?

because this port is based on the GNU/sim Blackfin port.  bfin-sim.c focuses on 
the actual opcode translation while the higher level file (translate.c in QEMU 
and interp.c in GNU/sim) takes care of the higher layers (like clock ticking).  
i like keeping the core structure the same between the two sims so that i can 
more easily merge changes between them.

> > +#include <stdbool.h>
> > +#include <stdint.h>
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <inttypes.h>
> 
> Certainly you shouldn't need these, since this isn't a separately
> compiled object -- you're included from translate.c.

yes, this is most likely true.  it's due to the previous reason.  maybe i 
should make it a sep compiled file then there won't be any confusion ...

> > +#define HOST_LONG_WORD_SIZE (sizeof(long) * 8)
> 
> You mean TCG_TARGET_REG_BITS?

maybe?  this is for extracting target encoded immediates and properly 
extending them up to the system that is running the code (e.g. x86_64) so that 
it can then be checked naturally (like comparing it to a -1).  but this is 
done in the decode logic, not in the tcg output logic, so i'm not sure TCG is 
the right abstraction.

> > +static TCGv
> > +get_allreg(DisasContext *dc, int grp, int reg)
> > +{
> > +    TCGv *ret = cpu_regs[(grp << 3) | reg];
> > +    if (ret) {
> > +       return *ret;
> > +    }
> > +    abort();
> > +    illegal_instruction(dc);
> > +}
> 
> Well, which is it?  abort or illegal_instruction.

i had this in there while doing the initial port to track down bad things.  i 
can probably cut it over to illegal_instruction() now.

> And come to that, how is
> abort any better than SEGV from dereferecing the null?  Certainly the later
> will generate a faster translator...

QEMU doesn't need any help segfaulting :p.  an abort() is much better at 
showing the source of the problem.

> > +decode_multfunc_tl(DisasContext *dc, int h0, int h1, int src0, int src1,
> > +                   int mmod, int MM, TCGv psat)
> > +{
> > +    TCGv s0, s1, val;
> > +
> > +    s0 = tcg_temp_local_new();
> 
> You'll really want to avoid local temps and branches, if at all possible. 
> For some of the more complex stuff that you're open-coding, you may be
> better off with helper functions instead.

it seemed like having generated (and cached) opcodes was better than relying 
on helpers since helpers require interrupting the native code flow and mucking 
around with state ?  is there a good (or even semi-decent) rule of thumb i can 
use to decide when to use one over the other ?

> > +static void
> > +saturate_s32(TCGv_i64 val, TCGv overflow)
> 
> I shall now stop mentioning movcond.  I sense there are many locations to
> come.

looks like movcond was introduced after i did the initial (bulk) port.  so 
there are probably many locations that can take advantage of it.  i'll have to 
go through the code top-to-bottom looking for things.  and probably look at 
the history of tcg/README to see what other interesting opcodes have been 
added since.

> > +    } else if (prgfunc == 11 && poprnd < 6) {
> > +        /* TESTSET (Preg{poprnd}); */
> > +        TCGv tmp = tcg_temp_new();
> > +        tcg_gen_qemu_ld8u(tmp, cpu_preg[poprnd], dc->mem_idx);
> > +        tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_cc, tmp, 0);
> > +        tcg_gen_ori_tl(tmp, tmp, 0x80);
> > +        tcg_gen_qemu_st8(tmp, cpu_preg[poprnd], dc->mem_idx);
> > +        tcg_temp_free(tmp);
> 
> I'll note that this is fine for system code, but for user code ought to be
> atomic.  There are a bunch of really bad examples in the tree, and no real
> good solutions atm.

in general, i completely agree with you.  in practice, i think a (mis)feature 
of the Blackfin arch helps out here.  TESTSET doesn't work on cached memory, 
and it only works on system memory (i.e. not L1), and every Linux build runs 
with caches turned on.  so maybe flaky misbehavior is a good thing ? :)

i can drop a note in there noting the issue though

> > +    /* Everything here needs to be aligned, so check once */
> > +    gen_align_check(dc, cpu_spreg, 4, false);
> 
> You ought not need to generate explicit alignment checks.  Yes, we don't do
> that correctly for user-mode, but we do for system mode.
> 
> My hope is that user mode eventually has the option of using the system
> mode page tables too -- there are just too many things that don't work
> correctly when host and target page sizes don't match, or the host and
> target don't have the same unaligned access characteristics.

i had this code because it wasn't working in user mode, and someone in a 
previous review suggested i check out gen_helper_memalign().  i actually have 
this disabled by default because the speed impact is fairly substantial.  i 
was debating turning this into a configure time check.

> > +        } else if (grp == 4 && (reg == 0 || reg == 2)) {
> > +            /* Push A#.X */
> > +            tmp64 = tcg_temp_new_i64();
> > +            tcg_gen_shri_i64(tmp64, cpu_areg[reg >> 1], 32);
> > +            tmp = tcg_temp_new();
> > +            tcg_gen_trunc_i64_i32(tmp, tmp64);
> > +            tcg_temp_free_i64(tmp64);
> > +            tcg_gen_andi_tl(tmp, tmp, 0xff);
> 
> Do we ever allow the high 24 bits to be non-zero?  Is this andi actually
> redundant?

yes & no.  in the hardware, it'll always be 0.  there is some code which 
might sign extend things (so that you can correctly compare the 40bit 
accumulator using 64bit regs), and that would interact badly here.  maybe it 
would be better to drop this and force the accumulator handling code to do the 
right thing with the sign rather than store the result.

> > +    if (W == 1) {
> > +        /* [--SP] = ({d}R7:imm{dr}, {p}P5:imm{pr}); */
> > +        if (d) {
> > +            for (i = dr; i < 8; i++) {
> > +                tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
> > +                tcg_gen_qemu_st32(cpu_dreg[i], cpu_spreg, dc->mem_idx);
> > +            }
> > +        }
> 
> What's the cpu exception effect of the second store causing a page fault?
> Normally one needs to do the address increment in a temporary and only
> update the real SP register at the end, so that the instruction can be
> restarted.

this was me being lazy when getting started with the port ;).  the GNU/sim 
code matches the hardware and that is as you suspected -- the SP reg isn't 
updated until after all the push/pops finish.  i wasn't sweating the difference 
here too much as in user mode, the exception would just kill the program 
(ignoring apps that catch like SIGSEGV).

i'll have to generate a tmp reg based on SP and do it correctly though now 
that this port is out of the infant stage.

> > +#include "linux-fixed-code.h"
> > +
> > +static uint32_t bfin_lduw_code(DisasContext *dc, target_ulong pc)
> > +{
> > +#ifdef CONFIG_USER_ONLY
> > +    /* Intercept jump to the magic kernel page */
> > +    if (((dc->env->personality & 0xff/*PER_MASK*/) == 0/*PER_LINUX*/) &&
> > +        (pc & 0xFFFFFF00) == 0x400) {
> > +        uint32_t off = pc - 0x400;
> > +        if (off < sizeof(bfin_linux_fixed_code)) {
> > +            return ((uint16_t)bfin_linux_fixed_code[off + 1] << 8) |
> > +                   bfin_linux_fixed_code[off];
> > +        }
> > +    }
> > +#endif
> 
> Surely this memory setup belongs in linux-user/.

i couldn't find good examples previously to make this work so i put it in here.  
i came across the arm handling of its fixed code recently though (which i think 
does it in linux-user), so i'll see about moving it there.

> > +/* Interpret a single Blackfin insn; breaks up parallel insns */
> > +static void
> > +interp_insn_bfin(DisasContext *dc)
> > +{
> > +    _interp_insn_bfin(dc, dc->pc);
> 
> I'd prefer a suffix like "1" rather than a prefix of "_".

this matches the GNU/sim code, so i'd prefer to stick to that where possible.  
underscore prefix is a common "this is internal" indicator.

> > +static inline void cpu_get_tb_cpu_state(CPUArchState *env, target_ulong
> > *pc, +                                        target_ulong *cs_base, int
> > *flags) +{
> > +    *pc = cpu_get_pc(env);
> > +    *cs_base = 0;
> > +    *flags = env->astat[ASTAT_RND_MOD];
> > +}
> 
> You'll probably be better off with a bit that notes whether the loop
> registers are active, or something, so that you don't have to always
> generate code that handles them.

whether loop registers are active is purely based on the PC and the current 
values in the loop registers/counters.  the hardware logic is basically:
	# After every single insn is executed.
	oldpc = PC
	PC += insn_length;
	if (oldpc == LB1 && LC1) {
		PC = LT1;
		--LC1;
	}
	if (oldpc == LB0 && LC0) {
		PC = LT0;
		--LC0;
	}

i didn't think it was really possible to check the cpu state at runtime since 
ideally you'd generate a bunch of TB's, cache them, and then let them run 
w/out invoking the translator again.

> > +DEF_HELPER_3(raise_exception, void, env, i32, i32)
> 
> Lots of these can use better settings for flags.  Here, the only side
> effect is to raise an exception, which leads to reading the globals.  So
> TCG_CALL_NO_WG.

the flags are newer than the port, so i'll have to go through them all for a 
refresh

> > +static void gen_goto_tb(DisasContext *dc, int tb_num, TCGv dest)
> > +{
> > +/*
> > +    TranslationBlock *tb;
> > +    tb = dc->tb;
> > +
> > +    if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
> > +        tcg_gen_goto_tb(tb_num);
> > +        tcg_gen_mov_tl(cpu_pc, dest);
> > +        tcg_gen_exit_tb((long)tb + tb_num);
> > +    } else */{
> > +        gen_astat_update(dc, false);
> > +        tcg_gen_mov_tl(cpu_pc, dest);
> > +        tcg_gen_exit_tb(0);
> > +    }
> 
> Why the astat update here, when you have it on almost no other exits from
> the tb?

it's really the only way i could get it to [mostly] work :/.  i tried to figure 
out how x86 was handling its delayed eflags updates (since Blackfin will do 
pretty much the same exact thing for the same reason), but i failed at that 
too.

in general, the TB logic is magic to me.  there are a number of optimizations 
that are blocked because i haven't been able to figure it out.  i'm surprised 
it really works at all.  you can see how my gen_goto_tb always does 
tcg_gen_exit_tb(0) which forces a PC look up every time.
-mike
Eric Blake June 28, 2013, 1:47 p.m. UTC | #3
On 06/17/2013 01:16 AM, Mike Frysinger wrote:
> This is the core Blackfin support.  While most things work that gcc will
> generate, there are notable things missing at this point:
>  - many dsp/alu/mac insns not supported
>  - no saturation support
>  - many astat flags not updated
>  - probably other stuff
> Details as to what is missing "by design" vs "not done due to laziness"
> can be sorted out in the Blackfin README/TODO files.
> 
> FLAT and FDPIC ELFs however seem to work nicely, as do random samplings of
> apps from a typical build.
> 
> Signed-off-by: Mike Frysinger <vapier@gentoo.org>
> ---
>  MAINTAINERS                    |    5 +
>  configure                      |    4 +
>  cpu-exec.c                     |    5 +-
>  gdbstub.c                      |  103 ++
>  include/elf.h                  |    6 +
>  qapi-schema.json               |    9 +-

> +++ b/qapi-schema.json
> @@ -3023,10 +3023,11 @@
>  # Since: 1.2.0
>  ##
>  { 'enum': 'TargetType',
> -  'data': [ 'alpha', 'arm', 'cris', 'i386', 'lm32', 'm68k', 'microblazeel',
> -            'microblaze', 'mips64el', 'mips64', 'mipsel', 'mips', 'moxie',
> -            'or32', 'ppc64', 'ppcemb', 'ppc', 's390x', 'sh4eb', 'sh4',
> -            'sparc64', 'sparc', 'unicore32', 'x86_64', 'xtensaeb', 'xtensa' ] }
> +  'data': [ 'alpha', 'arm', 'bfin', 'cris', 'i386', 'lm32', 'm68k',
> +            'microblazeel', 'microblaze', 'mips64el', 'mips64', 'mipsel',
> +            'mips', 'moxie', 'or32', 'ppc64', 'ppcemb', 'ppc', 's390x', 'sh4eb',
> +            'sh4', 'sparc64', 'sparc', 'unicore32', 'x86_64', 'xtensaeb',
> +            'xtensa' ] }

This conflicts with Paolo's patches that removed TargetType.  Just drop
this hunk.
Andreas Färber June 28, 2013, 2:24 p.m. UTC | #4
Hi,

Am 17.06.2013 09:16, schrieb Mike Frysinger:
> diff --git a/target-bfin/cpu-qom.h b/target-bfin/cpu-qom.h
> new file mode 100644
> index 0000000..697797b
> --- /dev/null
> +++ b/target-bfin/cpu-qom.h

For a new target, a separate cpu-qom.h should be unnecessary - it has
become impossible to include it without cpu.h. Just inline it into cpu.h
and group CPUState vs. CPUBfinState stuff together there.

> @@ -0,0 +1,61 @@
> +/*
> + * QEMU Blackfin CPU
> + *
> + * Copyright 2007-2013 Mike Frysinger
> + * Copyright 2007-2011 Analog Devices, Inc.
> + *
> + * Licensed under the Lesser GPL 2 or later.
> + */
> +
> +#ifndef QEMU_BFIN_CPU_QOM_H
> +#define QEMU_BFIN_CPU_QOM_H
> +
> +#include "qom/cpu.h"
> +
> +#define TYPE_BFIN_CPU "bfin-cpu"
> +
> +#define BFIN_CPU_CLASS(klass) \
> +    OBJECT_CLASS_CHECK(BfinCPUClass, (klass), TYPE_BFIN_CPU)
> +#define BFIN_CPU(obj) \
> +    OBJECT_CHECK(BfinCPU, (obj), TYPE_BFIN_CPU)
> +#define BFIN_CPU_GET_CLASS(obj) \
> +    OBJECT_GET_CLASS(BfinCPUClass, (obj), TYPE_BFIN_CPU)
> +
> +/**
> + * BfinCPUClass:
> + * @parent_reset: The parent class' reset handler.
> + *
> + * An Bfin CPU model.
> + */
> +typedef struct BfinCPUClass {
> +    /*< private >*/
> +    CPUClass parent_class;
> +    /*< public >*/
> +
> +    void (*parent_reset)(CPUState *cpu);
> +} BfinCPUClass;
> +
> +/**
> + * BfinCPU:

QOM types should have verbose, readable names. Please use BlackfinCPU,
BlackfinCPUClass, TYPE_BLACKFIN_CPU.
By contrast, CPUBfinState, bfin_cpu_... and bfin-cpu are totally fine, I
guess.

> + * @env: #CPUArchState

Please don't use CPUArchState anywhere in bfin code except for the
#define CPUArchState CPUBfinState.

> + *
> + * An Bfin CPU.
> + */
> +typedef struct BfinCPU {
> +    /*< private >*/
> +    CPUState parent_obj;
> +    /*< public >*/
> +
> +    CPUArchState env;
> +} BfinCPU;
> +
> +static inline BfinCPU *bfin_env_get_cpu(CPUArchState *env)

CPUBfinState *env

> +{
> +    return BFIN_CPU(container_of(env, BfinCPU, env));

I've just posted a patch to drop these FOO_CPU() casts from all targets.
While there's no ACK yet, there was agreement on v1, so please just
return the container_of() here.

> +}
> +
> +#define ENV_GET_CPU(e) CPU(bfin_env_get_cpu(e))
> +
> +#define ENV_OFFSET offsetof(BfinCPU, env)
> +
> +#endif
> diff --git a/target-bfin/cpu.c b/target-bfin/cpu.c
> new file mode 100644
> index 0000000..871a1a1
> --- /dev/null
> +++ b/target-bfin/cpu.c
> @@ -0,0 +1,55 @@
> +/*
> + * QEMU Blackfin CPU
> + *
> + * Copyright 2007-2013 Mike Frysinger
> + * Copyright 2007-2011 Analog Devices, Inc.
> + *
> + * Licensed under the Lesser GPL 2 or later.
> + */
> +
> +#include "cpu.h"
> +#include "qemu-common.h"
> +
> +
> +/* CPUClass::reset() */
> +static void bfin_cpu_reset(CPUState *s)
> +{
> +    BfinCPU *cpu = BFIN_CPU(s);
> +    CPUArchState *env = &cpu->env;

CPUBfinState *

Missing memset(env, 0, offsetof(CPUBfinState, breakpoints)).

> +
> +    env->pc = 0xEF000000;
> +}
> +
> +static void bfin_cpu_initfn(Object *obj)
> +{
> +    CPUState *cs = CPU(obj);
> +    BfinCPU *cpu = BFIN_CPU(obj);
> +    CPUArchState *env = &cpu->env;
> +
> +    cs->env_ptr = env;
> +    cpu_exec_init(env);
> +}
> +
> +static void bfin_cpu_class_init(ObjectClass *oc, void *data)
> +{
> +    CPUClass *cc = CPU_CLASS(oc);
> +
> +    cc->reset = bfin_cpu_reset;
> +}
> +
> +static const TypeInfo bfin_cpu_type_info = {
> +    .name = TYPE_BFIN_CPU,
> +    .parent = TYPE_CPU,
> +    .instance_size = sizeof(BfinCPU),
> +    .instance_init = bfin_cpu_initfn,
> +    .abstract = false,
> +    .class_size = sizeof(BfinCPUClass),
> +    .class_init = bfin_cpu_class_init,
> +};
> +
> +static void bfin_cpu_register_types(void)
> +{
> +    type_register_static(&bfin_cpu_type_info);
> +}
> +
> +type_init(bfin_cpu_register_types)
> diff --git a/target-bfin/cpu.h b/target-bfin/cpu.h
> new file mode 100644
> index 0000000..d288197
> --- /dev/null
> +++ b/target-bfin/cpu.h
> @@ -0,0 +1,236 @@
> +/*
> + * Blackfin emulation
> + *
> + * Copyright 2007-2013 Mike Frysinger
> + * Copyright 2007-2011 Analog Devices, Inc.
> + *
> + * Licensed under the Lesser GPL 2 or later.
> + */
> +
> +#ifndef CPU_BFIN_H
> +#define CPU_BFIN_H
> +
> +struct DisasContext;
> +
> +#define TARGET_LONG_BITS 32
> +
> +#define ELF_MACHINE	EM_BLACKFIN
> +
> +#define CPUArchState struct CPUBfinState
> +
> +#include "config.h"
> +#include "qemu-common.h"
> +#include "exec/cpu-defs.h"
> +
> +#define TARGET_HAS_ICE 1
> +
> +#define EXCP_SYSCALL        0
> +#define EXCP_SOFT_BP        1
> +#define EXCP_STACK_OVERFLOW 3
> +#define EXCP_SINGLE_STEP    0x10
> +#define EXCP_TRACE_FULL     0x11
> +#define EXCP_UNDEF_INST     0x21
> +#define EXCP_ILL_INST       0x22
> +#define EXCP_DCPLB_VIOLATE  0x23
> +#define EXCP_DATA_MISALGIN  0x24
> +#define EXCP_UNRECOVERABLE  0x25
> +#define EXCP_DCPLB_MISS     0x26
> +#define EXCP_DCPLB_MULT     0x27
> +#define EXCP_EMU_WATCH      0x28
> +#define EXCP_MISALIG_INST   0x2a
> +#define EXCP_ICPLB_PROT     0x2b
> +#define EXCP_ICPLB_MISS     0x2c
> +#define EXCP_ICPLB_MULT     0x2d
> +#define EXCP_ILL_SUPV       0x2e
> +#define EXCP_ABORT          0x100
> +#define EXCP_DBGA           0x101
> +#define EXCP_OUTC           0x102
> +
> +#define CPU_INTERRUPT_NMI   CPU_INTERRUPT_TGT_EXT_1
> +
> +#define BFIN_L1_CACHE_BYTES 32
> +
> +/* Blackfin does 1K/4K/1M/4M, but for now only support 4k */
> +#define TARGET_PAGE_BITS    12
> +#define NB_MMU_MODES        2
> +
> +#define TARGET_PHYS_ADDR_SPACE_BITS 32
> +#define TARGET_VIRT_ADDR_SPACE_BITS 32
> +
> +#define cpu_init cpu_bfin_init
> +#define cpu_exec cpu_bfin_exec
> +#define cpu_gen_code cpu_bfin_gen_code
> +#define cpu_signal_handler cpu_bfin_signal_handler
> +
> +/* Indexes into astat array; matches bitpos in hardware too */
> +enum {
> +    ASTAT_AZ = 0,
> +    ASTAT_AN,
> +    ASTAT_AC0_COPY,
> +    ASTAT_V_COPY,
> +    ASTAT_CC = 5,
> +    ASTAT_AQ,
> +    ASTAT_RND_MOD = 8,
> +    ASTAT_AC0 = 12,
> +    ASTAT_AC1,
> +    ASTAT_AV0 = 16,
> +    ASTAT_AV0S,
> +    ASTAT_AV1,
> +    ASTAT_AV1S,
> +    ASTAT_V = 24,
> +    ASTAT_VS
> +};
> +
> +typedef struct CPUBfinState {
> +    CPU_COMMON
> +    int personality;
> +
> +    uint32_t dreg[8];
> +    uint32_t preg[8];
> +    uint32_t ireg[4];
> +    uint32_t mreg[4];
> +    uint32_t breg[4];
> +    uint32_t lreg[4];
> +    uint64_t areg[2];
> +    uint32_t rets;
> +    uint32_t lcreg[2], ltreg[2], lbreg[2];
> +    uint32_t cycles[2];
> +    uint32_t uspreg;
> +    uint32_t seqstat;
> +    uint32_t syscfg;
> +    uint32_t reti;
> +    uint32_t retx;
> +    uint32_t retn;
> +    uint32_t rete;
> +    uint32_t emudat;
> +    uint32_t pc;
> +
> +    /* ASTAT bits; broken up for speeeeeeeed */
> +    uint32_t astat[32];
> +    /* ASTAT delayed helpers */
> +    uint32_t astat_op, astat_arg[3];

Are you sure this field placement is what you want? Usually reset
memset()s all fields up to breakpoints (inside CPU_COMMON) so registers
are usually placed before CPU_COMMON.

Any field that is not a register accessed by TCG should rather be in
BlackfinCPU or BlackfinCPUClass - personality sounds like a candidate?

> +} CPUBfinState;
> +#define spreg preg[6]
> +#define fpreg preg[7]
> +
> +static inline uint32_t bfin_astat_read(CPUArchState *env)
> +{
> +    unsigned int i, ret;
> +
> +    ret = 0;
> +    for (i = 0; i < 32; ++i)
> +        ret |= (env->astat[i] << i);
> +
> +    return ret;
> +}
> +
> +static inline void bfin_astat_write(CPUArchState *env, uint32_t astat)
> +{
> +    unsigned int i;
> +    for (i = 0; i < 32; ++i)
> +        env->astat[i] = !!(astat & (1 << i));
> +}
> +
> +enum astat_ops {
> +    ASTAT_OP_NONE,
> +    ASTAT_OP_DYNAMIC,
> +    ASTAT_OP_ABS,
> +    ASTAT_OP_ABS_VECTOR,
> +    ASTAT_OP_ADD16,
> +    ASTAT_OP_ADD32,
> +    ASTAT_OP_ASHIFT16,
> +    ASTAT_OP_ASHIFT32,
> +    ASTAT_OP_COMPARE_SIGNED,
> +    ASTAT_OP_COMPARE_UNSIGNED,
> +    ASTAT_OP_LOGICAL,
> +    ASTAT_OP_LSHIFT16,
> +    ASTAT_OP_LSHIFT32,
> +    ASTAT_OP_LSHIFT_RT16,
> +    ASTAT_OP_LSHIFT_RT32,
> +    ASTAT_OP_MIN_MAX,
> +    ASTAT_OP_MIN_MAX_VECTOR,
> +    ASTAT_OP_NEGATE,
> +    ASTAT_OP_SUB16,
> +    ASTAT_OP_SUB32,
> +    ASTAT_OP_VECTOR_ADD_ADD,    /* +|+ */
> +    ASTAT_OP_VECTOR_ADD_SUB,    /* +|- */
> +    ASTAT_OP_VECTOR_SUB_SUB,    /* -|- */
> +    ASTAT_OP_VECTOR_SUB_ADD,    /* -|+ */
> +};
> +
> +typedef void (*hwloop_callback)(struct DisasContext *dc, int loop);
> +
> +typedef struct DisasContext {
> +    CPUArchState *env;
> +    struct TranslationBlock *tb;
> +    /* The current PC we're decoding (could be middle of parallel insn) */
> +    target_ulong pc;
> +    /* Length of current insn (2/4/8) */
> +    target_ulong insn_len;
> +
> +    /* For delayed ASTAT handling */
> +    enum astat_ops astat_op;
> +
> +    /* For hardware loop processing */
> +    hwloop_callback hwloop_callback;
> +    void *hwloop_data;
> +
> +    /* Was a DISALGNEXCPT used in this parallel insn ? */
> +    int disalgnexcpt;
> +
> +    int is_jmp;
> +    int mem_idx;
> +} DisasContext;
> +
> +void do_interrupt(CPUArchState *env);

do_interrupt() has recently been converted to a CPUClass hook.

> +CPUArchState *cpu_init(const char *cpu_model);
> +int cpu_exec(CPUArchState *s);
> +int cpu_bfin_signal_handler(int host_signum, void *pinfo, void *puc);
> +
> +extern const char * const greg_names[];
> +extern const char *get_allreg_name(int grp, int reg);
> +
> +#define MMU_KERNEL_IDX 0
> +#define MMU_USER_IDX   1
> +
> +int cpu_bfin_handle_mmu_fault(CPUArchState *env, target_ulong address, int rw,
> +                              int mmu_idx);
> +#define cpu_handle_mmu_fault cpu_bfin_handle_mmu_fault
> +
> +#if defined(CONFIG_USER_ONLY)
> +static inline void cpu_clone_regs(CPUArchState *env, target_ulong newsp)
> +{
> +    if (newsp)
> +        env->spreg = newsp;
> +}
> +#endif

Note there's a pending patch moving cpu_clone_regs() to
linux-user/*/target_cpu.h.

> +
> +#include "exec/cpu-all.h"
> +#include "cpu-qom.h"
> +
> +static inline bool cpu_has_work(CPUState *cpu)
> +{
> +    return (cpu->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI));
> +}
> +
> +#include "exec/exec-all.h"
> +
> +static inline void cpu_pc_from_tb(CPUArchState *env, TranslationBlock *tb)
> +{
> +    env->pc = tb->pc;
> +}
> +

> +static inline target_ulong cpu_get_pc(CPUArchState *env)
> +{
> +    return env->pc;
> +}

Unused?

> +
> +static inline void cpu_get_tb_cpu_state(CPUArchState *env, target_ulong *pc,
> +                                        target_ulong *cs_base, int *flags)
> +{
> +    *pc = cpu_get_pc(env);
> +    *cs_base = 0;
> +    *flags = env->astat[ASTAT_RND_MOD];
> +}
> +
> +#endif
[...]
> diff --git a/target-bfin/translate.c b/target-bfin/translate.c
> new file mode 100644
> index 0000000..a619f66
> --- /dev/null
> +++ b/target-bfin/translate.c
[...]
> +
> +CPUArchState *cpu_init(const char *cpu_model)
> +{
> +    BfinCPU *cpu;
> +    CPUArchState *env;
> +    static int tcg_initialized = 0;
> +
> +    cpu = BFIN_CPU(object_new(TYPE_BFIN_CPU));
> +    env = &cpu->env;
> +
> +    cpu_reset(CPU(cpu));
> +    qemu_init_vcpu(env);
> +
> +    if (tcg_initialized)
> +        return env;
> +
> +    tcg_initialized = 1;

Please place this into the CPU realizefn. Note that qemu_init_vcpu() is
being moved to generic code with my next pull.

In light of possible bfin-softmmu support, please turn this function
into BlackfinCPU *cpu_bfin_init(const char *cpu_model) and place
cpu_init() as a static inline compatibility wrapper into cpu.h.

> +
> +#define GEN_HELPER 2
> +#include "helper.h"
> +
> +    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
> +
> +    cpu_pc = tcg_global_mem_new(TCG_AREG0,
> +        offsetof(CPUArchState, pc), "PC");
> +    cpu_cc = tcg_global_mem_new(TCG_AREG0,
> +        offsetof(CPUArchState, astat[ASTAT_CC]), "CC");
> +
> +    /*cpu_astat_op = tcg_global_mem_new(TCG_AREG0,
> +        offsetof(CPUArchState, astat_op), "astat_op");*/
> +    cpu_astat_arg[0] = tcg_global_mem_new(TCG_AREG0,
> +        offsetof(CPUArchState, astat_arg[0]), "astat_arg[0]");
> +    cpu_astat_arg[1] = tcg_global_mem_new(TCG_AREG0,
> +        offsetof(CPUArchState, astat_arg[1]), "astat_arg[1]");
> +    cpu_astat_arg[2] = tcg_global_mem_new(TCG_AREG0,
> +        offsetof(CPUArchState, astat_arg[2]), "astat_arg[2]");
> +
> +    cpu_areg[0] = tcg_global_mem_new_i64(TCG_AREG0,
> +        offsetof(CPUArchState, areg[0]), "A0");
> +    cpu_areg[1] = tcg_global_mem_new_i64(TCG_AREG0,
> +        offsetof(CPUArchState, areg[1]), "A1");
> +
> +    bfin_tcg_new_set(dreg, 0);
> +    bfin_tcg_new_set(preg, 8);
> +    bfin_tcg_new_set(ireg, 16);
> +    bfin_tcg_new_set(mreg, 20);
> +    bfin_tcg_new_set(breg, 24);
> +    bfin_tcg_new_set(lreg, 28);
> +    bfin_tcg_new(rets, 39);
> +    bfin_tcg_new(lcreg[0], 48);
> +    bfin_tcg_new(ltreg[0], 49);
> +    bfin_tcg_new(lbreg[0], 50);
> +    bfin_tcg_new(lcreg[1], 51);
> +    bfin_tcg_new(ltreg[1], 52);
> +    bfin_tcg_new(lbreg[1], 53);
> +    bfin_tcg_new_set(cycles, 54);
> +    bfin_tcg_new(uspreg, 56);
> +    bfin_tcg_new(seqstat, 57);
> +    bfin_tcg_new(syscfg, 58);
> +    bfin_tcg_new(reti, 59);
> +    bfin_tcg_new(retx, 60);
> +    bfin_tcg_new(retn, 61);
> +    bfin_tcg_new(rete, 62);
> +    bfin_tcg_new(emudat, 63);
> +
> +    return env;
> +}
> +
> +#define _astat_printf(bit) cpu_fprintf(f, "%s" #bit " ", (env->astat[ASTAT_##bit] ? "" : "~"))
> +void cpu_dump_state(CPUArchState *env, FILE *f,
> +                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
> +                    int flags)
> +{

This will be converted to a CPUClass hook in my next pull.

[...]
> +static void
> +gen_intermediate_code_internal(CPUArchState *env, TranslationBlock *tb,
> +                               int search_pc)
> +{

Please use BlackfinCPU and bool arguments here; I am about to convert
all other targets (github.com/afaerber/qemu-cpu.git qom-cpu-11).

[...]
> +
> +void gen_intermediate_code(CPUArchState *env, struct TranslationBlock *tb)
> +{
> +    gen_intermediate_code_internal(env, tb, 0);
> +}
> +
> +void gen_intermediate_code_pc(CPUArchState *env, struct TranslationBlock *tb)
> +{
> +    gen_intermediate_code_internal(env, tb, 1);
> +}
> +
> +void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb, int pc_pos)
> +{
> +    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
> +}
> +
> +#include "bfin-sim.c"

Regards,
Andreas
diff mbox

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 3412b07..9563bb7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -69,6 +69,11 @@  F: target-arm/
 F: hw/arm/
 F: hw/cpu/a*mpcore.c
 
+Blackfin
+M: Mike Frysinger <vapier@gentoo.org>
+S: Maintained
+F: target-bfin/
+
 CRIS
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
diff --git a/configure b/configure
index bc7a9c0..95dc66b 100755
--- a/configure
+++ b/configure
@@ -4146,6 +4146,10 @@  case "$target_arch2" in
     target_nptl="yes"
     gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
   ;;
+  bfin)
+    bflt="yes"
+    target_phys_bits=32
+  ;;
   cris)
     target_nptl="yes"
   ;;
diff --git a/cpu-exec.c b/cpu-exec.c
index ec46380..98a99d8 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -249,6 +249,7 @@  int cpu_exec(CPUArchState *env)
 #elif defined(TARGET_MOXIE)
 #elif defined(TARGET_OPENRISC)
 #elif defined(TARGET_SH4)
+#elif defined(TARGET_BFIN)
 #elif defined(TARGET_CRIS)
 #elif defined(TARGET_S390X)
 #elif defined(TARGET_XTENSA)
@@ -302,7 +303,8 @@  int cpu_exec(CPUArchState *env)
                     }
 #if defined(TARGET_ARM) || defined(TARGET_SPARC) || defined(TARGET_MIPS) || \
     defined(TARGET_PPC) || defined(TARGET_ALPHA) || defined(TARGET_CRIS) || \
-    defined(TARGET_MICROBLAZE) || defined(TARGET_LM32) || defined(TARGET_UNICORE32)
+    defined(TARGET_MICROBLAZE) || defined(TARGET_LM32) || defined(TARGET_UNICORE32) || \
+    defined(TARGET_BFIN)
                     if (interrupt_request & CPU_INTERRUPT_HALT) {
                         cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
                         cpu->halted = 1;
@@ -698,6 +700,7 @@  int cpu_exec(CPUArchState *env)
 #elif defined(TARGET_MOXIE)
 #elif defined(TARGET_OPENRISC)
 #elif defined(TARGET_SH4)
+#elif defined(TARGET_BFIN)
 #elif defined(TARGET_ALPHA)
 #elif defined(TARGET_CRIS)
 #elif defined(TARGET_S390X)
diff --git a/gdbstub.c b/gdbstub.c
index 94c78ce..0947a31 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1780,6 +1780,107 @@  static int cpu_gdb_write_register(CPUXtensaState *env, uint8_t *mem_buf, int n)
 
     return 4;
 }
+#elif defined (TARGET_BFIN)
+
+#include "target-bfin/bfin-tdep.h"
+
+#define NUM_CORE_REGS BFIN_NUM_REGS
+
+static int cpu_gdb_read_register(CPUArchState *env, uint8_t *mem_buf, int n)
+{
+    switch (n) {
+        case BFIN_R0_REGNUM ... BFIN_R7_REGNUM:
+            GET_REGL(env->dreg[n - BFIN_R0_REGNUM]); break;
+        case BFIN_P0_REGNUM ... BFIN_FP_REGNUM:
+            GET_REGL(env->preg[n - BFIN_P0_REGNUM]); break;
+        case BFIN_I0_REGNUM ... BFIN_I3_REGNUM:
+            GET_REGL(env->ireg[n - BFIN_I0_REGNUM]); break;
+        case BFIN_M0_REGNUM ... BFIN_M3_REGNUM:
+            GET_REGL(env->mreg[n - BFIN_M0_REGNUM]); break;
+        case BFIN_B0_REGNUM ... BFIN_B3_REGNUM:
+            GET_REGL(env->breg[n - BFIN_B0_REGNUM]); break;
+        case BFIN_L0_REGNUM ... BFIN_L3_REGNUM:
+            GET_REGL(env->lreg[n - BFIN_L0_REGNUM]); break;
+        case BFIN_A0_DOT_X_REGNUM: GET_REGL((env->areg[0] >> 32) & 0xff); break;
+        case BFIN_A0_DOT_W_REGNUM: GET_REGL(env->areg[0]); break;
+        case BFIN_A1_DOT_X_REGNUM: GET_REGL((env->areg[1] >> 32) & 0xff); break;
+        case BFIN_A1_DOT_W_REGNUM: GET_REGL(env->areg[1]); break;
+        case BFIN_ASTAT_REGNUM: GET_REGL(bfin_astat_read(env)); break;
+        case BFIN_RETS_REGNUM: GET_REGL(env->rets); break;
+        case BFIN_LC0_REGNUM: GET_REGL(env->lcreg[0]); break;
+        case BFIN_LT0_REGNUM: GET_REGL(env->ltreg[0]); break;
+        case BFIN_LB0_REGNUM: GET_REGL(env->lbreg[0]); break;
+        case BFIN_LC1_REGNUM: GET_REGL(env->lcreg[1]); break;
+        case BFIN_LT1_REGNUM: GET_REGL(env->ltreg[1]); break;
+        case BFIN_LB1_REGNUM: GET_REGL(env->lbreg[1]); break;
+        case BFIN_CYCLES_REGNUM ... BFIN_CYCLES2_REGNUM:
+            GET_REGL(env->cycles[n - BFIN_CYCLES_REGNUM]); break;
+        case BFIN_USP_REGNUM: GET_REGL(env->uspreg); break;
+        case BFIN_SEQSTAT_REGNUM: GET_REGL(env->seqstat); break;
+        case BFIN_SYSCFG_REGNUM: GET_REGL(env->syscfg); break;
+        case BFIN_RETI_REGNUM: GET_REGL(env->reti); break;
+        case BFIN_RETX_REGNUM: GET_REGL(env->retx); break;
+        case BFIN_RETN_REGNUM: GET_REGL(env->retn); break;
+        case BFIN_RETE_REGNUM: GET_REGL(env->rete); break;
+        case BFIN_PC_REGNUM: GET_REGL(env->pc); break;
+    }
+
+    return 0;
+}
+
+static int cpu_gdb_write_register(CPUArchState *env, uint8_t *mem_buf, int n)
+{
+    target_ulong tmpl;
+    int r = 4;
+    tmpl = ldtul_p(mem_buf);
+
+    switch (n) {
+        case BFIN_R0_REGNUM ... BFIN_R7_REGNUM:
+            env->dreg[n - BFIN_R0_REGNUM] = tmpl; break;
+        case BFIN_P0_REGNUM ... BFIN_FP_REGNUM:
+            env->preg[n - BFIN_P0_REGNUM] = tmpl; break;
+        case BFIN_I0_REGNUM ... BFIN_I3_REGNUM:
+            env->ireg[n - BFIN_I0_REGNUM] = tmpl; break;
+        case BFIN_M0_REGNUM ... BFIN_M3_REGNUM:
+            env->mreg[n - BFIN_M0_REGNUM] = tmpl; break;
+        case BFIN_B0_REGNUM ... BFIN_B3_REGNUM:
+            env->breg[n - BFIN_B0_REGNUM] = tmpl; break;
+        case BFIN_L0_REGNUM ... BFIN_L3_REGNUM:
+            env->lreg[n - BFIN_L0_REGNUM] = tmpl; break;
+        case BFIN_A0_DOT_X_REGNUM:
+            env->areg[0] = (env->areg[0] & 0xffffffff) | ((uint64_t)tmpl << 32);
+            break;
+        case BFIN_A0_DOT_W_REGNUM:
+            env->areg[0] = (env->areg[0] & ~0xffffffff) | tmpl;
+            break;
+        case BFIN_A1_DOT_X_REGNUM:
+            env->areg[1] = (env->areg[1] & 0xffffffff) | ((uint64_t)tmpl << 32);
+            break;
+        case BFIN_A1_DOT_W_REGNUM:
+            env->areg[1] = (env->areg[1] & ~0xffffffff) | tmpl;
+            break;
+        case BFIN_ASTAT_REGNUM: bfin_astat_write(env, tmpl); break;
+        case BFIN_RETS_REGNUM: env->rets = tmpl; break;
+        case BFIN_LC0_REGNUM: env->lcreg[0] = tmpl; break;
+        case BFIN_LT0_REGNUM: env->ltreg[0] = tmpl; break;
+        case BFIN_LB0_REGNUM: env->lbreg[0] = tmpl; break;
+        case BFIN_LC1_REGNUM: env->lcreg[1] = tmpl; break;
+        case BFIN_LT1_REGNUM: env->ltreg[1] = tmpl; break;
+        case BFIN_LB1_REGNUM: env->lbreg[1] = tmpl; break;
+        case BFIN_CYCLES_REGNUM ... BFIN_CYCLES2_REGNUM:
+            env->cycles[n - BFIN_CYCLES_REGNUM] = tmpl; break;
+        case BFIN_USP_REGNUM: env->uspreg = tmpl; break;
+        case BFIN_SEQSTAT_REGNUM: env->seqstat = tmpl; break;
+        case BFIN_SYSCFG_REGNUM: env->syscfg = tmpl; break;
+        case BFIN_RETI_REGNUM: env->reti = tmpl; break;
+        case BFIN_RETX_REGNUM: env->retx = tmpl; break;
+        case BFIN_RETN_REGNUM: env->retn = tmpl; break;
+        case BFIN_RETE_REGNUM: env->rete = tmpl; break;
+        case BFIN_PC_REGNUM: env->pc = tmpl; break;
+    }
+
+    return r;
+}
 #else
 
 #define NUM_CORE_REGS 0
@@ -2066,6 +2167,8 @@  static void gdb_set_cpu_pc(GDBState *s, target_ulong pc)
     s->c_cpu->pc = pc;
 #elif defined(TARGET_XTENSA)
     s->c_cpu->pc = pc;
+#elif defined (TARGET_BFIN)
+    s->c_cpu->pc = pc;
 #endif
 }
 
diff --git a/include/elf.h b/include/elf.h
index cf0d3e2..110e7b8 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -104,6 +104,7 @@  typedef int64_t  Elf64_Sxword;
 
 #define EM_H8_300H      47      /* Hitachi H8/300H */
 #define EM_H8S          48      /* Hitachi H8S     */
+#define EM_BLACKFIN	106	/* Analog Devices Blackfin */
 #define EM_LATTICEMICO32 138    /* LatticeMico32 */
 
 #define EM_OPENRISC     92        /* OpenCores OpenRISC */
@@ -848,6 +849,11 @@  typedef struct {
 
 #define EF_ALPHA_32BIT		1	/* All addresses are below 2GB */
 
+/* Blackfin specific definitions.  */
+
+#define EF_BFIN_PIC		0x00000001	/* -fpic */
+#define EF_BFIN_FDPIC		0x00000002      /* -mfdpic */
+
 /* HPPA specific definitions.  */
 
 /* Legal values for e_flags field of Elf32_Ehdr.  */
diff --git a/qapi-schema.json b/qapi-schema.json
index 5ad6894..c7f4304 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -3023,10 +3023,11 @@ 
 # Since: 1.2.0
 ##
 { 'enum': 'TargetType',
-  'data': [ 'alpha', 'arm', 'cris', 'i386', 'lm32', 'm68k', 'microblazeel',
-            'microblaze', 'mips64el', 'mips64', 'mipsel', 'mips', 'moxie',
-            'or32', 'ppc64', 'ppcemb', 'ppc', 's390x', 'sh4eb', 'sh4',
-            'sparc64', 'sparc', 'unicore32', 'x86_64', 'xtensaeb', 'xtensa' ] }
+  'data': [ 'alpha', 'arm', 'bfin', 'cris', 'i386', 'lm32', 'm68k',
+            'microblazeel', 'microblaze', 'mips64el', 'mips64', 'mipsel',
+            'mips', 'moxie', 'or32', 'ppc64', 'ppcemb', 'ppc', 's390x', 'sh4eb',
+            'sh4', 'sparc64', 'sparc', 'unicore32', 'x86_64', 'xtensaeb',
+            'xtensa' ] }
 
 ##
 # @TargetInfo:
diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh
index 0da2618..52b9ef7 100644
--- a/scripts/qemu-binfmt-conf.sh
+++ b/scripts/qemu-binfmt-conf.sh
@@ -41,6 +41,10 @@  if [ $cpu != "arm" ] ; then
     echo   ':arm:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-arm:' > /proc/sys/fs/binfmt_misc/register
     echo   ':armeb:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-armeb:' > /proc/sys/fs/binfmt_misc/register
 fi
+if [ $cpu != "bfin" ] ; then
+    echo   ':bfin:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00''\x02\x00''\x6A\x00''::/usr/local/bin/qemu-bfin:' > /proc/sys/fs/binfmt_misc/register
+    echo   ':bfin-flat:M::bFLT\x00\x00\x00\x04::/usr/local/bin/qemu-bfin:' > /proc/sys/fs/binfmt_misc/register
+fi
 if [ $cpu != "sparc" ] ; then
     echo   ':sparc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-sparc:' > /proc/sys/fs/binfmt_misc/register
 fi
diff --git a/target-bfin/Makefile.objs b/target-bfin/Makefile.objs
new file mode 100644
index 0000000..7e6c528
--- /dev/null
+++ b/target-bfin/Makefile.objs
@@ -0,0 +1,3 @@ 
+obj-y += translate.o op_helper.o helper.o cpu.o
+
+$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-bfin/README b/target-bfin/README
new file mode 100644
index 0000000..25f7fde
--- /dev/null
+++ b/target-bfin/README
@@ -0,0 +1,32 @@ 
+------------------
+Blackfin QEMU port
+------------------
+
+There are some things we don't bother handling in the port for speed reasons.
+If you want an accurate (but not as fast) simulator, then use the GNU sim as
+found in the GNU toolchain (part of gdb).
+
+Things we do not currently handle by design:
+
+	- invalid parallel instruction combinations
+		- no toolchain will output these
+		- things like jumps
+
+	- invalid register combinations
+		- some insns cannot have same register be both source and dest
+		- no toolchain will output these
+
+	- transactional parallel instructions
+		- on the hardware, if a load/store causes an exception, the other
+		  insns do not change register states either.  in qemu, they do,
+		  but since those exceptions will kill the program anyways, who
+		  cares.  no intermediate store buffers!
+
+	- AC0_COPY and V_COPY
+		- no one has ever used these instead of AC0 or V
+
+	- no support for RND_MOD
+
+There are a few insns/modes we don't currently handle, but it's more a matter
+of nothing really uses these, so we haven't bothered.  If these matter to you,
+then feel free to request support for them.
diff --git a/target-bfin/TODO b/target-bfin/TODO
new file mode 100644
index 0000000..80802bd
--- /dev/null
+++ b/target-bfin/TODO
@@ -0,0 +1,25 @@ 
+CEC behavior in user-emulation (SP vs USP)
+
+see if making a global "0", "1", "2", and "4" register speeds things up
+
+TB chaining is not implemented
+
+we often over-translate code blocks.  consider a bfin mem/str func:
+	{
+	[1] setup code
+	[2] hwloop0
+	[3] some other stuff
+	[4] hwloop1
+	[5] clean up / return
+	}
+the first TB will go from the start to the end (since there are no
+unconditional branches).  then when we hit the hwloop bottom, we jump
+back up to the top of the hwloop and a new TB which goes all the way
+to the end of the func.  so we end up with the TBs covering:
+	{1-5} {2-5} {3-5} {4-5} {5-5}
+In reality, we probably want to have the TBs to be like:
+	{[1] to LSETUP then to LT0 (usually the same)}
+	{[2] LT0 to LB0}
+	{[3] to LSETUP then to LT1 (usually the same)}
+	{[4] LT1 to LB1}
+	{[5]}
diff --git a/target-bfin/bfin-sim.c b/target-bfin/bfin-sim.c
new file mode 100644
index 0000000..560fada
--- /dev/null
+++ b/target-bfin/bfin-sim.c
@@ -0,0 +1,3666 @@ 
+/*
+ * Simulator for Analog Devices Blackfin processors.
+ *
+ * Copyright 2005-2013 Mike Frysinger
+ * Copyright 2005-2011 Analog Devices, Inc.
+ *
+ * Licensed under the GPL 2 or later.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+/* Log the decoded instruction fields to the qemu log when the
+   CPU_LOG_TB_CPU mask is enabled.  The "if (1)" is a quick
+   compile-time toggle for disabling the tracing during debug.  */
+#define TRACE_EXTRACT(fmt, args...) \
+do { \
+    if (1) \
+        qemu_log_mask(CPU_LOG_TB_CPU, "%s: " fmt "\n", __func__, ## args); \
+} while (0)
+
+/* Raise an undefined-instruction exception for the insn currently
+   being translated.  */
+static void
+illegal_instruction(DisasContext *dc)
+{
+    cec_exception(dc, EXCP_UNDEF_INST);
+}
+
+/* Report an insn the decoder recognizes but does not implement, then
+   treat it as illegal.  The stderr message tells users which insn to
+   request support for (see the README/TODO files).  */
+static void
+unhandled_instruction(DisasContext *dc, const char *insn)
+{
+    fprintf(stderr, "unhandled insn: %s\n", insn);
+    illegal_instruction(dc);
+}
+
+/* One enumerator per immediate/constant encoding used by the Blackfin
+   ISA.  These index constant_formats[] below, so the order of the two
+   must be kept in sync.  */
+typedef enum {
+    c_0, c_1, c_4, c_2, c_uimm2, c_uimm3, c_imm3, c_pcrel4,
+    c_imm4, c_uimm4s4, c_uimm4s4d, c_uimm4, c_uimm4s2, c_negimm5s4, c_imm5, c_imm5d, c_uimm5, c_imm6,
+    c_imm7, c_imm7d, c_imm8, c_uimm8, c_pcrel8, c_uimm8s4, c_pcrel8s4, c_lppcrel10, c_pcrel10,
+    c_pcrel12, c_imm16s4, c_luimm16, c_imm16, c_imm16d, c_huimm16, c_rimm16, c_imm16s2, c_uimm16s4,
+    c_uimm16s4d, c_uimm16, c_pcrel24, c_uimm32, c_imm32, c_huimm32, c_huimm32e,
+} const_forms_t;
+
+/* Decode attributes for each immediate form: bit width, signedness,
+   PC-relativity, scale (left shift applied after extraction), additive
+   offset, implied sign bit ("negative"), and disassembly hints.  Rows
+   must stay in the same order as const_forms_t above.  */
+static const struct {
+    const char *name;
+    const int nbits;
+    const char reloc;
+    const char issigned;
+    const char pcrel;
+    const char scale;
+    const char offset;
+    const char negative;
+    const char positive;
+    const char decimal;
+    const char leading;
+    const char exact;
+} constant_formats[] = {
+    { "0",          0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "1",          0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "4",          0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "2",          0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "uimm2",      2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "uimm3",      3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm3",       3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "pcrel4",     4, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0},
+    { "imm4",       4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "uimm4s4",    4, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0},
+    { "uimm4s4d",   4, 0, 0, 0, 2, 0, 0, 1, 1, 0, 0},
+    { "uimm4",      4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "uimm4s2",    4, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0},
+    { "negimm5s4",  5, 0, 1, 0, 2, 0, 1, 0, 0, 0, 0},
+    { "imm5",       5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm5d",      5, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0},
+    { "uimm5",      5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm6",       6, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm7",       7, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm7d",      7, 0, 1, 0, 0, 0, 0, 0, 1, 3, 0},
+    { "imm8",       8, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "uimm8",      8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "pcrel8",     8, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0},
+    { "uimm8s4",    8, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0},
+    { "pcrel8s4",   8, 1, 1, 1, 2, 0, 0, 0, 0, 0, 0},
+    { "lppcrel10", 10, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0},
+    { "pcrel10",   10, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
+    { "pcrel12",   12, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
+    { "imm16s4",   16, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0},
+    { "luimm16",   16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm16",     16, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm16d",    16, 0, 1, 0, 0, 0, 0, 0, 1, 3, 0},
+    { "huimm16",   16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "rimm16",    16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm16s2",   16, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0},
+    { "uimm16s4",  16, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0},
+    { "uimm16s4d", 16, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0},
+    { "uimm16",    16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "pcrel24",   24, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
+    { "uimm32",    32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm32",     32, 0, 1, 0, 0, 0, 0, 0, 1, 3, 0},
+    { "huimm32",   32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "huimm32e",  32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1},
+};
+
+#define HOST_LONG_WORD_SIZE (sizeof(long) * 8)
+/* Sign-extend the low N bits of V (1 <= N <= 32) to a signed 32-bit
+   value.  The previous definition shifted an int32_t by
+   (HOST_LONG_WORD_SIZE - n), i.e. by up to 63 on LP64 hosts, which is
+   undefined behavior (shift count >= type width, C11 6.5.7); left-
+   shifting one bits into the sign bit of a signed type is likewise
+   UB.  Do the left shift on uint32_t with a 32-bit width instead; the
+   arithmetic right shift of a negative int32_t is implementation-
+   defined but is an arithmetic shift on all supported compilers.  */
+#define SIGNEXTEND(v, n) \
+    ((int32_t)((uint32_t)(v) << (32 - (n))) >> (32 - (n)))
+
+/* Decode a raw immediate field X according to format CF: apply the
+   implied sign bit for negative-only forms, sign-extend signed forms,
+   then apply the format's additive offset and scale (left shift).  */
+static uint32_t
+fmtconst_val(const_forms_t cf, uint32_t x)
+{
+    /* Negative constants have an implied sign bit.  */
+    if (constant_formats[cf].negative) {
+        int nb = constant_formats[cf].nbits + 1;
+        x = x | (1 << constant_formats[cf].nbits);
+        x = SIGNEXTEND(x, nb);
+    } else if (constant_formats[cf].issigned) {
+        x = SIGNEXTEND(x, constant_formats[cf].nbits);
+    }
+
+    /* Offset first, then scale, matching the encoding definition.  */
+    x += constant_formats[cf].offset;
+    x <<= constant_formats[cf].scale;
+
+    return x;
+}
+
+/* Convenience wrappers so the decoders can write e.g. imm16(x) to get
+   the decoded value of a 16-bit signed immediate field.  */
+#define uimm16s4(x)  fmtconst_val(c_uimm16s4, x)
+#define uimm16s4d(x) fmtconst_val(c_uimm16s4d, x)
+#define pcrel4(x)    fmtconst_val(c_pcrel4, x)
+#define pcrel8(x)    fmtconst_val(c_pcrel8, x)
+#define pcrel8s4(x)  fmtconst_val(c_pcrel8s4, x)
+#define pcrel10(x)   fmtconst_val(c_pcrel10, x)
+#define pcrel12(x)   fmtconst_val(c_pcrel12, x)
+#define negimm5s4(x) fmtconst_val(c_negimm5s4, x)
+#define rimm16(x)    fmtconst_val(c_rimm16, x)
+#define huimm16(x)   fmtconst_val(c_huimm16, x)
+#define imm16(x)     fmtconst_val(c_imm16, x)
+#define imm16d(x)    fmtconst_val(c_imm16d, x)
+#define uimm2(x)     fmtconst_val(c_uimm2, x)
+#define uimm3(x)     fmtconst_val(c_uimm3, x)
+#define luimm16(x)   fmtconst_val(c_luimm16, x)
+#define uimm4(x)     fmtconst_val(c_uimm4, x)
+#define uimm5(x)     fmtconst_val(c_uimm5, x)
+#define imm16s2(x)   fmtconst_val(c_imm16s2, x)
+#define uimm8(x)     fmtconst_val(c_uimm8, x)
+#define imm16s4(x)   fmtconst_val(c_imm16s4, x)
+#define uimm4s2(x)   fmtconst_val(c_uimm4s2, x)
+#define uimm4s4(x)   fmtconst_val(c_uimm4s4, x)
+#define uimm4s4d(x)  fmtconst_val(c_uimm4s4d, x)
+#define lppcrel10(x) fmtconst_val(c_lppcrel10, x)
+#define imm3(x)      fmtconst_val(c_imm3, x)
+#define imm4(x)      fmtconst_val(c_imm4, x)
+#define uimm8s4(x)   fmtconst_val(c_uimm8s4, x)
+#define imm5(x)      fmtconst_val(c_imm5, x)
+#define imm5d(x)     fmtconst_val(c_imm5d, x)
+#define imm6(x)      fmtconst_val(c_imm6, x)
+#define imm7(x)      fmtconst_val(c_imm7, x)
+#define imm7d(x)     fmtconst_val(c_imm7d, x)
+#define imm8(x)      fmtconst_val(c_imm8, x)
+#define pcrel24(x)   fmtconst_val(c_pcrel24, x)
+#define uimm16(x)    fmtconst_val(c_uimm16, x)
+#define uimm32(x)    fmtconst_val(c_uimm32, x)
+#define imm32(x)     fmtconst_val(c_imm32, x)
+#define huimm32(x)   fmtconst_val(c_huimm32, x)
+#define huimm32e(x)  fmtconst_val(c_huimm32e, x)
+
+/* Table C-4. Core Register Encoding Map */
+/* Printable register names indexed by (grp << 3) | reg; "<res>"
+   marks reserved encodings with no architectural register.  */
+const char * const greg_names[] = {
+    "R0",    "R1",      "R2",     "R3",    "R4",    "R5",    "R6",     "R7",
+    "P0",    "P1",      "P2",     "P3",    "P4",    "P5",    "SP",     "FP",
+    "I0",    "I1",      "I2",     "I3",    "M0",    "M1",    "M2",     "M3",
+    "B0",    "B1",      "B2",     "B3",    "L0",    "L1",    "L2",     "L3",
+    "A0.X",  "A0.W",    "A1.X",   "A1.W",  "<res>", "<res>", "ASTAT",  "RETS",
+    "<res>", "<res>",   "<res>",  "<res>", "<res>", "<res>", "<res>",  "<res>",
+    "LC0",   "LT0",     "LB0",    "LC1",   "LT1",   "LB1",   "CYCLES", "CYCLES2",
+    "USP",   "SEQSTAT", "SYSCFG", "RETI",  "RETX",  "RETN",  "RETE",   "EMUDAT",
+};
+
+/* Map a (group, register) encoding pair to its printable name.  */
+const char *
+get_allreg_name(int grp, int reg)
+{
+    int idx = (grp << 3) | reg;
+
+    return greg_names[idx];
+}
+
+/* TCG globals backing each entry of the Table C-4 encoding map, using
+   the same (grp << 3) | reg indexing as greg_names.  NULL marks
+   reserved encodings with no backing register.  */
+static TCGv * const cpu_regs[] = {
+    &cpu_dreg[0], &cpu_dreg[1], &cpu_dreg[2], &cpu_dreg[3], &cpu_dreg[4], &cpu_dreg[5], &cpu_dreg[6], &cpu_dreg[7],
+    &cpu_preg[0], &cpu_preg[1], &cpu_preg[2], &cpu_preg[3], &cpu_preg[4], &cpu_preg[5], &cpu_preg[6], &cpu_preg[7],
+    &cpu_ireg[0], &cpu_ireg[1], &cpu_ireg[2], &cpu_ireg[3], &cpu_mreg[0], &cpu_mreg[1], &cpu_mreg[2], &cpu_mreg[3],
+    &cpu_breg[0], &cpu_breg[1], &cpu_breg[2], &cpu_breg[3], &cpu_lreg[0], &cpu_lreg[1], &cpu_lreg[2], &cpu_lreg[3],
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, &cpu_rets,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    &cpu_lcreg[0], &cpu_ltreg[0], &cpu_lbreg[0], &cpu_lcreg[1], &cpu_ltreg[1], &cpu_lbreg[1], &cpu_cycles[0], &cpu_cycles[1],
+    &cpu_uspreg, &cpu_seqstat, &cpu_syscfg, &cpu_reti, &cpu_retx, &cpu_retn, &cpu_rete, &cpu_emudat,
+};
+
+/* Return the TCG register backing the (grp, reg) encoding.  Decoders
+   are expected never to pass a reserved encoding; if one slips
+   through, flag it as an illegal instruction and abort, since there
+   is no TCGv we could hand back.  (Previously abort() came first,
+   which made the illegal_instruction() call unreachable dead code.)  */
+static TCGv
+get_allreg(DisasContext *dc, int grp, int reg)
+{
+    TCGv *ret = cpu_regs[(grp << 3) | reg];
+    if (ret) {
+        return *ret;
+    }
+    illegal_instruction(dc);
+    abort();
+}
+
+/* Group 7 holds the system registers (USP/SEQSTAT/SYSCFG/RET*), per
+   the greg_names table above; accessing them requires supervisor
+   mode, so emit the privilege check for that group only.  */
+static void
+reg_check_sup(DisasContext *dc, int grp, int reg)
+{
+    if (grp == 7) {
+        cec_require_supervisor(dc);
+    }
+}
+
+/* Clamp an accumulator to its architectural 40 bits, or sign-extend
+   those 40 bits to the full 64-bit TCG value, respectively.  */
+#define gen_unextend_acc(acc) tcg_gen_andi_i64(acc, acc, 0xffffffffffull)
+#define gen_extend_acc(acc) gen_extNsi_i64(acc, acc, 40)
+
+/* Perform a multiplication of D registers SRC0 and SRC1, sign- or
+   zero-extending the result to 64 bit.  H0 and H1 determine whether the
+   high part or the low part of the source registers is used.  Store 1 in
+   *PSAT if saturation occurs, 0 otherwise.  */
+static TCGv
+decode_multfunc_tl(DisasContext *dc, int h0, int h1, int src0, int src1,
+                   int mmod, int MM, TCGv psat)
+{
+    TCGv s0, s1, val;
+
+    /* Pick the requested 16-bit half of each source register.  */
+    s0 = tcg_temp_local_new();
+    if (h0) {
+        tcg_gen_shri_tl(s0, cpu_dreg[src0], 16);
+    } else {
+        tcg_gen_andi_tl(s0, cpu_dreg[src0], 0xffff);
+    }
+
+    s1 = tcg_temp_local_new();
+    if (h1) {
+        tcg_gen_shri_tl(s1, cpu_dreg[src1], 16);
+    } else {
+        tcg_gen_andi_tl(s1, cpu_dreg[src1], 0xffff);
+    }
+
+    if (MM) {
+        /* Mixed-mode: only operand 0 is treated as signed.  */
+        tcg_gen_ext16s_tl(s0, s0);
+    } else {
+        switch (mmod) {
+        case 0:
+        case M_S2RND:
+        case M_T:
+        case M_IS:
+        case M_ISS2:
+        case M_IH:
+        case M_W32:
+            /* Signed modes: sign-extend both halves.  */
+            tcg_gen_ext16s_tl(s0, s0);
+            tcg_gen_ext16s_tl(s1, s1);
+            break;
+        case M_FU:
+        case M_IU:
+        case M_TFU:
+            /* Unsigned modes: halves stay zero-extended.  */
+            break;
+        default:
+            illegal_instruction(dc);
+        }
+    }
+
+    val = tcg_temp_local_new();
+    tcg_gen_mul_tl(val, s0, s1);
+    tcg_temp_free(s0);
+    tcg_temp_free(s1);
+
+    /* Perform shift correction if appropriate for the mode.  */
+    /* Fractional modes double the product; 0x8000 * 0x8000 (product
+       0x40000000) is the one case where the <<1 would overflow, so it
+       saturates and sets psat instead.  */
+    tcg_gen_movi_tl(psat, 0);
+    if (!MM && (mmod == 0 || mmod == M_T || mmod == M_S2RND || mmod == M_W32)) {
+        int l, endl;
+
+        l = gen_new_label();
+        endl = gen_new_label();
+
+        tcg_gen_brcondi_tl(TCG_COND_NE, val, 0x40000000, l);
+        if (mmod == M_W32) {
+            tcg_gen_movi_tl(val, 0x7fffffff);
+        } else {
+            tcg_gen_movi_tl(val, 0x80000000);
+        }
+        tcg_gen_movi_tl(psat, 1);
+        tcg_gen_br(endl);
+
+        gen_set_label(l);
+        tcg_gen_shli_tl(val, val, 1);
+
+        gen_set_label(endl);
+    }
+
+    return val;
+}
+
+/* Like decode_multfunc_tl, but widen the 32-bit product to 64 bits
+   for accumulation: sign-extend through bit 39 for the signed modes,
+   and force a plain zero extension whenever the 32-bit multiply
+   already saturated (psat != 0).  */
+static TCGv_i64
+decode_multfunc_i64(DisasContext *dc, int h0, int h1, int src0, int src1,
+                    int mmod, int MM, TCGv psat)
+{
+    TCGv val;
+    TCGv_i64 val1;
+    int l;
+
+    val = decode_multfunc_tl(dc, h0, h1, src0, src1, mmod, MM, psat);
+    val1 = tcg_temp_local_new_i64();
+    tcg_gen_extu_i32_i64(val1, val);
+    tcg_temp_free(val);
+
+    if (mmod == 0 || mmod == M_IS || mmod == M_T || mmod == M_S2RND ||
+        mmod == M_ISS2 || mmod == M_IH || (MM && mmod == M_FU)) {
+        gen_extNsi_i64(val1, val1, 40);
+    }
+
+    /* Saturated products are kept as unsigned 32-bit values.  */
+    l = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, psat, 0, l);
+    tcg_gen_ext32u_i64(val1, val1);
+    gen_set_label(l);
+
+    return val1;
+}
+
+/* Clamp the 64-bit VAL to the signed 32-bit range, setting OVERFLOW
+   to 1 when clamping occurs (OVERFLOW is left untouched otherwise).
+   NOTE(review): the lower clamp stores 0x80000000 as a *positive*
+   64-bit constant rather than sign-extended -0x80000000; callers that
+   truncate to 32 bits see the same bit pattern, but confirm no
+   consumer uses the full 64-bit value afterwards.  */
+static void
+saturate_s32(TCGv_i64 val, TCGv overflow)
+{
+    int l, endl;
+
+    endl = gen_new_label();
+
+    /* Below INT32_MIN -> clamp up.  */
+    l = gen_new_label();
+    tcg_gen_brcondi_i64(TCG_COND_GE, val, -0x80000000ll, l);
+    tcg_gen_movi_tl(overflow, 1);
+    tcg_gen_movi_i64(val, 0x80000000);
+    tcg_gen_br(endl);
+    gen_set_label(l);
+
+    /* Above INT32_MAX -> clamp down.  */
+    l = gen_new_label();
+    tcg_gen_brcondi_i64(TCG_COND_LE, val, 0x7fffffff, l);
+    tcg_gen_movi_tl(overflow, 1);
+    tcg_gen_movi_i64(val, 0x7fffffff);
+    gen_set_label(l);
+
+    gen_set_label(endl);
+}
+
+/* Emit one MAC-unit operation on accumulator WHICH.  OP selects
+   0 = load, 1 = add, 2 = subtract, 3 = no accumulation; H0/H1 pick
+   the operand halves and MMOD/MM the multiply mode.  Returns a new
+   temp holding the low 32 bits of the accumulator.  OVERFLOW is
+   currently unused since saturation is not implemented (see XXX).  */
+static TCGv
+decode_macfunc(DisasContext *dc, int which, int op, int h0, int h1, int src0,
+               int src1, int mmod, int MM, int fullword, int *overflow)
+{
+    /* XXX: Very incomplete.  */
+    TCGv_i64 acc;
+
+    /* Signed modes want the 40-bit accumulator sign-extended to 64
+       bits before accumulating; unsigned modes want it clamped.  */
+    if (mmod == 0 || mmod == M_T || mmod == M_IS || mmod == M_ISS2 ||
+        mmod == M_S2RND || mmod == M_IH || mmod == M_W32) {
+        gen_extend_acc(cpu_areg[which]);
+    } else {
+        gen_unextend_acc(cpu_areg[which]);
+    }
+    acc = cpu_areg[which];
+
+    if (op != 3) {
+        /* this can't saturate, so we don't keep track of the sat flag */
+        TCGv tsat = tcg_temp_local_new();
+        TCGv_i64 res = decode_multfunc_i64(dc, h0, h1, src0, src1, mmod, MM, tsat);
+        tcg_temp_free(tsat);
+
+        /* Perform accumulation.  */
+        switch (op) {
+        case 0:
+            tcg_gen_mov_i64(acc, res);
+            break;
+        case 1:
+            tcg_gen_add_i64(acc, acc, res);
+            break;
+        case 2:
+            tcg_gen_sub_i64(acc, acc, res);
+            break;
+        }
+        tcg_temp_free_i64(res);
+
+        /* XXX: Saturate.  */
+    }
+
+    TCGv tmp = tcg_temp_local_new();
+    tcg_gen_trunc_i64_i32(tmp, acc);
+    return tmp;
+}
+
+/* Decode the 16-bit ProgCtrl group (NOP, RTS, sync insns, indirect
+   jumps/calls, EXCPT, TESTSET, ...).  Branching insns only record a
+   hwloop callback/data pair; the actual branch is emitted later so
+   hardware-loop bottoms can be handled.  Supervisor-only insns just
+   emit the privilege check under user emulation.  */
+static void
+decode_ProgCtrl_0(DisasContext *dc, uint16_t iw0)
+{
+    /* ProgCtrl
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |.prgfunc.......|.poprnd........|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int poprnd  = ((iw0 >> ProgCtrl_poprnd_bits) & ProgCtrl_poprnd_mask);
+    int prgfunc = ((iw0 >> ProgCtrl_prgfunc_bits) & ProgCtrl_prgfunc_mask);
+
+    TRACE_EXTRACT("poprnd:%i prgfunc:%i", poprnd, prgfunc);
+
+    if (prgfunc == 0 && poprnd == 0) {
+        /* NOP */;
+    } else if (prgfunc == 1 && poprnd == 0) {
+        /* RTS; */
+        dc->is_jmp = DISAS_JUMP;
+        dc->hwloop_callback = gen_hwloop_br_direct;
+        dc->hwloop_data = &cpu_rets;
+    } else if (prgfunc == 1 && poprnd == 1) {
+        /* RTI; */
+        cec_require_supervisor(dc);
+    } else if (prgfunc == 1 && poprnd == 2) {
+        /* RTX; */
+        cec_require_supervisor(dc);
+    } else if (prgfunc == 1 && poprnd == 3) {
+        /* RTN; */
+        cec_require_supervisor(dc);
+    } else if (prgfunc == 1 && poprnd == 4) {
+        /* RTE; */
+        cec_require_supervisor(dc);
+    } else if (prgfunc == 2 && poprnd == 0) {
+        /* IDLE; */
+        /* just NOP it */;
+    } else if (prgfunc == 2 && poprnd == 3) {
+        /* CSYNC; */
+        /* just NOP it */;
+    } else if (prgfunc == 2 && poprnd == 4) {
+        /* SSYNC; */
+        /* just NOP it */;
+    } else if (prgfunc == 2 && poprnd == 5) {
+        /* EMUEXCPT; */
+        cec_exception(dc, EXCP_DEBUG);
+    } else if (prgfunc == 3 && poprnd < 8) {
+        /* CLI Dreg{poprnd}; */
+        cec_require_supervisor(dc);
+    } else if (prgfunc == 4 && poprnd < 8) {
+        /* STI Dreg{poprnd}; */
+        cec_require_supervisor(dc);
+    } else if (prgfunc == 5 && poprnd < 8) {
+        /* JUMP (Preg{poprnd}); */
+        dc->is_jmp = DISAS_JUMP;
+        dc->hwloop_callback = gen_hwloop_br_direct;
+        dc->hwloop_data = &cpu_preg[poprnd];
+    } else if (prgfunc == 6 && poprnd < 8) {
+        /* CALL (Preg{poprnd}); */
+        dc->is_jmp = DISAS_CALL;
+        dc->hwloop_callback = gen_hwloop_br_direct;
+        dc->hwloop_data = &cpu_preg[poprnd];
+    } else if (prgfunc == 7 && poprnd < 8) {
+        /* CALL (PC + Preg{poprnd}); */
+        dc->is_jmp = DISAS_CALL;
+        dc->hwloop_callback = gen_hwloop_br_pcrel;
+        dc->hwloop_data = &cpu_preg[poprnd];
+    } else if (prgfunc == 8 && poprnd < 8) {
+        /* JUMP (PC + Preg{poprnd}); */
+        dc->is_jmp = DISAS_JUMP;
+        dc->hwloop_callback = gen_hwloop_br_pcrel;
+        dc->hwloop_data = &cpu_preg[poprnd];
+    } else if (prgfunc == 9) {
+        /* RAISE imm{poprnd}; */
+        /* int raise = uimm4 (poprnd); */
+        cec_require_supervisor(dc);
+    } else if (prgfunc == 10) {
+        /* EXCPT imm{poprnd}; */
+        int excpt = uimm4 (poprnd);
+        cec_exception(dc, excpt);
+    } else if (prgfunc == 11 && poprnd < 6) {
+        /* TESTSET (Preg{poprnd}); */
+        /* NOTE(review): this load/modify/store sequence is not atomic;
+           presumably acceptable for single-threaded user emulation —
+           revisit if multi-threaded guests are supported.  */
+        TCGv tmp = tcg_temp_new();
+        tcg_gen_qemu_ld8u(tmp, cpu_preg[poprnd], dc->mem_idx);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_cc, tmp, 0);
+        tcg_gen_ori_tl(tmp, tmp, 0x80);
+        tcg_gen_qemu_st8(tmp, cpu_preg[poprnd], dc->mem_idx);
+        tcg_temp_free(tmp);
+    } else {
+        illegal_instruction(dc);
+    }
+}
+
+/* Cache-control insns (PREFETCH/FLUSHINV/FLUSH/IFLUSH).  No cache is
+   modeled, so only the optional pointer post-increment side effect of
+   the [Preg++] forms is emulated.  */
+static void
+decode_CaCTRL_0(DisasContext *dc, uint16_t iw0)
+{
+    /* CaCTRL
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |.a.|.op....|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int a   = ((iw0 >> CaCTRL_a_bits) & CaCTRL_a_mask);
+    int op  = ((iw0 >> CaCTRL_op_bits) & CaCTRL_op_mask);
+    int reg = ((iw0 >> CaCTRL_reg_bits) & CaCTRL_reg_mask);
+
+    TRACE_EXTRACT("a:%i op:%i reg:%i", a, op, reg);
+
+    /*
+     * PREFETCH [Preg{reg}];
+     * PREFETCH [Preg{reg}++{a}];
+     * FLUSHINV [Preg{reg}];
+     * FLUSHINV [Preg{reg}++{a}];
+     * FLUSH [Preg{reg}];
+     * FLUSH [Preg{reg}++{a}];
+     * IFLUSH [Preg{reg}];
+     * IFLUSH [Preg{reg}++{a}];
+     */
+
+    /* No cache simulation, and we'll ignore the implicit CPLB aspects */
+
+    if (a) {
+        tcg_gen_addi_tl(cpu_preg[reg], cpu_preg[reg], BFIN_L1_CACHE_BYTES);
+    }
+}
+
+/* Decode single-register push ([--SP] = reg) and pop (reg = [SP++]).
+   ASTAT and the accumulator halves (A#.X/A#.W) need special packing;
+   everything else goes through the plain register map.  */
+static void
+decode_PushPopReg_0(DisasContext *dc, uint16_t iw0)
+{
+    /* PushPopReg
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |.W.|.grp.......|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int W   = ((iw0 >> PushPopReg_W_bits) & PushPopReg_W_mask);
+    int grp = ((iw0 >> PushPopReg_grp_bits) & PushPopReg_grp_mask);
+    int reg = ((iw0 >> PushPopReg_reg_bits) & PushPopReg_reg_mask);
+    TCGv treg, tmp;
+    TCGv_i64 tmp64;
+    /* True only once treg has been assigned.  The special-format pops
+       (ASTAT, A#.X, A#.W) never set treg, and the old code passed it
+       to gen_maybe_lb_exit_tb uninitialized on those paths.  */
+    bool treg_valid = false;
+
+    TRACE_EXTRACT("W:%i grp:%i reg:%i", W, grp, reg);
+
+    /* Can't push/pop reserved registers */
+    /*if (reg_is_reserved(grp, reg))
+        illegal_instruction(dc);*/
+
+    reg_check_sup(dc, grp, reg);
+
+    /* Everything here needs to be aligned, so check once */
+    gen_align_check(dc, cpu_spreg, 4, false);
+
+    if (W == 0) {
+        /* Dreg and Preg are not supported by this instruction */
+        /*if (grp == 0 || grp == 1)
+            illegal_instruction(dc);*/
+
+        /* genreg{grp,reg} [SP++]; */
+        if (grp == 4 && reg == 6) {
+            /* Pop ASTAT */
+            tmp = tcg_temp_new();
+            tcg_gen_qemu_ld32u(tmp, cpu_spreg, dc->mem_idx);
+            gen_astat_store(dc, tmp);
+            tcg_temp_free(tmp);
+        } else if (grp == 4 && (reg == 0 || reg == 2)) {
+            /* Pop A#.X: only the low 8 bits are architected.  */
+            tmp = tcg_temp_new();
+            tcg_gen_qemu_ld32u(tmp, cpu_spreg, dc->mem_idx);
+            tcg_gen_andi_tl(tmp, tmp, 0xff);
+            tmp64 = tcg_temp_new_i64();
+            tcg_gen_extu_i32_i64(tmp64, tmp);
+            tcg_temp_free(tmp);
+
+            tcg_gen_andi_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], 0xffffffff);
+            tcg_gen_shli_i64(tmp64, tmp64, 32);
+            tcg_gen_or_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], tmp64);
+            tcg_temp_free_i64(tmp64);
+        } else if (grp == 4 && (reg == 1 || reg == 3)) {
+            /* Pop A#.W: replace the low 32 bits, keep the .X byte.  */
+            tcg_gen_andi_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], 0xff00000000);
+            tmp = tcg_temp_new();
+            tcg_gen_qemu_ld32u(tmp, cpu_spreg, dc->mem_idx);
+            tmp64 = tcg_temp_new_i64();
+            tcg_gen_extu_i32_i64(tmp64, tmp);
+            tcg_temp_free(tmp);
+            tcg_gen_or_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], tmp64);
+            tcg_temp_free_i64(tmp64);
+        } else {
+            treg = get_allreg(dc, grp, reg);
+            treg_valid = true;
+            tcg_gen_qemu_ld32u(treg, cpu_spreg, dc->mem_idx);
+
+            if (grp == 6 && (reg == 1 || reg == 4)) {
+                /* LT loads auto clear the LSB */
+                tcg_gen_andi_tl(treg, treg, ~1);
+            }
+        }
+
+        tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);
+        /* Only pops through the register map can target an LB reg.  */
+        if (treg_valid) {
+            gen_maybe_lb_exit_tb(dc, treg);
+        }
+    } else {
+        /* [--SP] = genreg{grp,reg}; */
+
+        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
+        if (grp == 4 && reg == 6) {
+            /* Push ASTAT */
+            tmp = tcg_temp_new();
+            gen_astat_load(dc, tmp);
+            tcg_gen_qemu_st32(tmp, cpu_spreg, dc->mem_idx);
+            tcg_temp_free(tmp);
+        } else if (grp == 4 && (reg == 0 || reg == 2)) {
+            /* Push A#.X */
+            tmp64 = tcg_temp_new_i64();
+            tcg_gen_shri_i64(tmp64, cpu_areg[reg >> 1], 32);
+            tmp = tcg_temp_new();
+            tcg_gen_trunc_i64_i32(tmp, tmp64);
+            tcg_temp_free_i64(tmp64);
+            tcg_gen_andi_tl(tmp, tmp, 0xff);
+            tcg_gen_qemu_st32(tmp, cpu_spreg, dc->mem_idx);
+            tcg_temp_free(tmp);
+        } else if (grp == 4 && (reg == 1 || reg == 3)) {
+            /* Push A#.W */
+            tmp = tcg_temp_new();
+            tcg_gen_trunc_i64_i32(tmp, cpu_areg[reg >> 1]);
+            tcg_gen_qemu_st32(tmp, cpu_spreg, dc->mem_idx);
+            tcg_temp_free(tmp);
+        } else {
+            treg = get_allreg(dc, grp, reg);
+            tcg_gen_qemu_st32(treg, cpu_spreg, dc->mem_idx);
+        }
+    }
+}
+
+/* Decode multi-register push/pop: [--SP] = (R7:dr, P5:pr) and the
+   mirror pop.  The push loops walk up the register numbers while SP
+   walks down; the pop loops reverse both, so the layouts match.  */
+static void
+decode_PushPopMultiple_0(DisasContext *dc, uint16_t iw0)
+{
+    /* PushPopMultiple
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 1 | 0 |.d.|.p.|.W.|.dr........|.pr........|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int p  = ((iw0 >> PushPopMultiple_p_bits) & PushPopMultiple_p_mask);
+    int d  = ((iw0 >> PushPopMultiple_d_bits) & PushPopMultiple_d_mask);
+    int W  = ((iw0 >> PushPopMultiple_W_bits) & PushPopMultiple_W_mask);
+    int dr = ((iw0 >> PushPopMultiple_dr_bits) & PushPopMultiple_dr_mask);
+    int pr = ((iw0 >> PushPopMultiple_pr_bits) & PushPopMultiple_pr_mask);
+    int i;
+
+    TRACE_EXTRACT("d:%i p:%i W:%i dr:%i pr:%i", d, p, W, dr, pr);
+
+    /* Reject: nothing selected, pr past P5, or a range given for an
+       unselected register file.  */
+    if ((d == 0 && p == 0) || (p && imm5(pr) > 5) ||
+        (d && !p && pr) || (p && !d && dr)) {
+        illegal_instruction(dc);
+    }
+
+    /* Everything here needs to be aligned, so check once */
+    gen_align_check(dc, cpu_spreg, 4, false);
+
+    if (W == 1) {
+        /* [--SP] = ({d}R7:imm{dr}, {p}P5:imm{pr}); */
+        if (d) {
+            for (i = dr; i < 8; i++) {
+                tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
+                tcg_gen_qemu_st32(cpu_dreg[i], cpu_spreg, dc->mem_idx);
+            }
+        }
+        if (p) {
+            for (i = pr; i < 6; i++) {
+                tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
+                tcg_gen_qemu_st32(cpu_preg[i], cpu_spreg, dc->mem_idx);
+            }
+        }
+    } else {
+        /* ({d}R7:imm{dr}, {p}P5:imm{pr}) = [SP++]; */
+        if (p) {
+            for (i = 5; i >= pr; i--) {
+                tcg_gen_qemu_ld32u(cpu_preg[i], cpu_spreg, dc->mem_idx);
+                tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);
+            }
+        }
+        if (d) {
+            for (i = 7; i >= dr; i--) {
+                tcg_gen_qemu_ld32u(cpu_dreg[i], cpu_spreg, dc->mem_idx);
+                tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);
+            }
+        }
+    }
+}
+
+/* Conditional register move: IF [!]CC dst = src.  Implemented as a
+   branch around the move — skip when CC != T, so the move happens
+   exactly when CC matches the requested sense.  */
+static void
+decode_ccMV_0(DisasContext *dc, uint16_t iw0)
+{
+    /* ccMV
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 1 | 1 |.T.|.d.|.s.|.dst.......|.src.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int s  = ((iw0 >> CCmv_s_bits) & CCmv_s_mask);
+    int d  = ((iw0 >> CCmv_d_bits) & CCmv_d_mask);
+    int T  = ((iw0 >> CCmv_T_bits) & CCmv_T_mask);
+    int src = ((iw0 >> CCmv_src_bits) & CCmv_src_mask);
+    int dst = ((iw0 >> CCmv_dst_bits) & CCmv_dst_mask);
+    int l;
+    TCGv reg_src, reg_dst;
+
+    TRACE_EXTRACT("T:%i d:%i s:%i dst:%i src:%i",
+                  T, d, s, dst, src);
+
+    /* IF !{T} CC DPreg{d,dst} = DPreg{s,src}; */
+    reg_src = get_allreg(dc, s, src);
+    reg_dst = get_allreg(dc, d, dst);
+    l = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_cc, T, l);
+    tcg_gen_mov_tl(reg_dst, reg_src);
+    gen_set_label(l);
+}
+
+/* Decode CC-setting compares: accumulator compares (opc > 4) and
+   register/immediate compares (opc <= 4).  Register compares also
+   queue the deferred ASTAT flag computation; pointer compares and
+   accumulator compares only touch CC.  */
+static void
+decode_CCflag_0(DisasContext *dc, uint16_t iw0)
+{
+    /* CCflag
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 1 |.I.|.opc.......|.G.|.y.........|.x.........|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int x = ((iw0 >> CCflag_x_bits) & CCflag_x_mask);
+    int y = ((iw0 >> CCflag_y_bits) & CCflag_y_mask);
+    int I = ((iw0 >> CCflag_I_bits) & CCflag_I_mask);
+    int G = ((iw0 >> CCflag_G_bits) & CCflag_G_mask);
+    int opc = ((iw0 >> CCflag_opc_bits) & CCflag_opc_mask);
+
+    TRACE_EXTRACT("I:%i opc:%i G:%i y:%i x:%i",
+                  I, opc, G, y, x);
+
+    if (opc > 4) {
+        TCGv_i64 tmp64;
+        TCGCond cond;
+
+        /*if (x != 0 || y != 0)
+            illegal_instruction(dc);*/
+
+        if (opc == 5 && I == 0 && G == 0) {
+            /* CC = A0 == A1; */
+            cond = TCG_COND_EQ;
+        } else if (opc == 6 && I == 0 && G == 0) {
+            /* CC = A0 < A1; */
+            cond = TCG_COND_LT;
+        } else if (opc == 7 && I == 0 && G == 0) {
+            /* CC = A0 <= A1; */
+            cond = TCG_COND_LE;
+        } else {
+            illegal_instruction(dc);
+            /* Don't fall through and use the uninitialized cond.  */
+            return;
+        }
+
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_setcond_i64(cond, tmp64, cpu_areg[0], cpu_areg[1]);
+        tcg_gen_trunc_i64_i32(cpu_cc, tmp64);
+        tcg_temp_free_i64(tmp64);
+    } else {
+        int issigned = opc < 3;
+        uint32_t dst_imm = issigned ? imm3(y) : uimm3(y);
+        TCGv src_reg = G ? cpu_preg[x] : cpu_dreg[x];
+        TCGv dst_reg = G ? cpu_preg[y] : cpu_dreg[y];
+        TCGv tmp;
+        TCGCond cond;
+        enum astat_ops astat_op;
+
+        switch (opc) {
+        default: /* shutup useless gcc warnings */
+        case 0: /* signed == */
+            cond = TCG_COND_EQ;
+            break;
+        case 1: /* signed < */
+            cond = TCG_COND_LT;
+            break;
+        case 2: /* signed <= */
+            cond = TCG_COND_LE;
+            break;
+        case 3: /* unsigned < */
+            cond = TCG_COND_LTU;
+            break;
+        case 4: /* unsigned <= */
+            cond = TCG_COND_LEU;
+            break;
+        }
+        if (issigned) {
+            astat_op = ASTAT_OP_COMPARE_SIGNED;
+        } else {
+            astat_op = ASTAT_OP_COMPARE_UNSIGNED;
+        }
+
+        if (I) {
+            /* Compare to an immediate rather than a reg */
+            tmp = tcg_const_tl(dst_imm);
+            dst_reg = tmp;
+        }
+        tcg_gen_setcond_tl(cond, cpu_cc, src_reg, dst_reg);
+
+        /* Pointer compares only touch CC.  */
+        if (!G) {
+            astat_queue_state2(dc, astat_op, src_reg, dst_reg);
+        }
+
+        if (I) {
+            tcg_temp_free(tmp);
+        }
+    }
+}
+
+/* Moves between CC and a D register: Dreg = CC, CC = (Dreg != 0),
+   and the CC negation form.  */
+static void
+decode_CC2dreg_0(DisasContext *dc, uint16_t iw0)
+{
+    /* CC2dreg
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |.op....|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int op  = ((iw0 >> CC2dreg_op_bits) & CC2dreg_op_mask);
+    int reg = ((iw0 >> CC2dreg_reg_bits) & CC2dreg_reg_mask);
+
+    TRACE_EXTRACT("op:%i reg:%i", op, reg);
+
+    if (op == 0) {
+        /* Dreg{reg} = CC; */
+        tcg_gen_mov_tl(cpu_dreg[reg], cpu_cc);
+    } else if (op == 1) {
+        /* CC = Dreg{reg}; */
+        tcg_gen_setcondi_tl(TCG_COND_NE, cpu_cc, cpu_dreg[reg], 0);
+    } else if (op == 3 && reg == 0) {
+        /* CC = !CC; */
+        tcg_gen_xori_tl(cpu_cc, cpu_cc, 1);
+    } else {
+        illegal_instruction(dc);
+    }
+}
+
+/* Move CC into a single ASTAT bit or vice versa, optionally combining
+   with OR/AND/XOR.  ASTAT must be flushed to env (gen_astat_update)
+   before individual bits are loaded/stored directly.  */
+static void
+decode_CC2stat_0(DisasContext *dc, uint16_t iw0)
+{
+    /* CC2stat
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |.D.|.op....|.cbit..............|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int D    = ((iw0 >> CC2stat_D_bits) & CC2stat_D_mask);
+    int op   = ((iw0 >> CC2stat_op_bits) & CC2stat_op_mask);
+    int cbit = ((iw0 >> CC2stat_cbit_bits) & CC2stat_cbit_mask);
+    TCGv tmp;
+
+    TRACE_EXTRACT("D:%i op:%i cbit:%i", D, op, cbit);
+
+    /* CC = CC; is invalid.  */
+    if (cbit == 5) {
+        illegal_instruction(dc);
+    }
+
+    gen_astat_update(dc, true);
+
+    if (D == 0) {
+        switch (op) {
+        case 0: /* CC = ASTAT[cbit] */
+            tcg_gen_ld_tl(cpu_cc, cpu_env, offsetof(CPUArchState, astat[cbit]));
+            break;
+        case 1: /* CC |= ASTAT[cbit] */
+            tmp = tcg_temp_new();
+            tcg_gen_ld_tl(tmp, cpu_env, offsetof(CPUArchState, astat[cbit]));
+            tcg_gen_or_tl(cpu_cc, cpu_cc, tmp);
+            tcg_temp_free(tmp);
+            break;
+        case 2: /* CC &= ASTAT[cbit] */
+            tmp = tcg_temp_new();
+            tcg_gen_ld_tl(tmp, cpu_env, offsetof(CPUArchState, astat[cbit]));
+            tcg_gen_and_tl(cpu_cc, cpu_cc, tmp);
+            tcg_temp_free(tmp);
+            break;
+        case 3: /* CC ^= ASTAT[cbit] */
+            tmp = tcg_temp_new();
+            tcg_gen_ld_tl(tmp, cpu_env, offsetof(CPUArchState, astat[cbit]));
+            tcg_gen_xor_tl(cpu_cc, cpu_cc, tmp);
+            tcg_temp_free(tmp);
+            break;
+        }
+    } else {
+        switch (op) {
+        case 0: /* ASTAT[cbit] = CC */
+            tcg_gen_st_tl(cpu_cc, cpu_env, offsetof(CPUArchState, astat[cbit]));
+            break;
+        case 1: /* ASTAT[cbit] |= CC */
+            tmp = tcg_temp_new();
+            tcg_gen_ld_tl(tmp, cpu_env, offsetof(CPUArchState, astat[cbit]));
+            tcg_gen_or_tl(tmp, tmp, cpu_cc);
+            tcg_gen_st_tl(tmp, cpu_env, offsetof(CPUArchState, astat[cbit]));
+            tcg_temp_free(tmp);
+            break;
+        case 2: /* ASTAT[cbit] &= CC */
+            tmp = tcg_temp_new();
+            tcg_gen_ld_tl(tmp, cpu_env, offsetof(CPUArchState, astat[cbit]));
+            tcg_gen_and_tl(tmp, tmp, cpu_cc);
+            tcg_gen_st_tl(tmp, cpu_env, offsetof(CPUArchState, astat[cbit]));
+            tcg_temp_free(tmp);
+            break;
+        case 3: /* ASTAT[cbit] ^= CC */
+            tmp = tcg_temp_new();
+            tcg_gen_ld_tl(tmp, cpu_env, offsetof(CPUArchState, astat[cbit]));
+            tcg_gen_xor_tl(tmp, tmp, cpu_cc);
+            tcg_gen_st_tl(tmp, cpu_env, offsetof(CPUArchState, astat[cbit]));
+            tcg_temp_free(tmp);
+            break;
+        }
+    }
+}
+
+/* Conditional branch on CC.  pcrel10 offsets are scaled by 2 (see
+   constant_formats), so bit 0 of the decoded offset is always clear;
+   the branch sense T is packed into that bit for the hwloop
+   callback to unpack.  */
+static void
+decode_BRCC_0(DisasContext *dc, uint16_t iw0)
+{
+    /* BRCC
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 1 |.T.|.B.|.offset................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int B = ((iw0 >> BRCC_B_bits) & BRCC_B_mask);
+    int T = ((iw0 >> BRCC_T_bits) & BRCC_T_mask);
+    int offset = ((iw0 >> BRCC_offset_bits) & BRCC_offset_mask);
+    int pcrel = pcrel10(offset);
+
+    TRACE_EXTRACT("T:%i B:%i offset:%#x", T, B, offset);
+
+    /* IF !{T} CC JUMP imm{offset} (bp){B}; */
+    dc->hwloop_callback = gen_hwloop_br_pcrel_cc;
+    dc->hwloop_data = (void *)(unsigned long)(pcrel | T);
+}
+
+/* Decode the 16-bit unconditional short jump: JUMP.S pcrel.
+   The jump is emitted via the hwloop callback; DISAS_JUMP marks the end
+   of this translation block.  */
+static void
+decode_UJUMP_0(DisasContext *dc, uint16_t iw0)
+{
+    /* UJUMP
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 1 | 0 |.offset........................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int offset = ((iw0 >> UJump_offset_bits) & UJump_offset_mask);
+    int pcrel = pcrel12(offset);
+
+    TRACE_EXTRACT("offset:%#x", offset);
+
+    /* JUMP.S imm{offset}; */
+    dc->is_jmp = DISAS_JUMP;
+    dc->hwloop_callback = gen_hwloop_br_pcrel_imm;
+    dc->hwloop_data = (void *)(unsigned long)pcrel;
+}
+
+/* Register-to-register move (REGMV): genreg{gd,dst} = genreg{gs,src}.
+   Most combinations are a plain TCG mov, but the 40-bit accumulators
+   (A0/A1, split into .W and .X sub-registers), ASTAT, and the LT loop
+   registers need special packing/unpacking.  */
+static void
+decode_REGMV_0(DisasContext *dc, uint16_t iw0)
+{
+    /* REGMV
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 1 | 1 |.gd........|.gs........|.dst.......|.src.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int gs  = ((iw0 >> RegMv_gs_bits) & RegMv_gs_mask);
+    int gd  = ((iw0 >> RegMv_gd_bits) & RegMv_gd_mask);
+    int src = ((iw0 >> RegMv_src_bits) & RegMv_src_mask);
+    int dst = ((iw0 >> RegMv_dst_bits) & RegMv_dst_mask);
+    TCGv reg_src, reg_dst, tmp;
+    TCGv_i64 tmp64;
+    bool istmp;
+
+    TRACE_EXTRACT("gd:%i gs:%i dst:%i src:%i",
+                  gd, gs, dst, src);
+
+    /* genreg{gd,dst} = genreg{gs,src}; */
+
+    /* Check access permission for supervisor-only registers.  */
+    reg_check_sup(dc, gs, src);
+    reg_check_sup(dc, gd, dst);
+
+    if (gs == 4 && src == 6) {
+        /* Reads of ASTAT */
+        tmp = tcg_temp_new();
+        gen_astat_load(dc, tmp);
+        reg_src = tmp;
+        istmp = true;
+    } else if (gs == 4 && (src == 0 || src == 2)) {
+        /* Reads of A#.X: bits 39:32 of the 64-bit accumulator, sign
+           extended from 8 bits into a 32-bit value.  */
+        tmp = tcg_temp_new();
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_shri_i64(tmp64, cpu_areg[src >> 1], 32);
+        tcg_gen_trunc_i64_i32(tmp, tmp64);
+        tcg_temp_free_i64(tmp64);
+        tcg_gen_ext8s_tl(tmp, tmp);
+        reg_src = tmp;
+        istmp = true;
+    } else if (gs == 4 && (src == 1 || src == 3)) {
+        /* Reads of A#.W: the low 32 bits of the accumulator.  */
+        tmp = tcg_temp_new();
+        tcg_gen_trunc_i64_i32(tmp, cpu_areg[src >> 1]);
+        reg_src = tmp;
+        istmp = true;
+    } else {
+        reg_src = get_allreg(dc, gs, src);
+        istmp = false;
+    }
+
+    if (gd == 4 && dst == 6) {
+        /* Writes to ASTAT */
+        gen_astat_store(dc, reg_src);
+    } else if (gd == 4 && (dst == 0 || dst == 2)) {
+        /* Writes to A#.X: replace bits 39:32, preserving the low word.  */
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_andi_i64(cpu_areg[dst >> 1], cpu_areg[dst >> 1], 0xffffffff);
+        tcg_gen_extu_i32_i64(tmp64, reg_src);
+        tcg_gen_andi_i64(tmp64, tmp64, 0xff);
+        tcg_gen_shli_i64(tmp64, tmp64, 32);
+        tcg_gen_or_i64(cpu_areg[dst >> 1], cpu_areg[dst >> 1], tmp64);
+        tcg_temp_free_i64(tmp64);
+    } else if (gd == 4 && (dst == 1 || dst == 3)) {
+        /* Writes to A#.W: replace the low 32 bits, preserving A#.X.  */
+        tcg_gen_andi_i64(cpu_areg[dst >> 1], cpu_areg[dst >> 1], 0xff00000000);
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(tmp64, reg_src);
+        tcg_gen_or_i64(cpu_areg[dst >> 1], cpu_areg[dst >> 1], tmp64);
+        tcg_temp_free_i64(tmp64);
+    } else if (gd == 6 && (dst == 1 || dst == 4)) {
+        /* Writes to LT# (dst 1 -> LT0, dst 4 -> LT1, hence dst >> 2) */
+        /* LT loads auto clear the LSB */
+        tcg_gen_andi_tl(cpu_ltreg[dst >> 2], reg_src, ~1);
+    } else {
+        reg_dst = get_allreg(dc, gd, dst);
+        tcg_gen_mov_tl(reg_dst, reg_src);
+        /* A write to an LB register may terminate the current hardware
+           loop, which requires exiting this translation block.  */
+        gen_maybe_lb_exit_tb(dc, reg_dst);
+    }
+
+    if (istmp)
+        tcg_temp_free(tmp);
+}
+
+/* Two-operand ALU ops on data registers (ALU2op): register-count shifts,
+   multiply, DIVS/DIVQ division primitives, sign/zero extensions, negate
+   and complement.  Most ops queue a deferred ASTAT flag update.  */
+static void
+decode_ALU2op_0(DisasContext *dc, uint16_t iw0)
+{
+    /* ALU2op
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 1 | 0 | 0 | 0 | 0 |.opc...........|.src.......|.dst.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int src = ((iw0 >> ALU2op_src_bits) & ALU2op_src_mask);
+    int opc = ((iw0 >> ALU2op_opc_bits) & ALU2op_opc_mask);
+    int dst = ((iw0 >> ALU2op_dst_bits) & ALU2op_dst_mask);
+    int l;
+    TCGv tmp;
+
+    TRACE_EXTRACT("opc:%i src:%i dst:%i", opc, src, dst);
+
+    if (opc == 0) {
+        /* Dreg{dst} >>>= Dreg{src}; */
+        l = gen_new_label();
+        /* Local temp: the value must survive the branch below.  */
+        tmp = tcg_temp_local_new();
+
+        /* Clip the shift magnitude to 31 bits: an arithmetic shift of 31
+           already yields all sign bits, so larger counts are equivalent.  */
+        tcg_gen_mov_tl(tmp, cpu_dreg[src]);
+        tcg_gen_brcondi_tl(TCG_COND_LEU, tmp, 31, l);
+        tcg_gen_movi_tl(tmp, 31);
+        gen_set_label(l);
+
+        tcg_gen_sar_tl(cpu_dreg[dst], cpu_dreg[dst], tmp);
+
+        tcg_temp_free(tmp);
+
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT_RT32, cpu_dreg[dst]);
+    } else if (opc == 1) {
+        /* Dreg{dst} >>= Dreg{src}; */
+        l = gen_new_label();
+        tmp = tcg_temp_local_new();
+
+        /* Counts above 31 zero the destination; the shift below then
+           operates on 0 with a count of 0, leaving the result 0.  */
+        tcg_gen_mov_tl(tmp, cpu_dreg[src]);
+        tcg_gen_brcondi_tl(TCG_COND_LEU, tmp, 31, l);
+        tcg_gen_movi_tl(tmp, 0);
+        tcg_gen_mov_tl(cpu_dreg[dst], tmp);
+        gen_set_label(l);
+
+        tcg_gen_shr_tl(cpu_dreg[dst], cpu_dreg[dst], tmp);
+
+        tcg_temp_free(tmp);
+
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT_RT32, cpu_dreg[dst]);
+    } else if (opc == 2) {
+        /* Dreg{dst} <<= Dreg{src}; */
+        l = gen_new_label();
+        tmp = tcg_temp_local_new();
+
+        /* Same clamping scheme as the logical right shift above.  */
+        tcg_gen_mov_tl(tmp, cpu_dreg[src]);
+        tcg_gen_brcondi_tl(TCG_COND_LEU, tmp, 31, l);
+        tcg_gen_movi_tl(tmp, 0);
+        tcg_gen_mov_tl(cpu_dreg[dst], tmp);
+        gen_set_label(l);
+
+        tcg_gen_shl_tl(cpu_dreg[dst], cpu_dreg[dst], tmp);
+
+        tcg_temp_free(tmp);
+
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst]);
+    } else if (opc == 3) {
+        /* Dreg{dst} *= Dreg{src}; */
+        tcg_gen_mul_tl(cpu_dreg[dst], cpu_dreg[dst], cpu_dreg[src]);
+    } else if (opc == 4 || opc == 5) {
+        /* Dreg{dst} = (Dreg{dst} + Dreg{src}) << imm{opc}; */
+        tcg_gen_add_tl(cpu_dreg[dst], cpu_dreg[dst], cpu_dreg[src]);
+        tcg_gen_shli_tl(cpu_dreg[dst], cpu_dreg[dst], (opc - 3));
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst]);
+    } else if (opc == 8) {
+        /* DIVQ (Dreg, Dreg); */
+        gen_divq(cpu_dreg[dst], cpu_dreg[src]);
+    } else if (opc == 9) {
+        /* DIVS (Dreg, Dreg); */
+        gen_divs(cpu_dreg[dst], cpu_dreg[src]);
+    } else if (opc == 10) {
+        /* Dreg{dst} = Dreg_lo{src} (X); */
+        tcg_gen_ext16s_tl(cpu_dreg[dst], cpu_dreg[src]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 11) {
+        /* Dreg{dst} = Dreg_lo{src} (Z); */
+        tcg_gen_ext16u_tl(cpu_dreg[dst], cpu_dreg[src]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 12) {
+        /* Dreg{dst} = Dreg_byte{src} (X); */
+        tcg_gen_ext8s_tl(cpu_dreg[dst], cpu_dreg[src]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 13) {
+        /* Dreg{dst} = Dreg_byte{src} (Z); */
+        tcg_gen_ext8u_tl(cpu_dreg[dst], cpu_dreg[src]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 14) {
+        /* Dreg{dst} = -Dreg{src}; */
+        /* XXX: Documentation isn't entirely clear about av0 and av1.  */
+        tcg_gen_neg_tl(cpu_dreg[dst], cpu_dreg[src]);
+        astat_queue_state1(dc, ASTAT_OP_NEGATE, cpu_dreg[dst]);
+    } else if (opc == 15) {
+        /* Dreg = ~Dreg; */
+        tcg_gen_not_tl(cpu_dreg[dst], cpu_dreg[src]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    }
+    /* NOTE(review): opc 6 and 7 generate no code at all here -- presumably
+       unused encodings, but confirm against the ISA reference.  */
+}
+
+/* Two-operand pointer-register ops (PTR2op): subtract, shifts by small
+   constants, bit-reversed add, and add-then-shift.  No ASTAT flags are
+   affected by pointer arithmetic.  */
+static void
+decode_PTR2op_0(DisasContext *dc, uint16_t iw0)
+{
+    /* PTR2op
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 1 | 0 | 0 | 0 | 1 | 0 |.opc.......|.src.......|.dst.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Extract src with the src mask (was mistakenly PTR2op_dst_mask).  */
+    int src = ((iw0 >> PTR2op_src_bits) & PTR2op_src_mask);
+    int opc = ((iw0 >> PTR2op_opc_bits) & PTR2op_opc_mask);
+    int dst = ((iw0 >> PTR2op_dst_bits) & PTR2op_dst_mask);
+
+    TRACE_EXTRACT("opc:%i src:%i dst:%i", opc, src, dst);
+
+    if (opc == 0) {
+        /* Preg{dst} -= Preg{src}; */
+        tcg_gen_sub_tl(cpu_preg[dst], cpu_preg[dst], cpu_preg[src]);
+    } else if (opc == 1) {
+        /* Preg{dst} = Preg{src} << 2; */
+        tcg_gen_shli_tl(cpu_preg[dst], cpu_preg[src], 2);
+    } else if (opc == 2) {
+        /* Reserved encoding: previously this fell through to the final
+           case and emitted a shift by a negative count (opc - 5 == -3).  */
+        illegal_instruction(dc);
+    } else if (opc == 3) {
+        /* Preg{dst} = Preg{src} >> 2; */
+        tcg_gen_shri_tl(cpu_preg[dst], cpu_preg[src], 2);
+    } else if (opc == 4) {
+        /* Preg{dst} = Preg{src} >> 1; */
+        tcg_gen_shri_tl(cpu_preg[dst], cpu_preg[src], 1);
+    } else if (opc == 5) {
+        /* Preg{dst} += Preg{src} (BREV); */
+        gen_helper_add_brev(cpu_preg[dst], cpu_preg[dst], cpu_preg[src]);
+    } else /*if (opc == 6 || opc == 7)*/ {
+        /* Preg{dst} = (Preg{dst} + Preg{src}) << imm{opc}; */
+        tcg_gen_add_tl(cpu_preg[dst], cpu_preg[dst], cpu_preg[src]);
+        tcg_gen_shli_tl(cpu_preg[dst], cpu_preg[dst], (opc - 5));
+    }
+}
+
+/* Bit-immediate ops on data registers (LOGI2op): BITTST/BITSET/BITTGL/
+   BITCLR with a 5-bit bit number, and immediate shifts.  The bit masks
+   are built with an unsigned constant because uimm may be 31 and
+   "1 << 31" on a signed int is undefined behavior in C.  */
+static void
+decode_LOGI2op_0(DisasContext *dc, uint16_t iw0)
+{
+    /* LOGI2op
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 1 | 0 | 0 | 1 |.opc.......|.src...............|.dst.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int src = ((iw0 >> LOGI2op_src_bits) & LOGI2op_src_mask);
+    int opc = ((iw0 >> LOGI2op_opc_bits) & LOGI2op_opc_mask);
+    int dst = ((iw0 >> LOGI2op_dst_bits) & LOGI2op_dst_mask);
+    int uimm = uimm5(src);
+    TCGv tmp;
+
+    TRACE_EXTRACT("opc:%i src:%i dst:%i", opc, src, dst);
+
+    if (opc == 0) {
+        /* CC = ! BITTST (Dreg{dst}, imm{uimm}); */
+        tmp = tcg_temp_new();
+        tcg_gen_movi_tl(tmp, 1u << uimm);
+        tcg_gen_and_tl(tmp, tmp, cpu_dreg[dst]);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_cc, tmp, 0);
+        tcg_temp_free(tmp);
+    } else if (opc == 1) {
+        /* CC = BITTST (Dreg{dst}, imm{uimm}); */
+        tmp = tcg_temp_new();
+        tcg_gen_movi_tl(tmp, 1u << uimm);
+        tcg_gen_and_tl(tmp, tmp, cpu_dreg[dst]);
+        tcg_gen_setcondi_tl(TCG_COND_NE, cpu_cc, tmp, 0);
+        tcg_temp_free(tmp);
+    } else if (opc == 2) {
+        /* BITSET (Dreg{dst}, imm{uimm}); */
+        tcg_gen_ori_tl(cpu_dreg[dst], cpu_dreg[dst], 1u << uimm);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 3) {
+        /* BITTGL (Dreg{dst}, imm{uimm}); */
+        tcg_gen_xori_tl(cpu_dreg[dst], cpu_dreg[dst], 1u << uimm);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 4) {
+        /* BITCLR (Dreg{dst}, imm{uimm}); */
+        tcg_gen_andi_tl(cpu_dreg[dst], cpu_dreg[dst], ~(1u << uimm));
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 5) {
+        /* Dreg{dst} >>>= imm{uimm}; */
+        tcg_gen_sari_tl(cpu_dreg[dst], cpu_dreg[dst], uimm);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT_RT32, cpu_dreg[dst]);
+    } else if (opc == 6) {
+        /* Dreg{dst} >>= imm{uimm}; */
+        tcg_gen_shri_tl(cpu_dreg[dst], cpu_dreg[dst], uimm);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT_RT32, cpu_dreg[dst]);
+    } else /*if (opc == 7)*/ {
+        /* Dreg{dst} <<= imm{uimm}; */
+        tcg_gen_shli_tl(cpu_dreg[dst], cpu_dreg[dst], uimm);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst]);
+    }
+}
+
+/* Three-operand compute ops (COMP3op): Dreg add/sub/and/or/xor and Preg
+   add / add-with-shift.  Results go through a temp so that dst aliasing
+   a source does not clobber operands before the ASTAT snapshot.  */
+static void
+decode_COMP3op_0(DisasContext *dc, uint16_t iw0)
+{
+    /* COMP3op
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 1 | 0 | 1 |.opc.......|.dst.......|.src1......|.src0......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int opc  = ((iw0 >> COMP3op_opc_bits) & COMP3op_opc_mask);
+    int dst  = ((iw0 >> COMP3op_dst_bits) & COMP3op_dst_mask);
+    int src0 = ((iw0 >> COMP3op_src0_bits) & COMP3op_src0_mask);
+    int src1 = ((iw0 >> COMP3op_src1_bits) & COMP3op_src1_mask);
+    TCGv tmp;
+
+    TRACE_EXTRACT("opc:%i dst:%i src1:%i src0:%i",
+                  opc, dst, src1, src0);
+
+    /* Local temp: it must stay valid across any code the astat helpers
+       may emit between the compute and the final move.  */
+    tmp = tcg_temp_local_new();
+    if (opc == 0) {
+        /* Dreg{dst} = Dreg{src0} + Dreg{src1}; */
+        tcg_gen_add_tl(tmp, cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state3(dc, ASTAT_OP_ADD32, tmp, cpu_dreg[src0], cpu_dreg[src1]);
+        tcg_gen_mov_tl(cpu_dreg[dst], tmp);
+    } else if (opc == 1) {
+        /* Dreg{dst} = Dreg{src0} - Dreg{src1}; */
+        tcg_gen_sub_tl(tmp, cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state3(dc, ASTAT_OP_SUB32, tmp, cpu_dreg[src0], cpu_dreg[src1]);
+        tcg_gen_mov_tl(cpu_dreg[dst], tmp);
+    } else if (opc == 2) {
+        /* Dreg{dst} = Dreg{src0} & Dreg{src1}; */
+        tcg_gen_and_tl(cpu_dreg[dst], cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 3) {
+        /* Dreg{dst} = Dreg{src0} | Dreg{src1}; */
+        tcg_gen_or_tl(cpu_dreg[dst], cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 4) {
+        /* Dreg{dst} = Dreg{src0} ^ Dreg{src1}; */
+        tcg_gen_xor_tl(cpu_dreg[dst], cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 5) {
+        /* Preg{dst} = Preg{src0} + Preg{src1}; */
+        /* If src0 == src1 this is disassembled as a shift by 1, but this
+           distinction doesn't matter for our purposes */
+        tcg_gen_add_tl(cpu_preg[dst], cpu_preg[src0], cpu_preg[src1]);
+    } else /*if (opc == 6 || opc == 7)*/ {
+        /* Preg{dst} = Preg{src0} + Preg{src1} << imm{opc}; */
+        /* The dst/src0/src1 might all be the same register, so we need
+           the temp here to avoid clobbering source values too early.
+           This could be optimized a little, but for now we'll leave it. */
+        tcg_gen_shli_tl(tmp, cpu_preg[src1], (opc - 5));
+        tcg_gen_add_tl(cpu_preg[dst], cpu_preg[src0], tmp);
+    }
+    tcg_temp_free(tmp);
+}
+
+/* Dreg with 7-bit signed immediate (COMPI2opD): load-immediate or
+   add-immediate with ASTAT update.  */
+static void
+decode_COMPI2opD_0(DisasContext *dc, uint16_t iw0)
+{
+    /* COMPI2opD
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 1 | 1 | 0 | 0 |.op|..src......................|.dst.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int op  = ((iw0 >> COMPI2opD_op_bits) & COMPI2opD_op_mask);
+    int dst = ((iw0 >> COMPI2opD_dst_bits) & COMPI2opD_dst_mask);
+    int src = ((iw0 >> COMPI2opD_src_bits) & COMPI2opD_src_mask);
+    int imm = imm7(src);
+    TCGv tmp;
+
+    TRACE_EXTRACT("op:%i src:%i dst:%i", op, src, dst);
+
+    if (op == 0) {
+        /* Dreg{dst} = imm{src} (X); */
+        tcg_gen_movi_tl(cpu_dreg[dst], imm);
+    } else {
+        /* Dreg{dst} += imm{src}; */
+        /* Latch the pre-add value of dst in the persistent astat scratch
+           register, since dst is both source and destination and the
+           deferred ASTAT computation needs the original operand.  */
+        tmp = tcg_const_tl(imm);
+        tcg_gen_mov_tl(cpu_astat_arg[1], cpu_dreg[dst]);
+        tcg_gen_add_tl(cpu_dreg[dst], cpu_astat_arg[1], tmp);
+        astat_queue_state3(dc, ASTAT_OP_ADD32, cpu_dreg[dst], cpu_astat_arg[1], tmp);
+        tcg_temp_free(tmp);
+    }
+}
+
+/* Preg with 7-bit signed immediate (COMPI2opP): load-immediate or
+   add-immediate.  Pointer arithmetic never touches ASTAT.  */
+static void
+decode_COMPI2opP_0(DisasContext *dc, uint16_t iw0)
+{
+    /* COMPI2opP
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 1 | 1 | 0 | 1 |.op|.src.......................|.dst.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int op  = ((iw0 >> COMPI2opP_op_bits) & COMPI2opP_op_mask);
+    int src = ((iw0 >> COMPI2opP_src_bits) & COMPI2opP_src_mask);
+    int dst = ((iw0 >> COMPI2opP_dst_bits) & COMPI2opP_dst_mask);
+    int simm = imm7(src);
+
+    TRACE_EXTRACT("op:%i src:%i dst:%i", op, src, dst);
+
+    if (op) {
+        /* Preg{dst} += imm{src}; */
+        tcg_gen_addi_tl(cpu_preg[dst], cpu_preg[dst], simm);
+    } else {
+        /* Preg{dst} = imm{src}; */
+        tcg_gen_movi_tl(cpu_preg[dst], simm);
+    }
+}
+
+/* Load/store with post-modify by a Preg (LDSTpmod).  When the encoding
+   has idx == ptr, the insn is the plain (no post-modify) form and the
+   pointer is left untouched; otherwise ptr += idx after the access.  */
+static void
+decode_LDSTpmod_0(DisasContext *dc, uint16_t iw0)
+{
+    /* LDSTpmod
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 0 | 0 |.W.|.aop...|.reg.......|.idx.......|.ptr.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int W   = ((iw0 >> LDSTpmod_W_bits) & LDSTpmod_W_mask);
+    int aop = ((iw0 >> LDSTpmod_aop_bits) & LDSTpmod_aop_mask);
+    int idx = ((iw0 >> LDSTpmod_idx_bits) & LDSTpmod_idx_mask);
+    int ptr = ((iw0 >> LDSTpmod_ptr_bits) & LDSTpmod_ptr_mask);
+    int reg = ((iw0 >> LDSTpmod_reg_bits) & LDSTpmod_reg_mask);
+    TCGv tmp;
+
+    TRACE_EXTRACT("W:%i aop:%i reg:%i idx:%i ptr:%i",
+                  W, aop, reg, idx, ptr);
+
+    if (aop == 1 && W == 0 && idx == ptr) {
+        /* Dreg_lo{reg} = W[Preg{ptr}]; */
+        /* Merge the 16-bit load into the low half of the Dreg.  */
+        tmp = tcg_temp_local_new();
+        tcg_gen_andi_tl(cpu_dreg[reg], cpu_dreg[reg], 0xffff0000);
+        gen_aligned_qemu_ld16u(dc, tmp, cpu_preg[ptr]);
+        tcg_gen_or_tl(cpu_dreg[reg], cpu_dreg[reg], tmp);
+        tcg_temp_free(tmp);
+    } else if (aop == 2 && W == 0 && idx == ptr) {
+        /* Dreg_hi{reg} = W[Preg{ptr}]; */
+        tmp = tcg_temp_local_new();
+        tcg_gen_andi_tl(cpu_dreg[reg], cpu_dreg[reg], 0xffff);
+        gen_aligned_qemu_ld16u(dc, tmp, cpu_preg[ptr]);
+        tcg_gen_shli_tl(tmp, tmp, 16);
+        tcg_gen_or_tl(cpu_dreg[reg], cpu_dreg[reg], tmp);
+        tcg_temp_free(tmp);
+    } else if (aop == 1 && W == 1 && idx == ptr) {
+        /* W[Preg{ptr}] = Dreg_lo{reg}; */
+        gen_aligned_qemu_st16(dc, cpu_dreg[reg], cpu_preg[ptr]);
+    } else if (aop == 2 && W == 1 && idx == ptr) {
+        /* W[Preg{ptr}] = Dreg_hi{reg}; */
+        tmp = tcg_temp_local_new();
+        tcg_gen_shri_tl(tmp, cpu_dreg[reg], 16);
+        gen_aligned_qemu_st16(dc, tmp, cpu_preg[ptr]);
+        tcg_temp_free(tmp);
+    } else if (aop == 0 && W == 0) {
+        /* Dreg{reg} = [Preg{ptr} ++ Preg{idx}]; */
+        gen_aligned_qemu_ld32u(dc, cpu_dreg[reg], cpu_preg[ptr]);
+        if (ptr != idx) {
+            tcg_gen_add_tl(cpu_preg[ptr], cpu_preg[ptr], cpu_preg[idx]);
+        }
+    } else if (aop == 1 && W == 0) {
+        /* Dreg_lo{reg} = W[Preg{ptr} ++ Preg{idx}]; */
+        tmp = tcg_temp_local_new();
+        tcg_gen_andi_tl(cpu_dreg[reg], cpu_dreg[reg], 0xffff0000);
+        gen_aligned_qemu_ld16u(dc, tmp, cpu_preg[ptr]);
+        tcg_gen_or_tl(cpu_dreg[reg], cpu_dreg[reg], tmp);
+        if (ptr != idx) {
+            tcg_gen_add_tl(cpu_preg[ptr], cpu_preg[ptr], cpu_preg[idx]);
+        }
+        tcg_temp_free(tmp);
+    } else if (aop == 2 && W == 0) {
+        /* Dreg_hi{reg} = W[Preg{ptr} ++ Preg{idx}]; */
+        tmp = tcg_temp_local_new();
+        tcg_gen_andi_tl(cpu_dreg[reg], cpu_dreg[reg], 0xffff);
+        gen_aligned_qemu_ld16u(dc, tmp, cpu_preg[ptr]);
+        tcg_gen_shli_tl(tmp, tmp, 16);
+        tcg_gen_or_tl(cpu_dreg[reg], cpu_dreg[reg], tmp);
+        if (ptr != idx) {
+            tcg_gen_add_tl(cpu_preg[ptr], cpu_preg[ptr], cpu_preg[idx]);
+        }
+        tcg_temp_free(tmp);
+    } else if (aop == 3 && W == 0) {
+        /* R%i = W[Preg{ptr} ++ Preg{idx}] (Z); */
+        gen_aligned_qemu_ld16u(dc, cpu_dreg[reg], cpu_preg[ptr]);
+        if (ptr != idx) {
+            tcg_gen_add_tl(cpu_preg[ptr], cpu_preg[ptr], cpu_preg[idx]);
+        }
+    } else if (aop == 3 && W == 1) {
+        /* R%i = W[Preg{ptr} ++ Preg{idx}] (X); */
+        gen_aligned_qemu_ld16s(dc, cpu_dreg[reg], cpu_preg[ptr]);
+        if (ptr != idx) {
+            tcg_gen_add_tl(cpu_preg[ptr], cpu_preg[ptr], cpu_preg[idx]);
+        }
+    } else if (aop == 0 && W == 1) {
+        /* [Preg{ptr} ++ Preg{idx}] = R%i; */
+        gen_aligned_qemu_st32(dc, cpu_dreg[reg], cpu_preg[ptr]);
+        if (ptr != idx) {
+            tcg_gen_add_tl(cpu_preg[ptr], cpu_preg[ptr], cpu_preg[idx]);
+        }
+    } else if (aop == 1 && W == 1) {
+        /* W[Preg{ptr} ++ Preg{idx}] = Dreg_lo{reg}; */
+        gen_aligned_qemu_st16(dc, cpu_dreg[reg], cpu_preg[ptr]);
+        if (ptr != idx) {
+            tcg_gen_add_tl(cpu_preg[ptr], cpu_preg[ptr], cpu_preg[idx]);
+        }
+    } else if (aop == 2 && W == 1) {
+        /* W[Preg{ptr} ++ Preg{idx}] = Dreg_hi{reg}; */
+        tmp = tcg_temp_local_new();
+        tcg_gen_shri_tl(tmp, cpu_dreg[reg], 16);
+        gen_aligned_qemu_st16(dc, tmp, cpu_preg[ptr]);
+        if (ptr != idx) {
+            tcg_gen_add_tl(cpu_preg[ptr], cpu_preg[ptr], cpu_preg[idx]);
+        }
+        tcg_temp_free(tmp);
+    } else {
+        illegal_instruction(dc);
+    }
+}
+
+/* Modify an Ireg by an Mreg (dagMODim), optionally bit-reversed.
+   Branch order matters: the (op == 0 && br == 1) BREV case must be
+   tested before the plain op == 0 case.  */
+static void
+decode_dagMODim_0(DisasContext *dc, uint16_t iw0)
+{
+    /* dagMODim
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 0 |.br| 1 | 1 |.op|.m.....|.i.....|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int i  = ((iw0 >> DagMODim_i_bits) & DagMODim_i_mask);
+    int m  = ((iw0 >> DagMODim_m_bits) & DagMODim_m_mask);
+    int br = ((iw0 >> DagMODim_br_bits) & DagMODim_br_mask);
+    int op = ((iw0 >> DagMODim_op_bits) & DagMODim_op_mask);
+
+    TRACE_EXTRACT("br:%i op:%i m:%i i:%i", br, op, m, i);
+
+    if (op == 0 && br == 1) {
+        /* Ireg{i} += Mreg{m} (BREV); */
+        gen_helper_add_brev(cpu_ireg[i], cpu_ireg[i], cpu_mreg[m]);
+    } else if (op == 0) {
+        /* Ireg{i} += Mreg{m}; */
+        gen_dagadd(dc, i, cpu_mreg[m]);
+    } else if (op == 1 && br == 0) {
+        /* Ireg{i} -= Mreg{m}; */
+        gen_dagsub(dc, i, cpu_mreg[m]);
+    } else {
+        /* op == 1 with br == 1, and op >= 2, are undefined encodings.  */
+        illegal_instruction(dc);
+    }
+}
+
+/* Post-modify an Ireg by a small constant (dagMODik).  */
+static void
+decode_dagMODik_0(DisasContext *dc, uint16_t iw0)
+{
+    /* dagMODik
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 |.op....|.i.....|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int i  = ((iw0 >> DagMODik_i_bits) & DagMODik_i_mask);
+    int op = ((iw0 >> DagMODik_op_bits) & DagMODik_op_mask);
+    /* Bit 1 of op selects the step (2 or 4); bit 0 selects direction.  */
+    int step = (op & 2) ? 4 : 2;
+
+    TRACE_EXTRACT("op:%i i:%i", op, i);
+
+    if ((op & 1) == 0) {
+        /* Ireg{i} += 2 or 4; */
+        gen_dagaddi(dc, i, step);
+    } else {
+        /* Ireg{i} -= 2 or 4; */
+        gen_dagsubi(dc, i, step);
+    }
+}
+
+/* 32-bit load honoring the DISALGNEXCPT flag: when set, force the
+   address down to 32-bit alignment instead of raising an alignment
+   exception; otherwise do the normal aligned load with checking.  */
+static void
+disalgnexcpt_ld32u(DisasContext *dc, TCGv ret, TCGv addr)
+{
+    TCGv aligned;
+
+    if (!dc->disalgnexcpt) {
+        gen_aligned_qemu_ld32u(dc, ret, addr);
+        return;
+    }
+
+    aligned = tcg_temp_new();
+    tcg_gen_andi_tl(aligned, addr, ~0x3);
+    tcg_gen_qemu_ld32u(ret, aligned, dc->mem_idx);
+    tcg_temp_free(aligned);
+}
+
+/* DSP load/store through the DAG I-registers (dspLDST), with optional
+   post-increment (aop 0), post-decrement (aop 1), no modify (aop 2), or
+   post-modify by an Mreg (aop 3).  m selects 32-bit / low-half / high-half
+   access for aop 0-2.  */
+static void
+decode_dspLDST_0(DisasContext *dc, uint16_t iw0)
+{
+    /* dspLDST
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 0 | 1 | 1 | 1 |.W.|.aop...|.m.....|.i.....|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int i   = ((iw0 >> DspLDST_i_bits) & DspLDST_i_mask);
+    int m   = ((iw0 >> DspLDST_m_bits) & DspLDST_m_mask);
+    int W   = ((iw0 >> DspLDST_W_bits) & DspLDST_W_mask);
+    int aop = ((iw0 >> DspLDST_aop_bits) & DspLDST_aop_mask);
+    int reg = ((iw0 >> DspLDST_reg_bits) & DspLDST_reg_mask);
+    TCGv tmp;
+
+    TRACE_EXTRACT("aop:%i m:%i i:%i reg:%i", aop, m, i, reg);
+
+    if (aop == 0 && W == 0 && m == 0) {
+        /* Dreg{reg} = [Ireg{i}++]; */
+        disalgnexcpt_ld32u(dc, cpu_dreg[reg], cpu_ireg[i]);
+        /* gen_dagaddi post-modifies the Ireg (presumably applying circular
+           buffer wrapping -- confirm in its definition).  */
+        gen_dagaddi(dc, i, 4);
+    } else if (aop == 0 && W == 0 && m == 1) {
+        /* Dreg_lo{reg} = W[Ireg{i}++]; */
+        tmp = tcg_temp_local_new();
+        gen_aligned_qemu_ld16u(dc, tmp, cpu_ireg[i]);
+        gen_mov_l_tl(cpu_dreg[reg], tmp);
+        tcg_temp_free(tmp);
+        gen_dagaddi(dc, i, 2);
+    } else if (aop == 0 && W == 0 && m == 2) {
+        /* Dreg_hi{reg} = W[Ireg{i}++]; */
+        tmp = tcg_temp_local_new();
+        gen_aligned_qemu_ld16u(dc, tmp, cpu_ireg[i]);
+        gen_mov_h_tl(cpu_dreg[reg], tmp);
+        tcg_temp_free(tmp);
+        gen_dagaddi(dc, i, 2);
+    } else if (aop == 1 && W == 0 && m == 0) {
+        /* Dreg{reg} = [Ireg{i}--]; */
+        disalgnexcpt_ld32u(dc, cpu_dreg[reg], cpu_ireg[i]);
+        gen_dagsubi(dc, i, 4);
+    } else if (aop == 1 && W == 0 && m == 1) {
+        /* Dreg_lo{reg} = W[Ireg{i}--]; */
+        tmp = tcg_temp_local_new();
+        gen_aligned_qemu_ld16u(dc, tmp, cpu_ireg[i]);
+        gen_mov_l_tl(cpu_dreg[reg], tmp);
+        tcg_temp_free(tmp);
+        gen_dagsubi(dc, i, 2);
+    } else if (aop == 1 && W == 0 && m == 2) {
+        /* Dreg_hi{reg} = W[Ireg{i}--]; */
+        tmp = tcg_temp_local_new();
+        gen_aligned_qemu_ld16u(dc, tmp, cpu_ireg[i]);
+        gen_mov_h_tl(cpu_dreg[reg], tmp);
+        tcg_temp_free(tmp);
+        gen_dagsubi(dc, i, 2);
+    } else if (aop == 2 && W == 0 && m == 0) {
+        /* Dreg{reg} = [Ireg{i}]; */
+        disalgnexcpt_ld32u(dc, cpu_dreg[reg], cpu_ireg[i]);
+    } else if (aop == 2 && W == 0 && m == 1) {
+        /* Dreg_lo{reg} = W[Ireg{i}]; */
+        tmp = tcg_temp_local_new();
+        gen_aligned_qemu_ld16u(dc, tmp, cpu_ireg[i]);
+        gen_mov_l_tl(cpu_dreg[reg], tmp);
+        tcg_temp_free(tmp);
+    } else if (aop == 2 && W == 0 && m == 2) {
+        /* Dreg_hi{reg} = W[Ireg{i}]; */
+        tmp = tcg_temp_local_new();
+        gen_aligned_qemu_ld16u(dc, tmp, cpu_ireg[i]);
+        gen_mov_h_tl(cpu_dreg[reg], tmp);
+        tcg_temp_free(tmp);
+    } else if (aop == 0 && W == 1 && m == 0) {
+        /* [Ireg{i}++] = Dreg{reg}; */
+        gen_aligned_qemu_st32(dc, cpu_dreg[reg], cpu_ireg[i]);
+        gen_dagaddi(dc, i, 4);
+    } else if (aop == 0 && W == 1 && m == 1) {
+        /* W[Ireg{i}++] = Dreg_lo{reg}; */
+        gen_aligned_qemu_st16(dc, cpu_dreg[reg], cpu_ireg[i]);
+        gen_dagaddi(dc, i, 2);
+    } else if (aop == 0 && W == 1 && m == 2) {
+        /* W[Ireg{i}++] = Dreg_hi{reg}; */
+        tmp = tcg_temp_local_new();
+        tcg_gen_shri_tl(tmp, cpu_dreg[reg], 16);
+        gen_aligned_qemu_st16(dc, tmp, cpu_ireg[i]);
+        tcg_temp_free(tmp);
+        gen_dagaddi(dc, i, 2);
+    } else if (aop == 1 && W == 1 && m == 0) {
+        /* [Ireg{i}--] = Dreg{reg}; */
+        gen_aligned_qemu_st32(dc, cpu_dreg[reg], cpu_ireg[i]);
+        gen_dagsubi(dc, i, 4);
+    } else if (aop == 1 && W == 1 && m == 1) {
+        /* W[Ireg{i}--] = Dreg_lo{reg}; */
+        gen_aligned_qemu_st16(dc, cpu_dreg[reg], cpu_ireg[i]);
+        gen_dagsubi(dc, i, 2);
+    } else if (aop == 1 && W == 1 && m == 2) {
+        /* W[Ireg{i}--] = Dreg_hi{reg}; */
+        tmp = tcg_temp_local_new();
+        tcg_gen_shri_tl(tmp, cpu_dreg[reg], 16);
+        gen_aligned_qemu_st16(dc, tmp, cpu_ireg[i]);
+        tcg_temp_free(tmp);
+        gen_dagsubi(dc, i, 2);
+    } else if (aop == 2 && W == 1 && m == 0) {
+        /* [Ireg{i}] = Dreg{reg}; */
+        gen_aligned_qemu_st32(dc, cpu_dreg[reg], cpu_ireg[i]);
+    } else if (aop == 2 && W == 1 && m == 1) {
+        /* W[Ireg{i}] = Dreg_lo{reg}; */
+        gen_aligned_qemu_st16(dc, cpu_dreg[reg], cpu_ireg[i]);
+    } else if (aop == 2 && W == 1 && m == 2) {
+        /* W[Ireg{i}] = Dreg_hi{reg}; */
+        tmp = tcg_temp_local_new();
+        tcg_gen_shri_tl(tmp, cpu_dreg[reg], 16);
+        gen_aligned_qemu_st16(dc, tmp, cpu_ireg[i]);
+        tcg_temp_free(tmp);
+    } else if (aop == 3 && W == 0) {
+        /* Dreg{reg} = [Ireg{i} ++ Mreg{m}]; */
+        disalgnexcpt_ld32u(dc, cpu_dreg[reg], cpu_ireg[i]);
+        gen_dagadd(dc, i, cpu_mreg[m]);
+    } else if (aop == 3 && W == 1) {
+        /* [Ireg{i} ++ Mreg{m}] = Dreg{reg}; */
+        gen_aligned_qemu_st32(dc, cpu_dreg[reg], cpu_ireg[i]);
+        gen_dagadd(dc, i, cpu_mreg[m]);
+    } else {
+        illegal_instruction(dc);
+    }
+}
+
+/* Basic load/store through a Preg (LDST): 32/16/8-bit access with
+   optional post-increment (aop 0) or post-decrement (aop 1); aop 2
+   leaves the pointer unmodified.  */
+static void
+decode_LDST_0(DisasContext *dc, uint16_t iw0)
+{
+    /* LDST
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 0 | 1 |.sz....|.W.|.aop...|.Z.|.ptr.......|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int Z   = ((iw0 >> LDST_Z_bits) & LDST_Z_mask);
+    int W   = ((iw0 >> LDST_W_bits) & LDST_W_mask);
+    int sz  = ((iw0 >> LDST_sz_bits) & LDST_sz_mask);
+    int aop = ((iw0 >> LDST_aop_bits) & LDST_aop_mask);
+    int reg = ((iw0 >> LDST_reg_bits) & LDST_reg_mask);
+    int ptr = ((iw0 >> LDST_ptr_bits) & LDST_ptr_mask);
+
+    TRACE_EXTRACT("sz:%i W:%i aop:%i Z:%i ptr:%i reg:%i",
+                  sz, W, aop, Z, ptr, reg);
+
+    if (aop == 3) {
+        /* NOTE(review): assumes illegal_instruction() terminates
+           translation of this insn; otherwise the code below would still
+           emit a memory access for the invalid encoding -- confirm.  */
+        illegal_instruction(dc);
+    }
+
+    if (W == 0) {
+        if (sz == 0 && Z == 0) {
+            /* Dreg{reg} = [Preg{ptr}{aop}]; */
+            gen_aligned_qemu_ld32u(dc, cpu_dreg[reg], cpu_preg[ptr]);
+        } else if (sz == 0 && Z == 1) {
+            /* Preg{reg} = [Preg{ptr}{aop}]; */
+            /*if (aop < 2 && ptr == reg)
+                illegal_instruction_combination(dc);*/
+            gen_aligned_qemu_ld32u(dc, cpu_preg[reg], cpu_preg[ptr]);
+        } else if (sz == 1 && Z == 0) {
+            /* Dreg{reg} = W[Preg{ptr}{aop}] (Z); */
+            gen_aligned_qemu_ld16u(dc, cpu_dreg[reg], cpu_preg[ptr]);
+        } else if (sz == 1 && Z == 1) {
+            /* Dreg{reg} = W[Preg{ptr}{aop}] (X); */
+            gen_aligned_qemu_ld16s(dc, cpu_dreg[reg], cpu_preg[ptr]);
+        } else if (sz == 2 && Z == 0) {
+            /* Dreg{reg} = B[Preg{ptr}{aop}] (Z); */
+            tcg_gen_qemu_ld8u(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+        } else if (sz == 2 && Z == 1) {
+            /* Dreg{reg} = B[Preg{ptr}{aop}] (X); */
+            tcg_gen_qemu_ld8s(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+        } else {
+            illegal_instruction(dc);
+        }
+    } else {
+        if (sz == 0 && Z == 0) {
+            /* [Preg{ptr}{aop}] = Dreg{reg}; */
+            gen_aligned_qemu_st32(dc, cpu_dreg[reg], cpu_preg[ptr]);
+        } else if (sz == 0 && Z == 1) {
+            /* [Preg{ptr}{aop}] = Preg{reg}; */
+            gen_aligned_qemu_st32(dc, cpu_preg[reg], cpu_preg[ptr]);
+        } else if (sz == 1 && Z == 0) {
+            /* W[Preg{ptr}{aop}] = Dreg{reg}; */
+            gen_aligned_qemu_st16(dc, cpu_dreg[reg], cpu_preg[ptr]);
+        } else if (sz == 2 && Z == 0) {
+            /* B[Preg{ptr}{aop}] = Dreg{reg}; */
+            tcg_gen_qemu_st8(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+        } else {
+            illegal_instruction(dc);
+        }
+    }
+
+    /* Post-modify by the access size in bytes: 1 << (2 - sz) = 4/2/1.  */
+    if (aop == 0) {
+        tcg_gen_addi_tl(cpu_preg[ptr], cpu_preg[ptr], 1 << (2 - sz));
+    }
+    if (aop == 1) {
+        tcg_gen_subi_tl(cpu_preg[ptr], cpu_preg[ptr], 1 << (2 - sz));
+    }
+}
+
+/* FP-relative 32-bit load/store of a Dreg or Preg (LDSTiiFP).  The
+   offset is a negative scaled immediate (negimm5s4), i.e. accesses
+   below the frame pointer.  */
+static void
+decode_LDSTiiFP_0(DisasContext *dc, uint16_t iw0)
+{
+    /* LDSTiiFP
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 1 | 1 | 1 | 0 |.W.|.offset............|.reg...........|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* This isn't exactly a grp:reg as this insn only supports Dregs & Pregs,
+       but for our usage, its functionality the same thing.  */
+    int grp = ((iw0 >> 3) & 0x1);
+    int reg = ((iw0 >> LDSTiiFP_reg_bits) & 0x7 /*LDSTiiFP_reg_mask*/);
+    int offset = ((iw0 >> LDSTiiFP_offset_bits) & LDSTiiFP_offset_mask);
+    int W = ((iw0 >> LDSTiiFP_W_bits) & LDSTiiFP_W_mask);
+    uint32_t imm = negimm5s4(offset);
+    TCGv treg = get_allreg(dc, grp, reg);
+    TCGv ea;
+
+    TRACE_EXTRACT("W:%i offset:%#x grp:%i reg:%i",
+                  W, offset, grp, reg);
+
+    /* Compute the effective address once and verify 32-bit alignment
+       before either the load or the store.  */
+    ea = tcg_temp_local_new();
+    tcg_gen_addi_tl(ea, cpu_fpreg, imm);
+    gen_align_check(dc, ea, 4, false);
+    if (W == 0) {
+        /* DPreg{reg} = [FP + imm{offset}]; */
+        tcg_gen_qemu_ld32u(treg, ea, dc->mem_idx);
+    } else {
+        /* [FP + imm{offset}] = DPreg{reg}; */
+        tcg_gen_qemu_st32(treg, ea, dc->mem_idx);
+    }
+    tcg_temp_free(ea);
+}
+
+static void
+decode_LDSTii_0(DisasContext *dc, uint16_t iw0)
+{
+    /* LDSTii
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 1 |.W.|.op....|.offset........|.ptr.......|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       Load/store with a short scaled unsigned offset from a Preg base.  */
+    int reg = ((iw0 >> LDSTii_reg_bit) & LDSTii_reg_mask);
+    int ptr = ((iw0 >> LDSTii_ptr_bit) & LDSTii_ptr_mask);
+    int offset = ((iw0 >> LDSTii_offset_bit) & LDSTii_offset_mask);
+    int op = ((iw0 >> LDSTii_op_bit) & LDSTii_op_mask);
+    int W = ((iw0 >> LDSTii_W_bit) & LDSTii_W_mask);
+    uint32_t imm;
+    TCGv ea;
+
+    TRACE_EXTRACT("W:%i op:%i offset:%#x ptr:%i reg:%i",
+                  W, op, offset, ptr, reg);
+
+    /* The offset is scaled by access size: 32-bit ops (op 0/3) scale by 4,
+       16-bit ops (op 1/2) scale by 2.  */
+    if (op == 0 || op == 3) {
+        imm = uimm4s4(offset);
+    } else {
+        imm = uimm4s2(offset);
+    }
+
+    ea = tcg_temp_local_new();
+    tcg_gen_addi_tl(ea, cpu_preg[ptr], imm);
+    if (W == 0) {
+        /* Loads: op is a 2-bit field, so all four values are valid here.  */
+        if (op == 0) {
+            /* Dreg{reg} = [Preg{ptr} + imm{offset}]; */
+            gen_aligned_qemu_ld32u(dc, cpu_dreg[reg], ea);
+        } else if (op == 1) {
+            /* Dreg{reg} = W[Preg{ptr} + imm{offset}] (Z); */
+            gen_aligned_qemu_ld16u(dc, cpu_dreg[reg], ea);
+        } else if (op == 2) {
+            /* Dreg{reg} = W[Preg{ptr} + imm{offset}] (X); */
+            gen_aligned_qemu_ld16s(dc, cpu_dreg[reg], ea);
+        } else if (op == 3) {
+            /* Preg{reg} = [Preg{ptr} + imm{offset}]; */
+            gen_aligned_qemu_ld32u(dc, cpu_preg[reg], ea);
+        }
+    } else {
+        /* Stores: op == 2 (16-bit sign-extend) has no store form.  */
+        if (op == 0) {
+            /* [Preg{ptr} + imm{offset}] = Dreg{reg}; */
+            gen_aligned_qemu_st32(dc, cpu_dreg[reg], ea);
+        } else if (op == 1) {
+            /* W[Preg{ptr} + imm{offset}] = Dreg{reg}; */
+            gen_aligned_qemu_st16(dc, cpu_dreg[reg], ea);
+        } else if (op == 3) {
+            /* [Preg{ptr} + imm{offset}] = Preg{reg}; */
+            gen_aligned_qemu_st32(dc, cpu_preg[reg], ea);
+        } else {
+            illegal_instruction(dc);
+        }
+    }
+    tcg_temp_free(ea);
+}
+
+static void
+decode_LoopSetup_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* LoopSetup
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |.rop...|.c.|.soffset.......|
+       |.reg...........| - | - |.eoffset...............................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       Hardware loop setup (LSETUP): record loop top/bottom PCs in LT/LB
+       and optionally initialize the loop counter LC from a Preg.  */
+    int c   = ((iw0 >> (LoopSetup_c_bits - 16)) & LoopSetup_c_mask);
+    int reg = ((iw1 >> LoopSetup_reg_bits) & LoopSetup_reg_mask);
+    int rop = ((iw0 >> (LoopSetup_rop_bits - 16)) & LoopSetup_rop_mask);
+    int soffset = ((iw0 >> (LoopSetup_soffset_bits - 16)) & LoopSetup_soffset_mask);
+    int eoffset = ((iw1 >> LoopSetup_eoffset_bits) & LoopSetup_eoffset_mask);
+    int spcrel = pcrel4(soffset);
+    int epcrel = lppcrel10(eoffset);
+
+    TRACE_EXTRACT("rop:%i c:%i soffset:%i reg:%i eoffset:%i",
+                  rop, c, soffset, reg, eoffset);
+
+    if (rop == 0) {
+        /* LSETUP (imm{soffset}, imm{eoffset}) LCreg{c}; */;
+    } else if (rop == 1 && reg <= 7) {
+        /* LSETUP (imm{soffset}, imm{eoffset}) LCreg{c} = Preg{reg}; */
+        tcg_gen_mov_tl(cpu_lcreg[c], cpu_preg[reg]);
+    } else if (rop == 3 && reg <= 7) {
+        /* LSETUP (imm{soffset}, imm{eoffset}) LCreg{c} = Preg{reg} >> 1; */
+        tcg_gen_shri_tl(cpu_lcreg[c], cpu_preg[reg], 1);
+    } else {
+        illegal_instruction(dc);
+    }
+
+    /* NOTE(review): LT/LB and the exit branch below are emitted even when
+       rop was illegal above -- presumably illegal_instruction() raises an
+       exception at runtime so they never execute; confirm.  */
+    tcg_gen_movi_tl(cpu_ltreg[c], dc->pc + spcrel);
+    tcg_gen_movi_tl(cpu_lbreg[c], dc->pc + epcrel);
+    /* LSETUP is a 4-byte insn; resume translation right after it.  */
+    gen_gotoi_tb(dc, 0, dc->pc + 4);
+}
+
+static void
+decode_LDIMMhalf_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* LDIMMhalf
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 |.Z.|.H.|.S.|.grp...|.reg.......|
+       |.hword.........................................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       Load a 16-bit immediate into a whole register (sign- or zero-
+       extended) or into just its low/high half.  */
+    int H = ((iw0 >> (LDIMMhalf_H_bits - 16)) & LDIMMhalf_H_mask);
+    int Z = ((iw0 >> (LDIMMhalf_Z_bits - 16)) & LDIMMhalf_Z_mask);
+    int S = ((iw0 >> (LDIMMhalf_S_bits - 16)) & LDIMMhalf_S_mask);
+    int reg = ((iw0 >> (LDIMMhalf_reg_bits - 16)) & LDIMMhalf_reg_mask);
+    int grp = ((iw0 >> (LDIMMhalf_grp_bits - 16)) & LDIMMhalf_grp_mask);
+    int hword = ((iw1 >> LDIMMhalf_hword_bits) & LDIMMhalf_hword_mask);
+    uint32_t val;
+    TCGv treg;
+
+    TRACE_EXTRACT("Z:%i H:%i S:%i grp:%i reg:%i hword:%#x",
+                  Z, H, S, grp, reg, hword);
+
+    treg = get_allreg(dc, grp, reg);
+    val = (S == 1) ? imm16(hword) : luimm16(hword);
+
+    /* Only four H/S/Z combinations are architecturally valid.  */
+    switch ((H << 2) | (S << 1) | Z) {
+    case 2:
+        /* genreg{grp,reg} = imm{hword} (X); */
+        /* Sign extension of the immediate is done at translation time.  */
+        tcg_gen_movi_i32(treg, (int16_t)val);
+        break;
+    case 1:
+        /* genreg{grp,reg} = imm{hword} (Z); */
+        tcg_gen_movi_i32(treg, val);
+        break;
+    case 0:
+        /* genreg_lo{grp,reg} = imm{hword}; */
+        /* XXX: Convert this to a helper.  */
+        tcg_gen_andi_tl(treg, treg, 0xffff0000);
+        tcg_gen_ori_tl(treg, treg, val);
+        break;
+    case 4:
+        /* genreg_hi{grp,reg} = imm{hword}; */
+        /* XXX: Convert this to a helper.  */
+        tcg_gen_andi_tl(treg, treg, 0xffff);
+        tcg_gen_ori_tl(treg, treg, val << 16);
+        break;
+    default:
+        illegal_instruction(dc);
+        break;
+    }
+}
+
+static void
+decode_CALLa_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* CALLa
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 0 | 0 | 0 | 1 |.S.|.msw...........................|
+       |.lsw...........................................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       PC-relative CALL (S==1) or JUMP.L (S==0) with a 24-bit offset.  */
+    int S   = ((iw0 >> (CALLa_S_bits - 16)) & CALLa_S_mask);
+    int lsw = ((iw1 >> 0) & 0xffff);
+    int msw = ((iw0 >> 0) & 0xff);
+    int pcrel = pcrel24((msw << 16) | lsw);
+
+    TRACE_EXTRACT("S:%i msw:%#x lsw:%#x", S, msw, lsw);
+
+    /* CALL imm{pcrel}; {S==1} */
+    /* JUMP.L imm{pcrel}; {S==0} */
+    dc->is_jmp = S ? DISAS_CALL : DISAS_JUMP;
+
+    /* The branch itself is deferred so hardware-loop boundaries are
+       honored when the TB ends.  */
+    dc->hwloop_callback = gen_hwloop_br_pcrel_imm;
+    dc->hwloop_data = (void *)(unsigned long)pcrel;
+}
+
+static void
+decode_LDSTidxI_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* LDSTidxI
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 0 | 0 | 1 |.W.|.Z.|.sz....|.ptr.......|.reg.......|
+       |.offset........................................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       Load/store with a 16-bit signed offset, scaled by the access size
+       (sz==0: 32-bit/x4, sz==1: 16-bit/x2, sz==2: 8-bit/x1).  */
+    int Z = ((iw0 >> (LDSTidxI_Z_bits - 16)) & LDSTidxI_Z_mask);
+    int W = ((iw0 >> (LDSTidxI_W_bits - 16)) & LDSTidxI_W_mask);
+    int sz = ((iw0 >> (LDSTidxI_sz_bits - 16)) & LDSTidxI_sz_mask);
+    int reg = ((iw0 >> (LDSTidxI_reg_bits - 16)) & LDSTidxI_reg_mask);
+    int ptr = ((iw0 >> (LDSTidxI_ptr_bits - 16)) & LDSTidxI_ptr_mask);
+    int offset = ((iw1 >> LDSTidxI_offset_bits) & LDSTidxI_offset_mask);
+    uint32_t imm_16s4 = imm16s4(offset);
+    uint32_t imm_16s2 = imm16s2(offset);
+    uint32_t imm_16 = imm16(offset);
+    TCGv ea;
+
+    TRACE_EXTRACT("W:%i Z:%i sz:%i ptr:%i reg:%i offset:%#x",
+                  W, Z, sz, ptr, reg, offset);
+
+    ea = tcg_temp_local_new();
+    if (sz == 0) {
+        tcg_gen_addi_tl(ea, cpu_preg[ptr], imm_16s4);
+    } else if (sz == 1) {
+        tcg_gen_addi_tl(ea, cpu_preg[ptr], imm_16s2);
+    } else if (sz == 2) {
+        tcg_gen_addi_tl(ea, cpu_preg[ptr], imm_16);
+    } else {
+        /* sz == 3 is reserved.  */
+        illegal_instruction(dc);
+    }
+
+    if (W == 0) {
+        if (sz == 0 && Z == 0) {
+            /* Dreg{reg} = [Preg{ptr} + imm{offset}]; */
+            gen_aligned_qemu_ld32u(dc, cpu_dreg[reg], ea);
+        } else if (sz == 0 && Z == 1) {
+            /* Preg{reg} = [Preg{ptr} + imm{offset}]; */
+            gen_aligned_qemu_ld32u(dc, cpu_preg[reg], ea);
+        } else if (sz == 1 && Z == 0) {
+            /* Dreg{reg} = W[Preg{ptr} + imm{offset}] (Z); */
+            gen_aligned_qemu_ld16u(dc, cpu_dreg[reg], ea);
+        } else if (sz == 1 && Z == 1) {
+            /* Dreg{reg} = W[Preg{ptr} + imm{offset}] (X); */
+            gen_aligned_qemu_ld16s(dc, cpu_dreg[reg], ea);
+        } else if (sz == 2 && Z == 0) {
+            /* Dreg{reg} = B[Preg{ptr} + imm{offset}] (Z); */
+            tcg_gen_qemu_ld8u(cpu_dreg[reg], ea, dc->mem_idx);
+        } else if (sz == 2 && Z == 1) {
+            /* Dreg{reg} = B[Preg{ptr} + imm{offset}] (X); */
+            tcg_gen_qemu_ld8s(cpu_dreg[reg], ea, dc->mem_idx);
+        }
+    } else {
+        if (sz == 0 && Z == 0) {
+            /* [Preg{ptr} + imm{offset}] = Dreg{reg}; */
+            gen_aligned_qemu_st32(dc, cpu_dreg[reg], ea);
+        } else if (sz == 0 && Z == 1) {
+            /* [Preg{ptr} + imm{offset}] = Preg{reg}; */
+            gen_aligned_qemu_st32(dc, cpu_preg[reg], ea);
+        } else if (sz == 1 && Z == 0) {
+            /* W[Preg{ptr} + imm{offset}] = Dreg{reg}; */
+            gen_aligned_qemu_st16(dc, cpu_dreg[reg], ea);
+        } else if (sz == 2 && Z == 0) {
+            /* B[Preg{ptr} + imm{offset}] = Dreg{reg}; */
+            tcg_gen_qemu_st8(cpu_dreg[reg], ea, dc->mem_idx);
+        } else {
+            /* No sign-extending byte/word store forms exist.  */
+            illegal_instruction(dc);
+        }
+    }
+
+    tcg_temp_free(ea);
+}
+
+static void
+decode_linkage_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* linkage
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |.R.|
+       |.framesize.....................................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       Function prologue/epilogue: LINK (R==0) and UNLINK (R==1).  */
+    int R = ((iw0 >> (Linkage_R_bits - 16)) & Linkage_R_mask);
+    int framesize = ((iw1 >> Linkage_framesize_bits) & Linkage_framesize_mask);
+
+    TRACE_EXTRACT("R:%i framesize:%#x", R, framesize);
+
+    /* XXX: Should do alignment checks of fp/sp */
+
+    if (R == 0) {
+        /* LINK imm{framesize};
+           Push RETS then FP, point FP at the saved FP, and allocate
+           framesize bytes of stack.  */
+        int size = uimm16s4(framesize);
+        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
+        tcg_gen_qemu_st32(cpu_rets, cpu_spreg, dc->mem_idx);
+        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
+        tcg_gen_qemu_st32(cpu_fpreg, cpu_spreg, dc->mem_idx);
+        tcg_gen_mov_tl(cpu_fpreg, cpu_spreg);
+        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, size);
+    } else if (framesize == 0) {
+        /* UNLINK;
+           Exact inverse of LINK: discard the frame and restore FP and
+           RETS from the stack.  UNLINK encodes framesize as 0.  */
+        /* Restore SP from FP.  */
+        tcg_gen_mov_tl(cpu_spreg, cpu_fpreg);
+        tcg_gen_qemu_ld32u(cpu_fpreg, cpu_spreg, dc->mem_idx);
+        tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);
+        tcg_gen_qemu_ld32u(cpu_rets, cpu_spreg, dc->mem_idx);
+        tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);
+    } else {
+        illegal_instruction(dc);
+    }
+}
+
+static void
+decode_dsp32mac_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* XXX: Very incomplete.  */
+    /* dsp32mac
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 0 | 0 |.M.| 0 | 0 |.mmod..........|.MM|.P.|.w1|.op1...|
+       |.h01|.h11|.w0|.op0...|.h00|.h10|.dst.......|.src0......|.src1..|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       Dual 16-bit MAC: op1/h01/h11 drive the MAC1 half, op0/h00/h10 the
+       MAC0 half; w0/w1 select which results are written back, P selects
+       32-bit (register pair) vs 16-bit (register half) writeback.  */
+    int op1  = ((iw0 >> (DSP32Mac_op1_bits - 16)) & DSP32Mac_op1_mask);
+    int w1   = ((iw0 >> (DSP32Mac_w1_bits - 16)) & DSP32Mac_w1_mask);
+    int P    = ((iw0 >> (DSP32Mac_p_bits - 16)) & DSP32Mac_p_mask);
+    int MM   = ((iw0 >> (DSP32Mac_MM_bits - 16)) & DSP32Mac_MM_mask);
+    int mmod = ((iw0 >> (DSP32Mac_mmod_bits - 16)) & DSP32Mac_mmod_mask);
+    int M    = ((iw0 >> (DSP32Mac_M_bits - 16)) & DSP32Mac_M_mask);
+    int w0   = ((iw1 >> DSP32Mac_w0_bits) & DSP32Mac_w0_mask);
+    int src0 = ((iw1 >> DSP32Mac_src0_bits) & DSP32Mac_src0_mask);
+    int src1 = ((iw1 >> DSP32Mac_src1_bits) & DSP32Mac_src1_mask);
+    int dst  = ((iw1 >> DSP32Mac_dst_bits) & DSP32Mac_dst_mask);
+    int h10  = ((iw1 >> DSP32Mac_h10_bits) & DSP32Mac_h10_mask);
+    int h00  = ((iw1 >> DSP32Mac_h00_bits) & DSP32Mac_h00_mask);
+    int op0  = ((iw1 >> DSP32Mac_op0_bits) & DSP32Mac_op0_mask);
+    int h11  = ((iw1 >> DSP32Mac_h11_bits) & DSP32Mac_h11_mask);
+    int h01  = ((iw1 >> DSP32Mac_h01_bits) & DSP32Mac_h01_mask);
+
+    int v_i = 0;
+    TCGv res;
+
+    TRACE_EXTRACT("M:%i mmod:%i MM:%i P:%i w1:%i op1:%i h01:%i h11:%i "
+                  "w0:%i op0:%i h00:%i h10:%i dst:%i src0:%i src1:%i",
+                  M, mmod, MM, P, w1, op1, h01, h11, w0, op0, h00, h10,
+                  dst, src0, src1);
+
+    /* Snapshot the old dst so 16-bit (!P) results can be merged into the
+       correct halves before being committed below.  */
+    res = tcg_temp_local_new();
+    tcg_gen_mov_tl(res, cpu_dreg[dst]);
+    /* op == 3 together with w == 0 means "no operation on this half".  */
+    if (w1 == 1 || op1 != 3) {
+        TCGv res1 = decode_macfunc (dc, 1, op1, h01, h11, src0, src1, mmod, MM, P, &v_i);
+        if (w1) {
+            if (P) {
+                tcg_gen_mov_tl(cpu_dreg[dst + 1], res1);
+            } else {
+                gen_mov_h_tl(res, res1);
+            }
+        }
+        tcg_temp_free(res1);
+    }
+    if (w0 == 1 || op0 != 3) {
+        TCGv res0 = decode_macfunc (dc, 0, op0, h00, h10, src0, src1, mmod, 0, P, &v_i);
+        if (w0) {
+            if (P) {
+                tcg_gen_mov_tl(cpu_dreg[dst], res0);
+            } else {
+                gen_mov_l_tl(res, res0);
+            }
+        }
+        tcg_temp_free(res0);
+    }
+
+    /* NOTE(review): v_i collects overflow state from decode_macfunc but is
+       never folded into ASTAT here -- part of the "very incomplete"
+       status noted above.  */
+    if (!P && (w0 || w1)) {
+        /* Commit the merged 16-bit halves.  */
+        tcg_gen_mov_tl(cpu_dreg[dst], res);
+    }
+
+    tcg_temp_free(res);
+}
+
+static void
+decode_dsp32mult_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* XXX: Very incomplete.  */
+
+    /* dsp32mult
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 0 | 0 |.M.| 0 | 1 |.mmod..........|.MM|.P.|.w1|.op1...|
+       |.h01|.h11|.w0|.op0...|.h00|.h10|.dst.......|.src0......|.src1..|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       Dual 16-bit multiply (no accumulator update); shares the dsp32mac
+       field layout.  P selects 32-bit (register pair) vs 16-bit (register
+       half) writeback of the products.  */
+    int op1  = ((iw0 >> (DSP32Mac_op1_bits - 16)) & DSP32Mac_op1_mask);
+    int w1   = ((iw0 >> (DSP32Mac_w1_bits - 16)) & DSP32Mac_w1_mask);
+    int P    = ((iw0 >> (DSP32Mac_p_bits - 16)) & DSP32Mac_p_mask);
+    int MM   = ((iw0 >> (DSP32Mac_MM_bits - 16)) & DSP32Mac_MM_mask);
+    int mmod = ((iw0 >> (DSP32Mac_mmod_bits - 16)) & DSP32Mac_mmod_mask);
+    int M    = ((iw0 >> (DSP32Mac_M_bits - 16)) & DSP32Mac_M_mask);
+    int w0   = ((iw1 >> DSP32Mac_w0_bits) & DSP32Mac_w0_mask);
+    int src0 = ((iw1 >> DSP32Mac_src0_bits) & DSP32Mac_src0_mask);
+    int src1 = ((iw1 >> DSP32Mac_src1_bits) & DSP32Mac_src1_mask);
+    int dst  = ((iw1 >> DSP32Mac_dst_bits) & DSP32Mac_dst_mask);
+    int h10  = ((iw1 >> DSP32Mac_h10_bits) & DSP32Mac_h10_mask);
+    int h00  = ((iw1 >> DSP32Mac_h00_bits) & DSP32Mac_h00_mask);
+    int op0  = ((iw1 >> DSP32Mac_op0_bits) & DSP32Mac_op0_mask);
+    int h11  = ((iw1 >> DSP32Mac_h11_bits) & DSP32Mac_h11_mask);
+    int h01  = ((iw1 >> DSP32Mac_h01_bits) & DSP32Mac_h01_mask);
+
+    TCGv res;
+    TCGv sat0, sat1;
+
+    TRACE_EXTRACT("M:%i mmod:%i MM:%i P:%i w1:%i op1:%i h01:%i h11:%i "
+                  "w0:%i op0:%i h00:%i h10:%i dst:%i src0:%i src1:%i",
+                  M, mmod, MM, P, w1, op1, h01, h11, w0, op0, h00, h10,
+                  dst, src0, src1);
+
+    /* Reject encodings that are architecturally invalid: at least one
+       result must be written, mmod must be valid for the result width,
+       a 32-bit dst pair must be even-aligned, and op0/op1 must be 0.  */
+    if (w1 == 0 && w0 == 0) {
+        illegal_instruction(dc);
+    }
+    if (((1 << mmod) & (P ? 0x313 : 0x1b57)) == 0) {
+        illegal_instruction(dc);
+    }
+    if (P && ((dst & 1) || (op1 != 0) || (op0 != 0) || !is_macmod_pmove (mmod))) {
+        illegal_instruction(dc);
+    }
+    if (!P && ((op1 != 0) || (op0 != 0) || !is_macmod_hmove (mmod))) {
+        illegal_instruction(dc);
+    }
+
+    /* Snapshot the old dst so 16-bit (!P) results can be merged into the
+       correct halves before being committed below.  */
+    res = tcg_temp_local_new();
+    tcg_gen_mov_tl(res, cpu_dreg[dst]);
+
+    sat1 = tcg_temp_local_new();
+
+    if (w1) {
+        TCGv res1 = decode_multfunc_tl(dc, h01, h11, src0, src1, mmod, MM, sat1);
+        if (P) {
+            tcg_gen_mov_tl(cpu_dreg[dst + 1], res1);
+        } else {
+            gen_mov_h_tl(res, res1);
+        }
+        tcg_temp_free(res1);
+    }
+
+    sat0 = tcg_temp_local_new();
+
+    if (w0) {
+        TCGv res0 = decode_multfunc_tl(dc, h00, h10, src0, src1, mmod, 0, sat0);
+        if (P) {
+            tcg_gen_mov_tl(cpu_dreg[dst], res0);
+        } else {
+            gen_mov_l_tl(res, res0);
+        }
+        tcg_temp_free(res0);
+    }
+
+    if (!P && (w0 || w1)) {
+        /* Commit the merged 16-bit halves.  */
+        tcg_gen_mov_tl(cpu_dreg[dst], res);
+    }
+
+    /* NOTE(review): sat0/sat1 receive saturation state from
+       decode_multfunc_tl but are not folded into ASTAT here -- part of
+       the "very incomplete" status noted above.  */
+    tcg_temp_free(sat0);
+    tcg_temp_free(sat1);
+    tcg_temp_free(res);
+}
+
+static void
+decode_dsp32alu_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* dsp32alu
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 0 | 0 |.M.| 1 | 0 | - | - | - |.HL|.aopcde............|
+       |.aop...|.s.|.x.|.dst0......|.dst1......|.src0......|.src1......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int s    = ((iw1 >> DSP32Alu_s_bits) & DSP32Alu_s_mask);
+    int x    = ((iw1 >> DSP32Alu_x_bits) & DSP32Alu_x_mask);
+    int aop  = ((iw1 >> DSP32Alu_aop_bits) & DSP32Alu_aop_mask);
+    int src0 = ((iw1 >> DSP32Alu_src0_bits) & DSP32Alu_src0_mask);
+    int src1 = ((iw1 >> DSP32Alu_src1_bits) & DSP32Alu_src1_mask);
+    int dst0 = ((iw1 >> DSP32Alu_dst0_bits) & DSP32Alu_dst0_mask);
+    int dst1 = ((iw1 >> DSP32Alu_dst1_bits) & DSP32Alu_dst1_mask);
+    int M    = ((iw0 >> (DSP32Alu_M_bits - 16)) & DSP32Alu_M_mask);
+    int HL   = ((iw0 >> (DSP32Alu_HL_bits - 16)) & DSP32Alu_HL_mask);
+    int aopcde = ((iw0 >> (DSP32Alu_aopcde_bits - 16)) & DSP32Alu_aopcde_mask);
+    TCGv tmp;
+    TCGv_i64 tmp64;
+
+    TRACE_EXTRACT("M:%i HL:%i aopcde:%i aop:%i s:%i x:%i dst0:%i "
+                  "dst1:%i src0:%i src1:%i",
+                  M, HL, aopcde, aop, s, x, dst0, dst1, src0, src1);
+
+    if ((aop == 0 || aop == 2) && aopcde == 9 && HL == 0 && s == 0) {
+        int a = aop >> 1;
+        /* Areg_lo{a} = Dreg_lo{src0}; */
+        tcg_gen_andi_i64(cpu_areg[a], cpu_areg[a], ~0xffff);
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(tmp64, cpu_dreg[src0]);
+        tcg_gen_andi_i64(tmp64, tmp64, 0xffff);
+        tcg_gen_or_i64(cpu_areg[a], cpu_areg[a], tmp64);
+        tcg_temp_free_i64(tmp64);
+    } else if ((aop == 0 || aop == 2) && aopcde == 9 && HL == 1 && s == 0) {
+        int a = aop >> 1;
+        /* Areg_hi{a} = Dreg_hi{src0}; */
+        tcg_gen_andi_i64(cpu_areg[a], cpu_areg[a], 0xff0000ffff);
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(tmp64, cpu_dreg[src0]);
+        tcg_gen_andi_i64(tmp64, tmp64, 0xffff0000);
+        tcg_gen_or_i64(cpu_areg[a], cpu_areg[a], tmp64);
+        tcg_temp_free_i64(tmp64);
+    } else if ((aop == 1 || aop == 0) && aopcde == 5) {
+        /* Dreg{dst0}_hi{HL==0} = Dreg{src0} +{aop==0} Dreg{src1} (RND12); */
+        /* Dreg{dst0}_lo{HL==1} = Dreg{src0} +{aop==0} Dreg{src1} (RND12); */
+        /* Dreg{dst0}_hi{HL==0} = Dreg{src0} -{aop==1} Dreg{src1} (RND12); */
+        /* Dreg{dst0}_lo{HL==1} = Dreg{src0} -{aop==1} Dreg{src1} (RND12); */
+        unhandled_instruction(dc, "Dreg +/- RND12");
+    } else if ((aop == 2 || aop == 3) && aopcde == 5) {
+        /* Dreg{dst0}_hi{HL==0} = Dreg{src0} +{aop==0} Dreg{src1} (RND20); */
+        /* Dreg{dst0}_lo{HL==1} = Dreg{src0} +{aop==0} Dreg{src1} (RND20); */
+        /* Dreg{dst0}_hi{HL==0} = Dreg{src0} -{aop==1} Dreg{src1} (RND20); */
+        /* Dreg{dst0}_lo{HL==1} = Dreg{src0} -{aop==1} Dreg{src1} (RND20); */
+        unhandled_instruction(dc, "Dreg +/- RND20");
+    } else if (aopcde == 2 || aopcde == 3) {
+        /* Dreg{dst0}_lo{HL==0} = Dreg{src0}_lo{!aop&2} +{aopcde==2} Dreg{src1}_lo{!aop&1} (amod1(s,x)); */
+        /* Dreg{dst0}_hi{HL==1} = Dreg{src0}_hi{aop&2} -{aopcde==3} Dreg{src1}_hi{aop&1} (amod1(s,x)); */
+        TCGv s1, s2, d;
+
+        s1 = tcg_temp_new();
+        if (aop & 2) {
+            tcg_gen_shri_tl(s1, cpu_dreg[src0], 16);
+        } else {
+            tcg_gen_ext16u_tl(s1, cpu_dreg[src0]);
+        }
+
+        s2 = tcg_temp_new();
+        if (aop & 1) {
+            tcg_gen_shri_tl(s2, cpu_dreg[src1], 16);
+        } else {
+            tcg_gen_ext16u_tl(s2, cpu_dreg[src1]);
+        }
+
+        d = tcg_temp_new();
+        if (aopcde == 2) {
+            tcg_gen_add_tl(d, s1, s2);
+        } else {
+            tcg_gen_sub_tl(d, s1, s2);
+        }
+        tcg_gen_andi_tl(d, d, 0xffff);
+
+        tcg_temp_free(s1);
+        tcg_temp_free(s2);
+
+        if (HL) {
+            tcg_gen_andi_tl(cpu_dreg[dst0], cpu_dreg[dst0], 0xffff);
+            tcg_gen_shli_tl(d, d, 16);
+            tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], d);
+        } else {
+            tcg_gen_andi_tl(cpu_dreg[dst0], cpu_dreg[dst0], 0xffff0000);
+            tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], d);
+        }
+        tcg_temp_free(d);
+
+        /* XXX: missing ASTAT update */
+    } else if ((aop == 0 || aop == 2) && aopcde == 9 && s == 1) {
+        int a = aop >> 1;
+        /* Areg{a} = Dreg{src0}; */
+        tcg_gen_ext_i32_i64(cpu_areg[a], cpu_dreg[src0]);
+    } else if ((aop == 1 || aop == 3) && aopcde == 9 && s == 0) {
+        int a = aop >> 1;
+        /* Areg_x{a} = Dreg_lo{src0}; */
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(tmp64, cpu_dreg[src0]);
+        tcg_gen_ext8u_i64(tmp64, tmp64);
+        tcg_gen_shli_i64(tmp64, tmp64, 32);
+        tcg_gen_andi_i64(cpu_areg[a], cpu_areg[a], 0xffffffff);
+        tcg_gen_or_i64(cpu_areg[a], cpu_areg[a], tmp64);
+        tcg_temp_free_i64(tmp64);
+    } else if (aop == 3 && aopcde == 11 && (s == 0 || s == 1)) {
+        /* A0 -= A0 (W32){s==1}; */
+        tcg_gen_sub_i64(cpu_areg[0], cpu_areg[0], cpu_areg[1]);
+
+        if (s == 1) {
+            unhandled_instruction(dc, "A0 -= A1 (W32)");
+        }
+        /* XXX: missing ASTAT update */
+    } else if ((aop == 0 || aop == 1) && aopcde == 22) {
+        /* Dreg{dst0} = BYTEOP2P (Dreg{src0+1}:Dreg{src0}, Dreg{src1+1}:Dreg{src1} (mode); */
+        /* modes[HL + (aop << 1)] = { rndl, rndh, tl, th }; */
+        /* (modes, r) s==1 */
+        unhandled_instruction(dc, "BYTEOP2P");
+    } else if ((aop == 0 || aop == 1) && s == 0 && aopcde == 8) {
+        /* Areg{aop} = 0; */
+        tcg_gen_movi_i64(cpu_areg[0], 0);
+    } else if (aop == 2 && s == 0 && aopcde == 8) {
+        /* A1 = A0 = 0; */
+        tcg_gen_movi_i64(cpu_areg[0], 0);
+        tcg_gen_mov_i64(cpu_areg[1], cpu_areg[0]);
+    } else if ((aop == 0 || aop == 1 || aop == 2) && s == 1 && aopcde == 8) {
+        /* A0 = A0 (S); {aop==0} */
+        /* A1 = A1 (S); {aop==1} */
+        /* A1 = A1 (S), A0 = A0 (S); {aop==2} */
+        TCGv sat0, sat1;
+
+        sat0 = tcg_temp_local_new();
+        tcg_gen_movi_tl(sat0, 0);
+        if (aop == 0 || aop == 2) {
+            gen_extend_acc(cpu_areg[0]);
+            saturate_s32(cpu_areg[0], sat0);
+            tcg_gen_ext32s_i64(cpu_areg[0], cpu_areg[0]);
+        }
+
+        sat1 = tcg_temp_local_new();
+        tcg_gen_movi_tl(sat1, 0);
+        if (aop == 1 || aop == 2) {
+            gen_extend_acc(cpu_areg[1]);
+            saturate_s32(cpu_areg[1], sat1);
+            tcg_gen_ext32s_i64(cpu_areg[0], cpu_areg[0]);
+        }
+
+        tcg_temp_free(sat1);
+        tcg_temp_free(sat0);
+
+        /* XXX: missing ASTAT update */
+    } else if (aop == 3 && (s == 0 || s == 1) && aopcde == 8) {
+        /* Areg{s} = Areg{!s}; */
+        tcg_gen_mov_i64(cpu_areg[s], cpu_areg[!s]);
+    } else if (aop == 3 && HL == 0 && aopcde == 16) {
+        /* A1 = ABS A1 , A0 = ABS A0; */
+        int i;
+        /* XXX: Missing ASTAT updates and saturation */
+        for (i = 0; i < 2; ++i) {
+            gen_abs_i64(cpu_areg[i], cpu_areg[i]);
+        }
+    } else if (aop == 0 && aopcde == 23) {
+        unhandled_instruction(dc, "BYTEOP3P");
+    } else if ((aop == 0 || aop == 1) && aopcde == 16) {
+        /* Areg{HL} = ABS Areg{aop}; */
+
+        /* XXX: Missing ASTAT updates */
+        /* XXX: Missing saturation */
+        gen_abs_i64(cpu_areg[aop], cpu_areg[aop]);
+    } else if (aop == 3 && aopcde == 12) {
+        /* Dreg{dst0}_lo{HL==0} = Dreg{src0} (RND); */
+        /* Dreg{dst0}_hi{HL==1} = Dreg{src0} (RND); */
+        unhandled_instruction(dc, "Dreg (RND)");
+    } else if (aop == 3 && HL == 0 && aopcde == 15) {
+        /* Dreg{dst0} = -Dreg{src0} (V); */
+        unhandled_instruction(dc, "Dreg = -Dreg (V)");
+    } else if (aop == 3 && HL == 0 && aopcde == 14) {
+        /* A1 = -A1 , A0 = -A0; */
+        tcg_gen_neg_i64(cpu_areg[1], cpu_areg[1]);
+        tcg_gen_neg_i64(cpu_areg[0], cpu_areg[0]);
+        /* XXX: what ASTAT flags need updating ?  */
+    } else if ((aop == 0 || aop == 1) && (HL == 0 || HL == 1) && aopcde == 14) {
+        /* Areg{HL} = -Areg{aop}; */
+        tcg_gen_neg_i64(cpu_areg[HL], cpu_areg[aop]);
+        /* XXX: Missing ASTAT updates */
+    } else if (aop == 0 && aopcde == 12) {
+        /* Dreg_lo{dst0} = Dreg_hi{dst0} =
+                 SIGN(Dreg_hi{src0} * Dreg_hi{src1} +
+                 SIGN(Dreg_lo{src0} * Dreg_lo{src1} */
+        int l;
+        TCGv tmp1_hi, tmp1_lo;
+
+        tmp1_hi = tcg_temp_local_new();
+        /* if ((src0_hi >> 15) & 1) tmp1_hi = -src1_hi; */
+        tcg_gen_sari_tl(tmp1_hi, cpu_dreg[src1], 16);
+        l = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_GE, cpu_dreg[src0], 0, l);
+        tcg_gen_neg_tl(tmp1_hi, tmp1_hi);
+        gen_set_label(l);
+
+        tmp = tcg_temp_local_new();
+        tmp1_lo = tcg_temp_local_new();
+        /* if ((src0_lo >> 15) & 1) tmp1_lo = -src1_lo; */
+        tcg_gen_ext16s_tl(tmp, cpu_dreg[src0]);
+        tcg_gen_ext16s_tl(tmp1_lo, cpu_dreg[src1]);
+        l = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_GE, tmp, 0, l);
+        tcg_gen_neg_tl(tmp1_lo, tmp1_lo);
+        gen_set_label(l);
+
+        tcg_temp_free(tmp);
+
+        tcg_gen_add_tl(tmp1_hi, tmp1_hi, tmp1_lo);
+        tcg_gen_shli_tl(cpu_dreg[dst0], tmp1_hi, 16);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp1_hi);
+
+        tcg_temp_free(tmp1_lo);
+        tcg_temp_free(tmp1_hi);
+    } else if (aopcde == 0) {
+        /* Dreg{dst0} = Dreg{src0} -{aop&2}+{!aop&2}|-{aop&1}+{!aop&1}
+                        Dreg{src1} (amod0); */
+        TCGv s0, s1, t0, t1;
+
+        if (s || x) {
+            unhandled_instruction(dc, "S/CO/SCO with +|+/-|-");
+        }
+
+        s0 = tcg_temp_local_new();
+        s1 = tcg_temp_local_new();
+
+        t0 = tcg_temp_local_new();
+        tcg_gen_shri_tl(s0, cpu_dreg[src0], 16);
+        tcg_gen_shri_tl(s1, cpu_dreg[src1], 16);
+        if (aop & 2) {
+            tcg_gen_sub_tl(t0, s0, s1);
+        } else {
+            tcg_gen_add_tl(t0, s0, s1);
+        }
+
+        t1 = tcg_temp_local_new();
+        tcg_gen_andi_tl(s0, cpu_dreg[src0], 0xffff);
+        tcg_gen_andi_tl(s1, cpu_dreg[src1], 0xffff);
+        if (aop & 1) {
+            tcg_gen_sub_tl(t1, s0, s1);
+        } else {
+            tcg_gen_add_tl(t1, s0, s1);
+        }
+
+        tcg_temp_free(s1);
+        tcg_temp_free(s0);
+
+        astat_queue_state2(dc, ASTAT_OP_VECTOR_ADD_ADD + aop, t0, t1);
+
+        if (x) {
+            /* dst0.h = t1; dst0.l = t0 */
+            tcg_gen_ext16u_tl(cpu_dreg[dst0], t0);
+            tcg_gen_shli_tl(t1, t1, 16);
+            tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], t1);
+        } else {
+            /* dst0.h = t0; dst0.l = t1 */
+            tcg_gen_ext16u_tl(cpu_dreg[dst0], t1);
+            tcg_gen_shli_tl(t0, t0, 16);
+            tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], t0);
+        }
+
+        tcg_temp_free(t0);
+        tcg_temp_free(t1);
+        /* XXX: missing ASTAT update */
+    } else if (aop == 1 && aopcde == 12) {
+        /* Dreg{dst1} = A1.L + A1.H, Dreg{dst0} = A0.L + A0.H; */
+        TCGv al, ah;
+
+        /*if (dst0 == dst1)
+            illegal_instruction_combination(dc);*/
+
+        al = tcg_temp_local_new();
+        ah = tcg_temp_local_new();
+        tcg_gen_trunc_i64_i32(ah, cpu_areg[0]);
+        tcg_gen_ext16u_tl(al, ah);
+        tcg_gen_shri_tl(ah, ah, 16);
+        tcg_gen_add_tl(cpu_dreg[dst0], al, ah);
+        tcg_temp_free(al);
+        tcg_temp_free(ah);
+        tcg_gen_ext16s_tl(cpu_dreg[dst0], cpu_dreg[dst0]);
+
+        al = tcg_temp_local_new();
+        ah = tcg_temp_local_new();
+        tcg_gen_trunc_i64_i32(ah, cpu_areg[1]);
+        tcg_gen_ext16u_tl(al, ah);
+        tcg_gen_shri_tl(ah, ah, 16);
+        tcg_gen_add_tl(cpu_dreg[dst1], al, ah);
+        tcg_temp_free(al);
+        tcg_temp_free(ah);
+        tcg_gen_ext16s_tl(cpu_dreg[dst1], cpu_dreg[dst1]);
+
+        /* XXX: ASTAT ?  */
+    } else if (aopcde == 1) {
+        /* XXX: missing ASTAT update */
+        unhandled_instruction(dc, "Dreg +|+ Dreg, Dreg -|- Dreg");
+    } else if ((aop == 0 || aop == 1 || aop == 2) && aopcde == 11) {
+        /* Dreg{dst0} = (A0 += A1); {aop==0} */
+        /* Dreg{dst0}_lo{HL==0} = (A0 += A1); {aop==1} */
+        /* Dreg{dst0}_hi{HL==1} = (A0 += A1); {aop==1} */
+        /* (A0 += A1); {aop==2} */
+        tcg_gen_add_i64(cpu_areg[0], cpu_areg[0], cpu_areg[1]);
+
+        if (aop == 2 && s == 1) {
+            unhandled_instruction(dc, "A0 += A1 (W32)");
+        }
+
+        /* XXX: missing saturation support */
+        if (aop == 0) {
+            /* Dregs = A0 += A1 */
+            tcg_gen_trunc_i64_i32(cpu_dreg[dst0], cpu_areg[0]);
+        } else if (aop == 1) {
+            /* Dregs_lo = A0 += A1 */
+            tmp = tcg_temp_new();
+            tcg_gen_trunc_i64_i32(tmp, cpu_areg[0]);
+            gen_mov_l_tl(cpu_dreg[dst0], tmp);
+            tcg_temp_free(tmp);
+        }
+    } else if ((aop == 0 || aop == 1) && aopcde == 10) {
+        /* Dreg_lo{dst0} = Areg_x{aop}; */
+        tmp = tcg_temp_new();
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_shri_i64(tmp64, cpu_areg[aop], 32);
+        tcg_gen_trunc_i64_i32(tmp, tmp64);
+        tcg_temp_free_i64(tmp64);
+        tcg_gen_ext8s_tl(tmp, tmp);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+    } else if (aop == 0 && aopcde == 4) {
+        /* Dreg{dst0} = Dreg{src0} + Dreg{src1} (amod1(s,x)); */
+        tcg_gen_add_tl(cpu_dreg[dst0], cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state3(dc, ASTAT_OP_ADD32, cpu_dreg[dst0], cpu_dreg[src0], cpu_dreg[src1]);
+    } else if (aop == 1 && aopcde == 4) {
+        /* Dreg{dst0} = Dreg{src0} - Dreg{src1} (amod1(s,x)); */
+        tcg_gen_sub_tl(cpu_dreg[dst0], cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state3(dc, ASTAT_OP_SUB32, cpu_dreg[dst0], cpu_dreg[src0], cpu_dreg[src1]);
+    } else if (aop == 2 && aopcde == 4) {
+        /* Dreg{dst1} = Dreg{src0} + Dreg{src1}, Dreg{dst0} = Dreg{src0} - Dreg{src1} (amod1(s,x)); */
+
+        /*if (dst0 == dst1)
+            illegal_instruction_combination(dc);*/
+
+        if (dst1 == src0 || dst1 == src1) {
+            tmp = tcg_temp_new();
+        } else {
+            tmp = cpu_dreg[dst1];
+        }
+        tcg_gen_add_tl(tmp, cpu_dreg[src0], cpu_dreg[src1]);
+        tcg_gen_sub_tl(cpu_dreg[dst0], cpu_dreg[src0], cpu_dreg[src1]);
+        if (dst1 == src0 || dst1 == src1) {
+            tcg_gen_mov_tl(cpu_dreg[dst1], tmp);
+            tcg_temp_free(tmp);
+        }
+        /* XXX: Missing ASTAT updates */
+    } else if ((aop == 0 || aop == 1) && aopcde == 17) {
+        unhandled_instruction(dc, "Dreg = Areg + Areg, Dreg = Areg - Areg");
+    } else if (aop == 0 && aopcde == 18) {
+        unhandled_instruction(dc, "SAA");
+    } else if (aop == 3 && aopcde == 18) {
+        dc->disalgnexcpt = true;
+    } else if ((aop == 0 || aop == 1) && aopcde == 20) {
+        unhandled_instruction(dc, "BYTEOP1P");
+    } else if (aop == 0 && aopcde == 21) {
+        unhandled_instruction(dc, "BYTEOP16P");
+    } else if (aop == 1 && aopcde == 21) {
+        unhandled_instruction(dc, "BYTEOP16M");
+    } else if ((aop == 0 || aop == 1) && aopcde == 7) {
+        /* Dreg{dst0} = MIN{aop==1} (Dreg{src0}, Dreg{src1}); */
+        /* Dreg{dst0} = MAX{aop==0} (Dreg{src0}, Dreg{src1}); */
+        int l, _src0, _src1;
+        TCGCond cond;
+
+        if (aop == 0) {
+            cond = TCG_COND_LT;
+        } else {
+            cond = TCG_COND_GE;
+        }
+
+        /* src/dst regs might be the same, so we need to handle that */
+        if (dst0 == src1) {
+            _src0 = src1, _src1 = src0;
+        } else {
+            _src0 = src0, _src1 = src1;
+        }
+
+        l = gen_new_label();
+        tcg_gen_mov_tl(cpu_dreg[dst0], cpu_dreg[_src0]);
+        tcg_gen_brcond_tl(cond, cpu_dreg[_src1], cpu_dreg[_src0], l);
+        tcg_gen_mov_tl(cpu_dreg[dst0], cpu_dreg[_src1]);
+        gen_set_label(l);
+
+        astat_queue_state1(dc, ASTAT_OP_MIN_MAX, cpu_dreg[dst0]);
+    } else if (aop == 2 && aopcde == 7) {
+        /* Dreg{dst0} = ABS Dreg{src0}; */
+
+        /* XXX: Missing saturation support (and ASTAT V/VS) */
+        gen_abs_tl(cpu_dreg[dst0], cpu_dreg[src0]);
+
+        astat_queue_state2(dc, ASTAT_OP_ABS, cpu_dreg[dst0], cpu_dreg[src0]);
+    } else if (aop == 3 && aopcde == 7) {
+        /* Dreg{dst0} = -Dreg{src0} (amod1(s,0)); */
+        int l, endl;
+
+        l = gen_new_label();
+        endl = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_NE, cpu_dreg[src0], 0x80000000, l);
+        if (s) {
+            tcg_gen_movi_tl(cpu_dreg[dst0], 0x7fffffff);
+            tmp = tcg_const_tl(1);
+            _gen_astat_store(ASTAT_V, tmp);
+            _gen_astat_store(ASTAT_V_COPY, tmp);
+            _gen_astat_store(ASTAT_VS, tmp);
+            tcg_temp_free(tmp);
+        } else {
+            tcg_gen_movi_tl(cpu_dreg[dst0], 0x80000000);
+        }
+
+        gen_set_label(l);
+        tcg_gen_neg_tl(cpu_dreg[dst0], cpu_dreg[src0]);
+        gen_set_label(endl);
+        astat_queue_state2(dc, ASTAT_OP_NEGATE, cpu_dreg[dst0], cpu_dreg[src0]);
+    } else if (aop == 2 && aopcde == 6) {
+        /* Dreg{dst0} = ABS Dreg{src0} (V); */
+        TCGv tmp0;
+
+        tmp = tcg_temp_local_new();
+        tcg_gen_sari_tl(tmp, cpu_dreg[src0], 16);
+        gen_abs_tl(tmp, tmp);
+
+        tmp0 = tcg_temp_local_new();
+        tcg_gen_ext16s_tl(tmp0, cpu_dreg[src0]);
+        gen_abs_tl(tmp0, tmp0);
+
+        astat_queue_state2(dc, ASTAT_OP_ABS_VECTOR, tmp0, tmp);
+
+        tcg_gen_shli_tl(tmp, tmp, 16);
+        tcg_gen_andi_tl(tmp0, tmp0, 0xffff);
+        tcg_gen_or_tl(cpu_dreg[dst0], tmp, tmp0);
+
+        tcg_temp_free(tmp0);
+        tcg_temp_free(tmp);
+    } else if ((aop == 0 || aop == 1) && aopcde == 6) {
+        /* Dreg{dst0} = MAX{aop==0} (Dreg{src0}, Dreg{src1}) (V); */
+        /* Dreg{dst0} = MIN{aop==1} (Dreg{src0}, Dreg{src1}) (V); */
+        /* src/dst regs might be the same, so we need to handle that */
+        int l;
+        TCGCond cond;
+        TCGv tmp0, tmp1;
+
+        cond = aop == 1 ? TCG_COND_LE : TCG_COND_GE;
+
+        tmp = tcg_temp_local_new();
+        tmp0 = tcg_temp_local_new();
+        tmp1 = tcg_temp_local_new();
+
+        /* First do top 16bit pair */
+        l = gen_new_label();
+        tcg_gen_andi_tl(tmp0, cpu_dreg[src0], 0xffff0000);
+        tcg_gen_andi_tl(tmp1, cpu_dreg[src1], 0xffff0000);
+        tcg_gen_brcond_tl(cond, tmp0, tmp1, l);
+        tcg_gen_mov_tl(tmp0, tmp1);
+        gen_set_label(l);
+
+        /* Then bottom 16bit pair */
+        l = gen_new_label();
+        tcg_gen_ext16s_tl(tmp, cpu_dreg[src0]);
+        tcg_gen_ext16s_tl(tmp1, cpu_dreg[src1]);
+        tcg_gen_brcond_tl(cond, tmp, tmp1, l);
+        tcg_gen_mov_tl(tmp, tmp1);
+        gen_set_label(l);
+
+        astat_queue_state2(dc, ASTAT_OP_MIN_MAX_VECTOR, tmp0, tmp);
+
+        /* Then combine them */
+        tcg_gen_andi_tl(tmp, tmp, 0xffff);
+        tcg_gen_or_tl(cpu_dreg[dst0], tmp0, tmp);
+
+        tcg_temp_free(tmp1);
+        tcg_temp_free(tmp0);
+        tcg_temp_free(tmp);
+    } else if (aop == 0 && aopcde == 24) {
+        TCGv dst;
+        /* Dreg{dst0} BYTEPACK (Dreg{src0}, Dreg{src1}); */
+
+        /* XXX: could optimize a little if dst0 is diff from src0 or src1 */
+        /* dst |= (((src0 >>  0) & 0xff) <<  0) */
+        dst = tcg_temp_new();
+        tcg_gen_andi_tl(dst, cpu_dreg[src0], 0xff);
+        tmp = tcg_temp_new();
+        /* dst |= (((src0 >> 16) & 0xff) <<  8) */
+        tcg_gen_andi_tl(tmp, cpu_dreg[src0], 0xff0000);
+        tcg_gen_shri_tl(tmp, tmp, 8);
+        tcg_gen_or_tl(dst, dst, tmp);
+        /* dst |= (((src1 >>  0) & 0xff) << 16) */
+        tcg_gen_andi_tl(tmp, cpu_dreg[src1], 0xff);
+        tcg_gen_shli_tl(tmp, tmp, 16);
+        tcg_gen_or_tl(dst, dst, tmp);
+        /* dst |= (((src1 >> 16) & 0xff) << 24) */
+        tcg_gen_andi_tl(tmp, cpu_dreg[src1], 0xff0000);
+        tcg_gen_shli_tl(tmp, tmp, 8);
+        tcg_gen_or_tl(cpu_dreg[dst0], dst, tmp);
+        tcg_temp_free(tmp);
+        tcg_temp_free(dst);
+    } else if (aop == 1 && aopcde == 24) {
+        /* (Dreg{dst1}, Dreg{dst0} = BYTEUNPACK Dreg{src0+1}:{src0} (R){s}; */
+        TCGv lo, hi;
+        TCGv_i64 tmp64_2;
+
+        /*if (dst0 == dst1)
+            illegal_instruction_combination(dc);*/
+
+        if (s) {
+            hi = cpu_dreg[src0], lo = cpu_dreg[src0 + 1];
+        } else {
+            hi = cpu_dreg[src0 + 1], lo = cpu_dreg[src0];
+        }
+
+        /* Create one field of the two regs */
+        tmp64 = tcg_temp_local_new_i64();
+        tcg_gen_extu_i32_i64(tmp64, hi);
+        tcg_gen_shli_i64(tmp64, tmp64, 32);
+        tmp64_2 = tcg_temp_local_new_i64();
+        tcg_gen_extu_i32_i64(tmp64_2, lo);
+        tcg_gen_or_i64(tmp64, tmp64, tmp64_2);
+
+        /* Adjust the two regs field by the Ireg[0] order */
+        tcg_gen_extu_i32_i64(tmp64_2, cpu_ireg[0]);
+        tcg_gen_andi_i64(tmp64_2, tmp64_2, 0x3);
+        tcg_gen_shli_i64(tmp64_2, tmp64_2, 3);    /* multiply by 8 */
+        tcg_gen_shr_i64(tmp64, tmp64, tmp64_2);
+        tcg_temp_free_i64(tmp64_2);
+
+        /* Now that the 4 bytes we want are in the low 32bit, truncate */
+        tmp = tcg_temp_local_new();
+        tcg_gen_trunc_i64_i32(tmp, tmp64);
+        tcg_temp_free_i64(tmp64);
+
+        /* Load bytea into dst0 */
+        tcg_gen_andi_tl(cpu_dreg[dst0], tmp, 0xff);
+        /* Load byted into dst1 */
+        tcg_gen_shri_tl(cpu_dreg[dst1], tmp, 8);
+        tcg_gen_andi_tl(cpu_dreg[dst1], cpu_dreg[dst1], 0xff0000);
+        /* Load byteb into dst0 */
+        tcg_gen_shli_tl(tmp, tmp, 8);
+        tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], tmp);
+        tcg_gen_andi_tl(cpu_dreg[dst0], cpu_dreg[dst0], 0xff00ff);
+        /* Load bytec into dst1 */
+        tcg_gen_shri_tl(tmp, tmp, 24);
+        tcg_gen_or_tl(cpu_dreg[dst1], cpu_dreg[dst1], tmp);
+        tcg_gen_andi_tl(cpu_dreg[dst1], cpu_dreg[dst1], 0xff00ff);
+        tcg_temp_free(tmp);
+    } else if (aopcde == 13) {
+        int l;
+        TCGv a_lo;
+        TCGCond conds[] = {
+            /* GT */ TCG_COND_LE,
+            /* GE */ TCG_COND_LT,
+            /* LT */ TCG_COND_GE,
+            /* LE */ TCG_COND_GT,
+        };
+
+        /* (Dreg{dst1}, Dreg{dst0}) = SEARCH Dreg{src0} (mode{aop}); */
+
+        /*if (dst0 == dst1)
+            illegal_instruction_combination(dc);*/
+
+        a_lo = tcg_temp_local_new();
+        tmp = tcg_temp_local_new();
+
+        /* Compare A1 to Dreg_hi{src0} */
+        tcg_gen_trunc_i64_i32(a_lo, cpu_areg[1]);
+        tcg_gen_ext16s_tl(a_lo, a_lo);
+        tcg_gen_sari_tl(tmp, cpu_dreg[src0], 16);
+
+        l = gen_new_label();
+        tcg_gen_brcond_tl(conds[aop], tmp, a_lo, l);
+        /* Move Dreg_hi{src0} into A0 */
+        tcg_gen_ext_i32_i64(cpu_areg[1], tmp);
+        /* Move Preg{0} into Dreg{dst1} */
+        tcg_gen_mov_tl(cpu_dreg[dst1], cpu_preg[0]);
+        gen_set_label(l);
+
+        /* Compare A0 to Dreg_lo{src0} */
+        tcg_gen_trunc_i64_i32(a_lo, cpu_areg[0]);
+        tcg_gen_ext16s_tl(a_lo, a_lo);
+        tcg_gen_ext16s_tl(tmp, cpu_dreg[src0]);
+
+        l = gen_new_label();
+        tcg_gen_brcond_tl(conds[aop], tmp, a_lo, l);
+        /* Move Dreg_lo{src0} into A0 */
+        tcg_gen_ext_i32_i64(cpu_areg[0], tmp);
+        /* Move Preg{0} into Dreg{dst0} */
+        tcg_gen_mov_tl(cpu_dreg[dst0], cpu_preg[0]);
+        gen_set_label(l);
+
+        tcg_temp_free(a_lo);
+        tcg_temp_free(tmp);
+    } else {
+        illegal_instruction(dc);
+    }
+}
+
+/*
+ * Decode/translate the 32-bit "dsp32shift" instruction group: 16-bit and
+ * 32-bit arithmetic/logical shifts and rotates, accumulator shifts/rotates,
+ * PACK, SIGNBITS, ONES, VIT_MAX, and the EXTRACT/DEPOSIT bit-field ops.
+ * Dispatch is one long if/else chain keyed on the sop/sopcde fields,
+ * mirroring the ISA decode tables; unrecognized encodings raise an
+ * illegal-instruction exception.
+ */
+static void
+decode_dsp32shift_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* dsp32shift
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 0 | 0 |.M.| 1 | 1 | 0 | 0 | - | - |.sopcde............|
+       |.sop...|.HLs...|.dst0......| - | - | - |.src0......|.src1......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int HLs  = ((iw1 >> DSP32Shift_HLs_bits) & DSP32Shift_HLs_mask);
+    int sop  = ((iw1 >> DSP32Shift_sop_bits) & DSP32Shift_sop_mask);
+    int src0 = ((iw1 >> DSP32Shift_src0_bits) & DSP32Shift_src0_mask);
+    int src1 = ((iw1 >> DSP32Shift_src1_bits) & DSP32Shift_src1_mask);
+    int dst0 = ((iw1 >> DSP32Shift_dst0_bits) & DSP32Shift_dst0_mask);
+    int sopcde = ((iw0 >> (DSP32Shift_sopcde_bits - 16)) & DSP32Shift_sopcde_mask);
+    int M = ((iw0 >> (DSP32Shift_M_bits - 16)) & DSP32Shift_M_mask);
+    TCGv tmp;
+    TCGv_i64 tmp64;
+
+    TRACE_EXTRACT("M:%i sopcde:%i sop:%i HLs:%i dst0:%i src0:%i src1:%i",
+                  M, sopcde, sop, HLs, dst0, src0, src1);
+
+    if ((sop == 0 || sop == 1) && sopcde == 0) {
+        int l, endl;
+        TCGv val;
+
+        /* Dreg{dst0}_hi{HLs&2} = ASHIFT Dreg{src1}_hi{HLs&1} BY Dreg_lo{src0} (S){sop==1}; */
+        /* Dreg{dst0}_lo{!HLs&2} = ASHIFT Dreg{src1}_lo{!HLs&1} BY Dreg_lo{src0} (S){sop==1}; */
+
+        /* Shift magnitude: the low 6 bits of Dreg{src0}, sign extended */
+        tmp = tcg_temp_local_new();
+        gen_extNsi_tl(tmp, cpu_dreg[src0], 6);
+
+        /* HLs bit 0 selects the source half; the result half is picked by
+           HLs bit 1 when writing back below */
+        val = tcg_temp_local_new();
+        if (HLs & 1) {
+            tcg_gen_sari_tl(val, cpu_dreg[src1], 16);
+        } else {
+            tcg_gen_ext16s_tl(val, cpu_dreg[src1]);
+        }
+
+        /* Positive shift magnitudes produce Logical Left shifts.
+         * Negative shift magnitudes produce Arithmetic Right shifts.
+         */
+        endl = gen_new_label();
+        l = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_GE, tmp, 0, l);
+        tcg_gen_neg_tl(tmp, tmp);
+        tcg_gen_sar_tl(val, val, tmp);
+        astat_queue_state1(dc, ASTAT_OP_ASHIFT16, val);
+        tcg_gen_br(endl);
+        gen_set_label(l);
+        tcg_gen_shl_tl(val, val, tmp);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT16, val);
+        gen_set_label(endl);
+
+        if (HLs & 2) {
+            gen_mov_h_tl(cpu_dreg[dst0], val);
+        } else {
+            gen_mov_l_tl(cpu_dreg[dst0], val);
+        }
+
+        tcg_temp_free(val);
+        tcg_temp_free(tmp);
+
+        /* XXX: Missing V updates */
+    } else if (sop == 2 && sopcde == 0) {
+        int l, endl;
+        TCGv val;
+
+        /* Dreg{dst0}_hi{HLs&2} = LSHIFT Dreg{src1}_hi{HLs&1} BY Dreg_lo{src0}; */
+        /* Dreg{dst0}_lo{!HLs&2} = LSHIFT Dreg{src1}_lo{!HLs&1} BY Dreg_lo{src0}; */
+
+        tmp = tcg_temp_local_new();
+        gen_extNsi_tl(tmp, cpu_dreg[src0], 6);
+
+        /* Unlike ASHIFT above, the source half is zero extended here */
+        val = tcg_temp_local_new();
+        if (HLs & 1) {
+            tcg_gen_shri_tl(val, cpu_dreg[src1], 16);
+        } else {
+            tcg_gen_ext16u_tl(val, cpu_dreg[src1]);
+        }
+
+        /* Negative shift magnitudes means shift right */
+        endl = gen_new_label();
+        l = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_GE, tmp, 0, l);
+        tcg_gen_neg_tl(tmp, tmp);
+        tcg_gen_shr_tl(val, val, tmp);
+        tcg_gen_br(endl);
+        gen_set_label(l);
+        tcg_gen_shl_tl(val, val, tmp);
+        gen_set_label(endl);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT16, val);
+
+        if (HLs & 2) {
+            gen_mov_h_tl(cpu_dreg[dst0], val);
+        } else {
+            gen_mov_l_tl(cpu_dreg[dst0], val);
+        }
+
+        tcg_temp_free(val);
+        tcg_temp_free(tmp);
+
+        /* XXX: Missing AZ/AN/V updates */
+    } else if (sop == 2 && sopcde == 3 && (HLs == 1 || HLs == 0)) {
+        /* Areg{HLs} = ROT Areg{HLs} BY Dreg_lo{src0}; */
+        /* Rotate magnitude is Dreg_lo{src0}, sign extended to 64 bits */
+        tmp64 = tcg_temp_local_new_i64();
+        tcg_gen_extu_i32_i64(tmp64, cpu_dreg[src0]);
+        tcg_gen_ext16s_i64(tmp64, tmp64);
+        gen_rot_i64(cpu_areg[HLs], cpu_areg[HLs], tmp64);
+        tcg_temp_free_i64(tmp64);
+    } else if (sop == 0 && sopcde == 3 && (HLs == 0 || HLs == 1)) {
+        /* Areg{HLs} = ASHIFT Areg{HLs} BY Dregs_lo{src0}; */
+        unhandled_instruction(dc, "ASHIFT ACC");
+    } else if (sop == 1 && sopcde == 3 && (HLs == 0 || HLs == 1)) {
+        /* Areg{HLs} = LSHIFT Areg{HLs} BY Dregs_lo{src0}; */
+        unhandled_instruction(dc, "LSHIFT ACC");
+    } else if ((sop == 0 || sop == 1) && sopcde == 1) {
+        /* Dreg{dst0} = ASHIFT Dreg{src1} BY Dreg{src0} (V){sop==0}; */
+        /* Dreg{dst0} = ASHIFT Dreg{src1} BY Dreg{src0} (V,S){sop==1}; */
+        unhandled_instruction(dc, "ASHIFT V");
+    } else if ((sop == 0 || sop == 1 || sop == 2) && sopcde == 2) {
+        /* Dreg{dst0} = [LA]SHIFT Dreg{src1} BY Dreg_lo{src0} (opt_S); */
+        /* sop == 1 : opt_S */
+        int l, endl;
+
+        /* XXX: Missing V/VS update */
+        if (sop == 1) {
+            unhandled_instruction(dc, "[AL]SHIFT with (S)");
+        }
+
+        tmp = tcg_temp_local_new();
+        gen_extNsi_tl(tmp, cpu_dreg[src0], 6);
+
+        /* Negative shift means logical or arith shift right */
+        endl = gen_new_label();
+        l = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_GE, tmp, 0, l);
+        tcg_gen_neg_tl(tmp, tmp);
+        if (sop == 2) {
+            tcg_gen_shr_tl(cpu_dreg[dst0], cpu_dreg[src1], tmp);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT_RT32, cpu_dreg[dst0]);
+        } else {
+            tcg_gen_sar_tl(cpu_dreg[dst0], cpu_dreg[src1], tmp);
+            astat_queue_state1(dc, ASTAT_OP_ASHIFT32, cpu_dreg[dst0]);
+        }
+        tcg_gen_br(endl);
+
+        /* Positive shift is a logical left shift */
+        gen_set_label(l);
+        tcg_gen_shl_tl(cpu_dreg[dst0], cpu_dreg[src1], tmp);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst0]);
+        gen_set_label(endl);
+
+        tcg_temp_free(tmp);
+    } else if (sop == 3 && sopcde == 2) {
+        /* Dreg{dst0} = ROT Dreg{src1} BY Dreg_lo{src0}; */
+        tmp = tcg_temp_local_new();
+        tcg_gen_ext16s_tl(tmp, cpu_dreg[src0]);
+        gen_rot_tl(cpu_dreg[dst0], cpu_dreg[src1], tmp);
+        tcg_temp_free(tmp);
+    } else if (sop == 2 && sopcde == 1) {
+        /* Dreg{dst0} = LSHIFT Dreg{src1} BY Dreg_lo{src0} (V); */
+        unhandled_instruction(dc, "LSHIFT (V)");
+    } else if (sopcde == 4) {
+        /* Dreg{dst0} = PACK (Dreg{src1}_hi{sop&2}, Dreg{src0}_hi{sop&1}); */
+        /* Dreg{dst0} = PACK (Dreg{src1}_lo{!sop&2}, Dreg{src0}_lo{!sop&1}); */
+        /* src0 supplies the result's low half, src1 the high half */
+        TCGv tmph;
+        tmp = tcg_temp_new();
+        if (sop & 1) {
+            tcg_gen_shri_tl(tmp, cpu_dreg[src0], 16);
+        } else {
+            tcg_gen_andi_tl(tmp, cpu_dreg[src0], 0xffff);
+        }
+        tmph = tcg_temp_new();
+        if (sop & 2) {
+            tcg_gen_andi_tl(tmph, cpu_dreg[src1], 0xffff0000);
+        } else {
+            tcg_gen_shli_tl(tmph, cpu_dreg[src1], 16);
+        }
+        tcg_gen_or_tl(cpu_dreg[dst0], tmph, tmp);
+        tcg_temp_free(tmph);
+        tcg_temp_free(tmp);
+    } else if (sop == 0 && sopcde == 5) {
+        /* Dreg_lo{dst0} = SIGNBITS Dreg{src1}; */
+        tmp = tcg_temp_new();
+        gen_signbitsi_tl(tmp, cpu_dreg[src1], 32);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+    } else if (sop == 1 && sopcde == 5) {
+        /* Dreg_lo{dst0} = SIGNBITS Dreg_lo{src1}; */
+        tmp = tcg_temp_new();
+        gen_signbitsi_tl(tmp, cpu_dreg[src1], 16);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+    } else if (sop == 2 && sopcde == 5) {
+        /* Dreg_lo{dst0} = SIGNBITS Dreg_hi{src1}; */
+        tmp = tcg_temp_new();
+        tcg_gen_shri_tl(tmp, cpu_dreg[src1], 16);
+        gen_signbitsi_tl(tmp, tmp, 16);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+    } else if ((sop == 0 || sop == 1) && sopcde == 6) {
+        /* Dreg_lo{dst0} = SIGNBITS Areg{sop}; */
+        tmp = tcg_temp_new();
+        gen_signbitsi_i64_i32(tmp, cpu_areg[sop], 40);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+    } else if (sop == 3 && sopcde == 6) {
+        /* Dreg_lo{dst0} = ONES Dreg{src1}; */
+        /* Population count is done out of line in a helper */
+        tmp = tcg_temp_new();
+        gen_helper_ones(tmp, cpu_dreg[src1]);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+    } else if (sop == 0 && sopcde == 7) {
+        /* Dreg_lo{dst0} = EXPADJ( Dreg{src1}, Dreg_lo{src0}); */
+        unhandled_instruction(dc, "EXPADJ");
+    } else if (sop == 1 && sopcde == 7) {
+        /* Dreg_lo{dst0} = EXPADJ( Dreg{src1}, Dreg_lo{src0}) (V); */
+        unhandled_instruction(dc, "EXPADJ (V)");
+    } else if (sop == 2 && sopcde == 7) {
+        /* Dreg_lo{dst0} = EXPADJ( Dreg_lo{src1}, Dreg_lo{src0}) (V); */
+        unhandled_instruction(dc, "EXPADJ");
+    } else if (sop == 3 && sopcde == 7) {
+        /* Dreg_lo{dst0} = EXPADJ( Dreg_hi{src1}, Dreg_lo{src0}); */
+        unhandled_instruction(dc, "EXPADJ");
+    } else if (sop == 0 && sopcde == 8) {
+        /* BITMUX (Dreg{src0}, Dreg{src1}, A0) (ASR); */
+        unhandled_instruction(dc, "BITMUX");
+    } else if (sop == 1 && sopcde == 8) {
+        /* BITMUX (Dreg{src0}, Dreg{src1}, A0) (ASL); */
+        unhandled_instruction(dc, "BITMUX");
+    } else if ((sop == 0 || sop == 1) && sopcde == 9) {
+        /* Dreg_lo{dst0} = VIT_MAX (Dreg{src1}) (ASL){sop==0}; */
+        /* Dreg_lo{dst0} = VIT_MAX (Dreg{src1}) (ASR){sop==1}; */
+        TCGv sl, sh;
+        int l;
+
+        /* Shift A0 to make room for the decision bit recorded below */
+        gen_extend_acc(cpu_areg[0]);
+        if (sop & 1) {
+            tcg_gen_shri_i64(cpu_areg[0], cpu_areg[0], 1);
+        } else {
+            tcg_gen_shli_i64(cpu_areg[0], cpu_areg[0], 1);
+        }
+
+        sl = tcg_temp_local_new();
+        sh = tcg_temp_local_new();
+        tmp = tcg_temp_local_new();
+
+        /* Pick the larger of the two 16bit halves of Dreg{src1}; if the
+           high half wins, note the decision in A0 */
+        tcg_gen_ext16s_tl(sl, cpu_dreg[src1]);
+        tcg_gen_sari_tl(sh, cpu_dreg[src1], 16);
+        /* Hrm, can't this sub be inlined in the branch ? */
+        tcg_gen_sub_tl(tmp, sh, sl);
+        tcg_gen_andi_tl(tmp, tmp, 0x8000);
+        l = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_NE, tmp, 0, l);
+        tcg_gen_mov_tl(sl, sh);
+        tcg_gen_ori_i64(cpu_areg[0], cpu_areg[0], (sop & 1) ? 0x80000000 : 1);
+        gen_set_label(l);
+
+        gen_mov_l_tl(cpu_dreg[dst0], sl);
+
+        tcg_temp_free(tmp);
+        tcg_temp_free(sh);
+        tcg_temp_free(sl);
+    } else if ((sop == 2 || sop == 3) && sopcde == 9) {
+        /* Dreg{dst0} = VIT_MAX (Dreg{src1}, Dreg{src0}) (ASL){sop==2}; */
+        /* Dreg{dst0} = VIT_MAX (Dreg{src1}, Dreg{src0}) (ASR){sop==3}; */
+        TCGv sl, sh, dst;
+        int l;
+
+        /* Two compares this time, so make room for two decision bits */
+        gen_extend_acc(cpu_areg[0]);
+        if (sop & 1) {
+            tcg_gen_shri_i64(cpu_areg[0], cpu_areg[0], 2);
+        } else {
+            tcg_gen_shli_i64(cpu_areg[0], cpu_areg[0], 2);
+        }
+
+        sl = tcg_temp_local_new();
+        sh = tcg_temp_local_new();
+        tmp = tcg_temp_local_new();
+
+        tcg_gen_ext16s_tl(sl, cpu_dreg[src1]);
+        tcg_gen_sari_tl(sh, cpu_dreg[src1], 16);
+
+        /* Hrm, can't this sub be inlined in the branch ? */
+        tcg_gen_sub_tl(tmp, sh, sl);
+        tcg_gen_andi_tl(tmp, tmp, 0x8000);
+        l = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_NE, tmp, 0, l);
+        tcg_gen_mov_tl(sl, sh);
+        tcg_gen_ori_i64(cpu_areg[0], cpu_areg[0], (sop & 1) ? 0x80000000 : 1);
+        gen_set_label(l);
+
+        /* The dst might be a src reg */
+        if (dst0 == src0) {
+            dst = tcg_temp_local_new();
+        } else {
+            dst = cpu_dreg[dst0];
+        }
+
+        /* Winner of the src1 compare lands in the result's high half;
+           src0 is still unread, so writing dst here is safe (see above) */
+        tcg_gen_shli_tl(dst, sl, 16);
+
+        tcg_gen_ext16s_tl(sl, cpu_dreg[src0]);
+        tcg_gen_sari_tl(sh, cpu_dreg[src0], 16);
+        /* Hrm, can't this sub be inlined in the branch ? */
+        tcg_gen_sub_tl(tmp, sh, sl);
+        tcg_gen_andi_tl(tmp, tmp, 0x8000);
+        l = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_NE, tmp, 0, l);
+        tcg_gen_mov_tl(sl, sh);
+        tcg_gen_ori_i64(cpu_areg[0], cpu_areg[0], (sop & 1) ? 0x40000000 : 2);
+        gen_set_label(l);
+
+        gen_mov_l_tl(dst, sl);
+
+        if (dst0 == src0) {
+            tcg_gen_mov_tl(cpu_dreg[dst0], dst);
+            tcg_temp_free(dst);
+        }
+
+        tcg_temp_free(tmp);
+        tcg_temp_free(sh);
+        tcg_temp_free(sl);
+    } else if ((sop == 0 || sop == 1) && sopcde == 10) {
+        /* Dreg{dst0} = EXTRACT (Dreg{src1}, Dreg_lo{src0}) (X{sop==1}); */
+        /* Dreg{dst0} = EXTRACT (Dreg{src1}, Dreg_lo{src0}) (Z{sop==0}); */
+        /* src0 encodes field length (bits 0-4) and position (bits 8-12) */
+        TCGv mask, x, sgn;
+
+        /* mask = 1 << (src0 & 0x1f) */
+        tmp = tcg_temp_new();
+        tcg_gen_andi_tl(tmp, cpu_dreg[src0], 0x1f);
+        mask = tcg_temp_local_new();
+        tcg_gen_movi_tl(mask, 1);
+        tcg_gen_shl_tl(mask, mask, tmp);
+        tcg_temp_free(tmp);
+        if (sop) {
+            /* (X) variant only: remember the field's sign bit position */
+            /* sgn = mask >> 1 */
+            sgn = tcg_temp_local_new();
+            tcg_gen_shri_tl(sgn, mask, 1);
+        }
+        /* mask -= 1 */
+        tcg_gen_subi_tl(mask, mask, 1);
+
+        /* x = src1 >> ((src0 >> 8) & 0x1f) */
+        tmp = tcg_temp_new();
+        x = tcg_temp_new();
+        tcg_gen_shri_tl(tmp, cpu_dreg[src0], 8);
+        tcg_gen_andi_tl(tmp, tmp, 0x1f);
+        tcg_gen_shr_tl(x, cpu_dreg[src1], tmp);
+        tcg_temp_free(tmp);
+        /* dst0 = x & mask */
+        tcg_gen_and_tl(cpu_dreg[dst0], x, mask);
+        tcg_temp_free(x);
+
+        if (sop) {
+            /* if (dst0 & sgn) dst0 |= ~mask */
+            int l;
+            l = gen_new_label();
+            tmp = tcg_temp_new();
+            tcg_gen_and_tl(tmp, cpu_dreg[dst0], sgn);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, tmp, 0, l);
+            tcg_gen_not_tl(mask, mask);
+            tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], mask);
+            gen_set_label(l);
+            tcg_temp_free(sgn);
+            tcg_temp_free(tmp);
+        }
+
+        tcg_temp_free(mask);
+
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst0]);
+    } else if ((sop == 2 || sop == 3) && sopcde == 10) {
+        /* The first dregs is the "background" while the second dregs is the
+         * "foreground".  The fg reg is used to overlay the bg reg and is:
+         * | nnnn nnnn | nnnn nnnn | xxxp pppp | xxxL LLLL |
+         *  n = the fg bit field
+         *  p = bit position in bg reg to start LSB of fg field
+         *  L = number of fg bits to extract
+         * Using (X) sign-extends the fg bit field.
+         */
+        TCGv fg, bg, len, mask, fgnd, shft;
+
+        /* Dreg{dst0} = DEPOSIT (Dreg{src1}, Dreg{src0}) (X){sop==3}; */
+        fg = cpu_dreg[src0];
+        bg = cpu_dreg[src1];
+
+        len = tcg_temp_new();
+        tcg_gen_andi_tl(len, fg, 0x1f);
+
+        mask = tcg_temp_new();
+        tcg_gen_movi_tl(mask, 1);
+        tcg_gen_shl_tl(mask, mask, len);
+        tcg_gen_subi_tl(mask, mask, 1);
+        tcg_gen_andi_tl(mask, mask, 0xffff);
+
+        fgnd = tcg_temp_new();
+        tcg_gen_shri_tl(fgnd, fg, 16);
+        tcg_gen_and_tl(fgnd, fgnd, mask);
+
+        shft = tcg_temp_new();
+        tcg_gen_shri_tl(shft, fg, 8);
+        tcg_gen_andi_tl(shft, shft, 0x1f);
+
+        if (sop == 3) {
+            /* Sign extend the fg bit field.  */
+            tcg_gen_movi_tl(mask, -1);
+            gen_extNs_tl(fgnd, fgnd, len);
+        }
+        /* Punch a field-sized hole in bg and OR the fg field into it */
+        tcg_gen_shl_tl(fgnd, fgnd, shft);
+        tcg_gen_shl_tl(mask, mask, shft);
+        tcg_gen_not_tl(mask, mask);
+        tcg_gen_and_tl(mask, bg, mask);
+
+        tcg_gen_or_tl(cpu_dreg[dst0], mask, fgnd);
+
+        tcg_temp_free(shft);
+        tcg_temp_free(fgnd);
+        tcg_temp_free(mask);
+        tcg_temp_free(len);
+
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst0]);
+    } else if (sop == 0 && sopcde == 11) {
+        /* Dreg_lo{dst0} = CC = BXORSHIFT (A0, Dreg{src0}); */
+        unhandled_instruction(dc, "BXORSHIFT");
+    } else if (sop == 1 && sopcde == 11) {
+        /* Dreg_lo{dst0} = CC = BXOR (A0, Dreg{src0}); */
+        unhandled_instruction(dc, "BXOR");
+    } else if (sop == 0 && sopcde == 12) {
+        /* A0 = BXORSHIFT (A0, A1, CC); */
+        unhandled_instruction(dc, "BXORSHIFT");
+    } else if (sop == 1 && sopcde == 12) {
+        /* Dreg_lo{dst0} = CC = BXOR (A0, A1, CC); */
+        unhandled_instruction(dc, "CC = BXOR");
+    } else if ((sop == 0 || sop == 1 || sop == 2) && sopcde == 13) {
+        int shift = (sop + 1) * 8;
+        TCGv tmp2;
+        /* Dreg{dst0} = ALIGN{shift} (Dreg{src1}, Dreg{src0}); */
+        /* XXX: could be optimized a bit if dst0 is not src1 or src0 */
+        tmp = tcg_temp_new();
+        tmp2 = tcg_temp_new();
+        tcg_gen_shli_tl(tmp, cpu_dreg[src1], 32 - shift);
+        tcg_gen_shri_tl(tmp2, cpu_dreg[src0], shift);
+        tcg_gen_or_tl(cpu_dreg[dst0], tmp, tmp2);
+        tcg_temp_free(tmp2);
+        tcg_temp_free(tmp);
+    } else {
+        illegal_instruction(dc);
+    }
+}
+
+static void
+decode_dsp32shiftimm_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* dsp32shiftimm
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 0 | 0 |.M.| 1 | 1 | 0 | 1 | - | - |.sopcde............|
+       |.sop...|.HLs...|.dst0......|.immag.................|.src1......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int src1     = ((iw1 >> DSP32ShiftImm_src1_bits) & DSP32ShiftImm_src1_mask);
+    int sop      = ((iw1 >> DSP32ShiftImm_sop_bits) & DSP32ShiftImm_sop_mask);
+    int bit8     = ((iw1 >> 8) & 0x1);
+    int immag    = ((iw1 >> DSP32ShiftImm_immag_bits) & DSP32ShiftImm_immag_mask);
+    int newimmag = (-(iw1 >> DSP32ShiftImm_immag_bits) & DSP32ShiftImm_immag_mask);
+    int dst0     = ((iw1 >> DSP32ShiftImm_dst0_bits) & DSP32ShiftImm_dst0_mask);
+    int M        = ((iw0 >> (DSP32ShiftImm_M_bits - 16)) & DSP32ShiftImm_M_mask);
+    int sopcde   = ((iw0 >> (DSP32ShiftImm_sopcde_bits - 16)) & DSP32ShiftImm_sopcde_mask);
+    int HLs      = ((iw1 >> DSP32ShiftImm_HLs_bits) & DSP32ShiftImm_HLs_mask);
+    TCGv tmp;
+
+    TRACE_EXTRACT("M:%i sopcde:%i sop:%i HLs:%i dst0:%i immag:%#x src1:%i",
+                  M, sopcde, sop, HLs, dst0, immag, src1);
+
+    if (sopcde == 0) {
+        tmp = tcg_temp_new();
+
+        if (HLs & 1) {
+            if (sop == 0) {
+                tcg_gen_sari_tl(tmp, cpu_dreg[src1], 16);
+            } else {
+                tcg_gen_shri_tl(tmp, cpu_dreg[src1], 16);
+            }
+        } else {
+            if (sop == 0) {
+                tcg_gen_ext16s_tl(tmp, cpu_dreg[src1]);
+            } else {
+                tcg_gen_ext16u_tl(tmp, cpu_dreg[src1]);
+            }
+        }
+
+        if (sop == 0) {
+            /* dregs_hi/lo = dregs_hi/lo >>> imm4 */
+            tcg_gen_sari_tl(tmp, tmp, newimmag);
+            astat_queue_state1(dc, ASTAT_OP_ASHIFT16, tmp);
+        } else if (sop == 1 && bit8 == 0) {
+            /*  dregs_hi/lo = dregs_hi/lo << imm4 (S) */
+            tcg_gen_shli_tl(tmp, tmp, immag);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT16, tmp);
+        } else if (sop == 1 && bit8) {
+            /* dregs_hi/lo = dregs_hi/lo >>> imm4 (S) */
+            tcg_gen_shri_tl(tmp, tmp, immag);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT16, tmp);
+        } else if (sop == 2 && bit8) {
+            /* dregs_hi/lo = dregs_hi/lo >> imm4 */
+            tcg_gen_shri_tl(tmp, tmp, newimmag);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT16, tmp);
+        } else if (sop == 2 && bit8 == 0) {
+            /* dregs_hi/lo = dregs_hi/lo << imm4 */
+            tcg_gen_shli_tl(tmp, tmp, immag);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT16, tmp);
+        } else {
+            illegal_instruction(dc);
+        }
+
+        if (HLs & 2) {
+            gen_mov_h_tl(cpu_dreg[dst0], tmp);
+        } else {
+            gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        }
+
+        tcg_temp_free(tmp);
+    } else if (sop == 2 && sopcde == 3 && (HLs == 1 || HLs == 0)) {
+        /* Areg{HLs} = ROT Areg{HLs} BY imm{immag}; */
+        int shift = imm6(immag);
+        gen_roti_i64(cpu_areg[HLs], cpu_areg[HLs], shift);
+    } else if (sop == 0 && sopcde == 3 && bit8 == 1) {
+        /* Arithmetic shift, so shift in sign bit copies */
+        int shift = uimm5(newimmag);
+        HLs = !!HLs;
+
+        /* Areg{HLs} = Aregs{HLs} >>> imm{newimmag}; */
+        tcg_gen_sari_i64(cpu_areg[HLs], cpu_areg[HLs], shift);
+    } else if ((sop == 0 && sopcde == 3 && bit8 == 0) ||
+               (sop == 1 && sopcde == 3)) {
+        int shiftup = uimm5(immag);
+        int shiftdn = uimm5(newimmag);
+        HLs = !!HLs;
+
+        if (sop == 0) {
+            /* Areg{HLs} = Aregs{HLs} <<{sop} imm{immag}; */
+            tcg_gen_shli_i64(cpu_areg[HLs], cpu_areg[HLs], shiftup);
+        } else {
+            /* Areg{HLs} = Aregs{HLs} >>{sop} imm{newimmag}; */
+            tcg_gen_shri_i64(cpu_areg[HLs], cpu_areg[HLs], shiftdn);
+        }
+
+        /* XXX: Missing ASTAT update */
+    } else if (sop == 1 && sopcde == 1 && bit8 == 0) {
+        /* Dreg{dst0} = Dreg{src1} << imm{immag} (V, S); */
+        unhandled_instruction(dc, "Dreg = Dreg << imm (V,S)");
+    } else if (sop == 2 && sopcde == 1 && bit8 == 1) {
+        /* Dreg{dst0} = Dreg{src1} >> imm{count} (V); */
+        int count = imm5(newimmag);
+
+        /* XXX: No ASTAT handling */
+        if (count > 0 && count <= 15) {
+            tcg_gen_shri_tl(cpu_dreg[dst0], cpu_dreg[src1], count);
+            tcg_gen_andi_tl(cpu_dreg[dst0], cpu_dreg[dst0],
+                            0xffff0000 | ((1 << (16 - count)) - 1));
+        } else if (count) {
+            tcg_gen_movi_tl(cpu_dreg[dst0], 0);
+        }
+    } else if (sop == 2 && sopcde == 1 && bit8 == 0) {
+        /* Dreg{dst0} = Dreg{src1} << imm{count} (V); */
+        int count = imm5(immag);
+
+        /* XXX: No ASTAT handling */
+        if (count > 0 && count <= 15) {
+            tcg_gen_shli_tl(cpu_dreg[dst0], cpu_dreg[src1], count);
+            tcg_gen_andi_tl(cpu_dreg[dst0], cpu_dreg[dst0],
+                            ~(((1 << count) - 1) << 16));
+        } else if (count) {
+            tcg_gen_movi_tl(cpu_dreg[dst0], 0);
+        }
+    } else if (sopcde == 1 && (sop == 0 || (sop == 1 && bit8 == 1))) {
+        /* Dreg{dst0} = Dreg{src1} >>> imm{newimmag} (V){sop==0}; */
+        /* Dreg{dst0} = Dreg{src1} >>> imm{newimmag} (V,S){sop==1}; */
+        int count = uimm5(newimmag);
+
+        if (sop == 1) {
+            unhandled_instruction(dc, "ashiftrt (S)");
+        }
+
+        /* XXX: No ASTAT handling */
+        if (count > 0 && count <= 15) {
+            tmp = tcg_temp_new();
+            tcg_gen_ext16s_tl(tmp, cpu_dreg[src1]);
+            tcg_gen_sari_tl(tmp, tmp, count);
+            tcg_gen_andi_tl(tmp, tmp, 0xffff);
+            tcg_gen_sari_tl(cpu_dreg[dst0], cpu_dreg[src1], count);
+            tcg_gen_andi_tl(cpu_dreg[dst0], cpu_dreg[dst0], 0xffff0000);
+            tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], tmp);
+            tcg_temp_free(tmp);
+        } else if (count) {
+            unhandled_instruction(dc, "ashiftrt (S)");
+        }
+    } else if (sop == 1 && sopcde == 2) {
+        /* Dreg{dst0} = Dreg{src1} << imm{count} (S); */
+        int count = imm6(immag);
+        tcg_gen_shli_tl(cpu_dreg[dst0], cpu_dreg[src1], -count);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst0]);
+    } else if (sop == 2 && sopcde == 2) {
+        /* Dreg{dst0} = Dreg{src1} >> imm{count}; */
+        int count = imm6(newimmag);
+        if (count < 0) {
+            tcg_gen_shli_tl(cpu_dreg[dst0], cpu_dreg[src1], -count);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst0]);
+        } else {
+            tcg_gen_shri_tl(cpu_dreg[dst0], cpu_dreg[src1], count);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT_RT32, cpu_dreg[dst0]);
+        }
+    } else if (sop == 3 && sopcde == 2) {
+        /* Dreg{dst0} = ROT Dreg{src1} BY imm{shift}; */
+        int shift = imm6(immag);
+        gen_roti_tl(cpu_dreg[dst0], cpu_dreg[src1], shift);
+    } else if (sop == 0 && sopcde == 2) {
+        /* Dreg{dst0} = Dreg{src1} >>> imm{count}; */
+        int count = imm6(newimmag);
+
+        /* Negative shift magnitudes produce Logical Left shifts.
+         * Positive shift magnitudes produce Arithmetic Right shifts.
+         */
+        if (count < 0) {
+            tcg_gen_shli_tl(cpu_dreg[dst0], cpu_dreg[src1], -count);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst0]);
+        } else {
+            tcg_gen_sari_tl(cpu_dreg[dst0], cpu_dreg[src1], count);
+            astat_queue_state1(dc, ASTAT_OP_ASHIFT32, cpu_dreg[dst0]);
+        }
+    } else {
+        illegal_instruction(dc);
+    }
+}
+
+/* Decode the pseudo-debug class of insns (DBG/ABORT/HLT/OUTC); these are
+   apparently simulator/test aids rather than normal application insns.  */
+static void
+decode_psedoDEBUG_0(DisasContext *dc, uint16_t iw0)
+{
+    /* psedoDEBUG
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 |.fn....|.grp.......|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int fn  = ((iw0 >> PseudoDbg_fn_bits) & PseudoDbg_fn_mask);
+    int grp = ((iw0 >> PseudoDbg_grp_bits) & PseudoDbg_grp_mask);
+    int reg = ((iw0 >> PseudoDbg_reg_bits) & PseudoDbg_reg_mask);
+
+    TRACE_EXTRACT("fn:%i grp:%i reg:%i", fn, grp, reg);
+
+    if ((reg == 0 || reg == 1) && fn == 3) {
+        /* DBG Areg{reg}; -- dump accumulator A0/A1 via helper */
+        TCGv tmp = tcg_const_tl(reg);
+        gen_helper_dbg_areg(cpu_areg[reg], tmp);
+        tcg_temp_free(tmp);
+    } else if (reg == 3 && fn == 3) {
+        /* ABORT; */
+        cec_exception(dc, EXCP_ABORT);
+    } else if (reg == 4 && fn == 3) {
+        /* HLT; */
+        cec_exception(dc, EXCP_HLT);
+    } else if (reg == 5 && fn == 3) {
+        unhandled_instruction(dc, "DBGHALT");
+    } else if (reg == 6 && fn == 3) {
+        unhandled_instruction(dc, "DBGCMPLX (dregs)");
+    } else if (reg == 7 && fn == 3) {
+        unhandled_instruction(dc, "DBG");
+    } else if (grp == 0 && fn == 2) {
+        /* OUTC Dreg{reg}; -- emit one character from a data register */
+        gen_helper_outc(cpu_dreg[reg]);
+    } else if (fn == 0) {
+        /* DBG allreg{grp,reg}; -- dump any register by group/index */
+        bool istmp;
+        TCGv tmp;
+        TCGv tmp_grp = tcg_const_tl(grp);
+        TCGv tmp_reg = tcg_const_tl(reg);
+
+        if (grp == 4 && reg == 6) {
+            /* ASTAT is kept as unpacked per-bit state; build the packed
+               32-bit value into a throwaway temp first.  */
+            tmp = tcg_temp_new();
+            gen_astat_load(dc, tmp);
+            istmp = true;
+        } else {
+            tmp = get_allreg(dc, grp, reg);
+            istmp = false;
+        }
+
+        gen_helper_dbg(tmp, tmp_grp, tmp_reg);
+
+        if (istmp) {
+            tcg_temp_free(tmp);
+        }
+        tcg_temp_free(tmp_reg);
+        tcg_temp_free(tmp_grp);
+    } else if (fn == 1) {
+        unhandled_instruction(dc, "PRNT allregs");
+    } else {
+        illegal_instruction(dc);
+    }
+}
+
+/* Decode the OUTC-immediate pseudo insn: print one literal character.  */
+static void
+decode_psedoOChar_0(DisasContext *dc, uint16_t iw0)
+{
+    /* psedoOChar
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 |.ch............................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int ch = ((iw0 >> PseudoChr_ch_bits) & PseudoChr_ch_mask);
+    TCGv chr;
+
+    TRACE_EXTRACT("ch:%#x", ch);
+
+    /* OUTC imm{ch}; */
+    chr = tcg_const_tl(ch);
+    gen_helper_outc(chr);
+    tcg_temp_free(chr);
+}
+
+/* Decode the DBGA family of pseudo insns: assert that half of a register
+   equals an immediate, raising EXCP_DBGA via helper on mismatch.  */
+static void
+decode_psedodbg_assert_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* psedodbg_assert
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 1 | 0 | - | - | - | dbgop |.grp.......|.regtest...|
+       |.expected......................................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int expected = ((iw1 >> PseudoDbg_Assert_expected_bits) & PseudoDbg_Assert_expected_mask);
+    int dbgop    = ((iw0 >> (PseudoDbg_Assert_dbgop_bits - 16)) & PseudoDbg_Assert_dbgop_mask);
+    int grp      = ((iw0 >> (PseudoDbg_Assert_grp_bits - 16)) & PseudoDbg_Assert_grp_mask);
+    int regtest  = ((iw0 >> (PseudoDbg_Assert_regtest_bits - 16)) & PseudoDbg_Assert_regtest_mask);
+    TCGv reg, exp, pc;
+    bool istmp;
+
+    TRACE_EXTRACT("dbgop:%i grp:%i regtest:%i expected:%#x",
+                  dbgop, grp, regtest, expected);
+
+    /* This chain only validates dbgop; the accepted branches are empty.
+       NOTE(review): if illegal_instruction() can return, reg/istmp below
+       would be used uninitialized -- presumably it ends translation;
+       confirm.  */
+    if (dbgop == 0 || dbgop == 2) {
+        /* DBGA (genreg_lo{grp,regtest}, imm{expected} */
+        /* DBGAL (genreg{grp,regtest}, imm{expected} */
+    } else if (dbgop == 1 || dbgop == 3) {
+        /* DBGA (genreg_hi{grp,regtest}, imm{expected} */
+        /* DBGAH (genreg{grp,regtest}, imm{expected} */
+    } else {
+        illegal_instruction(dc);
+    }
+
+    if (grp == 4 && regtest == 6) {
+        /* ASTAT: pack the per-bit state into a temp */
+        reg = tcg_temp_new();
+        gen_astat_load(dc, reg);
+        istmp = true;
+    } else if (grp == 4 && (regtest == 0 || regtest == 2)) {
+        /* A#.X: extract accumulator bits 39:32 */
+        TCGv_i64 tmp64 = tcg_temp_new_i64();
+        reg = tcg_temp_new();
+        tcg_gen_shri_i64(tmp64, cpu_areg[regtest >> 1], 32);
+        tcg_gen_andi_i64(tmp64, tmp64, 0xff);
+        tcg_gen_trunc_i64_i32(reg, tmp64);
+        tcg_temp_free_i64(tmp64);
+        istmp = true;
+    } else if (grp == 4 && (regtest == 1 || regtest == 3)) {
+        /* A#.W: low 32 bits of the accumulator */
+        reg = tcg_temp_new();
+        tcg_gen_trunc_i64_i32(reg, cpu_areg[regtest >> 1]);
+        istmp = true;
+    } else {
+        reg = get_allreg(dc, grp, regtest);
+        istmp = false;
+    }
+
+    /* Odd dbgop values test the high half, even the low half */
+    exp = tcg_const_tl(expected);
+    pc = tcg_const_tl(dc->pc);
+    if (dbgop & 1) {
+        gen_helper_dbga_h(cpu_env, pc, reg, exp);
+    } else {
+        gen_helper_dbga_l(cpu_env, pc, reg, exp);
+    }
+
+    if (istmp) {
+        tcg_temp_free(reg);
+    }
+    tcg_temp_free(pc);
+    tcg_temp_free(exp);
+}
+
+#include "linux-fixed-code.h"
+
+/* Fetch a 16-bit insn word at pc for translation.  In user mode, fetches
+   from the magic kernel page at 0x400 are redirected to the embedded
+   Linux fixed-code image instead of guest memory.  */
+static uint32_t bfin_lduw_code(DisasContext *dc, target_ulong pc)
+{
+#ifdef CONFIG_USER_ONLY
+    /* Intercept jump to the magic kernel page */
+    if (((dc->env->personality & 0xff/*PER_MASK*/) == 0/*PER_LINUX*/) &&
+        (pc & 0xFFFFFF00) == 0x400) {
+        uint32_t off = pc - 0x400;
+        /* Both bytes of the word must be in range; the previous
+           "off < sizeof" test let off + 1 read one byte past the end.  */
+        if (off + 1 < sizeof(bfin_linux_fixed_code)) {
+            /* Fixed-code bytes are stored little endian.  */
+            return ((uint16_t)bfin_linux_fixed_code[off + 1] << 8) |
+                   bfin_linux_fixed_code[off];
+        }
+    }
+#endif
+
+    return cpu_lduw_code(dc->env, pc);
+}
+
+/* Interpret a single 16bit/32bit insn; no parallel insn handling */
+/* Dispatches on the opcode's fixed bit patterns; order of the tests
+   matters since later masks are subsets of earlier ones.  Sets
+   dc->insn_len (2/4/8) as a side effect.  */
+static void
+_interp_insn_bfin(DisasContext *dc, target_ulong pc)
+{
+    uint16_t iw0, iw1;
+
+    iw0 = bfin_lduw_code(dc, pc);
+    if ((iw0 & 0xc000) != 0xc000) {
+        /* 16-bit opcode */
+        dc->insn_len = 2;
+
+        TRACE_EXTRACT("iw0:%#x", iw0);
+        if ((iw0 & 0xFF00) == 0x0000) {
+            decode_ProgCtrl_0(dc, iw0);
+        } else if ((iw0 & 0xFFC0) == 0x0240) {
+            decode_CaCTRL_0(dc, iw0);
+        } else if ((iw0 & 0xFF80) == 0x0100) {
+            decode_PushPopReg_0(dc, iw0);
+        } else if ((iw0 & 0xFE00) == 0x0400) {
+            decode_PushPopMultiple_0(dc, iw0);
+        } else if ((iw0 & 0xFE00) == 0x0600) {
+            decode_ccMV_0(dc, iw0);
+        } else if ((iw0 & 0xF800) == 0x0800) {
+            decode_CCflag_0(dc, iw0);
+        } else if ((iw0 & 0xFFE0) == 0x0200) {
+            decode_CC2dreg_0(dc, iw0);
+        } else if ((iw0 & 0xFF00) == 0x0300) {
+            decode_CC2stat_0(dc, iw0);
+        } else if ((iw0 & 0xF000) == 0x1000) {
+            decode_BRCC_0(dc, iw0);
+        } else if ((iw0 & 0xF000) == 0x2000) {
+            decode_UJUMP_0(dc, iw0);
+        } else if ((iw0 & 0xF000) == 0x3000) {
+            decode_REGMV_0(dc, iw0);
+        } else if ((iw0 & 0xFC00) == 0x4000) {
+            decode_ALU2op_0(dc, iw0);
+        } else if ((iw0 & 0xFE00) == 0x4400) {
+            decode_PTR2op_0(dc, iw0);
+        } else if ((iw0 & 0xF800) == 0x4800) {
+            decode_LOGI2op_0(dc, iw0);
+        } else if ((iw0 & 0xF000) == 0x5000) {
+            decode_COMP3op_0(dc, iw0);
+        } else if ((iw0 & 0xF800) == 0x6000) {
+            decode_COMPI2opD_0(dc, iw0);
+        } else if ((iw0 & 0xF800) == 0x6800) {
+            decode_COMPI2opP_0(dc, iw0);
+        } else if ((iw0 & 0xF000) == 0x8000) {
+            decode_LDSTpmod_0(dc, iw0);
+        } else if ((iw0 & 0xFF60) == 0x9E60) {
+            decode_dagMODim_0(dc, iw0);
+        } else if ((iw0 & 0xFFF0) == 0x9F60) {
+            decode_dagMODik_0(dc, iw0);
+        } else if ((iw0 & 0xFC00) == 0x9C00) {
+            decode_dspLDST_0(dc, iw0);
+        } else if ((iw0 & 0xF000) == 0x9000) {
+            decode_LDST_0(dc, iw0);
+        } else if ((iw0 & 0xFC00) == 0xB800) {
+            decode_LDSTiiFP_0(dc, iw0);
+        } else if ((iw0 & 0xE000) == 0xA000) {
+            decode_LDSTii_0(dc, iw0);
+        } else {
+            TRACE_EXTRACT("no matching 16-bit pattern");
+            illegal_instruction(dc);
+        }
+        return;
+    }
+
+    /* Grab the next 16 bits to determine if it's a 32-bit or 64-bit opcode */
+    /* A 64-bit (parallel/multi-issue) insn is one 32-bit slot followed by
+       two 16-bit slots; linkage insns set the bit but are never parallel.  */
+    iw1 = bfin_lduw_code(dc, pc + 2);
+    if ((iw0 & BIT_MULTI_INS) && (iw0 & 0xe800) != 0xe800 /* not linkage */) {
+        dc->insn_len = 8;
+    } else {
+        dc->insn_len = 4;
+    }
+
+    TRACE_EXTRACT("iw0:%#x iw1:%#x insn_len:%i",
+                  iw0, iw1, dc->insn_len);
+
+    /* NOTE: the "(iw1 & 0x0000) == 0x0000" tests are always true;
+       presumably kept to mirror the opcode table layout.  */
+    if ((iw0 & 0xf7ff) == 0xc003 && iw1 == 0x1800) {
+        /* MNOP; */;
+    } else if (((iw0 & 0xFF80) == 0xE080) && ((iw1 & 0x0C00) == 0x0000)) {
+        decode_LoopSetup_0(dc, iw0, iw1);
+    } else if (((iw0 & 0xFF00) == 0xE100) && ((iw1 & 0x0000) == 0x0000)) {
+        decode_LDIMMhalf_0(dc, iw0, iw1);
+    } else if (((iw0 & 0xFE00) == 0xE200) && ((iw1 & 0x0000) == 0x0000)) {
+        decode_CALLa_0(dc, iw0, iw1);
+    } else if (((iw0 & 0xFC00) == 0xE400) && ((iw1 & 0x0000) == 0x0000)) {
+        decode_LDSTidxI_0(dc, iw0, iw1);
+    } else if (((iw0 & 0xFFFE) == 0xE800) && ((iw1 & 0x0000) == 0x0000)) {
+        decode_linkage_0(dc, iw0, iw1);
+    } else if (((iw0 & 0xF600) == 0xC000) && ((iw1 & 0x0000) == 0x0000)) {
+        decode_dsp32mac_0(dc, iw0, iw1);
+    } else if (((iw0 & 0xF600) == 0xC200) && ((iw1 & 0x0000) == 0x0000)) {
+        decode_dsp32mult_0(dc, iw0, iw1);
+    } else if (((iw0 & 0xF7C0) == 0xC400) && ((iw1 & 0x0000) == 0x0000)) {
+        decode_dsp32alu_0(dc, iw0, iw1);
+    } else if (((iw0 & 0xF7E0) == 0xC600) && ((iw1 & 0x01C0) == 0x0000)) {
+        decode_dsp32shift_0(dc, iw0, iw1);
+    } else if (((iw0 & 0xF7E0) == 0xC680) && ((iw1 & 0x0000) == 0x0000)) {
+        decode_dsp32shiftimm_0(dc, iw0, iw1);
+    } else if ((iw0 & 0xFF00) == 0xF800) {
+        /* Pseudo-debug insns are only 16 bits; shrink insn_len back */
+        decode_psedoDEBUG_0(dc, iw0), dc->insn_len = 2;
+    } else if ((iw0 & 0xFF00) == 0xF900) {
+        decode_psedoOChar_0(dc, iw0), dc->insn_len = 2;
+    } else if (((iw0 & 0xFF00) == 0xF000) && ((iw1 & 0x0000) == 0x0000)) {
+        decode_psedodbg_assert_0(dc, iw0, iw1);
+    } else {
+        TRACE_EXTRACT("no matching 32-bit pattern");
+        illegal_instruction(dc);
+    }
+}
+
+/* Interpret a single Blackfin insn; breaks up parallel insns */
+static void
+interp_insn_bfin(DisasContext *dc)
+{
+    _interp_insn_bfin(dc, dc->pc);
+
+    if (dc->insn_len != 8) {
+        return;
+    }
+
+    /* Multi-issue insn: also decode the two trailing 16-bit slots.  */
+    _interp_insn_bfin(dc, dc->pc + 4);
+    _interp_insn_bfin(dc, dc->pc + 6);
+    dc->disalgnexcpt = 0;
+    /* The slot decodes clobbered insn_len; restore it so higher levels
+       still see the full parallel insn when processing branches.  */
+    dc->insn_len = 8;
+}
diff --git a/target-bfin/bfin-tdep.h b/target-bfin/bfin-tdep.h
new file mode 100644
index 0000000..ef6d325
--- /dev/null
+++ b/target-bfin/bfin-tdep.h
@@ -0,0 +1,94 @@ 
+/* Target-dependent code for Analog Devices Blackfin processor, for GDB.
+
+   Copyright (C) 2005 Free Software Foundation, Inc.
+   Contributed by Analog Devices.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+/* Register numbering shared with GDB's remote protocol.  The order is
+   part of the wire format -- do not reorder or insert entries (see the
+   BFIN_NUM_REGS note at the end).  */
+enum gdb_regnum {
+  /* Core Registers */
+  BFIN_R0_REGNUM = 0,
+  BFIN_R1_REGNUM,
+  BFIN_R2_REGNUM,
+  BFIN_R3_REGNUM,
+  BFIN_R4_REGNUM,
+  BFIN_R5_REGNUM,
+  BFIN_R6_REGNUM,
+  BFIN_R7_REGNUM,
+  BFIN_P0_REGNUM,
+  BFIN_P1_REGNUM,
+  BFIN_P2_REGNUM,
+  BFIN_P3_REGNUM,
+  BFIN_P4_REGNUM,
+  BFIN_P5_REGNUM,
+  BFIN_SP_REGNUM,
+  BFIN_FP_REGNUM,
+  BFIN_I0_REGNUM,
+  BFIN_I1_REGNUM,
+  BFIN_I2_REGNUM,
+  BFIN_I3_REGNUM,
+  BFIN_M0_REGNUM,
+  BFIN_M1_REGNUM,
+  BFIN_M2_REGNUM,
+  BFIN_M3_REGNUM,
+  BFIN_B0_REGNUM,
+  BFIN_B1_REGNUM,
+  BFIN_B2_REGNUM,
+  BFIN_B3_REGNUM,
+  BFIN_L0_REGNUM,
+  BFIN_L1_REGNUM,
+  BFIN_L2_REGNUM,
+  BFIN_L3_REGNUM,
+  BFIN_A0_DOT_X_REGNUM,
+  BFIN_A0_DOT_W_REGNUM,
+  BFIN_A1_DOT_X_REGNUM,
+  BFIN_A1_DOT_W_REGNUM,
+  BFIN_ASTAT_REGNUM,
+  BFIN_RETS_REGNUM,
+  BFIN_LC0_REGNUM,
+  BFIN_LT0_REGNUM,
+  BFIN_LB0_REGNUM,
+  BFIN_LC1_REGNUM,
+  BFIN_LT1_REGNUM,
+  BFIN_LB1_REGNUM,
+  BFIN_CYCLES_REGNUM,
+  BFIN_CYCLES2_REGNUM,
+  BFIN_USP_REGNUM,
+  BFIN_SEQSTAT_REGNUM,
+  BFIN_SYSCFG_REGNUM,
+  BFIN_RETI_REGNUM,
+  BFIN_RETX_REGNUM,
+  BFIN_RETN_REGNUM,
+  BFIN_RETE_REGNUM,
+
+  /* Pseudo Registers */
+  BFIN_PC_REGNUM,
+  BFIN_CC_REGNUM,
+  BFIN_TEXT_ADDR,		/* Address of .text section.  */
+  BFIN_TEXT_END_ADDR,		/* Address of the end of .text section.  */
+  BFIN_DATA_ADDR,		/* Address of .data section.  */
+
+  BFIN_FDPIC_EXEC_REGNUM,
+  BFIN_FDPIC_INTERP_REGNUM,
+
+  /* MMRs */
+  BFIN_IPEND_REGNUM,
+
+  /* LAST ENTRY SHOULD NOT BE CHANGED.  */
+  BFIN_NUM_REGS			/* The number of all registers.  */
+};
diff --git a/target-bfin/cpu-qom.h b/target-bfin/cpu-qom.h
new file mode 100644
index 0000000..697797b
--- /dev/null
+++ b/target-bfin/cpu-qom.h
@@ -0,0 +1,61 @@ 
+/*
+ * QEMU Blackfin CPU
+ *
+ * Copyright 2007-2013 Mike Frysinger
+ * Copyright 2007-2011 Analog Devices, Inc.
+ *
+ * Licensed under the Lesser GPL 2 or later.
+ */
+
+#ifndef QEMU_BFIN_CPU_QOM_H
+#define QEMU_BFIN_CPU_QOM_H
+
+#include "qom/cpu.h"
+
+#define TYPE_BFIN_CPU "bfin-cpu"
+
+#define BFIN_CPU_CLASS(klass) \
+    OBJECT_CLASS_CHECK(BfinCPUClass, (klass), TYPE_BFIN_CPU)
+#define BFIN_CPU(obj) \
+    OBJECT_CHECK(BfinCPU, (obj), TYPE_BFIN_CPU)
+#define BFIN_CPU_GET_CLASS(obj) \
+    OBJECT_GET_CLASS(BfinCPUClass, (obj), TYPE_BFIN_CPU)
+
+/**
+ * BfinCPUClass:
+ * @parent_reset: The parent class' reset handler.
+ *
+ * A Blackfin CPU model.
+ */
+typedef struct BfinCPUClass {
+    /*< private >*/
+    CPUClass parent_class;
+    /*< public >*/
+
+    void (*parent_reset)(CPUState *cpu);
+} BfinCPUClass;
+
+/**
+ * BfinCPU:
+ * @env: #CPUArchState
+ *
+ * A Blackfin CPU.
+ */
+typedef struct BfinCPU {
+    /*< private >*/
+    CPUState parent_obj;
+    /*< public >*/
+
+    CPUArchState env;
+} BfinCPU;
+
+/* Map an embedded CPUArchState back to its owning BfinCPU object.  */
+static inline BfinCPU *bfin_env_get_cpu(CPUArchState *env)
+{
+    BfinCPU *cpu = container_of(env, BfinCPU, env);
+
+    return BFIN_CPU(cpu);
+}
+
+#define ENV_GET_CPU(e) CPU(bfin_env_get_cpu(e))
+
+#define ENV_OFFSET offsetof(BfinCPU, env)
+
+#endif
diff --git a/target-bfin/cpu.c b/target-bfin/cpu.c
new file mode 100644
index 0000000..871a1a1
--- /dev/null
+++ b/target-bfin/cpu.c
@@ -0,0 +1,55 @@ 
+/*
+ * QEMU Blackfin CPU
+ *
+ * Copyright 2007-2013 Mike Frysinger
+ * Copyright 2007-2011 Analog Devices, Inc.
+ *
+ * Licensed under the Lesser GPL 2 or later.
+ */
+
+#include "cpu.h"
+#include "qemu-common.h"
+
+
+/* CPUClass::reset() */
+static void bfin_cpu_reset(CPUState *s)
+{
+    BfinCPU *cpu = BFIN_CPU(s);
+
+    /* Reset entry point; presumably the hardware reset vector -- confirm.  */
+    cpu->env.pc = 0xEF000000;
+}
+
+/* QOM instance init: wire the env pointer and register with exec core.  */
+static void bfin_cpu_initfn(Object *obj)
+{
+    CPUState *cs = CPU(obj);
+    BfinCPU *cpu = BFIN_CPU(obj);
+    CPUArchState *env = &cpu->env;
+
+    cs->env_ptr = env;
+    cpu_exec_init(env);
+}
+
+/* QOM class init: hook up the reset handler.  */
+static void bfin_cpu_class_init(ObjectClass *oc, void *data)
+{
+    CPUClass *cc = CPU_CLASS(oc);
+
+    cc->reset = bfin_cpu_reset;
+}
+
+/* Type registration data for the single (concrete) Blackfin CPU type.  */
+static const TypeInfo bfin_cpu_type_info = {
+    .name = TYPE_BFIN_CPU,
+    .parent = TYPE_CPU,
+    .instance_size = sizeof(BfinCPU),
+    .instance_init = bfin_cpu_initfn,
+    .abstract = false,
+    .class_size = sizeof(BfinCPUClass),
+    .class_init = bfin_cpu_class_init,
+};
+
+/* Register the Blackfin CPU QOM type at startup.  */
+static void bfin_cpu_register_types(void)
+{
+    type_register_static(&bfin_cpu_type_info);
+}
+
+type_init(bfin_cpu_register_types)
diff --git a/target-bfin/cpu.h b/target-bfin/cpu.h
new file mode 100644
index 0000000..d288197
--- /dev/null
+++ b/target-bfin/cpu.h
@@ -0,0 +1,236 @@ 
+/*
+ * Blackfin emulation
+ *
+ * Copyright 2007-2013 Mike Frysinger
+ * Copyright 2007-2011 Analog Devices, Inc.
+ *
+ * Licensed under the Lesser GPL 2 or later.
+ */
+
+#ifndef CPU_BFIN_H
+#define CPU_BFIN_H
+
+struct DisasContext;
+
+#define TARGET_LONG_BITS 32
+
+#define ELF_MACHINE	EM_BLACKFIN
+
+#define CPUArchState struct CPUBfinState
+
+#include "config.h"
+#include "qemu-common.h"
+#include "exec/cpu-defs.h"
+
+#define TARGET_HAS_ICE 1
+
+#define EXCP_SYSCALL        0
+#define EXCP_SOFT_BP        1
+#define EXCP_STACK_OVERFLOW 3
+#define EXCP_SINGLE_STEP    0x10
+#define EXCP_TRACE_FULL     0x11
+#define EXCP_UNDEF_INST     0x21
+#define EXCP_ILL_INST       0x22
+#define EXCP_DCPLB_VIOLATE  0x23
+#define EXCP_DATA_MISALGIN  0x24
+#define EXCP_UNRECOVERABLE  0x25
+#define EXCP_DCPLB_MISS     0x26
+#define EXCP_DCPLB_MULT     0x27
+#define EXCP_EMU_WATCH      0x28
+#define EXCP_MISALIG_INST   0x2a
+#define EXCP_ICPLB_PROT     0x2b
+#define EXCP_ICPLB_MISS     0x2c
+#define EXCP_ICPLB_MULT     0x2d
+#define EXCP_ILL_SUPV       0x2e
+#define EXCP_ABORT          0x100
+#define EXCP_DBGA           0x101
+#define EXCP_OUTC           0x102
+
+#define CPU_INTERRUPT_NMI   CPU_INTERRUPT_TGT_EXT_1
+
+#define BFIN_L1_CACHE_BYTES 32
+
+/* Blackfin does 1K/4K/1M/4M, but for now only support 4k */
+#define TARGET_PAGE_BITS    12
+#define NB_MMU_MODES        2
+
+#define TARGET_PHYS_ADDR_SPACE_BITS 32
+#define TARGET_VIRT_ADDR_SPACE_BITS 32
+
+#define cpu_init cpu_bfin_init
+#define cpu_exec cpu_bfin_exec
+#define cpu_gen_code cpu_bfin_gen_code
+#define cpu_signal_handler cpu_bfin_signal_handler
+
+/* Indexes into astat array; matches bitpos in hardware too */
+/* Gaps in the numbering are unnamed/reserved ASTAT bit positions.  */
+enum {
+    ASTAT_AZ = 0,
+    ASTAT_AN,
+    ASTAT_AC0_COPY,
+    ASTAT_V_COPY,
+    ASTAT_CC = 5,
+    ASTAT_AQ,
+    ASTAT_RND_MOD = 8,
+    ASTAT_AC0 = 12,
+    ASTAT_AC1,
+    ASTAT_AV0 = 16,
+    ASTAT_AV0S,
+    ASTAT_AV1,
+    ASTAT_AV1S,
+    ASTAT_V = 24,
+    ASTAT_VS
+};
+
+/* Architectural CPU state for Blackfin.  */
+typedef struct CPUBfinState {
+    CPU_COMMON
+    int personality;            /* Linux personality (user-mode emulation) */
+
+    uint32_t dreg[8];           /* Data registers R0-R7 */
+    uint32_t preg[8];           /* Pointer registers; P6 = SP, P7 = FP */
+    uint32_t ireg[4];           /* DAG index registers */
+    uint32_t mreg[4];           /* DAG modify registers */
+    uint32_t breg[4];           /* DAG base registers */
+    uint32_t lreg[4];           /* DAG length registers */
+    uint64_t areg[2];           /* Accumulators; low 40 bits significant */
+    uint32_t rets;              /* Subroutine return address */
+    uint32_t lcreg[2], ltreg[2], lbreg[2]; /* Hardware loop count/top/bottom */
+    uint32_t cycles[2];
+    uint32_t uspreg;            /* User stack pointer */
+    uint32_t seqstat;
+    uint32_t syscfg;
+    uint32_t reti;              /* Interrupt/exception/NMI/emulation */
+    uint32_t retx;              /* return addresses */
+    uint32_t retn;
+    uint32_t rete;
+    uint32_t emudat;
+    uint32_t pc;
+
+    /* ASTAT bits; broken up for speeeeeeeed */
+    uint32_t astat[32];
+    /* ASTAT delayed helpers */
+    uint32_t astat_op, astat_arg[3];
+} CPUBfinState;
+#define spreg preg[6]
+#define fpreg preg[7]
+
+/* Assemble the packed 32-bit ASTAT value from the unpacked per-bit
+   array (astat[i] holds bit i as 0/1).  */
+static inline uint32_t bfin_astat_read(CPUArchState *env)
+{
+    unsigned int i, ret;
+
+    ret = 0;
+    for (i = 0; i < 32; ++i) {
+        ret |= (env->astat[i] << i);
+    }
+
+    return ret;
+}
+
+/* Scatter a packed 32-bit ASTAT value into the per-bit array.  */
+static inline void bfin_astat_write(CPUArchState *env, uint32_t astat)
+{
+    unsigned int i;
+    for (i = 0; i < 32; ++i) {
+        env->astat[i] = !!(astat & (1 << i));
+    }
+}
+
+/* Deferred-ASTAT operation tags: the translator queues one of these
+   (with its args) instead of computing flags eagerly at every insn.  */
+enum astat_ops {
+    ASTAT_OP_NONE,
+    ASTAT_OP_DYNAMIC,
+    ASTAT_OP_ABS,
+    ASTAT_OP_ABS_VECTOR,
+    ASTAT_OP_ADD16,
+    ASTAT_OP_ADD32,
+    ASTAT_OP_ASHIFT16,
+    ASTAT_OP_ASHIFT32,
+    ASTAT_OP_COMPARE_SIGNED,
+    ASTAT_OP_COMPARE_UNSIGNED,
+    ASTAT_OP_LOGICAL,
+    ASTAT_OP_LSHIFT16,
+    ASTAT_OP_LSHIFT32,
+    ASTAT_OP_LSHIFT_RT16,
+    ASTAT_OP_LSHIFT_RT32,
+    ASTAT_OP_MIN_MAX,
+    ASTAT_OP_MIN_MAX_VECTOR,
+    ASTAT_OP_NEGATE,
+    ASTAT_OP_SUB16,
+    ASTAT_OP_SUB32,
+    ASTAT_OP_VECTOR_ADD_ADD,    /* +|+ */
+    ASTAT_OP_VECTOR_ADD_SUB,    /* +|- */
+    ASTAT_OP_VECTOR_SUB_SUB,    /* -|- */
+    ASTAT_OP_VECTOR_SUB_ADD,    /* -|+ */
+};
+
+/* Callback invoked per hardware loop during translation.  */
+typedef void (*hwloop_callback)(struct DisasContext *dc, int loop);
+
+/* Per-translation-block decode state.  */
+typedef struct DisasContext {
+    CPUArchState *env;
+    struct TranslationBlock *tb;
+    /* The current PC we're decoding (could be middle of parallel insn) */
+    target_ulong pc;
+    /* Length of current insn (2/4/8) */
+    target_ulong insn_len;
+
+    /* For delayed ASTAT handling */
+    enum astat_ops astat_op;
+
+    /* For hardware loop processing */
+    hwloop_callback hwloop_callback;
+    void *hwloop_data;
+
+    /* Was a DISALGNEXCPT used in this parallel insn ? */
+    int disalgnexcpt;
+
+    int is_jmp;
+    int mem_idx;
+} DisasContext;
+
+void do_interrupt(CPUArchState *env);
+CPUArchState *cpu_init(const char *cpu_model);
+int cpu_exec(CPUArchState *s);
+int cpu_bfin_signal_handler(int host_signum, void *pinfo, void *puc);
+
+extern const char * const greg_names[];
+extern const char *get_allreg_name(int grp, int reg);
+
+#define MMU_KERNEL_IDX 0
+#define MMU_USER_IDX   1
+
+int cpu_bfin_handle_mmu_fault(CPUArchState *env, target_ulong address, int rw,
+                              int mmu_idx);
+#define cpu_handle_mmu_fault cpu_bfin_handle_mmu_fault
+
+#if defined(CONFIG_USER_ONLY)
+/* For clone()/fork(): give the child its own stack pointer if provided.  */
+static inline void cpu_clone_regs(CPUArchState *env, target_ulong newsp)
+{
+    if (newsp) {
+        env->spreg = newsp;
+    }
+}
+#endif
+
+#include "exec/cpu-all.h"
+#include "cpu-qom.h"
+
+/* The CPU has work when a hard interrupt or NMI is pending.  */
+static inline bool cpu_has_work(CPUState *cpu)
+{
+    return (cpu->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI));
+}
+
+#include "exec/exec-all.h"
+
+/* Restore the guest PC from a translation block.  */
+static inline void cpu_pc_from_tb(CPUArchState *env, TranslationBlock *tb)
+{
+    env->pc = tb->pc;
+}
+
+/* Current guest program counter.  */
+static inline target_ulong cpu_get_pc(CPUArchState *env)
+{
+    return env->pc;
+}
+
+/* TB lookup key: flags carries the RND_MOD astat bit, so translated
+   blocks are specialized on the rounding mode.  */
+static inline void cpu_get_tb_cpu_state(CPUArchState *env, target_ulong *pc,
+                                        target_ulong *cs_base, int *flags)
+{
+    *pc = cpu_get_pc(env);
+    *cs_base = 0;
+    *flags = env->astat[ASTAT_RND_MOD];
+}
+
+#endif
diff --git a/target-bfin/helper.c b/target-bfin/helper.c
new file mode 100644
index 0000000..79a15cf
--- /dev/null
+++ b/target-bfin/helper.c
@@ -0,0 +1,37 @@ 
+/*
+ * Blackfin helpers
+ *
+ * Copyright 2007-2013 Mike Frysinger
+ * Copyright 2007-2011 Analog Devices, Inc.
+ *
+ * Licensed under the Lesser GPL 2 or later.
+ */
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <assert.h>
+
+#include "config.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "qemu/host-utils.h"
+
+#if defined(CONFIG_USER_ONLY)
+
+/* User-mode only: there is no CEC to deliver to; just clear the
+   pending exception.  */
+void do_interrupt(CPUArchState *env)
+{
+    env->exception_index = -1;
+}
+
+/* User-mode only: every MMU fault is unrecoverable; report a DCPLB
+   violation and return nonzero (fault not handled).  */
+int cpu_handle_mmu_fault(CPUArchState *env, target_ulong address, int rw,
+                         int mmu_idx)
+{
+    env->exception_index = EXCP_DCPLB_VIOLATE;
+    return 1;
+}
+
+#endif
diff --git a/target-bfin/helper.h b/target-bfin/helper.h
new file mode 100644
index 0000000..9596721
--- /dev/null
+++ b/target-bfin/helper.h
@@ -0,0 +1,23 @@ 
+#include "exec/def-helper.h"
+
+DEF_HELPER_3(raise_exception, void, env, i32, i32)
+DEF_HELPER_5(memalign, void, env, i32, i32, i32, i32)
+
+DEF_HELPER_4(dbga_l, void, env, i32, i32, i32)
+DEF_HELPER_4(dbga_h, void, env, i32, i32, i32)
+DEF_HELPER_FLAGS_1(outc, TCG_CALL_NO_RWG, void, i32)
+DEF_HELPER_FLAGS_3(dbg, TCG_CALL_NO_RWG, void, i32, i32, i32)
+DEF_HELPER_FLAGS_2(dbg_areg, TCG_CALL_NO_RWG, void, i64, i32)
+
+DEF_HELPER_1(astat_load, i32, env)
+DEF_HELPER_2(astat_store, void, env, i32)
+
+DEF_HELPER_FLAGS_1(ones, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_2(signbits, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+DEF_HELPER_FLAGS_2(signbits_64, TCG_CALL_NO_RWG_SE, i32, i64, i32)
+
+DEF_HELPER_FLAGS_4(dagadd, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_4(dagsub, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_2(add_brev, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+
+#include "exec/def-helper.h"
diff --git a/target-bfin/linux-fixed-code.h b/target-bfin/linux-fixed-code.h
new file mode 100644
index 0000000..a6dddc4
--- /dev/null
+++ b/target-bfin/linux-fixed-code.h
@@ -0,0 +1,23 @@ 
+/* DO NOT EDIT: Autogenerated.  */
+/* Fixed code region of Linux userspace starting at 0x400.  Last produced
+   from Linux-2.6.37 (not that the fixed code region changes often).  */
+static const unsigned char bfin_linux_fixed_code[] = {
+0x28, 0xe1, 0xad, 0x00, 0xa0, 0x00, 0x00, 0x20,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x91, 0x01, 0x93, 0x10, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x91, 0x08, 0x08, 0x02, 0x10, 0x02, 0x93,
+0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x01, 0x91, 0x01, 0x50, 0x00, 0x93, 0x10, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x01, 0x91, 0x01, 0x52, 0x00, 0x93, 0x10, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x01, 0x91, 0x01, 0x56, 0x00, 0x93, 0x10, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x01, 0x91, 0x01, 0x54, 0x00, 0x93, 0x10, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x01, 0x91, 0x01, 0x58, 0x00, 0x93, 0x10, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0xa4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
diff --git a/target-bfin/op_helper.c b/target-bfin/op_helper.c
new file mode 100644
index 0000000..c905760
--- /dev/null
+++ b/target-bfin/op_helper.c
@@ -0,0 +1,229 @@ 
+/*
+ * Blackfin helpers
+ *
+ * Copyright 2007-2013 Mike Frysinger
+ * Copyright 2007-2011 Analog Devices, Inc.
+ *
+ * Licensed under the Lesser GPL 2 or later.
+ */
+
+#include "cpu.h"
+#include "helper.h"
+
+/* Raise a guest exception and exit the cpu loop.  When pc is not -1 the
+   guest PC is updated first so the exception points at the right insn.  */
+void HELPER(raise_exception)(CPUArchState *env, uint32_t excp, uint32_t pc)
+{
+    env->exception_index = excp;
+    if (pc != -1) {
+        env->pc = pc;
+    }
+    cpu_loop_exit(env);
+}
+
+/* Alignment check: raise excp (at pc) unless addr is len-aligned.
+   Assumes len is a power of two -- TODO confirm all call sites.  */
+void HELPER(memalign)(CPUArchState *env, uint32_t excp, uint32_t pc,
+                      uint32_t addr, uint32_t len)
+{
+    if ((addr & (len - 1)) == 0) {
+        return;
+    }
+
+    HELPER(raise_exception)(env, excp, pc);
+}
+
+/* DBGA.L: assert the low 16 bits of actual equal expected, else raise
+   EXCP_DBGA at pc.  */
+void HELPER(dbga_l)(CPUArchState *env, uint32_t pc, uint32_t actual,
+                    uint32_t expected)
+{
+    if ((actual & 0xffff) != expected) {
+        HELPER(raise_exception)(env, EXCP_DBGA, pc);
+    }
+}
+
+/* DBGA.H: assert the high 16 bits of actual equal expected, else raise
+   EXCP_DBGA at pc.  */
+void HELPER(dbga_h)(CPUArchState *env, uint32_t pc, uint32_t actual,
+                    uint32_t expected)
+{
+    if ((actual >> 16) != expected) {
+        HELPER(raise_exception)(env, EXCP_DBGA, pc);
+    }
+}
+
+/* OUTC: write one character to stdout, flushing on newline so output
+   appears promptly.  */
+void HELPER(outc)(uint32_t ch)
+{
+    putc(ch, stdout);
+    if (ch == '\n') {
+        fflush(stdout);
+    }
+}
+
+/* DBG: dump a 32-bit register by group/index.  */
+void HELPER(dbg)(uint32_t val, uint32_t grp, uint32_t reg)
+{
+    printf("DBG : %s = 0x%08x\n", get_allreg_name(grp, reg), val);
+}
+
+/* DBG Areg: dump an accumulator.  The shift pair keeps only the low
+   40 bits, which is all the hardware register holds.  */
+void HELPER(dbg_areg)(uint64_t val, uint32_t areg)
+{
+    printf("DBG : A%u = 0x%010"PRIx64"\n", areg, (val << 24) >> 24);
+}
+
+/* Pack the per-bit ASTAT state into its 32-bit register value.  */
+uint32_t HELPER(astat_load)(CPUArchState *env)
+{
+    return bfin_astat_read(env);
+}
+
+/* Unpack a 32-bit ASTAT value into the per-bit state array.  */
+void HELPER(astat_store)(CPUArchState *env, uint32_t astat)
+{
+    bfin_astat_write(env, astat);
+}
+
+/* Count the number of bits set to 1 in the 32bit value */
+uint32_t HELPER(ones)(uint32_t val)
+{
+    uint32_t cnt = 0;
+
+    /* Clear the lowest set bit each iteration (Kernighan's method).  */
+    while (val) {
+        val &= val - 1;
+        ++cnt;
+    }
+
+    return cnt;
+}
+
+/* Count number of leading bits that match the sign bit */
+/* size is the operand width in bits; the sign bit itself is not counted.  */
+uint32_t HELPER(signbits)(uint32_t val, uint32_t size)
+{
+    uint32_t mask = 1 << (size - 1);
+    uint32_t bit = val & mask;
+    uint32_t count = 0;
+
+    /* Walk down from the bit below the sign bit until a mismatch.  */
+    for (;;) {
+        mask >>= 1;
+        bit >>= 1;
+        if (mask == 0)
+            break;
+        if ((val & mask) != bit)
+            break;
+        ++count;
+    }
+
+    return count;
+}
+
+/* Count number of leading bits that match the sign bit */
+/* 64-bit variant, used for the 40-bit accumulators; for size == 40 the
+   result is rebased so it is relative to a 32-bit value.  */
+uint32_t HELPER(signbits_64)(uint64_t val, uint32_t size)
+{
+    uint64_t mask = (uint64_t)1 << (size - 1);
+    uint64_t bit = val & mask;
+    uint32_t count = 0;
+
+    for (;;) {
+        mask >>= 1;
+        bit >>= 1;
+        if (mask == 0)
+            break;
+        if ((val & mask) != bit)
+            break;
+        ++count;
+    }
+
+    if (size == 40)
+        count -= 8;
+
+    return count;
+}
+
+/* This is a bit crazy, but we want to simulate the hardware behavior exactly
+   rather than worry about the circular buffers being used correctly.  Which
+   isn't to say there isn't room for improvement here, just that we want to
+   be conservative.  See also dagsub().  */
+/* Circular-buffer post-increment: I += M with wraparound inside the
+   buffer [B, B+L).  The 64-bit intermediates keep the carry bits the
+   hardware uses to pick between the wrapped and unwrapped results.  */
+uint32_t HELPER(dagadd)(uint32_t I, uint32_t L, uint32_t B, uint32_t M)
+{
+    uint64_t i = I;
+    uint64_t l = L;
+    uint64_t b = B;
+    uint64_t m = M;
+
+    uint64_t LB, IM, IML;
+    uint32_t im32, iml32, lb32, res;
+    uint64_t msb, car;
+
+    msb = (uint64_t)1 << 31;
+    car = (uint64_t)1 << 32;
+
+    IM = i + m;
+    im32 = IM;
+    LB = l + b;
+    lb32 = LB;
+
+    /* Negative M wraps by adding L; positive M wraps by subtracting L.  */
+    if ((int32_t)M < 0) {
+        IML = i + m + l;
+        iml32 = IML;
+        if ((i & msb) || (IM & car))
+            res = (im32 < b) ? iml32 : im32;
+        else
+            res = (im32 < b) ? im32 : iml32;
+    } else {
+        IML = i + m - l;
+        iml32 = IML;
+        if ((IM & car) == (LB & car))
+            res = (im32 < lb32) ? im32 : iml32;
+        else
+            res = (im32 < lb32) ? iml32 : im32;
+    }
+
+    return res;
+}
+
+/* See dagadd() notes above.  */
+/* Circular-buffer post-decrement: I -= M with wraparound, implemented as
+   addition of M's two's complement (mbar).  */
+uint32_t HELPER(dagsub)(uint32_t I, uint32_t L, uint32_t B, uint32_t M)
+{
+    uint64_t i = I;
+    uint64_t l = L;
+    uint64_t b = B;
+    uint64_t m = M;
+
+    uint64_t mbar = (uint32_t)(~m + 1);
+    uint64_t LB, IM, IML;
+    uint32_t b32, im32, iml32, lb32, res;
+    uint64_t msb, car;
+
+    msb = (uint64_t)1 << 31;
+    car = (uint64_t)1 << 32;
+
+    IM = i + mbar;
+    im32 = IM;
+    LB = l + b;
+    lb32 = LB;
+
+    if ((int32_t)M < 0) {
+        IML = i + mbar - l;
+        iml32 = IML;
+        if (!!((i & msb) && (IM & car)) == !!(LB & car))
+            res = (im32 < lb32) ? im32 : iml32;
+        else
+            res = (im32 < lb32) ? iml32 : im32;
+    } else {
+        IML = i + mbar + l;
+        iml32 = IML;
+        b32 = b;
+        if (M == 0 || IM & car)
+            res = (im32 < b32) ? iml32 : im32;
+        else
+            res = (im32 < b32) ? im32 : iml32;
+    }
+
+    return res;
+}
+
+/* Bit-reversed addition: add the two operands with the carry chain
+   propagating from the MSB downward instead of from the LSB upward
+   (used for bit-reversed DAG addressing).  */
+uint32_t HELPER(add_brev)(uint32_t addend1, uint32_t addend2)
+{
+    uint32_t mask, b, r;
+    int i, cy;
+
+    mask = 0x80000000;
+    r = 0;
+    cy = 0;
+
+    /* Full-adder per bit, walking from bit 31 down to bit 0.  */
+    for (i = 31; i >= 0; --i) {
+        b = ((addend1 & mask) >> i) + ((addend2 & mask) >> i);
+        b += cy;
+        cy = b >> 1;
+        b &= 1;
+        r |= b << i;
+        mask >>= 1;
+    }
+
+    return r;
+}
diff --git a/target-bfin/translate.c b/target-bfin/translate.c
new file mode 100644
index 0000000..a619f66
--- /dev/null
+++ b/target-bfin/translate.c
@@ -0,0 +1,1347 @@ 
+/*
+ * Blackfin translation
+ *
+ * Copyright 2007-2013 Mike Frysinger
+ * Copyright 2007-2011 Analog Devices, Inc.
+ *
+ * Licensed under the Lesser GPL 2 or later.
+ */
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "cpu.h"
+#include "disas/disas.h"
+#include "exec/exec-all.h"
+#include "tcg-op.h"
+#include "qemu-common.h"
+#include "opcode/bfin.h"
+
+#include "helper.h"
+#define GEN_HELPER 1
+#include "helper.h"
+
+/* We're making a call (which means we need to update RTS) */
+#define DISAS_CALL 0xad0
+
+static TCGv_ptr cpu_env;
+static TCGv cpu_dreg[8];
+static TCGv cpu_preg[8];
+#define cpu_spreg cpu_preg[6]
+#define cpu_fpreg cpu_preg[7]
+static TCGv cpu_ireg[4];
+static TCGv cpu_mreg[4];
+static TCGv cpu_breg[4];
+static TCGv cpu_lreg[4];
+static TCGv_i64 cpu_areg[2];
+static TCGv cpu_rets;
+static TCGv cpu_lcreg[2], cpu_ltreg[2], cpu_lbreg[2];
+static TCGv cpu_cycles[2];
+static TCGv cpu_uspreg;
+static TCGv cpu_seqstat;
+static TCGv cpu_syscfg;
+static TCGv cpu_reti;
+static TCGv cpu_retx;
+static TCGv cpu_retn;
+static TCGv cpu_rete;
+static TCGv cpu_emudat;
+static TCGv cpu_pc;
+static TCGv cpu_cc;
+static TCGv /*cpu_astat_op,*/ cpu_astat_arg[3];
+
+#include "exec/gen-icount.h"
+
+/* Register a contiguous bank of 32-bit CPU state fields as TCG globals,
+ * naming each slot from the supplied name table.  */
+static inline void
+bfin_tcg_new_set3(TCGv *tcgv, unsigned int cnt, unsigned int offbase,
+                  const char * const *names)
+{
+    unsigned int n;
+
+    for (n = 0; n < cnt; n++) {
+        tcgv[n] = tcg_global_mem_new(TCG_AREG0, offbase + (n * 4), names[n]);
+    }
+}
+#define bfin_tcg_new_set2(tcgv, cnt, reg, name_idx) \
+    bfin_tcg_new_set3(tcgv, cnt, offsetof(CPUArchState, reg), &greg_names[name_idx])
+#define bfin_tcg_new_set(reg, name_idx) \
+    bfin_tcg_new_set2(cpu_##reg, ARRAY_SIZE(cpu_##reg), reg, name_idx)
+#define bfin_tcg_new(reg, name_idx) \
+    bfin_tcg_new_set2(&cpu_##reg, 1, reg, name_idx)
+
+/* Create and reset a Blackfin CPU, and (once per process) register all
+ * architectural registers as TCG globals.  The numeric arguments to the
+ * bfin_tcg_new* calls are indices into the greg_names table -- presumably
+ * the one shared with the gdbstub/opcodes layer; the ordering below must
+ * match that table exactly (TODO confirm against greg_names definition).  */
+CPUArchState *cpu_init(const char *cpu_model)
+{
+    BfinCPU *cpu;
+    CPUArchState *env;
+    static int tcg_initialized = 0;
+
+    cpu = BFIN_CPU(object_new(TYPE_BFIN_CPU));
+    env = &cpu->env;
+
+    cpu_reset(CPU(cpu));
+    qemu_init_vcpu(env);
+
+    /* TCG globals may only be registered once, even if multiple CPUs
+     * are created.  */
+    if (tcg_initialized)
+        return env;
+
+    tcg_initialized = 1;
+
+#define GEN_HELPER 2
+#include "helper.h"
+
+    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
+
+    cpu_pc = tcg_global_mem_new(TCG_AREG0,
+        offsetof(CPUArchState, pc), "PC");
+    cpu_cc = tcg_global_mem_new(TCG_AREG0,
+        offsetof(CPUArchState, astat[ASTAT_CC]), "CC");
+
+    /*cpu_astat_op = tcg_global_mem_new(TCG_AREG0,
+        offsetof(CPUArchState, astat_op), "astat_op");*/
+    cpu_astat_arg[0] = tcg_global_mem_new(TCG_AREG0,
+        offsetof(CPUArchState, astat_arg[0]), "astat_arg[0]");
+    cpu_astat_arg[1] = tcg_global_mem_new(TCG_AREG0,
+        offsetof(CPUArchState, astat_arg[1]), "astat_arg[1]");
+    cpu_astat_arg[2] = tcg_global_mem_new(TCG_AREG0,
+        offsetof(CPUArchState, astat_arg[2]), "astat_arg[2]");
+
+    /* Accumulators are 40 bits; backed by 64-bit state.  */
+    cpu_areg[0] = tcg_global_mem_new_i64(TCG_AREG0,
+        offsetof(CPUArchState, areg[0]), "A0");
+    cpu_areg[1] = tcg_global_mem_new_i64(TCG_AREG0,
+        offsetof(CPUArchState, areg[1]), "A1");
+
+    bfin_tcg_new_set(dreg, 0);
+    bfin_tcg_new_set(preg, 8);
+    bfin_tcg_new_set(ireg, 16);
+    bfin_tcg_new_set(mreg, 20);
+    bfin_tcg_new_set(breg, 24);
+    bfin_tcg_new_set(lreg, 28);
+    bfin_tcg_new(rets, 39);
+    bfin_tcg_new(lcreg[0], 48);
+    bfin_tcg_new(ltreg[0], 49);
+    bfin_tcg_new(lbreg[0], 50);
+    bfin_tcg_new(lcreg[1], 51);
+    bfin_tcg_new(ltreg[1], 52);
+    bfin_tcg_new(lbreg[1], 53);
+    bfin_tcg_new_set(cycles, 54);
+    bfin_tcg_new(uspreg, 56);
+    bfin_tcg_new(seqstat, 57);
+    bfin_tcg_new(syscfg, 58);
+    bfin_tcg_new(reti, 59);
+    bfin_tcg_new(retx, 60);
+    bfin_tcg_new(retn, 61);
+    bfin_tcg_new(rete, 62);
+    bfin_tcg_new(emudat, 63);
+
+    return env;
+}
+
+#define _astat_printf(bit) cpu_fprintf(f, "%s" #bit " ", (env->astat[ASTAT_##bit] ? "" : "~"))
+/* Dump the complete user-visible register state to f.  Formats must
+ * match the field widths: the 40-bit accumulators are 64-bit storage,
+ * so they need PRIx64 rather than %lx (which is 32 bits on 32-bit
+ * hosts -- the old code printed garbage there).  syscfg is printed as
+ * a plain 32-bit value (assumes the field is 32-bit -- TODO confirm
+ * against cpu.h).  */
+void cpu_dump_state(CPUArchState *env, FILE *f,
+                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+                    int flags)
+{
+    cpu_fprintf(f, "              SYSCFG: %04x   SEQSTAT: %08x\n",
+                env->syscfg, env->seqstat);
+    cpu_fprintf(f, "RETE: %08x  RETN: %08x  RETX: %08x\n",
+                env->rete, env->retn, env->retx);
+    cpu_fprintf(f, "RETI: %08x  RETS: %08x   PC : %08x\n",
+                env->reti, env->rets, env->pc);
+    cpu_fprintf(f, " R0 : %08x   R4 : %08x   P0 : %08x   P4 : %08x\n",
+                env->dreg[0], env->dreg[4], env->preg[0], env->preg[4]);
+    cpu_fprintf(f, " R1 : %08x   R5 : %08x   P1 : %08x   P5 : %08x\n",
+                env->dreg[1], env->dreg[5], env->preg[1], env->preg[5]);
+    cpu_fprintf(f, " R2 : %08x   R6 : %08x   P2 : %08x   SP : %08x\n",
+                env->dreg[2], env->dreg[6], env->preg[2], env->spreg);
+    cpu_fprintf(f, " R3 : %08x   R7 : %08x   P3 : %08x   FP : %08x\n",
+                env->dreg[3], env->dreg[7], env->preg[3], env->fpreg);
+    cpu_fprintf(f, " LB0: %08x   LT0: %08x   LC0: %08x\n",
+                env->lbreg[0], env->ltreg[0], env->lcreg[0]);
+    cpu_fprintf(f, " LB1: %08x   LT1: %08x   LC1: %08x\n",
+                env->lbreg[1], env->ltreg[1], env->lcreg[1]);
+    cpu_fprintf(f, " B0 : %08x   L0 : %08x   M0 : %08x   I0 : %08x\n",
+                env->breg[0], env->lreg[0], env->mreg[0], env->ireg[0]);
+    cpu_fprintf(f, " B1 : %08x   L1 : %08x   M1 : %08x   I1 : %08x\n",
+                env->breg[1], env->lreg[1], env->mreg[1], env->ireg[1]);
+    cpu_fprintf(f, " B2 : %08x   L2 : %08x   M2 : %08x   I2 : %08x\n",
+                env->breg[2], env->lreg[2], env->mreg[2], env->ireg[2]);
+    cpu_fprintf(f, " B3 : %08x   L3 : %08x   M3 : %08x   I3 : %08x\n",
+                env->breg[3], env->lreg[3], env->mreg[3], env->ireg[3]);
+    /* Accumulators: mask down to their architectural 40 bits.  */
+    cpu_fprintf(f, "  A0: %010" PRIx64 "                 A1: %010" PRIx64 "\n",
+                (uint64_t)(env->areg[0] & 0xffffffffffull),
+                (uint64_t)(env->areg[1] & 0xffffffffffull));
+    cpu_fprintf(f, " USP: %08x ASTAT: %08x   CC : %08x\n",
+                env->uspreg, bfin_astat_read(env), env->astat[ASTAT_CC]);
+    cpu_fprintf(f, "ASTAT BITS: ");
+    _astat_printf(VS);
+    _astat_printf(V);
+    _astat_printf(AV1S);
+    _astat_printf(AV1);
+    _astat_printf(AV0S);
+    _astat_printf(AV0);
+    _astat_printf(AC1);
+    _astat_printf(AC0);
+    _astat_printf(AQ);
+    _astat_printf(CC);
+    _astat_printf(V_COPY);
+    _astat_printf(AC0_COPY);
+    _astat_printf(AN);
+    _astat_printf(AZ);
+    cpu_fprintf(f, "\nASTAT CACHE:   OP: %02u   ARG: %08x %08x %08x\n",
+                env->astat_op, env->astat_arg[0], env->astat_arg[1], env->astat_arg[2]);
+    cpu_fprintf(f, "              CYCLES: %08x %08x\n",
+                env->cycles[0], env->cycles[1]);
+
+/*
+    iw = ldq_code(env->pc);
+    if ((iw & 0xc000) != 0xc000)
+        len = 2;
+    else if ((iw & BIT_MULTI_INS) && (iw & 0xe800) != 0xe800)
+        len = 8;
+    else
+        len = 4;
+    log_target_disas(env->pc, len, 0);
+*/
+}
+
+static void gen_astat_update(DisasContext *, bool);
+
+/* Jump to the address held in `dest`.  Direct TB chaining is disabled
+ * (commented out) -- presumably because LB-register writes invalidate
+ * chained blocks (see gen_maybe_lb_exit_tb); every jump falls back to
+ * flushing the lazy ASTAT state and exiting to the main loop.  */
+static void gen_goto_tb(DisasContext *dc, int tb_num, TCGv dest)
+{
+/*
+    TranslationBlock *tb;
+    tb = dc->tb;
+
+    if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
+        tcg_gen_goto_tb(tb_num);
+        tcg_gen_mov_tl(cpu_pc, dest);
+        tcg_gen_exit_tb((long)tb + tb_num);
+    } else */{
+        gen_astat_update(dc, false);
+        tcg_gen_mov_tl(cpu_pc, dest);
+        tcg_gen_exit_tb(0);
+    }
+}
+
+/* Immediate-target variant of gen_goto_tb().  */
+static void gen_gotoi_tb(DisasContext *dc, int tb_num, target_ulong dest)
+{
+    TCGv dest_tv = tcg_temp_local_new();
+
+    tcg_gen_movi_tl(dest_tv, dest);
+    gen_goto_tb(dc, tb_num, dest_tv);
+    tcg_temp_free(dest_tv);
+}
+
+/* Raise CEC exception `excp` at the current instruction's PC and end
+ * this translation block.  */
+static void cec_exception(DisasContext *dc, int excp)
+{
+    TCGv tmp = tcg_const_tl(excp);
+    TCGv pc = tcg_const_tl(dc->pc);
+    gen_helper_raise_exception(cpu_env, tmp, pc);
+    /* Free both constants; the original leaked `pc`.  */
+    tcg_temp_free(pc);
+    tcg_temp_free(tmp);
+    dc->is_jmp = DISAS_UPDATE;
+}
+
+/* Insns restricted to supervisor mode: in usermode emulation always
+ * raise an illegal-supervisor-insn exception; system emulation is not
+ * implemented yet (hence the #error).
+ * NOTE(review): QEMU conventionally spells this CONFIG_USER_ONLY;
+ * confirm CONFIG_LINUX_USER is actually defined by this build.  */
+static void cec_require_supervisor(DisasContext *dc)
+{
+#ifdef CONFIG_LINUX_USER
+    cec_exception(dc, EXCP_ILL_SUPV);
+#else
+# error todo
+#endif
+}
+
+/* Emit a runtime alignment check for a `len`-byte access at `addr`,
+ * raising a misaligned instruction/data exception on failure.
+ * Currently compiled out entirely (the early return below) because of
+ * its translation cost; the call sites are kept so it can be re-enabled.  */
+static void gen_align_check(DisasContext *dc, TCGv addr, uint32_t len, bool inst)
+{
+    TCGv excp, pc, tmp;
+
+    /* XXX: This should be made into a runtime option.  It adds like
+            10% overhead to memory intensive apps (like mp3 decoding). */
+    if (1) {
+        return;
+    }
+
+    excp = tcg_const_tl(inst ? EXCP_MISALIG_INST : EXCP_DATA_MISALGIN);
+    pc = tcg_const_tl(dc->pc);
+    tmp = tcg_const_tl(len);
+    gen_helper_memalign(cpu_env, excp, pc, addr, tmp);
+    tcg_temp_free(tmp);
+    tcg_temp_free(pc);
+    tcg_temp_free(excp);
+}
+
+/* 16-bit zero-extending load with (optional) alignment check.  */
+static void gen_aligned_qemu_ld16u(DisasContext *dc, TCGv ret, TCGv addr)
+{
+    gen_align_check(dc, addr, 2, false);
+    tcg_gen_qemu_ld16u(ret, addr, dc->mem_idx);
+}
+
+/* 16-bit sign-extending load with (optional) alignment check.  */
+static void gen_aligned_qemu_ld16s(DisasContext *dc, TCGv ret, TCGv addr)
+{
+    gen_align_check(dc, addr, 2, false);
+    tcg_gen_qemu_ld16s(ret, addr, dc->mem_idx);
+}
+
+/* 32-bit load with (optional) alignment check.  */
+static void gen_aligned_qemu_ld32u(DisasContext *dc, TCGv ret, TCGv addr)
+{
+    gen_align_check(dc, addr, 4, false);
+    tcg_gen_qemu_ld32u(ret, addr, dc->mem_idx);
+}
+
+/* 16-bit store with (optional) alignment check.  */
+static void gen_aligned_qemu_st16(DisasContext *dc, TCGv val, TCGv addr)
+{
+    gen_align_check(dc, addr, 2, false);
+    tcg_gen_qemu_st16(val, addr, dc->mem_idx);
+}
+
+/* 32-bit store with (optional) alignment check.  */
+static void gen_aligned_qemu_st32(DisasContext *dc, TCGv val, TCGv addr)
+{
+    gen_align_check(dc, addr, 4, false);
+    tcg_gen_qemu_st32(val, addr, dc->mem_idx);
+}
+
+/*
+ * If a LB reg is written, we need to invalidate the two translation
+ * blocks that could be affected -- the TB's referenced by the old LB
+ * could have LC/LT handling which we no longer want, and the new LB
+ * is probably missing LC/LT handling which we want.  In both cases,
+ * we need to regenerate the block.
+ */
+/* If `reg` is one of the loop-bottom registers (LB0/LB1), force an exit
+ * from the current TB so hardware-loop state is re-evaluated (see the
+ * comment block above).  */
+static void gen_maybe_lb_exit_tb(DisasContext *dc, TCGv reg)
+{
+    if (!TCGV_EQUAL(reg, cpu_lbreg[0]) && !TCGV_EQUAL(reg, cpu_lbreg[1]))
+        return;
+
+    /* tb_invalidate_phys_page_range */
+    dc->is_jmp = DISAS_UPDATE;
+    /* XXX: Not entirely correct, but very few things load
+     *      directly into LB ... */
+    gen_gotoi_tb(dc, 0, dc->pc + dc->insn_len);
+}
+
+/* Default hwloop continuation: jump back to the loop top (LT reg).
+ * A loop index of -1 means "no active loop", so nothing is emitted.  */
+static void gen_hwloop_default(DisasContext *dc, int loop)
+{
+    if (loop >= 0) {
+        gen_goto_tb(dc, 0, cpu_ltreg[loop]);
+    }
+}
+
+/* If the pending branch is a call, set RETS to the return address:
+ * the next sequential insn normally, or the loop top when the call
+ * sits at the bottom of an active hardware loop.  */
+static void _gen_hwloop_call(DisasContext *dc, int loop)
+{
+    if (dc->is_jmp != DISAS_CALL) {
+        return;
+    }
+
+    if (loop < 0) {
+        tcg_gen_movi_tl(cpu_rets, dc->pc + dc->insn_len);
+    } else {
+        tcg_gen_mov_tl(cpu_rets, cpu_ltreg[loop]);
+    }
+}
+
+/* Conditional PC-relative branch at a hwloop boundary.  hwloop_data
+ * packs the (even) pc-relative offset with the expected CC value in
+ * its low bit.  */
+static void gen_hwloop_br_pcrel_cc(DisasContext *dc, int loop)
+{
+    int l;
+    int pcrel = (unsigned long)dc->hwloop_data;
+    int T = pcrel & 1;       /* branch when CC == T */
+    pcrel &= ~1;
+
+    l = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_cc, T, l);
+    gen_gotoi_tb(dc, 0, dc->pc + pcrel);
+    gen_set_label(l);
+    /* Not taken: fall through to normal hwloop handling.  */
+    if (loop == -1)
+        dc->hwloop_callback = gen_hwloop_default;
+    else
+        gen_hwloop_default(dc, loop);
+}
+
+/* Register-relative branch at a hwloop boundary: PC += *offset_reg.  */
+static void gen_hwloop_br_pcrel(DisasContext *dc, int loop)
+{
+    TCGv *offset_reg = dc->hwloop_data;
+
+    _gen_hwloop_call(dc, loop);
+    tcg_gen_addi_tl(cpu_pc, *offset_reg, dc->pc);
+    gen_goto_tb(dc, 0, cpu_pc);
+}
+
+/* Immediate PC-relative branch at a hwloop boundary: PC += pcrel.  */
+static void gen_hwloop_br_pcrel_imm(DisasContext *dc, int loop)
+{
+    int pcrel = (unsigned long)dc->hwloop_data;
+    TCGv off;
+
+    _gen_hwloop_call(dc, loop);
+    off = tcg_const_tl(pcrel);
+    tcg_gen_addi_tl(cpu_pc, off, dc->pc);
+    tcg_temp_free(off);
+    gen_goto_tb(dc, 0, cpu_pc);
+}
+
+/* Absolute (register) branch at a hwloop boundary.  */
+static void gen_hwloop_br_direct(DisasContext *dc, int loop)
+{
+    TCGv *target = dc->hwloop_data;
+
+    _gen_hwloop_call(dc, loop);
+    gen_goto_tb(dc, 0, *target);
+}
+
+/* Emit one hardware-loop iteration check: if LC is already 0 the loop
+ * is inactive; otherwise decrement LC and, when it reaches 0, the loop
+ * terminates.  In both exit cases control jumps to label `l`; when the
+ * loop continues, the registered callback emits the back-edge.  */
+static void _gen_hwloop_check(DisasContext *dc, int loop, int l)
+{
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_lcreg[loop], 0, l);
+    tcg_gen_subi_tl(cpu_lcreg[loop], cpu_lcreg[loop], 1);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_lcreg[loop], 0, l);
+    dc->hwloop_callback(dc, loop);
+}
+
+/* At the end of each insn, emit loop-bottom checks when the current PC
+ * matches LB1 and/or LB0.  Loop 1 is checked first (it has priority
+ * when both loops end on the same insn).  */
+static void gen_hwloop_check(DisasContext *dc)
+{
+    bool loop1, loop0;
+    int endl = -1;    /* initialized so it is never read indeterminate */
+
+    loop1 = (dc->pc == dc->env->lbreg[1]);
+    loop0 = (dc->pc == dc->env->lbreg[0]);
+
+    if (loop1 || loop0) {
+        endl = gen_new_label();
+    }
+
+    if (loop1) {
+        /* When both loops end here, loop1's exit path must fall into
+         * the loop0 check instead of skipping everything.  */
+        int l = loop0 ? gen_new_label() : endl;
+
+        _gen_hwloop_check(dc, 1, l);
+
+        if (loop0) {
+            tcg_gen_br(endl);
+            gen_set_label(l);
+        }
+    }
+
+    if (loop0) {
+        _gen_hwloop_check(dc, 0, endl);
+    }
+
+    if (loop1 || loop0) {
+        gen_set_label(endl);
+    }
+
+    dc->hwloop_callback(dc, -1);
+}
+
+/* R#.L = reg; R#.H = reg; */
+/* XXX: This modifies the low source ... assumes it is a temp ... */
+/*
+static void gen_mov_l_h_tl(TCGv dst, TCGv srcl, TCGv srch)
+{
+    tcg_gen_shli_tl(dst, srch, 16);
+    tcg_gen_andi_tl(srcl, srcl, 0xffff);
+    tcg_gen_or_tl(dst, dst, srcl);
+}
+*/
+
+/* R#.L = reg */
+/* XXX: This modifies the source ... assumes it is a temp ...
+   (same caveat as gen_mov_h_tl: src is masked in place) */
+static void gen_mov_l_tl(TCGv dst, TCGv src)
+{
+    tcg_gen_andi_tl(dst, dst, 0xffff0000);  /* keep dst's high half */
+    tcg_gen_andi_tl(src, src, 0xffff);      /* NB: clobbers src */
+    tcg_gen_or_tl(dst, dst, src);
+}
+
+/* R#.L = imm32 */
+/*
+static void gen_movi_l_tl(TCGv dst, uint32_t src)
+{
+    tcg_gen_andi_tl(dst, dst, 0xffff0000);
+    tcg_gen_ori_tl(dst, dst, src & 0xffff);
+}
+*/
+
+/* R#.H = reg */
+/* XXX: This modifies the source ... assumes it is a temp ... */
+static void gen_mov_h_tl(TCGv dst, TCGv src)
+{
+    tcg_gen_andi_tl(dst, dst, 0xffff);  /* keep dst's low half */
+    tcg_gen_shli_tl(src, src, 16);      /* NB: clobbers src */
+    tcg_gen_or_tl(dst, dst, src);
+}
+
+/* R#.H = imm32 */
+/*
+static void gen_movi_h_tl(TCGv dst, uint32_t src)
+{
+    tcg_gen_andi_tl(dst, dst, 0xffff);
+    tcg_gen_ori_tl(dst, dst, src << 16);
+}
+*/
+
+/* Sign-extend the low n bits of src into dst (variable n): push the
+ * sign bit up to bit 31, then arithmetic-shift it back down.  */
+static void gen_extNs_tl(TCGv dst, TCGv src, TCGv n)
+{
+    TCGv shamt = tcg_temp_new();
+
+    tcg_gen_subfi_tl(shamt, 32, n);
+    tcg_gen_shl_tl(dst, src, shamt);
+    tcg_gen_sar_tl(dst, dst, shamt);
+    tcg_temp_free(shamt);
+}
+
+/* Sign-extend the low n bits of src into dst (immediate n).
+ * NOTE(review): assumes 1 <= n <= 32 -- n == 0 would emit a 32-bit
+ * shift by 32, which TCG leaves undefined; confirm callers.  */
+static void gen_extNsi_tl(TCGv dst, TCGv src, uint32_t n)
+{
+    /* Shift the sign bit up, and then back down */
+    tcg_gen_shli_tl(dst, src, 32 - n);
+    tcg_gen_sari_tl(dst, dst, 32 - n);
+}
+
+/* 64-bit variant of gen_extNsi_tl().
+ * NOTE(review): assumes 1 <= n <= 64 (n == 0 would shift by 64).  */
+static void gen_extNsi_i64(TCGv_i64 dst, TCGv_i64 src, uint32_t n)
+{
+    /* Shift the sign bit up, and then back down */
+    tcg_gen_shli_i64(dst, src, 64 - n);
+    tcg_gen_sari_i64(dst, dst, 64 - n);
+}
+
+#if 0
+/* Zero-extend the low n bits of src into dst (variable n).
+ * The old mask ~((1 << n) - 1) was inverted -- it KEPT the high bits --
+ * and "1 << n" cannot be evaluated with a TCGv n anyway, so build the
+ * mask ((1 << n) - 1) with TCG ops.  Still disabled/unused.  */
+static void gen_extNu_tl(TCGv dst, TCGv src, TCGv n)
+{
+    TCGv mask = tcg_temp_new();
+    tcg_gen_movi_tl(mask, 1);
+    tcg_gen_shl_tl(mask, mask, n);
+    tcg_gen_subi_tl(mask, mask, 1);
+    tcg_gen_and_tl(dst, src, mask);
+    tcg_temp_free(mask);
+}
+
+/* Zero-extend the low n bits of src into dst (immediate n); assumes
+ * 1 <= n <= 32.  (Mask was likewise inverted before.)  */
+static void gen_extNui_tl(TCGv dst, TCGv src, uint32_t n)
+{
+    tcg_gen_andi_tl(dst, src, (1ull << n) - 1);
+}
+#endif
+
+/* dst = SIGNBITS(src) for an immediate operand width (32-bit source).  */
+static void gen_signbitsi_tl(TCGv dst, TCGv src, uint32_t size)
+{
+    TCGv width = tcg_const_tl(size);
+
+    gen_helper_signbits(dst, src, width);
+    tcg_temp_free(width);
+}
+
+/* dst = SIGNBITS(src) for an immediate operand width (64-bit source,
+ * 32-bit result).  */
+static void gen_signbitsi_i64_i32(TCGv dst, TCGv_i64 src, uint32_t size)
+{
+    TCGv width = tcg_const_tl(size);
+
+    gen_helper_signbits_64(dst, src, width);
+    tcg_temp_free(width);
+}
+
+/* ret = |arg|: copy, then negate only when the value is negative.  */
+static void gen_abs_tl(TCGv ret, TCGv arg)
+{
+    int done = gen_new_label();
+
+    tcg_gen_mov_tl(ret, arg);
+    tcg_gen_brcondi_tl(TCG_COND_GE, arg, 0, done);
+    tcg_gen_neg_tl(ret, ret);
+    gen_set_label(done);
+}
+
+/* 64-bit variant of gen_abs_tl().  */
+static void gen_abs_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    int done = gen_new_label();
+
+    tcg_gen_mov_i64(ret, arg);
+    tcg_gen_brcondi_i64(TCG_COND_GE, arg, 0, done);
+    tcg_gen_neg_i64(ret, ret);
+    gen_set_label(done);
+}
+
+/* Common tail code for DIVQ/DIVS insns */
+/* Shift the 16-bit quotient left, insert `aq` as the new LSB, and put
+ * the partial remainder `r` back in the high half of pquo.  Frees all
+ * three temps passed in.  */
+static void _gen_divqs(TCGv pquo, TCGv r, TCGv aq, TCGv div)
+{
+    /*
+     * pquo <<= 1
+     * pquo |= aq
+     * pquo = (pquo & 0x1FFFF) | (r << 17)
+     */
+    tcg_gen_shli_tl(pquo, pquo, 1);
+    tcg_gen_or_tl(pquo, pquo, aq);
+    tcg_gen_andi_tl(pquo, pquo, 0x1FFFF);
+    tcg_gen_shli_tl(r, r, 17);
+    tcg_gen_or_tl(pquo, pquo, r);
+
+    tcg_temp_free(r);
+    tcg_temp_free(aq);
+    tcg_temp_free(div);
+}
+
+/* Common AQ ASTAT bit management for DIVQ/DIVS insns */
+/* AQ = sign(r) XOR sign(div) (bit 15 of each), stored into ASTAT;
+ * `aq` also receives the computed value for the caller.  */
+static void _gen_divqs_st_aq(TCGv r, TCGv aq, TCGv div)
+{
+    /* aq = (r ^ div) >> 15 */
+    tcg_gen_xor_tl(aq, r, div);
+    tcg_gen_shri_tl(aq, aq, 15);
+    tcg_gen_andi_tl(aq, aq, 1);
+    tcg_gen_st_tl(aq, cpu_env, offsetof(CPUArchState, astat[ASTAT_AQ]));
+}
+
+/* DIVQ ( Dreg, Dreg ) ;
+ * Based on AQ status bit, either add or subtract the divisor from
+ * the dividend. Then set the AQ status bit based on the MSBs of the
+ * 32-bit dividend and the 16-bit divisor. Left shift the dividend one
+ * bit. Copy the logical inverse of AQ into the dividend LSB.
+ */
+static void gen_divq(TCGv pquo, TCGv src)
+{
+    int l;
+    TCGv af, r, aq, div;
+
+    /* div = R#.L */
+    div = tcg_temp_local_new();
+    tcg_gen_ext16u_tl(div, src);
+
+    /* af = pquo >> 16  (current partial remainder) */
+    af = tcg_temp_local_new();
+    tcg_gen_shri_tl(af, pquo, 16);
+
+    /*
+     * we take this:
+     *  if (ASTAT_AQ)
+     *    r = div + af;
+     *  else
+     *    r = af - div;
+     *
+     * and turn it into:
+     *  r = div;
+     *  if (aq == 0)
+     *    r = -r;
+     *  r += af;
+     */
+    aq = tcg_temp_local_new();
+    tcg_gen_ld_tl(aq, cpu_env, offsetof(CPUArchState, astat[ASTAT_AQ]));
+
+    l = gen_new_label();
+    r = tcg_temp_local_new();
+    tcg_gen_mov_tl(r, div);
+    tcg_gen_brcondi_tl(TCG_COND_NE, aq, 0, l);
+    tcg_gen_neg_tl(r, r);
+    gen_set_label(l);
+    tcg_gen_add_tl(r, r, af);
+
+    tcg_temp_free(af);
+
+    /* Recompute AQ from the new remainder; aq now holds the new value.  */
+    _gen_divqs_st_aq(r, aq, div);
+
+    /* aq = !aq -- DIVQ inserts the INVERSE of AQ as the quotient LSB.  */
+    tcg_gen_xori_tl(aq, aq, 1);
+
+    _gen_divqs(pquo, r, aq, div);
+}
+
+/* DIVS ( Dreg, Dreg ) ;
+ * Initialize for DIVQ. Set the AQ status bit based on the signs of
+ * the 32-bit dividend and the 16-bit divisor. Left shift the dividend
+ * one bit. Copy AQ into the dividend LSB.
+ */
+static void gen_divs(TCGv pquo, TCGv src)
+{
+    TCGv r, aq, div;
+
+    /* div = R#.L */
+    div = tcg_temp_local_new();
+    tcg_gen_ext16u_tl(div, src);
+
+    /* r = pquo >> 16  (initial partial remainder) */
+    r = tcg_temp_local_new();
+    tcg_gen_shri_tl(r, pquo, 16);
+
+    /* aq is purely an output of _gen_divqs_st_aq, so no initial load.  */
+    aq = tcg_temp_local_new();
+
+    _gen_divqs_st_aq(r, aq, div);
+
+    /* Unlike DIVQ, DIVS inserts AQ itself (not its inverse) as the LSB.  */
+    _gen_divqs(pquo, r, aq, div);
+}
+
+/* Reg = ROT reg BY reg/imm
+ * The Blackfin rotate is not like the TCG rotate.  It shifts through the
+ * CC bit too giving it 33 bits to play with.  So we have to reduce things
+ * to shifts ourself.
+ */
+/* 33-bit rotate (32 data bits + CC) by a runtime shift amount.
+ * dst and src may alias; orig_shift is never modified.  */
+static void gen_rot_tl(TCGv dst, TCGv src, TCGv orig_shift)
+{
+    uint32_t nbits = 32;
+    TCGv shift, ret, tmp, tmp_shift;
+    int l, endl;
+
+    /* shift = CLAMP (shift, -nbits, nbits); */
+
+    endl = gen_new_label();
+
+    /* if (shift == 0) -- nothing moves, not even CC */
+    l = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_NE, orig_shift, 0, l);
+    tcg_gen_mov_tl(dst, src);
+    tcg_gen_br(endl);
+    gen_set_label(l);
+
+    /* Reduce everything to rotate left */
+    shift = tcg_temp_local_new();
+    tcg_gen_mov_tl(shift, orig_shift);
+    l = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_GE, shift, 0, l);
+    tcg_gen_addi_tl(shift, shift, nbits + 1);
+    gen_set_label(l);
+
+    /* Work in a temp when dst aliases src, since src is read later.  */
+    if (TCGV_EQUAL(dst, src))
+        ret = tcg_temp_local_new();
+    else
+        ret = dst;
+
+    /* ret = shift == nbits ? 0 : val << shift; */
+    tcg_gen_movi_tl(ret, 0);
+    l = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, shift, nbits, l);
+    tcg_gen_shl_tl(ret, src, shift);
+    gen_set_label(l);
+
+    /* ret |= shift == 1 ? 0 : val >> ((nbits + 1) - shift); */
+    l = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, shift, 1, l);
+    tmp = tcg_temp_new();
+    tmp_shift = tcg_temp_new();
+    tcg_gen_subfi_tl(tmp_shift, nbits + 1, shift);
+    tcg_gen_shr_tl(tmp, src, tmp_shift);
+    tcg_gen_or_tl(ret, ret, tmp);
+    tcg_temp_free(tmp_shift);
+    tcg_temp_free(tmp);
+    gen_set_label(l);
+
+    /* Then add in and output feedback via the CC register */
+    tcg_gen_subi_tl(shift, shift, 1);
+    tcg_gen_shl_tl(cpu_cc, cpu_cc, shift);
+    tcg_gen_or_tl(ret, ret, cpu_cc);
+    tcg_gen_subfi_tl(shift, nbits - 1, shift);
+    tcg_gen_shr_tl(cpu_cc, src, shift);
+    tcg_gen_andi_tl(cpu_cc, cpu_cc, 1);
+
+    if (TCGV_EQUAL(dst, src)) {
+        tcg_gen_mov_tl(dst, ret);
+        tcg_temp_free(ret);
+    }
+
+    tcg_temp_free(shift);
+    gen_set_label(endl);
+}
+
+/* Immediate-shift version of gen_rot_tl(); same 33-bit (data + CC)
+ * rotate semantics, with the branches resolved at translation time.  */
+static void gen_roti_tl(TCGv dst, TCGv src, int32_t shift)
+{
+    uint32_t nbits = 32;
+    TCGv ret;
+
+    /* shift = CLAMP (shift, -nbits, nbits); */
+
+    if (shift == 0) {
+        tcg_gen_mov_tl(dst, src);
+        return;
+    }
+
+    /* Reduce everything to rotate left */
+    if (shift < 0)
+        shift += nbits + 1;
+
+    /* Work in a temp when dst aliases src, since src is read later.  */
+    if (TCGV_EQUAL(dst, src))
+        ret = tcg_temp_new();
+    else
+        ret = dst;
+
+    /* First rotate the main register */
+    if (shift == nbits)
+        tcg_gen_movi_tl(ret, 0);
+    else
+        tcg_gen_shli_tl(ret, src, shift);
+    if (shift != 1) {
+        TCGv tmp = tcg_temp_new();
+        tcg_gen_shri_tl(tmp, src, (nbits + 1) - shift);
+        tcg_gen_or_tl(ret, ret, tmp);
+        tcg_temp_free(tmp);
+    }
+
+    /* Then add in and output feedback via the CC register */
+    tcg_gen_shli_tl(cpu_cc, cpu_cc, shift - 1);
+    tcg_gen_or_tl(ret, ret, cpu_cc);
+    tcg_gen_shri_tl(cpu_cc, src, nbits - shift);
+    tcg_gen_andi_tl(cpu_cc, cpu_cc, 1);
+
+    if (TCGV_EQUAL(dst, src)) {
+        tcg_gen_mov_tl(dst, ret);
+        tcg_temp_free(ret);
+    }
+}
+
+/* 41-bit rotate (40 accumulator bits + CC) by a runtime shift amount;
+ * the 64-bit analogue of gen_rot_tl().  CC is widened to 64 bits for
+ * the feedback path and truncated back at the end.  */
+static void gen_rot_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 orig_shift)
+{
+    uint32_t nbits = 40;
+    TCGv_i64 shift, ret, tmp, tmp_shift, cc64;
+    int l, endl;
+
+    /* shift = CLAMP (shift, -nbits, nbits); */
+
+    endl = gen_new_label();
+
+    /* if (shift == 0) */
+    l = gen_new_label();
+    tcg_gen_brcondi_i64(TCG_COND_NE, orig_shift, 0, l);
+    tcg_gen_mov_i64(dst, src);
+    tcg_gen_br(endl);
+    gen_set_label(l);
+
+    /* Reduce everything to rotate left */
+    shift = tcg_temp_local_new_i64();
+    tcg_gen_mov_i64(shift, orig_shift);
+    l = gen_new_label();
+    tcg_gen_brcondi_i64(TCG_COND_GE, shift, 0, l);
+    tcg_gen_addi_i64(shift, shift, nbits + 1);
+    gen_set_label(l);
+
+    if (TCGV_EQUAL_I64(dst, src))
+        ret = tcg_temp_local_new_i64();
+    else
+        ret = dst;
+
+    /* ret = shift == nbits ? 0 : val << shift; */
+    tcg_gen_movi_i64(ret, 0);
+    l = gen_new_label();
+    tcg_gen_brcondi_i64(TCG_COND_EQ, shift, nbits, l);
+    tcg_gen_shl_i64(ret, src, shift);
+    gen_set_label(l);
+
+    /* ret |= shift == 1 ? 0 : val >> ((nbits + 1) - shift); */
+    l = gen_new_label();
+    tcg_gen_brcondi_i64(TCG_COND_EQ, shift, 1, l);
+    tmp = tcg_temp_new_i64();
+    tmp_shift = tcg_temp_new_i64();
+    tcg_gen_subfi_i64(tmp_shift, nbits + 1, shift);
+    tcg_gen_shr_i64(tmp, src, tmp_shift);
+    tcg_gen_or_i64(ret, ret, tmp);
+    tcg_temp_free_i64(tmp_shift);
+    tcg_temp_free_i64(tmp);
+    gen_set_label(l);
+
+    /* Then add in and output feedback via the CC register */
+    cc64 = tcg_temp_new_i64();
+    tcg_gen_ext_i32_i64(cc64, cpu_cc);
+    tcg_gen_subi_i64(shift, shift, 1);
+    tcg_gen_shl_i64(cc64, cc64, shift);
+    tcg_gen_or_i64(ret, ret, cc64);
+    tcg_gen_subfi_i64(shift, nbits - 1, shift);
+    tcg_gen_shr_i64(cc64, src, shift);
+    tcg_gen_andi_i64(cc64, cc64, 1);
+    tcg_gen_trunc_i64_i32(cpu_cc, cc64);
+    tcg_temp_free_i64(cc64);
+
+    if (TCGV_EQUAL_I64(dst, src)) {
+        tcg_gen_mov_i64(dst, ret);
+        tcg_temp_free_i64(ret);
+    }
+
+    tcg_temp_free_i64(shift);
+    gen_set_label(endl);
+}
+
+/* Immediate-shift version of gen_rot_i64(): 41-bit (40 accumulator
+ * bits + CC) rotate with the branches resolved at translation time.  */
+static void gen_roti_i64(TCGv_i64 dst, TCGv_i64 src, int32_t shift)
+{
+    uint32_t nbits = 40;
+    TCGv_i64 ret, cc64;
+
+    /* shift = CLAMP (shift, -nbits, nbits); */
+
+    if (shift == 0) {
+        tcg_gen_mov_i64(dst, src);
+        return;
+    }
+
+    /* Reduce everything to rotate left */
+    if (shift < 0)
+        shift += nbits + 1;
+
+    if (TCGV_EQUAL_I64(dst, src))
+        ret = tcg_temp_new_i64();
+    else
+        ret = dst;
+
+    /* First rotate the main register */
+    if (shift == nbits)
+        tcg_gen_movi_i64(ret, 0);
+    else
+        tcg_gen_shli_i64(ret, src, shift);
+    if (shift != 1) {
+        TCGv_i64 tmp = tcg_temp_new_i64();
+        tcg_gen_shri_i64(tmp, src, (nbits + 1) - shift);
+        tcg_gen_or_i64(ret, ret, tmp);
+        tcg_temp_free_i64(tmp);
+    }
+
+    /* Then add in and output feedback via the CC register */
+    cc64 = tcg_temp_new_i64();
+    tcg_gen_ext_i32_i64(cc64, cpu_cc);
+    tcg_gen_shli_i64(cc64, cc64, shift - 1);
+    tcg_gen_or_i64(ret, ret, cc64);
+    tcg_gen_shri_i64(cc64, src, nbits - shift);
+    tcg_gen_andi_i64(cc64, cc64, 1);
+    tcg_gen_trunc_i64_i32(cpu_cc, cc64);
+    tcg_temp_free_i64(cc64);
+
+    if (TCGV_EQUAL_I64(dst, src)) {
+        tcg_gen_mov_i64(dst, ret);
+        tcg_temp_free_i64(ret);
+    }
+}
+
+/* This is a bit crazy, but we want to simulate the hardware behavior exactly
+   rather than worry about the circular buffers being used correctly.  Which
+   isn't to say there isn't room for improvement here, just that we want to
+   be conservative.  See also dagsub().  */
+/* I#dagno += M, honoring the circular buffer (B/L regs) for this DAG.
+ * Fast path: L == 0 means no circular buffer, so emit a plain add;
+ * otherwise call the dagadd helper for full wrap handling.  */
+static void gen_dagadd(DisasContext *dc, int dagno, TCGv M)
+{
+    int l, endl;
+
+    /* Optimize for when circ buffers are not used */
+    l = gen_new_label();
+    endl = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_lreg[dagno], 0, l);
+    tcg_gen_add_tl(cpu_ireg[dagno], cpu_ireg[dagno], M);
+    tcg_gen_br(endl);
+    gen_set_label(l);
+
+    /* Fallback to the big guns */
+    gen_helper_dagadd(cpu_ireg[dagno], cpu_ireg[dagno],
+                      cpu_lreg[dagno], cpu_breg[dagno], M);
+
+    gen_set_label(endl);
+}
+
+/* Immediate-modifier form of gen_dagadd().  */
+static void gen_dagaddi(DisasContext *dc, int dagno, uint32_t M)
+{
+    TCGv mod = tcg_temp_local_new();
+
+    tcg_gen_movi_tl(mod, M);
+    gen_dagadd(dc, dagno, mod);
+    tcg_temp_free(mod);
+}
+
+/* See dagadd() notes above.  */
+/* I#dagno -= M, honoring the circular buffer (B/L regs) for this DAG.
+ * Same fast-path/helper split as gen_dagadd().  */
+static void gen_dagsub(DisasContext *dc, int dagno, TCGv M)
+{
+    int l, endl;
+
+    /* Optimize for when circ buffers are not used */
+    l = gen_new_label();
+    endl = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_lreg[dagno], 0, l);
+    tcg_gen_sub_tl(cpu_ireg[dagno], cpu_ireg[dagno], M);
+    tcg_gen_br(endl);
+    gen_set_label(l);
+
+    /* Fallback to the big guns */
+    gen_helper_dagsub(cpu_ireg[dagno], cpu_ireg[dagno],
+                      cpu_lreg[dagno], cpu_breg[dagno], M);
+
+    gen_set_label(endl);
+}
+
+/* Immediate-modifier form of gen_dagsub().  */
+static void gen_dagsubi(DisasContext *dc, int dagno, uint32_t M)
+{
+    TCGv mod = tcg_temp_local_new();
+
+    tcg_gen_movi_tl(mod, M);
+    gen_dagsub(dc, dagno, mod);
+    tcg_temp_free(mod);
+}
+
+#define _gen_astat_store(bit, reg) tcg_gen_st_tl(reg, cpu_env, offsetof(CPUArchState, astat[bit]))
+
+/* AZ = (reg == 0); tmp is a caller-provided scratch temp.  */
+static void _gen_astat_update_az(TCGv reg, TCGv tmp)
+{
+    tcg_gen_setcondi_tl(TCG_COND_EQ, tmp, reg, 0);
+    _gen_astat_store(ASTAT_AZ, tmp);
+}
+
+/* Vector form: AZ = (reg == 0) || (reg2 == 0).  */
+static void _gen_astat_update_az2(TCGv reg, TCGv reg2, TCGv tmp)
+{
+    TCGv tmp2 = tcg_temp_new();
+    tcg_gen_setcondi_tl(TCG_COND_EQ, tmp, reg, 0);
+    tcg_gen_setcondi_tl(TCG_COND_EQ, tmp2, reg2, 0);
+    tcg_gen_or_tl(tmp, tmp, tmp2);
+    tcg_temp_free(tmp2);
+    _gen_astat_store(ASTAT_AZ, tmp);
+}
+
+/* AN = sign bit of the len-bit value in reg (assumes reg holds a
+ * zero-extended len-bit value -- TODO confirm at the call sites).
+ * Use 1u: "1 << 31" is signed-overflow UB in C when len == 32.  */
+static void _gen_astat_update_an(TCGv reg, TCGv tmp, uint32_t len)
+{
+    tcg_gen_setcondi_tl(TCG_COND_GEU, tmp, reg, 1u << (len - 1));
+    _gen_astat_store(ASTAT_AN, tmp);
+}
+
+/* Vector form: AN = sign(reg) || sign(reg2) for len-bit values.
+ * Use 1u: "1 << 31" is signed-overflow UB in C when len == 32.  */
+static void _gen_astat_update_an2(TCGv reg, TCGv reg2, TCGv tmp, uint32_t len)
+{
+    TCGv tmp2 = tcg_temp_new();
+    tcg_gen_setcondi_tl(TCG_COND_GEU, tmp, reg, 1u << (len - 1));
+    tcg_gen_setcondi_tl(TCG_COND_GEU, tmp2, reg2, 1u << (len - 1));
+    tcg_gen_or_tl(tmp, tmp, tmp2);
+    tcg_temp_free(tmp2);
+    _gen_astat_store(ASTAT_AN, tmp);
+}
+
+/* Update both AZ and AN from a single len-bit result.  */
+static void _gen_astat_update_nz(TCGv reg, TCGv tmp, uint32_t len)
+{
+    _gen_astat_update_az(reg, tmp);
+    _gen_astat_update_an(reg, tmp, len);
+}
+
+/* Update both AZ and AN from a pair of len-bit (vector) results.  */
+static void _gen_astat_update_nz2(TCGv reg, TCGv reg2, TCGv tmp, uint32_t len)
+{
+    _gen_astat_update_az2(reg, reg2, tmp);
+    _gen_astat_update_an2(reg, reg2, tmp, len);
+}
+
+static void gen_astat_update(DisasContext *dc, bool clear)
+{
+    TCGv tmp = tcg_temp_local_new();
+    uint32_t len = 16;
+
+    switch (dc->astat_op) {
+    case ASTAT_OP_ABS:    /* [0] = ABS( [1] ) */
+        len = 32;
+        /* XXX: Missing V/VS updates */
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, len);
+        break;
+
+    case ASTAT_OP_ABS_VECTOR: /* [0][1] = ABS( [2] ) (V) */
+        /* XXX: Missing V/VS updates */
+        _gen_astat_update_nz2(cpu_astat_arg[0], cpu_astat_arg[1], tmp, len);
+        break;
+
+    case ASTAT_OP_ADD32:    /* [0] = [1] + [2] */
+        /* XXX: Missing V/VS updates */
+        len = 32;
+        tcg_gen_not_tl(tmp, cpu_astat_arg[1]);
+        tcg_gen_setcond_tl(TCG_COND_LTU, tmp, tmp, cpu_astat_arg[2]);
+        _gen_astat_store(ASTAT_AC0, tmp);
+        _gen_astat_store(ASTAT_AC0_COPY, tmp);
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, 32);
+        break;
+
+    case ASTAT_OP_ASHIFT32:
+        len *= 2;
+    case ASTAT_OP_ASHIFT16:
+        tcg_gen_movi_tl(tmp, 0);
+        /* Need to update AC0 ? */
+        _gen_astat_store(ASTAT_V, tmp);
+        _gen_astat_store(ASTAT_V_COPY, tmp);
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, len);
+        break;
+
+    case ASTAT_OP_COMPARE_SIGNED: {
+        TCGv flgs, flgo, overflow, flgn, res = tcg_temp_new();
+        tcg_gen_sub_tl(res, cpu_astat_arg[0], cpu_astat_arg[1]);
+        _gen_astat_update_az(res, tmp);
+        tcg_gen_setcond_tl(TCG_COND_LEU, tmp, cpu_astat_arg[1], cpu_astat_arg[0]);
+        _gen_astat_store(ASTAT_AC0, tmp);
+        _gen_astat_store(ASTAT_AC0_COPY, tmp);
+        /* XXX: This has got to be simpler ... */
+        /* int flgs = srcop >> 31; */
+        flgs = tcg_temp_new();
+        tcg_gen_shri_tl(flgs, cpu_astat_arg[0], 31);
+        /* int flgo = dstop >> 31; */
+        flgo = tcg_temp_new();
+        tcg_gen_shri_tl(flgo, cpu_astat_arg[1], 31);
+        /* int flgn = result >> 31; */
+        flgn = tcg_temp_new();
+        tcg_gen_shri_tl(flgn, res, 31);
+        /* int overflow = (flgs ^ flgo) & (flgn ^ flgs); */
+        overflow = tcg_temp_new();
+        tcg_gen_xor_tl(tmp, flgs, flgo);
+        tcg_gen_xor_tl(overflow, flgn, flgs);
+        tcg_gen_and_tl(overflow, tmp, overflow);
+        /* an = (flgn && !overflow) || (!flgn && overflow); */
+        tcg_gen_not_tl(tmp, overflow);
+        tcg_gen_and_tl(tmp, flgn, tmp);
+        tcg_gen_not_tl(res, flgn);
+        tcg_gen_and_tl(res, res, overflow);
+        tcg_gen_or_tl(tmp, tmp, res);
+        tcg_temp_free(flgn);
+        tcg_temp_free(overflow);
+        tcg_temp_free(flgo);
+        tcg_temp_free(flgs);
+        tcg_temp_free(res);
+        _gen_astat_store(ASTAT_AN, tmp);
+        break;
+    }
+
+    case ASTAT_OP_COMPARE_UNSIGNED:
+        tcg_gen_sub_tl(tmp, cpu_astat_arg[0], cpu_astat_arg[1]);
+        _gen_astat_update_az(tmp, tmp);
+        tcg_gen_setcond_tl(TCG_COND_LEU, tmp, cpu_astat_arg[1], cpu_astat_arg[0]);
+        _gen_astat_store(ASTAT_AC0, tmp);
+        _gen_astat_store(ASTAT_AC0_COPY, tmp);
+        tcg_gen_setcond_tl(TCG_COND_GTU, tmp, cpu_astat_arg[1], cpu_astat_arg[0]);
+        _gen_astat_store(ASTAT_AN, tmp);
+        break;
+
+    case ASTAT_OP_LOGICAL:
+        len = 32;
+        tcg_gen_movi_tl(tmp, 0);
+        /* AC0 is correct ? */
+        _gen_astat_store(ASTAT_AC0, tmp);
+        _gen_astat_store(ASTAT_AC0_COPY, tmp);
+        _gen_astat_store(ASTAT_V, tmp);
+        _gen_astat_store(ASTAT_V_COPY, tmp);
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, len);
+        break;
+
+    case ASTAT_OP_LSHIFT32:
+        len *= 2;
+    case ASTAT_OP_LSHIFT16:
+        _gen_astat_update_az(cpu_astat_arg[0], tmp);
+        /* XXX: should be checking bit shifted */
+        tcg_gen_setcondi_tl(TCG_COND_GEU, tmp, cpu_astat_arg[0], 1 << (len - 1));
+        _gen_astat_store(ASTAT_AN, tmp);
+        /* XXX: No saturation handling ... */
+        tcg_gen_movi_tl(tmp, 0);
+        _gen_astat_store(ASTAT_V, tmp);
+        _gen_astat_store(ASTAT_V_COPY, tmp);
+        break;
+
+    case ASTAT_OP_LSHIFT_RT32:
+        len *= 2;
+    case ASTAT_OP_LSHIFT_RT16:
+        _gen_astat_update_az(cpu_astat_arg[0], tmp);
+        /* XXX: should be checking bit shifted */
+        tcg_gen_setcondi_tl(TCG_COND_GEU, tmp, cpu_astat_arg[0], 1 << (len - 1));
+        _gen_astat_store(ASTAT_AN, tmp);
+        tcg_gen_movi_tl(tmp, 0);
+        _gen_astat_store(ASTAT_V, tmp);
+        _gen_astat_store(ASTAT_V_COPY, tmp);
+        break;
+
+    case ASTAT_OP_MIN_MAX:    /* [0] = MAX/MIN( [1], [2] ) */
+        tcg_gen_movi_tl(tmp, 0);
+        _gen_astat_store(ASTAT_V, tmp);
+        _gen_astat_store(ASTAT_V_COPY, tmp);
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, 32);
+        break;
+
+    case ASTAT_OP_MIN_MAX_VECTOR: /* [0][1] = MAX/MIN( [2], [3] ) (V) */
+        tcg_gen_movi_tl(tmp, 0);
+        _gen_astat_store(ASTAT_V, tmp);
+        _gen_astat_store(ASTAT_V_COPY, tmp);
+        tcg_gen_sari_tl(cpu_astat_arg[0], cpu_astat_arg[0], 16);
+        _gen_astat_update_nz2(cpu_astat_arg[0], cpu_astat_arg[1], tmp, 16);
+        break;
+
+    case ASTAT_OP_NEGATE:    /* [0] = -[1] */
+        len = 32;
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, 32);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, tmp, cpu_astat_arg[0], 1 << (len - 1));
+        _gen_astat_store(ASTAT_V, tmp);
+        /* XXX: Should "VS |= V;" */
+        tcg_gen_setcondi_tl(TCG_COND_EQ, tmp, cpu_astat_arg[0], 0);
+        _gen_astat_store(ASTAT_AC0, tmp);
+        break;
+
+    case ASTAT_OP_SUB32:    /* [0] = [1] - [2] */
+        len = 32;
+        /* XXX: Missing V/VS updates */
+        tcg_gen_setcond_tl(TCG_COND_LEU, tmp, cpu_astat_arg[2], cpu_astat_arg[1]);
+        _gen_astat_store(ASTAT_AC0, tmp);
+        _gen_astat_store(ASTAT_AC0_COPY, tmp);
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, len);
+        break;
+
+    case ASTAT_OP_VECTOR_ADD_ADD:    /* [0][1] = [2] +|+ [3] */
+    case ASTAT_OP_VECTOR_ADD_SUB:    /* [0][1] = [2] +|- [3] */
+    case ASTAT_OP_VECTOR_SUB_SUB:    /* [0][1] = [2] -|- [3] */
+    case ASTAT_OP_VECTOR_SUB_ADD:    /* [0][1] = [2] -|+ [3] */
+        _gen_astat_update_az2(cpu_astat_arg[0], cpu_astat_arg[1], tmp);
+        /* Need AN, AC0/AC1, V */
+        break;
+
+    default:
+        fprintf(stderr, "qemu: unhandled astat op %u\n", dc->astat_op);
+        abort();
+    case ASTAT_OP_DYNAMIC:
+    case ASTAT_OP_NONE:
+        break;
+    }
+
+    tcg_temp_free(tmp);
+
+    if (clear)
+        dc->astat_op = ASTAT_OP_NONE;
+}
+
+static void
+_astat_queue_state(DisasContext *dc, enum astat_ops op, unsigned int num,
+                   TCGv arg0, TCGv arg1, TCGv arg2)
+{
+    dc->astat_op = op;
+    /*tcg_gen_movi_tl(cpu_astat_op, dc->astat_op);*/
+
+    tcg_gen_mov_tl(cpu_astat_arg[0], arg0);
+    if (num > 1)
+        tcg_gen_mov_tl(cpu_astat_arg[1], arg1);
+    else
+        tcg_gen_discard_tl(cpu_astat_arg[1]);
+    if (num > 2)
+        tcg_gen_mov_tl(cpu_astat_arg[2], arg2);
+    else
+        tcg_gen_discard_tl(cpu_astat_arg[2]);
+}
+#define astat_queue_state1(dc, op, arg0)             _astat_queue_state(dc, op, 1, arg0, arg0, arg0)
+#define astat_queue_state2(dc, op, arg0, arg1)       _astat_queue_state(dc, op, 2, arg0, arg1, arg1)
+#define astat_queue_state3(dc, op, arg0, arg1, arg2) _astat_queue_state(dc, op, 3, arg0, arg1, arg2)
+
/* Read the full ASTAT register into REG.  Any queued lazy flag
   computation is materialized first (gen_astat_update with clear=true)
   so the helper sees up-to-date flag state in env.  */
static void gen_astat_load(DisasContext *dc, TCGv reg)
{
    gen_astat_update(dc, true);
    gen_helper_astat_load(reg, cpu_env);
}
+
+static void gen_astat_store(DisasContext *dc, TCGv reg)
+{
+    unsigned int i;
+
+    gen_helper_astat_store(cpu_env, reg);
+
+    dc->astat_op = ASTAT_OP_NONE;
+    /*tcg_gen_movi_tl(cpu_astat_op, dc->astat_op);*/
+
+    for (i = 0; i < ARRAY_SIZE(cpu_astat_arg); ++i)
+        tcg_gen_discard_tl(cpu_astat_arg[i]);
+}
+
+static void interp_insn_bfin(DisasContext *dc);
+
+static void check_breakpoint(CPUArchState *env, DisasContext *dc)
+{
+    CPUBreakpoint *bp;
+
+    if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
+        QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
+            if (bp->pc == dc->pc) {
+                cec_exception(dc, EXCP_DEBUG);
+                dc->is_jmp = DISAS_UPDATE;
+             }
+        }
+    }
+}
+
/*
 * Translate a block of guest Blackfin code starting at tb->pc into TCG
 * ops.  When search_pc is non-zero, also fill the tcg_ctx.gen_opc_*
 * side tables mapping generated-op indices back to guest PCs (consumed
 * by restore_state_to_opc() when an exception hits mid-block).
 */
static void
gen_intermediate_code_internal(CPUArchState *env, TranslationBlock *tb,
                               int search_pc)
{
    uint16_t *gen_opc_end;
    uint32_t pc_start;
    int j, lj;
    struct DisasContext ctx;
    struct DisasContext *dc = &ctx;
    uint32_t next_page_start;
    int num_insns;
    int max_insns;

    pc_start = tb->pc;
    dc->env = env;
    dc->tb = tb;
    /* XXX: handle super/user mode here.  */
    dc->mem_idx = 0;

    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;

    dc->is_jmp = DISAS_NEXT;
    dc->pc = pc_start;
    /* Flag state is unknown on TB entry, so force runtime handling
       until the first flag-setting insn queues a concrete op.  */
    dc->astat_op = ASTAT_OP_DYNAMIC;
    dc->hwloop_callback = gen_hwloop_default;
    dc->disalgnexcpt = 1;

    /* A TB must not cross a guest page boundary.  */
    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
    lj = -1;
    num_insns = 0;
    max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0)
        max_insns = CF_COUNT_MASK;

    gen_tb_start();
    do {
        check_breakpoint(env, dc);

        if (search_pc) {
            /* Record opc-index -> guest-PC, zero-filling the gap left
               by ops emitted for the previous insn.  */
            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
            if (lj < j) {
                lj++;
                while (lj < j)
                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
            }
            tcg_ctx.gen_opc_pc[lj] = dc->pc;
            tcg_ctx.gen_opc_instr_start[lj] = 1;
            tcg_ctx.gen_opc_icount[lj] = num_insns;
        }

        /* Allow I/O on the final insn when CF_LAST_IO is set.  */
        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
            gen_io_start();

        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
            tcg_gen_debug_insn_start(dc->pc);

        /* Decode/translate one insn; the decoder sets dc->insn_len,
           which we use to advance the translation PC.  */
        interp_insn_bfin(dc);
        gen_hwloop_check(dc);
        dc->pc += dc->insn_len;

        ++num_insns;
    } while (!dc->is_jmp &&
             tcg_ctx.gen_opc_ptr < gen_opc_end &&
             !env->singlestep_enabled &&
             !singlestep &&
             dc->pc < next_page_start &&
             num_insns < max_insns);

    if (tb->cflags & CF_LAST_IO)
        gen_io_end();

    if (unlikely(env->singlestep_enabled)) {
        /* Single-step: raise EXCP_DEBUG so the debugger regains
           control after this TB.  */
        cec_exception(dc, EXCP_DEBUG);
    } else {
        switch (dc->is_jmp) {
        case DISAS_NEXT:
            /* Fell off the end of the TB: chain to the next PC.  */
            gen_gotoi_tb(dc, 1, dc->pc);
            break;
        default:
        case DISAS_UPDATE:
            /* indicate that the hash table must be used
               to find the next TB */
            tcg_gen_exit_tb(0);
            break;
        case DISAS_CALL:
        case DISAS_JUMP:
        case DISAS_TB_JUMP:
            /* nothing more to generate */
            break;
        }
    }

    gen_tb_end(tb, num_insns);
    *tcg_ctx.gen_opc_ptr = INDEX_op_end;

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
        qemu_log("----------------\n");
        qemu_log("IN: %s\n", lookup_symbol(pc_start));
        log_target_disas(env, pc_start, dc->pc - pc_start, 0);
        qemu_log("\n");
    }
#endif
    if (search_pc) {
        /* Zero-fill the mapping table out to the last generated op.  */
        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
        lj++;
        while (lj <= j)
            tcg_ctx.gen_opc_instr_start[lj++] = 0;
    } else {
        tb->size = dc->pc - pc_start;
        tb->icount = num_insns;
    }
}
+
+void gen_intermediate_code(CPUArchState *env, struct TranslationBlock *tb)
+{
+    gen_intermediate_code_internal(env, tb, 0);
+}
+
+void gen_intermediate_code_pc(CPUArchState *env, struct TranslationBlock *tb)
+{
+    gen_intermediate_code_internal(env, tb, 1);
+}
+
/* Restore env->pc from the generated-op index -> guest-PC table that
   the search_pc translation pass filled in.  */
void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb, int pc_pos)
{
    env->pc = tcg_ctx.gen_opc_pc[pc_pos];
}
+
+#include "bfin-sim.c"