Patchwork [2/4] Blackfin: initial port

login
register
mail settings
Submitter Mike Frysinger
Date Jan. 24, 2011, 10:29 a.m.
Message ID <1295864975-13703-1-git-send-email-vapier@gentoo.org>
Download mbox | patch
Permalink /patch/80169/
State New
Headers show

Comments

Mike Frysinger - Jan. 24, 2011, 10:29 a.m.
This is the core Blackfin support.  While most things work that gcc will
generate, there are notable things missing at this point:
 - many dsp/alu/mac insns not supported
 - no saturation support
 - many astat flags not updated
 - probably other stuff
Details as to what is missing "by design" vs "not done due to laziness"
can be sorted out in the Blackfin README/TODO files.

FLAT and FDPIC ELFs however seem to work nicely, as do random samplings of
apps from a typical build.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 MAINTAINERS                 |    5 +
 configure                   |    8 +-
 cpu-exec.c                  |    6 +-
 elf.h                       |    6 +
 gdbstub.c                   |  103 +
 scripts/qemu-binfmt-conf.sh |    4 +
 target-bfin/README          |   35 +
 target-bfin/TODO            |    5 +
 target-bfin/bfin-sim.c      | 4760 +++++++++++++++++++++++++++++++++++++++++++
 target-bfin/bfin-tdep.h     |   94 +
 target-bfin/cpu.h           |  215 ++
 target-bfin/exec.h          |   37 +
 target-bfin/helper.c        |   37 +
 target-bfin/helper.h        |   21 +
 target-bfin/op_helper.c     |  213 ++
 target-bfin/translate.c     | 1267 ++++++++++++
 16 files changed, 6813 insertions(+), 3 deletions(-)
 create mode 100644 target-bfin/README
 create mode 100644 target-bfin/TODO
 create mode 100644 target-bfin/bfin-sim.c
 create mode 100644 target-bfin/bfin-tdep.h
 create mode 100644 target-bfin/cpu.h
 create mode 100644 target-bfin/exec.h
 create mode 100644 target-bfin/helper.c
 create mode 100644 target-bfin/helper.h
 create mode 100644 target-bfin/op_helper.c
 create mode 100644 target-bfin/translate.c

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index f20d390..0c71b89 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -65,6 +65,11 @@  M: Paul Brook <paul@codesourcery.com>
 S: Maintained
 F: target-arm/
 
+BLACKFIN
+M: Mike Frysinger <vapier@gentoo.org>
+S: Maintained
+F: target-bfin/
+
 CRIS
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 S: Maintained
diff --git a/configure b/configure
index 350c66a..730039f 100755
--- a/configure
+++ b/configure
@@ -280,7 +280,7 @@  else
 fi
 
 case "$cpu" in
-  alpha|cris|ia64|m68k|microblaze|ppc|ppc64|sparc64)
+  alpha|bfin|cris|ia64|m68k|microblaze|ppc|ppc64|sparc64)
     cpu="$cpu"
   ;;
   i386|i486|i586|i686|i86pc|BePC)
@@ -2478,7 +2478,7 @@  echo "docdir=$docdir" >> $config_host_mak
 echo "confdir=$confdir" >> $config_host_mak
 
 case "$cpu" in
-  i386|x86_64|alpha|cris|hppa|ia64|m68k|microblaze|mips|mips64|ppc|ppc64|s390|s390x|sparc|sparc64)
+  i386|x86_64|alpha|bfin|cris|hppa|ia64|m68k|microblaze|mips|mips64|ppc|ppc64|s390|s390x|sparc|sparc64)
     ARCH=$cpu
   ;;
   armv4b|armv4l)
@@ -2909,6 +2909,10 @@  case "$target_arch2" in
     gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
     target_phys_bits=32
   ;;
+  bfin)
+    bflt="yes"
+    target_phys_bits=32
+  ;;
   cris)
     target_nptl="yes"
     target_phys_bits=32
diff --git a/cpu-exec.c b/cpu-exec.c
index 8c9fb8b..c151cc5 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -266,6 +266,7 @@  int cpu_exec(CPUState *env1)
 #elif defined(TARGET_MICROBLAZE)
 #elif defined(TARGET_MIPS)
 #elif defined(TARGET_SH4)
+#elif defined(TARGET_BFIN)
 #elif defined(TARGET_CRIS)
 #elif defined(TARGET_S390X)
     /* XXXXX */
@@ -328,6 +329,8 @@  int cpu_exec(CPUState *env1)
                     do_interrupt(env);
 #elif defined(TARGET_SH4)
 		    do_interrupt(env);
+#elif defined(TARGET_BFIN)
+                    do_interrupt(env);
 #elif defined(TARGET_ALPHA)
                     do_interrupt(env);
 #elif defined(TARGET_CRIS)
@@ -363,7 +366,7 @@  int cpu_exec(CPUState *env1)
                     }
 #if defined(TARGET_ARM) || defined(TARGET_SPARC) || defined(TARGET_MIPS) || \
     defined(TARGET_PPC) || defined(TARGET_ALPHA) || defined(TARGET_CRIS) || \
-    defined(TARGET_MICROBLAZE)
+    defined(TARGET_MICROBLAZE) || defined(TARGET_BFIN)
                     if (interrupt_request & CPU_INTERRUPT_HALT) {
                         env->interrupt_request &= ~CPU_INTERRUPT_HALT;
                         env->halted = 1;
@@ -663,6 +666,7 @@  int cpu_exec(CPUState *env1)
 #elif defined(TARGET_MICROBLAZE)
 #elif defined(TARGET_MIPS)
 #elif defined(TARGET_SH4)
+#elif defined(TARGET_BFIN)
 #elif defined(TARGET_ALPHA)
 #elif defined(TARGET_CRIS)
 #elif defined(TARGET_S390X)
diff --git a/elf.h b/elf.h
index d2f24f4..0ceaf26 100644
--- a/elf.h
+++ b/elf.h
@@ -104,6 +104,7 @@  typedef int64_t  Elf64_Sxword;
 
 #define EM_H8_300H      47      /* Hitachi H8/300H */
 #define EM_H8S          48      /* Hitachi H8S     */
+#define EM_BLACKFIN	106	/* Analog Devices Blackfin */
 
 /*
  * This is an interim value that we will use until the committee comes
@@ -675,6 +676,11 @@  typedef struct {
 
 #define EF_ALPHA_32BIT		1	/* All addresses are below 2GB */
 
+/* Blackfin specific definitions.  */
+
+#define EF_BFIN_PIC		0x00000001	/* -fpic */
+#define EF_BFIN_FDPIC		0x00000002      /* -mfdpic */
+
 /* HPPA specific definitions.  */
 
 /* Legal values for e_flags field of Elf32_Ehdr.  */
diff --git a/gdbstub.c b/gdbstub.c
index d6556c9..a69bb76 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1462,6 +1462,107 @@  static int cpu_gdb_write_register(CPUState *env, uint8_t *mem_buf, int n)
 
     return r;
 }
+#elif defined (TARGET_BFIN)
+
+#include "target-bfin/bfin-tdep.h"
+
+#define NUM_CORE_REGS BFIN_NUM_REGS
+
+static int cpu_gdb_read_register(CPUState *env, uint8_t *mem_buf, int n)
+{
+    switch (n) {
+        case BFIN_R0_REGNUM ... BFIN_R7_REGNUM:
+            GET_REGL(env->dreg[n - BFIN_R0_REGNUM]); break;
+        case BFIN_P0_REGNUM ... BFIN_FP_REGNUM:
+            GET_REGL(env->preg[n - BFIN_P0_REGNUM]); break;
+        case BFIN_I0_REGNUM ... BFIN_I3_REGNUM:
+            GET_REGL(env->ireg[n - BFIN_I0_REGNUM]); break;
+        case BFIN_M0_REGNUM ... BFIN_M3_REGNUM:
+            GET_REGL(env->mreg[n - BFIN_M0_REGNUM]); break;
+        case BFIN_B0_REGNUM ... BFIN_B3_REGNUM:
+            GET_REGL(env->breg[n - BFIN_B0_REGNUM]); break;
+        case BFIN_L0_REGNUM ... BFIN_L3_REGNUM:
+            GET_REGL(env->lreg[n - BFIN_L0_REGNUM]); break;
+        case BFIN_A0_DOT_X_REGNUM: GET_REGL((env->areg[0] >> 32) & 0xff); break;
+        case BFIN_A0_DOT_W_REGNUM: GET_REGL(env->areg[0]); break;
+        case BFIN_A1_DOT_X_REGNUM: GET_REGL((env->areg[1] >> 32) & 0xff); break;
+        case BFIN_A1_DOT_W_REGNUM: GET_REGL(env->areg[1]); break;
+        case BFIN_ASTAT_REGNUM: GET_REGL(bfin_astat_read(env)); break;
+        case BFIN_RETS_REGNUM: GET_REGL(env->rets); break;
+        case BFIN_LC0_REGNUM: GET_REGL(env->lcreg[0]); break;
+        case BFIN_LT0_REGNUM: GET_REGL(env->ltreg[0]); break;
+        case BFIN_LB0_REGNUM: GET_REGL(env->lbreg[0]); break;
+        case BFIN_LC1_REGNUM: GET_REGL(env->lcreg[1]); break;
+        case BFIN_LT1_REGNUM: GET_REGL(env->ltreg[1]); break;
+        case BFIN_LB1_REGNUM: GET_REGL(env->lbreg[1]); break;
+        case BFIN_CYCLES_REGNUM ... BFIN_CYCLES2_REGNUM:
+            GET_REGL(env->cycles[n - BFIN_CYCLES_REGNUM]); break;
+        case BFIN_USP_REGNUM: GET_REGL(env->uspreg); break;
+        case BFIN_SEQSTAT_REGNUM: GET_REGL(env->seqstat); break;
+        case BFIN_SYSCFG_REGNUM: GET_REGL(env->syscfg); break;
+        case BFIN_RETI_REGNUM: GET_REGL(env->reti); break;
+        case BFIN_RETX_REGNUM: GET_REGL(env->retx); break;
+        case BFIN_RETN_REGNUM: GET_REGL(env->retn); break;
+        case BFIN_RETE_REGNUM: GET_REGL(env->rete); break;
+        case BFIN_PC_REGNUM: GET_REGL(env->pc); break;
+    }
+
+    return 0;
+}
+
+/*
+ * Store the value GDB supplied in MEM_BUF into Blackfin register number N.
+ *
+ * The value is loaded with ldtul_p() and written to the matching field of
+ * ENV.  The accumulators are kept as one 64-bit value each: the .X register
+ * lives in the bits above bit 31 and the .W register in the low 32 bits, so
+ * a write to one half must preserve the other half.
+ *
+ * Returns 4 for every N, including register numbers that are not handled.
+ */
+static int cpu_gdb_write_register(CPUState *env, uint8_t *mem_buf, int n)
+{
+    target_ulong tmpl;
+    int r = 4;
+    tmpl = ldtul_p(mem_buf);
+
+    switch (n) {
+        case BFIN_R0_REGNUM ... BFIN_R7_REGNUM:
+            env->dreg[n - BFIN_R0_REGNUM] = tmpl; break;
+        case BFIN_P0_REGNUM ... BFIN_FP_REGNUM:
+            env->preg[n - BFIN_P0_REGNUM] = tmpl; break;
+        case BFIN_I0_REGNUM ... BFIN_I3_REGNUM:
+            env->ireg[n - BFIN_I0_REGNUM] = tmpl; break;
+        case BFIN_M0_REGNUM ... BFIN_M3_REGNUM:
+            env->mreg[n - BFIN_M0_REGNUM] = tmpl; break;
+        case BFIN_B0_REGNUM ... BFIN_B3_REGNUM:
+            env->breg[n - BFIN_B0_REGNUM] = tmpl; break;
+        case BFIN_L0_REGNUM ... BFIN_L3_REGNUM:
+            env->lreg[n - BFIN_L0_REGNUM] = tmpl; break;
+        case BFIN_A0_DOT_X_REGNUM:
+            env->areg[0] = (env->areg[0] & 0xffffffff) | ((uint64_t)tmpl << 32);
+            break;
+        case BFIN_A0_DOT_W_REGNUM:
+            /* The mask must be 64 bits wide: a plain ~0xffffffff is an
+               unsigned int equal to 0 and would wipe the A0.X bits.  */
+            env->areg[0] = (env->areg[0] & ~0xffffffffull) | tmpl;
+            break;
+        case BFIN_A1_DOT_X_REGNUM:
+            env->areg[1] = (env->areg[1] & 0xffffffff) | ((uint64_t)tmpl << 32);
+            break;
+        case BFIN_A1_DOT_W_REGNUM:
+            /* Same 64-bit mask as for A0.W, so A1.X survives the write.  */
+            env->areg[1] = (env->areg[1] & ~0xffffffffull) | tmpl;
+            break;
+        case BFIN_ASTAT_REGNUM: bfin_astat_write(env, tmpl); break;
+        case BFIN_RETS_REGNUM: env->rets = tmpl; break;
+        case BFIN_LC0_REGNUM: env->lcreg[0] = tmpl; break;
+        case BFIN_LT0_REGNUM: env->ltreg[0] = tmpl; break;
+        case BFIN_LB0_REGNUM: env->lbreg[0] = tmpl; break;
+        case BFIN_LC1_REGNUM: env->lcreg[1] = tmpl; break;
+        case BFIN_LT1_REGNUM: env->ltreg[1] = tmpl; break;
+        case BFIN_LB1_REGNUM: env->lbreg[1] = tmpl; break;
+        case BFIN_CYCLES_REGNUM ... BFIN_CYCLES2_REGNUM:
+            env->cycles[n - BFIN_CYCLES_REGNUM] = tmpl; break;
+        case BFIN_USP_REGNUM: env->uspreg = tmpl; break;
+        case BFIN_SEQSTAT_REGNUM: env->seqstat = tmpl; break;
+        case BFIN_SYSCFG_REGNUM: env->syscfg = tmpl; break;
+        case BFIN_RETI_REGNUM: env->reti = tmpl; break;
+        case BFIN_RETX_REGNUM: env->retx = tmpl; break;
+        case BFIN_RETN_REGNUM: env->retn = tmpl; break;
+        case BFIN_RETE_REGNUM: env->rete = tmpl; break;
+        case BFIN_PC_REGNUM: env->pc = tmpl; break;
+    }
+
+    return r;
+}
 #else
 
 #define NUM_CORE_REGS 0
@@ -1737,6 +1838,8 @@  static void gdb_set_cpu_pc(GDBState *s, target_ulong pc)
 #elif defined (TARGET_S390X)
     cpu_synchronize_state(s->c_cpu);
     s->c_cpu->psw.addr = pc;
+#elif defined (TARGET_BFIN)
+    s->c_cpu->pc = pc;
 #endif
 }
 
diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh
index c50beb7..e2a65a2 100644
--- a/scripts/qemu-binfmt-conf.sh
+++ b/scripts/qemu-binfmt-conf.sh
@@ -41,6 +41,10 @@  if [ $cpu != "arm" ] ; then
     echo   ':arm:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-arm:' > /proc/sys/fs/binfmt_misc/register
     echo   ':armeb:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-armeb:' > /proc/sys/fs/binfmt_misc/register
 fi
+if [ $cpu != "bfin" ] ; then
+    echo   ':bfin:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00''\x02\x00''\x6A\x00''::/usr/local/bin/qemu-bfin:' > /proc/sys/fs/binfmt_misc/register
+    echo   ':bfin-flat:M::bFLT\x00\x00\x00\x04::/usr/local/bin/qemu-bfin:' > /proc/sys/fs/binfmt_misc/register
+fi
 if [ $cpu != "sparc" ] ; then
     echo   ':sparc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-sparc:' > /proc/sys/fs/binfmt_misc/register
 fi
diff --git a/target-bfin/README b/target-bfin/README
new file mode 100644
index 0000000..b989d55
--- /dev/null
+++ b/target-bfin/README
@@ -0,0 +1,35 @@ 
+------------------
+Blackfin QEMU port
+------------------
+
+There are some things we don't bother handling in the port for speed reasons.
+If you want an accurate (but not as fast) simulator, then use the GNU sim as
+found in the GNU toolchain (part of gdb).
+
+Things we do not handle by design:
+
+	- invalid parallel instruction combinations
+		- no toolchain will output these
+		- things like jumps
+
+	- invalid register combinations
+		- some insns cannot have same register be both source and dest
+		- no toolchain will output these
+
+	- transactional parallel instructions
+		- on the hardware, if a load/store causes an exception, the other
+		  insns do not change register states either.  in qemu, they do,
+		  but since those exceptions will kill the program anyways, who
+		  cares.  no intermediate store buffers!
+
+	- unaligned memory access exceptions
+		- qemu itself doesn't support this for targets
+
+	- AC0_COPY and V_COPY
+		- no one has ever used these instead of AC0 or V
+
+	- no support for RND_MOD
+
+There are a few insns/modes we don't currently handle, mostly because nothing
+really uses them, so we haven't bothered.  If these matter to you, then feel
+free to request support for them.
diff --git a/target-bfin/TODO b/target-bfin/TODO
new file mode 100644
index 0000000..fc1e69b
--- /dev/null
+++ b/target-bfin/TODO
@@ -0,0 +1,5 @@ 
+CONFIG_NEED_MMU mmu.c
+
+CEC behavior in user-emulation (SP vs USP)
+
+see if making a global "0", "1", "2", and "4" register speeds things up
diff --git a/target-bfin/bfin-sim.c b/target-bfin/bfin-sim.c
new file mode 100644
index 0000000..2b95e74
--- /dev/null
+++ b/target-bfin/bfin-sim.c
@@ -0,0 +1,4760 @@ 
+/* Simulator for Analog Devices Blackfin processors.
+ *
+ * Copyright 2005-2011 Mike Frysinger
+ * Copyright 2005-2011 Analog Devices, Inc.
+ *
+ * Licensed under the GPL 2 or later.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#define TRACE_INSN(cpu, fmt, args...) do { if (0) qemu_log_mask(CPU_LOG_TB_IN_ASM, fmt "\n", ## args); } while (0)
+#define TRACE_EXTRACT(fmt, args...) do { if (1) qemu_log_mask(CPU_LOG_TB_CPU, fmt "\n", ## args); } while (0)
+
+static void
+illegal_instruction(DisasContext *dc)
+{
+    cec_exception(dc, EXCP_UNDEF_INST);
+}
+
+/*
+static void
+illegal_instruction_combination(DisasContext *dc)
+{
+    cec_exception(dc, EXCP_ILL_INST);
+}
+*/
+
+static void
+unhandled_instruction(DisasContext *dc, const char *insn)
+{
+    fprintf(stderr, "unhandled insn: %s\n", insn);
+    illegal_instruction(dc);
+}
+
+#define M_S2RND 1
+#define M_T     2
+#define M_W32   3
+#define M_FU    4
+#define M_TFU   6
+#define M_IS    8
+#define M_ISS2  9
+#define M_IH    11
+#define M_IU    12
+
+/* Valid flag settings */
+#define is_macmod_pmove(x) \
+    (((x) == 0)       || \
+     ((x) == M_IS)    || \
+     ((x) == M_FU)    || \
+     ((x) == M_S2RND) || \
+     ((x) == M_ISS2)  || \
+     ((x) == M_IU))
+
+#define is_macmod_hmove(x) \
+    (((x) == 0)       || \
+     ((x) == M_IS)    || \
+     ((x) == M_FU)    || \
+     ((x) == M_IU)    || \
+     ((x) == M_T)     || \
+     ((x) == M_TFU)   || \
+     ((x) == M_S2RND) || \
+     ((x) == M_ISS2)  || \
+     ((x) == M_IH))
+
+typedef enum {
+    c_0, c_1, c_4, c_2, c_uimm2, c_uimm3, c_imm3, c_pcrel4,
+    c_imm4, c_uimm4s4, c_uimm4s4d, c_uimm4, c_uimm4s2, c_negimm5s4, c_imm5, c_imm5d, c_uimm5, c_imm6,
+    c_imm7, c_imm7d, c_imm8, c_uimm8, c_pcrel8, c_uimm8s4, c_pcrel8s4, c_lppcrel10, c_pcrel10,
+    c_pcrel12, c_imm16s4, c_luimm16, c_imm16, c_imm16d, c_huimm16, c_rimm16, c_imm16s2, c_uimm16s4,
+    c_uimm16s4d, c_uimm16, c_pcrel24, c_uimm32, c_imm32, c_huimm32, c_huimm32e,
+} const_forms_t;
+
+static const struct {
+    const char *name;
+    const int nbits;
+    const char reloc;
+    const char issigned;
+    const char pcrel;
+    const char scale;
+    const char offset;
+    const char negative;
+    const char positive;
+    const char decimal;
+    const char leading;
+    const char exact;
+} constant_formats[] = {
+    { "0",          0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "1",          0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "4",          0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "2",          0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "uimm2",      2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "uimm3",      3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm3",       3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "pcrel4",     4, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0},
+    { "imm4",       4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "uimm4s4",    4, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0},
+    { "uimm4s4d",   4, 0, 0, 0, 2, 0, 0, 1, 1, 0, 0},
+    { "uimm4",      4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "uimm4s2",    4, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0},
+    { "negimm5s4",  5, 0, 1, 0, 2, 0, 1, 0, 0, 0, 0},
+    { "imm5",       5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm5d",      5, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0},
+    { "uimm5",      5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm6",       6, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm7",       7, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm7d",      7, 0, 1, 0, 0, 0, 0, 0, 1, 3, 0},
+    { "imm8",       8, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "uimm8",      8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "pcrel8",     8, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0},
+    { "uimm8s4",    8, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0},
+    { "pcrel8s4",   8, 1, 1, 1, 2, 0, 0, 0, 0, 0, 0},
+    { "lppcrel10", 10, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0},
+    { "pcrel10",   10, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
+    { "pcrel12",   12, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
+    { "imm16s4",   16, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0},
+    { "luimm16",   16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm16",     16, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm16d",    16, 0, 1, 0, 0, 0, 0, 0, 1, 3, 0},
+    { "huimm16",   16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "rimm16",    16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm16s2",   16, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0},
+    { "uimm16s4",  16, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0},
+    { "uimm16s4d", 16, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0},
+    { "uimm16",    16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "pcrel24",   24, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
+    { "uimm32",    32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "imm32",     32, 0, 1, 0, 0, 0, 0, 0, 1, 3, 0},
+    { "huimm32",   32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    { "huimm32e",  32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1},
+};
+
+#define HOST_LONG_WORD_SIZE (sizeof(long) * 8)
+
+/*
+ * Sign-extend the low N bits of V to a 32-bit signed value.
+ *
+ * This deliberately avoids shifting by (HOST_LONG_WORD_SIZE - N): on hosts
+ * where long is 64 bits that shifts a 32-bit operand by 32 or more, which is
+ * undefined behaviour.  N == 0 and N >= 32 return V unchanged.
+ */
+static inline int32_t
+bfin_signextend32(uint32_t v, unsigned int n)
+{
+    uint32_t sign;
+
+    if (n == 0 || n >= 32)
+        return (int32_t)v;
+
+    sign = (uint32_t)1 << (n - 1);
+    v &= (sign << 1) - 1;            /* drop everything above bit N-1 */
+    return (int32_t)((v ^ sign) - sign);
+}
+#define SIGNEXTEND(v, n) bfin_signextend32((v), (n))
+
+/*
+ * Convert the raw instruction field X into its value for constant format CF.
+ *
+ * Formats flagged "negative" get an implied sign bit set just above their
+ * nbits field bits and are then sign-extended; formats flagged "issigned" are
+ * sign-extended from nbits.  The format's offset is added and the result is
+ * shifted left by the format's scale.
+ */
+static uint32_t
+fmtconst_val(const_forms_t cf, uint32_t x)
+{
+    /* Negative constants have an implied sign bit.  */
+    if (constant_formats[cf].negative) {
+        int nb = constant_formats[cf].nbits + 1;
+        x |= (uint32_t)1 << constant_formats[cf].nbits;
+        x = SIGNEXTEND(x, nb);
+    } else if (constant_formats[cf].issigned) {
+        x = SIGNEXTEND(x, constant_formats[cf].nbits);
+    }
+
+    x += constant_formats[cf].offset;
+    x <<= constant_formats[cf].scale;
+
+    return x;
+}
+
+#define uimm16s4(x)  fmtconst_val(c_uimm16s4, x)
+#define uimm16s4d(x) fmtconst_val(c_uimm16s4d, x)
+#define pcrel4(x)    fmtconst_val(c_pcrel4, x)
+#define pcrel8(x)    fmtconst_val(c_pcrel8, x)
+#define pcrel8s4(x)  fmtconst_val(c_pcrel8s4, x)
+#define pcrel10(x)   fmtconst_val(c_pcrel10, x)
+#define pcrel12(x)   fmtconst_val(c_pcrel12, x)
+#define negimm5s4(x) fmtconst_val(c_negimm5s4, x)
+#define rimm16(x)    fmtconst_val(c_rimm16, x)
+#define huimm16(x)   fmtconst_val(c_huimm16, x)
+#define imm16(x)     fmtconst_val(c_imm16, x)
+#define imm16d(x)    fmtconst_val(c_imm16d, x)
+#define uimm2(x)     fmtconst_val(c_uimm2, x)
+#define uimm3(x)     fmtconst_val(c_uimm3, x)
+#define luimm16(x)   fmtconst_val(c_luimm16, x)
+#define uimm4(x)     fmtconst_val(c_uimm4, x)
+#define uimm5(x)     fmtconst_val(c_uimm5, x)
+#define imm16s2(x)   fmtconst_val(c_imm16s2, x)
+#define uimm8(x)     fmtconst_val(c_uimm8, x)
+#define imm16s4(x)   fmtconst_val(c_imm16s4, x)
+#define uimm4s2(x)   fmtconst_val(c_uimm4s2, x)
+#define uimm4s4(x)   fmtconst_val(c_uimm4s4, x)
+#define uimm4s4d(x)  fmtconst_val(c_uimm4s4d, x)
+#define lppcrel10(x) fmtconst_val(c_lppcrel10, x)
+#define imm3(x)      fmtconst_val(c_imm3, x)
+#define imm4(x)      fmtconst_val(c_imm4, x)
+#define uimm8s4(x)   fmtconst_val(c_uimm8s4, x)
+#define imm5(x)      fmtconst_val(c_imm5, x)
+#define imm5d(x)     fmtconst_val(c_imm5d, x)
+#define imm6(x)      fmtconst_val(c_imm6, x)
+#define imm7(x)      fmtconst_val(c_imm7, x)
+#define imm7d(x)     fmtconst_val(c_imm7d, x)
+#define imm8(x)      fmtconst_val(c_imm8, x)
+#define pcrel24(x)   fmtconst_val(c_pcrel24, x)
+#define uimm16(x)    fmtconst_val(c_uimm16, x)
+#define uimm32(x)    fmtconst_val(c_uimm32, x)
+#define imm32(x)     fmtconst_val(c_imm32, x)
+#define huimm32(x)   fmtconst_val(c_huimm32, x)
+#define huimm32e(x)  fmtconst_val(c_huimm32e, x)
+
+/* Table C-4. Core Register Encoding Map */
+const char * const greg_names[] = {
+    "R0",    "R1",      "R2",     "R3",    "R4",    "R5",    "R6",     "R7",
+    "P0",    "P1",      "P2",     "P3",    "P4",    "P5",    "SP",     "FP",
+    "I0",    "I1",      "I2",     "I3",    "M0",    "M1",    "M2",     "M3",
+    "B0",    "B1",      "B2",     "B3",    "L0",    "L1",    "L2",     "L3",
+    "A0.X",  "A0.W",    "A1.X",   "A1.W",  "<res>", "<res>", "ASTAT",  "RETS",
+    "<res>", "<res>",   "<res>",  "<res>", "<res>", "<res>", "<res>",  "<res>",
+    "LC0",   "LT0",     "LB0",    "LC1",   "LT1",   "LB1",   "CYCLES", "CYCLES2",
+    "USP",   "SEQSTAT", "SYSCFG", "RETI",  "RETX",  "RETN",  "RETE",   "EMUDAT",
+};
+
+const char *
+get_allreg_name(int grp, int reg)
+{
+    return greg_names[(grp << 3) | reg];
+}
+
+/*
+static bool
+reg_is_reserved(int grp, int reg)
+{
+    return (grp == 4 && (reg == 4 || reg == 5)) || (grp == 5);
+}
+*/
+
+static TCGv * const cpu_regs[] = {
+    &cpu_dreg[0], &cpu_dreg[1], &cpu_dreg[2], &cpu_dreg[3], &cpu_dreg[4], &cpu_dreg[5], &cpu_dreg[6], &cpu_dreg[7],
+    &cpu_preg[0], &cpu_preg[1], &cpu_preg[2], &cpu_preg[3], &cpu_preg[4], &cpu_preg[5], &cpu_preg[6], &cpu_preg[7],
+    &cpu_ireg[0], &cpu_ireg[1], &cpu_ireg[2], &cpu_ireg[3], &cpu_mreg[0], &cpu_mreg[1], &cpu_mreg[2], &cpu_mreg[3],
+    &cpu_breg[0], &cpu_breg[1], &cpu_breg[2], &cpu_breg[3], &cpu_lreg[0], &cpu_lreg[1], &cpu_lreg[2], &cpu_lreg[3],
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, &cpu_rets,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    &cpu_lcreg[0], &cpu_ltreg[0], &cpu_lbreg[0], &cpu_lcreg[1], &cpu_ltreg[1], &cpu_lbreg[1], &cpu_cycles[0], &cpu_cycles[1],
+    &cpu_uspreg, &cpu_seqstat, &cpu_syscfg, &cpu_reti, &cpu_retx, &cpu_retn, &cpu_rete, &cpu_emudat,
+};
+
+/*
+ * Return the TCG value registered in cpu_regs[] for register REG of group
+ * GRP (index (GRP << 3) | REG, the same layout as greg_names[]).
+ *
+ * Slots with no entry in cpu_regs[] (NULL) cannot be returned by value, so
+ * they abort().  NOTE(review): the original had an unreachable
+ * illegal_instruction(dc) after the abort(); presumably such accesses should
+ * eventually raise that exception instead -- confirm before changing.
+ */
+static TCGv
+get_allreg(DisasContext *dc, int grp, int reg)
+{
+    TCGv *ret = cpu_regs[(grp << 3) | reg];
+
+    if (!ret)
+        abort();
+
+    return *ret;
+}
+
+#if 0
+static const char *
+amod0amod2 (int s0, int x0, int aop0)
+{
+  if (s0 == 1 && x0 == 0 && aop0 == 0)
+    return " (S)";
+  else if (s0 == 0 && x0 == 1 && aop0 == 0)
+    return " (CO)";
+  else if (s0 == 1 && x0 == 1 && aop0 == 0)
+    return " (SCO)";
+  else if (s0 == 0 && x0 == 0 && aop0 == 2)
+    return " (ASR)";
+  else if (s0 == 1 && x0 == 0 && aop0 == 2)
+    return " (S, ASR)";
+  else if (s0 == 0 && x0 == 1 && aop0 == 2)
+    return  " (CO, ASR)";
+  else if (s0 == 1 && x0 == 1 && aop0 == 2)
+    return " (SCO, ASR)";
+  else if (s0 == 0 && x0 == 0 && aop0 == 3)
+    return " (ASL)";
+  else if (s0 == 1 && x0 == 0 && aop0 == 3)
+    return " (S, ASL)";
+  else if (s0 == 0 && x0 == 1 && aop0 == 3)
+    return " (CO, ASL)";
+  else if (s0 == 1 && x0 == 1 && aop0 == 3)
+    return " (SCO, ASL)";
+  return "";
+}
+#endif
+
+static const char *
+amod1 (int s0, int x0)
+{
+  if (s0 == 0 && x0 == 0)
+    return " (NS)";
+  else if (s0 == 1 && x0 == 0)
+    return " (S)";
+  return "";
+}
+
+static const char *
+amod0 (int s0, int x0)
+{
+  if (s0 == 1 && x0 == 0)
+    return " (S)";
+  else if (s0 == 0 && x0 == 1)
+    return " (CO)";
+  else if (s0 == 1 && x0 == 1)
+    return " (SCO)";
+  return "";
+}
+
+static void
+reg_check_sup(DisasContext *dc, int grp, int reg)
+{
+    if (grp == 7)
+        cec_require_supervisor(dc);
+}
+
+/* Multiply one 16-bit half of D register SRC0 by one of SRC1; H0 and H1
+   select the high (1) or low (0) half of each source.  MMOD and MM choose
+   signed vs unsigned operands and whether the shift correction is applied.
+   *PSAT is currently always set to 0: saturation is not reported yet.  */
+static TCGv
+decode_multfunc_tl(DisasContext *dc, int h0, int h1, int src0, int src1,
+                   int mmod, int MM, int *psat)
+{
+    TCGv s0, s1, val;
+
+    s0 = tcg_temp_local_new();
+    if (h0)
+        tcg_gen_shri_tl(s0, cpu_dreg[src0], 16);
+    else
+        tcg_gen_andi_tl(s0, cpu_dreg[src0], 0xffff);
+
+    s1 = tcg_temp_local_new();
+    if (h1)
+        tcg_gen_shri_tl(s1, cpu_dreg[src1], 16);
+    else
+        tcg_gen_andi_tl(s1, cpu_dreg[src1], 0xffff);
+
+    if (MM)
+        tcg_gen_ext16s_tl(s0, s0);
+    else
+        switch (mmod) {
+        case 0:
+        case M_S2RND:
+        case M_T:
+        case M_IS:
+        case M_ISS2:
+        case M_IH:
+        case M_W32:
+            tcg_gen_ext16s_tl(s0, s0);
+            tcg_gen_ext16s_tl(s1, s1);
+            break;
+        case M_FU:
+        case M_IU:
+        case M_TFU:
+            break;
+        default:
+            illegal_instruction(dc);
+        }
+
+    val = tcg_temp_local_new();
+    tcg_gen_mul_tl(val, s0, s1);
+    tcg_temp_free(s0);
+    tcg_temp_free(s1);
+
+    /* Perform shift correction if appropriate for the mode.  */
+    *psat = 0;
+    if (!MM && (mmod == 0 || mmod == M_T || mmod == M_S2RND || mmod == M_W32)) {
+        int l, endl;
+
+        l = gen_new_label();
+        endl = gen_new_label();
+
+        tcg_gen_brcondi_tl(TCG_COND_NE, val, 0x40000000, l);
+        if (mmod == M_W32)
+            tcg_gen_movi_tl(val, 0x7fffffff);
+        else
+            tcg_gen_movi_tl(val, 0x80000000);
+//          *psat = 1;
+        tcg_gen_br(endl);
+
+        gen_set_label(l);
+        tcg_gen_shli_tl(val, val, 1);
+
+        gen_set_label(endl);
+    }
+
+    return val;
+}
+
+/*
+ * 64-bit wrapper around decode_multfunc_tl(): computes the same 32-bit
+ * product and widens it into a new local 64-bit temporary, which the caller
+ * must free.
+ *
+ * In the signed modes the product is sign-extended from bit 31.  The
+ * original zero-extended first and then shifted left/right by 64 - 40, which
+ * changes nothing because bits 32..39 are always zero after a
+ * zero-extension, so negative products were never sign-extended.  All other
+ * modes are zero-extended.
+ */
+static TCGv_i64
+decode_multfunc_i64(DisasContext *dc, int h0, int h1, int src0, int src1,
+                    int mmod, int MM, int *psat)
+{
+    TCGv val;
+    TCGv_i64 val1;
+
+    val = decode_multfunc_tl(dc, h0, h1, src0, src1, mmod, MM, psat);
+    val1 = tcg_temp_local_new_i64();
+
+    if (mmod == 0 || mmod == M_IS || mmod == M_T || mmod == M_S2RND ||
+        mmod == M_ISS2 || mmod == M_IH || (MM && mmod == M_FU)) {
+        /* Signed result: propagate bit 31 into the upper bits */
+        tcg_gen_ext_i32_i64(val1, val);
+    } else {
+        tcg_gen_extu_i32_i64(val1, val);
+    }
+    tcg_temp_free(val);
+
+//  if (*psat)
+//    val1 &= 0xFFFFFFFFull;
+
+    return val1;
+}
+
+#if 0
+static bu40
+saturate_s40_astat (bu64 val, bu32 *v)
+{
+  if ((bs64)val < -((bs64)1 << 39))
+    {
+      *v = 1;
+      return -((bs64)1 << 39);
+    }
+  else if ((bs64)val >= ((bs64)1 << 39) - 1)
+    {
+      *v = 1;
+      return ((bu64)1 << 39) - 1;
+    }
+  *v = 0; /* no overflow */
+  return val;
+}
+
+static bu40
+saturate_s40 (bu64 val)
+{
+  bu32 v;
+  return saturate_s40_astat (val, &v);
+}
+
+static bu32
+saturate_s32(bu64 val, bu32 *overflow)
+{
+  if ((bs64)val < -0x80000000ll)
+    {
+      if (overflow)
+	*overflow = 1;
+      return 0x80000000;
+    }
+  if ((bs64)val > 0x7fffffff)
+    {
+      if (overflow)
+	*overflow = 1;
+      return 0x7fffffff;
+    }
+  return val;
+}
+
+static bu32
+saturate_u32(bu64 val, bu32 *overflow)
+{
+  if (val > 0xffffffff)
+    {
+      if (overflow)
+	*overflow = 1;
+      return 0xffffffff;
+    }
+  return val;
+}
+
+static bu32
+saturate_u16(bu64 val, bu32 *overflow)
+{
+  if (val > 0xffff)
+    {
+      if (overflow)
+	*overflow = 1;
+      return 0xffff;
+    }
+  return val;
+}
+
+static bu64
+rnd16(bu64 val)
+{
+  bu64 sgnbits;
+
+  /* FIXME: Should honour rounding mode.  */
+  if ((val & 0xffff) > 0x8000
+      || ((val & 0xffff) == 0x8000 && (val & 0x10000)))
+    val += 0x8000;
+
+  sgnbits = val & 0xffff000000000000ull;
+  val >>= 16;
+  return val | sgnbits;
+}
+
+static bu64
+trunc16(bu64 val)
+{
+  bu64 sgnbits = val & 0xffff000000000000ull;
+  val >>= 16;
+  return val | sgnbits;
+}
+
+/* Extract a 16 or 32 bit value from a 64 bit multiplication result.
+   These 64 bits must be sign- or zero-extended properly from the source
+   we want to extract, either a 32 bit multiply or a 40 bit accumulator.  */
+static TCGv
+extract_mult(DisasContext *dc, bu64 res, int mmod, int MM,
+             int fullword, int *overflow)
+{
+    if (fullword)
+        switch (mmod) {
+        case 0:
+        case M_IS:
+            return saturate_s32(res, overflow);
+        case M_FU:
+            if (MM)
+                return saturate_s32(res, overflow);
+            return saturate_u32(res, overflow);
+        case M_S2RND:
+        case M_ISS2:
+            return saturate_s32(res << 1, overflow);
+        default:
+            illegal_instruction(dc);
+        }
+    else
+        switch (mmod) {
+        case 0:
+        case M_W32:
+            return saturate_s16(rnd16(res), overflow);
+        case M_IH:
+            return saturate_s32(rnd16(res), overflow) & 0xFFFF;
+        case M_IS:
+            return saturate_s16(res, overflow);
+        case M_FU:
+            if (MM)
+                return saturate_s16(rnd16(res), overflow);
+            return saturate_u16(rnd16(res), overflow);
+        case M_IU:
+            if (MM)
+                return saturate_s16(res, overflow);
+            return saturate_u16(res, overflow);
+
+        case M_T:
+            return saturate_s16(trunc16(res), overflow);
+        case M_TFU:
+            return saturate_u16(trunc16(res), overflow);
+
+        case M_S2RND:
+            return saturate_s16(rnd16(res << 1), overflow);
+        case M_ISS2:
+            return saturate_s16(res << 1, overflow);
+        default:
+            illegal_instruction(dc);
+        }
+}
+#endif
+
+/*
+ * Emit the TCG ops for one MAC (multiply-accumulate) operation on
+ * accumulator cpu_areg[which] and return its low 32 bits.
+ *
+ * For any op other than 3 the product is generated with
+ * decode_multfunc_i64() (h0, h1, src0, src1, mmod and MM are passed straight
+ * through); op 0 then loads it into the accumulator, op 1 adds it and op 2
+ * subtracts it.  With op 3 the accumulator is left untouched.
+ *
+ * The result is a freshly allocated local temp that this function does not
+ * free.  fullword and overflow are accepted but not used yet.
+ *
+ * TODO: accumulator sign extension, saturation, the AV/AVS flag updates and
+ * the final extract step are still missing; the pseudo code kept in the
+ * comments below describes what remains to be implemented.
+ */
+static TCGv
+decode_macfunc (DisasContext *dc, int which, int op, int h0, int h1, int src0,
+                int src1, int mmod, int MM, int fullword, int *overflow)
+{
+    TCGv_i64 acc = cpu_areg[which];
+    TCGv lo;
+    int tsat;
+
+    /*
+     * TODO: sign extend the accumulator if necessary, otherwise treat it as
+     * unsigned:
+     *
+     *   if (mmod == 0 || mmod == M_T || mmod == M_IS || mmod == M_ISS2 ||
+     *       mmod == M_S2RND || mmod == M_IH || mmod == M_W32)
+     *     acc = get_extended_acc (cpu, which);
+     *   else
+     *     acc = get_unextended_acc (cpu, which);
+     *
+     *   if (MM && (mmod == M_T || mmod == M_IS || mmod == M_ISS2 ||
+     *              mmod == M_S2RND || mmod == M_IH || mmod == M_W32))
+     *     acc |= -(acc & 0x80000000);
+     */
+
+    if (op != 3) {
+        /* this can't saturate, so we don't keep track of the sat flag */
+        TCGv_i64 res = decode_multfunc_i64(dc, h0, h1, src0, src1, mmod, MM, &tsat);
+
+        /* Perform accumulation.  */
+        switch (op) {
+        case 0:
+            tcg_gen_mov_i64(acc, res);
+            break;
+        case 1:
+            tcg_gen_add_i64(acc, acc, res);
+            break;
+        case 2:
+            tcg_gen_sub_i64(acc, acc, res);
+            break;
+        default:
+            /* Any other op only generates the product.  */
+            break;
+        }
+        tcg_temp_free_i64(res);
+
+        /*
+         * TODO: saturate.  Reference pseudo code (sgn0 is bit 31 of the
+         * accumulator sampled before the add/sub, or after the load when
+         * op == 0; sat starts out as 0):
+         *
+         *   switch (mmod)
+         *     {
+         *     case 0:
+         *     case M_T:
+         *     case M_IS:
+         *     case M_ISS2:
+         *     case M_S2RND:
+         *       if ((bs64)acc < -((bs64)1 << 39))
+         *         acc = -((bu64)1 << 39), sat = 1;
+         *       else if ((bs64)acc > 0x7fffffffffll)
+         *         acc = 0x7fffffffffull, sat = 1;
+         *       break;
+         *     case M_TFU:
+         *       if (!MM && acc > 0xFFFFFFFFFFull)
+         *         acc = 0x0, sat = 1;
+         *       if (MM && acc > 0xFFFFFFFF)
+         *         acc &= 0xFFFFFFFF;
+         *       break;
+         *     case M_IU:
+         *       if (acc & 0x8000000000000000ull)
+         *         acc = 0x0, sat = 1;
+         *       if (acc > 0xFFFFFFFFFFull)
+         *         acc &= 0xFFFFFFFFFFull, sat = 1;
+         *       if (MM && acc > 0xFFFFFFFF)
+         *         acc &= 0xFFFFFFFF;
+         *       if (acc & 0x80000000)
+         *         acc |= 0xffffffff00000000ull;
+         *       break;
+         *     case M_FU:
+         *       if (!MM && (bs64)acc < 0)
+         *         acc = 0x0, sat = 1;
+         *       if (MM && (bs64)acc < -((bs64)1 << 39))
+         *         acc = -((bu64)1 << 39), sat = 1;
+         *       if (!MM && (bs64)acc > (bs64)0xFFFFFFFFFFll)
+         *         acc = 0xFFFFFFFFFFull, sat = 1;
+         *       if (MM && acc > 0xFFFFFFFFFFull)
+         *         acc &= 0xFFFFFFFFFFull;
+         *       if (MM && acc & 0x80000000)
+         *         acc |= 0xffffffff00000000ull;
+         *       break;
+         *     case M_IH:
+         *       if ((bs64)acc < -0x80000000ll)
+         *         acc = -0x80000000ull, sat = 1;
+         *       else if ((bs64)acc >= 0x7fffffffll)
+         *         acc = 0x7fffffffull, sat = 1;
+         *       break;
+         *     case M_W32:
+         *       if (sgn0 && (sgn0 != ((acc >> 31) & 1))
+         *           && (((acc >> 32) & 0xFF) == 0xff))
+         *         acc = 0x80000000;
+         *       acc &= 0xffffffff;
+         *       if (acc & 0x80000000)
+         *         acc |= 0xffffffff00000000ull;
+         *       break;
+         *     default:
+         *       illegal_instruction(dc);
+         *     }
+         */
+    }
+
+    /*
+     * TODO: write back the split accumulator and the overflow flags, then
+     * extract the result according to mmod:
+     *
+     *   STORE (AXREG (which), (acc >> 32) & 0xff);
+     *   STORE (AWREG (which), acc & 0xffffffff);
+     *   STORE (ASTATREG (av[which]), sat);
+     *   if (sat)
+     *     STORE (ASTATREG (avs[which]), sat);
+     *
+     *   return extract_mult (cpu, acc, mmod, MM, fullword, overflow);
+     */
+    lo = tcg_temp_local_new();
+    tcg_gen_trunc_i64_i32(lo, acc);
+    return lo;
+}
+
+/*
+ * Decode the 16-bit ProgCtrl instruction group (returns, syncs, indirect
+ * jumps/calls, RAISE/EXCPT and TESTSET).
+ *
+ * Branching forms only record the branch kind, callback and target in dc.
+ * Supervisor-only forms (RTI/RTX/RTN/RTE/CLI/STI/RAISE) just call
+ * cec_require_supervisor().  Any prgfunc/poprnd pair not listed below ends
+ * up in illegal_instruction().
+ */
+static void
+decode_ProgCtrl_0(DisasContext *dc, uint16_t iw0)
+{
+    /* ProgCtrl
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |.prgfunc.......|.poprnd........|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int poprnd  = ((iw0 >> ProgCtrl_poprnd_bits) & ProgCtrl_poprnd_mask);
+    int prgfunc = ((iw0 >> ProgCtrl_prgfunc_bits) & ProgCtrl_prgfunc_mask);
+
+    TRACE_EXTRACT("%s: poprnd:%i prgfunc:%i", __func__, poprnd, prgfunc);
+
+    switch (prgfunc) {
+    case 0:
+        if (poprnd == 0) {
+            /* NOP */
+            return;
+        }
+        break;
+
+    case 1:
+        switch (poprnd) {
+        case 0:
+            /* RTS; */
+            dc->is_jmp = DISAS_JUMP;
+            dc->hwloop_callback = gen_hwloop_br_direct;
+            dc->hwloop_data = &cpu_rets;
+            return;
+        case 1: /* RTI; */
+        case 2: /* RTX; */
+        case 3: /* RTN; */
+        case 4: /* RTE; */
+            cec_require_supervisor (dc);
+            return;
+        default:
+            break;
+        }
+        break;
+
+    case 2:
+        switch (poprnd) {
+        case 0: /* IDLE; */
+        case 3: /* CSYNC; */
+        case 4: /* SSYNC; */
+            /* just NOP it */
+            return;
+        case 5:
+            /* EMUEXCPT; */
+            cec_exception(dc, EXCP_DEBUG);
+            return;
+        default:
+            break;
+        }
+        break;
+
+    case 3: /* CLI Dreg{poprnd}; */
+    case 4: /* STI Dreg{poprnd}; */
+        if (poprnd < 8) {
+            cec_require_supervisor (dc);
+            return;
+        }
+        break;
+
+    case 5:
+        if (poprnd < 8) {
+            /* JUMP (Preg{poprnd}); */
+            dc->is_jmp = DISAS_JUMP;
+            dc->hwloop_callback = gen_hwloop_br_direct;
+            dc->hwloop_data = &cpu_preg[poprnd];
+            return;
+        }
+        break;
+
+    case 6:
+        if (poprnd < 8) {
+            /* CALL (Preg{poprnd}); */
+            dc->is_jmp = DISAS_CALL;
+            dc->hwloop_callback = gen_hwloop_br_direct;
+            dc->hwloop_data = &cpu_preg[poprnd];
+            return;
+        }
+        break;
+
+    case 7:
+        if (poprnd < 8) {
+            /* CALL (PC + Preg{poprnd}); */
+            dc->is_jmp = DISAS_CALL;
+            dc->hwloop_callback = gen_hwloop_br_pcrel;
+            dc->hwloop_data = &cpu_preg[poprnd];
+            return;
+        }
+        break;
+
+    case 8:
+        if (poprnd < 8) {
+            /* JUMP (PC + Preg{poprnd}); */
+            dc->is_jmp = DISAS_JUMP;
+            dc->hwloop_callback = gen_hwloop_br_pcrel;
+            dc->hwloop_data = &cpu_preg[poprnd];
+            return;
+        }
+        break;
+
+    case 9:
+        /* RAISE imm{poprnd}; */
+        /* int raise = uimm4 (poprnd); */
+        cec_require_supervisor (dc);
+        return;
+
+    case 10: {
+        /* EXCPT imm{poprnd}; */
+        int excpt = uimm4 (poprnd);
+        cec_exception(dc, excpt);
+        return;
+    }
+
+    case 11:
+        if (poprnd < 6) {
+            /* TESTSET (Preg{poprnd}); */
+            /* CC = (byte == 0), then store the byte back with bit 7 set */
+            TCGv byte = tcg_temp_new();
+            tcg_gen_qemu_ld8u(byte, cpu_preg[poprnd], dc->mem_idx);
+            tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_cc, byte, 0);
+            tcg_gen_ori_tl(byte, byte, 0x80);
+            tcg_gen_qemu_st8(byte, cpu_preg[poprnd], dc->mem_idx);
+            tcg_temp_free(byte);
+            return;
+        }
+        break;
+
+    default:
+        break;
+    }
+
+    /* Every combination that was not handled above is invalid.  */
+    illegal_instruction(dc);
+}
+
+/*
+ * Decode the CaCTRL group (PREFETCH / FLUSHINV / FLUSH / IFLUSH).
+ *
+ * Caches are not modelled, so the only generated code is the optional
+ * post-increment of the pointer register by BFIN_L1_CACHE_BYTES.
+ */
+static void
+decode_CaCTRL_0(DisasContext *dc, uint16_t iw0)
+{
+    /* CaCTRL
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |.a.|.op....|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int reg = ((iw0 >> CaCTRL_reg_bits) & CaCTRL_reg_mask);
+    int op  = ((iw0 >> CaCTRL_op_bits) & CaCTRL_op_mask);
+    int a   = ((iw0 >> CaCTRL_a_bits) & CaCTRL_a_mask);
+
+    TRACE_EXTRACT("%s: a:%i op:%i reg:%i", __func__, a, op, reg);
+
+    /*
+     * PREFETCH [Preg{reg}];
+     * PREFETCH [Preg{reg}++{a}];
+     * FLUSHINV [Preg{reg}];
+     * FLUSHINV [Preg{reg}++{a}];
+     * FLUSH [Preg{reg}];
+     * FLUSH [Preg{reg}++{a}];
+     * IFLUSH [Preg{reg}];
+     * IFLUSH [Preg{reg}++{a}];
+     */
+
+    /* No cache simulation, and we'll ignore the implicit CPLB aspects */
+    if (!a) {
+        return;
+    }
+
+    /* Post-increment form: advance the pointer by one cache line.  */
+    tcg_gen_addi_tl(cpu_preg[reg], cpu_preg[reg], BFIN_L1_CACHE_BYTES);
+}
+
+/*
+ * Decode a single-register push ([--SP] = reg, W == 1) or pop
+ * (reg = [SP++], W == 0).
+ *
+ * ASTAT and the accumulator halves (A#.X / A#.W) are not plain 32-bit TCG
+ * globals, so they are special-cased: ASTAT goes through
+ * gen_astat_load()/gen_astat_store(), and the accumulator parts are merged
+ * into / extracted from the 64-bit cpu_areg[] values.  Every other register
+ * is accessed through get_allreg().
+ *
+ * gen_maybe_lb_exit_tb() is only called after a pop into a register obtained
+ * from get_allreg(); the special-cased pops have no such register to hand
+ * over.
+ */
+static void
+decode_PushPopReg_0(DisasContext *dc, uint16_t iw0)
+{
+    /* PushPopReg
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |.W.|.grp.......|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int W   = ((iw0 >> PushPopReg_W_bits) & PushPopReg_W_mask);
+    int grp = ((iw0 >> PushPopReg_grp_bits) & PushPopReg_grp_mask);
+    int reg = ((iw0 >> PushPopReg_reg_bits) & PushPopReg_reg_mask);
+    TCGv treg, tmp;
+    TCGv_i64 tmp64;
+    /* Set once treg holds a real register, i.e. not for ASTAT/A#.X/A#.W */
+    bool treg_valid = false;
+
+    TRACE_EXTRACT("%s: W:%i grp:%i reg:%i", __func__, W, grp, reg);
+
+    /* Can't push/pop reserved registers  */
+    /*if (reg_is_reserved(grp, reg))
+        illegal_instruction(dc);*/
+
+    reg_check_sup(dc, grp, reg);
+
+    if (W == 0) {
+        /* Dreg and Preg are not supported by this instruction */
+        /*if (grp == 0 || grp == 1)
+            illegal_instruction(dc);*/
+
+        /* genreg{grp,reg} [SP++]; */
+        if (grp == 4 && reg == 6) {
+            /* Pop ASTAT */
+            tmp = tcg_temp_new();
+            tcg_gen_qemu_ld32u(tmp, cpu_spreg, dc->mem_idx);
+            gen_astat_store(dc, tmp);
+            tcg_temp_free(tmp);
+        } else if (grp == 4 && (reg == 0 || reg == 2)) {
+            /* Pop A#.X: low byte of the loaded word becomes bits 39:32 */
+            tmp = tcg_temp_new();
+            tcg_gen_qemu_ld32u(tmp, cpu_spreg, dc->mem_idx);
+            tcg_gen_andi_tl(tmp, tmp, 0xff);
+            tmp64 = tcg_temp_new_i64();
+            tcg_gen_extu_i32_i64(tmp64, tmp);
+            tcg_temp_free(tmp);
+
+            tcg_gen_andi_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], 0xffffffff);
+            tcg_gen_shli_i64(tmp64, tmp64, 32);
+            tcg_gen_or_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], tmp64);
+            tcg_temp_free_i64(tmp64);
+        } else if (grp == 4 && (reg == 1 || reg == 3)) {
+            /* Pop A#.W: replace the low 32 bits, keep bits 39:32 */
+            tcg_gen_andi_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], 0xff00000000);
+            tmp = tcg_temp_new();
+            tcg_gen_qemu_ld32u(tmp, cpu_spreg, dc->mem_idx);
+            tmp64 = tcg_temp_new_i64();
+            tcg_gen_extu_i32_i64(tmp64, tmp);
+            tcg_temp_free(tmp);
+            tcg_gen_or_i64(cpu_areg[reg >> 1], cpu_areg[reg >> 1], tmp64);
+            tcg_temp_free_i64(tmp64);
+        } else {
+            treg = get_allreg(dc, grp, reg);
+            treg_valid = true;
+            tcg_gen_qemu_ld32u(treg, cpu_spreg, dc->mem_idx);
+
+            if (grp == 6 && (reg == 1 || reg == 4))
+                /* LT loads auto clear the LSB */
+                tcg_gen_andi_tl(treg, treg, ~1);
+        }
+
+        tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);
+
+        /*
+         * treg is only assigned on the generic path above; passing it along
+         * after an ASTAT or accumulator pop would read an uninitialized
+         * variable.
+         */
+        if (treg_valid)
+            gen_maybe_lb_exit_tb(dc, treg);
+    } else {
+        /* [--SP] = genreg{grp,reg}; */
+
+        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
+        if (grp == 4 && reg == 6) {
+            /* Push ASTAT */
+            tmp = tcg_temp_new();
+            gen_astat_load(dc, tmp);
+            tcg_gen_qemu_st32(tmp, cpu_spreg, dc->mem_idx);
+            tcg_temp_free(tmp);
+        } else if (grp == 4 && (reg == 0 || reg == 2)) {
+            /* Push A#.X: bits 39:32 of the accumulator, zero extended */
+            tmp64 = tcg_temp_new_i64();
+            tcg_gen_shri_i64(tmp64, cpu_areg[reg >> 1], 32);
+            tmp = tcg_temp_new();
+            tcg_gen_trunc_i64_i32(tmp, tmp64);
+            tcg_temp_free_i64(tmp64);
+            tcg_gen_andi_tl(tmp, tmp, 0xff);
+            tcg_gen_qemu_st32(tmp, cpu_spreg, dc->mem_idx);
+            tcg_temp_free(tmp);
+        } else if (grp == 4 && (reg == 1 || reg == 3)) {
+            /* Push A#.W: low 32 bits of the accumulator */
+            tmp = tcg_temp_new();
+            tcg_gen_trunc_i64_i32(tmp, cpu_areg[reg >> 1]);
+            tcg_gen_qemu_st32(tmp, cpu_spreg, dc->mem_idx);
+            tcg_temp_free(tmp);
+        } else {
+            treg = get_allreg(dc, grp, reg);
+            tcg_gen_qemu_st32(treg, cpu_spreg, dc->mem_idx);
+        }
+    }
+}
+
+/*
+ * Decode the multi-register push/pop:
+ *   [--SP] = (R7:dr, P5:pr);     (W == 1)
+ *   (R7:dr, P5:pr) = [SP++];     (W == 0)
+ *
+ * d and p select whether the data and pointer register ranges take part.
+ * Pushes store Dregs upwards from dr, then Pregs upwards from pr; pops load
+ * in the exact reverse order.
+ */
+static void
+decode_PushPopMultiple_0(DisasContext *dc, uint16_t iw0)
+{
+    /* PushPopMultiple
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 1 | 0 |.d.|.p.|.W.|.dr........|.pr........|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int p  = ((iw0 >> PushPopMultiple_p_bits) & PushPopMultiple_p_mask);
+    int d  = ((iw0 >> PushPopMultiple_d_bits) & PushPopMultiple_d_mask);
+    int W  = ((iw0 >> PushPopMultiple_W_bits) & PushPopMultiple_W_mask);
+    int dr = ((iw0 >> PushPopMultiple_dr_bits) & PushPopMultiple_dr_mask);
+    int pr = ((iw0 >> PushPopMultiple_pr_bits) & PushPopMultiple_pr_mask);
+    int idx;
+    bool invalid;
+
+    TRACE_EXTRACT("%s: d:%i p:%i W:%i dr:%i pr:%i", __func__, d, p, W, dr, pr);
+
+    /* Reject: no range selected, pr past P5, or a start index given for a
+       range that is not selected.  */
+    invalid = (d == 0 && p == 0);
+    invalid = invalid || (p && imm5(pr) > 5);
+    invalid = invalid || (d && !p && pr);
+    invalid = invalid || (p && !d && dr);
+    if (invalid) {
+        illegal_instruction(dc);
+    }
+
+    if (W != 1) {
+        /* ({d}R7:imm{dr}, {p}P5:imm{pr}) = [SP++]; */
+        if (p) {
+            idx = 5;
+            while (idx >= pr) {
+                tcg_gen_qemu_ld32u(cpu_preg[idx], cpu_spreg, dc->mem_idx);
+                tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);
+                idx--;
+            }
+        }
+        if (d) {
+            idx = 7;
+            while (idx >= dr) {
+                tcg_gen_qemu_ld32u(cpu_dreg[idx], cpu_spreg, dc->mem_idx);
+                tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);
+                idx--;
+            }
+        }
+        return;
+    }
+
+    /* [--SP] = ({d}R7:imm{dr}, {p}P5:imm{pr}); */
+    if (d) {
+        idx = dr;
+        while (idx < 8) {
+            tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
+            tcg_gen_qemu_st32(cpu_dreg[idx], cpu_spreg, dc->mem_idx);
+            idx++;
+        }
+    }
+    if (p) {
+        idx = pr;
+        while (idx < 6) {
+            tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
+            tcg_gen_qemu_st32(cpu_preg[idx], cpu_spreg, dc->mem_idx);
+            idx++;
+        }
+    }
+}
+
+/*
+ * Decode the conditional register move
+ *   IF [!]CC DPreg{d,dst} = DPreg{s,src};
+ *
+ * The move is skipped at run time when CC differs from the T bit of the
+ * opcode.
+ */
+static void
+decode_ccMV_0(DisasContext *dc, uint16_t iw0)
+{
+    /* ccMV
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 1 | 1 |.T.|.d.|.s.|.dst.......|.src.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int s  = ((iw0 >> CCmv_s_bits) & CCmv_s_mask);
+    int d  = ((iw0 >> CCmv_d_bits) & CCmv_d_mask);
+    int T  = ((iw0 >> CCmv_T_bits) & CCmv_T_mask);
+    int src = ((iw0 >> CCmv_src_bits) & CCmv_src_mask);
+    int dst = ((iw0 >> CCmv_dst_bits) & CCmv_dst_mask);
+    int skip;
+    TCGv from, to;
+
+    TRACE_EXTRACT("%s: T:%i d:%i s:%i dst:%i src:%i",
+                  __func__, T, d, s, dst, src);
+
+    /* IF !{T} CC DPreg{d,dst} = DPreg{s,src}; */
+    from = get_allreg(dc, s, src);
+    to = get_allreg(dc, d, dst);
+
+    /* Jump over the move unless CC matches the requested polarity.  */
+    skip = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_cc, T, skip);
+    tcg_gen_mov_tl(to, from);
+    gen_set_label(skip);
+}
+
+/*
+ * Decode the CC-setting compares.
+ *
+ * opc 5..7 compare the two accumulators (==, <, <=) and require I == 0 and
+ * G == 0.  opc 0..4 compare register x against either register y or a
+ * 3-bit immediate (I set), on Dregs (G == 0) or Pregs (G == 1); opc 0..2 are
+ * signed, opc 3..4 unsigned.  Only the Dreg form queues an ASTAT update.
+ */
+static void
+decode_CCflag_0(DisasContext *dc, uint16_t iw0)
+{
+    /* CCflag
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 1 |.I.|.opc.......|.G.|.y.........|.x.........|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int x = ((iw0 >> CCflag_x_bits) & CCflag_x_mask);
+    int y = ((iw0 >> CCflag_y_bits) & CCflag_y_mask);
+    int I = ((iw0 >> CCflag_I_bits) & CCflag_I_mask);
+    int G = ((iw0 >> CCflag_G_bits) & CCflag_G_mask);
+    int opc = ((iw0 >> CCflag_opc_bits) & CCflag_opc_mask);
+
+    TRACE_EXTRACT("%s: I:%i opc:%i G:%i y:%i x:%i",
+                  __func__, I, opc, G, y, x);
+
+    if (opc > 4) {
+        /* Accumulator compares */
+        bool plain = (I == 0 && G == 0);
+        TCGv_i64 flag64;
+        TCGCond cond;
+
+        /*if (x != 0 || y != 0)
+            illegal_instruction(dc);*/
+
+        if (plain && opc == 5) {
+            /* CC = A0 == A1; */
+            cond = TCG_COND_EQ;
+        } else if (plain && opc == 6) {
+            /* CC = A0 < A1; */
+            cond = TCG_COND_LT;
+        } else if (plain && opc == 7) {
+            /* CC = A0 <= A1; */
+            cond = TCG_COND_LE;
+        } else {
+            illegal_instruction(dc);
+        }
+
+        flag64 = tcg_temp_new_i64();
+        tcg_gen_setcond_i64(cond, flag64, cpu_areg[0], cpu_areg[1]);
+        tcg_gen_trunc_i64_i32(cpu_cc, flag64);
+        tcg_temp_free_i64(flag64);
+    } else {
+        /* Indexed by opc: signed ==, <, <= then unsigned <, <= */
+        static const TCGCond cond_for_opc[5] = {
+            TCG_COND_EQ, TCG_COND_LT, TCG_COND_LE, TCG_COND_LTU, TCG_COND_LEU,
+        };
+        int issigned = opc < 3;
+        uint32_t dst_imm = issigned ? imm3(y) : uimm3(y);
+        TCGv lhs = G ? cpu_preg[x] : cpu_dreg[x];
+        TCGv rhs = G ? cpu_preg[y] : cpu_dreg[y];
+        TCGv imm_tmp;
+        TCGCond cond = cond_for_opc[opc];
+        enum astat_ops astat_op =
+            issigned ? ASTAT_OP_COMPARE_SIGNED : ASTAT_OP_COMPARE_UNSIGNED;
+
+        if (I) {
+            /* Compare to an immediate rather than a reg */
+            imm_tmp = tcg_const_tl(dst_imm);
+            rhs = imm_tmp;
+        }
+        tcg_gen_setcond_tl(cond, cpu_cc, lhs, rhs);
+
+        /* Pointer compares only touch CC.  */
+        if (!G) {
+            astat_queue_state2(dc, astat_op, lhs, rhs);
+        }
+
+        if (I) {
+            tcg_temp_free(imm_tmp);
+        }
+    }
+}
+
+/*
+ * Decode moves between CC and a data register, plus the CC inversion:
+ *   op 0: Dreg = CC;   op 1: CC = (Dreg != 0);   op 3 with reg 0: CC = !CC.
+ * Anything else is an illegal instruction.
+ */
+static void
+decode_CC2dreg_0(DisasContext *dc, uint16_t iw0)
+{
+    /* CC2dreg
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |.op....|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int op  = ((iw0 >> CC2dreg_op_bits) & CC2dreg_op_mask);
+    int reg = ((iw0 >> CC2dreg_reg_bits) & CC2dreg_reg_mask);
+
+    TRACE_EXTRACT("%s: op:%i reg:%i", __func__, op, reg);
+
+    switch (op) {
+    case 0:
+        /* Dreg{reg} = CC; */
+        tcg_gen_mov_tl(cpu_dreg[reg], cpu_cc);
+        return;
+    case 1:
+        /* CC = Dreg{reg}; */
+        tcg_gen_setcondi_tl(TCG_COND_NE, cpu_cc, cpu_dreg[reg], 0);
+        return;
+    case 3:
+        if (reg == 0) {
+            /* CC = !CC; */
+            tcg_gen_xori_tl(cpu_cc, cpu_cc, 1);
+            return;
+        }
+        break;
+    default:
+        break;
+    }
+
+    illegal_instruction(dc);
+}
+
+/*
+ * Decode the transfers between CC and a single ASTAT bit:
+ *   D == 0:  CC  (= | |= | &= | ^=)  ASTAT[cbit]
+ *   D == 1:  ASTAT[cbit]  (= | |= | &= | ^=)  CC
+ * with op 0..3 selecting assign / or / and / xor.
+ *
+ * The ASTAT bit is read and written directly in CPUState, after
+ * gen_astat_update(dc, true) has been called.
+ */
+static void
+decode_CC2stat_0(DisasContext *dc, uint16_t iw0)
+{
+    /* CC2stat
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |.D.|.op....|.cbit..............|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int D    = ((iw0 >> CC2stat_D_bits) & CC2stat_D_mask);
+    int op   = ((iw0 >> CC2stat_op_bits) & CC2stat_op_mask);
+    int cbit = ((iw0 >> CC2stat_cbit_bits) & CC2stat_cbit_mask);
+    const size_t bit_off = offsetof(CPUState, astat[cbit]);
+    TCGv bit, lhs, rhs;
+
+    TRACE_EXTRACT("%s: D:%i op:%i cbit:%i", __func__, D, op, cbit);
+
+    /* CC = CC; is invalid.  */
+    if (cbit == 5) {
+        illegal_instruction(dc);
+    }
+
+    gen_astat_update(dc, true);
+
+    /* Nothing is generated for an op outside 0..3.  */
+    if (op < 0 || op > 3) {
+        return;
+    }
+
+    if (op == 0) {
+        /* Plain copy in either direction, no temp needed.  */
+        if (D == 0) {
+            /* CC = ASTAT[cbit] */
+            tcg_gen_ld_tl(cpu_cc, cpu_env, bit_off);
+        } else {
+            /* ASTAT[cbit] = CC */
+            tcg_gen_st_tl(cpu_cc, cpu_env, bit_off);
+        }
+        return;
+    }
+
+    /* Read-modify forms: fetch the ASTAT bit into a temp first.  */
+    bit = tcg_temp_new();
+    tcg_gen_ld_tl(bit, cpu_env, bit_off);
+
+    /* D == 0 accumulates into CC, D == 1 accumulates into the temp.  */
+    if (D == 0) {
+        lhs = cpu_cc;
+        rhs = bit;
+    } else {
+        lhs = bit;
+        rhs = cpu_cc;
+    }
+
+    switch (op) {
+    case 1: /* |= */
+        tcg_gen_or_tl(lhs, lhs, rhs);
+        break;
+    case 2: /* &= */
+        tcg_gen_and_tl(lhs, lhs, rhs);
+        break;
+    default: /* op == 3, ^= */
+        tcg_gen_xor_tl(lhs, lhs, rhs);
+        break;
+    }
+
+    if (D != 0) {
+        /* Write the combined value back to ASTAT[cbit].  */
+        tcg_gen_st_tl(bit, cpu_env, bit_off);
+    }
+    tcg_temp_free(bit);
+}
+
+/*
+ * Decode the conditional PC-relative branch
+ *   IF [!]CC JUMP offset [(bp)];
+ *
+ * No code is emitted here: the branch is recorded in dc as a callback plus
+ * its argument.  The argument is the pcrel10() displacement with the T bit
+ * OR-ed in (presumably the displacement's low bit is always clear so the
+ * callback can split them again -- confirm against gen_hwloop_br_pcrel_cc).
+ * The B (branch prediction) bit is only traced.
+ */
+static void
+decode_BRCC_0(DisasContext *dc, uint16_t iw0)
+{
+    /* BRCC
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 0 | 1 |.T.|.B.|.offset................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int T = ((iw0 >> BRCC_T_bits) & BRCC_T_mask);
+    int B = ((iw0 >> BRCC_B_bits) & BRCC_B_mask);
+    int offset = ((iw0 >> BRCC_offset_bits) & BRCC_offset_mask);
+    int pcrel = pcrel10(offset);
+    unsigned long packed;
+
+    TRACE_EXTRACT("%s: T:%i B:%i offset:%#x", __func__, T, B, offset);
+
+    /* IF !{T} CC JUMP imm{offset} (bp){B}; */
+    packed = (unsigned long)(pcrel | T);
+    dc->hwloop_callback = gen_hwloop_br_pcrel_cc;
+    dc->hwloop_data = (void *)packed;
+}
+
+/*
+ * Decode the short unconditional PC-relative jump (JUMP.S).
+ *
+ * No code is emitted here: the jump is recorded in dc, with the pcrel12()
+ * displacement carried in hwloop_data as an integer cast to a pointer.
+ */
+static void
+decode_UJUMP_0(DisasContext *dc, uint16_t iw0)
+{
+    /* UJUMP
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 1 | 0 |.offset........................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int offset = ((iw0 >> UJump_offset_bits) & UJump_offset_mask);
+    int pcrel = pcrel12(offset);
+    unsigned long target_arg;
+
+    TRACE_EXTRACT("%s: offset:%#x", __func__, offset);
+
+    /* JUMP.S imm{offset}; */
+    target_arg = (unsigned long)pcrel;
+    dc->is_jmp = DISAS_JUMP;
+    dc->hwloop_callback = gen_hwloop_br_pcrel_imm;
+    dc->hwloop_data = (void *)target_arg;
+}
+
+static void
+decode_REGMV_0(DisasContext *dc, uint16_t iw0)
+{
+    /* REGMV
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 0 | 1 | 1 |.gd........|.gs........|.dst.......|.src.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+
+       Generic register-to-register move.  gs/src name the source register
+       (group, index) and gd/dst the destination.  ASTAT and the accumulator
+       parts A#.X / A#.W are special-cased on both sides; every other
+       register goes through get_allreg().  */
+    int gs  = ((iw0 >> RegMv_gs_bits) & RegMv_gs_mask);
+    int gd  = ((iw0 >> RegMv_gd_bits) & RegMv_gd_mask);
+    int src = ((iw0 >> RegMv_src_bits) & RegMv_src_mask);
+    int dst = ((iw0 >> RegMv_dst_bits) & RegMv_dst_mask);
+    TCGv reg_src, reg_dst, tmp;
+    TCGv_i64 tmp64;
+    bool istmp;
+
+    TRACE_EXTRACT("%s: gd:%i gs:%i dst:%i src:%i",
+                  __func__, gd, gs, dst, src);
+
+    /* genreg{gd,dst} = genreg{gs,src}; */
+
+    reg_check_sup(dc, gs, src);
+    reg_check_sup(dc, gd, dst);
+
+#if 0
+    /* Reserved slots cannot be a src/dst.  */
+    if (reg_is_reserved(gs, src) || reg_is_reserved(gd, dst))
+        goto invalid_move;
+
+    /* Standard register moves  */
+    if ((gs < 2) ||             /* Dregs/Pregs as source  */
+        (gd < 2) ||             /* Dregs/Pregs as dest    */
+        (gs == 4 && src < 4) || /* Accumulators as source */
+        (gd == 4 && dst < 4 && (gs < 4)) || /* Accumulators as dest   */
+        (gs == 7 && src == 7 && !(gd == 4 && dst < 4)) || /* EMUDAT as src */
+        (gd == 7 && dst == 7))                            /* EMUDAT as dest */
+        goto valid_move;
+
+    /* dareg = dareg (IMBL) */
+    if (gs < 4 && gd < 4)
+        goto valid_move;
+
+    /* USP can be src to sysregs, but not dagregs.  */
+    if ((gs == 7 && src == 0) && (gd >= 4))
+        goto valid_move;
+
+    /* USP can move between genregs (only check Accumulators).  */
+    if (((gs == 7 && src == 0) && (gd == 4 && dst < 4)) ||
+        ((gd == 7 && dst == 0) && (gs == 4 && src < 4)))
+        goto valid_move;
+
+    /* Still here ?  Invalid reg pair.  */
+ invalid_move:
+    illegal_instruction(dc);
+
+ valid_move:
+#endif
+    if (gs == 4 && src == 6) {
+        /* Reads of ASTAT: materialized into a temp by gen_astat_load();
+           istmp records that the temp must be freed at the end */
+        tmp = tcg_temp_new();
+        gen_astat_load(dc, tmp);
+        reg_src = tmp;
+        istmp = true;
+    } else if (gs == 4 && (src == 0 || src == 2)) {
+        /* Reads of A#.X: the accumulator shifted right by 32, truncated to
+           32 bits and sign extended from 8 bits */
+        tmp = tcg_temp_new();
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_shri_i64(tmp64, cpu_areg[src >> 1], 32);
+        tcg_gen_trunc_i64_i32(tmp, tmp64);
+        tcg_temp_free_i64(tmp64);
+        tcg_gen_ext8s_tl(tmp, tmp);
+        reg_src = tmp;
+        istmp = true;
+    } else if (gs == 4 && (src == 1 || src == 3)) {
+        /* Reads of A#.W: the low 32 bits of the accumulator */
+        tmp = tcg_temp_new();
+        tcg_gen_trunc_i64_i32(tmp, cpu_areg[src >> 1]);
+        reg_src = tmp;
+        istmp = true;
+    } else {
+        reg_src = get_allreg(dc, gs, src);
+        istmp = false;
+    }
+
+    if (gd == 4 && dst == 6) {
+        /* Writes to ASTAT */
+        gen_astat_store(dc, reg_src);
+    } else if (gd == 4 && (dst == 0 || dst == 2)) {
+        /* Writes to A#.X: keep the low 32 bits of the accumulator and put
+           the low byte of the source at bits 39:32 */
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_andi_i64(cpu_areg[dst >> 1], cpu_areg[dst >> 1], 0xffffffff);
+        tcg_gen_extu_i32_i64(tmp64, reg_src);
+        tcg_gen_andi_i64(tmp64, tmp64, 0xff);
+        tcg_gen_shli_i64(tmp64, tmp64, 32);
+        tcg_gen_or_i64(cpu_areg[dst >> 1], cpu_areg[dst >> 1], tmp64);
+        tcg_temp_free_i64(tmp64);
+    } else if (gd == 4 && (dst == 1 || dst == 3)) {
+        /* Writes to A#.W: keep bits 39:32 of the accumulator and replace the
+           low 32 bits with the zero-extended source */
+        tcg_gen_andi_i64(cpu_areg[dst >> 1], cpu_areg[dst >> 1], 0xff00000000);
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(tmp64, reg_src);
+        tcg_gen_or_i64(cpu_areg[dst >> 1], cpu_areg[dst >> 1], tmp64);
+        tcg_temp_free_i64(tmp64);
+    } else if (gd == 6 && (dst == 1 || dst == 4)) {
+        /* Writes to LT# (dst 1 selects cpu_ltreg[0], dst 4 cpu_ltreg[1]) */
+        /* LT loads auto clear the LSB */
+        tcg_gen_andi_tl(cpu_ltreg[dst >> 2], reg_src, ~1);
+    } else {
+        reg_dst = get_allreg(dc, gd, dst);
+        tcg_gen_mov_tl(reg_dst, reg_src);
+        gen_maybe_lb_exit_tb(dc, reg_dst);
+    }
+
+    if (istmp)
+        tcg_temp_free(tmp);
+}
+
+/*
+ * Generate code that copies *reg into a new local temp and clamps it to
+ * `limit` using an unsigned comparison.  The temp is returned through *tmp
+ * and is not freed here.  dc is currently unused.
+ */
+static void
+clipi(DisasContext *dc, TCGv *reg, TCGv *tmp, uint32_t limit)
+{
+    int in_range = gen_new_label();
+    TCGv clipped = tcg_temp_local_new();
+
+    *tmp = clipped;
+    tcg_gen_mov_tl(clipped, *reg);
+    /* Values <= limit (unsigned) are kept as they are.  */
+    tcg_gen_brcondi_tl(TCG_COND_LEU, clipped, limit, in_range);
+    tcg_gen_movi_tl(clipped, limit);
+    gen_set_label(in_range);
+}
+
+/*
+ * Decode the two-operand Dreg ALU group (register shifts, multiply,
+ * add-with-shift, DIVQ/DIVS, sign/zero extension, negate, complement).
+ *
+ * dst is both an operand and the destination for the shift, multiply and
+ * add-with-shift forms; the extension, negate and complement forms only
+ * read src.  opc values with no handler here (6 and 7) raise an illegal
+ * instruction rather than silently decoding as a NOP.
+ */
+static void
+decode_ALU2op_0(DisasContext *dc, uint16_t iw0)
+{
+    /* ALU2op
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 1 | 0 | 0 | 0 | 0 |.opc...........|.src.......|.dst.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int src = ((iw0 >> ALU2op_src_bits) & ALU2op_src_mask);
+    int opc = ((iw0 >> ALU2op_opc_bits) & ALU2op_opc_mask);
+    int dst = ((iw0 >> ALU2op_dst_bits) & ALU2op_dst_mask);
+    int l;
+    TCGv tmp;
+
+    TRACE_EXTRACT("%s: opc:%i src:%i dst:%i", __func__, opc, src, dst);
+
+    if (opc == 0) {
+        /* Dreg{dst} >>>= Dreg{src}; */
+        /* Arithmetic shift: the count is clamped to 31 */
+        clipi (dc, &cpu_dreg[src], &tmp, 31);
+        tcg_gen_sar_tl(cpu_dreg[dst], cpu_dreg[dst], tmp);
+        tcg_temp_free(tmp);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT_RT32, cpu_dreg[dst]);
+    } else if (opc == 1) {
+        /* Dreg{dst} >>= Dreg{src}; */
+        l = gen_new_label();
+        tmp = tcg_temp_local_new();
+
+        /* A count above 31 (unsigned) must give 0: in that case zero both
+           the count and dst, so the shift below is a no-op on 0 */
+        tcg_gen_mov_tl(tmp, cpu_dreg[src]);
+        tcg_gen_brcondi_tl(TCG_COND_LEU, tmp, 31, l);
+        tcg_gen_movi_tl(tmp, 0);
+        tcg_gen_mov_tl(cpu_dreg[dst], tmp);
+        gen_set_label(l);
+
+        tcg_gen_shr_tl(cpu_dreg[dst], cpu_dreg[dst], tmp);
+
+        tcg_temp_free(tmp);
+
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT_RT32, cpu_dreg[dst]);
+    } else if (opc == 2) {
+        /* Dreg{dst} <<= Dreg{src}; */
+        l = gen_new_label();
+        tmp = tcg_temp_local_new();
+
+        /* A count above 31 (unsigned) must give 0: in that case zero both
+           the count and dst, so the shift below is a no-op on 0 */
+        tcg_gen_mov_tl(tmp, cpu_dreg[src]);
+        tcg_gen_brcondi_tl(TCG_COND_LEU, tmp, 31, l);
+        tcg_gen_movi_tl(tmp, 0);
+        tcg_gen_mov_tl(cpu_dreg[dst], tmp);
+        gen_set_label(l);
+
+        tcg_gen_shl_tl(cpu_dreg[dst], cpu_dreg[dst], tmp);
+
+        tcg_temp_free(tmp);
+
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst]);
+    } else if (opc == 3) {
+        /* Dreg{dst} *= Dreg{src}; */
+        tcg_gen_mul_tl(cpu_dreg[dst], cpu_dreg[dst], cpu_dreg[src]);
+    } else if (opc == 4 || opc == 5) {
+        /* Dreg{dst} = (Dreg{dst} + Dreg{src}) << imm{opc}; */
+        /* opc 4 shifts by 1, opc 5 shifts by 2 */
+        tcg_gen_add_tl(cpu_dreg[dst], cpu_dreg[dst], cpu_dreg[src]);
+        tcg_gen_shli_tl(cpu_dreg[dst], cpu_dreg[dst], (opc - 3));
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst]);
+    } else if (opc == 8) {
+        /* DIVQ (Dreg, Dreg); */
+        gen_divq(cpu_dreg[dst], cpu_dreg[src]);
+    } else if (opc == 9) {
+        /* DIVS (Dreg, Dreg); */
+        gen_divs(cpu_dreg[dst], cpu_dreg[src]);
+    } else if (opc == 10) {
+        /* Dreg{dst} = Dreg_lo{src} (X); */
+        tcg_gen_ext16s_tl(cpu_dreg[dst], cpu_dreg[src]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 11) {
+        /* Dreg{dst} = Dreg_lo{src} (Z); */
+        tcg_gen_ext16u_tl(cpu_dreg[dst], cpu_dreg[src]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 12) {
+        /* Dreg{dst} = Dreg_byte{src} (X); */
+        tcg_gen_ext8s_tl(cpu_dreg[dst], cpu_dreg[src]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 13) {
+        /* Dreg{dst} = Dreg_byte{src} (Z); */
+        tcg_gen_ext8u_tl(cpu_dreg[dst], cpu_dreg[src]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 14) {
+        /* Dreg{dst} = -Dreg{src}; */
+        /* XXX: Documentation isn't entirely clear about av0 and av1.  */
+        tcg_gen_neg_tl(cpu_dreg[dst], cpu_dreg[src]);
+        astat_queue_state1(dc, ASTAT_OP_NEGATE, cpu_dreg[dst]);
+    } else if (opc == 15) {
+        /* Dreg = ~Dreg; */
+        tcg_gen_not_tl(cpu_dreg[dst], cpu_dreg[src]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else {
+        /* opc 6 and 7 have no handler in this decoder */
+        illegal_instruction(dc);
+    }
+}
+
+/*
+ * Decode the two-operand pointer register group (subtract, shifts by 1 or
+ * 2, bit-reversed add, add-with-shift).
+ *
+ * opc 2 has no handler here and is rejected as an illegal instruction; it
+ * must not reach the add-with-shift code, whose shift count (opc - 5) would
+ * be negative.
+ */
+static void
+decode_PTR2op_0(DisasContext *dc, uint16_t iw0)
+{
+    /* PTR2op
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 1 | 0 | 0 | 0 | 1 | 0 |.opc.......|.src.......|.dst.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* NOTE(review): src is masked with PTR2op_dst_mask; this looks like a
+       copy/paste of the dst line -- confirm whether a PTR2op_src_mask
+       exists and carries the same value.  */
+    int src = ((iw0 >> PTR2op_src_bits) & PTR2op_dst_mask);
+    int opc = ((iw0 >> PTR2op_opc_bits) & PTR2op_opc_mask);
+    int dst = ((iw0 >> PTR2op_dst_bits) & PTR2op_dst_mask);
+
+    TRACE_EXTRACT("%s: opc:%i src:%i dst:%i", __func__, opc, src, dst);
+
+    if (opc == 0) {
+        /* Preg{dst} -= Preg{src}; */
+        tcg_gen_sub_tl(cpu_preg[dst], cpu_preg[dst], cpu_preg[src]);
+    } else if (opc == 1) {
+        /* Preg{dst} = Preg{src} << 2; */
+        tcg_gen_shli_tl(cpu_preg[dst], cpu_preg[src], 2);
+    } else if (opc == 3) {
+        /* Preg{dst} = Preg{src} >> 2; */
+        tcg_gen_shri_tl(cpu_preg[dst], cpu_preg[src], 2);
+    } else if (opc == 4) {
+        /* Preg{dst} = Preg{src} >> 1; */
+        tcg_gen_shri_tl(cpu_preg[dst], cpu_preg[src], 1);
+    } else if (opc == 5) {
+        /* Preg{dst} += Preg{src} (BREV); */
+        gen_helper_add_brev(cpu_preg[dst], cpu_preg[dst], cpu_preg[src]);
+    } else if (opc == 6 || opc == 7) {
+        /* Preg{dst} = (Preg{dst} + Preg{src}) << imm{opc}; */
+        /* opc 6 shifts by 1, opc 7 shifts by 2 */
+        tcg_gen_add_tl(cpu_preg[dst], cpu_preg[dst], cpu_preg[src]);
+        tcg_gen_shli_tl(cpu_preg[dst], cpu_preg[dst], (opc - 5));
+    } else {
+        illegal_instruction(dc);
+    }
+}
+
+/*
+ * Decode the Dreg-with-5-bit-immediate group: bit test (plain and
+ * inverted), bit set / toggle / clear, and the three immediate shifts.
+ *
+ * The bit tests only write CC; all other forms modify Dreg{dst} and queue
+ * an ASTAT update.
+ */
+static void
+decode_LOGI2op_0(DisasContext *dc, uint16_t iw0)
+{
+    /* LOGI2op
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 1 | 0 | 0 | 1 |.opc.......|.src...............|.dst.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int src = ((iw0 >> LOGI2op_src_bits) & LOGI2op_src_mask);
+    int opc = ((iw0 >> LOGI2op_opc_bits) & LOGI2op_opc_mask);
+    int dst = ((iw0 >> LOGI2op_dst_bits) & LOGI2op_dst_mask);
+    int uimm = uimm5(src);
+    /* Built as unsigned: 1 << 31 on a signed int is undefined behavior,
+       and bit 31 is a valid operand for the bit instructions.  */
+    uint32_t bit = (uint32_t)1 << uimm;
+    TCGv tmp;
+
+    TRACE_EXTRACT("%s: opc:%i src:%i dst:%i", __func__, opc, src, dst);
+
+    if (opc == 0) {
+        /* CC = ! BITTST (Dreg{dst}, imm{uimm}); */
+        tmp = tcg_temp_new();
+        tcg_gen_movi_tl(tmp, bit);
+        tcg_gen_and_tl(tmp, tmp, cpu_dreg[dst]);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_cc, tmp, 0);
+        tcg_temp_free(tmp);
+    } else if (opc == 1) {
+        /* CC = BITTST (Dreg{dst}, imm{uimm}); */
+        tmp = tcg_temp_new();
+        tcg_gen_movi_tl(tmp, bit);
+        tcg_gen_and_tl(tmp, tmp, cpu_dreg[dst]);
+        tcg_gen_setcondi_tl(TCG_COND_NE, cpu_cc, tmp, 0);
+        tcg_temp_free(tmp);
+    } else if (opc == 2) {
+        /* BITSET (Dreg{dst}, imm{uimm}); */
+        tcg_gen_ori_tl(cpu_dreg[dst], cpu_dreg[dst], bit);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 3) {
+        /* BITTGL (Dreg{dst}, imm{uimm}); */
+        tcg_gen_xori_tl(cpu_dreg[dst], cpu_dreg[dst], bit);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 4) {
+        /* BITCLR (Dreg{dst}, imm{uimm}); */
+        tcg_gen_andi_tl(cpu_dreg[dst], cpu_dreg[dst], ~bit);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+    } else if (opc == 5) {
+        /* Dreg{dst} >>>= imm{uimm}; */
+        tcg_gen_sari_tl(cpu_dreg[dst], cpu_dreg[dst], uimm);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT_RT32, cpu_dreg[dst]);
+    } else if (opc == 6) {
+        /* Dreg{dst} >>= imm{uimm}; */
+        tcg_gen_shri_tl(cpu_dreg[dst], cpu_dreg[dst], uimm);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT_RT32, cpu_dreg[dst]);
+    } else /*if (opc == 7)*/ {
+        /* Dreg{dst} <<= imm{uimm}; */
+        tcg_gen_shli_tl(cpu_dreg[dst], cpu_dreg[dst], uimm);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst]);
+    }
+}
+
+static void
+decode_COMP3op_0(DisasContext *dc, uint16_t iw0)
+{
+    /* COMP3op
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 1 | 0 | 1 |.opc.......|.dst.......|.src1......|.src0......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Three-operand arithmetic/logic: opc 0-4 work on Dregs (and queue an
+       ASTAT update), opc 5-7 work on Pregs.  */
+    int opc  = ((iw0 >> COMP3op_opc_bits) & COMP3op_opc_mask);
+    int dst  = ((iw0 >> COMP3op_dst_bits) & COMP3op_dst_mask);
+    int src0 = ((iw0 >> COMP3op_src0_bits) & COMP3op_src0_mask);
+    int src1 = ((iw0 >> COMP3op_src1_bits) & COMP3op_src1_mask);
+    TCGv res;
+
+    TRACE_EXTRACT("%s: opc:%i dst:%i src1:%i src0:%i",
+                  __func__, opc, dst, src1, src0);
+
+    res = tcg_temp_local_new();
+
+    switch (opc) {
+    case 0:
+        /* Dreg{dst} = Dreg{src0} + Dreg{src1}; */
+        /* The result is computed into a temp and the sources are handed to
+           the ASTAT queue before dst (which may alias a source) is written. */
+        tcg_gen_add_tl(res, cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state3(dc, ASTAT_OP_ADD32, res, cpu_dreg[src0], cpu_dreg[src1]);
+        tcg_gen_mov_tl(cpu_dreg[dst], res);
+        break;
+    case 1:
+        /* Dreg{dst} = Dreg{src0} - Dreg{src1}; */
+        tcg_gen_sub_tl(res, cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state3(dc, ASTAT_OP_SUB32, res, cpu_dreg[src0], cpu_dreg[src1]);
+        tcg_gen_mov_tl(cpu_dreg[dst], res);
+        break;
+    case 2:
+        /* Dreg{dst} = Dreg{src0} & Dreg{src1}; */
+        tcg_gen_and_tl(cpu_dreg[dst], cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+        break;
+    case 3:
+        /* Dreg{dst} = Dreg{src0} | Dreg{src1}; */
+        tcg_gen_or_tl(cpu_dreg[dst], cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+        break;
+    case 4:
+        /* Dreg{dst} = Dreg{src0} ^ Dreg{src1}; */
+        tcg_gen_xor_tl(cpu_dreg[dst], cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst]);
+        break;
+    case 5:
+        /* Preg{dst} = Preg{src0} + Preg{src1}; */
+        /* When src0 == src1 the disassembler shows a shift by 1; the
+           generated code is the same either way.  */
+        tcg_gen_add_tl(cpu_preg[dst], cpu_preg[src0], cpu_preg[src1]);
+        break;
+    default: /* opc == 6 || opc == 7 */
+        /* Preg{dst} = Preg{src0} + Preg{src1} << imm{opc}; */
+        /* dst, src0 and src1 may all name the same register, so the shifted
+           operand goes through the temp to keep the sources intact until
+           the final add.  */
+        tcg_gen_shli_tl(res, cpu_preg[src1], (opc - 5));
+        tcg_gen_add_tl(cpu_preg[dst], cpu_preg[src0], res);
+        break;
+    }
+
+    tcg_temp_free(res);
+}
+
+static void
+decode_COMPI2opD_0(DisasContext *dc, uint16_t iw0)
+{
+    /* COMPI2opD
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 1 | 1 | 0 | 0 |.op|..src......................|.dst.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Load or add a 7-bit immediate (decoded by imm7()) to Dreg{dst}.  */
+    int op  = ((iw0 >> COMPI2opD_op_bits) & COMPI2opD_op_mask);
+    int dst = ((iw0 >> COMPI2opD_dst_bits) & COMPI2opD_dst_mask);
+    int src = ((iw0 >> COMPI2opD_src_bits) & COMPI2opD_src_mask);
+    int imm = imm7(src);
+    TCGv addend;
+
+    TRACE_EXTRACT("%s: op:%i src:%i dst:%i", __func__, op, src, dst);
+
+    if (op == 0) {
+        /* Dreg{dst} = imm{src} (X); */
+        tcg_gen_movi_tl(cpu_dreg[dst], imm);
+        return;
+    }
+
+    /* Dreg{dst} += imm{src}; */
+    /* The old value of the register is parked in cpu_astat_arg[1] first so
+       it is still available as an ASTAT operand after dst is overwritten.  */
+    addend = tcg_const_tl(imm);
+    tcg_gen_mov_tl(cpu_astat_arg[1], cpu_dreg[dst]);
+    tcg_gen_add_tl(cpu_dreg[dst], cpu_astat_arg[1], addend);
+    astat_queue_state3(dc, ASTAT_OP_ADD32, cpu_dreg[dst], cpu_astat_arg[1], addend);
+    tcg_temp_free(addend);
+}
+
+static void
+decode_COMPI2opP_0(DisasContext *dc, uint16_t iw0)
+{
+    /* COMPI2opP
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 0 | 1 | 1 | 0 | 1 |.op|.src.......................|.dst.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Load or add a 7-bit immediate (decoded by imm7()) to Preg{dst}.
+       No ASTAT update is queued for this group.  */
+    int op  = ((iw0 >> COMPI2opP_op_bits) & COMPI2opP_op_mask);
+    int src = ((iw0 >> COMPI2opP_src_bits) & COMPI2opP_src_mask);
+    int dst = ((iw0 >> COMPI2opP_dst_bits) & COMPI2opP_dst_mask);
+    int imm = imm7(src);
+
+    TRACE_EXTRACT("%s: op:%i src:%i dst:%i", __func__, op, src, dst);
+
+    if (op != 0) {
+        /* Preg{dst} += imm{src}; */
+        tcg_gen_addi_tl(cpu_preg[dst], cpu_preg[dst], imm);
+    } else {
+        /* Preg{dst} = imm{src}; */
+        tcg_gen_movi_tl(cpu_preg[dst], imm);
+    }
+}
+
+static void
+decode_LDSTpmod_0(DisasContext *dc, uint16_t iw0)
+{
+    /* LDSTpmod
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 0 | 0 |.W.|.aop...|.reg.......|.idx.......|.ptr.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Load/store through Preg{ptr}, then post-modify the pointer by
+       Preg{idx}.  When idx == ptr the post-modify is skipped, which gives
+       the plain "[Preg{ptr}]" forms of the half-word accesses.
+
+       Half-register loads read memory into a temporary before touching
+       Dreg{reg}, so a load that faults cannot leave the destination with
+       one half already cleared.  */
+    int W   = ((iw0 >> LDSTpmod_W_bits) & LDSTpmod_W_mask);
+    int aop = ((iw0 >> LDSTpmod_aop_bits) & LDSTpmod_aop_mask);
+    int idx = ((iw0 >> LDSTpmod_idx_bits) & LDSTpmod_idx_mask);
+    int ptr = ((iw0 >> LDSTpmod_ptr_bits) & LDSTpmod_ptr_mask);
+    int reg = ((iw0 >> LDSTpmod_reg_bits) & LDSTpmod_reg_mask);
+    TCGv tmp;
+
+    TRACE_EXTRACT("%s: W:%i aop:%i reg:%i idx:%i ptr:%i",
+                  __func__, W, aop, reg, idx, ptr);
+
+    if (aop == 0 && W == 0) {
+        /* Dreg{reg} = [Preg{ptr} ++ Preg{idx}]; */
+        tcg_gen_qemu_ld32u(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+    } else if (aop == 1 && W == 0) {
+        /* Dreg_lo{reg} = W[Preg{ptr} ++ Preg{idx}]; */
+        tmp = tcg_temp_new();
+        tcg_gen_qemu_ld16u(tmp, cpu_preg[ptr], dc->mem_idx);
+        tcg_gen_andi_tl(cpu_dreg[reg], cpu_dreg[reg], 0xffff0000);
+        tcg_gen_or_tl(cpu_dreg[reg], cpu_dreg[reg], tmp);
+        tcg_temp_free(tmp);
+    } else if (aop == 2 && W == 0) {
+        /* Dreg_hi{reg} = W[Preg{ptr} ++ Preg{idx}]; */
+        tmp = tcg_temp_new();
+        tcg_gen_qemu_ld16u(tmp, cpu_preg[ptr], dc->mem_idx);
+        tcg_gen_shli_tl(tmp, tmp, 16);
+        tcg_gen_andi_tl(cpu_dreg[reg], cpu_dreg[reg], 0xffff);
+        tcg_gen_or_tl(cpu_dreg[reg], cpu_dreg[reg], tmp);
+        tcg_temp_free(tmp);
+    } else if (aop == 3 && W == 0) {
+        /* Dreg{reg} = W[Preg{ptr} ++ Preg{idx}] (Z); */
+        tcg_gen_qemu_ld16u(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+    } else if (aop == 3 && W == 1) {
+        /* Dreg{reg} = W[Preg{ptr} ++ Preg{idx}] (X); */
+        /* Note: this encoding is a load even though W is set.  */
+        tcg_gen_qemu_ld16s(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+    } else if (aop == 0 && W == 1) {
+        /* [Preg{ptr} ++ Preg{idx}] = Dreg{reg}; */
+        tcg_gen_qemu_st32(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+    } else if (aop == 1 && W == 1) {
+        /* W[Preg{ptr} ++ Preg{idx}] = Dreg_lo{reg}; */
+        tcg_gen_qemu_st16(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+    } else if (aop == 2 && W == 1) {
+        /* W[Preg{ptr} ++ Preg{idx}] = Dreg_hi{reg}; */
+        tmp = tcg_temp_new();
+        tcg_gen_shri_tl(tmp, cpu_dreg[reg], 16);
+        tcg_gen_qemu_st16(tmp, cpu_preg[ptr], dc->mem_idx);
+        tcg_temp_free(tmp);
+    } else {
+        illegal_instruction(dc);
+        return;
+    }
+
+    /* Post-modify, shared by every valid form above.  */
+    if (ptr != idx)
+        tcg_gen_add_tl(cpu_preg[ptr], cpu_preg[ptr], cpu_preg[idx]);
+}
+
+static void
+decode_dagMODim_0(DisasContext *dc, uint16_t iw0)
+{
+    /* dagMODim
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 0 |.br| 1 | 1 |.op|.m.....|.i.....|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Add or subtract Mreg{m} to/from Ireg{i}; br selects the BREV variant
+       of the add.  */
+    int i  = ((iw0 >> DagMODim_i_bits) & DagMODim_i_mask);
+    int m  = ((iw0 >> DagMODim_m_bits) & DagMODim_m_mask);
+    int br = ((iw0 >> DagMODim_br_bits) & DagMODim_br_mask);
+    int op = ((iw0 >> DagMODim_op_bits) & DagMODim_op_mask);
+
+    TRACE_EXTRACT("%s: br:%i op:%i m:%i i:%i", __func__, br, op, m, i);
+
+    if (op == 0) {
+        if (br == 1) {
+            /* Ireg{i} += Mreg{m} (BREV); */
+            gen_helper_add_brev(cpu_ireg[i], cpu_ireg[i], cpu_mreg[m]);
+        } else {
+            /* Ireg{i} += Mreg{m}; */
+            gen_dagadd (dc, i, cpu_mreg[m]);
+        }
+    } else if (op == 1 && br == 0) {
+        /* Ireg{i} -= Mreg{m}; */
+        gen_dagsub (dc, i, cpu_mreg[m]);
+    } else {
+        /* There is no BREV form of the subtract.  */
+        illegal_instruction(dc);
+    }
+}
+
+static void
+decode_dagMODik_0(DisasContext *dc, uint16_t iw0)
+{
+    /* dagMODik
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 |.op....|.i.....|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Adjust Ireg{i} by a constant: bit 1 of op picks the amount (2 or 4),
+       bit 0 picks the direction (0 = add, 1 = subtract).  */
+    int i  = ((iw0 >> DagMODik_i_bits) & DagMODik_i_mask);
+    int op = ((iw0 >> DagMODik_op_bits) & DagMODik_op_mask);
+    int step = (op & 2) ? 4 : 2;
+
+    TRACE_EXTRACT("%s: op:%i i:%i", __func__, op, i);
+
+    if ((op & 1) == 0) {
+        /* Ireg{i} += 2 or 4; */
+        gen_dagaddi (dc, i, step);
+    } else {
+        /* Ireg{i} -= 2 or 4; */
+        gen_dagsubi (dc, i, step);
+    }
+}
+
+static void
+decode_dspLDST_0(DisasContext *dc, uint16_t iw0)
+{
+    /* dspLDST
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 0 | 1 | 1 | 1 |.W.|.aop...|.m.....|.i.....|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Load/store Dreg{reg} through Ireg{i}.
+         aop 0/1/2: post-increment / post-decrement / no update, where the
+                    m field selects the access: 0 = 32-bit, 1 = low half,
+                    2 = high half.
+         aop 3:     32-bit access, then Ireg{i} += Mreg{m}.
+       Anything else is an illegal encoding.  */
+    int i   = ((iw0 >> DspLDST_i_bits) & DspLDST_i_mask);
+    int m   = ((iw0 >> DspLDST_m_bits) & DspLDST_m_mask);
+    int W   = ((iw0 >> DspLDST_W_bits) & DspLDST_W_mask);
+    int aop = ((iw0 >> DspLDST_aop_bits) & DspLDST_aop_mask);
+    int reg = ((iw0 >> DspLDST_reg_bits) & DspLDST_reg_mask);
+    int step;
+    TCGv half;
+
+    TRACE_EXTRACT("%s: aop:%i m:%i i:%i reg:%i", __func__, aop, m, i, reg);
+
+    if (aop == 3) {
+        if (W == 0) {
+            /* Dreg{reg} = [Ireg{i} ++ Mreg{m}]; */
+            /* XXX: No DISALGNEXCPT support */
+            tcg_gen_qemu_ld32u(cpu_dreg[reg], cpu_ireg[i], dc->mem_idx);
+        } else if (W == 1) {
+            /* [Ireg{i} ++ Mreg{m}] = Dreg{reg}; */
+            tcg_gen_qemu_st32(cpu_dreg[reg], cpu_ireg[i], dc->mem_idx);
+        } else {
+            illegal_instruction(dc);
+            return;
+        }
+        gen_dagadd (dc, i, cpu_mreg[m]);
+        return;
+    }
+
+    if (aop > 2 || m > 2 || (W != 0 && W != 1)) {
+        illegal_instruction(dc);
+        return;
+    }
+
+    /* Pointer step matches the access width.  */
+    step = (m == 0) ? 4 : 2;
+
+    if (W == 0) {
+        if (m == 0) {
+            /* Dreg{reg} = [Ireg{i}{aop}]; */
+            /* XXX: No DISALGNEXCPT support */
+            tcg_gen_qemu_ld32u(cpu_dreg[reg], cpu_ireg[i], dc->mem_idx);
+        } else {
+            /* Dreg_lo{reg} / Dreg_hi{reg} = W[Ireg{i}{aop}]; */
+            half = tcg_temp_new();
+            tcg_gen_qemu_ld16u(half, cpu_ireg[i], dc->mem_idx);
+            if (m == 1)
+                gen_mov_l_tl(cpu_dreg[reg], half);
+            else
+                gen_mov_h_tl(cpu_dreg[reg], half);
+            tcg_temp_free(half);
+        }
+    } else {
+        if (m == 0) {
+            /* [Ireg{i}{aop}] = Dreg{reg}; */
+            tcg_gen_qemu_st32(cpu_dreg[reg], cpu_ireg[i], dc->mem_idx);
+        } else if (m == 1) {
+            /* W[Ireg{i}{aop}] = Dreg_lo{reg}; */
+            tcg_gen_qemu_st16(cpu_dreg[reg], cpu_ireg[i], dc->mem_idx);
+        } else {
+            /* W[Ireg{i}{aop}] = Dreg_hi{reg}; */
+            half = tcg_temp_new();
+            tcg_gen_shri_tl(half, cpu_dreg[reg], 16);
+            tcg_gen_qemu_st16(half, cpu_ireg[i], dc->mem_idx);
+            tcg_temp_free(half);
+        }
+    }
+
+    /* The pointer update always comes after the memory access.  */
+    if (aop == 0)
+        gen_dagaddi (dc, i, step);
+    else if (aop == 1)
+        gen_dagsubi (dc, i, step);
+}
+
+static void
+decode_LDST_0(DisasContext *dc, uint16_t iw0)
+{
+    /* LDST
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 0 | 1 |.sz....|.W.|.aop...|.Z.|.ptr.......|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Load/store through Preg{ptr} with optional post-modify:
+         aop 0 = post-increment, 1 = post-decrement, 2 = no update;
+         sz  0 = 32-bit, 1 = 16-bit, 2 = 8-bit.
+       Illegal encodings are reported and nothing further is emitted for
+       them.  In particular sz == 3 must never reach the post-modify step:
+       `1 << (2 - sz)' would be a shift by a negative count.
+       NOTE(review): this assumes code emitted after illegal_instruction()
+       is never executed by the guest -- confirm against its definition.  */
+    int Z   = ((iw0 >> LDST_Z_bits) & LDST_Z_mask);
+    int W   = ((iw0 >> LDST_W_bits) & LDST_W_mask);
+    int sz  = ((iw0 >> LDST_sz_bits) & LDST_sz_mask);
+    int aop = ((iw0 >> LDST_aop_bits) & LDST_aop_mask);
+    int reg = ((iw0 >> LDST_reg_bits) & LDST_reg_mask);
+    int ptr = ((iw0 >> LDST_ptr_bits) & LDST_ptr_mask);
+
+    TRACE_EXTRACT("%s: sz:%i W:%i aop:%i Z:%i ptr:%i reg:%i",
+                  __func__, sz, W, aop, Z, ptr, reg);
+
+    if (aop == 3 || sz == 3) {
+        illegal_instruction(dc);
+        return;
+    }
+
+    if (W == 0) {
+        if (sz == 0 && Z == 0)
+            /* Dreg{reg} = [Preg{ptr}{aop}]; */
+            tcg_gen_qemu_ld32u(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+        else if (sz == 0 && Z == 1) {
+            /* Preg{reg} = [Preg{ptr}{aop}]; */
+            /*if (aop < 2 && ptr == reg)
+                illegal_instruction_combination(dc);*/
+            tcg_gen_qemu_ld32u(cpu_preg[reg], cpu_preg[ptr], dc->mem_idx);
+        } else if (sz == 1 && Z == 0)
+            /* Dreg{reg} = W[Preg{ptr}{aop}] (Z); */
+            tcg_gen_qemu_ld16u(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+        else if (sz == 1 && Z == 1)
+            /* Dreg{reg} = W[Preg{ptr}{aop}] (X); */
+            tcg_gen_qemu_ld16s(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+        else if (sz == 2 && Z == 0)
+            /* Dreg{reg} = B[Preg{ptr}{aop}] (Z); */
+            tcg_gen_qemu_ld8u(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+        else if (sz == 2 && Z == 1)
+            /* Dreg{reg} = B[Preg{ptr}{aop}] (X); */
+            tcg_gen_qemu_ld8s(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+        else {
+            illegal_instruction(dc);
+            return;
+        }
+    } else {
+        if (sz == 0 && Z == 0)
+            /* [Preg{ptr}{aop}] = Dreg{reg}; */
+            tcg_gen_qemu_st32(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+        else if (sz == 0 && Z == 1)
+            /* [Preg{ptr}{aop}] = Preg{reg}; */
+            tcg_gen_qemu_st32(cpu_preg[reg], cpu_preg[ptr], dc->mem_idx);
+        else if (sz == 1 && Z == 0)
+            /* W[Preg{ptr}{aop}] = Dreg{reg}; */
+            tcg_gen_qemu_st16(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+        else if (sz == 2 && Z == 0)
+            /* B[Preg{ptr}{aop}] = Dreg{reg}; */
+            tcg_gen_qemu_st8(cpu_dreg[reg], cpu_preg[ptr], dc->mem_idx);
+        else {
+            /* There are no sign-extending stores.  */
+            illegal_instruction(dc);
+            return;
+        }
+    }
+
+    /* Post-modify by the access size (4, 2 or 1 bytes); sz is 0..2 here.  */
+    if (aop == 0)
+        tcg_gen_addi_tl(cpu_preg[ptr], cpu_preg[ptr], 1 << (2 - sz));
+    if (aop == 1)
+        tcg_gen_subi_tl(cpu_preg[ptr], cpu_preg[ptr], 1 << (2 - sz));
+}
+
+static void
+decode_LDSTiiFP_0(DisasContext *dc, uint16_t iw0)
+{
+    /* LDSTiiFP
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 1 | 1 | 1 | 0 |.W.|.offset............|.reg...........|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* 32-bit load/store of a Dreg or Preg at FP plus an immediate offset
+       (decoded by negimm5s4()).  */
+    /* The 4-bit reg field is split into a 1-bit group and a 3-bit register
+       number.  It is not a true grp:reg pair, since only Dregs and Pregs
+       can be encoded, but it can be looked up the same way.  */
+    int grp = ((iw0 >> 3) & 0x1);
+    int reg = ((iw0 >> LDSTiiFP_reg_bits) & 0x7 /*LDSTiiFP_reg_mask*/);
+    int offset = ((iw0 >> LDSTiiFP_offset_bits) & LDSTiiFP_offset_mask);
+    int W = ((iw0 >> LDSTiiFP_W_bits) & LDSTiiFP_W_mask);
+    uint32_t imm = negimm5s4(offset);
+    TCGv target = get_allreg(dc, grp, reg);
+    TCGv addr;
+
+    TRACE_EXTRACT("%s: W:%i offset:%#x grp:%i reg:%i",
+                  __func__, W, offset, grp, reg);
+
+    /* Effective address = FP + imm.  */
+    addr = tcg_temp_new();
+    tcg_gen_addi_tl(addr, cpu_fpreg, imm);
+
+    if (W != 0) {
+        /* [FP + imm{offset}] = DPreg{reg}; */
+        tcg_gen_qemu_st32(target, addr, dc->mem_idx);
+    } else {
+        /* DPreg{reg} = [FP + imm{offset}]; */
+        tcg_gen_qemu_ld32u(target, addr, dc->mem_idx);
+    }
+
+    tcg_temp_free(addr);
+}
+
+static void
+decode_LDSTii_0(DisasContext *dc, uint16_t iw0)
+{
+    /* LDSTii
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 0 | 1 |.W.|.op....|.offset........|.ptr.......|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Load/store at Preg{ptr} plus a small unsigned immediate.  op selects
+       the access: 0 = 32-bit Dreg, 1 = 16-bit (Z), 2 = 16-bit (X, loads
+       only), 3 = 32-bit Preg.  */
+    int reg = ((iw0 >> LDSTii_reg_bit) & LDSTii_reg_mask);
+    int ptr = ((iw0 >> LDSTii_ptr_bit) & LDSTii_ptr_mask);
+    int offset = ((iw0 >> LDSTii_offset_bit) & LDSTii_offset_mask);
+    int op = ((iw0 >> LDSTii_op_bit) & LDSTii_op_mask);
+    int W = ((iw0 >> LDSTii_W_bit) & LDSTii_W_mask);
+    uint32_t imm;
+    TCGv addr;
+
+    TRACE_EXTRACT("%s: W:%i op:%i offset:%#x ptr:%i reg:%i",
+                  __func__, W, op, offset, ptr, reg);
+
+    /* 32-bit forms use the uimm4s4 offset, 16-bit forms use uimm4s2.  */
+    imm = (op == 0 || op == 3) ? uimm4s4(offset) : uimm4s2(offset);
+
+    addr = tcg_temp_new();
+    tcg_gen_addi_tl(addr, cpu_preg[ptr], imm);
+
+    if (W == 0) {
+        switch (op) {
+        case 0:
+            /* Dreg{reg} = [Preg{ptr} + imm{offset}]; */
+            tcg_gen_qemu_ld32u(cpu_dreg[reg], addr, dc->mem_idx);
+            break;
+        case 1:
+            /* Dreg{reg} = W[Preg{ptr} + imm{offset}] (Z); */
+            tcg_gen_qemu_ld16u(cpu_dreg[reg], addr, dc->mem_idx);
+            break;
+        case 2:
+            /* Dreg{reg} = W[Preg{ptr} + imm{offset}] (X); */
+            tcg_gen_qemu_ld16s(cpu_dreg[reg], addr, dc->mem_idx);
+            break;
+        case 3:
+            /* Preg{reg} = [Preg{ptr} + imm{offset}]; */
+            tcg_gen_qemu_ld32u(cpu_preg[reg], addr, dc->mem_idx);
+            break;
+        }
+    } else {
+        switch (op) {
+        case 0:
+            /* [Preg{ptr} + imm{offset}] = Dreg{reg}; */
+            tcg_gen_qemu_st32(cpu_dreg[reg], addr, dc->mem_idx);
+            break;
+        case 1:
+            /* W[Preg{ptr} + imm{offset}] = Dreg{reg}; */
+            tcg_gen_qemu_st16(cpu_dreg[reg], addr, dc->mem_idx);
+            break;
+        case 3:
+            /* [Preg{ptr} + imm{offset}] = Preg{reg}; */
+            tcg_gen_qemu_st32(cpu_preg[reg], addr, dc->mem_idx);
+            break;
+        default:
+            /* op == 2: there is no sign-extending store.  */
+            illegal_instruction(dc);
+            break;
+        }
+    }
+
+    tcg_temp_free(addr);
+}
+
+static void
+decode_LoopSetup_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* LoopSetup
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |.rop...|.c.|.soffset.......|
+       |.reg...........| - | - |.eoffset...............................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Set up hardware loop c: optionally load the loop counter from a Preg,
+       then write the loop top/bottom addresses (PC-relative) and continue
+       at the instruction following this 4-byte opcode.  */
+    int c   = ((iw0 >> (LoopSetup_c_bits - 16)) & LoopSetup_c_mask);
+    int reg = ((iw1 >> LoopSetup_reg_bits) & LoopSetup_reg_mask);
+    int rop = ((iw0 >> (LoopSetup_rop_bits - 16)) & LoopSetup_rop_mask);
+    int soffset = ((iw0 >> (LoopSetup_soffset_bits - 16)) & LoopSetup_soffset_mask);
+    int eoffset = ((iw1 >> LoopSetup_eoffset_bits) & LoopSetup_eoffset_mask);
+    int spcrel = pcrel4(soffset);
+    int epcrel = lppcrel10(eoffset);
+
+    TRACE_EXTRACT("%s: rop:%i c:%i soffset:%i reg:%i eoffset:%i",
+                  __func__, rop, c, soffset, reg, eoffset);
+
+    if (rop == 0) {
+        /* LSETUP (imm{soffset}, imm{eoffset}) LCreg{c}; */
+        /* The loop counter is left untouched.  */
+    } else if ((rop == 1 || rop == 3) && reg <= 7) {
+        if (rop == 1) {
+            /* LSETUP (imm{soffset}, imm{eoffset}) LCreg{c} = Preg{reg}; */
+            tcg_gen_mov_tl(cpu_lcreg[c], cpu_preg[reg]);
+        } else {
+            /* LSETUP (imm{soffset}, imm{eoffset}) LCreg{c} = Preg{reg} >> 1; */
+            tcg_gen_shri_tl(cpu_lcreg[c], cpu_preg[reg], 1);
+        }
+    } else {
+        illegal_instruction(dc);
+    }
+
+    /* Loop top and bottom are relative to the address of this insn.  */
+    tcg_gen_movi_tl(cpu_ltreg[c], dc->pc + spcrel);
+    tcg_gen_movi_tl(cpu_lbreg[c], dc->pc + epcrel);
+    gen_gotoi_tb(dc, 0, dc->pc + 4);
+}
+
+static void
+decode_LDIMMhalf_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* LDIMMhalf
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 |.Z.|.H.|.S.|.grp...|.reg.......|
+       |.hword.........................................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Load a 16-bit immediate into the register selected by grp:reg, either
+       extended to the full register (S = sign, Z = zero) or into one half
+       only (H selects high/low) with the other half preserved.  */
+    int H = ((iw0 >> (LDIMMhalf_H_bits - 16)) & LDIMMhalf_H_mask);
+    int Z = ((iw0 >> (LDIMMhalf_Z_bits - 16)) & LDIMMhalf_Z_mask);
+    int S = ((iw0 >> (LDIMMhalf_S_bits - 16)) & LDIMMhalf_S_mask);
+    int reg = ((iw0 >> (LDIMMhalf_reg_bits - 16)) & LDIMMhalf_reg_mask);
+    int grp = ((iw0 >> (LDIMMhalf_grp_bits - 16)) & LDIMMhalf_grp_mask);
+    int hword = ((iw1 >> LDIMMhalf_hword_bits) & LDIMMhalf_hword_mask);
+    uint32_t val;
+    TCGv target;
+
+    TRACE_EXTRACT("%s: Z:%i H:%i S:%i grp:%i reg:%i hword:%#x",
+                  __func__, Z, H, S, grp, reg, hword);
+
+    target = get_allreg(dc, grp, reg);
+
+    if (S == 1) {
+        val = imm16(hword);
+    } else {
+        val = luimm16(hword);
+    }
+
+    if (S == 1 && H == 0 && Z == 0) {
+        /* genreg{grp,reg} = imm{hword} (X); */
+        /* Take care of immediate sign extension ourselves */
+        tcg_gen_movi_i32(target, (int16_t)val);
+    } else if (S != 0) {
+        /* Sign extension cannot be combined with H or Z.  */
+        illegal_instruction(dc);
+    } else if (H == 0 && Z == 1) {
+        /* genreg{grp,reg} = imm{hword} (Z); */
+        tcg_gen_movi_i32(target, val);
+    } else if (H == 0 && Z == 0) {
+        /* genreg_lo{grp,reg} = imm{hword}; */
+        /* XXX: Convert this to a helper.  */
+        tcg_gen_andi_tl(target, target, 0xffff0000);
+        tcg_gen_ori_tl(target, target, val);
+    } else if (H == 1 && Z == 0) {
+        /* genreg_hi{grp,reg} = imm{hword}; */
+        /* XXX: Convert this to a helper.  */
+        tcg_gen_andi_tl(target, target, 0xffff);
+        tcg_gen_ori_tl(target, target, val << 16);
+    } else {
+        illegal_instruction(dc);
+    }
+}
+
+static void
+decode_CALLa_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* CALLa
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 0 | 0 | 0 | 1 |.S.|.msw...........................|
+       |.lsw...........................................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* PC-relative CALL / JUMP.L with a 24-bit offset split across the two
+       opcode words.  No branch is emitted here: the kind and target are
+       recorded in the DisasContext (is_jmp plus the hwloop callback and
+       its data).  */
+    int S   = ((iw0 >> (CALLa_S_bits - 16)) & CALLa_S_mask);
+    int lsw = (iw1 & 0xffff);
+    int msw = (iw0 & 0xff);
+    int pcrel = pcrel24((msw << 16) | lsw);
+
+    TRACE_EXTRACT("%s: S:%i msw:%#x lsw:%#x", __func__, S, msw, lsw);
+
+    /* S == 1: CALL imm{pcrel};  otherwise: JUMP.L imm{pcrel}; */
+    dc->is_jmp = (S == 1) ? DISAS_CALL : DISAS_JUMP;
+
+    /* The offset is carried to the callback inside the data pointer.  */
+    dc->hwloop_callback = gen_hwloop_br_pcrel_imm;
+    dc->hwloop_data = (void *)(unsigned long)pcrel;
+}
+
+static void
+decode_LDSTidxI_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* LDSTidxI
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 0 | 0 | 1 |.W.|.Z.|.sz....|.ptr.......|.reg.......|
+       |.offset........................................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* Load/store at Preg{ptr} plus a 16-bit immediate offset.  sz picks the
+       access width (0 = 32-bit, 1 = 16-bit, 2 = 8-bit) and with it the
+       immediate decoder (imm16s4 / imm16s2 / imm16).  */
+    int Z = ((iw0 >> (LDSTidxI_Z_bits - 16)) & LDSTidxI_Z_mask);
+    int W = ((iw0 >> (LDSTidxI_W_bits - 16)) & LDSTidxI_W_mask);
+    int sz = ((iw0 >> (LDSTidxI_sz_bits - 16)) & LDSTidxI_sz_mask);
+    int reg = ((iw0 >> (LDSTidxI_reg_bits - 16)) & LDSTidxI_reg_mask);
+    int ptr = ((iw0 >> (LDSTidxI_ptr_bits - 16)) & LDSTidxI_ptr_mask);
+    int offset = ((iw1 >> LDSTidxI_offset_bits) & LDSTidxI_offset_mask);
+    uint32_t imm_16s4 = imm16s4(offset);
+    uint32_t imm_16s2 = imm16s2(offset);
+    uint32_t imm_16 = imm16(offset);
+    TCGv addr;
+
+    TRACE_EXTRACT("%s: W:%i Z:%i sz:%i ptr:%i reg:%i offset:%#x",
+                  __func__, W, Z, sz, ptr, reg, offset);
+
+    /* Effective address.  */
+    addr = tcg_temp_new();
+    switch (sz) {
+    case 0:
+        tcg_gen_addi_tl(addr, cpu_preg[ptr], imm_16s4);
+        break;
+    case 1:
+        tcg_gen_addi_tl(addr, cpu_preg[ptr], imm_16s2);
+        break;
+    case 2:
+        tcg_gen_addi_tl(addr, cpu_preg[ptr], imm_16);
+        break;
+    default:
+        illegal_instruction(dc);
+        break;
+    }
+
+    if (W == 0) {
+        switch (sz) {
+        case 0:
+            if (Z == 0)
+                /* Dreg{reg} = [Preg{ptr} + imm{offset}]; */
+                tcg_gen_qemu_ld32u(cpu_dreg[reg], addr, dc->mem_idx);
+            else if (Z == 1)
+                /* Preg{reg} = [Preg{ptr} + imm{offset}]; */
+                tcg_gen_qemu_ld32u(cpu_preg[reg], addr, dc->mem_idx);
+            break;
+        case 1:
+            if (Z == 0)
+                /* Dreg{reg} = W[Preg{ptr} + imm{offset}] (Z); */
+                tcg_gen_qemu_ld16u(cpu_dreg[reg], addr, dc->mem_idx);
+            else if (Z == 1)
+                /* Dreg{reg} = W[Preg{ptr} + imm{offset}] (X); */
+                tcg_gen_qemu_ld16s(cpu_dreg[reg], addr, dc->mem_idx);
+            break;
+        case 2:
+            if (Z == 0)
+                /* Dreg{reg} = B[Preg{ptr} + imm{offset}] (Z); */
+                tcg_gen_qemu_ld8u(cpu_dreg[reg], addr, dc->mem_idx);
+            else if (Z == 1)
+                /* Dreg{reg} = B[Preg{ptr} + imm{offset}] (X); */
+                tcg_gen_qemu_ld8s(cpu_dreg[reg], addr, dc->mem_idx);
+            break;
+        }
+    } else {
+        switch (sz) {
+        case 0:
+            if (Z == 0)
+                /* [Preg{ptr} + imm{offset}] = Dreg{reg}; */
+                tcg_gen_qemu_st32(cpu_dreg[reg], addr, dc->mem_idx);
+            else if (Z == 1)
+                /* [Preg{ptr} + imm{offset}] = Preg{reg}; */
+                tcg_gen_qemu_st32(cpu_preg[reg], addr, dc->mem_idx);
+            else
+                illegal_instruction(dc);
+            break;
+        case 1:
+            if (Z == 0)
+                /* W[Preg{ptr} + imm{offset}] = Dreg{reg}; */
+                tcg_gen_qemu_st16(cpu_dreg[reg], addr, dc->mem_idx);
+            else
+                illegal_instruction(dc);
+            break;
+        case 2:
+            if (Z == 0)
+                /* B[Preg{ptr} + imm{offset}] = Dreg{reg}; */
+                tcg_gen_qemu_st8(cpu_dreg[reg], addr, dc->mem_idx);
+            else
+                illegal_instruction(dc);
+            break;
+        default:
+            illegal_instruction(dc);
+            break;
+        }
+    }
+
+    tcg_temp_free(addr);
+}
+
+static void
+decode_linkage_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* linkage
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |.R.|
+       |.framesize.....................................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    /* LINK (R == 0) pushes RETS and FP, points FP at the saved FP and
+       reserves the frame; UNLINK (R != 0, framesize == 0) undoes that.  */
+    int R = ((iw0 >> (Linkage_R_bits - 16)) & Linkage_R_mask);
+    int framesize = ((iw1 >> Linkage_framesize_bits) & Linkage_framesize_mask);
+
+    TRACE_EXTRACT("%s: R:%i framesize:%#x", __func__, R, framesize);
+
+    if (R != 0 && framesize != 0) {
+        /* UNLINK takes no frame size.  */
+        illegal_instruction(dc);
+        return;
+    }
+
+    if (R == 0) {
+        /* LINK imm{framesize}; */
+        int frame_bytes = uimm16s4(framesize);
+
+        /* Push RETS, then FP.  */
+        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
+        tcg_gen_qemu_st32(cpu_rets, cpu_spreg, dc->mem_idx);
+        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, 4);
+        tcg_gen_qemu_st32(cpu_fpreg, cpu_spreg, dc->mem_idx);
+        /* New frame pointer, then reserve the frame below it.  */
+        tcg_gen_mov_tl(cpu_fpreg, cpu_spreg);
+        tcg_gen_subi_tl(cpu_spreg, cpu_spreg, frame_bytes);
+        return;
+    }
+
+    /* UNLINK; */
+    /* Restore SP from FP, then pop FP and RETS.  */
+    tcg_gen_mov_tl(cpu_spreg, cpu_fpreg);
+    tcg_gen_qemu_ld32u(cpu_fpreg, cpu_spreg, dc->mem_idx);
+    tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);
+    tcg_gen_qemu_ld32u(cpu_rets, cpu_spreg, dc->mem_idx);
+    tcg_gen_addi_tl(cpu_spreg, cpu_spreg, 4);
+}
+
+/* Translate a dsp32mac instruction.  MAC1 (op1/h01/h11/MM) and MAC0
+   (op0/h00/h10) are each run through decode_macfunc(); when w1/w0 is set the
+   result goes to Dreg{dst + 1}/Dreg{dst} (P != 0) or to Dreg{dst}.H/.L
+   (P == 0).  XXX: encoding checks and ASTAT updates are not ported yet.  */
+static void
+decode_dsp32mac_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+  /* dsp32mac
+     +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+     | 1 | 1 | 0 | 0 |.M.| 0 | 0 |.mmod..........|.MM|.P.|.w1|.op1...|
+     |.h01|.h11|.w0|.op0...|.h00|.h10|.dst.......|.src0......|.src1..|
+     +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+  int op1  = ((iw0 >> (DSP32Mac_op1_bits - 16)) & DSP32Mac_op1_mask);
+  int w1   = ((iw0 >> (DSP32Mac_w1_bits - 16)) & DSP32Mac_w1_mask);
+  int P    = ((iw0 >> (DSP32Mac_p_bits - 16)) & DSP32Mac_p_mask);
+  int MM   = ((iw0 >> (DSP32Mac_MM_bits - 16)) & DSP32Mac_MM_mask);
+  int mmod = ((iw0 >> (DSP32Mac_mmod_bits - 16)) & DSP32Mac_mmod_mask);
+  int M    = ((iw0 >> (DSP32Mac_M_bits - 16)) & DSP32Mac_M_mask);
+  int w0   = ((iw1 >> DSP32Mac_w0_bits) & DSP32Mac_w0_mask);
+  int src0 = ((iw1 >> DSP32Mac_src0_bits) & DSP32Mac_src0_mask);
+  int src1 = ((iw1 >> DSP32Mac_src1_bits) & DSP32Mac_src1_mask);
+  int dst  = ((iw1 >> DSP32Mac_dst_bits) & DSP32Mac_dst_mask);
+  int h10  = ((iw1 >> DSP32Mac_h10_bits) & DSP32Mac_h10_mask);
+  int h00  = ((iw1 >> DSP32Mac_h00_bits) & DSP32Mac_h00_mask);
+  int op0  = ((iw1 >> DSP32Mac_op0_bits) & DSP32Mac_op0_mask);
+  int h11  = ((iw1 >> DSP32Mac_h11_bits) & DSP32Mac_h11_mask);
+  int h01  = ((iw1 >> DSP32Mac_h01_bits) & DSP32Mac_h01_mask);
+
+  int v_i = 0;
+  /* res stages Dreg{dst} for half writes; res_hi stages Dreg{dst + 1}.  */
+  TCGv res, res_hi;
+
+  TRACE_EXTRACT("%s: M:%i mmod:%i MM:%i P:%i w1:%i op1:%i h01:%i h11:%i "
+		      "w0:%i op0:%i h00:%i h10:%i dst:%i src0:%i src1:%i",
+		 __func__, M, mmod, MM, P, w1, op1, h01, h11, w0, op0, h00, h10,
+		 dst, src0, src1);
+
+  /* XXX: encoding checks that are not enabled yet:
+  if (w0 == 0 && w1 == 0 && op1 == 3 && op0 == 3)
+    illegal_instruction(dc);
+
+  if (op1 == 3 && MM)
+    illegal_instruction(dc);
+
+  if ((w1 || w0) && mmod == M_W32)
+    illegal_instruction(dc);
+
+  if (((1 << mmod) & (P ? 0x131b : 0x1b5f)) == 0)
+    illegal_instruction(dc);
+  */
+
+  /* XXX: Missing TRACE_INSN - this is as good as it gets for now  */
+  if (w0 && w1 && P)
+    TRACE_INSN (cpu, "R%i = macfunc, R%i = macfunc", dst + 1, dst);
+  else if (w0 && P)
+    TRACE_INSN (cpu, "R%i = macfunc", dst);
+  else if (w1 && P)
+    TRACE_INSN (cpu, "R%i = macfunc", dst + 1);
+  else if (w0 && !P)
+    TRACE_INSN (cpu, "R%i.L = macfunc", dst);
+  else if (w1 && !P)
+    TRACE_INSN (cpu, "R%i.H = macfunc", dst);
+  else if (!w0 && !w1 && (op1 != 3 && op0 != 3))
+    TRACE_INSN (cpu, "A0 = macfunc, A1 = macfunc");
+  else if (!w0 && w1 && (op1 != 3 && op0 != 3))
+    TRACE_INSN (cpu, "A1 = macfunc");
+  else if (w0 && !w1 && (op1 != 3 && op0 != 3))
+    TRACE_INSN (cpu, "A0 = macfunc");
+
+  res = tcg_temp_local_new();
+  tcg_gen_mov_tl(res, cpu_dreg[dst]);
+  res_hi = tcg_temp_local_new();
+  if (w1 == 1 || op1 != 3)
+    {
+      TCGv res1 = decode_macfunc (dc, 1, op1, h01, h11, src0, src1, mmod, MM, P, &v_i);
+      /* XXX: AZ tracking not ported: if (op1 == 3) zero = !!(res1 == 0);  */
+      if (w1)
+	{
+	  if (P)
+	    /* Stage only: MAC0 below may still read the old Dreg{dst + 1}.  */
+	    tcg_gen_mov_tl(res_hi, res1);
+	  else
+	    {
+//	      if (res1 & 0xffff0000)
+//		illegal_instruction(dc);
+//	      res = REG_H_L (res1 << 16, res);
+	      gen_mov_h_tl(res, res1);
+	    }
+	}
+      tcg_temp_free(res1);
+    }
+  if (w0 == 1 || op0 != 3)
+    {
+      TCGv res0 = decode_macfunc (dc, 0, op0, h00, h10, src0, src1, mmod, 0, P, &v_i);
+      /* XXX: AZ tracking not ported: if (op1 == 3) zero |= !!(res0 == 0);  */
+      if (w0)
+	{
+	  if (P)
+	    tcg_gen_mov_tl(cpu_dreg[dst], res0);
+	  else
+	    {
+//	      if (res0 & 0xffff0000)
+//		illegal_instruction(dc);
+//	      res = REG_H_L (res, res0);
+	      gen_mov_l_tl(res, res0);
+	    }
+	}
+      tcg_temp_free(res0);
+    }
+
+  /* Both MACs have read their source registers; commit the staged results.  */
+  if (P)
+    {
+      /* NOTE(review): nothing here rejects dst == 7; confirm cpu_dreg[] bounds.  */
+      if (w1)
+	tcg_gen_mov_tl(cpu_dreg[dst + 1], res_hi);
+    }
+  else if (w0 || w1)
+    tcg_gen_mov_tl(cpu_dreg[dst], res);
+
+  /* XXX: ASTAT updates still to be ported:
+      if (P || w0 || w1)
+	{
+	  SET_ASTATREG (v, v_i);
+	  if (v_i)
+	    SET_ASTATREG (vs, v_i);
+	}
+      if (op0 == 3 || op1 == 3)
+	SET_ASTATREG (az, zero);
+  */
+
+  tcg_temp_free(res_hi);
+  tcg_temp_free(res);
+}
+
+/* Translate a dsp32mult instruction: compute the MAC1 product (h01/h11, MM)
+   when w1 is set and the MAC0 product (h00/h10) when w0 is set, then write
+   them to Dreg{dst + 1}/Dreg{dst} (P != 0) or to Dreg{dst}.H/.L (P == 0).
+   XXX: the commented-out extract_mult() step and ASTAT updates are missing.  */
+static void
+decode_dsp32mult_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* dsp32mult
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 0 | 0 |.M.| 0 | 1 |.mmod..........|.MM|.P.|.w1|.op1...|
+       |.h01|.h11|.w0|.op0...|.h00|.h10|.dst.......|.src0......|.src1..|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int op1  = ((iw0 >> (DSP32Mac_op1_bits - 16)) & DSP32Mac_op1_mask);
+    int w1   = ((iw0 >> (DSP32Mac_w1_bits - 16)) & DSP32Mac_w1_mask);
+    int P    = ((iw0 >> (DSP32Mac_p_bits - 16)) & DSP32Mac_p_mask);
+    int MM   = ((iw0 >> (DSP32Mac_MM_bits - 16)) & DSP32Mac_MM_mask);
+    int mmod = ((iw0 >> (DSP32Mac_mmod_bits - 16)) & DSP32Mac_mmod_mask);
+    int M    = ((iw0 >> (DSP32Mac_M_bits - 16)) & DSP32Mac_M_mask);
+    int w0   = ((iw1 >> DSP32Mac_w0_bits) & DSP32Mac_w0_mask);
+    int src0 = ((iw1 >> DSP32Mac_src0_bits) & DSP32Mac_src0_mask);
+    int src1 = ((iw1 >> DSP32Mac_src1_bits) & DSP32Mac_src1_mask);
+    int dst  = ((iw1 >> DSP32Mac_dst_bits) & DSP32Mac_dst_mask);
+    int h10  = ((iw1 >> DSP32Mac_h10_bits) & DSP32Mac_h10_mask);
+    int h00  = ((iw1 >> DSP32Mac_h00_bits) & DSP32Mac_h00_mask);
+    int op0  = ((iw1 >> DSP32Mac_op0_bits) & DSP32Mac_op0_mask);
+    int h11  = ((iw1 >> DSP32Mac_h11_bits) & DSP32Mac_h11_mask);
+    int h01  = ((iw1 >> DSP32Mac_h01_bits) & DSP32Mac_h01_mask);
+
+    /* res stages Dreg{dst} for half writes; res_hi stages Dreg{dst + 1}.  */
+    TCGv res, res_hi;
+    int sat0 = 0, sat1 = 0;
+
+    TRACE_EXTRACT("%s: M:%i mmod:%i MM:%i P:%i w1:%i op1:%i h01:%i h11:%i "
+                  "w0:%i op0:%i h00:%i h10:%i dst:%i src0:%i src1:%i",
+                  __func__, M, mmod, MM, P, w1, op1, h01, h11, w0, op0, h00, h10,
+                  dst, src0, src1);
+
+    if (w1 == 0 && w0 == 0)
+        illegal_instruction(dc);
+    if (((1 << mmod) & (P ? 0x313 : 0x1b57)) == 0)
+        illegal_instruction(dc);
+    if (P && ((dst & 1) || (op1 != 0) || (op0 != 0) || !is_macmod_pmove (mmod)))
+        illegal_instruction(dc);
+    if (!P && ((op1 != 0) || (op0 != 0) || !is_macmod_hmove (mmod)))
+        illegal_instruction(dc);
+
+  if (w0 && w1 && P)
+    TRACE_INSN (cpu, "R%i:%i = dsp32mult", dst + 1, dst);
+  else if (w0 && w1 && !P)
+    TRACE_INSN (cpu, "R%i.L = R%i.%s * R%i.%s, R%i.H = R%i.%s * R%i.%s;",
+		dst, src0, h01 ? "L" : "H" , src1, h11 ? "L" : "H",
+		dst, src0, h00 ? "L" : "H" , src1, h10 ? "L" : "H");
+  else if (w0 && P)
+    TRACE_INSN (cpu, "R%i = R%i.%s * R%i.%s;",
+		dst, src0, h00 ? "L" : "H" , src1, h10 ? "L" : "H");
+  else if (w1 && P)
+    TRACE_INSN (cpu, "R%i = R%i.%s * R%i.%s;",
+		dst + 1, src0, h01 ? "L" : "H" , src1, h11 ? "L" : "H");
+  else if (w0 && !P)
+    TRACE_INSN (cpu, "R%i.L = R%i.%s * R%i.%s;",
+		dst, src0, h00 ? "L" : "H" , src1, h10 ? "L" : "H");
+  else if (w1 && !P)
+    TRACE_INSN (cpu, "R%i.H = R%i.%s * R%i.%s;",
+		dst, src0, h01 ? "L" : "H" , src1, h11 ? "L" : "H");
+
+  res = tcg_temp_local_new();
+  tcg_gen_mov_tl(res, cpu_dreg[dst]);
+  res_hi = tcg_temp_local_new();
+  /* MAC1 product: Dreg{dst + 1} when P, else Dreg{dst}.H.  */
+  if (w1)
+    {
+      TCGv r = decode_multfunc_tl(dc, h01, h11, src0, src1, mmod, MM, &sat1);
+//      bu32 res1 = extract_mult (dc, r, mmod, MM, P, NULL);
+      if (P)
+	/* Stage only: the MAC0 product below may still read Dreg{dst + 1}.  */
+	tcg_gen_mov_tl(res_hi, r);
+      else
+	{
+//	  if (res1 & 0xFFFF0000)
+//	    illegal_instruction(dc);
+//	  res = REG_H_L (res1 << 16, res);
+	  gen_mov_h_tl(res, r);
+	}
+      tcg_temp_free(r);
+    }
+
+  /* MAC0 product: Dreg{dst} when P, else Dreg{dst}.L.  */
+  if (w0)
+    {
+      TCGv r = decode_multfunc_tl(dc, h00, h10, src0, src1, mmod, 0, &sat0);
+//      bu32 res0 = extract_mult (dc, r, mmod, 0, P, NULL);
+      if (P)
+	tcg_gen_mov_tl(cpu_dreg[dst], r);
+      else
+	{
+//	  if (res0 & 0xFFFF0000)
+//	    illegal_instruction(dc);
+//	  res = REG_H_L (res, res0);
+	  gen_mov_l_tl(res, r);
+	}
+      tcg_temp_free(r);
+    }
+
+  /* Both products have read their sources; commit the staged results.  */
+  if (P && w1)
+    tcg_gen_mov_tl(cpu_dreg[dst + 1], res_hi);
+  if (!P && (w0 || w1))
+    tcg_gen_mov_tl(cpu_dreg[dst], res);
+
+  /* XXX: ASTAT updates still to be ported (when w0 || w1):
+      STORE (ASTATREG (v), sat0 | sat1);
+      STORE (ASTATREG (v_copy), sat0 | sat1);
+      if (sat0 | sat1)
+	STORE (ASTATREG (vs), 1);
+  */
+
+  tcg_temp_free(res_hi);
+  tcg_temp_free(res);
+}
+
+static void
+decode_dsp32alu_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* dsp32alu
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 0 | 0 |.M.| 1 | 0 | - | - | - |.HL|.aopcde............|
+       |.aop...|.s.|.x.|.dst0......|.dst1......|.src0......|.src1......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int s    = ((iw1 >> DSP32Alu_s_bits) & DSP32Alu_s_mask);
+    int x    = ((iw1 >> DSP32Alu_x_bits) & DSP32Alu_x_mask);
+    int aop  = ((iw1 >> DSP32Alu_aop_bits) & DSP32Alu_aop_mask);
+    int src0 = ((iw1 >> DSP32Alu_src0_bits) & DSP32Alu_src0_mask);
+    int src1 = ((iw1 >> DSP32Alu_src1_bits) & DSP32Alu_src1_mask);
+    int dst0 = ((iw1 >> DSP32Alu_dst0_bits) & DSP32Alu_dst0_mask);
+    int dst1 = ((iw1 >> DSP32Alu_dst1_bits) & DSP32Alu_dst1_mask);
+    int M    = ((iw0 >> (DSP32Alu_M_bits - 16)) & DSP32Alu_M_mask);
+    int HL   = ((iw0 >> (DSP32Alu_HL_bits - 16)) & DSP32Alu_HL_mask);
+    int aopcde = ((iw0 >> (DSP32Alu_aopcde_bits - 16)) & DSP32Alu_aopcde_mask);
+    TCGv tmp;
+    TCGv_i64 tmp64;
+
+    TRACE_EXTRACT("%s: M:%i HL:%i aopcde:%i aop:%i s:%i x:%i dst0:%i "
+                  "dst1:%i src0:%i src1:%i",
+                  __func__, M, HL, aopcde, aop, s, x, dst0, dst1, src0, src1);
+
+    if ((aop == 0 || aop == 2) && aopcde == 9 && HL == 0 && s == 0) {
+        int a = aop >> 1;
+        /* Areg_lo{a} = Dreg_lo{src0}; */
+        tcg_gen_andi_i64(cpu_areg[a], cpu_areg[a], ~0xffff);
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(tmp64, cpu_dreg[src0]);
+        tcg_gen_andi_i64(tmp64, tmp64, 0xffff);
+        tcg_gen_or_i64(cpu_areg[a], cpu_areg[a], tmp64);
+        tcg_temp_free_i64(tmp64);
+    } else if ((aop == 0 || aop == 2) && aopcde == 9 && HL == 1 && s == 0) {
+        int a = aop >> 1;
+        /* Areg_hi{a} = Dreg_hi{src0}; */
+        tcg_gen_andi_i64(cpu_areg[a], cpu_areg[a], 0xff0000ffff);
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(tmp64, cpu_dreg[src0]);
+        tcg_gen_andi_i64(tmp64, tmp64, 0xffff0000);
+        tcg_gen_or_i64(cpu_areg[a], cpu_areg[a], tmp64);
+        tcg_temp_free_i64(tmp64);
+    }
+#if 0
+  else if ((aop == 1 || aop == 0) && aopcde == 5)
+    {
+      bs32 val0 = DREG (src0);
+      bs32 val1 = DREG (src1);
+      bs32 res;
+      bs32 signRes;
+      bs32 ovX, sBit1, sBit2, sBitRes1, sBitRes2;
+
+      TRACE_INSN (cpu, "R%i.%s = R%i %s R%i (RND12)", dst0, HL ? "L" : "H",
+		  src0, aop & 0x1 ? "-" : "+", src1);
+
+      /* If subtract, just invert and add one */
+      if (aop & 0x1)
+	val1= ~val1 + 1;
+
+      /* Get the sign bits, since we need them later */
+      sBit1 = !!(val0 & 0x80000000);
+      sBit2 = !!(val1 & 0x80000000);
+
+      res = val0 + val1;
+
+      sBitRes1 = !!(res & 0x80000000);
+      /* Round to the 12th bit */
+      res += 0x0800;
+      sBitRes2 = !!(res & 0x80000000);
+
+      signRes = res;
+      signRes >>= 27;
+
+      /* Overflow if
+       * pos + pos = neg
+       * neg + neg = pos
+       * positive_res + positive_round = neg
+       * shift and upper 4 bits where not the same
+       */
+      if ((!(sBit1 ^ sBit2) && (sBit1 ^ sBitRes1)) ||
+	  (!sBit1 && !sBit2 && sBitRes2) ||
+	  ((signRes != 0) && (signRes != -1)))
+	{
+	  /* Both X1 and X2 Neg res is neg overflow */
+	  if (sBit1 && sBit2)
+	    res = 0x80000000;
+	  /* Both X1 and X2 Pos res is pos overflow */
+	  else if (!sBit1 && !sBit2)
+	    res = 0x7FFFFFFF;
+	  /* Pos+Neg or Neg+Pos take the sign of the result */
+	  else if (sBitRes1)
+	    res = 0x80000000;
+	  else
+	    res = 0x7FFFFFFF;
+
+	  ovX = 1;
+	}
+      else
+	{
+	  /* Shift up now after overflow detection */
+	  ovX = 0;
+	  res <<= 4;
+	}
+
+      res >>= 16;
+
+      if (HL)
+	STORE (DREG (dst0), REG_H_L (res << 16, DREG (dst0)));
+      else
+	STORE (DREG (dst0), REG_H_L (DREG (dst0), res));
+
+      SET_ASTATREG (az, res == 0);
+      SET_ASTATREG (an, res & 0x8000);
+      SET_ASTATREG (v, ovX);
+      if (ovX)
+	SET_ASTATREG (vs, ovX);
+    }
+  else if ((aop == 2 || aop == 3) && aopcde == 5)
+    {
+      bs32 val0 = DREG (src0);
+      bs32 val1 = DREG (src1);
+      bs32 res;
+
+      TRACE_INSN (cpu, "R%i.%s = R%i %s R%i (RND20)", dst0, HL ? "L" : "H",
+		  src0, aop & 0x1 ? "-" : "+", src1);
+
+      /* If subtract, just invert and add one */
+      if (aop & 0x1)
+	val1= ~val1 + 1;
+
+      res = (val0 >> 4) + (val1 >> 4) + (((val0 & 0xf) + (val1 & 0xf)) >> 4);
+      res += 0x8000;
+      /* Don't sign extend during the shift */
+      res = ((bu32)res >> 16);
+
+      /* Don't worry about overflows, since we are shifting right */
+
+      if (HL)
+	STORE (DREG (dst0), REG_H_L (res << 16, DREG (dst0)));
+      else
+	STORE (DREG (dst0), REG_H_L (DREG (dst0), res));
+
+      SET_ASTATREG (az, res == 0);
+      SET_ASTATREG (an, res & 0x8000);
+      SET_ASTATREG (v, 0);
+    }
+#endif
+  else if (aopcde == 2 || aopcde == 3)
+    {
+//      bu32 s1, s2, val, ac0_i = 0, v_i = 0;
+      TCGv s1, s2, d;
+
+      TRACE_INSN (cpu, "R%i.%c = R%i.%c %c R%i.%c%s;",
+		  dst0, HL ? 'H' : 'L',
+		  src0, (aop & 2) ? 'H' : 'L',
+		  (aopcde == 2) ? '+' : '-',
+		  src1, (aop & 1) ? 'H' : 'L',
+		  amod1 (s, x));
+
+      s1 = tcg_temp_new();
+      if (aop & 2)
+	tcg_gen_shri_tl(s1, cpu_dreg[src0], 16);
+      else
+	tcg_gen_ext16u_tl(s1, cpu_dreg[src0]);
+
+      s2 = tcg_temp_new();
+      if (aop & 1)
+	tcg_gen_shri_tl(s2, cpu_dreg[src1], 16);
+      else
+	tcg_gen_ext16u_tl(s2, cpu_dreg[src1]);
+
+      d = tcg_temp_new();
+      if (aopcde == 2)
+	tcg_gen_add_tl(d, s1, s2);
+      else
+	tcg_gen_sub_tl(d, s1, s2);
+      tcg_gen_andi_tl(d, d, 0xffff);
+
+      tcg_temp_free(s1);
+      tcg_temp_free(s2);
+
+      if (HL)
+	{
+	  tcg_gen_andi_tl(cpu_dreg[dst0], cpu_dreg[dst0], 0xffff);
+	  tcg_gen_shli_tl(d, d, 16);
+	  tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], d);
+	}
+      else
+	{
+	  tcg_gen_andi_tl(cpu_dreg[dst0], cpu_dreg[dst0], 0xffff0000);
+	  tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], d);
+	}
+      tcg_temp_free(d);
+/*
+      s1 = DREG (src0);
+      s2 = DREG (src1);
+      if (aop & 1)
+	s2 >>= 16;
+      if (aop & 2)
+	s1 >>= 16;
+
+      if (aopcde == 2)
+	val = add16(cpu, s1, s2, &ac0_i, &v_i, 0, 0, s, 0);
+      else
+	val = sub16(cpu, s1, s2, &ac0_i, &v_i, 0, 0, s, 0);
+
+      SET_ASTATREG (ac0, ac0_i);
+      SET_ASTATREG (v, v_i);
+      if (HL)
+	SET_DREG_H (dst0, val << 16);
+      else
+	SET_DREG_L (dst0, val);
+
+      SET_ASTATREG (an, val & 0x8000);
+*/
+    } else if ((aop == 0 || aop == 2) && aopcde == 9 && s == 1) {
+        int a = aop >> 1;
+        /* Areg{a} = Dreg{src0}; */
+        tcg_gen_ext_i32_i64(cpu_areg[a], cpu_dreg[src0]);
+    } else if ((aop == 1 || aop == 3) && aopcde == 9 && s == 0) {
+        int a = aop >> 1;
+        /* Areg_x{a} = Dreg_lo{src0}; */
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(tmp64, cpu_dreg[src0]);
+        tcg_gen_ext8u_i64(tmp64, tmp64);
+        tcg_gen_shli_i64(tmp64, tmp64, 32);
+        tcg_gen_andi_i64(cpu_areg[a], cpu_areg[a], 0xffffffff);
+        tcg_gen_or_i64(cpu_areg[a], cpu_areg[a], tmp64);
+        tcg_temp_free_i64(tmp64);
+    } else if (aop == 3 && aopcde == 11 && (s == 0 || s == 1)) {
+//      bu64 acc0 = get_extended_acc (cpu, 0);
+//      bu64 acc1 = get_extended_acc (cpu, 1);
+//      bu32 carry = (bu40)acc1 < (bu40)acc0;
+//      bu32 sat = 0;
+
+      /* A0 -= A1 (W32){s==1}; */
+
+/*
+      acc0 -= acc1;
+      if ((bs64)acc0 < -0x8000000000ll)
+	acc0 = -0x8000000000ull, sat = 1;
+      else if ((bs64)acc0 >= 0x7fffffffffll)
+	acc0 = 0x7fffffffffull, sat = 1;
+*/
+      tcg_gen_sub_i64(cpu_areg[0], cpu_areg[0], cpu_areg[1]);
+
+      if (s == 1)
+	{
+unhandled_instruction(dc, "A0 -= A1 (W32)");
+//	  if (acc0 & (bu64)0x8000000000ll)
+//	    acc0 &= 0x80ffffffffll, sat = 1;
+//	  else
+//	    acc0 &= 0xffffffffll;
+	}
+/*
+      STORE (AXREG (0), (acc0 >> 32) & 0xff);
+      STORE (AWREG (0), acc0 & 0xffffffff);
+      STORE (ASTATREG (az), acc0 == 0);
+      STORE (ASTATREG (an), !!(acc0 & (bu64)0x8000000000ll));
+      STORE (ASTATREG (ac0), carry);
+      STORE (ASTATREG (ac0_copy), carry);
+      STORE (ASTATREG (av0), sat);
+      if (sat)
+	STORE (ASTATREG (av0s), sat);
+*/
+#if 0
+    } else if ((aop == 0 || aop == 1) && aopcde == 22) {
+      bu32 s0, s0L, s0H, s1, s1L, s1H;
+      bu32 tmp0, tmp1, i;
+      const char * const opts[] = { "rndl", "rndh", "tl", "th" };
+
+      TRACE_INSN (cpu, "R%i = BYTEOP2P (R%i:%i, R%i:%i) (%s%s);", dst0,
+		  src0 + 1, src0, src1 + 1, src1, opts[HL + (aop << 1)],
+		  s ? ", r" : "");
+
+      if (src0 == src1)
+	illegal_instruction_combination(dc);
+
+      s0L = DREG (src0);
+      s0H = DREG (src0 + 1);
+      s1L = DREG (src1);
+      s1H = DREG (src1 + 1);
+      if (s)
+	{
+	  s0 = algn (s0H, s0L, IREG (0) & 3);
+	  s1 = algn (s1H, s1L, IREG (0) & 3);
+	}
+      else
+	{
+	  s0 = algn (s0L, s0H, IREG (0) & 3);
+	  s1 = algn (s1L, s1H, IREG (0) & 3);
+	}
+
+      i = !aop * 2;
+      tmp0 = ((((s1 >>  8) & 0xff) + ((s1 >>  0) & 0xff) +
+	       ((s0 >>  8) & 0xff) + ((s0 >>  0) & 0xff) + i) >> 2) & 0xff;
+      tmp1 = ((((s1 >> 24) & 0xff) + ((s1 >> 16) & 0xff) +
+	       ((s0 >> 24) & 0xff) + ((s0 >> 16) & 0xff) + i) >> 2) & 0xff;
+      SET_DREG (dst0, (tmp1 << (16 + (HL * 8))) | (tmp0 << (HL * 8)));
+#endif
+    } else if ((aop == 0 || aop == 1) && s == 0 && aopcde == 8) {
+        /* Areg{aop} = 0; */
+        tcg_gen_movi_i64(cpu_areg[0], 0);
+    } else if (aop == 2 && s == 0 && aopcde == 8) {
+        /* A1 = A0 = 0; */
+        tcg_gen_movi_i64(cpu_areg[0], 0);
+        tcg_gen_mov_i64(cpu_areg[1], cpu_areg[0]);
+#if 0
+    } else if ((aop == 0 || aop == 1 || aop == 2) && s == 1 && aopcde == 8) {
+      bs40 acc0 = get_extended_acc (cpu, 0);
+      bs40 acc1 = get_extended_acc (cpu, 1);
+      bu32 sat;
+
+      if (aop == 0 || aop == 1)
+	TRACE_INSN (cpu, "A%i = A%i (S);", aop, aop);
+      else
+	TRACE_INSN (cpu, "A1 = A1 (S), A0 = A0 (S);");
+
+      if (aop == 0 || aop == 2)
+	{
+	  sat = 0;
+	  acc0 = saturate_s32(acc0, &sat);
+	  acc0 |= -(acc0 & 0x80000000ull);
+	  SET_AXREG (0, (acc0 >> 31) & 0xFF);
+	  SET_AWREG (0, acc0 & 0xFFFFFFFF);
+	  SET_ASTATREG (av0, sat);
+	  if (sat)
+	    SET_ASTATREG (av0s, sat);
+	}
+      else
+	acc0 = 1;
+
+      if (aop == 1 || aop == 2)
+	{
+	  sat = 0;
+	  acc1 = saturate_s32(acc1, &sat);
+	  acc1 |= -(acc1 & 0x80000000ull);
+	  SET_AXREG (1, (acc1 >> 31) & 0xFF);
+	  SET_AWREG (1, acc1 & 0xFFFFFFFF);
+	  SET_ASTATREG (av1, sat);
+	  if (sat)
+	    SET_ASTATREG (av1s, sat);
+	}
+      else
+	acc1 = 1;
+
+      SET_ASTATREG (az, (acc0 == 0) || (acc1 == 0));
+      SET_ASTATREG (an, ((acc0 >> 31) & 1) || ((acc1 >> 31) & 1) );
+#endif
+    } else if (aop == 3 && (s == 0 || s == 1) && aopcde == 8) {
+        /* Areg{s} = Areg{!s}; */
+        tcg_gen_mov_i64(cpu_areg[s], cpu_areg[!s]);
+    } else if (aop == 3 && HL == 0 && aopcde == 16) {
+        /* A1 = ABS A1 , A0 = ABS A0; */
+        int i;
+
+        /* XXX: Missing ASTAT updates and saturation */
+        for (i = 0; i < 2; ++i) {
+            gen_abs_i64(cpu_areg[i], cpu_areg[i]);
+        }
+#if 0
+    } else if (aop == 0 && aopcde == 23) {
+      bu32 s0, s0L, s0H, s1, s1L, s1H;
+      bs32 tmp0, tmp1;
+
+      TRACE_INSN (cpu, "R%i = BYTEOP3P (R%i:%i, R%i:%i) (%s%s);", dst0,
+		  src0 + 1, src0, src1 + 1, src1, HL ? "HI" : "LO",
+		  s ? ", R" : "");
+
+      if (src0 == src1)
+	illegal_instruction_combination(dc);
+
+      s0L = DREG (src0);
+      s0H = DREG (src0 + 1);
+      s1L = DREG (src1);
+      s1H = DREG (src1 + 1);
+      if (s)
+	{
+	  s0 = algn (s0H, s0L, IREG (0) & 3);
+	  s1 = algn (s1H, s1L, IREG (1) & 3);
+	}
+      else
+	{
+	  s0 = algn (s0L, s0H, IREG (0) & 3);
+	  s1 = algn (s1L, s1H, IREG (1) & 3);
+	}
+
+      tmp0 = (bs32)(bs16)(s0 >>  0) + ((s1 >> ( 0 + (8 * !HL))) & 0xff);
+      tmp1 = (bs32)(bs16)(s0 >> 16) + ((s1 >> (16 + (8 * !HL))) & 0xff);
+      SET_DREG (dst0, (CLAMP (tmp0, 0, 255) << ( 0 + (8 * HL))) |
+		      (CLAMP (tmp1, 0, 255) << (16 + (8 * HL))));
+    }
+#endif
+    } else if ((aop == 0 || aop == 1) && aopcde == 16) {
+        /* Areg{HL} = ABS Areg{aop}; */
+
+        /* XXX: Missing ASTAT updates */
+        /* XXX: Missing saturation */
+        gen_abs_i64(cpu_areg[aop], cpu_areg[aop]);
+#if 0
+    } else if (aop == 3 && aopcde == 12) {
+      bs32 res = DREG (src0);
+      bs32 ovX;
+      bool sBit_a, sBit_b;
+
+      TRACE_INSN (cpu, "R%i.%s = R%i (RND);", dst0, HL == 0 ? "L" : "H", src0);
+
+      sBit_b = !!(res & 0x80000000);
+
+      res += 0x8000;
+      sBit_a = !!(res & 0x80000000);
+
+      /* Overflow if the sign bit changed when we rounded */
+      if ((res >> 16) && (sBit_b != sBit_a))
+	{
+	  ovX = 1;
+	  if (!sBit_b)
+	    res = 0x7FFF;
+	  else
+	    res = 0x8000;
+	}
+      else
+	{
+	  res = res >> 16;
+	  ovX = 0;
+	}
+
+      if (!HL)
+	SET_DREG (dst0, REG_H_L (DREG (dst0), res));
+      else
+	SET_DREG (dst0, REG_H_L (res << 16, DREG (dst0)));
+
+      SET_ASTATREG (az, res == 0);
+      SET_ASTATREG (an, res < 0);
+      SET_ASTATREG (v, ovX);
+      if (ovX)
+	SET_ASTATREG (vs, ovX);
+    }
+  else if (aop == 3 && HL == 0 && aopcde == 15)
+    {
+      bu32 hi = (-(bs16)(DREG (src0) >> 16)) << 16;
+      bu32 lo = (-(bs16)(DREG (src0) & 0xFFFF)) & 0xFFFF;
+      int v, ac0, ac1;
+
+      TRACE_INSN (cpu, "R%i = -R%i (V);", dst0, src0);
+
+      v = ac0 = ac1 = 0;
+
+      if (hi == 0x80000000)
+	{
+	  hi = 0x7fff0000;
+	  v = 1;
+	}
+      else if (hi == 0)
+	ac1 = 1;
+
+      if (lo == 0x8000)
+	{
+	  lo = 0x7fff;
+	  v = 1;
+	}
+      else if (lo == 0)
+	ac0 = 1;
+
+      SET_DREG (dst0, hi | lo);
+
+      SET_ASTATREG (v, v);
+      if (v)
+	SET_ASTATREG (vs, 1);
+      SET_ASTATREG (ac0, ac0);
+      SET_ASTATREG (ac1, ac1);
+      setflags_nz_2x16(cpu, DREG (dst0));
+#endif
+    } else if (aop == 3 && HL == 0 && aopcde == 14) {
+        /* A1 = -A1 , A0 = -A0; */
+        tcg_gen_neg_i64(cpu_areg[1], cpu_areg[1]);
+        tcg_gen_neg_i64(cpu_areg[0], cpu_areg[0]);
+        /* XXX: what ASTAT flags need updating ?  */
+    } else if ((aop == 0 || aop == 1) && (HL == 0 || HL == 1) && aopcde == 14) {
+        /* Areg{HL} = -Areg{aop}; */
+        tcg_gen_neg_i64(cpu_areg[HL], cpu_areg[aop]);
+        /* XXX: Missing ASTAT updates */
+#if 0
+    } else if (aop == 0 && aopcde == 12) {
+      bs16 tmp0_hi = DREG (src0) >> 16;
+      bs16 tmp0_lo = DREG (src0);
+      bs16 tmp1_hi = DREG (src1) >> 16;
+      bs16 tmp1_lo = DREG (src1);
+
+      TRACE_INSN (cpu, "R%i.L = R%i.H = SIGN(R%i.H) * R%i.H + SIGN(R%i.L) & R%i.L;",
+		  dst0, dst0, src0, src1, src0, src1);
+
+      if ((tmp0_hi >> 15) & 1)
+	tmp1_hi = ~tmp1_hi + 1;
+
+      if ((tmp0_lo >> 15) & 1)
+	tmp1_lo = ~tmp1_lo + 1;
+
+      tmp1_hi = tmp1_hi + tmp1_lo;
+
+      STORE (DREG (dst0), REG_H_L (tmp1_hi << 16, tmp1_hi));
+#endif
+    } else if (aopcde == 0) {
+/*
+      bu32 s0 = DREG (src0);
+      bu32 s1 = DREG (src1);
+      bu32 s0h = s0 >> 16;
+      bu32 s0l = s0 & 0xFFFF;
+      bu32 s1h = s1 >> 16;
+      bu32 s1l = s1 & 0xFFFF;
+      bu32 t0, t1;
+      bu32 ac1_i = 0, ac0_i = 0, v_i = 0, z_i = 0, n_i = 0;
+*/
+      TCGv s0, s1, t0, t1;
+
+      TRACE_INSN (cpu, "R%i = R%i %c|%c R%i%s;", dst0, src0,
+		  (aop & 2) ? '-' : '+', (aop & 1) ? '-' : '+', src1,
+		  amod0 (s, x));
+
+      if (s || x)
+	unhandled_instruction(dc, "S/CO/SCO with +|+/-|-");
+
+      s0 = tcg_temp_local_new();
+      s1 = tcg_temp_local_new();
+
+/*
+      if (aop & 2)
+	t0 = sub16(cpu, s0h, s1h, &ac1_i, &v_i, &z_i, &n_i, s, 0);
+      else
+	t0 = add16(cpu, s0h, s1h, &ac1_i, &v_i, &z_i, &n_i, s, 0);
+*/
+
+      t0 = tcg_temp_local_new();
+      tcg_gen_shri_tl(s0, cpu_dreg[src0], 16);
+      tcg_gen_shri_tl(s1, cpu_dreg[src1], 16);
+      if (aop & 2)
+	tcg_gen_sub_tl(t0, s0, s1);
+      else
+	tcg_gen_add_tl(t0, s0, s1);
+
+/*
+      if (aop & 1)
+	t1 = sub16(cpu, s0l, s1l, &ac0_i, &v_i, &z_i, &n_i, s, 0);
+      else
+	t1 = add16(cpu, s0l, s1l, &ac0_i, &v_i, &z_i, &n_i, s, 0);
+*/
+
+      t1 = tcg_temp_local_new();
+      tcg_gen_andi_tl(s0, cpu_dreg[src0], 0xffff);
+      tcg_gen_andi_tl(s1, cpu_dreg[src1], 0xffff);
+      if (aop & 1)
+	tcg_gen_sub_tl(t1, s0, s1);
+      else
+	tcg_gen_add_tl(t1, s0, s1);
+
+      tcg_temp_free(s1);
+      tcg_temp_free(s0);
+
+      astat_queue_state2(dc, ARRAY_OP_VECTOR_ADD_ADD + aop, t0, t1);
+
+/*
+      SET_ASTATREG (ac1, ac1_i);
+      SET_ASTATREG (ac0, ac0_i);
+      SET_ASTATREG (az, z_i);
+      SET_ASTATREG (an, n_i);
+      SET_ASTATREG (v, v_i);
+      if (v_i)
+	SET_ASTATREG (vs, v_i);
+*/
+
+      if (x)
+	{
+	  /* dst0.h = t1; dst0.l = t0 */
+	  tcg_gen_ext16u_tl(cpu_dreg[dst0], t0);
+	  tcg_gen_shli_tl(t1, t1, 16);
+	  tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], t1);
+	}
+      else
+	{
+	  /* dst0.h = t0; dst0.l = t1 */
+	  tcg_gen_ext16u_tl(cpu_dreg[dst0], t1);
+	  tcg_gen_shli_tl(t0, t0, 16);
+	  tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], t0);
+	}
+
+      tcg_temp_free(t0);
+      tcg_temp_free(t1);
+#if 0
+    } else if (aop == 1 && aopcde == 12) {
+      bu32 val0 = (((AWREG (0) & 0xFFFF0000) >> 16) + (AWREG (0) & 0xFFFF)) & 0xFFFF;
+      bu32 val1 = (((AWREG (1) & 0xFFFF0000) >> 16) + (AWREG (1) & 0xFFFF)) & 0xFFFF;
+
+      TRACE_INSN (cpu, "R%i = A1.L + A1.H, R%i = A0.L + A0.H;", dst1, dst0);
+
+      if (dst0 == dst1)
+	illegal_instruction_combination(dc);
+
+      if (val0 & 0x8000)
+	val0 |= 0xFFFF0000;
+
+      if (val1 & 0x8000)
+	val1 |= 0xFFFF0000;
+
+      SET_DREG (dst0, val0);
+      SET_DREG (dst1, val1);
+      /* XXX: ASTAT ?  */
+    }
+  else if (aopcde == 1)
+    {
+      bu32 d0, d1;
+      bu32 x0, x1;
+      bu16 s0L =  (DREG (src0) & 0xFFFF);
+      bu16 s0H = ((DREG (src0) >> 16) & 0xFFFF);
+      bu16 s1L =  (DREG (src1) & 0xFFFF);
+      bu16 s1H = ((DREG (src1) >> 16) & 0xFFFF);
+      bu32 v_i = 0, n_i = 0, z_i = 0;
+
+      TRACE_INSN (cpu, "R%i = R%i %s R%i, R%i = R%i %s R%i%s;",
+		  dst1, src0, HL ? "+|-" : "+|+", src1,
+		  dst0, src0, HL ? "-|+" : "-|-", src1,
+		  amod0amod2(s, x, aop));
+
+      if (dst0 == dst1)
+	illegal_instruction_combination(dc);
+
+      if (HL == 0)
+	{
+	  x0 = add16(cpu, s0H, s1H, 0, &v_i, &z_i, &n_i, s, aop) & 0xffff;
+	  x1 = add16(cpu, s0L, s1L, 0, &v_i, &z_i, &n_i, s, aop) & 0xffff;
+	  d1 = (x0 << 16) | x1;
+
+	  x0 = sub16(cpu, s0H, s1H, 0, &v_i, &z_i, &n_i, s, aop) & 0xffff;
+	  x1 = sub16(cpu, s0L, s1L, 0, &v_i, &z_i, &n_i, s, aop) & 0xffff;
+	  if (x == 0)
+	    d0 =(x0 << 16) | x1;
+	  else
+	    d0 = (x1 << 16) | x0;
+	}
+      else
+	{
+	  x0 = add16(cpu, s0H, s1H, 0, &v_i, &z_i, &n_i, s, aop) & 0xffff;
+	  x1 = sub16(cpu, s0L, s1L, 0, &v_i, &z_i, &n_i, s, aop) & 0xffff;
+	  d1 = (x0 << 16) | x1;
+
+	  x0 = sub16(cpu, s0H, s1H, 0, &v_i, &z_i, &n_i, s, aop) & 0xffff;
+	  x1 = add16(cpu, s0L, s1L, 0, &v_i, &z_i, &n_i, s, aop) & 0xffff;
+	  if (x == 0)
+	    d0 = (x0 << 16) | x1;
+	  else
+	    d0 = (x1 << 16) | x0;
+	}
+      SET_ASTATREG (az, z_i);
+      SET_ASTATREG (an, n_i);
+      SET_ASTATREG (v, v_i);
+      if (v_i)
+	SET_ASTATREG (vs, v_i);
+
+      STORE (DREG (dst0), d0);
+      STORE (DREG (dst1), d1);
+#endif
+    } else if ((aop == 0 || aop == 1 || aop == 2) && aopcde == 11) {
+/*
+      bs40 acc0 = get_extended_acc (cpu, 0);
+      bs40 acc1 = get_extended_acc (cpu, 1);
+      bu32 v, dreg, sat = 0;
+      bu32 carry = !!((bu40)~acc1 < (bu40)acc0);
+*/
+
+      if (aop == 0)
+	TRACE_INSN (cpu, "R%i = (A0 += A1);", dst0);
+      else if (aop == 1)
+	TRACE_INSN (cpu, "R%i.%c = (A0 += A1);", dst0, HL ? 'H' : 'L');
+      else
+	TRACE_INSN (cpu, "A0 += A1%s;", s ? " (W32)" : "");
+
+/*
+      acc0 += acc1;
+      acc0 = saturate_s40_astat (acc0, &v);
+*/
+
+      tcg_gen_add_i64(cpu_areg[0], cpu_areg[0], cpu_areg[1]);
+
+      if (aop == 2 && s == 1)   /* A0 += A1 (W32) */
+	{
+unhandled_instruction(dc, "A0 += A1 (W32)");
+//	  if (acc0 & (bs40)0x8000000000ll)
+//	    acc0 &= 0x80ffffffffll;
+//	  else
+//	    acc0 &= 0xffffffffll;
+	}
+/*
+      STORE (AXREG (0), acc0 >> 32);
+      STORE (AWREG (0), acc0);
+      SET_ASTATREG (av0, v && acc1);
+      if (v)
+	SET_ASTATREG (av0s, v);
+*/
+
+      if (aop == 0)
+	{
+	  /* Dregs = A0 += A1 */
+	  tcg_gen_trunc_i64_i32(cpu_dreg[dst0], cpu_areg[0]);
+	}
+      else if (aop == 1)
+	{
+	  /* Dregs_lo = A0 += A1 */
+	  tmp = tcg_temp_new();
+	  tcg_gen_trunc_i64_i32(tmp, cpu_areg[0]);
+	  gen_mov_l_tl(cpu_dreg[dst0], tmp);
+	  tcg_temp_free(tmp);
+	}
+
+#if 0
+      if (aop == 0 || aop == 1)
+	{
+	  if (aop)	/* Dregs_lo = A0 += A1 */
+	    {
+	      dreg = saturate_s32(rnd16(acc0) << 16, &sat);
+	      if (HL)
+		STORE (DREG (dst0), REG_H_L (dreg, DREG (dst0)));
+	      else
+		STORE (DREG (dst0), REG_H_L (DREG (dst0), dreg >> 16));
+	    }
+	  else		/* Dregs = A0 += A1 */
+	    {
+	      dreg = saturate_s32(acc0, &sat);
+	      STORE (DREG (dst0), dreg);
+	    }
+
+	  STORE (ASTATREG (az), dreg == 0);
+	  STORE (ASTATREG (an), !!(dreg & 0x80000000));
+	  STORE (ASTATREG (ac0), carry);
+	  STORE (ASTATREG (ac0_copy), carry);
+	  STORE (ASTATREG (v), sat);
+	  STORE (ASTATREG (v_copy), sat);
+	  if (sat)
+	    STORE (ASTATREG (vs), sat);
+	}
+      else
+	{
+	  STORE (ASTATREG (az), acc0 == 0);
+	  STORE (ASTATREG (an), !!(acc0 & 0x8000000000ull));
+	  STORE (ASTATREG (ac0), carry);
+	  STORE (ASTATREG (ac0_copy), carry);
+	}
+#endif
+    } else if ((aop == 0 || aop == 1) && aopcde == 10) {
+        /* Dreg_lo{dst0} = Areg_x{aop}; */
+//      SET_DREG_L (dst0, (bs8)AXREG (aop));
+        tmp = tcg_temp_new();
+        tmp64 = tcg_temp_new_i64();
+        tcg_gen_shri_i64(tmp64, cpu_areg[aop], 32);
+        tcg_gen_trunc_i64_i32(tmp, tmp64);
+        tcg_temp_free_i64(tmp64);
+        tcg_gen_ext8s_tl(tmp, tmp);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+    } else if (aop == 0 && aopcde == 4) {
+        /* Dreg{dst0} = Dreg{src0} + Dreg{src1} (amod1(s,x)); */
+//      SET_DREG (dst0, add32 (cpu, DREG (src0), DREG (src1), 1, s));
+        tcg_gen_add_tl(cpu_dreg[dst0], cpu_dreg[src0], cpu_dreg[src1]);
+    } else if (aop == 1 && aopcde == 4) {
+        /* Dreg{dst0} = Dreg{src0} - Dreg{src1} (amod1(s,x)); */
+//      SET_DREG (dst0, sub32 (cpu, DREG (src0), DREG (src1), 1, s, 0));
+        tcg_gen_sub_tl(cpu_dreg[dst0], cpu_dreg[src0], cpu_dreg[src1]);
+        astat_queue_state3(dc, ASTAT_OP_SUB32, cpu_dreg[dst0], cpu_dreg[src0], cpu_dreg[src1]);
+    } else if (aop == 2 && aopcde == 4) {
+        /* Dreg{dst1} = Dreg{src0} + Dreg{src1}, Dreg{dst0} = Dreg{src0} - Dreg{src1} (amod1(s,x)); */
+
+        /*if (dst0 == dst1)
+            illegal_instruction_combination(dc);*/
+
+//      STORE (DREG (dst1), add32 (cpu, DREG (src0), DREG (src1), 1, s));
+        tcg_gen_add_tl(cpu_dreg[dst1], cpu_dreg[src0], cpu_dreg[src1]);
+//      STORE (DREG (dst0), sub32 (cpu, DREG (src0), DREG (src1), 1, s, 1));
+        tcg_gen_sub_tl(cpu_dreg[dst0], cpu_dreg[src0], cpu_dreg[src1]);
+        /* XXX: Missing ASTAT updates */
+    }
+#if 0
+  else if ((aop == 0 || aop == 1) && aopcde == 17)
+    {
+      bs40 acc0 = get_extended_acc (cpu, 0);
+      bs40 acc1 = get_extended_acc (cpu, 1);
+      bs40 val0, val1, sval0, sval1;
+      bu32 sat, sat_i;
+
+      TRACE_INSN (cpu, "R%i = A%i + A%i, R%i = A%i - A%i%s",
+		  dst1, !aop, aop, dst0, !aop, aop, amod1 (s, x));
+
+      if (dst0 == dst1)
+	illegal_instruction_combination(dc);
+
+      val1 = acc0 + acc1;
+      if (aop)
+	val0 = acc0 - acc1;
+      else
+	val0 = acc1 - acc0;
+
+      sval0 = saturate_s32(val0, &sat);
+      sat_i = sat;
+      sval1 = saturate_s32(val1, &sat);
+      sat_i |= sat;
+      if (s)
+	{
+	  val0 = sval0;
+	  val1 = sval1;
+	}
+
+      STORE (DREG (dst0), val0);
+      STORE (DREG (dst1), val1);
+      SET_ASTATREG (v, sat_i);
+      if (sat_i)
+	SET_ASTATREG (vs, sat_i);
+      SET_ASTATREG (an, val0 & 0x80000000 || val1 & 0x80000000);
+      SET_ASTATREG (az, val0 == 0 || val1 == 0);
+      SET_ASTATREG (ac1, (bu40)~acc0 < (bu40)acc1);
+      if (aop)
+	SET_ASTATREG (ac0, !!((bu40)acc1 <= (bu40)acc0));
+      else
+	SET_ASTATREG (ac0, !!((bu40)acc0 <= (bu40)acc1));
+    }
+  else if (aop == 0 && aopcde == 18)
+    {
+      bu40 acc0 = get_extended_acc (cpu, 0);
+      bu40 acc1 = get_extended_acc (cpu, 1);
+      bu32 s0L = DREG (src0);
+      bu32 s0H = DREG (src0 + 1);
+      bu32 s1L = DREG (src1);
+      bu32 s1H = DREG (src1 + 1);
+      bu32 s0, s1;
+      bs16 tmp0, tmp1, tmp2, tmp3;
+
+      /* This instruction is only defined for register pairs R1:0 and R3:2 */
+      if (!((src0 == 0 || src0 == 2) && (src1 == 0 || src1 == 2)))
+	illegal_instruction(dc);
+
+      TRACE_INSN (cpu, "SAA (R%i:%i, R%i:%i)%s", src0 + 1, src0,
+		  src1 + 1, src1, s ? " (R)" :"");
+
+      /* Bit s determines the order of the two registers from a pair:
+       * if s=0 the low-order bytes come from the low reg in the pair,
+       * and if s=1 the low-order bytes come from the high reg.
+       */
+
+      if (s)
+	{
+	  s0 = algn (s0H, s0L, IREG (0) & 3);
+	  s1 = algn (s1H, s1L, IREG (1) & 3);
+	}
+      else
+	{
+	  s0 = algn (s0L, s0H, IREG (0) & 3);
+	  s1 = algn (s1L, s1H, IREG (1) & 3);
+	}
+
+      /* find the absolute difference between pairs,
+       * make it absolute, then add it to the existing accumulator half
+       */
+      /* Byte 0 */
+      tmp0  = ((s0 << 24) >> 24) - ((s1 << 24) >> 24);
+      tmp1  = ((s0 << 16) >> 24) - ((s1 << 16) >> 24);
+      tmp2  = ((s0 <<  8) >> 24) - ((s1 <<  8) >> 24);
+      tmp3  = ((s0 <<  0) >> 24) - ((s1 <<  0) >> 24);
+
+      tmp0  = (tmp0 < 0) ? -tmp0 : tmp0;
+      tmp1  = (tmp1 < 0) ? -tmp1 : tmp1;
+      tmp2  = (tmp2 < 0) ? -tmp2 : tmp2;
+      tmp3  = (tmp3 < 0) ? -tmp3 : tmp3;
+
+      s0L = saturate_u16((bu32)tmp0 + ( acc0        & 0xffff), 0);
+      s0H = saturate_u16((bu32)tmp1 + ((acc0 >> 16) & 0xffff), 0);
+      s1L = saturate_u16((bu32)tmp2 + ( acc1        & 0xffff), 0);
+      s1H = saturate_u16((bu32)tmp3 + ((acc1 >> 16) & 0xffff), 0);
+
+      STORE (AWREG (0), (s0H << 16) | (s0L & 0xFFFF));
+      STORE (AXREG (0), 0);
+      STORE (AWREG (1), (s1H << 16) | (s1L & 0xFFFF));
+      STORE (AXREG (1), 0);
+    }
+#endif
+    else if (aop == 3 && aopcde == 18)
+        /* DISALGNEXCPT; */
+        unhandled_instruction(dc, "DISALGNEXCPT");
+#if 0
+  else if ((aop == 0 || aop == 1) && aopcde == 20)
+    {
+      bu32 s0, s0L, s0H, s1, s1L, s1H;
+      const char * const opts[] = { "", " (R)", " (T)", " (T, R)" };
+
+      TRACE_INSN (cpu, "R%i = BYTEOP1P (R%i:%i, R%i:%i)%s;", dst0,
+		  src0 + 1, src0, src1 + 1, src1, opts[s + (aop << 1)]);
+
+      if (src0 == src1)
+	illegal_instruction_combination(dc);
+
+      s0L = DREG (src0);
+      s0H = DREG (src0 + 1);
+      s1L = DREG (src1);
+      s1H = DREG (src1 + 1);
+      if (s)
+	{
+	  s0 = algn (s0H, s0L, IREG (0) & 3);
+	  s1 = algn (s1H, s1L, IREG (1) & 3);
+	}
+      else
+	{
+	  s0 = algn (s0L, s0H, IREG (0) & 3);
+	  s1 = algn (s1L, s1H, IREG (1) & 3);
+	}
+
+      SET_DREG (dst0,
+		(((((s0 >>  0) & 0xff) + ((s1 >>  0) & 0xff) + !aop) >> 1) <<  0) |
+		(((((s0 >>  8) & 0xff) + ((s1 >>  8) & 0xff) + !aop) >> 1) <<  8) |
+		(((((s0 >> 16) & 0xff) + ((s1 >> 16) & 0xff) + !aop) >> 1) << 16) |
+		(((((s0 >> 24) & 0xff) + ((s1 >> 24) & 0xff) + !aop) >> 1) << 24));
+    }
+  else if (aop == 0 && aopcde == 21)
+    {
+      bu32 s0, s0L, s0H, s1, s1L, s1H;
+
+      TRACE_INSN (cpu, "(R%i, R%i) = BYTEOP16P (R%i:%i, R%i:%i)%s;", dst1, dst0,
+		  src0 + 1, src0, src1 + 1, src1, s ? " (R)" : "");
+
+      if (dst0 == dst1)
+	illegal_instruction_combination(dc);
+
+      s0L = DREG (src0);
+      s0H = DREG (src0 + 1);
+      s1L = DREG (src1);
+      s1H = DREG (src1 + 1);
+      if (s)
+	{
+	  s0 = algn (s0H, s0L, IREG (0) & 3);
+	  s1 = algn (s1H, s1L, IREG (1) & 3);
+	}
+      else
+	{
+	  s0 = algn (s0L, s0H, IREG (0) & 3);
+	  s1 = algn (s1L, s1H, IREG (1) & 3);
+	}
+
+      SET_DREG (dst0,
+		((((s0 >>  0) & 0xff) + ((s1 >>  0) & 0xff)) <<  0) |
+		((((s0 >>  8) & 0xff) + ((s1 >>  8) & 0xff)) << 16));
+      SET_DREG (dst1,
+		((((s0 >> 16) & 0xff) + ((s1 >> 16) & 0xff)) <<  0) |
+		((((s0 >> 24) & 0xff) + ((s1 >> 24) & 0xff)) << 16));
+    }
+  else if (aop == 1 && aopcde == 21)
+    {
+      bu32 s0, s0L, s0H, s1, s1L, s1H;
+
+      TRACE_INSN (cpu, "(R%i, R%i) = BYTEOP16M (R%i:%i, R%i:%i)%s;", dst1, dst0,
+		  src0 + 1, src0, src1 + 1, src1, s ? " (R)" : "");
+
+      if (dst0 == dst1)
+	illegal_instruction_combination(dc);
+
+      s0L = DREG (src0);
+      s0H = DREG (src0 + 1);
+      s1L = DREG (src1);
+      s1H = DREG (src1 + 1);
+      if (s)
+	{
+	  s0 = algn (s0H, s0L, IREG (0) & 3);
+	  s1 = algn (s1H, s1L, IREG (1) & 3);
+	}
+      else
+	{
+	  s0 = algn (s0L, s0H, IREG (0) & 3);
+	  s1 = algn (s1L, s1H, IREG (1) & 3);
+	}
+
+      SET_DREG (dst0,
+		(((((s0 >>  0) & 0xff) - ((s1 >>  0) & 0xff)) <<  0) & 0xffff) |
+		(((((s0 >>  8) & 0xff) - ((s1 >>  8) & 0xff)) << 16)));
+      SET_DREG (dst1,
+		(((((s0 >> 16) & 0xff) - ((s1 >> 16) & 0xff)) <<  0) & 0xffff) |
+		(((((s0 >> 24) & 0xff) - ((s1 >> 24) & 0xff)) << 16)));
+    }
+#endif
+    else if (aop == 1 && aopcde == 7) {
+        int l;
+        /* Dreg{dst0} = MIN (Dreg{src0}, Dreg{src1}); */
+        /* Source and dest regs might be the same, so we can't clobber;
+           XXX: Well, we could, but we need the logic here to be smarter */
+        tmp = tcg_temp_local_new();
+        l = gen_new_label();
+        tcg_gen_mov_tl(tmp, cpu_dreg[src0]);
+        tcg_gen_brcond_tl(TCG_COND_GE, cpu_dreg[src1], cpu_dreg[src0], l);
+        tcg_gen_mov_tl(tmp, cpu_dreg[src1]);
+        gen_set_label(l);
+        tcg_gen_mov_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+
+        astat_queue_state1(dc, ASTAT_OP_MIN_MAX, cpu_dreg[dst0]);
+    } else if (aop == 0 && aopcde == 7) {
+        int l;
+        /* Dreg{dst0} = MAX (Dreg{src0}, Dreg{src1}); */
+        /* Source and dest regs might be the same, so we can't clobber;
+           XXX: Well, we could, but we need the logic here to be smarter */
+        tmp = tcg_temp_local_new();
+        l = gen_new_label();
+        tcg_gen_mov_tl(tmp, cpu_dreg[src0]);
+        tcg_gen_brcond_tl(TCG_COND_LT, cpu_dreg[src1], cpu_dreg[src0], l);
+        tcg_gen_mov_tl(tmp, cpu_dreg[src1]);
+        gen_set_label(l);
+        tcg_gen_mov_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+
+        astat_queue_state1(dc, ASTAT_OP_MIN_MAX, cpu_dreg[dst0]);
+    } else if (aop == 2 && aopcde == 7) {
+        /* Dreg{dst0} = ABS Dreg{src0}; */
+
+        /* XXX: Missing saturation support (and ASTAT V/VS) */
+        gen_abs_tl(cpu_dreg[dst0], cpu_dreg[src0]);
+
+        astat_queue_state2(dc, ASTAT_OP_ABS, cpu_dreg[dst0], cpu_dreg[src0]);
+    } else if (aop == 3 && aopcde == 7) {
+        /* Dreg{dst0} = -Dreg{src0} (amod1(s,0)); */
+        int l, endl;
+
+        l = gen_new_label();
+        endl = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_NE, cpu_dreg[src0], 0x80000000, l);
+        if (s) {
+            tcg_gen_movi_tl(cpu_dreg[dst0], 0x7fffffff);
+            tmp = tcg_const_tl(1);
+            _gen_astat_store(ASTAT_V, tmp);
+            _gen_astat_store(ASTAT_V_COPY, tmp);
+            _gen_astat_store(ASTAT_VS, tmp);
+            tcg_temp_free(tmp);
+        } else
+            tcg_gen_movi_tl(cpu_dreg[dst0], 0x80000000);
+
+        gen_set_label(l);
+        tcg_gen_neg_tl(cpu_dreg[dst0], cpu_dreg[src0]);
+        gen_set_label(endl);
+        astat_queue_state2(dc, ASTAT_OP_NEGATE, cpu_dreg[dst0], cpu_dreg[src0]);
+#if 0
+    } else if (aop == 2 && aopcde == 6) {
+      bu32 in = DREG (src0);
+      bu32 hi = (in & 0x80000000 ? (bu32)-(bs16)(in >> 16) : in >> 16) << 16;
+      bu32 lo = (in & 0x8000 ? (bu32)-(bs16)(in & 0xFFFF) : in) & 0xFFFF;
+      int v;
+
+      TRACE_INSN (cpu, "R%i = ABS R%i (V);", dst0, src0);
+
+      v = 0;
+      if (hi == 0x80000000)
+	{
+	  hi = 0x7fff0000;
+	  v = 1;
+	}
+      if (lo == 0x8000)
+	{
+	  lo = 0x7fff;
+	  v = 1;
+	}
+      SET_DREG (dst0, hi | lo);
+
+      SET_ASTATREG (v, v);
+      if (v)
+	SET_ASTATREG (vs, 1);
+      setflags_nz_2x16(cpu, DREG (dst0));
+    }
+  else if (aop == 1 && aopcde == 6)
+    {
+      TRACE_INSN (cpu, "R%i = MIN (R%i, R%i) (V);", dst0, src0, src1);
+      SET_DREG (dst0, min2x16(cpu, DREG (src0), DREG (src1)));
+    }
+  else if (aop == 0 && aopcde == 6)
+    {
+      TRACE_INSN (cpu, "R%i = MAX (R%i, R%i) (V);", dst0, src0, src1);
+      SET_DREG (dst0, max2x16(cpu, DREG (src0), DREG (src1)));
+#endif
+    } else if (aop == 0 && aopcde == 24) {
+        TCGv dst;
+        /* Dreg{dst0} = BYTEPACK (Dreg{src0}, Dreg{src1}); */
+
+        /* XXX: could optimize a little if dst0 is diff from src0 or src1 */
+        /* dst |= (((src0 >>  0) & 0xff) <<  0) */
+        dst = tcg_temp_new();
+        tcg_gen_andi_tl(dst, cpu_dreg[src0], 0xff);
+        tmp = tcg_temp_new();
+        /* dst |= (((src0 >> 16) & 0xff) <<  8) */
+        tcg_gen_andi_tl(tmp, cpu_dreg[src0], 0xff0000);
+        tcg_gen_shri_tl(tmp, tmp, 8);
+        tcg_gen_or_tl(dst, dst, tmp);
+        /* dst |= (((src1 >>  0) & 0xff) << 16) */
+        tcg_gen_andi_tl(tmp, cpu_dreg[src1], 0xff);
+        tcg_gen_shli_tl(tmp, tmp, 16);
+        tcg_gen_or_tl(dst, dst, tmp);
+        /* dst |= (((src1 >> 16) & 0xff) << 24) */
+        tcg_gen_andi_tl(tmp, cpu_dreg[src1], 0xff0000);
+        tcg_gen_shli_tl(tmp, tmp, 8);
+        tcg_gen_or_tl(cpu_dreg[dst0], dst, tmp);
+        tcg_temp_free(tmp);
+        tcg_temp_free(dst);
+    } else if (aop == 1 && aopcde == 24) {
+        /* (Dreg{dst1}, Dreg{dst0}) = BYTEUNPACK Dreg{src0+1}:{src0} (R){s}; */
+        TCGv lo, hi;
+        TCGv_i64 tmp64_2;
+
+        /*if (dst0 == dst1)
+            illegal_instruction_combination(dc);*/
+
+        if (s)
+            hi = cpu_dreg[src0], lo = cpu_dreg[src0 + 1];
+        else
+            hi = cpu_dreg[src0 + 1], lo = cpu_dreg[src0];
+
+        /* Create one field of the two regs */
+        tmp64 = tcg_temp_local_new_i64();
+        tcg_gen_extu_i32_i64(tmp64, hi);
+        tcg_gen_shli_i64(tmp64, tmp64, 32);
+        tmp64_2 = tcg_temp_local_new_i64();
+        tcg_gen_extu_i32_i64(tmp64_2, lo);
+        tcg_gen_or_i64(tmp64, tmp64, tmp64_2);
+
+        /* Adjust the two regs field by the Ireg[0] order */
+        tcg_gen_extu_i32_i64(tmp64_2, cpu_ireg[0]);
+        tcg_gen_andi_i64(tmp64_2, tmp64_2, 0x3);
+        tcg_gen_shli_i64(tmp64_2, tmp64_2, 3);    /* multiply by 8 */
+        tcg_gen_shr_i64(tmp64, tmp64, tmp64_2);
+        tcg_temp_free_i64(tmp64_2);
+
+        /* Now that the 4 bytes we want are in the low 32bit, truncate */
+        tmp = tcg_temp_local_new();
+        tcg_gen_trunc_i64_i32(tmp, tmp64);
+        tcg_temp_free_i64(tmp64);
+
+        /* Load bytea into dst0 */
+        tcg_gen_andi_tl(cpu_dreg[dst0], tmp, 0xff);
+        /* Load byted into dst1 */
+        tcg_gen_shri_tl(cpu_dreg[dst1], tmp, 8);
+        tcg_gen_andi_tl(cpu_dreg[dst1], cpu_dreg[dst1], 0xff0000);
+        /* Load byteb into dst0 */
+        tcg_gen_shli_tl(tmp, tmp, 8);
+        tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], tmp);
+        tcg_gen_andi_tl(cpu_dreg[dst0], cpu_dreg[dst0], 0xff00ff);
+        /* Load bytec into dst1 */
+        tcg_gen_shri_tl(tmp, tmp, 24);
+        tcg_gen_or_tl(cpu_dreg[dst1], cpu_dreg[dst1], tmp);
+        tcg_gen_andi_tl(cpu_dreg[dst1], cpu_dreg[dst1], 0xff00ff);
+        tcg_temp_free(tmp);
+    } else if (aopcde == 13) {
+        int l;
+        TCGv a_lo;
+        TCGCond conds[] = {
+            /* GT */ TCG_COND_LE,
+            /* GE */ TCG_COND_LT,
+            /* LT */ TCG_COND_GE,
+            /* LE */ TCG_COND_GT,
+        };
+
+        /* (Dreg{dst1}, Dreg{dst0}) = SEARCH Dreg{src0} (mode{aop}); */
+
+        /*if (dst0 == dst1)
+            illegal_instruction_combination(dc);*/
+
+        a_lo = tcg_temp_local_new();
+        tmp = tcg_temp_local_new();
+
+        /* Compare A1 to Dreg_hi{src0} */
+        tcg_gen_trunc_i64_i32(a_lo, cpu_areg[1]);
+        tcg_gen_ext16s_tl(a_lo, a_lo);
+        tcg_gen_sari_tl(tmp, cpu_dreg[src0], 16);
+
+        l = gen_new_label();
+        tcg_gen_brcond_tl(conds[aop], tmp, a_lo, l);
+        /* Move Dreg_hi{src0} into A1 */
+        tcg_gen_ext_i32_i64(cpu_areg[1], tmp);
+        /* Move Preg{0} into Dreg{dst1} */
+        tcg_gen_mov_tl(cpu_dreg[dst1], cpu_preg[0]);
+        gen_set_label(l);
+
+        /* Compare A0 to Dreg_lo{src0} */
+        tcg_gen_trunc_i64_i32(a_lo, cpu_areg[0]);
+        tcg_gen_ext16s_tl(a_lo, a_lo);
+        tcg_gen_ext16s_tl(tmp, cpu_dreg[src0]);
+
+        l = gen_new_label();
+        tcg_gen_brcond_tl(conds[aop], tmp, a_lo, l);
+        /* Move Dreg_lo{src0} into A0 */
+        tcg_gen_ext_i32_i64(cpu_areg[0], tmp);
+        /* Move Preg{0} into Dreg{dst0} */
+        tcg_gen_mov_tl(cpu_dreg[dst0], cpu_preg[0]);
+        gen_set_label(l);
+
+        tcg_temp_free(a_lo);
+        tcg_temp_free(tmp);
+    } else
+        illegal_instruction(dc);
+}
+
+static void
+decode_dsp32shift_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* dsp32shift
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 0 | 0 |.M.| 1 | 1 | 0 | 0 | - | - |.sopcde............|
+       |.sop...|.HLs...|.dst0......| - | - | - |.src0......|.src1......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int HLs  = ((iw1 >> DSP32Shift_HLs_bits) & DSP32Shift_HLs_mask);
+    int sop  = ((iw1 >> DSP32Shift_sop_bits) & DSP32Shift_sop_mask);
+    int src0 = ((iw1 >> DSP32Shift_src0_bits) & DSP32Shift_src0_mask);
+    int src1 = ((iw1 >> DSP32Shift_src1_bits) & DSP32Shift_src1_mask);
+    int dst0 = ((iw1 >> DSP32Shift_dst0_bits) & DSP32Shift_dst0_mask);
+    int sopcde = ((iw0 >> (DSP32Shift_sopcde_bits - 16)) & DSP32Shift_sopcde_mask);
+    int M = ((iw0 >> (DSP32Shift_M_bits - 16)) & DSP32Shift_M_mask);
+    TCGv tmp;
+    TCGv_i64 tmp64;
+
+    TRACE_EXTRACT("%s: M:%i sopcde:%i sop:%i HLs:%i dst0:%i src0:%i src1:%i",
+                  __func__, M, sopcde, sop, HLs, dst0, src0, src1);
+
+    if ((sop == 0 || sop == 1) && sopcde == 0) {
+        int l, endl;
+        TCGv val;
+
+        /* Dreg{dst0}_hi{HLs&2} = ASHIFT Dreg{src1}_hi{HLs&1} BY Dreg_lo{src0} (S){sop==1}; */
+        /* Dreg{dst0}_lo{!HLs&2} = ASHIFT Dreg{src1}_lo{!HLs&1} BY Dreg_lo{src0} (S){sop==1}; */
+
+        tmp = tcg_temp_local_new();
+        gen_extNsi_tl(tmp, cpu_dreg[src0], 6);
+
+        val = tcg_temp_local_new();
+        if (HLs & 1)
+            tcg_gen_sari_tl(val, cpu_dreg[src1], 16);
+        else
+            tcg_gen_ext16s_tl(val, cpu_dreg[src1]);
+
+        /* Positive shift magnitudes produce Logical Left shifts.
+         * Negative shift magnitudes produce Arithmetic Right shifts.
+         */
+        endl = gen_new_label();
+        l = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_GE, tmp, 0, l);
+        tcg_gen_neg_tl(tmp, tmp);
+        tcg_gen_sar_tl(val, val, tmp);
+        astat_queue_state1(dc, ASTAT_OP_ASHIFT16, val);
+        tcg_gen_br(endl);
+        gen_set_label(l);
+        tcg_gen_shl_tl(val, val, tmp);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT16, val);
+        gen_set_label(endl);
+
+        if (HLs & 2)
+            gen_mov_h_tl(cpu_dreg[dst0], val);
+        else
+            gen_mov_l_tl(cpu_dreg[dst0], val);
+
+        tcg_temp_free(val);
+        tcg_temp_free(tmp);
+
+        /* XXX: Missing V updates */
+    } else if (sop == 2 && sopcde == 0) {
+        int l, endl;
+        TCGv val;
+
+        /* Dreg{dst0}_hi{HLs&2} = LSHIFT Dreg{src1}_hi{HLs&1} BY Dreg_lo{src0}; */
+        /* Dreg{dst0}_lo{!HLs&2} = LSHIFT Dreg{src1}_lo{!HLs&1} BY Dreg_lo{src0}; */
+
+        tmp = tcg_temp_local_new();
+        gen_extNsi_tl(tmp, cpu_dreg[src0], 6);
+
+        val = tcg_temp_local_new();
+        if (HLs & 1)
+            tcg_gen_shri_tl(val, cpu_dreg[src1], 16);
+        else
+            tcg_gen_ext16u_tl(val, cpu_dreg[src1]);
+
+        /* Negative shift magnitudes means shift right */
+        endl = gen_new_label();
+        l = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_GE, tmp, 0, l);
+        tcg_gen_neg_tl(tmp, tmp);
+        tcg_gen_shr_tl(val, val, tmp);
+        tcg_gen_br(endl);
+        gen_set_label(l);
+        tcg_gen_shl_tl(val, val, tmp);
+        gen_set_label(endl);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT16, val);
+
+        if (HLs & 2)
+            gen_mov_h_tl(cpu_dreg[dst0], val);
+        else
+            gen_mov_l_tl(cpu_dreg[dst0], val);
+
+        tcg_temp_free(val);
+        tcg_temp_free(tmp);
+
+        /* XXX: Missing AZ/AN/V updates */
+    } else if (sop == 2 && sopcde == 3 && (HLs == 1 || HLs == 0)) {
+        /* Areg{HLs} = ROT Areg{HLs} BY Dreg_lo{src0}; */
+        tmp64 = tcg_temp_local_new_i64();
+        tcg_gen_extu_i32_i64(tmp64, cpu_dreg[src0]);
+        tcg_gen_ext16s_i64(tmp64, tmp64);
+        gen_rot_i64(cpu_areg[HLs], cpu_areg[HLs], tmp64);
+        tcg_temp_free_i64(tmp64);
+#if 0
+    } else if (sop == 0 && sopcde == 3 && (HLs == 0 || HLs == 1)) {
+      bs32 shft = (bs8)(DREG (src0) << 2) >> 2;
+      bu64 val = get_extended_acc (cpu, HLs);
+
+      HLs = !!HLs;
+      TRACE_INSN (cpu, "A%i = ASHIFT A%i BY R%i.L;", HLs, HLs, src0);
+
+      if (shft <= 0)
+	val = ashiftrt (cpu, val, -shft, 40);
+      else
+	val = lshift (cpu, val, shft, 40, 0);
+
+      STORE (AXREG (HLs), (val >> 32) & 0xff);
+      STORE (AWREG (HLs), (val & 0xffffffff));
+    }
+  else if (sop == 1 && sopcde == 3 && (HLs == 0 || HLs == 1))
+    {
+      bs32 shft = (bs8)(DREG (src0) << 2) >> 2;
+      bu64 val;
+
+      HLs = !!HLs;
+      TRACE_INSN (cpu, "A%i = LSHIFT A%i BY R%i.L;", HLs, HLs, src0);
+      val = get_extended_acc (cpu, HLs);
+
+      if (shft <= 0)
+	val = lshiftrt (cpu, val, -shft, 40);
+      else
+	val = lshift (cpu, val, shft, 40, 0);
+
+      STORE (AXREG (HLs), (val >> 32) & 0xff);
+      STORE (AWREG (HLs), (val & 0xffffffff));
+    }
+  else if ((sop == 0 || sop == 1) && sopcde == 1)
+    {
+      bs32 shft = (bs8)(DREG (src0) << 2) >> 2;
+      bu16 val0, val1;
+      bu32 astat;
+
+      TRACE_INSN (cpu, "R%i = ASHIFT R%i BY R%i.L (V%s);",
+		  dst0, src1, src0, sop == 1 ? ",S" : "");
+
+      val0 = (bu16)DREG (src1) & 0xFFFF;
+      val1 = (bu16)((DREG (src1) & 0xFFFF0000) >> 16);
+
+      if (shft <= 0)
+	{
+	  val0 = ashiftrt (cpu, val0, -shft, 16);
+	  astat = ASTAT;
+	  val1 = ashiftrt (cpu, val1, -shft, 16);
+	}
+      else
+	{
+	  val0 = lshift (cpu, val0, shft, 16, sop == 1);
+	  astat = ASTAT;
+	  val1 = lshift (cpu, val1, shft, 16, sop == 1);
+	}
+      SET_ASTAT (ASTAT | astat);
+      STORE (DREG (dst0), (val1 << 16) | val0);
+#endif
+    } else if ((sop == 0 || sop == 1 || sop == 2) && sopcde == 2) {
+        /* Dreg{dst0} = [LA]SHIFT Dreg{src1} BY Dreg_lo{src0} (opt_S); */
+        /* sop == 1 : opt_S */
+        int l, endl;
+
+        /* XXX: Missing V/VS update */
+        if (sop == 1)
+            unhandled_instruction(dc, "[AL]SHIFT with (S)");
+
+        tmp = tcg_temp_local_new();
+        gen_extNsi_tl(tmp, cpu_dreg[src0], 6);
+
+        /* Negative shift means logical or arith shift right */
+        endl = gen_new_label();
+        l = gen_new_label();
+        tcg_gen_brcondi_tl(TCG_COND_GE, tmp, 0, l);
+        tcg_gen_neg_tl(tmp, tmp);
+        if (sop == 2) {
+            tcg_gen_shr_tl(cpu_dreg[dst0], cpu_dreg[src1], tmp);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT_RT32, cpu_dreg[dst0]);
+        } else {
+            tcg_gen_sar_tl(cpu_dreg[dst0], cpu_dreg[src1], tmp);
+            astat_queue_state1(dc, ASTAT_OP_ASHIFT32, cpu_dreg[dst0]);
+        }
+        tcg_gen_br(endl);
+
+        /* Positive shift is a logical left shift */
+        gen_set_label(l);
+        tcg_gen_shl_tl(cpu_dreg[dst0], cpu_dreg[src1], tmp);
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst0]);
+        gen_set_label(endl);
+
+        tcg_temp_free(tmp);
+    } else if (sop == 3 && sopcde == 2) {
+        /* Dreg{dst0} = ROT Dreg{src1} BY Dreg_lo{src0}; */
+        tmp = tcg_temp_local_new();
+        tcg_gen_ext16s_tl(tmp, cpu_dreg[src0]);
+        gen_rot_tl(cpu_dreg[dst0], cpu_dreg[src1], tmp);
+        tcg_temp_free(tmp);
+    }
+#if 0
+  else if (sop == 2 && sopcde == 1)
+    {
+      bs32 shft = (bs8)(DREG (src0) << 2) >> 2;
+      bu16 val0, val1;
+      bu32 astat;
+
+      TRACE_INSN (cpu, "R%i = LSHIFT R%i BY R%i.L (V);", dst0, src1, src0);
+
+      val0 = (bu16)DREG (src1) & 0xFFFF;
+      val1 = (bu16)((DREG (src1) & 0xFFFF0000) >> 16);
+
+      if (shft <= 0)
+	{
+	  val0 = lshiftrt (cpu, val0, -shft, 16);
+	  astat = ASTAT;
+	  val1 = lshiftrt (cpu, val1, -shft, 16);
+	}
+      else
+	{
+	  val0 = lshift (cpu, val0, shft, 16, 0);
+	  astat = ASTAT;
+	  val1 = lshift (cpu, val1, shft, 16, 0);
+	}
+      SET_ASTAT (ASTAT | astat);
+      STORE (DREG (dst0), (val1 << 16) | val0);
+    }
+#endif
+    else if (sopcde == 4) {
+        /* Dreg{dst0} = PACK (Dreg{src1}_hi{sop&2}, Dreg{src0}_hi{sop&1}); */
+        /* Dreg{dst0} = PACK (Dreg{src1}_lo{!sop&2}, Dreg{src0}_lo{!sop&1}); */
+        TCGv tmph;
+        tmp = tcg_temp_new();
+        if (sop & 1)
+            tcg_gen_shri_tl(tmp, cpu_dreg[src0], 16);
+        else
+            tcg_gen_andi_tl(tmp, cpu_dreg[src0], 0xffff);
+        tmph = tcg_temp_new();
+        if (sop & 2)
+            tcg_gen_andi_tl(tmph, cpu_dreg[src1], 0xffff0000);
+        else
+            tcg_gen_shli_tl(tmph, cpu_dreg[src1], 16);
+        tcg_gen_or_tl(cpu_dreg[dst0], tmph, tmp);
+        tcg_temp_free(tmph);
+        tcg_temp_free(tmp);
+    } else if (sop == 0 && sopcde == 5) {
+        /* Dreg_lo{dst0} = SIGNBITS Dreg{src1}; */
+        tmp = tcg_temp_new();
+        gen_signbitsi_tl(tmp, cpu_dreg[src1], 32);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+    } else if (sop == 1 && sopcde == 5) {
+        /* Dreg_lo{dst0} = SIGNBITS Dreg_lo{src1}; */
+        tmp = tcg_temp_new();
+        gen_signbitsi_tl(tmp, cpu_dreg[src1], 16);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+    } else if (sop == 2 && sopcde == 5) {
+        /* Dreg_lo{dst0} = SIGNBITS Dreg_hi{src1}; */
+        tmp = tcg_temp_new();
+        tcg_gen_shri_tl(tmp, cpu_dreg[src1], 16);
+        gen_signbitsi_tl(tmp, tmp, 16);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+    } else if ((sop == 0 || sop == 1) && sopcde == 6) {
+        /* Dreg_lo{dst0} = SIGNBITS Areg{sop}; */
+        tmp = tcg_temp_new();
+        gen_signbitsi_i64_i32(tmp, cpu_areg[sop], 40);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+    } else if (sop == 3 && sopcde == 6) {
+        /* Dreg_lo{dst0} = ONES Dreg{src1}; */
+        tmp = tcg_temp_new();
+        gen_helper_ones(tmp, cpu_dreg[src1]);
+        gen_mov_l_tl(cpu_dreg[dst0], tmp);
+        tcg_temp_free(tmp);
+    }
+#if 0
+  else if (sop == 0 && sopcde == 7)
+    {
+      bu16 sv1 = (bu16)signbits (DREG (src1), 32);
+      bu16 sv0 = (bu16)DREG (src0);
+      bu16 dst_lo;
+
+      TRACE_INSN (cpu, "R%i.L = EXPADJ (R%i, R%i.L);", dst0, src1, src0);
+
+      if ((sv1 & 0x1f) < (sv0 & 0x1f))
+	dst_lo = sv1;
+      else
+	dst_lo = sv0;
+      STORE (DREG (dst0), REG_H_L (DREG (dst0), dst_lo));
+    }
+  else if (sop == 1 && sopcde == 7)
+    {
+      /*
+       * Exponent adjust on two 16-bit inputs. Select smallest norm
+       * among 3 inputs
+       */
+      bs16 src1_hi = (DREG (src1) & 0xFFFF0000) >> 16;
+      bs16 src1_lo = (DREG (src1) & 0xFFFF);
+      bu16 src0_lo = (DREG (src0) & 0xFFFF);
+      bu16 tmp_hi, tmp_lo, tmp;
+
+      TRACE_INSN (cpu, "R%i.L = EXPADJ (R%i, R%i.L) (V);", dst0, src1, src0);
+
+      tmp_hi = signbits (src1_hi, 16);
+      tmp_lo = signbits (src1_lo, 16);
+
+      if ((tmp_hi & 0xf) < (tmp_lo & 0xf))
+	if ((tmp_hi & 0xf) < (src0_lo & 0xf))
+	  tmp = tmp_hi;
+	else
+	  tmp = src0_lo;
+      else
+	if ((tmp_lo & 0xf) < (src0_lo & 0xf))
+	  tmp = tmp_lo;
+	else
+	  tmp = src0_lo;
+      STORE (DREG (dst0), REG_H_L (DREG (dst0), tmp));
+    }
+  else if (sop == 2 && sopcde == 7)
+    {
+      /*
+       * exponent adjust on single 16-bit register
+       */
+      bu16 tmp;
+      bu16 src0_lo = (bu16)(DREG (src0) & 0xFFFF);
+
+      TRACE_INSN (cpu, "R%i.L = EXPADJ (R%i.L, R%i.L);", dst0, src1, src0);
+
+      tmp = signbits (DREG (src1) & 0xFFFF, 16);
+
+      if ((tmp & 0xf) < (src0_lo & 0xf))
+	SET_DREG_L (dst0, tmp);
+      else
+	SET_DREG_L (dst0, src0_lo);
+    }
+  else if (sop == 3 && sopcde == 7)
+    {
+      bu16 tmp;
+      bu16 src0_lo = (bu16)(DREG (src0) & 0xFFFF);
+
+      TRACE_INSN (cpu, "R%i.L = EXPADJ (R%i.H, R%i.L);", dst0, src1, src0);
+
+      tmp = signbits ((DREG (src1) & 0xFFFF0000) >> 16, 16);
+
+      if ((tmp & 0xf) < (src0_lo & 0xf))
+	SET_DREG_L (dst0, tmp);
+      else
+	SET_DREG_L (dst0, src0_lo);
+    }
+  else if (sop == 0 && sopcde == 8)
+    {
+      bu64 acc = get_unextended_acc (cpu, 0);
+      bu32 s0, s1;
+
+      TRACE_INSN (cpu, "BITMUX (R%i, R%i, A0) (ASR);", src0, src1);
+
+      if (src0 == src1)
+	illegal_instruction_combination(dc);
+
+      s0 = DREG (src0);
+      s1 = DREG (src1);
+      acc = (acc >> 2) |
+	(((bu64)s0 & 1) << 38) |
+	(((bu64)s1 & 1) << 39);
+      SET_DREG (src0, s0 >> 1);
+      SET_DREG (src1, s1 >> 1);
+
+      SET_AREG (0, acc);
+    }
+  else if (sop == 1 && sopcde == 8)
+    {
+      bu64 acc = get_unextended_acc (cpu, 0);
+      bu32 s0, s1;
+
+      TRACE_INSN (cpu, "BITMUX (R%i, R%i, A0) (ASL);", src0, src1);
+
+      if (src0 == src1)
+	illegal_instruction_combination(dc);
+
+      s0 = DREG (src0);
+      s1 = DREG (src1);
+      acc = (acc << 2) |
+	((s0 >> 31) & 1) |
+	((s1 >> 30) & 2);
+      SET_DREG (src0, s0 << 1);
+      SET_DREG (src1, s1 << 1);
+
+      SET_AREG (0, acc);
+    }
+  else if ((sop == 0 || sop == 1) && sopcde == 9)
+    {
+      bs40 acc0 = get_extended_acc (cpu, 0);
+      bs16 sL, sH, out;
+
+      TRACE_INSN (cpu, "R%i.L = VIT_MAX (R%i) (AS%c);",
+		  dst0, src1, sop & 1 ? 'R' : 'L');
+
+      sL = DREG (src1);
+      sH = DREG (src1) >> 16;
+
+      if (sop & 1)
+	acc0 >>= 1;
+      else
+	acc0 <<= 1;
+
+      if (((sH - sL) & 0x8000) == 0)
+	{
+	  out = sH;
+	  acc0 |= (sop & 1) ? 0x80000000 : 1;
+	}
+      else
+	out = sL;
+
+      SET_AREG (0, acc0);
+      SET_DREG (dst0, REG_H_L (DREG (dst0), out));
+    }
+  else if ((sop == 2 || sop == 3) && sopcde == 9)
+    {
+      bs40 acc0 = get_extended_acc (cpu, 0);
+      bs16 s0L, s0H, s1L, s1H, out0, out1;
+
+      TRACE_INSN (cpu, "R%i = VIT_MAX (R%i, R%i) (AS%c);",
+		  dst0, src1, src0, sop & 1 ? 'R' : 'L');
+
+      s0L = DREG (src0);
+      s0H = DREG (src0) >> 16;
+      s1L = DREG (src1);
+      s1H = DREG (src1) >> 16;
+
+      if (sop & 1)
+	acc0 >>= 2;
+      else
+	acc0 <<= 2;
+
+      if (((s0H - s0L) & 0x8000) == 0)
+	{
+	  out0 = s0H;
+	  acc0 |= (sop & 1) ? 0x40000000 : 2;
+	}
+      else
+	out0 = s0L;
+
+      if (((s1H - s1L) & 0x8000) == 0)
+	{
+	  out1 = s1H;
+	  acc0 |= (sop & 1) ? 0x80000000 : 1;
+	}
+      else
+	out1 = s1L;
+
+      SET_AREG (0, acc0);
+      SET_DREG (dst0, REG_H_L (out1 << 16, out0));
+    }
+#endif
+    else if ((sop == 0 || sop == 1) && sopcde == 10) {
+        TCGv mask, x, sgn;
+        /* Dreg{dst0} = EXTRACT (Dreg{src1}, Dreg_lo{src0}) (X{sop==1}); */
+        /* Dreg{dst0} = EXTRACT (Dreg{src1}, Dreg_lo{src0}) (Z{sop==0}); */
+
+        /* mask = 1 << (src0 & 0x1f) */
+        tmp = tcg_temp_new();
+        tcg_gen_andi_tl(tmp, cpu_dreg[src0], 0x1f);
+        mask = tcg_temp_local_new();
+        tcg_gen_movi_tl(mask, 1);
+        tcg_gen_shl_tl(mask, mask, tmp);
+        tcg_temp_free(tmp);
+        if (sop) {
+            /* sgn = mask >> 1 */
+            sgn = tcg_temp_local_new();
+            tcg_gen_shri_tl(sgn, mask, 1);
+        }
+        /* mask -= 1 */
+        tcg_gen_subi_tl(mask, mask, 1);
+
+        /* x = src1 >> ((src0 >> 8) & 0x1f) */
+        tmp = tcg_temp_new();
+        x = tcg_temp_new();
+        tcg_gen_shri_tl(tmp, cpu_dreg[src0], 8);
+        tcg_gen_andi_tl(tmp, tmp, 0x1f);
+        tcg_gen_shr_tl(x, cpu_dreg[src1], tmp);
+        tcg_temp_free(tmp);
+        /* dst0 = x & mask */
+        tcg_gen_and_tl(cpu_dreg[dst0], x, mask);
+        tcg_temp_free(x);
+
+        if (sop) {
+            /* if (dst0 & sgn) dst0 |= ~mask */
+            int l;
+            l = gen_new_label();
+            tmp = tcg_temp_new();
+            tcg_gen_and_tl(tmp, cpu_dreg[dst0], sgn);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, tmp, 0, l);
+            tcg_gen_not_tl(mask, mask);
+            tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], mask);
+            gen_set_label(l);
+            tcg_temp_free(sgn);
+            tcg_temp_free(tmp);
+        }
+
+        tcg_temp_free(mask);
+
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst0]);
+    } else if ((sop == 2 || sop == 3) && sopcde == 10) {
+        /* The first dregs is the "background" while the second dregs is the
+         * "foreground".  The fg reg is used to overlay the bg reg and is:
+         * | nnnn nnnn | nnnn nnnn | xxxp pppp | xxxL LLLL |
+         *  n = the fg bit field
+         *  p = bit position in bg reg to start LSB of fg field
+         *  L = number of fg bits to extract
+         * Using (X) sign-extends the fg bit field.
+         */
+        TCGv fg, bg, len, mask, fgnd, shft;
+
+        /* Dreg{dst0} = DEPOSIT (Dreg{src1}, Dreg{src0}) (X){sop==3}; */
+        fg = cpu_dreg[src0];
+        bg = cpu_dreg[src1];
+
+        len = tcg_temp_new();
+        tcg_gen_andi_tl(len, fg, 0x1f);
+
+        mask = tcg_temp_new();
+        tcg_gen_movi_tl(mask, 1);
+        tcg_gen_shl_tl(mask, mask, len);
+        tcg_gen_subi_tl(mask, mask, 1);
+        tcg_gen_andi_tl(mask, mask, 0xffff);
+
+        fgnd = tcg_temp_new();
+        tcg_gen_shri_tl(fgnd, fg, 16);
+        tcg_gen_and_tl(fgnd, fgnd, mask);
+
+        shft = tcg_temp_new();
+        tcg_gen_shri_tl(shft, fg, 8);
+        tcg_gen_andi_tl(shft, shft, 0x1f);
+
+        if (sop == 3) {
+            /* Sign extend the fg bit field.  */
+            tcg_gen_movi_tl(mask, -1);
+            gen_extNs_tl(fgnd, fgnd, len);
+        }
+        tcg_gen_shl_tl(fgnd, fgnd, shft);
+        tcg_gen_shl_tl(mask, mask, shft);
+        tcg_gen_not_tl(mask, mask);
+        tcg_gen_and_tl(mask, bg, mask);
+
+        tcg_gen_or_tl(cpu_dreg[dst0], mask, fgnd);
+
+        tcg_temp_free(shft);
+        tcg_temp_free(fgnd);
+        tcg_temp_free(mask);
+        tcg_temp_free(len);
+
+        astat_queue_state1(dc, ASTAT_OP_LOGICAL, cpu_dreg[dst0]);
+    }
+#if 0
+    else if (sop == 0 && sopcde == 11) {
+        bu64 acc0 = get_unextended_acc (cpu, 0);
+
+        /* Dreg_lo{dst0} = CC = BXORSHIFT (A0, Dreg{src0}); */
+
+        acc0 <<= 1;
+        SET_CCREG (xor_reduce (acc0, DREG (src0)));
+        SET_DREG (dst0, REG_H_L (DREG (dst0), CCREG));
+        SET_AREG (0, acc0);
+    } else if (sop == 1 && sopcde == 11) {
+        bu64 acc0 = get_unextended_acc (cpu, 0);
+
+        /* Dreg_lo{dst0} = CC = BXOR (A0, Dreg{src0}); */
+
+        SET_CCREG (xor_reduce (acc0, DREG (src0)));
+        SET_DREG (dst0, REG_H_L (DREG (dst0), CCREG));
+    } else if (sop == 0 && sopcde == 12) {
+        bu64 acc0 = get_unextended_acc (cpu, 0);
+        bu64 acc1 = get_unextended_acc (cpu, 1);
+
+        /* A0 = BXORSHIFT (A0, A1, CC); */
+
+        acc0 = (acc0 << 1) | (CCREG ^ xor_reduce (acc0, acc1));
+        SET_AREG (0, acc0);
+    } else if (sop == 1 && sopcde == 12) {
+        bu64 acc0 = get_unextended_acc (cpu, 0);
+        bu64 acc1 = get_unextended_acc (cpu, 1);
+
+        /* Dreg_lo{dst0} = CC = BXOR (A0, A1, CC); */
+
+        SET_CCREG (CCREG ^ xor_reduce (acc0, acc1));
+        acc0 = (acc0 << 1) | CCREG;
+        SET_DREG (dst0, REG_H_L (DREG (dst0), CCREG));
+    }
+#endif
+    else if ((sop == 0 || sop == 1 || sop == 2) && sopcde == 13) {
+        int shift = (sop + 1) * 8;
+        TCGv tmp2;
+        /* Dreg{dst0} = ALIGN{shift} (Dreg{src1}, Dreg{src0}); */
+        /* XXX: could be optimized a bit if dst0 is not src1 or src0 */
+        tmp = tcg_temp_new();
+        tmp2 = tcg_temp_new();
+        tcg_gen_shli_tl(tmp, cpu_dreg[src1], 32 - shift);
+        tcg_gen_shri_tl(tmp2, cpu_dreg[src0], shift);
+        tcg_gen_or_tl(cpu_dreg[dst0], tmp, tmp2);
+        tcg_temp_free(tmp2);
+        tcg_temp_free(tmp);
+    } else
+        illegal_instruction(dc);
+}
+
+/* Decode one dsp32shiftimm insn (shift/rotate by an immediate) and emit the
+ * TCG ops for it.
+ *
+ * dc:  translation context, passed on to the ASTAT and exception helpers
+ * iw0: first 16-bit word of the opcode (M and sopcde fields)
+ * iw1: second 16-bit word (sop, HLs, dst0, immag and src1 fields)
+ *
+ * Encodings that match none of the branches below are reported through
+ * illegal_instruction().  Several forms still lack ASTAT updates and
+ * saturation; those are marked inline.
+ */
+static void
+decode_dsp32shiftimm_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* dsp32shiftimm
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 0 | 0 |.M.| 1 | 1 | 0 | 1 | - | - |.sopcde............|
+       |.sop...|.HLs...|.dst0......|.immag.................|.src1......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int src1     = ((iw1 >> DSP32ShiftImm_src1_bits) & DSP32ShiftImm_src1_mask);
+    int sop      = ((iw1 >> DSP32ShiftImm_sop_bits) & DSP32ShiftImm_sop_mask);
+    int bit8     = ((iw1 >> 8) & 0x1);
+    int immag    = ((iw1 >> DSP32ShiftImm_immag_bits) & DSP32ShiftImm_immag_mask);
+    /* Two's complement negation of immag within the same field width.  */
+    int newimmag = (-(iw1 >> DSP32ShiftImm_immag_bits) & DSP32ShiftImm_immag_mask);
+    int dst0     = ((iw1 >> DSP32ShiftImm_dst0_bits) & DSP32ShiftImm_dst0_mask);
+    int M        = ((iw0 >> (DSP32ShiftImm_M_bits - 16)) & DSP32ShiftImm_M_mask);
+    int sopcde   = ((iw0 >> (DSP32ShiftImm_sopcde_bits - 16)) & DSP32ShiftImm_sopcde_mask);
+    int HLs      = ((iw1 >> DSP32ShiftImm_HLs_bits) & DSP32ShiftImm_HLs_mask);
+    TCGv tmp;
+
+    TRACE_EXTRACT("%s: M:%i sopcde:%i sop:%i HLs:%i dst0:%i immag:%#x src1:%i",
+                  __func__, M, sopcde, sop, HLs, dst0, immag, src1);
+
+    if (sopcde == 0) {
+        tmp = tcg_temp_new();
+
+        /* HLs bit 0 picks the source half; sop == 0 extends it signed.  */
+        if (HLs & 1) {
+            if (sop == 0)
+                tcg_gen_sari_tl(tmp, cpu_dreg[src1], 16);
+            else
+                tcg_gen_shri_tl(tmp, cpu_dreg[src1], 16);
+        } else {
+            if (sop == 0)
+                tcg_gen_ext16s_tl(tmp, cpu_dreg[src1]);
+            else
+                tcg_gen_ext16u_tl(tmp, cpu_dreg[src1]);
+        }
+
+        if (sop == 0) {
+            /* dregs_hi/lo = dregs_hi/lo >>> imm4 */
+            tcg_gen_sari_tl(tmp, tmp, newimmag);
+            astat_queue_state1(dc, ASTAT_OP_ASHIFT16, tmp);
+        } else if (sop == 1 && bit8 == 0) {
+            /*  dregs_hi/lo = dregs_hi/lo << imm4 (S) */
+            tcg_gen_shli_tl(tmp, tmp, immag);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT16, tmp);
+        } else if (sop == 1 && bit8) {
+            /* dregs_hi/lo = dregs_hi/lo >>> imm4 (S) */
+            tcg_gen_shri_tl(tmp, tmp, immag);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT16, tmp);
+        } else if (sop == 2 && bit8) {
+            /* dregs_hi/lo = dregs_hi/lo >> imm4 */
+            tcg_gen_shri_tl(tmp, tmp, newimmag);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT16, tmp);
+        } else if (sop == 2 && bit8 == 0) {
+            /* dregs_hi/lo = dregs_hi/lo << imm4 */
+            tcg_gen_shli_tl(tmp, tmp, immag);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT16, tmp);
+        } else
+            illegal_instruction(dc);
+
+        /* HLs bit 1 picks the destination half.  */
+        if (HLs & 2)
+            gen_mov_h_tl(cpu_dreg[dst0], tmp);
+        else
+            gen_mov_l_tl(cpu_dreg[dst0], tmp);
+
+        tcg_temp_free(tmp);
+    } else if (sop == 2 && sopcde == 3 && (HLs == 1 || HLs == 0)) {
+        /* Areg{HLs} = ROT Areg{HLs} BY imm{immag}; */
+        int shift = imm6(immag);
+        gen_roti_i64(cpu_areg[HLs], cpu_areg[HLs], shift);
+    } else if (sop == 0 && sopcde == 3 && bit8 == 1) {
+        /* Arithmetic shift, so shift in sign bit copies */
+        int shift = uimm5(newimmag);
+        HLs = !!HLs;
+
+        /* Areg{HLs} = Aregs{HLs} >>> imm{newimmag}; */
+        tcg_gen_sari_i64(cpu_areg[HLs], cpu_areg[HLs], shift);
+    } else if ((sop == 0 && sopcde == 3 && bit8 == 0) ||
+               (sop == 1 && sopcde == 3)) {
+        int shiftup = uimm5(immag);
+        int shiftdn = uimm5(newimmag);
+        HLs = !!HLs;
+
+        if (sop == 0)
+            /* Areg{HLs} = Aregs{HLs} <<{sop} imm{immag}; */
+            tcg_gen_shli_i64(cpu_areg[HLs], cpu_areg[HLs], shiftup);
+        else
+            /* Areg{HLs} = Aregs{HLs} >>{sop} imm{newimmag}; */
+            tcg_gen_shri_i64(cpu_areg[HLs], cpu_areg[HLs], shiftdn);
+
+//      SET_AREG (HLs, acc);
+//      SET_ASTATREG (an, !!(acc & 0x8000000000ull));
+//      SET_ASTATREG (az, acc == 0);
+    }
+#if 0
+    else if (sop == 1 && sopcde == 1 && bit8 == 0) {
+        int count = imm5(immag);
+        bu16 val0 = DREG (src1) >> 16;
+        bu16 val1 = DREG (src1) & 0xFFFF;
+        bu32 astat;
+
+        TRACE_INSN (cpu, "R%i = R%i << %i (V,S);", dst0, src1, count);
+        val0 = lshift (cpu, val0, count, 16, 1);
+        astat = ASTAT;
+        val1 = lshift (cpu, val1, count, 16, 1);
+        SET_ASTAT (ASTAT | astat);
+
+        STORE (DREG (dst0), (val0 << 16) | val1);
+    }
+#endif
+    else if (sop == 2 && sopcde == 1 && bit8 == 1) {
+        int count = imm5(newimmag);
+//      bu16 val0 = DREG (src1) & 0xFFFF;
+//      bu16 val1 = DREG (src1) >> 16;
+//      bu32 astat;
+
+        /* Dreg{dst0} = Dreg{src1} >> imm{count} (V); */
+/*
+        val0 = lshiftrt (cpu, val0, count, 16);
+        astat = ASTAT;
+        val1 = lshiftrt (cpu, val1, count, 16);
+        SET_ASTAT (ASTAT | astat);
+
+        STORE (DREG (dst0), val0 | (val1 << 16));
+*/
+        if (count > 0 && count <= 15) {
+            /* Shift the whole word, then mask off the high-half bits that
+             * leaked into the top of the low half.
+             */
+            tcg_gen_shri_tl(cpu_dreg[dst0], cpu_dreg[src1], count);
+            tcg_gen_andi_tl(cpu_dreg[dst0], cpu_dreg[dst0],
+                            0xffff0000 | ((1 << (16 - count)) - 1));
+        } else if (count)
+            tcg_gen_movi_tl(cpu_dreg[dst0], 0);
+    } else if (sop == 2 && sopcde == 1 && bit8 == 0) {
+        int count = imm5(immag);
+//      bu16 val0 = DREG (src1) & 0xFFFF;
+//      bu16 val1 = DREG (src1) >> 16;
+//      bu32 astat;
+
+        /* Dreg{dst0} = Dreg{src1} << imm{count} (V); */
+/*
+        val0 = lshift (cpu, val0, count, 16, 0);
+        astat = ASTAT;
+        val1 = lshift (cpu, val1, count, 16, 0);
+        SET_ASTAT (ASTAT | astat);
+
+        STORE (DREG (dst0), val0 | (val1 << 16));
+*/
+        /* XXX: No ASTAT handling */
+        if (count > 0 && count <= 15) {
+            /* Shift the whole word, then clear the low-half bits that
+             * leaked into the bottom of the high half.
+             */
+            tcg_gen_shli_tl(cpu_dreg[dst0], cpu_dreg[src1], count);
+            tcg_gen_andi_tl(cpu_dreg[dst0], cpu_dreg[dst0],
+                            ~(((1 << count) - 1) << 16));
+        } else if (count)
+            tcg_gen_movi_tl(cpu_dreg[dst0], 0);
+    } else if (sopcde == 1 && (sop == 0 || (sop == 1 && bit8 == 1))) {
+        int count = uimm5(newimmag);
+//      bu16 val0 = DREG (src1) & 0xFFFF;
+//      bu16 val1 = DREG (src1) >> 16;
+//      bu32 astat;
+
+        TRACE_INSN (cpu, "R%i = R%i >>> %i %s;", dst0, src1, count,
+		  sop == 0 ? "(V)" : "(V,S)");
+
+        if (sop == 1)
+            unhandled_instruction(dc, "ashiftrt (S)");
+
+/*
+        val0 = ashiftrt (cpu, val0, count, 16);
+        astat = ASTAT;
+        val1 = ashiftrt (cpu, val1, count, 16);
+        SET_ASTAT (ASTAT | astat);
+
+        STORE (DREG (dst0), REG_H_L (val1 << 16, val0));
+*/
+        /* XXX: No ASTAT handling */
+        if (count > 0 && count <= 15) {
+            /* Low half: sign extend, shift, keep 16 bits.  High half: shift
+             * the full word and keep only the upper 16 bits.
+             */
+            tmp = tcg_temp_new();
+            tcg_gen_ext16s_tl(tmp, cpu_dreg[src1]);
+            tcg_gen_sari_tl(tmp, tmp, count);
+            tcg_gen_andi_tl(tmp, tmp, 0xffff);
+            tcg_gen_sari_tl(cpu_dreg[dst0], cpu_dreg[src1], count);
+            tcg_gen_andi_tl(cpu_dreg[dst0], cpu_dreg[dst0], 0xffff0000);
+            tcg_gen_or_tl(cpu_dreg[dst0], cpu_dreg[dst0], tmp);
+            tcg_temp_free(tmp);
+        } else if (count)
+            unhandled_instruction(dc, "ashiftrt (S)");
+    } else if (sop == 1 && sopcde == 2) {
+        int count = imm6(immag);
+
+        /* Dreg{dst0} = Dreg{src1} << imm{count} (S); */
+        /* XXX: No saturation handling.  Reference implementation:
+         * STORE (DREG (dst0), lshift (cpu, DREG (src1), count, 32, 1));
+         */
+        if (count < 0) {
+            /* NOTE(review): a negative magnitude is treated as an arithmetic
+             * right shift here -- confirm against the ISA reference.  The
+             * amount is clamped to 31, which gives the same result as any
+             * larger arithmetic right shift while staying a valid TCG count.
+             */
+            int rshift = -count > 31 ? 31 : -count;
+            tcg_gen_sari_tl(cpu_dreg[dst0], cpu_dreg[src1], rshift);
+        } else {
+            /* Shift by count itself; negating it here produced a negative
+             * (invalid) shift amount for every ordinary left shift.
+             */
+            tcg_gen_shli_tl(cpu_dreg[dst0], cpu_dreg[src1], count);
+        }
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst0]);
+    } else if (sop == 2 && sopcde == 2) {
+        int count = imm6(newimmag);
+
+        /* Dreg{dst0} = Dreg{src1} >> imm{count}; */
+        if (count < 0) {
+            tcg_gen_shli_tl(cpu_dreg[dst0], cpu_dreg[src1], -count);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst0]);
+        } else {
+            tcg_gen_shri_tl(cpu_dreg[dst0], cpu_dreg[src1], count);
+            astat_queue_state1(dc, ASTAT_OP_LSHIFT_RT32, cpu_dreg[dst0]);
+        }
+    } else if (sop == 3 && sopcde == 2) {
+        /* Dreg{dst0} = ROT Dreg{src1} BY imm{shift}; */
+        int shift = imm6(immag);
+        gen_roti_tl(cpu_dreg[dst0], cpu_dreg[src1], shift);
+    } else if (sop == 0 && sopcde == 2) {
+        int count = imm6(newimmag);
+
+        /* Dreg{dst0} = Dreg{src1} >>> imm{count}; */
+        if (count < 0) {
+            /* A negative count is a left shift by -count, as in the
+             * sop == 2 form above.
+             * NOTE(review): count == -32 still yields a shift amount of 32,
+             * here and in the sop == 2 form -- confirm how it should decode.
+             */
+            tcg_gen_shli_tl(cpu_dreg[dst0], cpu_dreg[src1], -count);
+        } else {
+            tcg_gen_sari_tl(cpu_dreg[dst0], cpu_dreg[src1], count);
+        }
+        /* NOTE(review): ASTAT_OP_LSHIFT32 is kept as-is; the 16-bit
+         * arithmetic shift above queues ASTAT_OP_ASHIFT16, so
+         * ASTAT_OP_ASHIFT32 may be what is wanted here -- confirm.
+         */
+        astat_queue_state1(dc, ASTAT_OP_LSHIFT32, cpu_dreg[dst0]);
+    } else
+        illegal_instruction(dc);
+}
+
+/* Decode the 16-bit pseudo debug insn group (DBG, ABORT, HLT, OUTC, ...).
+ * Forms without an implementation go through unhandled_instruction();
+ * anything that matches no form goes through illegal_instruction().
+ */
+static void
+decode_psedoDEBUG_0(DisasContext *dc, uint16_t iw0)
+{
+    /* psedoDEBUG
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 |.fn....|.grp.......|.reg.......|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int fn  = ((iw0 >> PseudoDbg_fn_bits) & PseudoDbg_fn_mask);
+    int grp = ((iw0 >> PseudoDbg_grp_bits) & PseudoDbg_grp_mask);
+    int reg = ((iw0 >> PseudoDbg_reg_bits) & PseudoDbg_reg_mask);
+
+    TRACE_EXTRACT("%s: fn:%i grp:%i reg:%i", __func__, fn, grp, reg);
+
+    switch (fn) {
+    case 0: {
+        /* DBG allreg{grp,reg}; */
+        TCGv grp_const = tcg_const_tl(grp);
+        TCGv reg_const = tcg_const_tl(reg);
+        gen_helper_dbg(get_allreg(dc, grp, reg), grp_const, reg_const);
+        tcg_temp_free(reg_const);
+        tcg_temp_free(grp_const);
+        return;
+    }
+    case 1:
+        unhandled_instruction(dc, "PRNT allregs");
+        return;
+    case 2:
+        if (grp == 0) {
+            /* OUTC Dreg{reg}; */
+            gen_helper_outc(cpu_dreg[reg]);
+            return;
+        }
+        break;
+    case 3:
+        /* For fn == 3 only reg selects the operation; grp is ignored.  */
+        switch (reg) {
+        case 0:
+        case 1:
+            /* DBG Areg{reg}; */
+            unhandled_instruction(dc, "DBG Areg");
+            return;
+        case 3:
+            /* ABORT; */
+            cec_exception(dc, EXCP_ABORT);
+            return;
+        case 4:
+            /* HLT; */
+            cec_exception(dc, EXCP_HLT);
+            return;
+        case 5:
+            unhandled_instruction(dc, "DBGHALT");
+            return;
+        case 6:
+            unhandled_instruction(dc, "DBGCMPLX (dregs)");
+            return;
+        case 7:
+            unhandled_instruction(dc, "DBG");
+            return;
+        default:
+            break;
+        }
+        break;
+    default:
+        break;
+    }
+
+    /* No form matched.  */
+    illegal_instruction(dc);
+}
+
+/* Decode "OUTC imm": pass the immediate character field to the outc helper. */
+static void
+decode_psedoOChar_0(DisasContext *dc, uint16_t iw0)
+{
+    /* psedoOChar
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 |.ch............................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int ch = (iw0 >> PseudoChr_ch_bits) & PseudoChr_ch_mask;
+    TCGv ch_val;
+
+    TRACE_EXTRACT("%s: ch:%#x", __func__, ch);
+
+    /* OUTC imm{ch}; -- the helper takes a TCG value, so load the
+     * immediate into a scratch temp first.
+     */
+    ch_val = tcg_temp_new();
+    tcg_gen_movi_tl(ch_val, ch);
+    gen_helper_outc(ch_val);
+    tcg_temp_free(ch_val);
+}
+
+/* Decode the 32-bit DBGA/DBGAL/DBGAH pseudo insns: emit a call to the
+ * dbga_l/dbga_h helper, passing the current pc, the selected register and
+ * the 16-bit immediate from iw1 as the expected value.
+ */
+static void
+decode_psedodbg_assert_0(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    /* psedodbg_assert
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+
+       | 1 | 1 | 1 | 1 | 0 | - | - | - | dbgop |.grp.......|.regtest...|
+       |.expected......................................................|
+       +---+---+---+---|---+---+---+---|---+---+---+---|---+---+---+---+  */
+    int expected = ((iw1 >> PseudoDbg_Assert_expected_bits) & PseudoDbg_Assert_expected_mask);
+    int dbgop    = ((iw0 >> (PseudoDbg_Assert_dbgop_bits - 16)) & PseudoDbg_Assert_dbgop_mask);
+    int grp      = ((iw0 >> (PseudoDbg_Assert_grp_bits - 16)) & PseudoDbg_Assert_grp_mask);
+    int regtest  = ((iw0 >> (PseudoDbg_Assert_regtest_bits - 16)) & PseudoDbg_Assert_regtest_mask);
+    TCGv reg, exp, pc;
+
+    TRACE_EXTRACT("%s: dbgop:%i grp:%i regtest:%i expected:%#x",
+                  __func__, dbgop, grp, regtest, expected);
+
+    /* dbgop selects the form:
+     *   0: DBGA  (genreg_lo{grp,regtest}, imm{expected})
+     *   1: DBGA  (genreg_hi{grp,regtest}, imm{expected})
+     *   2: DBGAL (genreg{grp,regtest}, imm{expected})
+     *   3: DBGAH (genreg{grp,regtest}, imm{expected})
+     * Odd values check the high half, even values the low half.
+     */
+    if (dbgop < 0 || dbgop > 3) {
+        /* Do not also emit an assertion for an encoding we just rejected.  */
+        illegal_instruction(dc);
+        return;
+    }
+
+    reg = get_allreg(dc, grp, regtest);
+    exp = tcg_temp_new();
+    tcg_gen_movi_tl(exp, expected);
+    pc = tcg_const_tl(dc->pc);
+    if (dbgop & 1)
+        gen_helper_dbga_h(pc, reg, exp);
+    else
+        gen_helper_dbga_l(pc, reg, exp);
+    tcg_temp_free(pc);
+    tcg_temp_free(exp);
+}
+
+/* Try each 16-bit opcode pattern in turn.  The order matters because the
+ * more specific masks are tested before the broader ones that cover them.
+ * Returns 1 when a decoder was called, 0 when nothing matched.
+ */
+static int
+bfin_decode_insn16(DisasContext *dc, uint16_t iw0)
+{
+    if ((iw0 & 0xFF00) == 0x0000)
+        decode_ProgCtrl_0(dc, iw0);
+    else if ((iw0 & 0xFFC0) == 0x0240)
+        decode_CaCTRL_0(dc, iw0);
+    else if ((iw0 & 0xFF80) == 0x0100)
+        decode_PushPopReg_0(dc, iw0);
+    else if ((iw0 & 0xFE00) == 0x0400)
+        decode_PushPopMultiple_0(dc, iw0);
+    else if ((iw0 & 0xFE00) == 0x0600)
+        decode_ccMV_0(dc, iw0);
+    else if ((iw0 & 0xF800) == 0x0800)
+        decode_CCflag_0(dc, iw0);
+    else if ((iw0 & 0xFFE0) == 0x0200)
+        decode_CC2dreg_0(dc, iw0);
+    else if ((iw0 & 0xFF00) == 0x0300)
+        decode_CC2stat_0(dc, iw0);
+    else if ((iw0 & 0xF000) == 0x1000)
+        decode_BRCC_0(dc, iw0);
+    else if ((iw0 & 0xF000) == 0x2000)
+        decode_UJUMP_0(dc, iw0);
+    else if ((iw0 & 0xF000) == 0x3000)
+        decode_REGMV_0(dc, iw0);
+    else if ((iw0 & 0xFC00) == 0x4000)
+        decode_ALU2op_0(dc, iw0);
+    else if ((iw0 & 0xFE00) == 0x4400)
+        decode_PTR2op_0(dc, iw0);
+    else if ((iw0 & 0xF800) == 0x4800)
+        decode_LOGI2op_0(dc, iw0);
+    else if ((iw0 & 0xF000) == 0x5000)
+        decode_COMP3op_0(dc, iw0);
+    else if ((iw0 & 0xF800) == 0x6000)
+        decode_COMPI2opD_0(dc, iw0);
+    else if ((iw0 & 0xF800) == 0x6800)
+        decode_COMPI2opP_0(dc, iw0);
+    else if ((iw0 & 0xF000) == 0x8000)
+        decode_LDSTpmod_0(dc, iw0);
+    else if ((iw0 & 0xFF60) == 0x9E60)
+        decode_dagMODim_0(dc, iw0);
+    else if ((iw0 & 0xFFF0) == 0x9F60)
+        decode_dagMODik_0(dc, iw0);
+    else if ((iw0 & 0xFC00) == 0x9C00)
+        decode_dspLDST_0(dc, iw0);
+    else if ((iw0 & 0xF000) == 0x9000)
+        decode_LDST_0(dc, iw0);
+    else if ((iw0 & 0xFC00) == 0xB800)
+        decode_LDSTiiFP_0(dc, iw0);
+    else if ((iw0 & 0xE000) == 0xA000)
+        decode_LDSTii_0(dc, iw0);
+    else
+        return 0;
+
+    return 1;
+}
+
+/* Try each pattern for opcodes whose top two bits are set.  Only LoopSetup
+ * and dsp32shift constrain the second word; MNOP needs an exact match of it.
+ * The two pseudo insns at 0xF8xx/0xF9xx are really 16 bits long, so
+ * insn_len is corrected after they are decoded.
+ * Returns 1 when the opcode was recognised, 0 when nothing matched.
+ */
+static int
+bfin_decode_insn32(DisasContext *dc, uint16_t iw0, uint16_t iw1)
+{
+    if ((iw0 & 0xf7ff) == 0xc003 && iw1 == 0x1800)
+        /* MNOP; */
+        return 1;
+
+    if ((iw0 & 0xFF80) == 0xE080 && (iw1 & 0x0C00) == 0x0000)
+        decode_LoopSetup_0(dc, iw0, iw1);
+    else if ((iw0 & 0xFF00) == 0xE100)
+        decode_LDIMMhalf_0(dc, iw0, iw1);
+    else if ((iw0 & 0xFE00) == 0xE200)
+        decode_CALLa_0(dc, iw0, iw1);
+    else if ((iw0 & 0xFC00) == 0xE400)
+        decode_LDSTidxI_0(dc, iw0, iw1);
+    else if ((iw0 & 0xFFFE) == 0xE800)
+        decode_linkage_0(dc, iw0, iw1);
+    else if ((iw0 & 0xF600) == 0xC000)
+        decode_dsp32mac_0(dc, iw0, iw1);
+    else if ((iw0 & 0xF600) == 0xC200)
+        decode_dsp32mult_0(dc, iw0, iw1);
+    else if ((iw0 & 0xF7C0) == 0xC400)
+        decode_dsp32alu_0(dc, iw0, iw1);
+    else if ((iw0 & 0xF7E0) == 0xC600 && (iw1 & 0x01C0) == 0x0000)
+        decode_dsp32shift_0(dc, iw0, iw1);
+    else if ((iw0 & 0xF7E0) == 0xC680)
+        decode_dsp32shiftimm_0(dc, iw0, iw1);
+    else if ((iw0 & 0xFF00) == 0xF800) {
+        decode_psedoDEBUG_0(dc, iw0);
+        dc->insn_len = 2;
+    } else if ((iw0 & 0xFF00) == 0xF900) {
+        decode_psedoOChar_0(dc, iw0);
+        dc->insn_len = 2;
+    } else if ((iw0 & 0xFF00) == 0xF000)
+        decode_psedodbg_assert_0(dc, iw0, iw1);
+    else
+        return 0;
+
+    return 1;
+}
+
+/* Interpret a single 16bit/32bit insn; no parallel insn handling.
+ * Fetches the opcode at pc, records its length in dc->insn_len and hands
+ * it to the matching decoder; unmatched opcodes are reported through
+ * illegal_instruction().
+ */
+static void
+_interp_insn_bfin(DisasContext *dc, target_ulong pc)
+{
+    uint16_t iw0 = lduw_code(pc);
+    uint16_t iw1;
+
+    if ((iw0 & 0xc000) != 0xc000) {
+        /* 16-bit opcode */
+        dc->insn_len = 2;
+
+        TRACE_EXTRACT("%s: iw0:%#x", __func__, iw0);
+        if (!bfin_decode_insn16(dc, iw0)) {
+            TRACE_EXTRACT("%s: no matching 16-bit pattern", __func__);
+            illegal_instruction(dc);
+        }
+        return;
+    }
+
+    /* Grab the next 16 bits to determine if it's a 32-bit or 64-bit opcode.
+     * NOTE(review): this word is also fetched for the 16-bit pseudo insns
+     * (0xF8xx/0xF9xx), i.e. two bytes past the end of the insn.
+     */
+    iw1 = lduw_code(pc + 2);
+    if ((iw0 & BIT_MULTI_INS) && (iw0 & 0xe800) != 0xe800 /* not linkage */) {
+        dc->insn_len = 8;
+    } else {
+        dc->insn_len = 4;
+    }
+
+    TRACE_EXTRACT("%s: iw0:%#x iw1:%#x insn_len:%i", __func__,
+                  iw0, iw1, dc->insn_len);
+
+    if (!bfin_decode_insn32(dc, iw0, iw1)) {
+        TRACE_EXTRACT("%s: no matching 32-bit pattern", __func__);
+        illegal_instruction(dc);
+    }
+}
+
+/* Interpret a single Blackfin insn; breaks up parallel insns.
+ * When the insn at dc->pc reports a length of 8, the two further insns
+ * at pc + 4 and pc + 6 are decoded as well.
+ */
+static void
+interp_insn_bfin(DisasContext *dc)
+{
+    _interp_insn_bfin(dc, dc->pc);
+
+    /* Nothing more to do unless this is a multiple issue instruction */
+    if (dc->insn_len != 8)
+        return;
+
+    _interp_insn_bfin(dc, dc->pc + 4);
+    _interp_insn_bfin(dc, dc->pc + 6);
+
+    /* The slot decodes overwrote insn_len; restore the full length so
+     * higher levels can process branches.
+     */
+    dc->insn_len = 8;
+}
diff --git a/target-bfin/bfin-tdep.h b/target-bfin/bfin-tdep.h
new file mode 100644
index 0000000..ef6d325
--- /dev/null
+++ b/target-bfin/bfin-tdep.h
@@ -0,0 +1,94 @@ 
+/* Target-dependent code for Analog Devices Blackfin processor, for GDB.
+
+   Copyright (C) 2005 Free Software Foundation, Inc.
+   Contributed by Analog Devices.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+/* Register numbers for the Blackfin target.  The enumerators are numbered
+   consecutively from BFIN_R0_REGNUM = 0, so their order is significant:
+   inserting or reordering an entry renumbers everything after it.  */
+enum gdb_regnum {
+  /* Core Registers */
+  BFIN_R0_REGNUM = 0,
+  BFIN_R1_REGNUM,
+  BFIN_R2_REGNUM,
+  BFIN_R3_REGNUM,
+  BFIN_R4_REGNUM,
+  BFIN_R5_REGNUM,
+  BFIN_R6_REGNUM,
+  BFIN_R7_REGNUM,
+  BFIN_P0_REGNUM,
+  BFIN_P1_REGNUM,
+  BFIN_P2_REGNUM,
+  BFIN_P3_REGNUM,
+  BFIN_P4_REGNUM,
+  BFIN_P5_REGNUM,
+  BFIN_SP_REGNUM,
+  BFIN_FP_REGNUM,
+  BFIN_I0_REGNUM,
+  BFIN_I1_REGNUM,
+  BFIN_I2_REGNUM,
+  BFIN_I3_REGNUM,
+  BFIN_M0_REGNUM,
+  BFIN_M1_REGNUM,
+  BFIN_M2_REGNUM,
+  BFIN_M3_REGNUM,
+  BFIN_B0_REGNUM,
+  BFIN_B1_REGNUM,
+  BFIN_B2_REGNUM,
+  BFIN_B3_REGNUM,
+  BFIN_L0_REGNUM,
+  BFIN_L1_REGNUM,
+  BFIN_L2_REGNUM,
+  BFIN_L3_REGNUM,
+  BFIN_A0_DOT_X_REGNUM,
+  BFIN_A0_DOT_W_REGNUM,
+  BFIN_A1_DOT_X_REGNUM,
+  BFIN_A1_DOT_W_REGNUM,
+  BFIN_ASTAT_REGNUM,
+  BFIN_RETS_REGNUM,
+  BFIN_LC0_REGNUM,
+  BFIN_LT0_REGNUM,
+  BFIN_LB0_REGNUM,
+  BFIN_LC1_REGNUM,
+  BFIN_LT1_REGNUM,
+  BFIN_LB1_REGNUM,
+  BFIN_CYCLES_REGNUM,
+  BFIN_CYCLES2_REGNUM,
+  BFIN_USP_REGNUM,
+  BFIN_SEQSTAT_REGNUM,
+  BFIN_SYSCFG_REGNUM,
+  BFIN_RETI_REGNUM,
+  BFIN_RETX_REGNUM,
+  BFIN_RETN_REGNUM,
+  BFIN_RETE_REGNUM,
+
+  /* Pseudo Registers */
+  BFIN_PC_REGNUM,
+  BFIN_CC_REGNUM,
+  BFIN_TEXT_ADDR,		/* Address of .text section.  */
+  BFIN_TEXT_END_ADDR,		/* Address of the end of .text section.  */
+  BFIN_DATA_ADDR,		/* Address of .data section.  */
+
+  BFIN_FDPIC_EXEC_REGNUM,
+  BFIN_FDPIC_INTERP_REGNUM,
+
+  /* MMRs */
+  BFIN_IPEND_REGNUM,
+
+  /* LAST ENTRY SHOULD NOT BE CHANGED.  */
+  BFIN_NUM_REGS			/* The number of all registers.  */
+};
diff --git a/target-bfin/cpu.h b/target-bfin/cpu.h
new file mode 100644
index 0000000..161e6b7
--- /dev/null
+++ b/target-bfin/cpu.h
@@ -0,0 +1,215 @@ 
+/*
+ * Blackfin emulation
+ *
+ * Copyright 2007-2011 Mike Frysinger
+ * Copyright 2007-2011 Analog Devices, Inc.
+ *
+ * Licensed under the Lesser GPL 2 or later.
+ */
+
+#ifndef CPU_BFIN_H
+#define CPU_BFIN_H
+
+struct DisasContext;
+
+#define TARGET_LONG_BITS 32
+
+#include "cpu-defs.h"
+
+#define TARGET_HAS_ICE 1
+
+#define EXCP_SYSCALL        0
+#define EXCP_SOFT_BP        1
+#define EXCP_STACK_OVERFLOW 3
+#define EXCP_SINGLE_STEP    0x10
+#define EXCP_TRACE_FULL     0x11
+#define EXCP_UNDEF_INST     0x21
+#define EXCP_ILL_INST       0x22
+#define EXCP_DCPLB_VIOLATE  0x23
+#define EXCP_DATA_MISALGIN  0x24
+#define EXCP_UNRECOVERABLE  0x25
+#define EXCP_DCPLB_MISS     0x26
+#define EXCP_DCPLB_MULT     0x27
+#define EXCP_EMU_WATCH      0x28
+#define EXCP_MISALIG_INST   0x2a
+#define EXCP_ICPLB_PROT     0x2b
+#define EXCP_ICPLB_MISS     0x2c
+#define EXCP_ICPLB_MULT     0x2d
+#define EXCP_ILL_SUPV       0x2e
+#define EXCP_ABORT          0x100
+#define EXCP_DBGA           0x101
+#define EXCP_OUTC           0x102
+
+#define BFIN_L1_CACHE_BYTES 32
+
+/* Blackfin does 1K/4K/1M/4M, but for now only support 4k */
+#define TARGET_PAGE_BITS    12
+#define NB_MMU_MODES        2
+
+#define TARGET_PHYS_ADDR_SPACE_BITS 32
+#define TARGET_VIRT_ADDR_SPACE_BITS 32
+
+#define CPUState struct CPUBFINState
+#define cpu_init cpu_bfin_init
+#define cpu_exec cpu_bfin_exec
+#define cpu_gen_code cpu_bfin_gen_code
+#define cpu_signal_handler cpu_bfin_signal_handler
+
+/* Indexes into astat array; matches bitpos in hardware too.
+ * The explicit initializers skip over positions that have no named flag
+ * here, so not every index of the 32-entry astat[] array has a name.
+ */
+enum {
+    ASTAT_AZ = 0,
+    ASTAT_AN,
+    ASTAT_AC0_COPY,
+    ASTAT_V_COPY,
+    ASTAT_CC = 5,
+    ASTAT_AQ,
+    ASTAT_RND_MOD = 8,
+    ASTAT_AC0 = 12,
+    ASTAT_AC1,
+    ASTAT_AV0 = 16,
+    ASTAT_AV0S,
+    ASTAT_AV1,
+    ASTAT_AV1S,
+    ASTAT_V = 24,
+    ASTAT_VS
+};
+
+/* Per-CPU register state for the Blackfin target.  CPU_COMMON supplies the
+ * generic fields; everything after it is Blackfin specific.
+ */
+typedef struct CPUBFINState {
+    CPU_COMMON
+    uint32_t dreg[8];
+    uint32_t preg[8];       /* entries 6 and 7 are aliased as spreg/fpreg */
+    uint32_t ireg[4];
+    uint32_t mreg[4];
+    uint32_t breg[4];
+    uint32_t lreg[4];
+    uint64_t areg[2];       /* accumulators, kept as 64-bit values */
+    uint32_t rets;
+    uint32_t lcreg[2], ltreg[2], lbreg[2];
+    uint32_t cycles[2];
+    uint32_t uspreg;
+    uint32_t seqstat;
+    uint32_t syscfg;
+    uint32_t reti;
+    uint32_t retx;
+    uint32_t retn;
+    uint32_t rete;
+    uint32_t emudat;
+    uint32_t pc;
+
+    /* ASTAT bits, stored one flag per array entry rather than packed into
+     * a single word; bfin_astat_read()/bfin_astat_write() convert. */
+    uint32_t astat[32];
+    /* ASTAT delayed helpers */
+    uint32_t astat_op, astat_arg[3];
+} CPUBFINState;
+/* Field aliases: env->spreg and env->fpreg expand to preg[6] and preg[7]. */
+#define spreg preg[6]
+#define fpreg preg[7]
+
+/* Pack the per-flag astat[] array into a single word: entry N contributes
+ * its value shifted left by N.
+ */
+static inline uint32_t bfin_astat_read(CPUState *env)
+{
+    unsigned int packed = 0;
+    unsigned int bitpos = 0;
+
+    while (bitpos < 32) {
+        packed |= (env->astat[bitpos] << bitpos);
+        ++bitpos;
+    }
+
+    return packed;
+}
+
+/* Unpack a 32-bit ASTAT word into the per-flag astat[] array: entry N is
+ * set to 1 when bit N of astat is set and to 0 otherwise.
+ */
+static inline void bfin_astat_write(CPUState *env, uint32_t astat)
+{
+    unsigned int i;
+
+    /* Shift the unsigned word down rather than a signed 1 up: 1 << 31
+     * does not fit in an int, which makes the shift undefined behaviour.
+     */
+    for (i = 0; i < 32; ++i)
+        env->astat[i] = (astat >> i) & 1;
+}
+
+/* Kinds of operation whose ASTAT update can be queued for later; stored in
+ * DisasContext.astat_op.
+ * NOTE(review): the four vector entries use an ARRAY_OP_ prefix while every
+ * other entry uses ASTAT_OP_ -- looks unintentional, confirm before renaming.
+ */
+enum astat_ops {
+    ASTAT_OP_NONE,
+    ASTAT_OP_DYNAMIC,
+    ASTAT_OP_ABS,
+    ASTAT_OP_ADD16,
+    ASTAT_OP_ADD32,
+    ASTAT_OP_ASHIFT16,
+    ASTAT_OP_ASHIFT32,
+    ASTAT_OP_COMPARE_SIGNED,
+    ASTAT_OP_COMPARE_UNSIGNED,
+    ASTAT_OP_LOGICAL,
+    ASTAT_OP_LSHIFT16,
+    ASTAT_OP_LSHIFT32,
+    ASTAT_OP_LSHIFT_RT16,
+    ASTAT_OP_LSHIFT_RT32,
+    ASTAT_OP_MIN_MAX,
+    ASTAT_OP_NEGATE,
+    ASTAT_OP_SUB16,
+    ASTAT_OP_SUB32,
+    ARRAY_OP_VECTOR_ADD_ADD,    /* +|+ */
+    ARRAY_OP_VECTOR_ADD_SUB,    /* +|- */
+    ARRAY_OP_VECTOR_SUB_SUB,    /* -|- */
+    ARRAY_OP_VECTOR_SUB_ADD,    /* -|+ */
+};
+
+/* Callback type for hardware loop processing; receives the translation
+ * context and a loop number.
+ */
+typedef void (*hwloop_callback)(struct DisasContext *dc, int loop);
+
+/* State carried through the translation of one block of guest code. */
+typedef struct DisasContext {
+    CPUState *env;
+    struct TranslationBlock *tb;
+    /* The current PC we're decoding (could be middle of parallel insn) */
+    target_ulong pc;
+    /* Length of current insn (2/4/8) */
+    target_ulong insn_len;
+
+    /* For delayed ASTAT handling */
+    enum astat_ops astat_op;
+
+    /* For hardware loop processing */
+    hwloop_callback hwloop_callback;
+    void *hwloop_data;
+
+    int is_jmp;
+    int mem_idx;
+} DisasContext;
+
+void do_interrupt(CPUState *env);
+CPUState *cpu_init(const char *cpu_model);
+int cpu_exec(CPUState *s);
+int cpu_bfin_signal_handler(int host_signum, void *pinfo, void *puc);
+
+extern const char * const greg_names[];
+extern const char *get_allreg_name(int grp, int reg);
+
+#define MMU_KERNEL_IDX 0
+#define MMU_USER_IDX   1
+
+int cpu_bfin_handle_mmu_fault(CPUState *env, target_ulong address, int rw,
+                              int mmu_idx, int is_softmmu);
+#define cpu_handle_mmu_fault cpu_bfin_handle_mmu_fault
+
+#if defined(CONFIG_USER_ONLY)
+/* Give a cloned CPU its own stack pointer; a newsp of 0 leaves the
+ * existing stack pointer untouched.
+ */
+static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
+{
+    if (!newsp)
+        return;
+
+    env->spreg = newsp;
+}
+#endif
+
+#include "cpu-all.h"
+#include "exec-all.h"
+
+/* Set the guest PC to the start address recorded in the translation block. */
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+    env->pc = tb->pc;
+}
+
+/* Return the current guest PC. */
+static inline target_ulong cpu_get_pc(CPUState *env)
+{
+    return env->pc;
+}
+
+/* Report the values that identify a translation block for this CPU state:
+ * pc is the current guest PC, cs_base is always 0 and flags is always 1.
+ */
+static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
+                                        target_ulong *cs_base, int *flags)
+{
+    *pc = env->pc;
+    *cs_base = 0;
+    *flags = 1;
+}
+
+#endif
diff --git a/target-bfin/exec.h b/target-bfin/exec.h
new file mode 100644
index 0000000..9ca9225
--- /dev/null
+++ b/target-bfin/exec.h
@@ -0,0 +1,37 @@ 
+/*
+ * Blackfin execution defines
+ *
+ * Copyright 2007-2011 Mike Frysinger
+ * Copyright 2007-2011 Analog Devices, Inc.
+ *
+ * Licensed under the Lesser GPL 2 or later.
+ */
+
+#ifndef EXEC_BFIN_H
+#define EXEC_BFIN_H
+
+#include "config.h"
+#include "dyngen-exec.h"
+
+register struct CPUBFINState *env asm(AREG0);
+
+#include "cpu.h"
+#include "exec-all.h"
+
+/* Non-zero when a hard interrupt or an NMI is pending for this CPU. */
+static inline int cpu_has_work(CPUState *env)
+{
+    return env->interrupt_request &
+           (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
+}
+
+/* Returns 0 when the CPU may run and EXCP_HALTED when it must stay halted.
+ * A halted CPU with a pending hard interrupt is woken (halted is cleared).
+ */
+static inline int cpu_halted(CPUState *env)
+{
+    if (env->halted) {
+        if (!(env->interrupt_request & CPU_INTERRUPT_HARD))
+            return EXCP_HALTED;
+        /* Pending hard interrupt: wake up */
+        env->halted = 0;
+    }
+
+    return 0;
+}
+
+#endif
diff --git a/target-bfin/helper.c b/target-bfin/helper.c
new file mode 100644
index 0000000..f62e7d5
--- /dev/null
+++ b/target-bfin/helper.c
@@ -0,0 +1,37 @@ 
+/*
+ * Blackfin helpers
+ *
+ * Copyright 2007-2011 Mike Frysinger
+ * Copyright 2007-2011 Analog Devices, Inc.
+ *
+ * Licensed under the Lesser GPL 2 or later.
+ */
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <assert.h>
+
+#include "config.h"
+#include "cpu.h"
+#include "exec-all.h"
+#include "host-utils.h"
+
+#if defined(CONFIG_USER_ONLY)
+
+/* User-only build: there is nothing to deliver, so just clear the pending
+ * exception by resetting exception_index to -1.
+ */
+void do_interrupt(CPUState *env)
+{
+    env->exception_index = -1;
+}
+
+/* User-only build: every fault is recorded as EXCP_DCPLB_VIOLATE and 1 is
+ * returned.  address, rw, mmu_idx and is_softmmu are not examined.
+ */
+int cpu_bfin_handle_mmu_fault(CPUState *env, target_ulong address, int rw,
+                              int mmu_idx, int is_softmmu)
+{
+    env->exception_index = EXCP_DCPLB_VIOLATE;
+    return 1;
+}
+
+#endif
diff --git a/target-bfin/helper.h b/target-bfin/helper.h
new file mode 100644
index 0000000..690bb3b
--- /dev/null
+++ b/target-bfin/helper.h
@@ -0,0 +1,21 @@ 
+#include "def-helper.h"
+
+DEF_HELPER_2(raise_exception, void, i32, i32)
+
+DEF_HELPER_3(dbga_l, void, i32, i32, i32)
+DEF_HELPER_3(dbga_h, void, i32, i32, i32)
+DEF_HELPER_1(outc, void, i32)
+DEF_HELPER_3(dbg, void, i32, i32, i32)
+
+DEF_HELPER_0(astat_load, i32)
+DEF_HELPER_1(astat_store, void, i32)
+
+DEF_HELPER_1(ones, i32, i32)
+DEF_HELPER_2(signbits, i32, i32, i32)
+DEF_HELPER_2(signbits_64, i32, i64, i32)
+
+DEF_HELPER_4(dagadd, i32, i32, i32, i32, i32)
+DEF_HELPER_4(dagsub, i32, i32, i32, i32, i32)
+DEF_HELPER_2(add_brev, i32, i32, i32)
+
+#include "def-helper.h"
diff --git a/target-bfin/op_helper.c b/target-bfin/op_helper.c
new file mode 100644
index 0000000..4534510
--- /dev/null
+++ b/target-bfin/op_helper.c
@@ -0,0 +1,213 @@ 
+/*
+ * Blackfin helpers
+ *
+ * Copyright 2007-2011 Mike Frysinger
+ * Copyright 2007-2011 Analog Devices, Inc.
+ *
+ * Licensed under the Lesser GPL 2 or later.
+ */
+
+#include "exec.h"
+#include "helper.h"
+
+/*
+ * Record exception @index in the CPU state and leave the execution loop
+ * through cpu_loop_exit().  A @pc of -1 (all bits set) means "leave env->pc
+ * untouched"; any other value is stored into env->pc first.
+ */
+void helper_raise_exception(uint32_t index, uint32_t pc)
+{
+    const uint32_t keep_pc = (uint32_t)-1;
+
+    if (pc != keep_pc) {
+        env->pc = pc;
+    }
+    env->exception_index = index;
+    cpu_loop_exit();
+}
+
+/*
+ * Debug assertion on the low half of a register: raise EXCP_DBGA at @pc
+ * unless the low 16 bits of @actual equal @expected.
+ */
+void helper_dbga_l(uint32_t pc, uint32_t actual, uint32_t expected)
+{
+    uint32_t low_half = actual & 0xffff;
+
+    if (low_half == expected) {
+        return;
+    }
+    helper_raise_exception(EXCP_DBGA, pc);
+}
+
+/*
+ * Debug assertion on the high half of a register: raise EXCP_DBGA at @pc
+ * unless the upper 16 bits of @actual equal @expected.
+ */
+void helper_dbga_h(uint32_t pc, uint32_t actual, uint32_t expected)
+{
+    uint32_t high_half = actual >> 16;
+
+    if (high_half == expected) {
+        return;
+    }
+    helper_raise_exception(EXCP_DBGA, pc);
+}
+
+/*
+ * Write the character @ch to stdout.  The stream is flushed only after a
+ * newline, so output appears line by line.
+ */
+void helper_outc(uint32_t ch)
+{
+    putc(ch, stdout);
+    if (ch != '\n') {
+        return;
+    }
+    fflush(stdout);
+}
+
+/*
+ * Print "DBG : <name> = 0x<val>" to stdout, where <name> is whatever
+ * get_allreg_name() returns for the (grp, reg) pair and <val> is shown as
+ * eight hex digits.
+ */
+void helper_dbg(uint32_t val, uint32_t grp, uint32_t reg)
+{
+    printf("DBG : %s = 0x%08x\n", get_allreg_name(grp, reg), val);
+}
+
+/* TCG helper: return the value bfin_astat_read() produces for the current
+   CPU state (the global env).  */
+uint32_t helper_astat_load(void)
+{
+    return bfin_astat_read(env);
+}
+
+/* TCG helper: hand @astat to bfin_astat_write() for the current CPU state
+   (the global env).  */
+void helper_astat_store(uint32_t astat)
+{
+    bfin_astat_write(env, astat);
+}
+
+/*
+ * Count the number of bits set to 1 in the 32bit value.
+ *
+ * Returns a count in the range 0..32.
+ */
+uint32_t helper_ones(uint32_t val)
+{
+    uint32_t ret = 0;
+
+    /* Clear the lowest set bit on every pass.  Everything stays in unsigned
+       arithmetic; the previous "1 << i" form shifted a signed int into its
+       sign bit for i == 31, which is undefined behaviour in C.  */
+    while (val != 0) {
+        val &= val - 1;
+        ++ret;
+    }
+
+    return ret;
+}
+
+/*
+ * Count number of leading bits that match the sign bit.
+ *
+ * @val is treated as a @size-bit quantity whose sign bit is bit (size - 1).
+ * The sign bit itself is not counted, so the result is in 0..size-1.
+ * A @size outside 1..32 cannot be represented in a 32-bit mask and yields 0.
+ */
+uint32_t helper_signbits(uint32_t val, uint32_t size)
+{
+    uint32_t mask, bit;
+    uint32_t count = 0;
+
+    /* Shifting by -1 or by >= 32 would be undefined; reject it up front. */
+    if (size == 0 || size > 32)
+        return 0;
+
+    /* Unsigned constant: "1 << 31" on a signed int is undefined behaviour. */
+    mask = (uint32_t)1 << (size - 1);
+    bit = val & mask;
+
+    for (;;) {
+        /* Walk mask and the expected bit value down together. */
+        mask >>= 1;
+        bit >>= 1;
+        if (mask == 0)
+            break;
+        if ((val & mask) != bit)
+            break;
+        ++count;
+    }
+
+    return count;
+}
+
+/*
+ * 64-bit flavour of the sign-bit counter: count how many bits directly
+ * below bit (size - 1) of @val carry the same value as that bit.
+ *
+ * For size == 40 the count is then reduced by 8 in uint32_t arithmetic, so
+ * counts below 8 wrap around to large unsigned values.
+ */
+uint32_t helper_signbits_64(uint64_t val, uint32_t size)
+{
+    uint64_t probe = (uint64_t)1 << (size - 1);
+    uint64_t sign = val & probe;
+    uint32_t run = 0;
+
+    while ((probe >>= 1) != 0) {
+        sign >>= 1;
+        if ((val & probe) != sign) {
+            break;
+        }
+        run++;
+    }
+
+    return (size == 40) ? run - 8 : run;
+}
+
+/* This is a bit crazy, but we want to simulate the hardware behavior exactly
+   rather than worry about the circular buffers being used correctly.  Which
+   isn't to say there isn't room for improvement here, just that we want to
+   be conservative.  See also dagsub().
+
+   Returns the new index value for "I += M" with circular-buffer wrapping.
+   The translator passes the I, L and B registers of one DAG set plus the
+   modifier M.  All sums are formed in 64 bits so that bit 32 ("car") exposes
+   the carry out of the corresponding 32-bit addition.  */
+uint32_t helper_dagadd(uint32_t I, uint32_t L, uint32_t B, uint32_t M)
+{
+    uint64_t i = I;
+    uint64_t l = L;
+    uint64_t b = B;
+    uint64_t m = M;
+
+    uint64_t LB, IM, IML;
+    uint32_t im32, iml32, lb32, res;
+    uint64_t msb, car;
+
+    msb = (uint64_t)1 << 31;
+    car = (uint64_t)1 << 32;
+
+    /* 64-bit sums plus their 32-bit truncations */
+    IM = i + m;
+    im32 = IM;
+    LB = l + b;
+    lb32 = LB;
+
+    if ((int32_t)M < 0) {
+        /* Negative modifier: choose between I+M and I+M+L */
+        IML = i + m + l;
+        iml32 = IML;
+        /* b is zero-extended from B, so these compares act as 32-bit ones */
+        if ((i & msb) || (IM & car))
+            res = (im32 < b) ? iml32 : im32;
+        else
+            res = (im32 < b) ? im32 : iml32;
+    } else {
+        /* Non-negative modifier: choose between I+M and I+M-L */
+        IML = i + m - l;
+        iml32 = IML;
+        if ((IM & car) == (LB & car))
+            res = (im32 < lb32) ? im32 : iml32;
+        else
+            res = (im32 < lb32) ? iml32 : im32;
+    }
+
+    return res;
+}
+
+/* See dagadd() notes above.
+
+   Returns the new index value for "I -= M" with circular-buffer wrapping.
+   The subtraction is performed as an addition of mbar, the 32-bit two's
+   complement of M, so that bit 32 ("car") of the 64-bit sums again exposes
+   the carry out of the 32-bit operation.  */
+uint32_t helper_dagsub(uint32_t I, uint32_t L, uint32_t B, uint32_t M)
+{
+    uint64_t i = I;
+    uint64_t l = L;
+    uint64_t b = B;
+    uint64_t m = M;
+
+    /* -M truncated to 32 bits, then zero-extended */
+    uint64_t mbar = (uint32_t)(~m + 1);
+    uint64_t LB, IM, IML;
+    uint32_t b32, im32, iml32, lb32, res;
+    uint64_t msb, car;
+
+    msb = (uint64_t)1 << 31;
+    car = (uint64_t)1 << 32;
+
+    /* 64-bit sums plus their 32-bit truncations */
+    IM = i + mbar;
+    im32 = IM;
+    LB = l + b;
+    lb32 = LB;
+
+    if ((int32_t)M < 0) {
+        /* Negative modifier: choose between I-M and I-M-L */
+        IML = i + mbar - l;
+        iml32 = IML;
+        if (!!((i & msb) && (IM & car)) == !!(LB & car))
+            res = (im32 < lb32) ? im32 : iml32;
+        else
+            res = (im32 < lb32) ? iml32 : im32;
+    } else {
+        /* Non-negative modifier: choose between I-M and I-M+L */
+        IML = i + mbar + l;
+        iml32 = IML;
+        b32 = b;
+        /* M == 0 is handled like the carry-out case */
+        if (M == 0 || IM & car)
+            res = (im32 < b32) ? iml32 : im32;
+        else
+            res = (im32 < b32) ? im32 : iml32;
+    }
+
+    return res;
+}
+
+/*
+ * Bit-reversed addition: add @addend1 and @addend2 one bit at a time,
+ * starting at bit 31 and propagating the carry towards bit 0.  A carry out
+ * of bit 0 is discarded.
+ */
+uint32_t helper_add_brev(uint32_t addend1, uint32_t addend2)
+{
+    uint32_t sum = 0;
+    uint32_t carry = 0;
+    int pos;
+
+    for (pos = 31; pos >= 0; --pos) {
+        uint32_t column = ((addend1 >> pos) & 1) + ((addend2 >> pos) & 1)
+                          + carry;
+
+        carry = column >> 1;
+        sum |= (column & 1) << pos;
+    }
+
+    return sum;
+}
diff --git a/target-bfin/translate.c b/target-bfin/translate.c
new file mode 100644
index 0000000..c3c2bea
--- /dev/null
+++ b/target-bfin/translate.c
@@ -0,0 +1,1267 @@ 
+/*
+ * Blackfin translation
+ *
+ * Copyright 2007-2011 Mike Frysinger
+ * Copyright 2007-2011 Analog Devices, Inc.
+ *
+ * Licensed under the Lesser GPL 2 or later.
+ */
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "cpu.h"
+#include "exec-all.h"
+#include "disas.h"
+#include "tcg-op.h"
+#include "qemu-common.h"
+#include "opcode/bfin.h"
+
+#include "helper.h"
+#define GEN_HELPER 1
+#include "helper.h"
+
+/* We're making a call (which means we need to update RTS) */
+#define DISAS_CALL 0xad0
+
+static TCGv_ptr cpu_env;
+static TCGv cpu_dreg[8];
+static TCGv cpu_preg[8];
+#define cpu_spreg cpu_preg[6]
+#define cpu_fpreg cpu_preg[7]
+static TCGv cpu_ireg[4];
+static TCGv cpu_mreg[4];
+static TCGv cpu_breg[4];
+static TCGv cpu_lreg[4];
+static TCGv_i64 cpu_areg[2];
+static TCGv cpu_rets;
+static TCGv cpu_lcreg[2], cpu_ltreg[2], cpu_lbreg[2];
+static TCGv cpu_cycles[2];
+static TCGv cpu_uspreg;
+static TCGv cpu_seqstat;
+static TCGv cpu_syscfg;
+static TCGv cpu_reti;
+static TCGv cpu_retx;
+static TCGv cpu_retn;
+static TCGv cpu_rete;
+static TCGv cpu_emudat;
+static TCGv cpu_pc;
+static TCGv cpu_cc;
+static TCGv /*cpu_astat_op,*/ cpu_astat_arg[3];
+
+#include "gen-icount.h"
+
+/* Reset the CPU: the only state touched is the program counter, which is
+   set to 0xEF000000.  */
+void cpu_reset(CPUState *env)
+{
+    env->pc = 0xEF000000;
+}
+
+/*
+ * Create @cnt TCG globals backed by consecutive 4-byte slots of the CPU
+ * state.  Slot k lives at byte offset @offbase + 4 * k, is named @names[k]
+ * and its handle is stored in @tcgv[k].
+ */
+static inline void
+bfin_tcg_new_set3(TCGv *tcgv, unsigned int cnt, unsigned int offbase,
+                  const char * const *names)
+{
+    unsigned int idx = 0;
+    unsigned int offset = offbase;
+
+    while (idx < cnt) {
+        tcgv[idx] = tcg_global_mem_new(TCG_AREG0, offset, names[idx]);
+        offset += 4;
+        ++idx;
+    }
+}
+/*
+ * Convenience wrappers around bfin_tcg_new_set3():
+ *  - bfin_tcg_new_set2: explicit handle array and count; the offset is taken
+ *    from CPUState field "reg" and names start at greg_names[name_idx].
+ *  - bfin_tcg_new_set: for a whole cpu_<reg>[] array (count via ARRAY_SIZE).
+ *  - bfin_tcg_new: for a single cpu_<reg> handle.
+ */
+#define bfin_tcg_new_set2(tcgv, cnt, reg, name_idx) \
+    bfin_tcg_new_set3(tcgv, cnt, offsetof(CPUState, reg), &greg_names[name_idx])
+#define bfin_tcg_new_set(reg, name_idx) \
+    bfin_tcg_new_set2(cpu_##reg, ARRAY_SIZE(cpu_##reg), reg, name_idx)
+#define bfin_tcg_new(reg, name_idx) \
+    bfin_tcg_new_set2(&cpu_##reg, 1, reg, name_idx)
+
+/*
+ * Allocate and reset a new CPU state and, on the first call only, create
+ * the TCG globals that mirror the CPUState registers.
+ *
+ * @cpu_model is not examined.  Returns the new CPUState, or NULL if the
+ * allocation returned NULL.
+ */
+CPUState *cpu_init(const char *cpu_model)
+{
+    CPUState *env;
+    /* Guards the one-time TCG global setup below */
+    static int tcg_initialized = 0;
+
+    env = qemu_mallocz(sizeof(*env));
+    if (!env)
+        return NULL;
+
+    cpu_exec_init(env);
+    cpu_reset(env);
+
+    /* TCG globals are shared by all CPUs; only build them once */
+    if (tcg_initialized)
+        return env;
+
+    tcg_initialized = 1;
+
+    /* NOTE(review): presumably this expansion registers the helpers declared
+       in helper.h with TCG -- confirm against def-helper.h */
+#define GEN_HELPER 2
+#include "helper.h"
+
+    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
+
+    cpu_pc = tcg_global_mem_new(TCG_AREG0,
+        offsetof(CPUState, pc), "PC");
+    /* CC is kept as its own slot of the astat[] array */
+    cpu_cc = tcg_global_mem_new(TCG_AREG0,
+        offsetof(CPUState, astat[ASTAT_CC]), "CC");
+
+    /*cpu_astat_op = tcg_global_mem_new(TCG_AREG0,
+        offsetof(CPUState, astat_op), "astat_op");*/
+    cpu_astat_arg[0] = tcg_global_mem_new(TCG_AREG0,
+        offsetof(CPUState, astat_arg[0]), "astat_arg[0]");
+    cpu_astat_arg[1] = tcg_global_mem_new(TCG_AREG0,
+        offsetof(CPUState, astat_arg[1]), "astat_arg[1]");
+    cpu_astat_arg[2] = tcg_global_mem_new(TCG_AREG0,
+        offsetof(CPUState, astat_arg[2]), "astat_arg[2]");
+
+    /* The two accumulators are 64-bit globals */
+    cpu_areg[0] = tcg_global_mem_new_i64(TCG_AREG0,
+        offsetof(CPUState, areg[0]), "A0");
+    cpu_areg[1] = tcg_global_mem_new_i64(TCG_AREG0,
+        offsetof(CPUState, areg[1]), "A1");
+
+    /* The second argument is the index of the (first) register name in
+       greg_names[], see bfin_tcg_new_set2() */
+    bfin_tcg_new_set(dreg, 0);
+    bfin_tcg_new_set(preg, 8);
+    bfin_tcg_new_set(ireg, 16);
+    bfin_tcg_new_set(mreg, 20);
+    bfin_tcg_new_set(breg, 24);
+    bfin_tcg_new_set(lreg, 28);
+    bfin_tcg_new(rets, 39);
+    bfin_tcg_new(lcreg[0], 48);
+    bfin_tcg_new(ltreg[0], 49);
+    bfin_tcg_new(lbreg[0], 50);
+    bfin_tcg_new(lcreg[1], 51);
+    bfin_tcg_new(ltreg[1], 52);
+    bfin_tcg_new(lbreg[1], 53);
+    bfin_tcg_new_set(cycles, 54);
+    bfin_tcg_new(uspreg, 56);
+    bfin_tcg_new(seqstat, 57);
+    bfin_tcg_new(syscfg, 58);
+    bfin_tcg_new(reti, 59);
+    bfin_tcg_new(retx, 60);
+    bfin_tcg_new(retn, 61);
+    bfin_tcg_new(rete, 62);
+    bfin_tcg_new(emudat, 63);
+
+    return env;
+}
+
+/* Print one ASTAT flag by name, prefixed with '~' when the flag is clear */
+#define _astat_printf(bit) cpu_fprintf(f, "%s" #bit " ", (env->astat[ASTAT_##bit] ? "" : "~"))
+/*
+ * Dump the register state of @env to @f through @cpu_fprintf: system and
+ * return registers, data/pointer registers, hardware loop registers, DAG
+ * registers, the accumulators (low 40 bits), ASTAT as a word and flag by
+ * flag, the cached ASTAT operation and the cycle counters.
+ *
+ * @flags is not used.
+ */
+void cpu_dump_state(CPUState *env, FILE *f,
+                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+                    int flags)
+{
+//    uint64_t iw;
+//    target_ulong len;
+
+    /* The cast keeps the argument in step with %lx whatever width the
+       syscfg field has (it is registered as a 32-bit TCG global).  */
+    cpu_fprintf(f, "              SYSCFG: %04lx   SEQSTAT: %08x\n",
+                (unsigned long)env->syscfg, env->seqstat);
+    cpu_fprintf(f, "RETE: %08x  RETN: %08x  RETX: %08x\n",
+                env->rete, env->retn, env->retx);
+    cpu_fprintf(f, "RETI: %08x  RETS: %08x   PC : %08x\n",
+                env->reti, env->rets, env->pc);
+    cpu_fprintf(f, " R0 : %08x   R4 : %08x   P0 : %08x   P4 : %08x\n",
+                env->dreg[0], env->dreg[4], env->preg[0], env->preg[4]);
+    cpu_fprintf(f, " R1 : %08x   R5 : %08x   P1 : %08x   P5 : %08x\n",
+                env->dreg[1], env->dreg[5], env->preg[1], env->preg[5]);
+    cpu_fprintf(f, " R2 : %08x   R6 : %08x   P2 : %08x   SP : %08x\n",
+                env->dreg[2], env->dreg[6], env->preg[2], env->spreg);
+    cpu_fprintf(f, " R3 : %08x   R7 : %08x   P3 : %08x   FP : %08x\n",
+                env->dreg[3], env->dreg[7], env->preg[3], env->fpreg);
+    cpu_fprintf(f, " LB0: %08x   LT0: %08x   LC0: %08x\n",
+                env->lbreg[0], env->ltreg[0], env->lcreg[0]);
+    cpu_fprintf(f, " LB1: %08x   LT1: %08x   LC1: %08x\n",
+                env->lbreg[1], env->ltreg[1], env->lcreg[1]);
+    cpu_fprintf(f, " B0 : %08x   L0 : %08x   M0 : %08x   I0 : %08x\n",
+                env->breg[0], env->lreg[0], env->mreg[0], env->ireg[0]);
+    cpu_fprintf(f, " B1 : %08x   L1 : %08x   M1 : %08x   I1 : %08x\n",
+                env->breg[1], env->lreg[1], env->mreg[1], env->ireg[1]);
+    cpu_fprintf(f, " B2 : %08x   L2 : %08x   M2 : %08x   I2 : %08x\n",
+                env->breg[2], env->lreg[2], env->mreg[2], env->ireg[2]);
+    cpu_fprintf(f, " B3 : %08x   L3 : %08x   M3 : %08x   I3 : %08x\n",
+                env->breg[3], env->lreg[3], env->mreg[3], env->ireg[3]);
+    /* The accumulators are 64-bit values; "%lx" is only 32 bits wide on
+       some hosts, so print them with the <inttypes.h> macro instead.  */
+    cpu_fprintf(f, "  A0: %010" PRIx64 "                 A1: %010" PRIx64 "\n",
+                (uint64_t)(env->areg[0] & 0xffffffffffULL),
+                (uint64_t)(env->areg[1] & 0xffffffffffULL));
+    cpu_fprintf(f, " USP: %08x ASTAT: %08x   CC : %08x\n",
+                env->uspreg, bfin_astat_read(env), env->astat[ASTAT_CC]);
+    cpu_fprintf(f, "ASTAT BITS: ");
+    _astat_printf(VS);
+    _astat_printf(V);
+    _astat_printf(AV1S);
+    _astat_printf(AV1);
+    _astat_printf(AV0S);
+    _astat_printf(AV0);
+    _astat_printf(AC1);
+    _astat_printf(AC0);
+    _astat_printf(AQ);
+    _astat_printf(CC);
+    _astat_printf(V_COPY);
+    _astat_printf(AC0_COPY);
+    _astat_printf(AN);
+    _astat_printf(AZ);
+    cpu_fprintf(f, "\nASTAT CACHE:   OP: %02u   ARG: %08x %08x %08x\n",
+                env->astat_op, env->astat_arg[0], env->astat_arg[1], env->astat_arg[2]);
+    cpu_fprintf(f, "              CYCLES: %08x %08x\n",
+                env->cycles[0], env->cycles[1]);
+
+/*
+    iw = ldq_code(env->pc);
+    if ((iw & 0xc000) != 0xc000)
+        len = 2;
+    else if ((iw & BIT_MULTI_INS) && (iw & 0xe800) != 0xe800)
+        len = 8;
+    else
+        len = 4;
+    log_target_disas(env->pc, len, 0);
+*/
+}
+
+static void gen_astat_update(DisasContext *, bool);
+
+/*
+ * Emit the end of a translation block that continues at the address held
+ * in @dest: bring ASTAT up to date, store @dest into the PC and exit.
+ *
+ * With the page check commented out, @tb_num and the local tb are unused
+ * and the block always leaves through tcg_gen_exit_tb(0).
+ */
+static void gen_goto_tb(DisasContext *dc, int tb_num, TCGv dest)
+{
+    TranslationBlock *tb;
+    tb = dc->tb;
+/*
+    if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
+        tcg_gen_goto_tb(tb_num);
+        tcg_gen_mov_tl(cpu_pc, dest);
+        tcg_gen_exit_tb((long)tb + tb_num);
+    } else */{
+        /* Flush the lazily tracked ASTAT state before leaving the TB */
+        gen_astat_update(dc, false);
+        tcg_gen_goto_tb(0);
+        tcg_gen_mov_tl(cpu_pc, dest);
+        tcg_gen_exit_tb(0);
+    }
+}
+
+/*
+ * Immediate flavour of gen_goto_tb(): load the constant address @dest into
+ * a local temporary and continue the translation there.
+ */
+static void gen_gotoi_tb(DisasContext *dc, int tb_num, target_ulong dest)
+{
+    TCGv target = tcg_temp_local_new();
+
+    tcg_gen_movi_tl(target, dest);
+    gen_goto_tb(dc, tb_num, target);
+
+    tcg_temp_free(target);
+}
+
+/*
+ * Emit a call to the raise_exception helper for exception number @excp,
+ * passing the address of the instruction being translated as the PC, and
+ * mark the translation as finished with DISAS_UPDATE.
+ */
+static void cec_exception(DisasContext *dc, int excp)
+{
+    TCGv tmp = tcg_const_tl(excp);
+    TCGv pc = tcg_const_tl(dc->pc);
+    gen_helper_raise_exception(tmp, pc);
+    /* Release both constant temps; pc used to be leaked */
+    tcg_temp_free(pc);
+    tcg_temp_free(tmp);
+    dc->is_jmp = DISAS_UPDATE;
+}
+
+/*
+ * Called for instructions that need supervisor mode.  In the linux-user
+ * build this unconditionally raises EXCP_ILL_SUPV; any other configuration
+ * is not implemented and fails the build.
+ */
+static void cec_require_supervisor(DisasContext *dc)
+{
+#ifdef CONFIG_LINUX_USER
+    cec_exception(dc, EXCP_ILL_SUPV);
+#else
+# error todo
+#endif
+}
+
+/*
+ * Writing an LB (loop bottom) register changes which translation blocks
+ * need hardware-loop handling: blocks at the old LB may carry LC/LT code
+ * that is no longer wanted, and blocks at the new LB probably lack it.
+ * When @reg is one of the LB registers, end the current block right after
+ * this instruction so translation resumes in a fresh block; for any other
+ * register nothing is emitted.
+ */
+static void gen_maybe_lb_exit_tb(DisasContext *dc, TCGv reg)
+{
+    bool wrote_lb = TCGV_EQUAL(reg, cpu_lbreg[0]) ||
+                    TCGV_EQUAL(reg, cpu_lbreg[1]);
+
+    if (wrote_lb) {
+        //tb_invalidate_phys_page_range
+        dc->is_jmp = DISAS_UPDATE;
+        /* XXX: Not entirely correct, but very few things load
+         *      directly into LB ... */
+        gen_gotoi_tb(dc, 0, dc->pc + dc->insn_len);
+    }
+}
+
+/*
+ * Default hardware-loop callback.  For a real loop (0 or 1) jump to that
+ * loop's LT register; for the "no loop" marker (-1) emit nothing.
+ */
+static void gen_hwloop_default(DisasContext *dc, int loop)
+{
+    if (loop == -1) {
+        return;
+    }
+    gen_goto_tb(dc, 0, cpu_ltreg[loop]);
+}
+
+/*
+ * When the instruction being translated is a call (DISAS_CALL), set RETS:
+ * to the address of the following instruction when no hardware loop is
+ * involved (@loop == -1), otherwise to that loop's LT register.  Anything
+ * that is not a call leaves RETS alone.
+ */
+static void _gen_hwloop_call(DisasContext *dc, int loop)
+{
+    if (dc->is_jmp == DISAS_CALL) {
+        if (loop != -1) {
+            tcg_gen_mov_tl(cpu_rets, cpu_ltreg[loop]);
+        } else {
+            tcg_gen_movi_tl(cpu_rets, dc->pc + dc->insn_len);
+        }
+    }
+}
+
+/*
+ * Hardware-loop callback for a conditional pc-relative branch.
+ * dc->hwloop_data carries the branch offset with bit 0 reused as the CC
+ * value (T) that takes the branch.
+ */
+static void gen_hwloop_br_pcrel_cc(DisasContext *dc, int loop)
+{
+    int l;
+    int pcrel = (unsigned long)dc->hwloop_data;
+    /* Split the packed value into the condition and the even offset */
+    int T = pcrel & 1;
+    pcrel &= ~1;
+
+    l = gen_new_label();
+    /* Skip the branch when CC does not match T */
+    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_cc, T, l);
+    gen_gotoi_tb(dc, 0, dc->pc + pcrel);
+    gen_set_label(l);
+    /* Not taken: outside a loop, switch to the default callback for later
+       use; inside a loop, emit the default handling right here */
+    if (loop == -1)
+        dc->hwloop_callback = gen_hwloop_default;
+    else
+        gen_hwloop_default(dc, loop);
+}
+
+/*
+ * Hardware-loop callback for a pc-relative branch through a register.
+ * dc->hwloop_data points at the TCGv holding the offset; the target is
+ * that value plus the address of the current instruction.
+ */
+static void gen_hwloop_br_pcrel(DisasContext *dc, int loop)
+{
+    TCGv *reg = dc->hwloop_data;
+    /* Updates RETS first if this is a call */
+    _gen_hwloop_call(dc, loop);
+    tcg_gen_addi_tl(cpu_pc, *reg, dc->pc);
+    gen_goto_tb(dc, 0, cpu_pc);
+}
+
+/*
+ * Hardware-loop callback for a pc-relative branch with an immediate
+ * offset.  dc->hwloop_data carries the offset itself; the target is the
+ * address of the current instruction plus that offset.
+ */
+static void gen_hwloop_br_pcrel_imm(DisasContext *dc, int loop)
+{
+    int pcrel = (unsigned long)dc->hwloop_data;
+
+    /* Updates RETS first if this is a call */
+    _gen_hwloop_call(dc, loop);
+    /* Both operands are known at translation time, so fold the sum here
+       instead of emitting a constant temp plus an add.  */
+    tcg_gen_movi_tl(cpu_pc, dc->pc + pcrel);
+    gen_goto_tb(dc, 0, cpu_pc);
+}
+
+/*
+ * Hardware-loop callback for an absolute branch through a register.
+ * dc->hwloop_data points at the TCGv holding the target address.
+ */
+static void gen_hwloop_br_direct(DisasContext *dc, int loop)
+{
+    TCGv *reg = dc->hwloop_data;
+    /* Updates RETS first if this is a call */
+    _gen_hwloop_call(dc, loop);
+    gen_goto_tb(dc, 0, *reg);
+}
+
+/*
+ * Emit the end-of-loop test for hardware loop @loop.  Control goes to
+ * label @l when the loop counter is already zero, or becomes zero after
+ * being decremented; otherwise the current hwloop callback is emitted for
+ * this loop.
+ */
+static void _gen_hwloop_check(DisasContext *dc, int loop, int l)
+{
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_lcreg[loop], 0, l);
+    tcg_gen_subi_tl(cpu_lcreg[loop], cpu_lcreg[loop], 1);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_lcreg[loop], 0, l);
+    dc->hwloop_callback(dc, loop);
+}
+
+/*
+ * Emit hardware-loop handling for the instruction at dc->pc.  The decision
+ * whether this address is a loop bottom is made at translation time by
+ * comparing dc->pc with the current LB0/LB1 values in the CPU state.
+ * Loop 1 is tested before loop 0; whatever the outcome, the callback is
+ * finally emitted once more with -1 ("no loop").
+ */
+static void gen_hwloop_check(DisasContext *dc)
+{
+    bool loop1, loop0;
+    /* Only assigned and used when at least one loop matches */
+    int endl;
+
+    loop1 = (dc->pc == dc->env->lbreg[1]);
+    loop0 = (dc->pc == dc->env->lbreg[0]);
+
+    if (loop1 || loop0)
+        endl = gen_new_label();
+
+    if (loop1) {
+        int l;
+        /* If both loops end here, loop 1 falling through must continue
+           with the loop 0 test rather than skip to the end */
+        if (loop0)
+            l = gen_new_label();
+        else
+            l = endl;
+
+        _gen_hwloop_check(dc, 1, l);
+
+        if (loop0) {
+            tcg_gen_br(endl);
+            gen_set_label(l);
+        }
+    }
+
+    if (loop0)
+        _gen_hwloop_check(dc, 0, endl);
+
+    if (loop1 || loop0)
+        gen_set_label(endl);
+
+    dc->hwloop_callback(dc, -1);
+}
+
+/* R#.L = reg
+ * Replace the low 16 bits of dst with the low 16 bits of src and keep the
+ * high half of dst.  src itself is left unmodified.
+ */
+static void gen_mov_l_tl(TCGv dst, TCGv src)
+{
+    /* Extract into a scratch temp first: masking src in place would clobber
+       the caller's register whenever src is not a throw-away temp.  */
+    TCGv low = tcg_temp_new();
+    tcg_gen_ext16u_tl(low, src);
+    tcg_gen_andi_tl(dst, dst, 0xffff0000);
+    tcg_gen_or_tl(dst, dst, low);
+    tcg_temp_free(low);
+}
+
+/* R#.L = imm32 */
+/*
+static void gen_movi_l_tl(TCGv dst, uint32_t src)
+{
+    tcg_gen_andi_tl(dst, dst, 0xffff0000);
+    tcg_gen_ori_tl(dst, dst, src & 0xffff);
+}
+*/
+
+/* R#.H = reg
+ * Replace the high 16 bits of dst with the low 16 bits of src and keep the
+ * low half of dst.
+ */
+/* XXX: This modifies the source ... assumes it is a temp ... */
+static void gen_mov_h_tl(TCGv dst, TCGv src)
+{
+    tcg_gen_andi_tl(dst, dst, 0xffff);
+    /* src is shifted in place, so its old value is gone afterwards */
+    tcg_gen_shli_tl(src, src, 16);
+    tcg_gen_or_tl(dst, dst, src);
+}
+
+/* R#.H = imm32 */
+/*
+static void gen_movi_h_tl(TCGv dst, uint32_t src)
+{
+    tcg_gen_andi_tl(dst, dst, 0xffff);
+    tcg_gen_ori_tl(dst, dst, src << 16);
+}
+*/
+
+/*
+ * Sign-extend the low n bits of src into dst, with the width n held in a
+ * TCG value: shift left by (32 - n) and arithmetically back down again.
+ */
+static void gen_extNs_tl(TCGv dst, TCGv src, TCGv n)
+{
+    TCGv amount = tcg_temp_new();
+
+    /* amount = 32 - n */
+    tcg_gen_subfi_tl(amount, 32, n);
+    tcg_gen_shl_tl(dst, src, amount);
+    tcg_gen_sar_tl(dst, dst, amount);
+
+    tcg_temp_free(amount);
+}
+
+/*
+ * Sign-extend the low n bits of src into dst for a width n known at
+ * translation time: shift left by (32 - n) and arithmetically back down.
+ */
+static void gen_extNsi_tl(TCGv dst, TCGv src, uint32_t n)
+{
+    const uint32_t amount = 32 - n;
+
+    tcg_gen_shli_tl(dst, src, amount);
+    tcg_gen_sari_tl(dst, dst, amount);
+}
+
+/*
+ * Disabled zero-extension helpers.  NOTE(review): they cannot simply be
+ * switched on as written: the mask ~((1 << n) - 1) keeps the bits from n
+ * upwards, which is the opposite of "mask off the higher bits", and
+ * gen_extNu_tl() uses its TCGv argument n inside a C shift expression.
+ */
+#if 0
+static void gen_extNu_tl(TCGv dst, TCGv src, TCGv n)
+{
+    /* Just mask off the higher bits */
+    tcg_gen_andi_tl(dst, src, ~((1 << n) - 1));
+}
+
+static void gen_extNui_tl(TCGv dst, TCGv src, uint32_t n)
+{
+    /* Just mask off the higher bits */
+    tcg_gen_andi_tl(dst, src, ~((1 << n) - 1));
+}
+#endif
+
+/*
+ * Emit a call to the signbits helper: dst receives the helper's result for
+ * src treated as a size-bit value, with size fixed at translation time.
+ */
+static void gen_signbitsi_tl(TCGv dst, TCGv src, uint32_t size)
+{
+    TCGv width = tcg_const_tl(size);
+
+    gen_helper_signbits(dst, src, width);
+
+    tcg_temp_free(width);
+}
+
+/*
+ * Emit a call to the 64-bit signbits helper: the 32-bit dst receives the
+ * helper's result for the 64-bit src treated as a size-bit value, with
+ * size fixed at translation time.
+ */
+static void gen_signbitsi_i64_i32(TCGv dst, TCGv_i64 src, uint32_t size)
+{
+    TCGv width = tcg_const_tl(size);
+
+    gen_helper_signbits_64(dst, src, width);
+
+    tcg_temp_free(width);
+}
+
+/*
+ * ret = |arg| for 32-bit values: copy arg and negate the copy only when
+ * arg is negative.  The generated code branches, so ret and arg have to
+ * keep their values across a label.
+ */
+static void gen_abs_tl(TCGv ret, TCGv arg)
+{
+    int done = gen_new_label();
+
+    tcg_gen_mov_tl(ret, arg);
+    /* Non-negative input: the copy is already the answer */
+    tcg_gen_brcondi_tl(TCG_COND_GE, arg, 0, done);
+    tcg_gen_neg_tl(ret, ret);
+
+    gen_set_label(done);
+}
+
+/*
+ * ret = |arg| for 64-bit values: copy arg and negate the copy only when
+ * arg is negative.  The generated code branches, so ret and arg have to
+ * keep their values across a label.
+ */
+static void gen_abs_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    int done = gen_new_label();
+
+    tcg_gen_mov_i64(ret, arg);
+    /* Non-negative input: the copy is already the answer */
+    tcg_gen_brcondi_i64(TCG_COND_GE, arg, 0, done);
+    tcg_gen_neg_i64(ret, ret);
+
+    gen_set_label(done);
+}
+
+/* Common tail code for DIVQ/DIVS insns
+ * Shifts the new quotient bit (aq) into pquo, installs r above bit 16 and
+ * then frees the r, aq and div temps on behalf of the caller.
+ */
+static void _gen_divqs(TCGv pquo, TCGv r, TCGv aq, TCGv div)
+{
+    /*
+     * pquo <<= 1
+     * pquo |= aq
+     * pquo = (pquo & 0x1FFFF) | (r << 17)
+     */
+    tcg_gen_shli_tl(pquo, pquo, 1);
+    tcg_gen_or_tl(pquo, pquo, aq);
+    tcg_gen_andi_tl(pquo, pquo, 0x1FFFF);
+    tcg_gen_shli_tl(r, r, 17);
+    tcg_gen_or_tl(pquo, pquo, r);
+
+    /* The callers allocate these temps; they are released here */
+    tcg_temp_free(r);
+    tcg_temp_free(aq);
+    tcg_temp_free(div);
+}
+
+/* Common AQ ASTAT bit management for DIVQ/DIVS insns
+ * Computes the new AQ bit into aq and also stores it to astat[ASTAT_AQ].
+ */
+static void _gen_divqs_st_aq(TCGv r, TCGv aq, TCGv div)
+{
+    /* aq = ((r ^ div) >> 15) & 1 */
+    tcg_gen_xor_tl(aq, r, div);
+    tcg_gen_shri_tl(aq, aq, 15);
+    tcg_gen_andi_tl(aq, aq, 1);
+    tcg_gen_st_tl(aq, cpu_env, offsetof(CPUState, astat[ASTAT_AQ]));
+}
+
+/* DIVQ ( Dreg, Dreg ) ;
+ * Based on AQ status bit, either add or subtract the divisor from
+ * the dividend. Then set the AQ status bit based on the MSBs of the
+ * 32-bit dividend and the 16-bit divisor. Left shift the dividend one
+ * bit. Copy the logical inverse of AQ into the dividend LSB.
+ *
+ * pquo is the dividend/partial quotient register (updated in place) and
+ * src supplies the divisor in its low 16 bits.  Local temps are used
+ * because the generated code contains a branch.
+ */
+static void gen_divq(TCGv pquo, TCGv src)
+{
+    int l;
+    TCGv af, r, aq, div;
+
+    /* div = R#.L */
+    div = tcg_temp_local_new();
+    tcg_gen_ext16u_tl(div, src);
+
+    /* af = pquo >> 16 */
+    af = tcg_temp_local_new();
+    tcg_gen_shri_tl(af, pquo, 16);
+
+    /*
+     * we take this:
+     *  if (ASTAT_AQ)
+     *    r = div + af;
+     *  else
+     *    r = af - div;
+     *
+     * and turn it into:
+     *  r = div;
+     *  if (aq == 0)
+     *    r = -r;
+     *  r += af;
+     */
+    aq = tcg_temp_local_new();
+    tcg_gen_ld_tl(aq, cpu_env, offsetof(CPUState, astat[ASTAT_AQ]));
+
+    l = gen_new_label();
+    r = tcg_temp_local_new();
+    tcg_gen_mov_tl(r, div);
+    tcg_gen_brcondi_tl(TCG_COND_NE, aq, 0, l);
+    tcg_gen_neg_tl(r, r);
+    gen_set_label(l);
+    tcg_gen_add_tl(r, r, af);
+
+    tcg_temp_free(af);
+
+    /* Compute and store the new AQ from r and div */
+    _gen_divqs_st_aq(r, aq, div);
+
+    /* aq = !aq */
+    tcg_gen_xori_tl(aq, aq, 1);
+
+    /* Also frees r, aq and div */
+    _gen_divqs(pquo, r, aq, div);
+}
+
+/* DIVS ( Dreg, Dreg ) ;
+ * Initialize for DIVQ. Set the AQ status bit based on the signs of
+ * the 32-bit dividend and the 16-bit divisor. Left shift the dividend
+ * one bit. Copy AQ into the dividend LSB.
+ *
+ * pquo is the dividend register (updated in place) and src supplies the
+ * divisor in its low 16 bits.
+ */
+static void gen_divs(TCGv pquo, TCGv src)
+{
+    TCGv r, aq, div;
+
+    /* div = R#.L */
+    div = tcg_temp_local_new();
+    tcg_gen_ext16u_tl(div, src);
+
+    /* r = pquo >> 16 */
+    r = tcg_temp_local_new();
+    tcg_gen_shri_tl(r, pquo, 16);
+
+    aq = tcg_temp_local_new();
+
+    /* Compute and store the new AQ from r and div */
+    _gen_divqs_st_aq(r, aq, div);
+
+    /* Also frees r, aq and div */
+    _gen_divqs(pquo, r, aq, div);
+}
+
+/* Reg = ROT reg BY reg/imm
+ * The Blackfin rotate is not like the TCG rotate.  It shifts through the
+ * CC bit too giving it 33 bits to play with.  So we have to reduce things
+ * to shifts ourself.
+ *
+ * Register-count variant: the rotate amount is read from orig_shift at run
+ * time, a negative amount meaning rotate right.  CC is consumed as the
+ * extra bit and receives the bit rotated out.  dst may be the same TCG
+ * value as src.
+ */
+static void gen_rot_tl(TCGv dst, TCGv src, TCGv orig_shift)
+{
+    uint32_t nbits = 32;
+    TCGv shift, ret, tmp, tmp_shift;
+    int l, endl;
+
+    /* shift = CLAMP (shift, -nbits, nbits); */
+    /* NOTE: no clamping code is emitted for the comment above */
+
+    endl = gen_new_label();
+
+    /* if (shift == 0) */
+    l = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_NE, orig_shift, 0, l);
+    tcg_gen_mov_tl(dst, src);
+    tcg_gen_br(endl);
+    gen_set_label(l);
+
+    /* Reduce everything to rotate left */
+    /* A right rotate by k becomes a left rotate by (nbits + 1) - k */
+    shift = tcg_temp_local_new();
+    tcg_gen_mov_tl(shift, orig_shift);
+    l = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_GE, shift, 0, l);
+    tcg_gen_addi_tl(shift, shift, nbits + 1);
+    gen_set_label(l);
+
+    /* src is still read after ret is written, so use a scratch result
+       when the destination aliases the source */
+    if (TCGV_EQUAL(dst, src))
+        ret = tcg_temp_local_new();
+    else
+        ret = dst;
+
+    /* ret = shift == nbits ? 0 : val << shift; */
+    tcg_gen_movi_tl(ret, 0);
+    l = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, shift, nbits, l);
+    tcg_gen_shl_tl(ret, src, shift);
+    gen_set_label(l);
+
+    /* ret |= shift == 1 ? 0 : val >> ((nbits + 1) - shift); */
+    l = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_EQ, shift, 1, l);
+    tmp = tcg_temp_new();
+    tmp_shift = tcg_temp_new();
+    tcg_gen_subfi_tl(tmp_shift, nbits + 1, shift);
+    tcg_gen_shr_tl(tmp, src, tmp_shift);
+    tcg_gen_or_tl(ret, ret, tmp);
+    tcg_temp_free(tmp_shift);
+    tcg_temp_free(tmp);
+    gen_set_label(l);
+
+    /* Then add in and output feedback via the CC register */
+    /* Old CC lands at bit (shift - 1); new CC is bit (nbits - shift) of src */
+    tcg_gen_subi_tl(shift, shift, 1);
+    tcg_gen_shl_tl(cpu_cc, cpu_cc, shift);
+    tcg_gen_or_tl(ret, ret, cpu_cc);
+    tcg_gen_subfi_tl(shift, nbits - 1, shift);
+    tcg_gen_shr_tl(cpu_cc, src, shift);
+    tcg_gen_andi_tl(cpu_cc, cpu_cc, 1);
+
+    if (TCGV_EQUAL(dst, src)) {
+        tcg_gen_mov_tl(dst, ret);
+        tcg_temp_free(ret);
+    }
+
+    tcg_temp_free(shift);
+    gen_set_label(endl);
+}
+
+/*
+ * Immediate-count variant of the 32-bit rotate through CC: the rotate
+ * amount is known at translation time, so all case distinctions happen
+ * here and no branches are emitted.  A negative shift rotates right.
+ * dst may be the same TCG value as src.
+ */
+static void gen_roti_tl(TCGv dst, TCGv src, int32_t shift)
+{
+    uint32_t nbits = 32;
+    TCGv ret;
+
+    /* shift = CLAMP (shift, -nbits, nbits); */
+    /* NOTE: no clamping is performed for the comment above */
+
+    if (shift == 0) {
+        tcg_gen_mov_tl(dst, src);
+        return;
+    }
+
+    /* Reduce everything to rotate left */
+    /* A right rotate by k becomes a left rotate by (nbits + 1) - k */
+    if (shift < 0)
+        shift += nbits + 1;
+
+    /* src is still read after ret is written, so use a scratch result
+       when the destination aliases the source */
+    if (TCGV_EQUAL(dst, src))
+        ret = tcg_temp_new();
+    else
+        ret = dst;
+
+    /* First rotate the main register */
+    if (shift == nbits)
+        tcg_gen_movi_tl(ret, 0);
+    else
+        tcg_gen_shli_tl(ret, src, shift);
+    if (shift != 1) {
+        TCGv tmp = tcg_temp_new();
+        tcg_gen_shri_tl(tmp, src, (nbits + 1) - shift);
+        tcg_gen_or_tl(ret, ret, tmp);
+        tcg_temp_free(tmp);
+    }
+
+    /* Then add in and output feedback via the CC register */
+    /* Old CC lands at bit (shift - 1); new CC is bit (nbits - shift) of src */
+    tcg_gen_shli_tl(cpu_cc, cpu_cc, shift - 1);
+    tcg_gen_or_tl(ret, ret, cpu_cc);
+    tcg_gen_shri_tl(cpu_cc, src, nbits - shift);
+    tcg_gen_andi_tl(cpu_cc, cpu_cc, 1);
+
+    if (TCGV_EQUAL(dst, src)) {
+        tcg_gen_mov_tl(dst, ret);
+        tcg_temp_free(ret);
+    }
+}
+
+/*
+ * Rotate through CC on a 64-bit TCG value using a width of 40 bits
+ * (nbits), with the rotate amount read from orig_shift at run time.  A
+ * negative amount rotates right.  CC is widened to 64 bits for the
+ * computation and written back as a 32-bit value.  dst may be the same TCG
+ * value as src.
+ */
+static void gen_rot_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 orig_shift)
+{
+    uint32_t nbits = 40;
+    TCGv_i64 shift, ret, tmp, tmp_shift, cc64;
+    int l, endl;
+
+    /* shift = CLAMP (shift, -nbits, nbits); */
+    /* NOTE: no clamping code is emitted for the comment above */
+
+    endl = gen_new_label();
+
+    /* if (shift == 0) */
+    l = gen_new_label();
+    tcg_gen_brcondi_i64(TCG_COND_NE, orig_shift, 0, l);
+    tcg_gen_mov_i64(dst, src);
+    tcg_gen_br(endl);
+    gen_set_label(l);
+
+    /* Reduce everything to rotate left */
+    /* A right rotate by k becomes a left rotate by (nbits + 1) - k */
+    shift = tcg_temp_local_new_i64();
+    tcg_gen_mov_i64(shift, orig_shift);
+    l = gen_new_label();
+    tcg_gen_brcondi_i64(TCG_COND_GE, shift, 0, l);
+    tcg_gen_addi_i64(shift, shift, nbits + 1);
+    gen_set_label(l);
+
+    /* src is still read after ret is written, so use a scratch result
+       when the destination aliases the source */
+    if (TCGV_EQUAL_I64(dst, src))
+        ret = tcg_temp_local_new_i64();
+    else
+        ret = dst;
+
+    /* ret = shift == nbits ? 0 : val << shift; */
+    tcg_gen_movi_i64(ret, 0);
+    l = gen_new_label();
+    tcg_gen_brcondi_i64(TCG_COND_EQ, shift, nbits, l);
+    tcg_gen_shl_i64(ret, src, shift);
+    gen_set_label(l);
+
+    /* ret |= shift == 1 ? 0 : val >> ((nbits + 1) - shift); */
+    l = gen_new_label();
+    tcg_gen_brcondi_i64(TCG_COND_EQ, shift, 1, l);
+    tmp = tcg_temp_new_i64();
+    tmp_shift = tcg_temp_new_i64();
+    tcg_gen_subfi_i64(tmp_shift, nbits + 1, shift);
+    tcg_gen_shr_i64(tmp, src, tmp_shift);
+    tcg_gen_or_i64(ret, ret, tmp);
+    tcg_temp_free_i64(tmp_shift);
+    tcg_temp_free_i64(tmp);
+    gen_set_label(l);
+
+    /* Then add in and output feedback via the CC register */
+    /* Old CC lands at bit (shift - 1); new CC is bit (nbits - shift) of src */
+    cc64 = tcg_temp_new_i64();
+    tcg_gen_ext_i32_i64(cc64, cpu_cc);
+    tcg_gen_subi_i64(shift, shift, 1);
+    tcg_gen_shl_i64(cc64, cc64, shift);
+    tcg_gen_or_i64(ret, ret, cc64);
+    tcg_gen_subfi_i64(shift, nbits - 1, shift);
+    tcg_gen_shr_i64(cc64, src, shift);
+    tcg_gen_andi_i64(cc64, cc64, 1);
+    tcg_gen_trunc_i64_i32(cpu_cc, cc64);
+    tcg_temp_free_i64(cc64);
+
+    if (TCGV_EQUAL_I64(dst, src)) {
+        tcg_gen_mov_i64(dst, ret);
+        tcg_temp_free_i64(ret);
+    }
+
+    tcg_temp_free_i64(shift);
+    gen_set_label(endl);
+}
+
+/*
+ * Immediate-count variant of the 40-bit rotate through CC on a 64-bit TCG
+ * value: the rotate amount is known at translation time, so no branches
+ * are emitted.  A negative shift rotates right.  dst may be the same TCG
+ * value as src.
+ */
+static void gen_roti_i64(TCGv_i64 dst, TCGv_i64 src, int32_t shift)
+{
+    uint32_t nbits = 40;
+    TCGv_i64 ret, cc64;
+
+    /* shift = CLAMP (shift, -nbits, nbits); */
+    /* NOTE: no clamping is performed for the comment above */
+
+    if (shift == 0) {
+        tcg_gen_mov_i64(dst, src);
+        return;
+    }
+
+    /* Reduce everything to rotate left */
+    /* A right rotate by k becomes a left rotate by (nbits + 1) - k */
+    if (shift < 0)
+        shift += nbits + 1;
+
+    /* src is still read after ret is written, so use a scratch result
+       when the destination aliases the source */
+    if (TCGV_EQUAL_I64(dst, src))
+        ret = tcg_temp_new_i64();
+    else
+        ret = dst;
+
+    /* First rotate the main register */
+    if (shift == nbits)
+        tcg_gen_movi_i64(ret, 0);
+    else
+        tcg_gen_shli_i64(ret, src, shift);
+    if (shift != 1) {
+        TCGv_i64 tmp = tcg_temp_new_i64();
+        tcg_gen_shri_i64(tmp, src, (nbits + 1) - shift);
+        tcg_gen_or_i64(ret, ret, tmp);
+        tcg_temp_free_i64(tmp);
+    }
+
+    /* Then add in and output feedback via the CC register */
+    /* Old CC lands at bit (shift - 1); new CC is bit (nbits - shift) of src */
+    cc64 = tcg_temp_new_i64();
+    tcg_gen_ext_i32_i64(cc64, cpu_cc);
+    tcg_gen_shli_i64(cc64, cc64, shift - 1);
+    tcg_gen_or_i64(ret, ret, cc64);
+    tcg_gen_shri_i64(cc64, src, nbits - shift);
+    tcg_gen_andi_i64(cc64, cc64, 1);
+    tcg_gen_trunc_i64_i32(cpu_cc, cc64);
+    tcg_temp_free_i64(cc64);
+
+    if (TCGV_EQUAL_I64(dst, src)) {
+        tcg_gen_mov_i64(dst, ret);
+        tcg_temp_free_i64(ret);
+    }
+}
+
+/* This is a bit crazy, but we want to simulate the hardware behavior exactly
+   rather than worry about the circular buffers being used correctly.  Which
+   isn't to say there isn't room for improvement here, just that we want to
+   be conservative.  See also dagsub().
+
+   Emits "I[dagno] += M".  When L[dagno] is zero at run time a plain add is
+   used; otherwise the dagadd helper computes the wrapped result.  The dc
+   argument is not used.  */
+static void gen_dagadd(DisasContext *dc, int dagno, TCGv M)
+{
+    int l, endl;
+
+    /* Optimize for when circ buffers are not used */
+    l = gen_new_label();
+    endl = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_lreg[dagno], 0, l);
+    tcg_gen_add_tl(cpu_ireg[dagno], cpu_ireg[dagno], M);
+    tcg_gen_br(endl);
+    gen_set_label(l);
+
+    /* Fallback to the big guns */
+    gen_helper_dagadd(cpu_ireg[dagno], cpu_ireg[dagno],
+                      cpu_lreg[dagno], cpu_breg[dagno], M);
+
+    gen_set_label(endl);
+}
+
+/*
+ * Immediate flavour of gen_dagadd(): load the constant modifier M into a
+ * local temp (the emitted code branches) and add it to I[dagno].
+ */
+static void gen_dagaddi(DisasContext *dc, int dagno, uint32_t M)
+{
+    TCGv modifier = tcg_temp_local_new();
+
+    tcg_gen_movi_tl(modifier, M);
+    gen_dagadd(dc, dagno, modifier);
+
+    tcg_temp_free(modifier);
+}
+
+/* See dagadd() notes above.
+
+   Emits "I[dagno] -= M".  When L[dagno] is zero at run time a plain
+   subtract is used; otherwise the dagsub helper computes the wrapped
+   result.  The dc argument is not used.  */
+static void gen_dagsub(DisasContext *dc, int dagno, TCGv M)
+{
+    int l, endl;
+
+    /* Optimize for when circ buffers are not used */
+    l = gen_new_label();
+    endl = gen_new_label();
+    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_lreg[dagno], 0, l);
+    tcg_gen_sub_tl(cpu_ireg[dagno], cpu_ireg[dagno], M);
+    tcg_gen_br(endl);
+    gen_set_label(l);
+
+    /* Fallback to the big guns */
+    gen_helper_dagsub(cpu_ireg[dagno], cpu_ireg[dagno],
+                      cpu_lreg[dagno], cpu_breg[dagno], M);
+
+    gen_set_label(endl);
+}
+
+/*
+ * Immediate flavour of gen_dagsub(): load the constant modifier M into a
+ * local temp (the emitted code branches) and subtract it from I[dagno].
+ */
+static void gen_dagsubi(DisasContext *dc, int dagno, uint32_t M)
+{
+    TCGv modifier = tcg_temp_local_new();
+
+    tcg_gen_movi_tl(modifier, M);
+    gen_dagsub(dc, dagno, modifier);
+
+    tcg_temp_free(modifier);
+}
+
+#define _gen_astat_store(bit, reg) tcg_gen_st_tl(reg, cpu_env, offsetof(CPUState, astat[bit]))
+
+/* Set ASTAT.AZ to 1 when reg is zero, else 0.  tmp is scratch and holds
+   the stored flag value afterwards.  */
+static void _gen_astat_update_az(TCGv reg, TCGv tmp)
+{
+    tcg_gen_setcondi_tl(TCG_COND_EQ, tmp, reg, 0);
+    _gen_astat_store(ASTAT_AZ, tmp);
+}
+
+/* Set ASTAT.AZ to 1 when either reg or reg2 is zero, else 0.  tmp is
+   scratch and holds the stored flag value afterwards.  */
+static void _gen_astat_update_az2(TCGv reg, TCGv reg2, TCGv tmp)
+{
+    TCGv tmp2 = tcg_temp_new();
+    tcg_gen_setcondi_tl(TCG_COND_EQ, tmp, reg, 0);
+    tcg_gen_setcondi_tl(TCG_COND_EQ, tmp2, reg2, 0);
+    /* Either operand being zero is enough */
+    tcg_gen_or_tl(tmp, tmp, tmp2);
+    tcg_temp_free(tmp2);
+    _gen_astat_store(ASTAT_AZ, tmp);
+}
+
+/*
+ * Set ASTAT.AN to 1 when reg, compared as an unsigned value, is at least
+ * 2^(len - 1) -- i.e. bit (len - 1) or a higher bit is set -- else 0.
+ * tmp is scratch and holds the stored flag value afterwards.
+ */
+static void _gen_astat_update_an(TCGv reg, TCGv tmp, uint32_t len)
+{
+    /* Build the threshold in unsigned arithmetic: "1 << 31" on a signed
+       int (len == 32) is undefined behaviour.  */
+    tcg_gen_setcondi_tl(TCG_COND_GEU, tmp, reg, (uint32_t)1 << (len - 1));
+    _gen_astat_store(ASTAT_AN, tmp);
+}
+
+/* Update both ASTAT.AZ and ASTAT.AN from reg, treating it as a len-bit
+   value for the AN test.  tmp is scratch.  */
+static void _gen_astat_update_nz(TCGv reg, TCGv tmp, uint32_t len)
+{
+    _gen_astat_update_az(reg, tmp);
+    _gen_astat_update_an(reg, tmp, len);
+}
+
+static void gen_astat_update(DisasContext *dc, bool clear)
+{   /* Emit TCG ops computing the ASTAT flags for the queued dc->astat_op from cpu_astat_arg[]; if clear, reset dc->astat_op to ASTAT_OP_NONE. */
+    TCGv tmp = tcg_temp_local_new();
+    uint32_t len = 16; /* operand width in bits; the 32-bit cases raise it to 32 */
+
+    switch (dc->astat_op) {
+    case ASTAT_OP_ABS:    /* [0] = ABS( [1] ) */
+        len = 32;
+        /* XXX: Missing V/VS updates */
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, len);
+        break;
+
+    case ASTAT_OP_ADD32:    /* [0] = [1] + [2] */
+        /* XXX: Missing V/VS updates */
+        len = 32;
+        tcg_gen_not_tl(tmp, cpu_astat_arg[1]);
+        tcg_gen_setcond_tl(TCG_COND_LTU, tmp, tmp, cpu_astat_arg[2]); /* carry out: ~[1] < [2] (unsigned) */
+        _gen_astat_store(ASTAT_AC0, tmp);
+        _gen_astat_store(ASTAT_AC0_COPY, tmp);
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, 32);
+        break;
+
+    case ASTAT_OP_ASHIFT32:
+        len *= 2; /* 16 -> 32, then fall through to the shared code */
+    case ASTAT_OP_ASHIFT16:
+        tcg_gen_movi_tl(tmp, 0);
+        /* Need to update AC0 ? */
+        _gen_astat_store(ASTAT_V, tmp);
+        _gen_astat_store(ASTAT_V_COPY, tmp);
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, len);
+        break;
+
+    case ASTAT_OP_COMPARE_SIGNED: {
+        TCGv flgs, flgo, overflow, flgn, res = tcg_temp_new();
+        tcg_gen_sub_tl(res, cpu_astat_arg[0], cpu_astat_arg[1]);
+        _gen_astat_update_az(res, tmp);
+        tcg_gen_setcond_tl(TCG_COND_LEU, tmp, cpu_astat_arg[1], cpu_astat_arg[0]); /* AC0 = ([1] <= [0]), unsigned */
+        _gen_astat_store(ASTAT_AC0, tmp);
+        _gen_astat_store(ASTAT_AC0_COPY, tmp);
+        /* XXX: This has got to be simpler ... */
+        /* int flgs = srcop >> 31; */
+        flgs = tcg_temp_new();
+        tcg_gen_shri_tl(flgs, cpu_astat_arg[0], 31);
+        /* int flgo = dstop >> 31; */
+        flgo = tcg_temp_new();
+        tcg_gen_shri_tl(flgo, cpu_astat_arg[1], 31);
+        /* int flgn = result >> 31; */
+        flgn = tcg_temp_new();
+        tcg_gen_shri_tl(flgn, res, 31);
+        /* int overflow = (flgs ^ flgo) & (flgn ^ flgs); */
+        overflow = tcg_temp_new();
+        tcg_gen_xor_tl(tmp, flgs, flgo);
+        tcg_gen_xor_tl(overflow, flgn, flgs);
+        tcg_gen_and_tl(overflow, tmp, overflow);
+        /* an = (flgn && !overflow) || (!flgn && overflow); */
+        tcg_gen_not_tl(tmp, overflow);
+        tcg_gen_and_tl(tmp, flgn, tmp); /* the 0/1 operand masks off the upper bits left by the bitwise NOT */
+        tcg_gen_not_tl(res, flgn);
+        tcg_gen_and_tl(res, res, overflow);
+        tcg_gen_or_tl(tmp, tmp, res);
+        tcg_temp_free(flgn);
+        tcg_temp_free(overflow);
+        tcg_temp_free(flgo);
+        tcg_temp_free(flgs);
+        tcg_temp_free(res);
+        _gen_astat_store(ASTAT_AN, tmp);
+        break;
+    }
+
+    case ASTAT_OP_COMPARE_UNSIGNED:
+        tcg_gen_sub_tl(tmp, cpu_astat_arg[0], cpu_astat_arg[1]);
+        _gen_astat_update_az(tmp, tmp); /* the difference is only needed for the zero test */
+        tcg_gen_setcond_tl(TCG_COND_LEU, tmp, cpu_astat_arg[1], cpu_astat_arg[0]); /* AC0 = ([1] <= [0]) */
+        _gen_astat_store(ASTAT_AC0, tmp);
+        _gen_astat_store(ASTAT_AC0_COPY, tmp);
+        tcg_gen_setcond_tl(TCG_COND_GTU, tmp, cpu_astat_arg[1], cpu_astat_arg[0]); /* AN = ([1] > [0]) */
+        _gen_astat_store(ASTAT_AN, tmp);
+        break;
+
+    case ASTAT_OP_LOGICAL:
+        len = 32;
+        tcg_gen_movi_tl(tmp, 0);
+        /* AC0 is correct ? */
+        _gen_astat_store(ASTAT_AC0, tmp);
+        _gen_astat_store(ASTAT_AC0_COPY, tmp);
+        _gen_astat_store(ASTAT_V, tmp);
+        _gen_astat_store(ASTAT_V_COPY, tmp);
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, len);
+        break;
+
+    case ASTAT_OP_LSHIFT32:
+        len *= 2; /* 16 -> 32, then fall through to the shared code */
+    case ASTAT_OP_LSHIFT16:
+        _gen_astat_update_az(cpu_astat_arg[0], tmp);
+        /* XXX: should be checking bit shifted */
+        tcg_gen_setcondi_tl(TCG_COND_GEU, tmp, cpu_astat_arg[0], 1u << (len - 1)); /* 1u: a signed 1 << 31 is undefined */
+        _gen_astat_store(ASTAT_AN, tmp);
+        /* XXX: No saturation handling ... */
+        tcg_gen_movi_tl(tmp, 0);
+        _gen_astat_store(ASTAT_V, tmp);
+        _gen_astat_store(ASTAT_V_COPY, tmp);
+        break;
+
+    case ASTAT_OP_LSHIFT_RT32:
+        len *= 2; /* 16 -> 32, then fall through to the shared code */
+    case ASTAT_OP_LSHIFT_RT16:
+        _gen_astat_update_az(cpu_astat_arg[0], tmp);
+        /* XXX: should be checking bit shifted */
+        tcg_gen_setcondi_tl(TCG_COND_GEU, tmp, cpu_astat_arg[0], 1u << (len - 1)); /* 1u: a signed 1 << 31 is undefined */
+        _gen_astat_store(ASTAT_AN, tmp);
+        tcg_gen_movi_tl(tmp, 0);
+        _gen_astat_store(ASTAT_V, tmp);
+        _gen_astat_store(ASTAT_V_COPY, tmp);
+        break;
+
+    case ASTAT_OP_MIN_MAX:    /* [0] = MAX/MIN( [1], [2] ) */
+        tcg_gen_movi_tl(tmp, 0);
+        _gen_astat_store(ASTAT_V, tmp);
+        _gen_astat_store(ASTAT_V_COPY, tmp);
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, 32); /* width is passed explicitly; len is not used here */
+        break;
+
+    case ASTAT_OP_NEGATE:    /* [0] = -[1] */
+        len = 32;
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, 32);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, tmp, cpu_astat_arg[0], 1u << (len - 1)); /* V when result is 0x80000000; 1u avoids signed overflow */
+        _gen_astat_store(ASTAT_V, tmp);
+        /* XXX: Should "VS |= V;"; NOTE(review): V_COPY/AC0_COPY are not written here unlike the other cases -- confirm */
+        tcg_gen_setcondi_tl(TCG_COND_EQ, tmp, cpu_astat_arg[0], 0);
+        _gen_astat_store(ASTAT_AC0, tmp);
+        break;
+
+    case ASTAT_OP_SUB32:    /* [0] = [1] - [2] */
+        len = 32;
+        /* XXX: Missing V/VS updates */
+        tcg_gen_setcond_tl(TCG_COND_LEU, tmp, cpu_astat_arg[2], cpu_astat_arg[1]); /* AC0 = ([2] <= [1]), unsigned */
+        _gen_astat_store(ASTAT_AC0, tmp);
+        _gen_astat_store(ASTAT_AC0_COPY, tmp);
+        _gen_astat_update_nz(cpu_astat_arg[0], tmp, len);
+        break;
+
+    case ARRAY_OP_VECTOR_ADD_ADD:    /* [0][1] = [2] +|+ [3] */
+    case ARRAY_OP_VECTOR_ADD_SUB:    /* [0][1] = [2] +|- [3] */
+    case ARRAY_OP_VECTOR_SUB_SUB:    /* [0][1] = [2] -|- [3] */
+    case ARRAY_OP_VECTOR_SUB_ADD:    /* [0][1] = [2] -|+ [3] */
+        _gen_astat_update_az2(cpu_astat_arg[0], cpu_astat_arg[1], tmp);
+        /* Need AN, AC0/AC1, V */
+        break;
+
+    default: /* unknown op: report it and abort; control never reaches the cases below */
+        fprintf(stderr, "qemu: unhandled astat op %u\n", dc->astat_op);
+        abort();
+    case ASTAT_OP_DYNAMIC:
+    case ASTAT_OP_NONE: /* no flag code is generated for these two ops */
+        break;
+    }
+
+    tcg_temp_free(tmp);
+
+    if (clear)
+        dc->astat_op = ASTAT_OP_NONE;
+}
+
+static void
+_astat_queue_state(DisasContext *dc, enum astat_ops op, unsigned int num,
+                   TCGv arg0, TCGv arg1, TCGv arg2)
+{   /* Record op as the pending ASTAT operation and copy its first num operands into cpu_astat_arg[]. */
+    dc->astat_op = op; /* consumed later by gen_astat_update() */
+    /*tcg_gen_movi_tl(cpu_astat_op, dc->astat_op);*/
+
+    tcg_gen_mov_tl(cpu_astat_arg[0], arg0); /* slot 0 is always copied */
+    if (num > 1)
+        tcg_gen_mov_tl(cpu_astat_arg[1], arg1);
+    else
+        tcg_gen_discard_tl(cpu_astat_arg[1]); /* slot unused by this op: discard it instead of copying */
+    if (num > 2)
+        tcg_gen_mov_tl(cpu_astat_arg[2], arg2);
+    else
+        tcg_gen_discard_tl(cpu_astat_arg[2]); /* slot unused by this op: discard it instead of copying */
+}
+#define astat_queue_state1(dc, op, arg0)             _astat_queue_state(dc, op, 1, arg0, arg0, arg0)
+#define astat_queue_state2(dc, op, arg0, arg1)       _astat_queue_state(dc, op, 2, arg0, arg1, arg1)
+#define astat_queue_state3(dc, op, arg0, arg1, arg2) _astat_queue_state(dc, op, 3, arg0, arg1, arg2)
+
+static void gen_astat_load(DisasContext *dc, TCGv reg)
+{   /* Emit any pending flag computation, then call the astat_load helper with reg. */
+    gen_astat_update(dc, true); /* true: the pending op is reset to ASTAT_OP_NONE afterwards */
+    gen_helper_astat_load(reg); /* presumably fills reg with the ASTAT value -- helper not visible here */
+}
+
+static void gen_astat_store(DisasContext *dc, TCGv reg)
+{   /* Call the astat_store helper with reg and drop any queued flag operation. */
+    unsigned int i;
+
+    gen_helper_astat_store(reg); /* presumably writes reg into the ASTAT state -- helper not visible here */
+
+    dc->astat_op = ASTAT_OP_NONE; /* nothing is left pending after the store */
+    /*tcg_gen_movi_tl(cpu_astat_op, dc->astat_op);*/
+
+    for (i = 0; i < ARRAY_SIZE(cpu_astat_arg); ++i)
+        tcg_gen_discard_tl(cpu_astat_arg[i]); /* queued operands are no longer needed */
+}
+
+static void interp_insn_bfin(DisasContext *dc);
+
+static void check_breakpoint(CPUState *env, DisasContext *dc)
+{   /* If a breakpoint is set at dc->pc, emit EXCP_DEBUG and set dc->is_jmp to DISAS_UPDATE. */
+    CPUBreakpoint *bp;
+
+    QTAILQ_FOREACH(bp, &env->breakpoints, entry) { /* no-op on an empty list, so no separate emptiness test */
+        if (bp->pc == dc->pc) {
+            cec_exception(dc, EXCP_DEBUG);
+            dc->is_jmp = DISAS_UPDATE;
+            /* Several breakpoints may share one pc; emit the exception only once. */
+            break;
+        }
+    }
+}
+
+static void
+gen_intermediate_code_internal(CPUState *env, TranslationBlock *tb,
+                               int search_pc)
+{   /* Translate guest insns starting at tb->pc into TCG ops; if search_pc is set, also fill the gen_opc_* tables. */
+    uint16_t *gen_opc_end;
+    uint32_t pc_start;
+    int j, lj;
+    struct DisasContext ctx;
+    struct DisasContext *dc = &ctx;
+    uint32_t next_page_start;
+    int num_insns;
+    int max_insns;
+
+    qemu_log_try_set_file(stderr);
+
+    pc_start = tb->pc;
+    dc->env = env;
+    dc->tb = tb;
+    /* XXX: handle super/user mode here.  */
+    dc->mem_idx = 0;
+
+    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; /* translation stops once gen_opc_ptr reaches this limit */
+
+    dc->is_jmp = DISAS_NEXT;
+    dc->pc = pc_start;
+    dc->astat_op = ASTAT_OP_DYNAMIC; /* no ASTAT op has been queued by this TB yet */
+    dc->hwloop_callback = gen_hwloop_default;
+
+    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; /* the loop below stops at this address */
+    lj = -1;
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK; /* insn limit from cflags; 0 falls back to CF_COUNT_MASK */
+    if (max_insns == 0)
+        max_insns = CF_COUNT_MASK;
+
+    gen_icount_start();
+    do {
+#ifdef CONFIG_USER_ONLY
+        /* Intercept jump to the magic kernel page (placeholder: the branch body is empty).  */
+        if ((dc->pc & 0xFFFFFF00) == 0x400) {
+        }
+#endif
+        check_breakpoint(env, dc);
+
+        if (search_pc) {
+            j = gen_opc_ptr - gen_opc_buf; /* index of the first op of this insn */
+            if (lj < j) {
+                lj++;
+                while (lj < j)
+                    gen_opc_instr_start[lj++] = 0; /* ops since the last recorded start are not insn starts */
+            }
+            gen_opc_pc[lj] = dc->pc; /* read back by gen_pc_load() */
+            gen_opc_instr_start[lj] = 1;
+            gen_opc_icount[lj] = num_insns;
+        }
+
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+            gen_io_start(); /* only before the last insn of a CF_LAST_IO block */
+
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+            tcg_gen_debug_insn_start(dc->pc);
+
+        interp_insn_bfin(dc);
+        gen_hwloop_check(dc);
+        dc->pc += dc->insn_len; /* insn_len is presumably set by interp_insn_bfin() -- not visible here */
+
+        ++num_insns;
+    } while (!dc->is_jmp &&
+        gen_opc_ptr < gen_opc_end &&
+        !env->singlestep_enabled &&
+        !singlestep &&
+        dc->pc < next_page_start &&
+        num_insns < max_insns);
+
+    if (tb->cflags & CF_LAST_IO)
+        gen_io_end();
+
+    if (unlikely(env->singlestep_enabled)) {
+        cec_exception(dc, EXCP_DEBUG); /* with single-stepping on, every block ends in a debug exception */
+    } else {
+        switch (dc->is_jmp) {
+            case DISAS_NEXT:
+                gen_gotoi_tb(dc, 1, dc->pc); /* loop ended without a jump: continue at the next pc */
+                break;
+            default:
+            case DISAS_UPDATE:
+                /* indicate that the hash table must be used
+                   to find the next TB */
+                tcg_gen_exit_tb(0);
+                break;
+            case DISAS_CALL:
+            case DISAS_JUMP:
+            case DISAS_TB_JUMP:
+                /* nothing more to generate */
+                break;
+        }
+    }
+
+    gen_icount_end(tb, num_insns);
+    *gen_opc_ptr = INDEX_op_end;
+
+    if (search_pc) {
+        j = gen_opc_ptr - gen_opc_buf;
+        lj++;
+        while (lj <= j)
+            gen_opc_instr_start[lj++] = 0; /* clear the remaining entries up to and including the end op */
+    } else {
+        tb->size = dc->pc - pc_start; /* bytes of guest code covered by this TB */
+        tb->icount = num_insns;
+    }
+
+#ifdef DEBUG_DISAS
+    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
+        qemu_log("----------------\n");
+        qemu_log("IN: %s\n", lookup_symbol(pc_start));
+        log_target_disas(pc_start, dc->pc - pc_start, 0);
+        qemu_log("\n");
+    }
+#endif
+}
+
+void gen_intermediate_code(CPUState *env, struct TranslationBlock *tb)
+{   /* Translate tb without recording the per-op PC tables. */
+    gen_intermediate_code_internal(env, tb, 0); /* search_pc = 0 */
+}
+
+void gen_intermediate_code_pc(CPUState *env, struct TranslationBlock *tb)
+{   /* Translate tb and also record the gen_opc_pc/gen_opc_instr_start/gen_opc_icount tables. */
+    gen_intermediate_code_internal(env, tb, 1); /* search_pc = 1 */
+}
+
+void gen_pc_load(CPUState *env, TranslationBlock *tb,
+                 unsigned long searched_pc, int pc_pos, void *puc)
+{   /* Set env->pc from the table entry at pc_pos; tb, searched_pc and puc are unused. */
+    env->pc = gen_opc_pc[pc_pos]; /* table is filled by the search_pc pass of gen_intermediate_code_internal() */
+}
+
+#include "bfin-sim.c"