Index: tcg/sh4/tcg-target.c
===================================================================
--- tcg/sh4/tcg-target.c	(revision 5)
+++ tcg/sh4/tcg-target.c	(working copy)
@@ -86,7 +86,6 @@
     TCG_REG_R0,
     TCG_REG_R1,
     TCG_REG_R2,
-    TCG_REG_R3,
 };
 
 static const int tcg_target_call_oarg_regs[2] = {
@@ -95,6 +94,7 @@
 };
 
 static const int tcg_target_callee_save_regs[] = {
+/*
     TCG_REG_R1,
     TCG_REG_R2,
     TCG_REG_R3,
@@ -102,13 +102,14 @@
     TCG_REG_R5,
     TCG_REG_R6,
     TCG_REG_R7,
+*/
+    TCG_REG_R14,
+    TCG_REG_R13,
+    TCG_REG_R12,
+    TCG_REG_R11,
+    TCG_REG_R10,
+    TCG_REG_R9,
     TCG_REG_R8,
-    TCG_REG_R9,
-    TCG_REG_R10,
-    TCG_REG_R11,
-    TCG_REG_R12,
-    TCG_REG_R13,
-    TCG_REG_R14,
 };
 
 #define OPC_ADD 0x300c
@@ -196,13 +197,19 @@
     uint8_t *reloc_pos;
     int needs_align = tcg_sh4_need_pc_align(s);
 
-    tcg_out16(s, MOVLI(1, ret));
     if (!needs_align || opc2 == OPC_NOP) {
-        tcg_out16(s, BRA(3 - needs_align));
+        tcg_out16(s, MOVLI(1, ret));
+        if (needs_align == 1) {
+            tcg_out16(s, BRA(2));
+        } else {
+            tcg_out16(s, BRA(3));
+        }
         tcg_out16(s, opc1); /* delay slot */
-        if (!needs_align)
+        if (needs_align == 0) {
             tcg_out16(s, MOV(0, 0)); /* Never reached */
+        }
     } else {
+        tcg_out16(s, MOVLI(1, ret));
         tcg_out16(s, opc1);
         tcg_out16(s, BRA(2));
         tcg_out16(s, opc2); /* delay slot */
@@ -226,7 +233,7 @@
             tcg_out16(s, MOVI(arg & 0xff, ret));
             if (opc1 != OPC_NOP)
                 tcg_out16(s, opc1);
-           break;
+            break;
         }
 
         if (arg == (uint8_t) arg) {
@@ -265,7 +272,7 @@
     }
 
     if (offset)
-        tmp = TCG_REG_R14;
+        tmp = TCG_REG_R3;
     else
         tmp = arg1;
 
@@ -285,7 +292,7 @@
     }
 
     if (offset)
-        tcg_sh4_movi(s, TCG_REG_R14, offset, ADD(arg1, TCG_REG_R14), opc);
+        tcg_sh4_movi(s, TCG_REG_R3, offset, ADD(arg1, TCG_REG_R3), opc);
     else
         tcg_out16(s, opc);
 
@@ -327,7 +334,7 @@
     }
 
     if (offset)
-        tmp = TCG_REG_R14;
+        tmp = TCG_REG_R3;
     else
         tmp = arg;
 
@@ -347,7 +354,7 @@
     }
 
     if (offset)
-        tcg_sh4_movi(s, TCG_REG_R14, offset, ADD(arg, TCG_REG_R14), opc);
+        tcg_sh4_movi(s, TCG_REG_R3, offset, ADD(arg, TCG_REG_R3), opc);
     else
         tcg_out16(s, opc);
 }
@@ -357,14 +364,14 @@
 {
     if (offset == 0) {
         if (size == 16) {
-            tcg_out16(s, SWAPB(arg1, TCG_REG_R14));
-            tcg_out16(s, MOVWS(TCG_REG_R14, arg));
+            tcg_out16(s, SWAPB(arg1, TCG_REG_R3));
+            tcg_out16(s, MOVWS(TCG_REG_R3, arg));
         }
         if (size == 32) {
-            tcg_out16(s, SWAPB(arg1, TCG_REG_R14));
-            tcg_out16(s, SWAPW(TCG_REG_R14, TCG_REG_R14));
-            tcg_out16(s, SWAPB(TCG_REG_R14, TCG_REG_R14));
-            tcg_out16(s, MOVLS(TCG_REG_R14, arg));
+            tcg_out16(s, SWAPB(arg1, TCG_REG_R3));
+            tcg_out16(s, SWAPW(TCG_REG_R3, TCG_REG_R3));
+            tcg_out16(s, SWAPB(TCG_REG_R3, TCG_REG_R3));
+            tcg_out16(s, MOVLS(TCG_REG_R3, arg));
        }
     } else {
         if (size == 16 || size == 32)
@@ -389,7 +396,7 @@
 static void tcg_sh4_alu(TCGContext *s, int ret, unsigned int opc, int arg1,
                         tcg_target_long arg2, int const_arg2)
 {
-    int tmp = TCG_REG_R14;
+    int tmp = TCG_REG_R3;
 
     if (const_arg2) { 
         if (ret == arg1)
@@ -397,12 +404,25 @@
         else
             tcg_sh4_movi(s, tmp, arg2, OPC_MN(opc, arg1, tmp), MOV(tmp, ret));
     } else {
-        if (ret == arg1)
-            tcg_out16(s, OPC_MN(opc, arg2, ret));
+        if (ret == arg1) {
+            if (opc == OPC_SUB) {
+                    tcg_out16(s, OPC_MN(opc, ret, arg2));
+            } 
+            else {
+                    tcg_out16(s, OPC_MN(opc, arg2, ret));
+            }
+        }
         else {
-            tcg_out16(s, MOV(arg2, tmp));
-            tcg_out16(s, OPC_MN(opc, arg1, tmp));
-            tcg_out16(s, MOV(tmp, ret));
+            if (opc == OPC_SUB) {
+                    tcg_out16(s, MOV(arg2, tmp));
+                    tcg_out16(s, OPC_MN(opc, tmp, arg1));
+                    tcg_out16(s, MOV(arg1, ret));
+            }
+            else {
+                    tcg_out16(s, MOV(arg2, tmp));
+                    tcg_out16(s, OPC_MN(opc, arg1, tmp));
+                    tcg_out16(s, MOV(tmp, ret));
+            }
         }
     }
 }
@@ -414,12 +434,12 @@
         tcg_sh4_alu(s, ret, opc, arg1, -arg2, 1);
     else {
         if (ret == arg1) {
-            tcg_out16(s, NEG(arg2, TCG_REG_R14));
-            tcg_out16(s, OPC_MN(opc, TCG_REG_R14, ret));
+            tcg_out16(s, NEG(arg2, TCG_REG_R3));
+            tcg_out16(s, OPC_MN(opc, TCG_REG_R3, ret));
         } else {
-            tcg_out16(s, NEG(arg2, TCG_REG_R14));
+            tcg_out16(s, NEG(arg2, TCG_REG_R3));
             tcg_out16(s, MOV(arg1, ret));
-            tcg_out16(s, OPC_MN(opc, TCG_REG_R14, ret));
+            tcg_out16(s, OPC_MN(opc, TCG_REG_R3, ret));
         }
     }
 }
@@ -427,7 +447,7 @@
 static void tcg_sh4_mul(TCGContext *s, int ret, int arg1,
                         tcg_target_long arg2, int const_arg2)
 {
-    int tmp = TCG_REG_R14;
+    int tmp = TCG_REG_R3;
 
     if (const_arg2)
         tcg_sh4_movi(s, tmp, arg2, OPC_MN(OPC_MULS, arg1, tmp), STS_MACL(ret));
@@ -489,8 +509,8 @@
 
 static void tcg_sh4_jmp_imm(TCGContext *s, tcg_target_long arg)
 {
-    tcg_sh4_movi(s, TCG_REG_R14, arg, OPC_NOP, OPC_NOP);
-    tcg_sh4_jmp_reg(s, TCG_REG_R14);
+    tcg_sh4_movi(s, TCG_REG_R3, arg, OPC_NOP, OPC_NOP);
+    tcg_sh4_jmp_reg(s, TCG_REG_R3);
 }
 
 static void tcg_sh4_jmp(TCGContext *s, tcg_target_long arg, int const_arg)
@@ -506,12 +526,14 @@
 {
     uint8_t *reloc_slot;
 
-    reloc_slot = tcg_sh4_movi32(s, TCG_REG_R14, 0, opc1, OPC_NOP);
+    reloc_slot = tcg_sh4_movi32(s, TCG_REG_R3, 0, opc1, OPC_NOP);
 
-    if (opc2 != OPC_NOP)
+    if (opc2 != OPC_NOP) {
         tcg_out16(s, opc2);
+    }
 
-    tcg_sh4_jmp_reg(s, TCG_REG_R14);
+    tcg_sh4_jmp_reg(s, TCG_REG_R3);
+
     return reloc_slot;
 }
 
@@ -521,10 +543,10 @@
     tcg_out_reloc(s, tcg_sh4_jmp_imm32(s, opc1, opc2), 0, index, 0);
 }
 
-static void tcg_sh4_brcond(TCGContext *s, int arg0, int arg1, int cond,
+static void tcg_sh4_brcond(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                            int index)
 {
-    unsigned int opc1 = tcg_sh4_cmp_opc(s, cond, arg0, arg1);
+    unsigned int opc1 = tcg_sh4_cmp_opc(s, cond, arg1, arg2);
     unsigned int opc2 = tcg_sh4_cmp_inv(s, cond) ? BT(1) : BF(1);
 
     tcg_sh4_jmp_index(s, opc1, opc2, index);
@@ -533,8 +555,8 @@
 static void tcg_sh4_jsr(TCGContext *s, tcg_target_long arg, int const_arg)
 {
     if (const_arg) {
-        tcg_sh4_movi(s, TCG_REG_R14, arg, STSMPR(TCG_REG_R15), OPC_NOP);
-        arg = TCG_REG_R14;
+        tcg_sh4_movi(s, TCG_REG_R3, arg, STSMPR(TCG_REG_R15), OPC_NOP);
+        arg = TCG_REG_R3;
     }
     else
         tcg_out16(s, STSMPR(TCG_REG_R15));
@@ -555,8 +577,14 @@
 
 static void tcg_sh4_qemu_st(TCGContext *s, const TCGArg *args, int size)
 {
+    /* Operand layout: args[0] = data register, args[1] = address
+     * register, args[2] = memory index (not used by this store path).
+     * Read by index so both branches below see the same operands. */
+    int data_reg = args[0];
+    int addr_reg = args[1];
+
     if (size == 8 || !swap_endian)
-        tcg_sh4_st(s, size, args[0], args[1], GUEST_BASE);
+        tcg_sh4_st(s, size, addr_reg, data_reg, GUEST_BASE);
     else
-        tcg_sh4_st_swap(s, size, args[0], args[1], GUEST_BASE);
+        tcg_sh4_st_swap(s, size, addr_reg, data_reg, GUEST_BASE);
 }
@@ -646,7 +674,14 @@
     /* save all callee saved registers */
     for(i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++)
         tcg_out16(s, MOVLM(tcg_target_callee_save_regs[i], TCG_REG_R15));
+    
+    /* save PR register */
+    tcg_out16(s, STSMPR(TCG_REG_R15));
 
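+    /* set the frame pointer: R14 = SP as it was on entry. NOTE(review): operand order of tcg_sh4_mov here differs from the mov_i32 call site -- confirm */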
+    tcg_sh4_mov(s, TCG_REG_R14, TCG_REG_R15);
+    tcg_out_addi(s, TCG_REG_R14, 4 * ARRAY_SIZE(tcg_target_callee_save_regs) + 4);
+
     /* reserve some stack space */
     push_size = 4 + ARRAY_SIZE(tcg_target_callee_save_regs) * 4;
     frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
@@ -658,9 +693,17 @@
     tcg_sh4_jmp_reg(s, TCG_REG_R4); /* tb_ptr in R4 from tcg_qemu_tb_exec() */
 
     /* TB epilogue */
+    /* Pad with a NOP when needed so the epilogue entry point is 4-byte aligned. */
+    if (tcg_sh4_need_pc_align(s) == 1) {
+        tcg_out16(s, OPC_NOP);
+    }
     tb_ret_addr = s->code_ptr;
-    tcg_out_addi(s, TCG_REG_R15, stack_addend);
+    /* Recover SP from the frame pointer in R14 instead of tcg_out_addi(s, TCG_REG_R15, stack_addend). */
+    tcg_out_addi(s, TCG_REG_R14, -(4 * ARRAY_SIZE(tcg_target_callee_save_regs) + 4));
+    tcg_sh4_mov(s, TCG_REG_R15, TCG_REG_R14);
 
+    tcg_out16(s, LDSMPR(TCG_REG_R15));
+
     for(i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--)
         tcg_out16(s, MOVLP(TCG_REG_R15, tcg_target_callee_save_regs[i]));
 
@@ -696,7 +739,7 @@
         tcg_sh4_jmp(s, args[0], const_args[0]);
         break;
     case INDEX_op_mov_i32:
-        tcg_sh4_mov(s, args[0], args[1]);
+        tcg_sh4_mov(s, args[1], args[0]);
         break;
     case INDEX_op_movi_i32:
         tcg_sh4_movi(s, args[0], args[1], OPC_NOP, OPC_NOP);
@@ -723,7 +766,7 @@
         tcg_sh4_st(s, 16, args[0], args[1], args[2]);
         break;
     case INDEX_op_st_i32:
-        tcg_sh4_st(s, 32, args[0], args[1], args[2]);
+        tcg_sh4_st(s, 32, args[1], args[0], args[2]);
         break;
     case INDEX_op_add_i32:
         tcg_sh4_alu(s, args[0], OPC_ADD, args[1],args[2], const_args[2]);
@@ -765,7 +808,7 @@
         tcg_abort();
         break;
     case INDEX_op_brcond_i32:
-        tcg_sh4_brcond(s, args[0], args[1], args[2], args[3]);
+        tcg_sh4_brcond(s, args[2], args[0], args[1], args[3]);
         break;
     case INDEX_op_qemu_ld8u:
         tcg_sh4_qemu_ld(s, args, 8, 0);
@@ -861,7 +904,8 @@
     tcg_regset_set32(tcg_target_call_clobber_regs, 0, 0);
 
     tcg_regset_clear(s->reserved_regs);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R14); /* Scratch */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3); /* Scratch */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R14); /* Stack frame */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R15); /* Stack pointer */
 
     tcg_add_target_add_op_defs(sh4_op_defs);
Index: tcg/tcg.h
===================================================================
--- tcg/tcg.h	(revision 5)
+++ tcg/tcg.h	(working copy)
@@ -298,8 +298,8 @@
 extern TCGContext tcg_ctx;
 extern uint16_t *gen_opc_ptr;
 extern TCGArg *gen_opparam_ptr;
-extern uint16_t gen_opc_buf[];
-extern TCGArg gen_opparam_buf[];
+extern uint16_t gen_opc_buf[OPC_BUF_SIZE];
+extern TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
 
 /* pool based memory allocation */
 
@@ -454,7 +454,7 @@
 void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type, 
                    int label_index, long addend);
 
-extern uint8_t code_gen_prologue[];
+extern uint8_t code_gen_prologue[1024];
 #if defined(_ARCH_PPC) && !defined(_ARCH_PPC64)
 #define tcg_qemu_tb_exec(tb_ptr) \
     ((long REGPARM __attribute__ ((longcall)) (*)(void *))code_gen_prologue)(tb_ptr)
Index: configure
===================================================================
--- configure	(revision 5)
+++ configure	(working copy)
@@ -1271,6 +1271,9 @@
 CFLAGS="-g $CFLAGS"
 if test "$debug" = "no" ; then
   CFLAGS="-O2 $CFLAGS"
+else
+  CFLAGS="-fmudflap $CFLAGS"
+  LIBS="-lmudflap $LIBS"
 fi
 QEMU_CFLAGS="-Wall -Wundef -Wendif-labels -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS"
 QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS"
Index: fpu/softfloat-native.c
===================================================================
--- fpu/softfloat-native.c	(revision 5)
+++ fpu/softfloat-native.c	(working copy)
@@ -15,7 +15,7 @@
 #elif defined(__arm__)
     /* nothing to do */
 #else
-    fesetround(val);
+    /* FIXME: fesetround(val) is disabled here, so the host rounding mode is not updated. */
 #endif
 }
 
Index: fpu/softfloat-native.h
===================================================================
--- fpu/softfloat-native.h	(revision 5)
+++ fpu/softfloat-native.h	(working copy)
@@ -6,7 +6,7 @@
 #include <ieeefp.h>
 #define fabsf(f) ((float)fabs(f))
 #else
-#include <fenv.h>
+//#include <fenv.h>
 #endif
 
 #if defined(__OpenBSD__) || defined(__NetBSD__)
@@ -135,10 +135,10 @@
 };
 #else
 enum {
-    float_round_nearest_even = FE_TONEAREST,
-    float_round_down         = FE_DOWNWARD,
-    float_round_up           = FE_UPWARD,
-    float_round_to_zero      = FE_TOWARDZERO
+    float_round_nearest_even = 0,
+    float_round_down         = 1,
+    float_round_up           = 2,
+    float_round_to_zero      = 3 
 };
 #endif
 
Index: thunk.h
===================================================================
--- thunk.h	(revision 5)
+++ thunk.h	(working copy)
@@ -73,7 +73,12 @@
                              const argtype *type_ptr, int to_host);
 #ifndef NO_THUNK_TYPE_SIZE
 
+#ifdef CONFIG_DEBUG_EXEC
+#define MAX_STRUCTS 128
+extern StructEntry struct_entries[MAX_STRUCTS];
+#else
 extern StructEntry struct_entries[];
+#endif
 
 int thunk_type_size_array(const argtype *type_ptr, int is_host);
 int thunk_type_align_array(const argtype *type_ptr, int is_host);
Index: linux-user/syscall.c
===================================================================
--- linux-user/syscall.c	(revision 5)
+++ linux-user/syscall.c	(working copy)
@@ -16,6 +16,7 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+
 #define _ATFILE_SOURCE
 #include <stdlib.h>
 #include <stdio.h>
@@ -28,6 +29,7 @@
 #include <fcntl.h>
 #include <time.h>
 #include <limits.h>
+#include <features.h>
 #include <sys/types.h>
 #include <sys/ipc.h>
 #include <sys/msg.h>
@@ -70,6 +72,9 @@
 #define tchars host_tchars /* same as target */
 #define ltchars host_ltchars /* same as target */
 
+
+#undef __BIG_ENDIAN
+
 #include <linux/termios.h>
 #include <linux/unistd.h>
 #include <linux/utsname.h>
@@ -86,6 +91,8 @@
 #include "qemu.h"
 #include "qemu-common.h"
 
+#define bzero(x, y) (memset((x), 0, (y)))
+
 #if defined(CONFIG_USE_NPTL)
 #define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \
     CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)
@@ -3586,7 +3593,12 @@
             info.parent_tidptr = parent_tidptr;
 
         ret = pthread_attr_init(&attr);
+#if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 
         ret = pthread_attr_setstack(&attr, new_stack, NEW_STACK_SIZE);
+#else
+        ret = pthread_attr_setstackaddr(&attr, new_stack);
+        ret = pthread_attr_setstacksize(&attr, NEW_STACK_SIZE);
+#endif
         /* It is not safe to deliver signals until the child has finished
            initializing, so temporarily block all signals.  */
         sigfillset(&sigmask);
Index: linux-user/signal.c
===================================================================
--- linux-user/signal.c	(revision 5)
+++ linux-user/signal.c	(working copy)
@@ -366,7 +366,7 @@
 }
 
 /* abort execution with signal */
-static void QEMU_NORETURN force_sig(int sig)
+static void force_sig(int sig)
 {
     TaskState *ts = (TaskState *)thread_env->opaque;
     int host_sig, core_dumped = 0;
@@ -428,6 +428,16 @@
 #if defined(DEBUG_SIGNAL)
     fprintf(stderr, "queue_signal: sig=%d\n",
             sig);
+    fprintf(stderr, "current eip = 0x%08lX\n", (unsigned long)env->eip);
+    fprintf(stderr, "current eax = 0x%08lX\n", (unsigned long)env->regs[0]);
+    fprintf(stderr, "current ecx = 0x%08lX\n", (unsigned long)env->regs[1]);
+    fprintf(stderr, "current edx = 0x%08lX\n", (unsigned long)env->regs[2]);
+    fprintf(stderr, "current ebx = 0x%08lX\n", (unsigned long)env->regs[3]);
+    fprintf(stderr, "current esp = 0x%08lX\n", (unsigned long)env->regs[4]);
+    fprintf(stderr, "current ebp = 0x%08lX\n", (unsigned long)env->regs[5]);
+    fprintf(stderr, "current esi = 0x%08lX\n", (unsigned long)env->regs[6]);
+    fprintf(stderr, "current edi = 0x%08lX\n", (unsigned long)env->regs[7]);
+
 #endif
     k = &ts->sigtab[sig - 1];
     queue = gdb_queuesig ();
@@ -479,6 +489,7 @@
         ts->signal_pending = 1;
         return 1; /* indicates that the signal was queued */
     }
+    return 0;
 }
 
 static void host_signal_handler(int host_signum, siginfo_t *info,
Index: linux-user/main.c
===================================================================
--- linux-user/main.c	(revision 5)
+++ linux-user/main.c	(working copy)
@@ -45,6 +45,10 @@
 int have_guest_base;
 #endif
 
+#ifdef CONFIG_DEBUG_EXEC
+extern const CPULogItem cpu_log_items[]; /* an array, as declared in cpu-all.h; not a pointer */
+#endif
+
 static const char *interp_prefix = CONFIG_QEMU_PREFIX;
 const char *qemu_uname_release = CONFIG_UNAME_RELEASE;
 
Index: exec.c
===================================================================
--- exec.c	(revision 5)
+++ exec.c	(working copy)
@@ -223,6 +223,7 @@
 static void map_exec(void *addr, long size)
 {
     unsigned long start, end, page_size;
+    int ret;
     
     page_size = getpagesize();
     start = (unsigned long)addr;
@@ -232,8 +233,11 @@
     end += page_size - 1;
     end &= ~(page_size - 1);
     
-    mprotect((void *)start, end - start,
+    ret = mprotect((void *)start, end - start,
              PROT_READ | PROT_WRITE | PROT_EXEC);
+    if (ret != 0) {
+        perror("mprotect"); /* report the errno text on stderr, not stdout */
+    }
 }
 #endif
 
@@ -317,6 +321,9 @@
         /* Don't use qemu_malloc because it may recurse.  */
         p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (p != MAP_FAILED) { /* mmap() reports failure as MAP_FAILED, not NULL */
+            memset(p, 0, len);
+        }
         *lp = p;
         if (h2g_valid(p)) {
             unsigned long addr = h2g(p);
@@ -398,7 +405,7 @@
 #define mmap_unlock() do { } while(0)
 #endif
 
-#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
+#define DEFAULT_CODE_GEN_BUFFER_SIZE (4 * 1024 * 1024)
 
 #if defined(CONFIG_USER_ONLY)
 /* Currently it is not recommended to allocate big chunks of data in
@@ -1621,6 +1628,7 @@
     { 0, NULL, NULL },
 };
 
+
 static int cmp1(const char *s1, int n, const char *s2)
 {
     if (strlen(s2) != n)
Index: cpu-all.h
===================================================================
--- cpu-all.h	(revision 5)
+++ cpu-all.h	(working copy)
@@ -834,7 +834,9 @@
     const char *help;
 } CPULogItem;
 
+#ifndef CONFIG_DEBUG_EXEC
 extern const CPULogItem cpu_log_items[];
+#endif
 
 void cpu_set_log(int log_flags);
 void cpu_set_log_filename(const char *filename);
