Patchwork target-arm: tcg temp variable usage cleanup

login
register
mail settings
Submitter Juha.Riihimaki@nokia.com
Date Oct. 29, 2009, 2:01 p.m.
Message ID <1256824875-46345-1-git-send-email-juha.riihimaki@nokia.com>
Download mbox | patch
Permalink /patch/37196/
State New
Headers show

Comments

Juha.Riihimaki@nokia.com - Oct. 29, 2009, 2:01 p.m.
From: Juha Riihimäki <juha.riihimaki@nokia.com>

TCG temporary variable handling in target-arm/translate.c is currently
somewhat inconsistent; some functions allocate new temporaries that the
calling function is expected to free and some other functions free
temporaries that are passed in as parameters. This patch will remove all
such instances in the code and make the lifespan of the temporaries more
clearly visible as they are always allocated and freed within one function.
The only exception to this are the global temporaries allocated in the
beginning of the gen_intermediate_code_internal function.

Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com>
---
 target-arm/translate.c | 2723 ++++++++++++++++++++++++++----------------------
 1 files changed, 1502 insertions(+), 1221 deletions(-)
Stuart Brady - Oct. 29, 2009, 6:22 p.m.
On Thu, Oct 29, 2009 at 04:01:15PM +0200, juha.riihimaki@nokia.com wrote:
>  
> -static inline TCGv gen_ld8s(TCGv addr, int index)
> +static inline void gen_ld8s(TCGv ret, TCGv addr, int index)
>  {
> -    TCGv tmp = new_tmp();
> -    tcg_gen_qemu_ld8s(tmp, addr, index);
> -    return tmp;
> +    tcg_gen_qemu_ld8s(ret, addr, index);
>  }
[...]
>  static inline void gen_st8(TCGv val, TCGv addr, int index)
>  {
>      tcg_gen_qemu_st8(val, addr, index);
> -    dead_tmp(val);
>  }
>  static inline void gen_st16(TCGv val, TCGv addr, int index)
>  {
>      tcg_gen_qemu_st16(val, addr, index);
> -    dead_tmp(val);
>  }
[...]

Why not remove these functions entirely, replacing them with calls to
tcg_gen_qemu_ld/st()?

I presume there's a reason, given this:

> @@ -1001,25 +955,24 @@ VFP_GEN_FIX(ulto)
>  static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv addr)
>  {
>      if (dp)
> -        tcg_gen_qemu_ld64(cpu_F0d, addr, IS_USER(s));
> +        gen_ld64(cpu_F0d, addr, IS_USER(s));
>      else
> -        tcg_gen_qemu_ld32u(cpu_F0s, addr, IS_USER(s));
> +        gen_ld32(cpu_F0s, addr, IS_USER(s));
>  }

Cheers,
Laurent Desnogues - Oct. 29, 2009, 6:32 p.m.
On Thu, Oct 29, 2009 at 7:22 PM, Stuart Brady <sdbrady@ntlworld.com> wrote:
> On Thu, Oct 29, 2009 at 04:01:15PM +0200, juha.riihimaki@nokia.com wrote:
>>
>> -static inline TCGv gen_ld8s(TCGv addr, int index)
>> +static inline void gen_ld8s(TCGv ret, TCGv addr, int index)
>>  {
>> -    TCGv tmp = new_tmp();
>> -    tcg_gen_qemu_ld8s(tmp, addr, index);
>> -    return tmp;
>> +    tcg_gen_qemu_ld8s(ret, addr, index);
>>  }
> [...]
>>  static inline void gen_st8(TCGv val, TCGv addr, int index)
>>  {
>>      tcg_gen_qemu_st8(val, addr, index);
>> -    dead_tmp(val);
>>  }
>>  static inline void gen_st16(TCGv val, TCGv addr, int index)
>>  {
>>      tcg_gen_qemu_st16(val, addr, index);
>> -    dead_tmp(val);
>>  }
> [...]
>
> Why not remove these functions entirely, replacing them with calls to
> tcg_gen_qemu_ld/st()?

It eases adding calls to a memory tracer for instance.


Laurent

> I presume there's a reason, given this:
>
>> @@ -1001,25 +955,24 @@ VFP_GEN_FIX(ulto)
>>  static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv addr)
>>  {
>>      if (dp)
>> -        tcg_gen_qemu_ld64(cpu_F0d, addr, IS_USER(s));
>> +        gen_ld64(cpu_F0d, addr, IS_USER(s));
>>      else
>> -        tcg_gen_qemu_ld32u(cpu_F0s, addr, IS_USER(s));
>> +        gen_ld32(cpu_F0s, addr, IS_USER(s));
>>  }
>
> Cheers,
> --
> Stuart Brady
>
>
>
Laurent Desnogues - Nov. 1, 2009, 12:08 a.m.
On Thu, Oct 29, 2009 at 3:01 PM,  <juha.riihimaki@nokia.com> wrote:
> From: Juha Riihimäki <juha.riihimaki@nokia.com>
>
> TCG temporary variable handling in target-arm/translate.c is currently
> somewhat inconsistent; some functions allocate new temporaries that the
> calling function is expected to free and some other functions free
> temporaries that are passed in as parameters. This patch will remove all
> such instances in the code and make the lifespan of the temporaries more
> clearly visible as they are always allocated and freed within one function.
> The only exception to this are the global temporaries allocated in the
> beginning of the gen_intermediate_code_internal function.
>
> Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com>
> ---
>  target-arm/translate.c | 2723 ++++++++++++++++++++++++++----------------------
>  1 files changed, 1502 insertions(+), 1221 deletions(-)
>
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 5784566..6982bad 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
[...]
> @@ -3684,12 +3678,12 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
>     TCGv_i64 tmp64;
>
>     if (!vfp_enabled(env))
> -      return 1;
> +        return 1;
>     VFP_DREG_D(rd, insn);
>     rn = (insn >> 16) & 0xf;
>     rm = insn & 0xf;
>     load = (insn & (1 << 21)) != 0;
> -    addr = new_tmp();
> +    addr = tcg_temp_new_i32();

addr should be allocated after the undefined instruction
detection.

>                         if (load) {
>                             TCGV_UNUSED(tmp2);
>                             for (n = 0; n < 4; n++) {
> -                                tmp = gen_ld8u(addr, IS_USER(s));
> +                                gen_ld8u(tmp, addr, IS_USER(s));
>                                 tcg_gen_addi_i32(addr, addr, stride);
>                                 if (n == 0) {
>                                     tmp2 = tmp;
> +                                    tmp = tcg_temp_new_i32();
>                                 } else {
>                                     gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff);
> -                                    dead_tmp(tmp);
> +                                    tcg_temp_free_i32(tmp);
>                                 }
>                             }
>                             neon_store_reg(rd, pass, tmp2);
> +                            tmp = tmp2;

This is completely wrong :-)  It should be rewritten
in the same way as the store code that follows.

> @@ -3795,31 +3795,36 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
>             nregs = ((insn >> 8) & 3) + 1;
>             stride = (insn & (1 << 5)) ? 2 : 1;
>             load_reg_var(s, addr, rn);
> +            tmp = tcg_temp_new_i32();
> +            tmp2 = tcg_temp_new_i32();
>             for (reg = 0; reg < nregs; reg++) {
>                 switch (size) {
>                 case 0:
> -                    tmp = gen_ld8u(addr, IS_USER(s));
> +                    gen_ld8u(tmp, addr, IS_USER(s));
>                     gen_neon_dup_u8(tmp, 0);
>                     break;
>                 case 1:
> -                    tmp = gen_ld16u(addr, IS_USER(s));
> +                    gen_ld16u(tmp, addr, IS_USER(s));
>                     gen_neon_dup_low16(tmp);
>                     break;
>                 case 2:
> -                    tmp = gen_ld32(addr, IS_USER(s));
> +                    gen_ld32(tmp, addr, IS_USER(s));
>                     break;
>                 case 3:
> +                    tcg_temp_free_i32(tmp2);
> +                    tcg_temp_free_i32(tmp);

Missing free of addr.

> @@ -4184,278 +4183,304 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)

I won't comment on this function; it is missing too many
undefined-instruction checks and some instructions.


> @@ -6622,23 +6782,26 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
>                         default: goto illegal_op;

Missing free of tmp before the goto.

The rest is at least OK from a temp leak point of view.

I tested your patch by running translate + TCG code gen
for all of the opcodes in the range e0000000-ffffffff.
For the NEON instructions I had to add correct undefined
detection to let my program process the range (OTOH I
didn't bother fixing the wrong decoding and/or codegen;
I was just doing a sanity check on your patch).

Next step is to also do that for Thumb2.  And then run
some real programs.


Laurent
Juha.Riihimaki@nokia.com - Nov. 2, 2009, 8:06 a.m.
On Nov 1, 2009, at 02:08, ext Laurent Desnogues wrote:

> On Thu, Oct 29, 2009 at 3:01 PM,  <juha.riihimaki@nokia.com> wrote:
>> From: Juha Riihimäki <juha.riihimaki@nokia.com>
>>
>> TCG temporary variable handling in target-arm/translate.c is currently
>> somewhat inconsistent; some functions allocate new temporaries that the
>> calling function is expected to free and some other functions free
>> temporaries that are passed in as parameters. This patch will remove all
>> such instances in the code and make the lifespan of the temporaries more
>> clearly visible as they are always allocated and freed within one function.
>> The only exception to this are the global temporaries allocated in the
>> beginning of the gen_intermediate_code_internal function.
[...]
> I tested your patch by running translate + TCG code gen
> for all of the opcodes in the range e0000000-ffffffff.
> For the NEON instructions I had to add correct undefined
> detection to let my program process the range (OTOH I
> didn't bother fixing the wrong decoding and/or codegen,
> I was just doing sanity check on your patch).
>
> Next step is to also do that for Thumb2.  And then run
> some real programs.

Thanks for your work so far. I fixed the things you pointed out, but
I'll hold off on submitting a new version of the patch until you have
had time to do more testing. I tested with the n810 system emulation;
it was working fine.


Regards,
Juha

Patch

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 5784566..6982bad 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -104,35 +104,17 @@  void arm_translate_init(void)
 #include "helpers.h"
 }
 
-static int num_temps;
-
-/* Allocate a temporary variable.  */
-static TCGv_i32 new_tmp(void)
+static inline void load_cpu_offset(TCGv ret, int offset)
 {
-    num_temps++;
-    return tcg_temp_new_i32();
+    tcg_gen_ld_i32(ret, cpu_env, offset);
 }
 
-/* Release a temporary variable.  */
-static void dead_tmp(TCGv tmp)
-{
-    tcg_temp_free(tmp);
-    num_temps--;
-}
-
-static inline TCGv load_cpu_offset(int offset)
-{
-    TCGv tmp = new_tmp();
-    tcg_gen_ld_i32(tmp, cpu_env, offset);
-    return tmp;
-}
-
-#define load_cpu_field(name) load_cpu_offset(offsetof(CPUState, name))
+#define load_cpu_field(var, name) \
+    load_cpu_offset(var, offsetof(CPUState, name))
 
 static inline void store_cpu_offset(TCGv var, int offset)
 {
     tcg_gen_st_i32(var, cpu_env, offset);
-    dead_tmp(var);
 }
 
 #define store_cpu_field(var, name) \
@@ -154,16 +136,7 @@  static void load_reg_var(DisasContext *s, TCGv var, int reg)
     }
 }
 
-/* Create a new temporary and set it to the value of a CPU register.  */
-static inline TCGv load_reg(DisasContext *s, int reg)
-{
-    TCGv tmp = new_tmp();
-    load_reg_var(s, tmp, reg);
-    return tmp;
-}
-
-/* Set a CPU register.  The source must be a temporary and will be
-   marked as dead.  */
+/* Set a CPU register to the value of a variable. Clobbers var. */
 static void store_reg(DisasContext *s, int reg, TCGv var)
 {
     if (reg == 15) {
@@ -171,7 +144,6 @@  static void store_reg(DisasContext *s, int reg, TCGv var)
         s->is_jmp = DISAS_JUMP;
     }
     tcg_gen_mov_i32(cpu_R[reg], var);
-    dead_tmp(var);
 }
 
 /* Value extensions.  */
@@ -195,49 +167,49 @@  static inline void gen_set_cpsr(TCGv var, uint32_t mask)
 
 static void gen_exception(int excp)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     tcg_gen_movi_i32(tmp, excp);
     gen_helper_exception(tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 static void gen_smul_dual(TCGv a, TCGv b)
 {
-    TCGv tmp1 = new_tmp();
-    TCGv tmp2 = new_tmp();
+    TCGv tmp1 = tcg_temp_new_i32();
+    TCGv tmp2 = tcg_temp_new_i32();
     tcg_gen_ext16s_i32(tmp1, a);
     tcg_gen_ext16s_i32(tmp2, b);
     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
-    dead_tmp(tmp2);
+    tcg_temp_free_i32(tmp2);
     tcg_gen_sari_i32(a, a, 16);
     tcg_gen_sari_i32(b, b, 16);
     tcg_gen_mul_i32(b, b, a);
     tcg_gen_mov_i32(a, tmp1);
-    dead_tmp(tmp1);
+    tcg_temp_free_i32(tmp1);
 }
 
 /* Byteswap each halfword.  */
 static void gen_rev16(TCGv var)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     tcg_gen_shri_i32(tmp, var, 8);
     tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
     tcg_gen_shli_i32(var, var, 8);
     tcg_gen_andi_i32(var, var, 0xff00ff00);
     tcg_gen_or_i32(var, var, tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 /* Byteswap low halfword and sign extend.  */
 static void gen_revsh(TCGv var)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     tcg_gen_shri_i32(tmp, var, 8);
     tcg_gen_andi_i32(tmp, tmp, 0x00ff);
     tcg_gen_shli_i32(var, var, 8);
     tcg_gen_ext8s_i32(var, var);
     tcg_gen_or_i32(var, var, tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 /* Unsigned bitfield extract.  */
@@ -281,33 +253,25 @@  static void gen_roundqd(TCGv a, TCGv b)
 
 /* FIXME: Most targets have native widening multiplication.
    It would be good to use that instead of a full wide multiply.  */
-/* 32x32->64 multiply.  Marks inputs as dead.  */
-static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b)
+/* 32x32->64 multiply. */
+static void gen_mulu_i64_i32(TCGv_i64 ret, TCGv a, TCGv b)
 {
-    TCGv_i64 tmp1 = tcg_temp_new_i64();
-    TCGv_i64 tmp2 = tcg_temp_new_i64();
+    TCGv_i64 tmp = tcg_temp_new_i64();
 
-    tcg_gen_extu_i32_i64(tmp1, a);
-    dead_tmp(a);
-    tcg_gen_extu_i32_i64(tmp2, b);
-    dead_tmp(b);
-    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
-    tcg_temp_free_i64(tmp2);
-    return tmp1;
+    tcg_gen_extu_i32_i64(ret, a);
+    tcg_gen_extu_i32_i64(tmp, b);
+    tcg_gen_mul_i64(ret, ret, tmp);
+    tcg_temp_free_i64(tmp);
 }
 
-static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
+static void gen_muls_i64_i32(TCGv_i64 ret, TCGv a, TCGv b)
 {
-    TCGv_i64 tmp1 = tcg_temp_new_i64();
-    TCGv_i64 tmp2 = tcg_temp_new_i64();
+    TCGv_i64 tmp = tcg_temp_new_i64();
 
-    tcg_gen_ext_i32_i64(tmp1, a);
-    dead_tmp(a);
-    tcg_gen_ext_i32_i64(tmp2, b);
-    dead_tmp(b);
-    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
-    tcg_temp_free_i64(tmp2);
-    return tmp1;
+    tcg_gen_ext_i32_i64(ret, a);
+    tcg_gen_ext_i32_i64(tmp, b);
+    tcg_gen_mul_i64(ret, ret, tmp);
+    tcg_temp_free_i64(tmp);
 }
 
 /* Signed 32x32->64 multiply.  */
@@ -329,14 +293,14 @@  static void gen_imull(TCGv a, TCGv b)
 /* Swap low and high halfwords.  */
 static void gen_swap_half(TCGv var)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     tcg_gen_shri_i32(tmp, var, 16);
     tcg_gen_shli_i32(var, var, 16);
     tcg_gen_or_i32(var, var, tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
-/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
+/* Dual 16-bit add.  Result placed in t0. Clobbers t1.
     tmp = (t0 ^ t1) & 0x8000;
     t0 &= ~0x8000;
     t1 &= ~0x8000;
@@ -345,15 +309,14 @@  static void gen_swap_half(TCGv var)
 
 static void gen_add16(TCGv t0, TCGv t1)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     tcg_gen_xor_i32(tmp, t0, t1);
     tcg_gen_andi_i32(tmp, tmp, 0x8000);
     tcg_gen_andi_i32(t0, t0, ~0x8000);
     tcg_gen_andi_i32(t1, t1, ~0x8000);
     tcg_gen_add_i32(t0, t0, t1);
     tcg_gen_xor_i32(t0, t0, tmp);
-    dead_tmp(tmp);
-    dead_tmp(t1);
+    tcg_temp_free_i32(tmp);
 }
 
 #define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, CF))
@@ -361,10 +324,10 @@  static void gen_add16(TCGv t0, TCGv t1)
 /* Set CF to the top bit of var.  */
 static void gen_set_CF_bit31(TCGv var)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     tcg_gen_shri_i32(tmp, var, 31);
     gen_set_CF(tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 /* Set N and Z flags from var.  */
@@ -379,9 +342,10 @@  static void gen_adc(TCGv t0, TCGv t1)
 {
     TCGv tmp;
     tcg_gen_add_i32(t0, t0, t1);
-    tmp = load_cpu_field(CF);
+    tmp = tcg_temp_new_i32();
+    load_cpu_field(tmp, CF);
     tcg_gen_add_i32(t0, t0, tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 /* dest = T0 + T1 + CF. */
@@ -389,9 +353,10 @@  static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1)
 {
     TCGv tmp;
     tcg_gen_add_i32(dest, t0, t1);
-    tmp = load_cpu_field(CF);
+    tmp = tcg_temp_new_i32();
+    load_cpu_field(tmp, CF);
     tcg_gen_add_i32(dest, dest, tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 /* dest = T0 - T1 + CF - 1.  */
@@ -399,10 +364,11 @@  static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
 {
     TCGv tmp;
     tcg_gen_sub_i32(dest, t0, t1);
-    tmp = load_cpu_field(CF);
+    tmp = tcg_temp_new_i32();
+    load_cpu_field(tmp, CF);
     tcg_gen_add_i32(dest, dest, tmp);
+    tcg_temp_free_i32(tmp);
     tcg_gen_subi_i32(dest, dest, 1);
-    dead_tmp(tmp);
 }
 
 /* FIXME:  Implement this natively.  */
@@ -410,7 +376,7 @@  static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
 
 static void shifter_out_im(TCGv var, int shift)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     if (shift == 0) {
         tcg_gen_andi_i32(tmp, var, 1);
     } else {
@@ -419,12 +385,14 @@  static void shifter_out_im(TCGv var, int shift)
             tcg_gen_andi_i32(tmp, tmp, 1);
     }
     gen_set_CF(tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 /* Shift by immediate.  Includes special handling for shift == 0.  */
 static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
 {
+    TCGv tmp;
+
     switch (shiftop) {
     case 0: /* LSL */
         if (shift != 0) {
@@ -461,13 +429,14 @@  static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
                 shifter_out_im(var, shift - 1);
             tcg_gen_rotri_i32(var, var, shift); break;
         } else {
-            TCGv tmp = load_cpu_field(CF);
+            tmp = tcg_temp_new_i32();
+            load_cpu_field(tmp, CF);
             if (flags)
                 shifter_out_im(var, 0);
             tcg_gen_shri_i32(var, var, 1);
             tcg_gen_shli_i32(tmp, tmp, 31);
             tcg_gen_or_i32(var, var, tmp);
-            dead_tmp(tmp);
+            tcg_temp_free_i32(tmp);
         }
     }
 };
@@ -491,7 +460,6 @@  static inline void gen_arm_shift_reg(TCGv var, int shiftop,
                 tcg_gen_rotr_i32(var, var, shift); break;
         }
     }
-    dead_tmp(shift);
 }
 
 #define PAS_OP(pfx) \
@@ -593,96 +561,97 @@  static void gen_test_cc(int cc, int label)
     TCGv tmp2;
     int inv;
 
+    tmp = tcg_temp_new_i32();
     switch (cc) {
     case 0: /* eq: Z */
-        tmp = load_cpu_field(ZF);
+        load_cpu_field(tmp, ZF);
         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
         break;
     case 1: /* ne: !Z */
-        tmp = load_cpu_field(ZF);
+        load_cpu_field(tmp, ZF);
         tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
         break;
     case 2: /* cs: C */
-        tmp = load_cpu_field(CF);
+        load_cpu_field(tmp, CF);
         tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
         break;
     case 3: /* cc: !C */
-        tmp = load_cpu_field(CF);
+        load_cpu_field(tmp, CF);
         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
         break;
     case 4: /* mi: N */
-        tmp = load_cpu_field(NF);
+        load_cpu_field(tmp, NF);
         tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
         break;
     case 5: /* pl: !N */
-        tmp = load_cpu_field(NF);
+        load_cpu_field(tmp, NF);
         tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
         break;
     case 6: /* vs: V */
-        tmp = load_cpu_field(VF);
+        load_cpu_field(tmp, VF);
         tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
         break;
     case 7: /* vc: !V */
-        tmp = load_cpu_field(VF);
+        load_cpu_field(tmp, VF);
         tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
         break;
     case 8: /* hi: C && !Z */
         inv = gen_new_label();
-        tmp = load_cpu_field(CF);
+        load_cpu_field(tmp, CF);
         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
-        dead_tmp(tmp);
-        tmp = load_cpu_field(ZF);
+        load_cpu_field(tmp, ZF);
         tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
         gen_set_label(inv);
         break;
     case 9: /* ls: !C || Z */
-        tmp = load_cpu_field(CF);
+        load_cpu_field(tmp, CF);
         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
-        dead_tmp(tmp);
-        tmp = load_cpu_field(ZF);
+        load_cpu_field(tmp, ZF);
         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
         break;
     case 10: /* ge: N == V -> N ^ V == 0 */
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
+        load_cpu_field(tmp, VF);
+        tmp2 = tcg_temp_new_i32();
+        load_cpu_field(tmp2, NF);
         tcg_gen_xor_i32(tmp, tmp, tmp2);
-        dead_tmp(tmp2);
+        tcg_temp_free_i32(tmp2);
         tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
         break;
     case 11: /* lt: N != V -> N ^ V != 0 */
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
+        load_cpu_field(tmp, VF);
+        tmp2 = tcg_temp_new_i32();
+        load_cpu_field(tmp2, NF);
         tcg_gen_xor_i32(tmp, tmp, tmp2);
-        dead_tmp(tmp2);
+        tcg_temp_free_i32(tmp2);
         tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
         break;
     case 12: /* gt: !Z && N == V */
         inv = gen_new_label();
-        tmp = load_cpu_field(ZF);
+        load_cpu_field(tmp, ZF);
         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
-        dead_tmp(tmp);
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
+        load_cpu_field(tmp, VF);
+        tmp2 = tcg_temp_new_i32();
+        load_cpu_field(tmp2, NF);
         tcg_gen_xor_i32(tmp, tmp, tmp2);
-        dead_tmp(tmp2);
+        tcg_temp_free_i32(tmp2);
         tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
         gen_set_label(inv);
         break;
     case 13: /* le: Z || N != V */
-        tmp = load_cpu_field(ZF);
+        load_cpu_field(tmp, ZF);
         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
-        dead_tmp(tmp);
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
+        load_cpu_field(tmp, VF);
+        tmp2 = tcg_temp_new_i32();
+        load_cpu_field(tmp2, NF);
         tcg_gen_xor_i32(tmp, tmp, tmp2);
-        dead_tmp(tmp2);
+        tcg_temp_free_i32(tmp2);
         tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
         break;
     default:
         fprintf(stderr, "Bad condition code 0x%x\n", cc);
         abort();
     }
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 static const uint8_t table_logic_cc[16] = {
@@ -711,15 +680,15 @@  static inline void gen_bx_im(DisasContext *s, uint32_t addr)
 
     s->is_jmp = DISAS_UPDATE;
     if (s->thumb != (addr & 1)) {
-        tmp = new_tmp();
+        tmp = tcg_temp_new_i32();
         tcg_gen_movi_i32(tmp, addr & 1);
         tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, thumb));
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp);
     }
     tcg_gen_movi_i32(cpu_R[15], addr & ~1);
 }
 
-/* Set PC and Thumb state from var.  var is marked as dead.  */
+/* Set PC and Thumb state from var. Clobbers var. */
 static inline void gen_bx(DisasContext *s, TCGv var)
 {
     s->is_jmp = DISAS_UPDATE;
@@ -729,8 +698,7 @@  static inline void gen_bx(DisasContext *s, TCGv var)
 }
 
 /* Variant of store_reg which uses branch&exchange logic when storing
-   to r15 in ARM architecture v7 and above. The source must be a temporary
-   and will be marked as dead. */
+   to r15 in ARM architecture v7 and above. */
 static inline void store_reg_bx(CPUState *env, DisasContext *s,
                                 int reg, TCGv var)
 {
@@ -741,61 +709,45 @@  static inline void store_reg_bx(CPUState *env, DisasContext *s,
     }
 }
 
-static inline TCGv gen_ld8s(TCGv addr, int index)
+static inline void gen_ld8s(TCGv ret, TCGv addr, int index)
 {
-    TCGv tmp = new_tmp();
-    tcg_gen_qemu_ld8s(tmp, addr, index);
-    return tmp;
+    tcg_gen_qemu_ld8s(ret, addr, index);
 }
-static inline TCGv gen_ld8u(TCGv addr, int index)
+static inline void gen_ld8u(TCGv ret, TCGv addr, int index)
 {
-    TCGv tmp = new_tmp();
-    tcg_gen_qemu_ld8u(tmp, addr, index);
-    return tmp;
+    tcg_gen_qemu_ld8u(ret, addr, index);
 }
-static inline TCGv gen_ld16s(TCGv addr, int index)
+static inline void gen_ld16s(TCGv ret, TCGv addr, int index)
 {
-    TCGv tmp = new_tmp();
-    tcg_gen_qemu_ld16s(tmp, addr, index);
-    return tmp;
+    tcg_gen_qemu_ld16s(ret, addr, index);
 }
-static inline TCGv gen_ld16u(TCGv addr, int index)
+static inline void gen_ld16u(TCGv ret, TCGv addr, int index)
 {
-    TCGv tmp = new_tmp();
-    tcg_gen_qemu_ld16u(tmp, addr, index);
-    return tmp;
+    tcg_gen_qemu_ld16u(ret, addr, index);
 }
-static inline TCGv gen_ld32(TCGv addr, int index)
+static inline void gen_ld32(TCGv ret, TCGv addr, int index)
 {
-    TCGv tmp = new_tmp();
-    tcg_gen_qemu_ld32u(tmp, addr, index);
-    return tmp;
+    tcg_gen_qemu_ld32u(ret, addr, index);
 }
-static inline TCGv_i64 gen_ld64(TCGv addr, int index)
+static inline void gen_ld64(TCGv_i64 ret, TCGv addr, int index)
 {
-    TCGv_i64 tmp = tcg_temp_new_i64();
-    tcg_gen_qemu_ld64(tmp, addr, index);
-    return tmp;
+    tcg_gen_qemu_ld64(ret, addr, index);
 }
 static inline void gen_st8(TCGv val, TCGv addr, int index)
 {
     tcg_gen_qemu_st8(val, addr, index);
-    dead_tmp(val);
 }
 static inline void gen_st16(TCGv val, TCGv addr, int index)
 {
     tcg_gen_qemu_st16(val, addr, index);
-    dead_tmp(val);
 }
 static inline void gen_st32(TCGv val, TCGv addr, int index)
 {
     tcg_gen_qemu_st32(val, addr, index);
-    dead_tmp(val);
 }
 static inline void gen_st64(TCGv_i64 val, TCGv addr, int index)
 {
     tcg_gen_qemu_st64(val, addr, index);
-    tcg_temp_free_i64(val);
 }
 
 static inline void gen_set_pc_im(uint32_t val)
@@ -828,13 +780,14 @@  static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
         rm = (insn) & 0xf;
         shift = (insn >> 7) & 0x1f;
         shiftop = (insn >> 5) & 3;
-        offset = load_reg(s, rm);
+        offset = tcg_temp_new_i32();
+        load_reg_var(s, offset, rm);
         gen_arm_shift_im(offset, shiftop, shift, 0);
         if (!(insn & (1 << 23)))
             tcg_gen_sub_i32(var, var, offset);
         else
             tcg_gen_add_i32(var, var, offset);
-        dead_tmp(offset);
+        tcg_temp_free_i32(offset);
     }
 }
 
@@ -857,12 +810,13 @@  static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
         if (extra)
             tcg_gen_addi_i32(var, var, extra);
         rm = (insn) & 0xf;
-        offset = load_reg(s, rm);
+        offset = tcg_temp_new_i32();
+        load_reg_var(s, offset, rm);
         if (!(insn & (1 << 23)))
             tcg_gen_sub_i32(var, var, offset);
         else
             tcg_gen_add_i32(var, var, offset);
-        dead_tmp(offset);
+        tcg_temp_free_i32(offset);
     }
 }
 
@@ -1001,25 +955,24 @@  VFP_GEN_FIX(ulto)
 static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv addr)
 {
     if (dp)
-        tcg_gen_qemu_ld64(cpu_F0d, addr, IS_USER(s));
+        gen_ld64(cpu_F0d, addr, IS_USER(s));
     else
-        tcg_gen_qemu_ld32u(cpu_F0s, addr, IS_USER(s));
+        gen_ld32(cpu_F0s, addr, IS_USER(s));
 }
 
 static inline void gen_vfp_st(DisasContext *s, int dp, TCGv addr)
 {
     if (dp)
-        tcg_gen_qemu_st64(cpu_F0d, addr, IS_USER(s));
+        gen_st64(cpu_F0d, addr, IS_USER(s));
     else
-        tcg_gen_qemu_st32(cpu_F0s, addr, IS_USER(s));
+        gen_st32(cpu_F0s, addr, IS_USER(s));
 }
 
-static inline long
-vfp_reg_offset (int dp, int reg)
+static inline long vfp_reg_offset(int dp, int reg)
 {
-    if (dp)
+    if (dp) {
         return offsetof(CPUARMState, vfp.regs[reg]);
-    else if (reg & 1) {
+    } else if (reg & 1) {
         return offsetof(CPUARMState, vfp.regs[reg >> 1])
           + offsetof(CPU_DoubleU, l.upper);
     } else {
@@ -1030,25 +983,21 @@  vfp_reg_offset (int dp, int reg)
 
 /* Return the offset of a 32-bit piece of a NEON register.
    zero is the least significant end of the register.  */
-static inline long
-neon_reg_offset (int reg, int n)
+static inline long neon_reg_offset(int reg, int n)
 {
     int sreg;
     sreg = reg * 2 + n;
     return vfp_reg_offset(0, sreg);
 }
 
-static TCGv neon_load_reg(int reg, int pass)
+static void neon_load_reg(TCGv ret, int reg, int pass)
 {
-    TCGv tmp = new_tmp();
-    tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
-    return tmp;
+    tcg_gen_ld_i32(ret, cpu_env, neon_reg_offset(reg, pass));
 }
 
 static void neon_store_reg(int reg, int pass, TCGv var)
 {
     tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
-    dead_tmp(var);
 }
 
 static inline void neon_load_reg64(TCGv_i64 var, int reg)
@@ -1102,11 +1051,9 @@  static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
     tcg_gen_st_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg]));
 }
 
-static inline TCGv iwmmxt_load_creg(int reg)
+static inline void iwmmxt_load_creg(TCGv ret, int reg)
 {
-    TCGv var = new_tmp();
-    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
-    return var;
+    tcg_gen_ld_i32(ret, cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
 }
 
 static inline void iwmmxt_store_creg(int reg, TCGv var)
@@ -1226,25 +1173,28 @@  IWMMXT_OP_ENV(packsq)
 
 static void gen_op_iwmmxt_set_mup(void)
 {
-    TCGv tmp;
-    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
+    TCGv tmp = tcg_temp_new_i32();
+    load_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
     tcg_gen_ori_i32(tmp, tmp, 2);
     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
+    tcg_temp_free_i32(tmp);
 }
 
 static void gen_op_iwmmxt_set_cup(void)
 {
-    TCGv tmp;
-    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
+    TCGv tmp = tcg_temp_new_i32();
+    load_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
     tcg_gen_ori_i32(tmp, tmp, 1);
     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
+    tcg_temp_free_i32(tmp);
 }
 
 static void gen_op_iwmmxt_setpsr_nz(void)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
+    tcg_temp_free_i32(tmp);
 }
 
 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
@@ -1261,7 +1211,8 @@  static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, TCGv dest)
     TCGv tmp;
 
     rd = (insn >> 16) & 0xf;
-    tmp = load_reg(s, rd);
+    tmp = tcg_temp_new_i32();
+    load_reg_var(s, tmp, rd);
 
     offset = (insn & 0xff) << ((insn >> 7) & 2);
     if (insn & (1 << 24)) {
@@ -1273,8 +1224,6 @@  static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, TCGv dest)
         tcg_gen_mov_i32(dest, tmp);
         if (insn & (1 << 21))
             store_reg(s, rd, tmp);
-        else
-            dead_tmp(tmp);
     } else if (insn & (1 << 21)) {
         /* Post indexed */
         tcg_gen_mov_i32(dest, tmp);
@@ -1283,8 +1232,11 @@  static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, TCGv dest)
         else
             tcg_gen_addi_i32(tmp, tmp, -offset);
         store_reg(s, rd, tmp);
-    } else if (!(insn & (1 << 23)))
+    } else if (!(insn & (1 << 23))) {
+        tcg_temp_free_i32(tmp);
         return 1;
+    }
+    tcg_temp_free_i32(tmp);
     return 0;
 }
 
@@ -1296,17 +1248,17 @@  static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv dest)
     if (insn & (1 << 8)) {
         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
             return 1;
-        } else {
-            tmp = iwmmxt_load_creg(rd);
         }
+        tmp = tcg_temp_new_i32();
+        iwmmxt_load_creg(tmp, rd);
     } else {
-        tmp = new_tmp();
+        tmp = tcg_temp_new_i32();
         iwmmxt_load_reg(cpu_V0, rd);
         tcg_gen_trunc_i64_i32(tmp, cpu_V0);
     }
     tcg_gen_andi_i32(tmp, tmp, mask);
     tcg_gen_mov_i32(dest, tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
     return 0;
 }
 
@@ -1338,54 +1290,60 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
         }
 
         wrd = (insn >> 12) & 0xf;
-        addr = new_tmp();
+        addr = tcg_temp_new_i32();
         if (gen_iwmmxt_address(s, insn, addr)) {
-            dead_tmp(addr);
+            tcg_temp_free_i32(addr);
             return 1;
         }
         if (insn & ARM_CP_RW_BIT) {
             if ((insn >> 28) == 0xf) {			/* WLDRW wCx */
-                tmp = new_tmp();
-                tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+                tmp = tcg_temp_new_i32();
+                gen_ld32(tmp, addr, IS_USER(s));
                 iwmmxt_store_creg(wrd, tmp);
+                tcg_temp_free_i32(tmp);
             } else {
                 i = 1;
                 if (insn & (1 << 8)) {
                     if (insn & (1 << 22)) {		/* WLDRD */
-                        tcg_gen_qemu_ld64(cpu_M0, addr, IS_USER(s));
+                        gen_ld64(cpu_M0, addr, IS_USER(s));
                         i = 0;
                     } else {				/* WLDRW wRd */
-                        tmp = gen_ld32(addr, IS_USER(s));
+                        tmp = tcg_temp_new_i32();
+                        gen_ld32(tmp, addr, IS_USER(s));
                     }
                 } else {
+                    tmp = tcg_temp_new_i32();
                     if (insn & (1 << 22)) {		/* WLDRH */
-                        tmp = gen_ld16u(addr, IS_USER(s));
+                        gen_ld16u(tmp, addr, IS_USER(s));
                     } else {				/* WLDRB */
-                        tmp = gen_ld8u(addr, IS_USER(s));
+                        gen_ld8u(tmp, addr, IS_USER(s));
                     }
                 }
                 if (i) {
                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
-                    dead_tmp(tmp);
+                    tcg_temp_free_i32(tmp);
                 }
                 gen_op_iwmmxt_movq_wRn_M0(wrd);
             }
         } else {
             if ((insn >> 28) == 0xf) {			/* WSTRW wCx */
-                tmp = iwmmxt_load_creg(wrd);
+                tmp = tcg_temp_new_i32();
+                iwmmxt_load_creg(tmp, wrd);
                 gen_st32(tmp, addr, IS_USER(s));
+                tcg_temp_free_i32(tmp);
             } else {
                 gen_op_iwmmxt_movq_M0_wRn(wrd);
-                tmp = new_tmp();
                 if (insn & (1 << 8)) {
                     if (insn & (1 << 22)) {		/* WSTRD */
-                        dead_tmp(tmp);
-                        tcg_gen_qemu_st64(cpu_M0, addr, IS_USER(s));
+                        gen_st64(cpu_M0, addr, IS_USER(s));
                     } else {				/* WSTRW wRd */
+                        tmp = tcg_temp_new_i32();
                         tcg_gen_trunc_i64_i32(tmp, cpu_M0);
                         gen_st32(tmp, addr, IS_USER(s));
+                        tcg_temp_free_i32(tmp);
                     }
                 } else {
+                    tmp = tcg_temp_new_i32();
                     if (insn & (1 << 22)) {		/* WSTRH */
                         tcg_gen_trunc_i64_i32(tmp, cpu_M0);
                         gen_st16(tmp, addr, IS_USER(s));
@@ -1393,6 +1351,7 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
                         tcg_gen_trunc_i64_i32(tmp, cpu_M0);
                         gen_st8(tmp, addr, IS_USER(s));
                     }
+                    tcg_temp_free_i32(tmp);
                 }
             }
         }
@@ -1427,19 +1386,24 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
             gen_op_iwmmxt_set_cup();
             /* Fall through.  */
         case ARM_IWMMXT_wCSSF:
-            tmp = iwmmxt_load_creg(wrd);
-            tmp2 = load_reg(s, rd);
+            tmp = tcg_temp_new_i32();
+            iwmmxt_load_creg(tmp, wrd);
+            tmp2 = tcg_temp_new_i32();
+            load_reg_var(s, tmp2, rd);
             tcg_gen_andc_i32(tmp, tmp, tmp2);
-            dead_tmp(tmp2);
+            tcg_temp_free_i32(tmp2);
             iwmmxt_store_creg(wrd, tmp);
+            tcg_temp_free_i32(tmp);
             break;
         case ARM_IWMMXT_wCGR0:
         case ARM_IWMMXT_wCGR1:
         case ARM_IWMMXT_wCGR2:
         case ARM_IWMMXT_wCGR3:
             gen_op_iwmmxt_set_cup();
-            tmp = load_reg(s, rd);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rd);
             iwmmxt_store_creg(wrd, tmp);
+            tcg_temp_free_i32(tmp);
             break;
         default:
             return 1;
@@ -1461,8 +1425,10 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
             return 1;
         rd = (insn >> 12) & 0xf;
         wrd = (insn >> 16) & 0xf;
-        tmp = iwmmxt_load_creg(wrd);
+        tmp = tcg_temp_new_i32();
+        iwmmxt_load_creg(tmp, wrd);
         store_reg(s, rd, tmp);
+        tcg_temp_free_i32(tmp);
         break;
     case 0x300:						/* WANDN */
         wrd = (insn >> 12) & 0xf;
@@ -1639,11 +1605,12 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
         rd0 = (insn >> 16) & 0xf;
         rd1 = (insn >> 0) & 0xf;
         gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
+        tmp = tcg_temp_new_i32();
+        iwmmxt_load_creg(tmp, ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
         tcg_gen_andi_i32(tmp, tmp, 7);
         iwmmxt_load_reg(cpu_V1, rd1);
         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp);
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
@@ -1652,7 +1619,8 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
             return 1;
         rd = (insn >> 12) & 0xf;
         wrd = (insn >> 16) & 0xf;
-        tmp = load_reg(s, rd);
+        tmp = tcg_temp_new_i32();
+        load_reg_var(s, tmp, rd);
         gen_op_iwmmxt_movq_M0_wRn(wrd);
         switch ((insn >> 6) & 3) {
         case 0:
@@ -1674,7 +1642,7 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
         tcg_temp_free(tmp3);
         tcg_temp_free(tmp2);
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp);
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
@@ -1684,7 +1652,7 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
         if (rd == 15 || ((insn >> 22) & 3) == 3)
             return 1;
         gen_op_iwmmxt_movq_M0_wRn(wrd);
-        tmp = new_tmp();
+        tmp = tcg_temp_new_i32();
         switch ((insn >> 22) & 3) {
         case 0:
             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
@@ -1710,11 +1678,13 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
             break;
         }
         store_reg(s, rd, tmp);
+        tcg_temp_free_i32(tmp);
         break;
     case 0x117: case 0x517: case 0x917: case 0xd17:	/* TEXTRC */
         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
             return 1;
-        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
+        tmp = tcg_temp_new_i32();
+        iwmmxt_load_creg(tmp, ARM_IWMMXT_wCASF);
         switch ((insn >> 22) & 3) {
         case 0:
             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
@@ -1728,14 +1698,15 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
         }
         tcg_gen_shli_i32(tmp, tmp, 28);
         gen_set_nzcv(tmp);
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp);
         break;
     case 0x401: case 0x405: case 0x409: case 0x40d:	/* TBCST */
         if (((insn >> 6) & 3) == 3)
             return 1;
         rd = (insn >> 12) & 0xf;
         wrd = (insn >> 16) & 0xf;
-        tmp = load_reg(s, rd);
+        tmp = tcg_temp_new_i32();
+        load_reg_var(s, tmp, rd);
         switch ((insn >> 6) & 3) {
         case 0:
             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
@@ -1747,15 +1718,16 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
             break;
         }
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp);
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
     case 0x113: case 0x513: case 0x913: case 0xd13:	/* TANDC */
         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
             return 1;
-        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
-        tmp2 = new_tmp();
+        tmp = tcg_temp_new_i32();
+        iwmmxt_load_creg(tmp, ARM_IWMMXT_wCASF);
+        tmp2 = tcg_temp_new_i32();
         tcg_gen_mov_i32(tmp2, tmp);
         switch ((insn >> 22) & 3) {
         case 0:
@@ -1776,8 +1748,8 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
             break;
         }
         gen_set_nzcv(tmp);
-        dead_tmp(tmp2);
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp2);
+        tcg_temp_free_i32(tmp);
         break;
     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:	/* WACC */
         wrd = (insn >> 12) & 0xf;
@@ -1802,8 +1774,9 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
     case 0x115: case 0x515: case 0x915: case 0xd15:	/* TORC */
         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
             return 1;
-        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
-        tmp2 = new_tmp();
+        tmp = tcg_temp_new_i32();
+        iwmmxt_load_creg(tmp, ARM_IWMMXT_wCASF);
+        tmp2 = tcg_temp_new_i32();
         tcg_gen_mov_i32(tmp2, tmp);
         switch ((insn >> 22) & 3) {
         case 0:
@@ -1824,8 +1797,8 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
             break;
         }
         gen_set_nzcv(tmp);
-        dead_tmp(tmp2);
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp2);
+        tcg_temp_free_i32(tmp);
         break;
     case 0x103: case 0x503: case 0x903: case 0xd03:	/* TMOVMSK */
         rd = (insn >> 12) & 0xf;
@@ -1833,7 +1806,7 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
             return 1;
         gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = new_tmp();
+        tmp = tcg_temp_new_i32();
         switch ((insn >> 22) & 3) {
         case 0:
             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
@@ -1846,6 +1819,7 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
             break;
         }
         store_reg(s, rd, tmp);
+        tcg_temp_free_i32(tmp);
         break;
     case 0x106: case 0x306: case 0x506: case 0x706:	/* WCMPGT */
     case 0x906: case 0xb06: case 0xd06: case 0xf06:
@@ -1948,9 +1922,9 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = new_tmp();
+        tmp = tcg_temp_new_i32();
         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
-            dead_tmp(tmp);
+            tcg_temp_free_i32(tmp);
             return 1;
         }
         switch ((insn >> 22) & 3) {
@@ -1964,7 +1938,7 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         }
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp);
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
@@ -1976,9 +1950,9 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = new_tmp();
+        tmp = tcg_temp_new_i32();
         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
-            dead_tmp(tmp);
+            tcg_temp_free_i32(tmp);
             return 1;
         }
         switch ((insn >> 22) & 3) {
@@ -1992,7 +1966,7 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         }
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp);
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
@@ -2004,9 +1978,9 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = new_tmp();
+        tmp = tcg_temp_new_i32();
         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
-            dead_tmp(tmp);
+            tcg_temp_free_i32(tmp);
             return 1;
         }
         switch ((insn >> 22) & 3) {
@@ -2020,7 +1994,7 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         }
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp);
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
@@ -2032,31 +2006,31 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
         wrd = (insn >> 12) & 0xf;
         rd0 = (insn >> 16) & 0xf;
         gen_op_iwmmxt_movq_M0_wRn(rd0);
-        tmp = new_tmp();
+        tmp = tcg_temp_new_i32();
         switch ((insn >> 22) & 3) {
         case 1:
             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
-                dead_tmp(tmp);
+                tcg_temp_free_i32(tmp);
                 return 1;
             }
             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         case 2:
             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
-                dead_tmp(tmp);
+                tcg_temp_free_i32(tmp);
                 return 1;
             }
             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         case 3:
             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
-                dead_tmp(tmp);
+                tcg_temp_free_i32(tmp);
                 return 1;
             }
             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
             break;
         }
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp);
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         gen_op_iwmmxt_set_cup();
@@ -2280,8 +2254,10 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
         if (rd0 == 0xf || rd1 == 0xf)
             return 1;
         gen_op_iwmmxt_movq_M0_wRn(wrd);
-        tmp = load_reg(s, rd0);
-        tmp2 = load_reg(s, rd1);
+        tmp = tcg_temp_new_i32();
+        load_reg_var(s, tmp, rd0);
+        tmp2 = tcg_temp_new_i32();
+        load_reg_var(s, tmp2, rd1);
         switch ((insn >> 16) & 0xf) {
         case 0x0:					/* TMIA */
             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
@@ -2297,12 +2273,12 @@  static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
             break;
         default:
-            dead_tmp(tmp2);
-            dead_tmp(tmp);
+            tcg_temp_free_i32(tmp2);
+            tcg_temp_free_i32(tmp);
             return 1;
         }
-        dead_tmp(tmp2);
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp2);
+        tcg_temp_free_i32(tmp);
         gen_op_iwmmxt_movq_wRn_M0(wrd);
         gen_op_iwmmxt_set_mup();
         break;
@@ -2329,8 +2305,10 @@  static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
         if (acc != 0)
             return 1;
 
-        tmp = load_reg(s, rd0);
-        tmp2 = load_reg(s, rd1);
+        tmp = tcg_temp_new_i32();
+        load_reg_var(s, tmp, rd0);
+        tmp2 = tcg_temp_new_i32();
+        load_reg_var(s, tmp2, rd1);
         switch ((insn >> 16) & 0xf) {
         case 0x0:					/* MIA */
             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
@@ -2351,8 +2329,11 @@  static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
         default:
+            /* Undefined MIA variant: release both operand temps first.  */
+            tcg_temp_free_i32(tmp2);
+            tcg_temp_free_i32(tmp);
             return 1;
         }
-        dead_tmp(tmp2);
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp2);
+        tcg_temp_free_i32(tmp);
 
         gen_op_iwmmxt_movq_wRn_M0(acc);
         return 0;
@@ -2398,21 +2376,21 @@  static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn)
         if (!env->cp[cp].cp_read)
             return 1;
         gen_set_pc_im(s->pc);
-        tmp = new_tmp();
+        tmp = tcg_temp_new_i32();
         tmp2 = tcg_const_i32(insn);
         gen_helper_get_cp(tmp, cpu_env, tmp2);
-        tcg_temp_free(tmp2);
         store_reg(s, rd, tmp);
     } else {
         if (!env->cp[cp].cp_write)
             return 1;
         gen_set_pc_im(s->pc);
-        tmp = load_reg(s, rd);
+        tmp = tcg_temp_new_i32();
+        load_reg_var(s, tmp, rd);
         tmp2 = tcg_const_i32(insn);
         gen_helper_set_cp(cpu_env, tmp2, tmp);
-        tcg_temp_free(tmp2);
-        dead_tmp(tmp);
     }
+    tcg_temp_free_i32(tmp2);
+    tcg_temp_free_i32(tmp);
     return 0;
 }
 
@@ -2471,18 +2449,17 @@  static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
     }
     rd = (insn >> 12) & 0xf;
     tmp2 = tcg_const_i32(insn);
+    tmp = tcg_temp_new_i32();
     if (insn & ARM_CP_RW_BIT) {
-        tmp = new_tmp();
         gen_helper_get_cp15(tmp, cpu_env, tmp2);
         /* If the destination register is r15 then sets condition codes.  */
         if (rd != 15)
             store_reg(s, rd, tmp);
-        else
-            dead_tmp(tmp);
+        tcg_temp_free_i32(tmp);
     } else {
-        tmp = load_reg(s, rd);
+        load_reg_var(s, tmp, rd);
         gen_helper_set_cp15(cpu_env, tmp2, tmp);
-        dead_tmp(tmp);
+        tcg_temp_free_i32(tmp);
         /* Normally we would always end the TB here, but Linux
          * arch/arm/mach-pxa/sleep.S expects two instructions following
          * an MMU enable to execute from cache.  Imitate this behaviour.  */
@@ -2515,17 +2492,14 @@  static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
 
 /* Move between integer and VFP cores.  */
-static TCGv gen_vfp_mrs(void)
+static void gen_vfp_mrs(TCGv ret)
 {
-    TCGv tmp = new_tmp();
-    tcg_gen_mov_i32(tmp, cpu_F0s);
-    return tmp;
+    tcg_gen_mov_i32(ret, cpu_F0s);
 }
 
 static void gen_vfp_msr(TCGv tmp)
 {
     tcg_gen_mov_i32(cpu_F0s, tmp);
-    dead_tmp(tmp);
 }
 
 static inline int
@@ -2536,7 +2510,7 @@  vfp_enabled(CPUState * env)
 
 static void gen_neon_dup_u8(TCGv var, int shift)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     if (shift)
         tcg_gen_shri_i32(var, var, shift);
     tcg_gen_ext8u_i32(var, var);
@@ -2544,25 +2518,25 @@  static void gen_neon_dup_u8(TCGv var, int shift)
     tcg_gen_or_i32(var, var, tmp);
     tcg_gen_shli_i32(tmp, var, 16);
     tcg_gen_or_i32(var, var, tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 static void gen_neon_dup_low16(TCGv var)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     tcg_gen_ext16u_i32(var, var);
     tcg_gen_shli_i32(tmp, var, 16);
     tcg_gen_or_i32(var, var, tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 static void gen_neon_dup_high16(TCGv var)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     tcg_gen_andi_i32(var, var, 0xffff0000);
     tcg_gen_shri_i32(tmp, var, 16);
     tcg_gen_or_i32(var, var, tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 /* Disassemble a VFP instruction.  Returns nonzero if an error occured
@@ -2615,9 +2589,10 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     size = 2;
                     offset = 0;
                 }
+                tmp = tcg_temp_new_i32();
                 if (insn & ARM_CP_RW_BIT) {
                     /* vfp->arm */
-                    tmp = neon_load_reg(rn, pass);
+                    neon_load_reg(tmp, rn, pass);
                     switch (size) {
                     case 0:
                         if (offset)
@@ -2648,7 +2623,7 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     store_reg(s, rd, tmp);
                 } else {
                     /* arm->vfp */
-                    tmp = load_reg(s, rd);
+                    load_reg_var(s, tmp, rd);
                     if (insn & (1 << 23)) {
                         /* VDUP */
                         if (size == 0) {
@@ -2656,24 +2631,27 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         } else if (size == 1) {
                             gen_neon_dup_low16(tmp);
                         }
+                        tmp2 = tcg_temp_new_i32();
                         for (n = 0; n <= pass * 2; n++) {
-                            tmp2 = new_tmp();
                             tcg_gen_mov_i32(tmp2, tmp);
                             neon_store_reg(rn, n, tmp2);
                         }
+                        tcg_temp_free_i32(tmp2);
                         neon_store_reg(rn, n, tmp);
                     } else {
                         /* VMOV */
                         switch (size) {
                         case 0:
-                            tmp2 = neon_load_reg(rn, pass);
+                            tmp2 = tcg_temp_new_i32();
+                            neon_load_reg(tmp2, rn, pass);
                             gen_bfi(tmp, tmp2, tmp, offset, 0xff);
-                            dead_tmp(tmp2);
+                            tcg_temp_free_i32(tmp2);
                             break;
                         case 1:
-                            tmp2 = neon_load_reg(rn, pass);
+                            tmp2 = tcg_temp_new_i32();
+                            neon_load_reg(tmp2, rn, pass);
                             gen_bfi(tmp, tmp2, tmp, offset, 0xffff);
-                            dead_tmp(tmp2);
+                            tcg_temp_free_i32(tmp2);
                             break;
                         case 2:
                             break;
@@ -2681,6 +2659,7 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         neon_store_reg(rn, pass, tmp);
                     }
                 }
+                tcg_temp_free_i32(tmp);
             } else { /* !dp */
                 if ((insn & 0x6f) != 0x00)
                     return 1;
@@ -2699,12 +2678,14 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             if (IS_USER(s)
                                 && arm_feature(env, ARM_FEATURE_VFP3))
                                 return 1;
-                            tmp = load_cpu_field(vfp.xregs[rn]);
+                            tmp = tcg_temp_new_i32();
+                            load_cpu_field(tmp, vfp.xregs[rn]);
                             break;
                         case ARM_VFP_FPEXC:
                             if (IS_USER(s))
                                 return 1;
-                            tmp = load_cpu_field(vfp.xregs[rn]);
+                            tmp = tcg_temp_new_i32();
+                            load_cpu_field(tmp, vfp.xregs[rn]);
                             break;
                         case ARM_VFP_FPINST:
                         case ARM_VFP_FPINST2:
@@ -2712,14 +2693,15 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             if (IS_USER(s)
                                 || arm_feature(env, ARM_FEATURE_VFP3))
                                 return 1;
-                            tmp = load_cpu_field(vfp.xregs[rn]);
+                            tmp = tcg_temp_new_i32();
+                            load_cpu_field(tmp, vfp.xregs[rn]);
                             break;
                         case ARM_VFP_FPSCR:
+                            tmp = tcg_temp_new_i32();
                             if (rd == 15) {
-                                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
+                                load_cpu_field(tmp, vfp.xregs[ARM_VFP_FPSCR]);
                                 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
                             } else {
-                                tmp = new_tmp();
                                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
                             }
                             break;
@@ -2728,25 +2710,28 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             if (IS_USER(s)
                                 || !arm_feature(env, ARM_FEATURE_VFP3))
                                 return 1;
-                            tmp = load_cpu_field(vfp.xregs[rn]);
+                            tmp = tcg_temp_new_i32();
+                            load_cpu_field(tmp, vfp.xregs[rn]);
                             break;
                         default:
                             return 1;
                         }
                     } else {
                         gen_mov_F0_vreg(0, rn);
-                        tmp = gen_vfp_mrs();
+                        tmp = tcg_temp_new_i32();
+                        gen_vfp_mrs(tmp);
                     }
                     if (rd == 15) {
                         /* Set the 4 flag bits in the CPSR.  */
                         gen_set_nzcv(tmp);
-                        dead_tmp(tmp);
                     } else {
                         store_reg(s, rd, tmp);
                     }
+                    tcg_temp_free_i32(tmp);
                 } else {
                     /* arm->vfp */
-                    tmp = load_reg(s, rd);
+                    tmp = tcg_temp_new_i32();
+                    load_reg_var(s, tmp, rd);
                     if (insn & (1 << 21)) {
                         rn >>= 1;
                         /* system register */
@@ -2755,30 +2740,37 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         case ARM_VFP_MVFR0:
                         case ARM_VFP_MVFR1:
                             /* Writes are ignored.  */
+                            tcg_temp_free_i32(tmp);
                             break;
                         case ARM_VFP_FPSCR:
                             gen_helper_vfp_set_fpscr(cpu_env, tmp);
-                            dead_tmp(tmp);
+                            tcg_temp_free_i32(tmp);
                             gen_lookup_tb(s);
                             break;
                         case ARM_VFP_FPEXC:
-                            if (IS_USER(s))
+                            if (IS_USER(s)) {
+                                tcg_temp_free_i32(tmp);
                                 return 1;
+                            }
                             /* TODO: VFP subarchitecture support.
                              * For now, keep the EN bit only */
                             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
                             store_cpu_field(tmp, vfp.xregs[rn]);
+                            tcg_temp_free_i32(tmp);
                             gen_lookup_tb(s);
                             break;
                         case ARM_VFP_FPINST:
                         case ARM_VFP_FPINST2:
                             store_cpu_field(tmp, vfp.xregs[rn]);
+                            tcg_temp_free_i32(tmp);
                             break;
                         default:
+                            tcg_temp_free_i32(tmp);
                             return 1;
                         }
                     } else {
                         gen_vfp_msr(tmp);
+                        tcg_temp_free_i32(tmp);
                         gen_mov_vreg_F0(0, rn);
                     }
                 }
@@ -3122,41 +3114,43 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 rm = VFP_SREG_M(insn);
             }
 
+            tmp = tcg_temp_new_i32();
             if (insn & ARM_CP_RW_BIT) {
                 /* vfp->arm */
                 if (dp) {
                     gen_mov_F0_vreg(0, rm * 2);
-                    tmp = gen_vfp_mrs();
+                    gen_vfp_mrs(tmp);
                     store_reg(s, rd, tmp);
                     gen_mov_F0_vreg(0, rm * 2 + 1);
-                    tmp = gen_vfp_mrs();
+                    gen_vfp_mrs(tmp);
                     store_reg(s, rn, tmp);
                 } else {
                     gen_mov_F0_vreg(0, rm);
-                    tmp = gen_vfp_mrs();
+                    gen_vfp_mrs(tmp);
                     store_reg(s, rn, tmp);
                     gen_mov_F0_vreg(0, rm + 1);
-                    tmp = gen_vfp_mrs();
+                    gen_vfp_mrs(tmp);
                     store_reg(s, rd, tmp);
                 }
             } else {
                 /* arm->vfp */
                 if (dp) {
-                    tmp = load_reg(s, rd);
+                    load_reg_var(s, tmp, rd);
                     gen_vfp_msr(tmp);
                     gen_mov_vreg_F0(0, rm * 2);
-                    tmp = load_reg(s, rn);
+                    load_reg_var(s, tmp, rn);
                     gen_vfp_msr(tmp);
                     gen_mov_vreg_F0(0, rm * 2 + 1);
                 } else {
-                    tmp = load_reg(s, rn);
+                    load_reg_var(s, tmp, rn);
                     gen_vfp_msr(tmp);
                     gen_mov_vreg_F0(0, rm);
-                    tmp = load_reg(s, rd);
+                    load_reg_var(s, tmp, rd);
                     gen_vfp_msr(tmp);
                     gen_mov_vreg_F0(0, rm + 1);
                 }
             }
+            tcg_temp_free_i32(tmp);
         } else {
             /* Load/store */
             rn = (insn >> 16) & 0xf;
@@ -3164,11 +3158,11 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 VFP_DREG_D(rd, insn);
             else
                 rd = VFP_SREG_D(insn);
+            addr = tcg_temp_new_i32();
             if (s->thumb && rn == 15) {
-                addr = new_tmp();
                 tcg_gen_movi_i32(addr, s->pc & ~2);
             } else {
-                addr = load_reg(s, rn);
+                load_reg_var(s, addr, rn);
             }
             if ((insn & 0x01200000) == 0x01000000) {
                 /* Single load/store */
@@ -3183,7 +3177,6 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     gen_mov_F0_vreg(dp, rd);
                     gen_vfp_st(s, dp, addr);
                 }
-                dead_tmp(addr);
             } else {
                 /* load/store multiple */
                 if (dp)
@@ -3222,10 +3215,9 @@  static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     if (offset != 0)
                         tcg_gen_addi_i32(addr, addr, offset);
                     store_reg(s, rn, addr);
-                } else {
-                    dead_tmp(addr);
                 }
             }
+            tcg_temp_free_i32(addr);
         }
         break;
     default:
@@ -3305,7 +3297,7 @@  static uint32_t msr_mask(CPUState *env, DisasContext *s, int flags, int spsr) {
     return mask;
 }
 
-/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
+/* Returns nonzero if access to the PSR is not permitted. Clobbers t0. */
 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv t0)
 {
     TCGv tmp;
@@ -3314,15 +3306,16 @@  static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv t0)
         if (IS_USER(s))
             return 1;
 
-        tmp = load_cpu_field(spsr);
+        tmp = tcg_temp_new_i32();
+        load_cpu_field(tmp, spsr);
         tcg_gen_andi_i32(tmp, tmp, ~mask);
         tcg_gen_andi_i32(t0, t0, mask);
         tcg_gen_or_i32(tmp, tmp, t0);
         store_cpu_field(tmp, spsr);
+        tcg_temp_free_i32(tmp);
     } else {
         gen_set_cpsr(t0, mask);
     }
-    dead_tmp(t0);
     gen_lookup_tb(s);
     return 0;
 }
@@ -3330,40 +3323,42 @@  static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv t0)
 /* Returns nonzero if access to the PSR is not permitted.  */
 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
 {
-    TCGv tmp;
-    tmp = new_tmp();
+    int result;
+    TCGv tmp = tcg_temp_new_i32();
     tcg_gen_movi_i32(tmp, val);
-    return gen_set_psr(s, mask, spsr, tmp);
+    result = gen_set_psr(s, mask, spsr, tmp);
+    tcg_temp_free_i32(tmp);
+    return result;
 }
 
-/* Generate an old-style exception return. Marks pc as dead. */
+/* Generate an old-style exception return. */
 static void gen_exception_return(DisasContext *s, TCGv pc)
 {
     TCGv tmp;
     store_reg(s, 15, pc);
-    tmp = load_cpu_field(spsr);
+    tmp = tcg_temp_new_i32();
+    load_cpu_field(tmp, spsr);
     gen_set_cpsr(tmp, 0xffffffff);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
     s->is_jmp = DISAS_UPDATE;
 }
 
-/* Generate a v6 exception return.  Marks both values as dead.  */
+/* Generate a v6 exception return. */
 static void gen_rfe(DisasContext *s, TCGv pc, TCGv cpsr)
 {
     gen_set_cpsr(cpsr, 0xffffffff);
-    dead_tmp(cpsr);
     store_reg(s, 15, pc);
     s->is_jmp = DISAS_UPDATE;
 }
 
-static inline void
-gen_set_condexec (DisasContext *s)
+static inline void gen_set_condexec(DisasContext *s)
 {
     if (s->condexec_mask) {
         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
-        TCGv tmp = new_tmp();
+        TCGv tmp = tcg_temp_new_i32();
         tcg_gen_movi_i32(tmp, val);
         store_cpu_field(tmp, condexec_bits);
+        tcg_temp_free_i32(tmp);
     }
 }
 
@@ -3463,42 +3458,37 @@  static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1)
     default: return 1; \
     }} while (0)
 
-static TCGv neon_load_scratch(int scratch)
+static inline void neon_load_scratch(TCGv ret, int scratch)
 {
-    TCGv tmp = new_tmp();
-    tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
-    return tmp;
+    tcg_gen_ld_i32(ret, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
 }
 
-static void neon_store_scratch(int scratch, TCGv var)
+static inline void neon_store_scratch(int scratch, TCGv var)
 {
     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
-    dead_tmp(var);
 }
 
-static inline TCGv neon_get_scalar(int size, int reg)
+static inline void neon_get_scalar(TCGv ret, int size, int reg)
 {
-    TCGv tmp;
     if (size == 1) {
-        tmp = neon_load_reg(reg >> 1, reg & 1);
+        neon_load_reg(ret, reg >> 1, reg & 1);
     } else {
-        tmp = neon_load_reg(reg >> 2, (reg >> 1) & 1);
+        neon_load_reg(ret, reg >> 2, (reg >> 1) & 1);
         if (reg & 1) {
-            gen_neon_dup_low16(tmp);
+            gen_neon_dup_low16(ret);
         } else {
-            gen_neon_dup_high16(tmp);
+            gen_neon_dup_high16(ret);
         }
     }
-    return tmp;
 }
 
 static void gen_neon_unzip_u8(TCGv t0, TCGv t1)
 {
     TCGv rd, rm, tmp;
 
-    rd = new_tmp();
-    rm = new_tmp();
-    tmp = new_tmp();
+    rd = tcg_temp_new_i32();
+    rm = tcg_temp_new_i32();
+    tmp = tcg_temp_new_i32();
 
     tcg_gen_andi_i32(rd, t0, 0xff);
     tcg_gen_shri_i32(tmp, t0, 8);
@@ -3523,18 +3513,18 @@  static void gen_neon_unzip_u8(TCGv t0, TCGv t1)
     tcg_gen_or_i32(t1, rm, tmp);
     tcg_gen_mov_i32(t0, rd);
 
-    dead_tmp(tmp);
-    dead_tmp(rm);
-    dead_tmp(rd);
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i32(rm);
+    tcg_temp_free_i32(rd);
 }
 
 static void gen_neon_zip_u8(TCGv t0, TCGv t1)
 {
     TCGv rd, rm, tmp;
 
-    rd = new_tmp();
-    rm = new_tmp();
-    tmp = new_tmp();
+    rd = tcg_temp_new_i32();
+    rm = tcg_temp_new_i32();
+    tmp = tcg_temp_new_i32();
 
     tcg_gen_andi_i32(rd, t0, 0xff);
     tcg_gen_shli_i32(tmp, t1, 8);
@@ -3559,17 +3549,17 @@  static void gen_neon_zip_u8(TCGv t0, TCGv t1)
     tcg_gen_or_i32(t1, rm, tmp);
     tcg_gen_mov_i32(t0, rd);
 
-    dead_tmp(tmp);
-    dead_tmp(rm);
-    dead_tmp(rd);
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i32(rm);
+    tcg_temp_free_i32(rd);
 }
 
 static void gen_neon_zip_u16(TCGv t0, TCGv t1)
 {
     TCGv tmp, tmp2;
 
-    tmp = new_tmp();
-    tmp2 = new_tmp();
+    tmp = tcg_temp_new_i32();
+    tmp2 = tcg_temp_new_i32();
 
     tcg_gen_andi_i32(tmp, t0, 0xffff);
     tcg_gen_shli_i32(tmp2, t1, 16);
@@ -3579,8 +3569,8 @@  static void gen_neon_zip_u16(TCGv t0, TCGv t1)
     tcg_gen_or_i32(t1, t1, tmp2);
     tcg_gen_mov_i32(t0, tmp);
 
-    dead_tmp(tmp2);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp2);
+    tcg_temp_free_i32(tmp);
 }
 
 static void gen_neon_unzip(int reg, int q, int tmp, int size)
@@ -3588,9 +3578,11 @@  static void gen_neon_unzip(int reg, int q, int tmp, int size)
     int n;
     TCGv t0, t1;
 
+    t0 = tcg_temp_new_i32();
+    t1 = tcg_temp_new_i32();
     for (n = 0; n < q + 1; n += 2) {
-        t0 = neon_load_reg(reg, n);
-        t1 = neon_load_reg(reg, n + 1);
+        neon_load_reg(t0, reg, n);
+        neon_load_reg(t1, reg, n + 1);
         switch (size) {
         case 0: gen_neon_unzip_u8(t0, t1); break;
         case 1: gen_neon_zip_u16(t0, t1); break; /* zip and unzip are the same.  */
@@ -3600,14 +3592,16 @@  static void gen_neon_unzip(int reg, int q, int tmp, int size)
         neon_store_scratch(tmp + n, t0);
         neon_store_scratch(tmp + n + 1, t1);
     }
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t0);
 }
 
 static void gen_neon_trn_u8(TCGv t0, TCGv t1)
 {
     TCGv rd, tmp;
 
-    rd = new_tmp();
-    tmp = new_tmp();
+    rd = tcg_temp_new_i32();
+    tmp = tcg_temp_new_i32();
 
     tcg_gen_shli_i32(rd, t0, 8);
     tcg_gen_andi_i32(rd, rd, 0xff00ff00);
@@ -3620,16 +3614,16 @@  static void gen_neon_trn_u8(TCGv t0, TCGv t1)
     tcg_gen_or_i32(t1, t1, tmp);
     tcg_gen_mov_i32(t0, rd);
 
-    dead_tmp(tmp);
-    dead_tmp(rd);
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i32(rd);
 }
 
 static void gen_neon_trn_u16(TCGv t0, TCGv t1)
 {
     TCGv rd, tmp;
 
-    rd = new_tmp();
-    tmp = new_tmp();
+    rd = tcg_temp_new_i32();
+    tmp = tcg_temp_new_i32();
 
     tcg_gen_shli_i32(rd, t0, 16);
     tcg_gen_andi_i32(tmp, t1, 0xffff);
@@ -3639,8 +3633,8 @@  static void gen_neon_trn_u16(TCGv t0, TCGv t1)
     tcg_gen_or_i32(t1, t1, tmp);
     tcg_gen_mov_i32(t0, rd);
 
-    dead_tmp(tmp);
-    dead_tmp(rd);
+    tcg_temp_free_i32(tmp);
+    tcg_temp_free_i32(rd);
 }
 
 
@@ -3684,12 +3678,12 @@  static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
     TCGv_i64 tmp64;
 
     if (!vfp_enabled(env))
-      return 1;
+        return 1;
     VFP_DREG_D(rd, insn);
     rn = (insn >> 16) & 0xf;
     rm = insn & 0xf;
     load = (insn & (1 << 21)) != 0;
-    addr = new_tmp();
+    addr = tcg_temp_new_i32();
     if ((insn & (1 << 23)) == 0) {
         /* Load store all elements.  */
         op = (insn >> 8) & 0xf;
@@ -3712,63 +3706,68 @@  static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 tcg_gen_addi_i32(addr, addr, 1 << size);
             }
             if (size == 3) {
+                tmp64 = tcg_temp_new_i64();
                 if (load) {
-                    tmp64 = gen_ld64(addr, IS_USER(s));
+                    gen_ld64(tmp64, addr, IS_USER(s));
                     neon_store_reg64(tmp64, rd);
-                    tcg_temp_free_i64(tmp64);
                 } else {
-                    tmp64 = tcg_temp_new_i64();
                     neon_load_reg64(tmp64, rd);
                     gen_st64(tmp64, addr, IS_USER(s));
                 }
+                tcg_temp_free_i64(tmp64);
                 tcg_gen_addi_i32(addr, addr, stride);
             } else {
+                tmp = tcg_temp_new_i32();
                 for (pass = 0; pass < 2; pass++) {
                     if (size == 2) {
                         if (load) {
-                            tmp = gen_ld32(addr, IS_USER(s));
+                            gen_ld32(tmp, addr, IS_USER(s));
                             neon_store_reg(rd, pass, tmp);
                         } else {
-                            tmp = neon_load_reg(rd, pass);
+                            neon_load_reg(tmp, rd, pass);
                             gen_st32(tmp, addr, IS_USER(s));
                         }
                         tcg_gen_addi_i32(addr, addr, stride);
                     } else if (size == 1) {
                         if (load) {
-                            tmp = gen_ld16u(addr, IS_USER(s));
+                            gen_ld16u(tmp, addr, IS_USER(s));
                             tcg_gen_addi_i32(addr, addr, stride);
-                            tmp2 = gen_ld16u(addr, IS_USER(s));
+                            tmp2 = tcg_temp_new_i32();
+                            gen_ld16u(tmp2, addr, IS_USER(s));
                             tcg_gen_addi_i32(addr, addr, stride);
                             gen_bfi(tmp, tmp, tmp2, 16, 0xffff);
-                            dead_tmp(tmp2);
+                            tcg_temp_free_i32(tmp2);
                             neon_store_reg(rd, pass, tmp);
                         } else {
-                            tmp = neon_load_reg(rd, pass);
-                            tmp2 = new_tmp();
+                            neon_load_reg(tmp, rd, pass);
+                            tmp2 = tcg_temp_new_i32();
                             tcg_gen_shri_i32(tmp2, tmp, 16);
                             gen_st16(tmp, addr, IS_USER(s));
                             tcg_gen_addi_i32(addr, addr, stride);
                             gen_st16(tmp2, addr, IS_USER(s));
+                            tcg_temp_free_i32(tmp2);
                             tcg_gen_addi_i32(addr, addr, stride);
                         }
                     } else /* size == 0 */ {
                         if (load) {
                             TCGV_UNUSED(tmp2);
                             for (n = 0; n < 4; n++) {
-                                tmp = gen_ld8u(addr, IS_USER(s));
+                                gen_ld8u(tmp, addr, IS_USER(s));
                                 tcg_gen_addi_i32(addr, addr, stride);
                                 if (n == 0) {
                                     tmp2 = tmp;
+                                    tmp = tcg_temp_new_i32();
                                 } else {
                                     gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff);
-                                    dead_tmp(tmp);
+                                    tcg_temp_free_i32(tmp);
                                 }
                             }
                             neon_store_reg(rd, pass, tmp2);
+                            tmp = tmp2;
                         } else {
-                            tmp2 = neon_load_reg(rd, pass);
+                            tmp2 = tcg_temp_new_i32();
+                            neon_load_reg(tmp2, rd, pass);
                             for (n = 0; n < 4; n++) {
-                                tmp = new_tmp();
                                 if (n == 0) {
                                     tcg_gen_mov_i32(tmp, tmp2);
                                 } else {
@@ -3777,10 +3776,11 @@  static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                                 gen_st8(tmp, addr, IS_USER(s));
                                 tcg_gen_addi_i32(addr, addr, stride);
                             }
-                            dead_tmp(tmp2);
+                            tcg_temp_free_i32(tmp2);
                         }
                     }
                 }
+                tcg_temp_free_i32(tmp);
             }
             rd += spacing;
         }
@@ -3795,31 +3795,36 @@  static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
             nregs = ((insn >> 8) & 3) + 1;
             stride = (insn & (1 << 5)) ? 2 : 1;
             load_reg_var(s, addr, rn);
+            tmp = tcg_temp_new_i32();
+            tmp2 = tcg_temp_new_i32();
             for (reg = 0; reg < nregs; reg++) {
                 switch (size) {
                 case 0:
-                    tmp = gen_ld8u(addr, IS_USER(s));
+                    gen_ld8u(tmp, addr, IS_USER(s));
                     gen_neon_dup_u8(tmp, 0);
                     break;
                 case 1:
-                    tmp = gen_ld16u(addr, IS_USER(s));
+                    gen_ld16u(tmp, addr, IS_USER(s));
                     gen_neon_dup_low16(tmp);
                     break;
                 case 2:
-                    tmp = gen_ld32(addr, IS_USER(s));
+                    gen_ld32(tmp, addr, IS_USER(s));
                     break;
                 case 3:
+                    tcg_temp_free_i32(tmp2);
+                    tcg_temp_free_i32(tmp);
                     return 1;
                 default: /* Avoid compiler warnings.  */
                     abort();
                 }
                 tcg_gen_addi_i32(addr, addr, 1 << size);
-                tmp2 = new_tmp();
                 tcg_gen_mov_i32(tmp2, tmp);
                 neon_store_reg(rd, 0, tmp2);
                 neon_store_reg(rd, 1, tmp);
                 rd += stride;
             }
+            tcg_temp_free_i32(tmp2);
+            tcg_temp_free_i32(tmp);
             stride = (1 << size) * nregs;
         } else {
             /* Single element.  */
@@ -3842,29 +3847,31 @@  static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
             }
             nregs = ((insn >> 8) & 3) + 1;
             load_reg_var(s, addr, rn);
+            tmp = tcg_temp_new_i32();
             for (reg = 0; reg < nregs; reg++) {
                 if (load) {
                     switch (size) {
                     case 0:
-                        tmp = gen_ld8u(addr, IS_USER(s));
+                        gen_ld8u(tmp, addr, IS_USER(s));
                         break;
                     case 1:
-                        tmp = gen_ld16u(addr, IS_USER(s));
+                        gen_ld16u(tmp, addr, IS_USER(s));
                         break;
                     case 2:
-                        tmp = gen_ld32(addr, IS_USER(s));
+                        gen_ld32(tmp, addr, IS_USER(s));
                         break;
                     default: /* Avoid compiler warnings.  */
                         abort();
                     }
                     if (size != 2) {
-                        tmp2 = neon_load_reg(rd, pass);
+                        tmp2 = tcg_temp_new_i32();
+                        neon_load_reg(tmp2, rd, pass);
                         gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff);
-                        dead_tmp(tmp2);
+                        tcg_temp_free_i32(tmp2);
                     }
                     neon_store_reg(rd, pass, tmp);
                 } else { /* Store */
-                    tmp = neon_load_reg(rd, pass);
+                    neon_load_reg(tmp, rd, pass);
                     if (shift)
                         tcg_gen_shri_i32(tmp, tmp, shift);
                     switch (size) {
@@ -3882,23 +3889,24 @@  static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 rd += stride;
                 tcg_gen_addi_i32(addr, addr, 1 << size);
             }
+            tcg_temp_free_i32(tmp);
             stride = nregs * (1 << size);
         }
     }
-    dead_tmp(addr);
+    tcg_temp_free_i32(addr);
     if (rm != 15) {
-        TCGv base;
-
-        base = load_reg(s, rn);
+        tmp = tcg_temp_new_i32();
+        load_reg_var(s, tmp, rn);
         if (rm == 13) {
-            tcg_gen_addi_i32(base, base, stride);
+            tcg_gen_addi_i32(tmp, tmp, stride);
         } else {
-            TCGv index;
-            index = load_reg(s, rm);
-            tcg_gen_add_i32(base, base, index);
-            dead_tmp(index);
+            tmp2 = tcg_temp_new_i32();
+            load_reg_var(s, tmp2, rm);
+            tcg_gen_add_i32(tmp, tmp, tmp2);
+            tcg_temp_free_i32(tmp2);
         }
-        store_reg(s, rn, base);
+        store_reg(s, rn, tmp);
+        tcg_temp_free_i32(tmp);
     }
     return 0;
 }
@@ -3992,7 +4000,6 @@  static inline void gen_neon_widen(TCGv_i64 dest, TCGv src, int size, int u)
         default: abort();
         }
     }
-    dead_tmp(src);
 }
 
 static inline void gen_neon_addl(int size)
@@ -4036,21 +4043,13 @@  static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
 
 static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
 {
-    TCGv_i64 tmp;
-
     switch ((size << 1) | u) {
     case 0: gen_helper_neon_mull_s8(dest, a, b); break;
     case 1: gen_helper_neon_mull_u8(dest, a, b); break;
     case 2: gen_helper_neon_mull_s16(dest, a, b); break;
     case 3: gen_helper_neon_mull_u16(dest, a, b); break;
-    case 4:
-        tmp = gen_muls_i64_i32(a, b);
-        tcg_gen_mov_i64(dest, tmp);
-        break;
-    case 5:
-        tmp = gen_mulu_i64_i32(a, b);
-        tcg_gen_mov_i64(dest, tmp);
-        break;
+    case 4: gen_muls_i64_i32(dest, a, b); break;
+    case 5: gen_mulu_i64_i32(dest, a, b); break;
     default: abort();
     }
 }
@@ -4184,278 +4183,304 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             break;
         }
 
+        tmp = tcg_temp_new_i32();
+        tmp2 = tcg_temp_new_i32();
         for (pass = 0; pass < (q ? 4 : 2); pass++) {
-
-        if (pairwise) {
-            /* Pairwise.  */
-            if (q)
-                n = (pass & 1) * 2;
-            else
-                n = 0;
-            if (pass < q + 1) {
-                tmp = neon_load_reg(rn, n);
-                tmp2 = neon_load_reg(rn, n + 1);
+            if (pairwise) {
+                /* Pairwise.  */
+                if (q)
+                    n = (pass & 1) * 2;
+                else
+                    n = 0;
+                if (pass < q + 1) {
+                    neon_load_reg(tmp, rn, n);
+                    neon_load_reg(tmp2, rn, n + 1);
+                } else {
+                    neon_load_reg(tmp, rm, n);
+                    neon_load_reg(tmp2, rm, n + 1);
+                }
             } else {
-                tmp = neon_load_reg(rm, n);
-                tmp2 = neon_load_reg(rm, n + 1);
+                /* Elementwise.  */
+                neon_load_reg(tmp, rn, pass);
+                neon_load_reg(tmp2, rm, pass);
             }
-        } else {
-            /* Elementwise.  */
-            tmp = neon_load_reg(rn, pass);
-            tmp2 = neon_load_reg(rm, pass);
-        }
-        switch (op) {
-        case 0: /* VHADD */
-            GEN_NEON_INTEGER_OP(hadd);
-            break;
-        case 1: /* VQADD */
-            GEN_NEON_INTEGER_OP_ENV(qadd);
-            break;
-        case 2: /* VRHADD */
-            GEN_NEON_INTEGER_OP(rhadd);
-            break;
-        case 3: /* Logic ops.  */
-            switch ((u << 2) | size) {
-            case 0: /* VAND */
-                tcg_gen_and_i32(tmp, tmp, tmp2);
+            switch (op) {
+            case 0: /* VHADD */
+                GEN_NEON_INTEGER_OP(hadd);
                 break;
-            case 1: /* BIC */
-                tcg_gen_andc_i32(tmp, tmp, tmp2);
+            case 1: /* VQADD */
+                GEN_NEON_INTEGER_OP_ENV(qadd);
                 break;
-            case 2: /* VORR */
-                tcg_gen_or_i32(tmp, tmp, tmp2);
+            case 2: /* VRHADD */
+                GEN_NEON_INTEGER_OP(rhadd);
                 break;
-            case 3: /* VORN */
-                tcg_gen_orc_i32(tmp, tmp, tmp2);
+            case 3: /* Logic ops.  */
+                switch ((u << 2) | size) {
+                case 0: /* VAND */
+                    tcg_gen_and_i32(tmp, tmp, tmp2);
+                    break;
+                case 1: /* BIC */
+                    tcg_gen_andc_i32(tmp, tmp, tmp2);
+                    break;
+                case 2: /* VORR */
+                    tcg_gen_or_i32(tmp, tmp, tmp2);
+                    break;
+                case 3: /* VORN */
+                    tcg_gen_orc_i32(tmp, tmp, tmp2);
+                    break;
+                case 4: /* VEOR */
+                    tcg_gen_xor_i32(tmp, tmp, tmp2);
+                    break;
+                case 5: /* VBSL */
+                    tmp3 = tcg_temp_new_i32();
+                    neon_load_reg(tmp3, rd, pass);
+                    gen_neon_bsl(tmp, tmp, tmp2, tmp3);
+                    tcg_temp_free_i32(tmp3);
+                    break;
+                case 6: /* VBIT */
+                    tmp3 = tcg_temp_new_i32();
+                    neon_load_reg(tmp3, rd, pass);
+                    gen_neon_bsl(tmp, tmp, tmp3, tmp2);
+                    tcg_temp_free_i32(tmp3);
+                    break;
+                case 7: /* VBIF */
+                    tmp3 = tcg_temp_new_i32();
+                    neon_load_reg(tmp3, rd, pass);
+                    gen_neon_bsl(tmp, tmp3, tmp, tmp2);
+                    tcg_temp_free_i32(tmp3);
+                    break;
+                }
                 break;
-            case 4: /* VEOR */
-                tcg_gen_xor_i32(tmp, tmp, tmp2);
+            case 4: /* VHSUB */
+                GEN_NEON_INTEGER_OP(hsub);
                 break;
-            case 5: /* VBSL */
-                tmp3 = neon_load_reg(rd, pass);
-                gen_neon_bsl(tmp, tmp, tmp2, tmp3);
-                dead_tmp(tmp3);
+            case 5: /* VQSUB */
+                GEN_NEON_INTEGER_OP_ENV(qsub);
                 break;
-            case 6: /* VBIT */
-                tmp3 = neon_load_reg(rd, pass);
-                gen_neon_bsl(tmp, tmp, tmp3, tmp2);
-                dead_tmp(tmp3);
+            case 6: /* VCGT */
+                GEN_NEON_INTEGER_OP(cgt);
                 break;
-            case 7: /* VBIF */
-                tmp3 = neon_load_reg(rd, pass);
-                gen_neon_bsl(tmp, tmp3, tmp, tmp2);
-                dead_tmp(tmp3);
+            case 7: /* VCGE */
+                GEN_NEON_INTEGER_OP(cge);
                 break;
-            }
-            break;
-        case 4: /* VHSUB */
-            GEN_NEON_INTEGER_OP(hsub);
-            break;
-        case 5: /* VQSUB */
-            GEN_NEON_INTEGER_OP_ENV(qsub);
-            break;
-        case 6: /* VCGT */
-            GEN_NEON_INTEGER_OP(cgt);
-            break;
-        case 7: /* VCGE */
-            GEN_NEON_INTEGER_OP(cge);
-            break;
-        case 8: /* VSHL */
-            GEN_NEON_INTEGER_OP(shl);
-            break;
-        case 9: /* VQSHL */
-            GEN_NEON_INTEGER_OP_ENV(qshl);
-            break;
-        case 10: /* VRSHL */
-            GEN_NEON_INTEGER_OP(rshl);
-            break;
-        case 11: /* VQRSHL */
-            GEN_NEON_INTEGER_OP_ENV(qrshl);
-            break;
-        case 12: /* VMAX */
-            GEN_NEON_INTEGER_OP(max);
-            break;
-        case 13: /* VMIN */
-            GEN_NEON_INTEGER_OP(min);
-            break;
-        case 14: /* VABD */
-            GEN_NEON_INTEGER_OP(abd);
-            break;
-        case 15: /* VABA */
-            GEN_NEON_INTEGER_OP(abd);
-            dead_tmp(tmp2);
-            tmp2 = neon_load_reg(rd, pass);
-            gen_neon_add(size, tmp, tmp2);
-            break;
-        case 16:
-            if (!u) { /* VADD */
-                if (gen_neon_add(size, tmp, tmp2))
-                    return 1;
-            } else { /* VSUB */
-                switch (size) {
-                case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
-                case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
-                case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
-                default: return 1;
-                }
-            }
-            break;
-        case 17:
-            if (!u) { /* VTST */
-                switch (size) {
-                case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
-                case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
-                case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
-                default: return 1;
+            case 8: /* VSHL */
+                GEN_NEON_INTEGER_OP(shl);
+                break;
+            case 9: /* VQSHL */
+                GEN_NEON_INTEGER_OP_ENV(qshl);
+                break;
+            case 10: /* VRSHL */
+                GEN_NEON_INTEGER_OP(rshl);
+                break;
+            case 11: /* VQRSHL */
+                GEN_NEON_INTEGER_OP_ENV(qrshl);
+                break;
+            case 12: /* VMAX */
+                GEN_NEON_INTEGER_OP(max);
+                break;
+            case 13: /* VMIN */
+                GEN_NEON_INTEGER_OP(min);
+                break;
+            case 14: /* VABD */
+                GEN_NEON_INTEGER_OP(abd);
+                break;
+            case 15: /* VABA */
+                GEN_NEON_INTEGER_OP(abd);
+                neon_load_reg(tmp2, rd, pass);
+                gen_neon_add(size, tmp, tmp2);
+                break;
+            case 16:
+                if (!u) { /* VADD */
+                    if (gen_neon_add(size, tmp, tmp2)) {
+                        tcg_temp_free_i32(tmp);
+                        tcg_temp_free_i32(tmp2);
+                        return 1;
+                    }
+                } else { /* VSUB */
+                    switch (size) {
+                    case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
+                    case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
+                    case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
+                        default: tcg_temp_free_i32(tmp); tcg_temp_free_i32(tmp2); return 1;
+                    }
                 }
-            } else { /* VCEQ */
-                switch (size) {
-                case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
-                case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
-                case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
-                default: return 1;
+                break;
+            case 17:
+                if (!u) { /* VTST */
+                    switch (size) {
+                    case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
+                    case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
+                    case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
+                    default: tcg_temp_free_i32(tmp); tcg_temp_free_i32(tmp2); return 1;
+                    }
+                } else { /* VCEQ */
+                    switch (size) {
+                    case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
+                    case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
+                    case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
+                    default: tcg_temp_free_i32(tmp); tcg_temp_free_i32(tmp2); return 1;
+                    }
                 }
-            }
-            break;
-        case 18: /* Multiply.  */
-            switch (size) {
-            case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
-            case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
-            case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-            default: return 1;
-            }
-            dead_tmp(tmp2);
-            tmp2 = neon_load_reg(rd, pass);
-            if (u) { /* VMLS */
-                gen_neon_rsb(size, tmp, tmp2);
-            } else { /* VMLA */
-                gen_neon_add(size, tmp, tmp2);
-            }
-            break;
-        case 19: /* VMUL */
-            if (u) { /* polynomial */
-                gen_helper_neon_mul_p8(tmp, tmp, tmp2);
-            } else { /* Integer */
+                break;
+            case 18: /* Multiply.  */
                 switch (size) {
                 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
                 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
                 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-                default: return 1;
+                default: tcg_temp_free_i32(tmp); tcg_temp_free_i32(tmp2); return 1;
                 }
-            }
-            break;
-        case 20: /* VPMAX */
-            GEN_NEON_INTEGER_OP(pmax);
-            break;
-        case 21: /* VPMIN */
-            GEN_NEON_INTEGER_OP(pmin);
-            break;
-        case 22: /* Hultiply high.  */
-            if (!u) { /* VQDMULH */
-                switch (size) {
-                case 1: gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); break;
-                case 2: gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); break;
-                default: return 1;
+                neon_load_reg(tmp2, rd, pass);
+                if (u) { /* VMLS */
+                    gen_neon_rsb(size, tmp, tmp2);
+                } else { /* VMLA */
+                    gen_neon_add(size, tmp, tmp2);
                 }
-            } else { /* VQRDHMUL */
-                switch (size) {
-                case 1: gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); break;
-                case 2: gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); break;
-                default: return 1;
+                break;
+            case 19: /* VMUL */
+                if (u) { /* polynomial */
+                    gen_helper_neon_mul_p8(tmp, tmp, tmp2);
+                } else { /* Integer */
+                    switch (size) {
+                    case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
+                    case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
+                    case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
+                    default: tcg_temp_free_i32(tmp); tcg_temp_free_i32(tmp2); return 1;
+                    }
                 }
-            }
-            break;
-        case 23: /* VPADD */
-            if (u)
-                return 1;
-            switch (size) {
-            case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
-            case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
-            case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
-            default: return 1;
-            }
-            break;
-        case 26: /* Floating point arithnetic.  */
-            switch ((u << 2) | size) {
-            case 0: /* VADD */
-                gen_helper_neon_add_f32(tmp, tmp, tmp2);
                 break;
-            case 2: /* VSUB */
-                gen_helper_neon_sub_f32(tmp, tmp, tmp2);
+            case 20: /* VPMAX */
+                GEN_NEON_INTEGER_OP(pmax);
                 break;
-            case 4: /* VPADD */
-                gen_helper_neon_add_f32(tmp, tmp, tmp2);
+            case 21: /* VPMIN */
+                GEN_NEON_INTEGER_OP(pmin);
+                break;
+            case 22: /* Multiply high.  */
+                if (!u) { /* VQDMULH */
+                    switch (size) {
+                    case 1:
+                        gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
+                        break;
+                    case 2:
+                        gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
+                        break;
+                    default:
+                        tcg_temp_free_i32(tmp);
+                        tcg_temp_free_i32(tmp2);
+                        return 1;
+                    }
+                } else { /* VQRDMULH */
+                    switch (size) {
+                    case 1:
+                        gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
+                        break;
+                    case 2:
+                        gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
+                        break;
+                    default:
+                        tcg_temp_free_i32(tmp);
+                        tcg_temp_free_i32(tmp2);
+                        return 1;
+                    }
+                }
                 break;
-            case 6: /* VABD */
-                gen_helper_neon_abd_f32(tmp, tmp, tmp2);
+            case 23: /* VPADD */
+                if (u) {
+                    tcg_temp_free_i32(tmp);
+                    tcg_temp_free_i32(tmp2);
+                    return 1;
+                }
+                switch (size) {
+                case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
+                case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
+                case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
+                default: tcg_temp_free_i32(tmp); tcg_temp_free_i32(tmp2); return 1;
+                }
                 break;
-            default:
-                return 1;
-            }
-            break;
-        case 27: /* Float multiply.  */
-            gen_helper_neon_mul_f32(tmp, tmp, tmp2);
-            if (!u) {
-                dead_tmp(tmp2);
-                tmp2 = neon_load_reg(rd, pass);
-                if (size == 0) {
+            case 26: /* Floating point arithmetic.  */
+                switch ((u << 2) | size) {
+                case 0: /* VADD */
+                    gen_helper_neon_add_f32(tmp, tmp, tmp2);
+                    break;
+                case 2: /* VSUB */
+                    gen_helper_neon_sub_f32(tmp, tmp, tmp2);
+                    break;
+                case 4: /* VPADD */
                     gen_helper_neon_add_f32(tmp, tmp, tmp2);
+                    break;
+                case 6: /* VABD */
+                    gen_helper_neon_abd_f32(tmp, tmp, tmp2);
+                    break;
+                default:
+                    tcg_temp_free_i32(tmp);
+                    tcg_temp_free_i32(tmp2);
+                    return 1;
+                }
+                break;
+            case 27: /* Float multiply.  */
+                gen_helper_neon_mul_f32(tmp, tmp, tmp2);
+                if (!u) {
+                    neon_load_reg(tmp2, rd, pass);
+                    if (size == 0) {
+                        gen_helper_neon_add_f32(tmp, tmp, tmp2);
+                    } else {
+                        gen_helper_neon_sub_f32(tmp, tmp2, tmp);
+                    }
+                }
+                break;
+            case 28: /* Float compare.  */
+                if (!u) {
+                    gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
                 } else {
-                    gen_helper_neon_sub_f32(tmp, tmp2, tmp);
+                    if (size == 0)
+                        gen_helper_neon_cge_f32(tmp, tmp, tmp2);
+                    else
+                        gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
+                }
+                break;
+            case 29: /* Float compare absolute.  */
+                if (!u) {
+                    tcg_temp_free_i32(tmp);
+                    tcg_temp_free_i32(tmp2);
+                    return 1;
                 }
-            }
-            break;
-        case 28: /* Float compare.  */
-            if (!u) {
-                gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
-            } else {
                 if (size == 0)
-                    gen_helper_neon_cge_f32(tmp, tmp, tmp2);
+                    gen_helper_neon_acge_f32(tmp, tmp, tmp2);
                 else
-                    gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
+                    gen_helper_neon_acgt_f32(tmp, tmp, tmp2);
+                break;
+            case 30: /* Float min/max.  */
+                if (size == 0)
+                    gen_helper_neon_max_f32(tmp, tmp, tmp2);
+                else
+                    gen_helper_neon_min_f32(tmp, tmp, tmp2);
+                break;
+            case 31:
+                if (size == 0)
+                    gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
+                else
+                    gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
+                break;
+            default:
+                abort();
             }
-            break;
-        case 29: /* Float compare absolute.  */
-            if (!u)
-                return 1;
-            if (size == 0)
-                gen_helper_neon_acge_f32(tmp, tmp, tmp2);
-            else
-                gen_helper_neon_acgt_f32(tmp, tmp, tmp2);
-            break;
-        case 30: /* Float min/max.  */
-            if (size == 0)
-                gen_helper_neon_max_f32(tmp, tmp, tmp2);
-            else
-                gen_helper_neon_min_f32(tmp, tmp, tmp2);
-            break;
-        case 31:
-            if (size == 0)
-                gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
-            else
-                gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
-            break;
-        default:
-            abort();
-        }
-        dead_tmp(tmp2);
-
-        /* Save the result.  For elementwise operations we can put it
-           straight into the destination register.  For pairwise operations
-           we have to be careful to avoid clobbering the source operands.  */
-        if (pairwise && rd == rm) {
-            neon_store_scratch(pass, tmp);
-        } else {
-            neon_store_reg(rd, pass, tmp);
-        }
 
+            /* Save the result.  For elementwise operations we can put it
+               straight into the destination register.  For pairwise operations
+               we have to be careful to avoid clobbering the source operands.*/
+            if (pairwise && rd == rm) {
+                neon_store_scratch(pass, tmp);
+            } else {
+                neon_store_reg(rd, pass, tmp);
+            }
         } /* for pass */
+        tcg_temp_free_i32(tmp2);
         if (pairwise && rd == rm) {
             for (pass = 0; pass < (q ? 4 : 2); pass++) {
-                tmp = neon_load_scratch(pass);
+                neon_load_scratch(tmp, pass);
                 neon_store_reg(rd, pass, tmp);
             }
         }
+        tcg_temp_free_i32(tmp);
         /* End of 3 register same size operations.  */
     } else if (insn & (1 << 4)) {
         if ((insn & 0x00380080) != 0) {
@@ -4550,8 +4575,9 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         neon_store_reg64(cpu_V0, rd + pass);
                     } else { /* size < 3 */
                         /* Operands in T0 and T1.  */
-                        tmp = neon_load_reg(rm, pass);
-                        tmp2 = new_tmp();
+                        tmp = tcg_temp_new_i32();
+                        neon_load_reg(tmp, rm, pass);
+                        tmp2 = tcg_temp_new_i32();
                         tcg_gen_movi_i32(tmp2, imm);
                         switch (op) {
                         case 0:  /* VSHR */
@@ -4563,8 +4589,11 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             GEN_NEON_INTEGER_OP(rshl);
                             break;
                         case 4: /* VSRI */
-                            if (!u)
+                            if (!u) {
+                                tcg_temp_free_i32(tmp2);
+                                tcg_temp_free_i32(tmp);
                                 return 1;
+                            }
                             GEN_NEON_INTEGER_OP(shl);
                             break;
                         case 5: /* VSHL, VSLI */
@@ -4572,7 +4601,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp2); tcg_temp_free_i32(tmp); return 1;
                             }
                             break;
                         case 6: /* VQSHL */
@@ -4583,17 +4612,15 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_qshl_u8(tmp, cpu_env, tmp, tmp2); break;
                             case 1: gen_helper_neon_qshl_u16(tmp, cpu_env, tmp, tmp2); break;
                             case 2: gen_helper_neon_qshl_u32(tmp, cpu_env, tmp, tmp2); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp); tcg_temp_free_i32(tmp2); return 1;
                             }
                             break;
                         }
-                        dead_tmp(tmp2);
 
                         if (op == 1 || op == 3) {
                             /* Accumulate.  */
-                            tmp2 = neon_load_reg(rd, pass);
+                            neon_load_reg(tmp2, rd, pass);
                             gen_neon_add(size, tmp2, tmp);
-                            dead_tmp(tmp2);
                         } else if (op == 4 || (op == 5 && u)) {
                             /* Insert */
                             switch (size) {
@@ -4625,13 +4652,14 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             default:
                                 abort();
                             }
-                            tmp2 = neon_load_reg(rd, pass);
+                            neon_load_reg(tmp2, rd, pass);
                             tcg_gen_andi_i32(tmp, tmp, mask);
                             tcg_gen_andi_i32(tmp2, tmp2, ~mask);
                             tcg_gen_or_i32(tmp, tmp, tmp2);
-                            dead_tmp(tmp2);
                         }
+                        tcg_temp_free_i32(tmp2);
                         neon_store_reg(rd, pass, tmp);
+                        tcg_temp_free_i32(tmp);
                     }
                 } /* for pass */
             } else if (op < 10) {
@@ -4674,15 +4702,17 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64);
                         }
                     } else {
-                        tmp = neon_load_reg(rm + pass, 0);
+                        tmp = tcg_temp_new_i32();
+                        neon_load_reg(tmp, rm + pass, 0);
                         gen_neon_shift_narrow(size, tmp, tmp2, q, u);
-                        tmp3 = neon_load_reg(rm + pass, 1);
+                        tmp3 = tcg_temp_new_i32();
+                        neon_load_reg(tmp3, rm + pass, 1);
                         gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
                         tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
-                        dead_tmp(tmp);
-                        dead_tmp(tmp3);
+                        tcg_temp_free_i32(tmp);
+                        tcg_temp_free_i32(tmp3);
                     }
-                    tmp = new_tmp();
+                    tmp = tcg_temp_new_i32();
                     if (op == 8 && !u) {
                         gen_neon_narrow(size - 1, tmp, cpu_V0);
                     } else {
@@ -4692,23 +4722,27 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
                     }
                     neon_store_reg(rd, pass, tmp);
+                    tcg_temp_free_i32(tmp);
                 } /* for pass */
                 if (size == 3) {
                     tcg_temp_free_i64(tmp64);
                 } else {
-                    dead_tmp(tmp2);
+                    tcg_temp_free_i32(tmp2);
                 }
             } else if (op == 10) {
                 /* VSHLL */
                 if (q || size == 3)
                     return 1;
-                tmp = neon_load_reg(rm, 0);
-                tmp2 = neon_load_reg(rm, 1);
+                tmp = tcg_temp_new_i32();
+                neon_load_reg(tmp, rm, 0);
+                tmp2 = tcg_temp_new_i32();
+                neon_load_reg(tmp2, rm, 1);
                 for (pass = 0; pass < 2; pass++) {
                     if (pass == 1)
                         tmp = tmp2;
 
                     gen_neon_widen(cpu_V0, tmp, size, u);
+                    tcg_temp_free_i32(tmp);
 
                     if (shift != 0) {
                         /* The shift is less than the width of the source
@@ -4793,9 +4827,10 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             if (invert)
                 imm = ~imm;
 
+            tmp = tcg_temp_new_i32();
             for (pass = 0; pass < (q ? 4 : 2); pass++) {
                 if (op & 1 && op < 12) {
-                    tmp = neon_load_reg(rd, pass);
+                    neon_load_reg(tmp, rd, pass);
                     if (invert) {
                         /* The immediate value has already been inverted, so
                            BIC becomes AND.  */
@@ -4805,7 +4840,6 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                 } else {
                     /* VMOV, VMVN.  */
-                    tmp = new_tmp();
                     if (op == 14 && invert) {
                         uint32_t val;
                         val = 0;
@@ -4820,6 +4854,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 }
                 neon_store_reg(rd, pass, tmp);
             }
+            tcg_temp_free_i32(tmp);
         }
     } else { /* (insn & 0x00800010 == 0x00800000) */
         if (size != 3) {
@@ -4845,52 +4880,60 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     {0, 0, 0}, /* VQDMLSL */
                     {0, 0, 0}, /* Integer VMULL */
                     {0, 0, 0}, /* VQDMULL */
-                    {0, 0, 0}  /* Polynomial VMULL */
+                    {0, 0, 0}, /* Polynomial VMULL */
+                    {0, 0, 0}  /* RESERVED */
                 };
 
+                if (op == 15 ||
+                    (size == 0 && (op == 9 || op == 11 || op == 13)))
+                    return 1;
+
                 prewiden = neon_3reg_wide[op][0];
                 src1_wide = neon_3reg_wide[op][1];
                 src2_wide = neon_3reg_wide[op][2];
 
-                if (size == 0 && (op == 9 || op == 11 || op == 13))
-                    return 1;
-
                 /* Avoid overlapping operands.  Wide source operands are
                    always aligned so will never overlap with wide
                    destinations in problematic ways.  */
+                tmp = tcg_temp_new_i32();
                 if (rd == rm && !src2_wide) {
-                    tmp = neon_load_reg(rm, 1);
+                    neon_load_reg(tmp, rm, 1);
                     neon_store_scratch(2, tmp);
                 } else if (rd == rn && !src1_wide) {
-                    tmp = neon_load_reg(rn, 1);
+                    neon_load_reg(tmp, rn, 1);
                     neon_store_scratch(2, tmp);
                 }
+                tcg_temp_free_i32(tmp);
                 TCGV_UNUSED(tmp3);
                 for (pass = 0; pass < 2; pass++) {
                     if (src1_wide) {
                         neon_load_reg64(cpu_V0, rn + pass);
                         TCGV_UNUSED(tmp);
                     } else {
+                        tmp = tcg_temp_new_i32();
                         if (pass == 1 && rd == rn) {
-                            tmp = neon_load_scratch(2);
+                            neon_load_scratch(tmp, 2);
                         } else {
-                            tmp = neon_load_reg(rn, pass);
+                            neon_load_reg(tmp, rn, pass);
                         }
                         if (prewiden) {
                             gen_neon_widen(cpu_V0, tmp, size, u);
+                            tcg_temp_free_i32(tmp);
                         }
                     }
                     if (src2_wide) {
                         neon_load_reg64(cpu_V1, rm + pass);
                         TCGV_UNUSED(tmp2);
                     } else {
+                        tmp2 = tcg_temp_new_i32();
                         if (pass == 1 && rd == rm) {
-                            tmp2 = neon_load_scratch(2);
+                            neon_load_scratch(tmp2, 2);
                         } else {
-                            tmp2 = neon_load_reg(rm, pass);
+                            neon_load_reg(tmp2, rm, pass);
                         }
                         if (prewiden) {
                             gen_neon_widen(cpu_V1, tmp2, size, u);
+                            tcg_temp_free_i32(tmp2);
                         }
                     }
                     switch (op) {
@@ -4922,14 +4965,14 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             break;
                         default: abort();
                         }
-                        dead_tmp(tmp2);
-                        dead_tmp(tmp);
+                        tcg_temp_free_i32(tmp2);
+                        tcg_temp_free_i32(tmp);
                         break;
                     case 8: case 9: case 10: case 11: case 12: case 13:
                         /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
-                        dead_tmp(tmp2);
-                        dead_tmp(tmp);
+                        tcg_temp_free_i32(tmp2);
+                        tcg_temp_free_i32(tmp);
                         break;
                     case 14: /* Polynomial VMULL */
                         cpu_abort(env, "Polynomial VMULL not implemented");
@@ -4965,7 +5008,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         neon_store_reg64(cpu_V0, rd + pass);
                     } else if (op == 4 || op == 6) {
                         /* Narrowing operation.  */
-                        tmp = new_tmp();
+                        tmp = tcg_temp_new_i32();
                         if (u) {
                             switch (size) {
                             case 0:
@@ -5001,6 +5044,8 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         } else {
                             neon_store_reg(rd, 0, tmp3);
                             neon_store_reg(rd, 1, tmp);
+                            tcg_temp_free_i32(tmp);
+                            tcg_temp_free_i32(tmp3);
                         }
                     } else {
                         /* Write back the result.  */
@@ -5018,11 +5063,13 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 case 9: /* Floating point VMUL scalar */
                 case 12: /* VQDMULH scalar */
                 case 13: /* VQRDMULH scalar */
-                    tmp = neon_get_scalar(size, rm);
+                    tmp = tcg_temp_new_i32();
+                    neon_get_scalar(tmp, size, rm);
                     neon_store_scratch(0, tmp);
+                    tmp2 = tcg_temp_new_i32();
                     for (pass = 0; pass < (u ? 4 : 2); pass++) {
-                        tmp = neon_load_scratch(0);
-                        tmp2 = neon_load_reg(rn, pass);
+                        neon_load_scratch(tmp, 0);
+                        neon_load_reg(tmp2, rn, pass);
                         if (op == 12) {
                             if (size == 1) {
                                 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
@@ -5042,13 +5089,12 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
                             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp); tcg_temp_free_i32(tmp2); return 1;
                             }
                         }
-                        dead_tmp(tmp2);
                         if (op < 8) {
                             /* Accumulate.  */
-                            tmp2 = neon_load_reg(rd, pass);
+                            neon_load_reg(tmp2, rd, pass);
                             switch (op) {
                             case 0:
                                 gen_neon_add(size, tmp, tmp2);
@@ -5065,10 +5111,11 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             default:
                                 abort();
                             }
-                            dead_tmp(tmp2);
                         }
                         neon_store_reg(rd, pass, tmp);
                     }
+                    tcg_temp_free_i32(tmp2);
+                    tcg_temp_free_i32(tmp);
                     break;
                 case 2: /* VMLAL sclar */
                 case 3: /* VQDMLAL scalar */
@@ -5079,17 +5126,20 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     if (size == 0 && (op == 3 || op == 7 || op == 11))
                         return 1;
 
-                    tmp2 = neon_get_scalar(size, rm);
-                    tmp3 = neon_load_reg(rn, 1);
+                    tmp2 = tcg_temp_new_i32();
+                    tmp3 = tcg_temp_new_i32();
+                    neon_get_scalar(tmp2, size, rm);
+                    neon_load_reg(tmp3, rn, 1);
 
                     for (pass = 0; pass < 2; pass++) {
                         if (pass == 0) {
-                            tmp = neon_load_reg(rn, 0);
+                            tmp = tcg_temp_new_i32();
+                            neon_load_reg(tmp, rn, 0);
                         } else {
                             tmp = tmp3;
                         }
                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
-                        dead_tmp(tmp);
+                        tcg_temp_free_i32(tmp);
                         if (op == 6 || op == 7) {
                             gen_neon_negl(cpu_V0, size);
                         }
@@ -5116,7 +5166,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         neon_store_reg64(cpu_V0, rd + pass);
                     }
 
-                    dead_tmp(tmp2);
+                    tcg_temp_free_i32(tmp2);
 
                     break;
                 default: /* 14 and 15 are RESERVED */
@@ -5184,9 +5234,11 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 case 0: /* VREV64 */
                     if (size == 3)
                         return 1;
+                    tmp = tcg_temp_new_i32();
+                    tmp2 = tcg_temp_new_i32();
                     for (pass = 0; pass < (q ? 2 : 1); pass++) {
-                        tmp = neon_load_reg(rm, pass * 2);
-                        tmp2 = neon_load_reg(rm, pass * 2 + 1);
+                        neon_load_reg(tmp, rm, pass * 2);
+                        neon_load_reg(tmp2, rm, pass * 2 + 1);
                         switch (size) {
                         case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
                         case 1: gen_swap_half(tmp); break;
@@ -5205,15 +5257,18 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             neon_store_reg(rd, pass * 2, tmp2);
                         }
                     }
+                    tcg_temp_free_i32(tmp2);
+                    tcg_temp_free_i32(tmp);
                     break;
                 case 4: case 5: /* VPADDL */
                 case 12: case 13: /* VPADAL */
                     if (size == 3)
                         return 1;
+                    tmp = tcg_temp_new_i32();
                     for (pass = 0; pass < q + 1; pass++) {
-                        tmp = neon_load_reg(rm, pass * 2);
+                        neon_load_reg(tmp, rm, pass * 2);
                         gen_neon_widen(cpu_V0, tmp, size, op & 1);
-                        tmp = neon_load_reg(rm, pass * 2 + 1);
+                        neon_load_reg(tmp, rm, pass * 2 + 1);
                         gen_neon_widen(cpu_V1, tmp, size, op & 1);
                         switch (size) {
                         case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
@@ -5228,15 +5283,20 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         }
                         neon_store_reg64(cpu_V0, rd + pass);
                     }
+                    tcg_temp_free_i32(tmp);
                     break;
                 case 33: /* VTRN */
                     if (size == 2) {
+                        tmp = tcg_temp_new_i32();
+                        tmp2 = tcg_temp_new_i32();
                         for (n = 0; n < (q ? 4 : 2); n += 2) {
-                            tmp = neon_load_reg(rm, n);
-                            tmp2 = neon_load_reg(rd, n + 1);
+                            neon_load_reg(tmp, rm, n);
+                            neon_load_reg(tmp2, rd, n + 1);
                             neon_store_reg(rm, n, tmp2);
                             neon_store_reg(rd, n + 1, tmp);
                         }
+                        tcg_temp_free_i32(tmp2);
+                        tcg_temp_free_i32(tmp);
                     } else {
                         goto elementwise;
                     }
@@ -5250,12 +5310,13 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         return 1;
                     gen_neon_unzip(rd, q, 0, size);
                     gen_neon_unzip(rm, q, 4, size);
+                    tmp = tcg_temp_new_i32();
                     if (q) {
                         static int unzip_order_q[8] =
                             {0, 2, 4, 6, 1, 3, 5, 7};
                         for (n = 0; n < 8; n++) {
                             int reg = (n < 4) ? rd : rm;
-                            tmp = neon_load_scratch(unzip_order_q[n]);
+                            neon_load_scratch(tmp, unzip_order_q[n]);
                             neon_store_reg(reg, n % 4, tmp);
                         }
                     } else {
@@ -5263,10 +5324,11 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             {0, 4, 1, 5};
                         for (n = 0; n < 4; n++) {
                             int reg = (n < 2) ? rd : rm;
-                            tmp = neon_load_scratch(unzip_order[n]);
+                            neon_load_scratch(tmp, unzip_order[n]);
                             neon_store_reg(reg, n % 2, tmp);
                         }
                     }
+                    tcg_temp_free_i32(tmp);
                     break;
                 case 35: /* VZIP */
                     /* Reg  Before       After
@@ -5276,9 +5338,11 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     if (size == 3)
                         return 1;
                     count = (q ? 4 : 2);
+                    tmp = tcg_temp_new_i32();
+                    tmp2 = tcg_temp_new_i32();
                     for (n = 0; n < count; n++) {
-                        tmp = neon_load_reg(rd, n);
-                        tmp2 = neon_load_reg(rd, n);
+                        neon_load_reg(tmp, rd, n);
+                        neon_load_reg(tmp2, rd, n);
                         switch (size) {
                         case 0: gen_neon_zip_u8(tmp, tmp2); break;
                         case 1: gen_neon_zip_u16(tmp, tmp2); break;
@@ -5288,11 +5352,13 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         neon_store_scratch(n * 2, tmp);
                         neon_store_scratch(n * 2 + 1, tmp2);
                     }
+                    tcg_temp_free_i32(tmp2);
                     for (n = 0; n < count * 2; n++) {
                         int reg = (n < count) ? rd : rm;
-                        tmp = neon_load_scratch(n);
+                        neon_load_scratch(tmp, n);
                         neon_store_reg(reg, n % count, tmp);
                     }
+                    tcg_temp_free_i32(tmp);
                     break;
                 case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
                     if (size == 3)
@@ -5300,7 +5366,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     TCGV_UNUSED(tmp2);
                     for (pass = 0; pass < 2; pass++) {
                         neon_load_reg64(cpu_V0, rm + pass);
-                        tmp = new_tmp();
+                        tmp = tcg_temp_new_i32();
                         if (op == 36 && q == 0) {
                             gen_neon_narrow(size, tmp, cpu_V0);
                         } else if (q) {
@@ -5313,18 +5379,23 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         } else {
                             neon_store_reg(rd, 0, tmp2);
                             neon_store_reg(rd, 1, tmp);
+                            tcg_temp_free_i32(tmp);
+                            tcg_temp_free_i32(tmp2);
                         }
                     }
                     break;
                 case 38: /* VSHLL */
                     if (q || size == 3)
                         return 1;
-                    tmp = neon_load_reg(rm, 0);
-                    tmp2 = neon_load_reg(rm, 1);
+                    tmp = tcg_temp_new_i32();
+                    neon_load_reg(tmp, rm, 0);
+                    tmp2 = tcg_temp_new_i32();
+                    neon_load_reg(tmp2, rm, 1);
                     for (pass = 0; pass < 2; pass++) {
                         if (pass == 1)
                             tmp = tmp2;
                         gen_neon_widen(cpu_V0, tmp, size, 1);
+                        tcg_temp_free_i32(tmp);
                         neon_store_reg64(cpu_V0, rd + pass);
                     }
                     break;
@@ -5336,19 +5407,22 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                                            neon_reg_offset(rm, pass));
                             TCGV_UNUSED(tmp);
                         } else {
-                            tmp = neon_load_reg(rm, pass);
+                            tmp = tcg_temp_new_i32();
+                            neon_load_reg(tmp, rm, pass);
                         }
                         switch (op) {
                         case 1: /* VREV32 */
                             switch (size) {
                             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
                             case 1: gen_swap_half(tmp); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp); return 1;
                             }
                             break;
                         case 2: /* VREV16 */
-                            if (size != 0)
+                            if (size != 0) {
+                                tcg_temp_free_i32(tmp);
                                 return 1;
+                            }
                             gen_rev16(tmp);
                             break;
                         case 8: /* CLS */
@@ -5356,7 +5430,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
                             case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
                             case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp); return 1;
                             }
                             break;
                         case 9: /* CLZ */
@@ -5364,17 +5438,21 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
                             case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
                             case 2: gen_helper_clz(tmp, tmp); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp); return 1;
                             }
                             break;
                         case 10: /* CNT */
-                            if (size != 0)
+                            if (size != 0) {
+                                tcg_temp_free_i32(tmp);
                                 return 1;
+                            }
                             gen_helper_neon_cnt_u8(tmp, tmp);
                             break;
                         case 11: /* VNOT */
-                            if (size != 0)
+                            if (size != 0) {
+                                tcg_temp_free_i32(tmp);
                                 return 1;
+                            }
                             tcg_gen_not_i32(tmp, tmp);
                             break;
                         case 14: /* VQABS */
@@ -5382,7 +5460,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_qabs_s8(tmp, cpu_env, tmp); break;
                             case 1: gen_helper_neon_qabs_s16(tmp, cpu_env, tmp); break;
                             case 2: gen_helper_neon_qabs_s32(tmp, cpu_env, tmp); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp); return 1;
                             }
                             break;
                         case 15: /* VQNEG */
@@ -5390,7 +5468,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_qneg_s8(tmp, cpu_env, tmp); break;
                             case 1: gen_helper_neon_qneg_s16(tmp, cpu_env, tmp); break;
                             case 2: gen_helper_neon_qneg_s32(tmp, cpu_env, tmp); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp); return 1;
                             }
                             break;
                         case 16: case 19: /* VCGT #0, VCLE #0 */
@@ -5399,7 +5477,10 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default:
+                                tcg_temp_free_i32(tmp2);
+                                tcg_temp_free_i32(tmp);
+                                return 1;
                             }
                             tcg_temp_free(tmp2);
                             if (op == 19)
@@ -5411,7 +5492,10 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default:
+                                tcg_temp_free_i32(tmp2);
+                                tcg_temp_free_i32(tmp);
+                                return 1;
                             }
                             tcg_temp_free(tmp2);
                             if (op == 20)
@@ -5423,7 +5507,10 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default:
+                                tcg_temp_free_i32(tmp2);
+                                tcg_temp_free_i32(tmp);
+                                return 1;
                             }
                             tcg_temp_free(tmp2);
                             break;
@@ -5432,12 +5519,14 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
                             case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
                             case 2: tcg_gen_abs_i32(tmp, tmp); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp); return 1;
                             }
                             break;
                         case 23: /* VNEG */
-                            if (size == 3)
+                            if (size == 3) {
+                                tcg_temp_free_i32(tmp);
                                 return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_neon_rsb(size, tmp, tmp2);
                             tcg_temp_free(tmp2);
@@ -5468,18 +5557,22 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             gen_vfp_neg(0);
                             break;
                         case 32: /* VSWP */
-                            tmp2 = neon_load_reg(rd, pass);
+                            tmp2 = tcg_temp_new_i32();
+                            neon_load_reg(tmp2, rd, pass);
                             neon_store_reg(rm, pass, tmp2);
+                            tcg_temp_free_i32(tmp2);
                             break;
                         case 33: /* VTRN */
-                            tmp2 = neon_load_reg(rd, pass);
+                            tmp2 = tcg_temp_new_i32();
+                            neon_load_reg(tmp2, rd, pass);
                             switch (size) {
                             case 0: gen_neon_trn_u8(tmp, tmp2); break;
                             case 1: gen_neon_trn_u16(tmp, tmp2); break;
                             case 2: abort();
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp); tcg_temp_free_i32(tmp2); return 1;
                             }
                             neon_store_reg(rm, pass, tmp2);
+                            tcg_temp_free_i32(tmp2);
                             break;
                         case 56: /* Integer VRECPE */
                             gen_helper_recpe_u32(tmp, tmp, cpu_env);
@@ -5507,6 +5600,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             break;
                         default:
                             /* Reserved: 21, 29, 39-56 */
+                            tcg_temp_free_i32(tmp);
                             return 1;
                         }
                         if (op == 30 || op == 31 || op >= 58) {
@@ -5514,6 +5608,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                                            neon_reg_offset(rd, pass));
                         } else {
                             neon_store_reg(rd, pass, tmp);
+                            tcg_temp_free_i32(tmp);
                         }
                     }
                     break;
@@ -5521,36 +5616,39 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             } else if ((insn & (1 << 10)) == 0) {
                 /* VTBL, VTBX.  */
                 n = ((insn >> 5) & 0x18) + 8;
+                tmp = tcg_temp_new_i32();
                 if (insn & (1 << 6)) {
-                    tmp = neon_load_reg(rd, 0);
+                    neon_load_reg(tmp, rd, 0);
                 } else {
-                    tmp = new_tmp();
                     tcg_gen_movi_i32(tmp, 0);
                 }
-                tmp2 = neon_load_reg(rm, 0);
+                tmp2 = tcg_temp_new_i32();
+                neon_load_reg(tmp2, rm, 0);
                 tmp4 = tcg_const_i32(rn);
                 tmp5 = tcg_const_i32(n);
                 gen_helper_neon_tbl(tmp2, tmp2, tmp, tmp4, tmp5);
-                dead_tmp(tmp);
                 if (insn & (1 << 6)) {
-                    tmp = neon_load_reg(rd, 1);
+                    neon_load_reg(tmp, rd, 1);
                 } else {
-                    tmp = new_tmp();
                     tcg_gen_movi_i32(tmp, 0);
                 }
-                tmp3 = neon_load_reg(rm, 1);
+                tmp3 = tcg_temp_new_i32();
+                neon_load_reg(tmp3, rm, 1);
                 gen_helper_neon_tbl(tmp3, tmp3, tmp, tmp4, tmp5);
                 tcg_temp_free_i32(tmp5);
                 tcg_temp_free_i32(tmp4);
                 neon_store_reg(rd, 0, tmp2);
                 neon_store_reg(rd, 1, tmp3);
-                dead_tmp(tmp);
+                tcg_temp_free_i32(tmp3);
+                tcg_temp_free_i32(tmp2);
+                tcg_temp_free_i32(tmp);
             } else if ((insn & 0x380) == 0) {
                 /* VDUP */
+                tmp = tcg_temp_new_i32();
                 if (insn & (1 << 19)) {
-                    tmp = neon_load_reg(rm, 1);
+                    neon_load_reg(tmp, rm, 1);
                 } else {
-                    tmp = neon_load_reg(rm, 0);
+                    neon_load_reg(tmp, rm, 0);
                 }
                 if (insn & (1 << 16)) {
                     gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
@@ -5560,12 +5658,13 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     else
                         gen_neon_dup_low16(tmp);
                 }
+                tmp2 = tcg_temp_new_i32();
                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
-                    tmp2 = new_tmp();
                     tcg_gen_mov_i32(tmp2, tmp);
                     neon_store_reg(rd, pass, tmp2);
                 }
-                dead_tmp(tmp);
+                tcg_temp_free_i32(tmp2);
+                tcg_temp_free_i32(tmp);
             } else {
                 return 1;
             }
@@ -5588,16 +5687,20 @@  static int disas_cp14_read(CPUState * env, DisasContext *s, uint32_t insn)
             /* TEECR */
             if (IS_USER(s))
                 return 1;
-            tmp = load_cpu_field(teecr);
+            tmp = tcg_temp_new_i32();
+            load_cpu_field(tmp, teecr);
             store_reg(s, rt, tmp);
+            tcg_temp_free_i32(tmp);
             return 0;
         }
         if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
             /* TEEHBR */
             if (IS_USER(s) && (env->teecr & 1))
                 return 1;
-            tmp = load_cpu_field(teehbr);
+            tmp = tcg_temp_new_i32();
+            load_cpu_field(tmp, teehbr);
             store_reg(s, rt, tmp);
+            tcg_temp_free_i32(tmp);
             return 0;
         }
     }
@@ -5620,17 +5723,20 @@  static int disas_cp14_write(CPUState * env, DisasContext *s, uint32_t insn)
             /* TEECR */
             if (IS_USER(s))
                 return 1;
-            tmp = load_reg(s, rt);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rt);
             gen_helper_set_teecr(cpu_env, tmp);
-            dead_tmp(tmp);
+            tcg_temp_free_i32(tmp);
             return 0;
         }
         if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
             /* TEEHBR */
             if (IS_USER(s) && (env->teecr & 1))
                 return 1;
-            tmp = load_reg(s, rt);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rt);
             store_cpu_field(tmp, teehbr);
+            tcg_temp_free_i32(tmp);
             return 0;
         }
     }
@@ -5645,21 +5751,21 @@  static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
 
     cpnum = (insn >> 8) & 0xf;
     if (arm_feature(env, ARM_FEATURE_XSCALE)
-	    && ((env->cp15.c15_cpar ^ 0x3fff) & (1 << cpnum)))
-	return 1;
+        && ((env->cp15.c15_cpar ^ 0x3fff) & (1 << cpnum)))
+        return 1;
 
     switch (cpnum) {
-      case 0:
-      case 1:
-	if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
-	    return disas_iwmmxt_insn(env, s, insn);
-	} else if (arm_feature(env, ARM_FEATURE_XSCALE)) {
-	    return disas_dsp_insn(env, s, insn);
-	}
-	return 1;
+    case 0:
+    case 1:
+        if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
+            return disas_iwmmxt_insn(env, s, insn);
+        } else if (arm_feature(env, ARM_FEATURE_XSCALE)) {
+            return disas_dsp_insn(env, s, insn);
+        }
+        return 1;
     case 10:
     case 11:
-	return disas_vfp_insn (env, s, insn);
+        return disas_vfp_insn(env, s, insn);
     case 14:
         /* Coprocessors 7-15 are architecturally reserved by ARM.
            Unfortunately Intel decided to ignore this.  */
@@ -5670,11 +5776,11 @@  static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
         else
             return disas_cp14_write(env, s, insn);
     case 15:
-	return disas_cp15_insn (env, s, insn);
+        return disas_cp15_insn(env, s, insn);
     default:
     board:
-	/* Unknown coprocessor.  See if the board has hooked it.  */
-	return disas_cp_insn (env, s, insn);
+        /* Unknown coprocessor.  See if the board has hooked it.  */
+        return disas_cp_insn(env, s, insn);
     }
 }
 
@@ -5683,13 +5789,13 @@  static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
 {
     TCGv tmp;
-    tmp = new_tmp();
+    tmp = tcg_temp_new_i32();
     tcg_gen_trunc_i64_i32(tmp, val);
     store_reg(s, rlow, tmp);
-    tmp = new_tmp();
     tcg_gen_shri_i64(val, val, 32);
     tcg_gen_trunc_i64_i32(tmp, val);
     store_reg(s, rhigh, tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 /* load a 32-bit value from a register and perform a 64-bit accumulate.  */
@@ -5700,9 +5806,10 @@  static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
 
     /* Load value and extend to 64 bits.  */
     tmp = tcg_temp_new_i64();
-    tmp2 = load_reg(s, rlow);
+    tmp2 = tcg_temp_new_i32();
+    load_reg_var(s, tmp2, rlow);
     tcg_gen_extu_i32_i64(tmp, tmp2);
-    dead_tmp(tmp2);
+    tcg_temp_free_i32(tmp2);
     tcg_gen_add_i64(val, val, tmp);
     tcg_temp_free_i64(tmp);
 }
@@ -5715,12 +5822,14 @@  static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
     TCGv tmph;
 
     /* Load 64-bit value rd:rn.  */
-    tmpl = load_reg(s, rlow);
-    tmph = load_reg(s, rhigh);
+    tmpl = tcg_temp_new_i32();
+    load_reg_var(s, tmpl, rlow);
+    tmph = tcg_temp_new_i32();
+    load_reg_var(s, tmph, rhigh);
     tmp = tcg_temp_new_i64();
     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
-    dead_tmp(tmpl);
-    dead_tmp(tmph);
+    tcg_temp_free_i32(tmpl);
+    tcg_temp_free_i32(tmph);
     tcg_gen_add_i64(val, val, tmp);
     tcg_temp_free_i64(tmp);
 }
@@ -5728,10 +5837,10 @@  static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
 /* Set N and Z flags from a 64-bit value.  */
 static void gen_logicq_cc(TCGv_i64 val)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     gen_helper_logicq_cc(tmp, val);
     gen_logic_CC(tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
 }
 
 static void disas_arm_insn(CPUState * env, DisasContext *s)
@@ -5802,10 +5911,10 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                 goto illegal_op;
             ARCH(6);
             op1 = (insn & 0x1f);
+            addr = tcg_temp_new_i32();
             if (op1 == (env->uncached_cpsr & CPSR_M)) {
-                addr = load_reg(s, 13);
+                load_reg_var(s, addr, 13);
             } else {
-                addr = new_tmp();
                 tmp = tcg_const_i32(op1);
                 gen_helper_get_r13_banked(addr, cpu_env, tmp);
                 tcg_temp_free_i32(tmp);
@@ -5820,11 +5929,13 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
             }
             if (offset)
                 tcg_gen_addi_i32(addr, addr, offset);
-            tmp = load_reg(s, 14);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, 14);
             gen_st32(tmp, addr, 0);
-            tmp = load_cpu_field(spsr);
+            load_cpu_field(tmp, spsr);
             tcg_gen_addi_i32(addr, addr, 4);
             gen_st32(tmp, addr, 0);
+            tcg_temp_free_i32(tmp);
             if (insn & (1 << 21)) {
                 /* Base writeback.  */
                 switch (i) {
@@ -5842,11 +5953,9 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                     tmp = tcg_const_i32(op1);
                     gen_helper_set_r13_banked(cpu_env, tmp, addr);
                     tcg_temp_free_i32(tmp);
-                    dead_tmp(addr);
                 }
-            } else {
-                dead_tmp(addr);
             }
+            tcg_temp_free_i32(addr);
         } else if ((insn & 0x0e5fffe0) == 0x081d0a00) {
             /* rfe */
             int32_t offset;
@@ -5854,7 +5963,8 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                 goto illegal_op;
             ARCH(6);
             rn = (insn >> 16) & 0xf;
-            addr = load_reg(s, rn);
+            addr = tcg_temp_new_i32();
+            load_reg_var(s, addr, rn);
             i = (insn >> 23) & 3;
             switch (i) {
             case 0: offset = -4; break; /* DA */
@@ -5866,9 +5976,11 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
             if (offset)
                 tcg_gen_addi_i32(addr, addr, offset);
             /* Load PC into tmp and CPSR into tmp2.  */
-            tmp = gen_ld32(addr, 0);
+            tmp = tcg_temp_new_i32();
+            gen_ld32(tmp, addr, 0);
             tcg_gen_addi_i32(addr, addr, 4);
-            tmp2 = gen_ld32(addr, 0);
+            tmp2 = tcg_temp_new_i32();
+            gen_ld32(tmp2, addr, 0);
             if (insn & (1 << 21)) {
                 /* Base writeback.  */
                 switch (i) {
@@ -5881,19 +5993,21 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                 if (offset)
                     tcg_gen_addi_i32(addr, addr, offset);
                 store_reg(s, rn, addr);
-            } else {
-                dead_tmp(addr);
             }
+            tcg_temp_free_i32(addr);
             gen_rfe(s, tmp, tmp2);
+            tcg_temp_free_i32(tmp2);
+            tcg_temp_free_i32(tmp);
             return;
         } else if ((insn & 0x0e000000) == 0x0a000000) {
             /* branch link and change to thumb (blx <offset>) */
             int32_t offset;
 
             val = (uint32_t)s->pc;
-            tmp = new_tmp();
+            tmp = tcg_temp_new_i32();
             tcg_gen_movi_i32(tmp, val);
             store_reg(s, 14, tmp);
+            tcg_temp_free_i32(tmp);
             /* Sign-extend the 24-bit offset */
             offset = (((int32_t)insn) << 8) >> 8;
             /* offset * 4 + bit24 * 2 + (thumb bit) */
@@ -5953,17 +6067,18 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
             ARCH(6T2);
             rd = (insn >> 12) & 0xf;
             val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
+            tmp = tcg_temp_new_i32();
             if ((insn & (1 << 22)) == 0) {
                 /* MOVW */
-                tmp = new_tmp();
                 tcg_gen_movi_i32(tmp, val);
             } else {
                 /* MOVT */
-                tmp = load_reg(s, rd);
+                load_reg_var(s, tmp, rd);
                 tcg_gen_ext16u_i32(tmp, tmp);
                 tcg_gen_ori_i32(tmp, tmp, val << 16);
             }
             store_reg(s, rd, tmp);
+            tcg_temp_free_i32(tmp);
         } else {
             if (((insn >> 12) & 0xf) != 0xf)
                 goto illegal_op;
@@ -5988,47 +6103,57 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
         rm = insn & 0xf;
         switch (sh) {
         case 0x0: /* move program status register */
+            tmp = tcg_temp_new_i32();
             if (op1 & 1) {
                 /* PSR = reg */
-                tmp = load_reg(s, rm);
+                load_reg_var(s, tmp, rm);
                 i = ((op1 & 2) != 0);
-                if (gen_set_psr(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i, tmp))
+                if (gen_set_psr(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i, tmp)) {
+                    tcg_temp_free_i32(tmp);
                     goto illegal_op;
+                }
             } else {
                 /* reg = PSR */
                 rd = (insn >> 12) & 0xf;
                 if (op1 & 2) {
-                    if (IS_USER(s))
+                    if (IS_USER(s)) {
+                        tcg_temp_free_i32(tmp);
                         goto illegal_op;
-                    tmp = load_cpu_field(spsr);
+                    }
+                    load_cpu_field(tmp, spsr);
                 } else {
-                    tmp = new_tmp();
                     gen_helper_cpsr_read(tmp);
                 }
                 store_reg(s, rd, tmp);
             }
+            tcg_temp_free_i32(tmp);
             break;
         case 0x1:
+            tmp = tcg_temp_new_i32();
             if (op1 == 1) {
                 /* branch/exchange thumb (bx).  */
-                tmp = load_reg(s, rm);
+                load_reg_var(s, tmp, rm);
                 gen_bx(s, tmp);
             } else if (op1 == 3) {
                 /* clz */
                 rd = (insn >> 12) & 0xf;
-                tmp = load_reg(s, rm);
+                load_reg_var(s, tmp, rm);
                 gen_helper_clz(tmp, tmp);
                 store_reg(s, rd, tmp);
             } else {
+                tcg_temp_free_i32(tmp);
                 goto illegal_op;
             }
+            tcg_temp_free_i32(tmp);
             break;
         case 0x2:
             if (op1 == 1) {
                 ARCH(5J); /* bxj */
                 /* Trivial implementation equivalent to bx.  */
-                tmp = load_reg(s, rm);
+                tmp = tcg_temp_new_i32();
+                load_reg_var(s, tmp, rm);
                 gen_bx(s, tmp);
+                tcg_temp_free_i32(tmp);
             } else {
                 goto illegal_op;
             }
@@ -6038,25 +6163,31 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
               goto illegal_op;
 
             /* branch link/exchange thumb (blx) */
-            tmp = load_reg(s, rm);
-            tmp2 = new_tmp();
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rm);
+            tmp2 = tcg_temp_new_i32();
             tcg_gen_movi_i32(tmp2, s->pc);
             store_reg(s, 14, tmp2);
+            tcg_temp_free_i32(tmp2);
             gen_bx(s, tmp);
+            tcg_temp_free_i32(tmp);
             break;
         case 0x5: /* saturating add/subtract */
             rd = (insn >> 12) & 0xf;
             rn = (insn >> 16) & 0xf;
-            tmp = load_reg(s, rm);
-            tmp2 = load_reg(s, rn);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rm);
+            tmp2 = tcg_temp_new_i32();
+            load_reg_var(s, tmp2, rn);
             if (op1 & 2)
                 gen_helper_double_saturate(tmp2, tmp2);
             if (op1 & 1)
                 gen_helper_sub_saturate(tmp, tmp, tmp2);
             else
                 gen_helper_add_saturate(tmp, tmp, tmp2);
-            dead_tmp(tmp2);
+            tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
+            tcg_temp_free_i32(tmp);
             break;
         case 7: /* bkpt */
             gen_set_condexec(s);
@@ -6071,47 +6202,45 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
             rs = (insn >> 8) & 0xf;
             rn = (insn >> 12) & 0xf;
             rd = (insn >> 16) & 0xf;
+            tmp = tcg_temp_new_i32();
+            tmp2 = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rm);
+            load_reg_var(s, tmp2, rs);
             if (op1 == 1) {
                 /* (32 * 16) >> 16 */
-                tmp = load_reg(s, rm);
-                tmp2 = load_reg(s, rs);
                 if (sh & 4)
                     tcg_gen_sari_i32(tmp2, tmp2, 16);
                 else
                     gen_sxth(tmp2);
-                tmp64 = gen_muls_i64_i32(tmp, tmp2);
+                tmp64 = tcg_temp_new_i64();
+                gen_muls_i64_i32(tmp64, tmp, tmp2);
                 tcg_gen_shri_i64(tmp64, tmp64, 16);
-                tmp = new_tmp();
                 tcg_gen_trunc_i64_i32(tmp, tmp64);
                 tcg_temp_free_i64(tmp64);
                 if ((sh & 2) == 0) {
-                    tmp2 = load_reg(s, rn);
+                    load_reg_var(s, tmp2, rn);
                     gen_helper_add_setq(tmp, tmp, tmp2);
-                    dead_tmp(tmp2);
                 }
                 store_reg(s, rd, tmp);
             } else {
                 /* 16 * 16 */
-                tmp = load_reg(s, rm);
-                tmp2 = load_reg(s, rs);
                 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
-                dead_tmp(tmp2);
                 if (op1 == 2) {
                     tmp64 = tcg_temp_new_i64();
                     tcg_gen_ext_i32_i64(tmp64, tmp);
-                    dead_tmp(tmp);
                     gen_addq(s, tmp64, rn, rd);
                     gen_storeq_reg(s, rn, rd, tmp64);
                     tcg_temp_free_i64(tmp64);
                 } else {
                     if (op1 == 0) {
-                        tmp2 = load_reg(s, rn);
+                        load_reg_var(s, tmp2, rn);
                         gen_helper_add_setq(tmp, tmp, tmp2);
-                        dead_tmp(tmp2);
                     }
                     store_reg(s, rd, tmp);
                 }
             }
+            tcg_temp_free_i32(tmp2);
+            tcg_temp_free_i32(tmp);
             break;
         default:
             goto illegal_op;
@@ -6126,6 +6255,7 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
         logic_cc = table_logic_cc[op1] & set_cc;
 
         /* data processing instruction */
+        tmp2 = tcg_temp_new_i32();
         if (insn & (1 << 25)) {
             /* immediate operand */
             val = insn & 0xff;
@@ -6133,7 +6263,6 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
             if (shift) {
                 val = (val >> shift) | (val << (32 - shift));
             }
-            tmp2 = new_tmp();
             tcg_gen_movi_i32(tmp2, val);
             if (logic_cc && shift) {
                 gen_set_CF_bit31(tmp2);
@@ -6141,20 +6270,23 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
         } else {
             /* register */
             rm = (insn) & 0xf;
-            tmp2 = load_reg(s, rm);
+            load_reg_var(s, tmp2, rm);
             shiftop = (insn >> 5) & 3;
             if (!(insn & (1 << 4))) {
                 shift = (insn >> 7) & 0x1f;
                 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
             } else {
                 rs = (insn >> 8) & 0xf;
-                tmp = load_reg(s, rs);
+                tmp = tcg_temp_new_i32();
+                load_reg_var(s, tmp, rs);
                 gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
+                tcg_temp_free_i32(tmp);
             }
         }
         if (op1 != 0x0f && op1 != 0x0d) {
             rn = (insn >> 16) & 0xf;
-            tmp = load_reg(s, rn);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rn);
         } else {
             TCGV_UNUSED(tmp);
         }
@@ -6178,6 +6310,8 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
             if (set_cc && rd == 15) {
                 /* SUBS r15, ... is used for exception return.  */
                 if (IS_USER(s)) {
+                    tcg_temp_free_i32(tmp);
+                    tcg_temp_free_i32(tmp2);
                     goto illegal_op;
                 }
                 gen_helper_sub_cc(tmp, tmp, tmp2);
@@ -6236,26 +6370,22 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                 tcg_gen_and_i32(tmp, tmp, tmp2);
                 gen_logic_CC(tmp);
             }
-            dead_tmp(tmp);
             break;
         case 0x09:
             if (set_cc) {
                 tcg_gen_xor_i32(tmp, tmp, tmp2);
                 gen_logic_CC(tmp);
             }
-            dead_tmp(tmp);
             break;
         case 0x0a:
             if (set_cc) {
                 gen_helper_sub_cc(tmp, tmp, tmp2);
             }
-            dead_tmp(tmp);
             break;
         case 0x0b:
             if (set_cc) {
                 gen_helper_add_cc(tmp, tmp, tmp2);
             }
-            dead_tmp(tmp);
             break;
         case 0x0c:
             tcg_gen_or_i32(tmp, tmp, tmp2);
@@ -6268,6 +6398,7 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
             if (logic_cc && rd == 15) {
                 /* MOVS r15, ... is used for exception return.  */
                 if (IS_USER(s)) {
+                    tcg_temp_free_i32(tmp2);
                     goto illegal_op;
                 }
                 gen_exception_return(s, tmp2);
@@ -6295,8 +6426,9 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
             break;
         }
         if (op1 != 0x0f && op1 != 0x0d) {
-            dead_tmp(tmp2);
+            tcg_temp_free_i32(tmp);
         }
+        tcg_temp_free_i32(tmp2);
     } else {
         /* other instructions */
         op1 = (insn >> 24) & 0xf;
@@ -6312,41 +6444,42 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                     rs = (insn >> 8) & 0xf;
                     rm = (insn) & 0xf;
                     op1 = (insn >> 20) & 0xf;
+                    tmp = tcg_temp_new_i32();
+                    tmp2 = tcg_temp_new_i32();
+                    load_reg_var(s, tmp, rs);
+                    load_reg_var(s, tmp2, rm);
                     switch (op1) {
                     case 0: case 1: case 2: case 3: case 6:
                         /* 32 bit mul */
-                        tmp = load_reg(s, rs);
-                        tmp2 = load_reg(s, rm);
                         tcg_gen_mul_i32(tmp, tmp, tmp2);
-                        dead_tmp(tmp2);
+                        load_reg_var(s, tmp2, rn);
                         if (insn & (1 << 22)) {
                             /* Subtract (mls) */
-                            ARCH(6T2);
-                            tmp2 = load_reg(s, rn);
+                            ARCH(6T2); /* TODO: free tmp, tmp2! */
                             tcg_gen_sub_i32(tmp, tmp2, tmp);
-                            dead_tmp(tmp2);
                         } else if (insn & (1 << 21)) {
                             /* Add */
-                            tmp2 = load_reg(s, rn);
                             tcg_gen_add_i32(tmp, tmp, tmp2);
-                            dead_tmp(tmp2);
                         }
+                        tcg_temp_free_i32(tmp2);
                         if (insn & (1 << 20))
                             gen_logic_CC(tmp);
                         store_reg(s, rd, tmp);
+                        tcg_temp_free_i32(tmp);
                         break;
                     default:
                         /* 64 bit mul */
-                        tmp = load_reg(s, rs);
-                        tmp2 = load_reg(s, rm);
+                        tmp64 = tcg_temp_new_i64();
                         if (insn & (1 << 22))
-                            tmp64 = gen_muls_i64_i32(tmp, tmp2);
+                            gen_muls_i64_i32(tmp64, tmp, tmp2);
                         else
-                            tmp64 = gen_mulu_i64_i32(tmp, tmp2);
+                            gen_mulu_i64_i32(tmp64, tmp, tmp2);
+                        tcg_temp_free_i32(tmp2);
+                        tcg_temp_free_i32(tmp);
                         if (insn & (1 << 21)) /* mult accumulate */
                             gen_addq(s, tmp64, rn, rd);
                         if (!(insn & (1 << 23))) { /* double accumulate */
-                            ARCH(6);
+                            ARCH(6); /* TODO: free tmp64! */
                             gen_addq_lo(s, tmp64, rn);
                             gen_addq_lo(s, tmp64, rd);
                         }
@@ -6370,34 +6503,37 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                         load_reg_var(s, addr, rn);
                         if (insn & (1 << 20)) {
                             gen_helper_mark_exclusive(cpu_env, addr);
+                            tmp = tcg_temp_new_i32();
                             switch (op1) {
                             case 0: /* ldrex */
-                                tmp = gen_ld32(addr, IS_USER(s));
+                                gen_ld32(tmp, addr, IS_USER(s));
                                 break;
                             case 1: /* ldrexd */
-                                tmp = gen_ld32(addr, IS_USER(s));
+                                gen_ld32(tmp, addr, IS_USER(s));
                                 store_reg(s, rd, tmp);
                                 tcg_gen_addi_i32(addr, addr, 4);
-                                tmp = gen_ld32(addr, IS_USER(s));
+                                gen_ld32(tmp, addr, IS_USER(s));
                                 rd++;
                                 break;
                             case 2: /* ldrexb */
-                                tmp = gen_ld8u(addr, IS_USER(s));
+                                gen_ld8u(tmp, addr, IS_USER(s));
                                 break;
                             case 3: /* ldrexh */
-                                tmp = gen_ld16u(addr, IS_USER(s));
+                                gen_ld16u(tmp, addr, IS_USER(s));
                                 break;
                             default:
                                 abort();
                             }
                             store_reg(s, rd, tmp);
+                            tcg_temp_free_i32(tmp);
                         } else {
                             int label = gen_new_label();
                             rm = insn & 0xf;
                             tmp2 = tcg_temp_local_new_i32();
                             gen_helper_test_exclusive(tmp2, cpu_env, addr);
                             tcg_gen_brcondi_i32(TCG_COND_NE, tmp2, 0, label);
-                            tmp = load_reg(s,rm);
+                            tmp = tcg_temp_new_i32();
+                            load_reg_var(s, tmp, rm);
                             switch (op1) {
                             case 0:  /*  strex */
                                 gen_st32(tmp, addr, IS_USER(s));
@@ -6405,7 +6541,7 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                             case 1: /*  strexd */
                                 gen_st32(tmp, addr, IS_USER(s));
                                 tcg_gen_addi_i32(addr, addr, 4);
-                                tmp = load_reg(s, rm + 1);
+                                load_reg_var(s, tmp, rm + 1);
                                 gen_st32(tmp, addr, IS_USER(s));
                                 break;
                             case 2: /*  strexb */
@@ -6417,11 +6553,12 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                             default:
                                 abort();
                             }
+                            tcg_temp_free_i32(tmp);
                             gen_set_label(label);
                             tcg_gen_mov_i32(cpu_R[rd], tmp2);
-                            tcg_temp_free(tmp2);
+                            tcg_temp_free_i32(tmp2);
                         }
-                        tcg_temp_free(addr);
+                        tcg_temp_free_i32(addr);
                     } else {
                         /* SWP instruction */
                         rm = (insn) & 0xf;
@@ -6429,17 +6566,22 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                         /* ??? This is not really atomic.  However we know
                            we never have multiple CPUs running in parallel,
                            so it is good enough.  */
-                        addr = load_reg(s, rn);
-                        tmp = load_reg(s, rm);
+                        addr = tcg_temp_new_i32();
+                        load_reg_var(s, addr, rn);
+                        tmp = tcg_temp_new_i32();
+                        load_reg_var(s, tmp, rm);
+                        tmp2 = tcg_temp_new_i32();
                         if (insn & (1 << 22)) {
-                            tmp2 = gen_ld8u(addr, IS_USER(s));
+                            gen_ld8u(tmp2, addr, IS_USER(s));
                             gen_st8(tmp, addr, IS_USER(s));
                         } else {
-                            tmp2 = gen_ld32(addr, IS_USER(s));
+                            gen_ld32(tmp2, addr, IS_USER(s));
                             gen_st32(tmp, addr, IS_USER(s));
                         }
-                        dead_tmp(addr);
+                        tcg_temp_free_i32(tmp);
+                        tcg_temp_free_i32(addr);
                         store_reg(s, rd, tmp2);
+                        tcg_temp_free_i32(tmp2);
                     }
                 }
             } else {
@@ -6448,22 +6590,24 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                 /* Misc load/store */
                 rn = (insn >> 16) & 0xf;
                 rd = (insn >> 12) & 0xf;
-                addr = load_reg(s, rn);
+                addr = tcg_temp_new_i32();
+                load_reg_var(s, addr, rn);
                 if (insn & (1 << 24))
                     gen_add_datah_offset(s, insn, 0, addr);
                 address_offset = 0;
+                tmp = tcg_temp_new_i32();
                 if (insn & (1 << 20)) {
                     /* load */
                     switch(sh) {
                     case 1:
-                        tmp = gen_ld16u(addr, IS_USER(s));
+                        gen_ld16u(tmp, addr, IS_USER(s));
                         break;
                     case 2:
-                        tmp = gen_ld8s(addr, IS_USER(s));
+                        gen_ld8s(tmp, addr, IS_USER(s));
                         break;
                     default:
                     case 3:
-                        tmp = gen_ld16s(addr, IS_USER(s));
+                        gen_ld16s(tmp, addr, IS_USER(s));
                         break;
                     }
                     load = 1;
@@ -6471,26 +6615,28 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                     /* doubleword */
                     if (sh & 1) {
                         /* store */
-                        tmp = load_reg(s, rd);
+                        load_reg_var(s, tmp, rd);
                         gen_st32(tmp, addr, IS_USER(s));
                         tcg_gen_addi_i32(addr, addr, 4);
-                        tmp = load_reg(s, rd + 1);
+                        load_reg_var(s, tmp, rd + 1);
                         gen_st32(tmp, addr, IS_USER(s));
+                        tcg_temp_free_i32(tmp);
                         load = 0;
                     } else {
                         /* load */
-                        tmp = gen_ld32(addr, IS_USER(s));
+                        gen_ld32(tmp, addr, IS_USER(s));
                         store_reg(s, rd, tmp);
                         tcg_gen_addi_i32(addr, addr, 4);
-                        tmp = gen_ld32(addr, IS_USER(s));
+                        gen_ld32(tmp, addr, IS_USER(s));
                         rd++;
                         load = 1;
                     }
                     address_offset = -4;
                 } else {
                     /* store */
-                    tmp = load_reg(s, rd);
+                    load_reg_var(s, tmp, rd);
                     gen_st16(tmp, addr, IS_USER(s));
+                    tcg_temp_free_i32(tmp);
                     load = 0;
                 }
                 /* Perform base writeback before the loaded value to
@@ -6504,12 +6650,12 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                     if (address_offset)
                         tcg_gen_addi_i32(addr, addr, address_offset);
                     store_reg(s, rn, addr);
-                } else {
-                    dead_tmp(addr);
                 }
+                tcg_temp_free_i32(addr);
                 if (load) {
                     /* Complete the load.  */
                     store_reg(s, rd, tmp);
+                    tcg_temp_free_i32(tmp);
                 }
             }
             break;
@@ -6528,20 +6674,25 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                 switch ((insn >> 23) & 3) {
                 case 0: /* Parallel add/subtract.  */
                     op1 = (insn >> 20) & 7;
-                    tmp = load_reg(s, rn);
-                    tmp2 = load_reg(s, rm);
                     sh = (insn >> 5) & 7;
                     if ((op1 & 3) == 0 || sh == 5 || sh == 6)
                         goto illegal_op;
+                    tmp = tcg_temp_new_i32();
+                    tmp2 = tcg_temp_new_i32();
+                    load_reg_var(s, tmp, rn);
+                    load_reg_var(s, tmp2, rm);
                     gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
-                    dead_tmp(tmp2);
+                    tcg_temp_free_i32(tmp2);
                     store_reg(s, rd, tmp);
+                    tcg_temp_free_i32(tmp);
                     break;
                 case 1:
                     if ((insn & 0x00700020) == 0) {
                         /* Halfword pack.  */
-                        tmp = load_reg(s, rn);
-                        tmp2 = load_reg(s, rm);
+                        tmp = tcg_temp_new_i32();
+                        tmp2 = tcg_temp_new_i32();
+                        load_reg_var(s, tmp, rn);
+                        load_reg_var(s, tmp2, rm);
                         shift = (insn >> 7) & 0x1f;
                         if (insn & (1 << 6)) {
                             /* pkhtb */
@@ -6558,11 +6709,13 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                             tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
                         }
                         tcg_gen_or_i32(tmp, tmp, tmp2);
-                        dead_tmp(tmp2);
+                        tcg_temp_free_i32(tmp2);
                         store_reg(s, rd, tmp);
+                        tcg_temp_free_i32(tmp);
                     } else if ((insn & 0x00200020) == 0x00200000) {
                         /* [us]sat */
-                        tmp = load_reg(s, rm);
+                        tmp = tcg_temp_new_i32();
+                        load_reg_var(s, tmp, rm);
                         shift = (insn >> 7) & 0x1f;
                         if (insn & (1 << 6)) {
                             if (shift == 0)
@@ -6581,9 +6734,11 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                             tcg_temp_free_i32(tmp2);
                         }
                         store_reg(s, rd, tmp);
+                        tcg_temp_free_i32(tmp);
                     } else if ((insn & 0x00300fe0) == 0x00200f20) {
                         /* [us]sat16 */
-                        tmp = load_reg(s, rm);
+                        tmp = tcg_temp_new_i32();
+                        load_reg_var(s, tmp, rm);
                         sh = (insn >> 16) & 0x1f;
                         if (sh != 0) {
                             tmp2 = tcg_const_i32(sh);
@@ -6594,18 +6749,23 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                             tcg_temp_free_i32(tmp2);
                         }
                         store_reg(s, rd, tmp);
+                        tcg_temp_free_i32(tmp);
                     } else if ((insn & 0x00700fe0) == 0x00000fa0) {
                         /* Select bytes.  */
-                        tmp = load_reg(s, rn);
-                        tmp2 = load_reg(s, rm);
-                        tmp3 = new_tmp();
+                        tmp = tcg_temp_new_i32();
+                        tmp2 = tcg_temp_new_i32();
+                        tmp3 = tcg_temp_new_i32();
+                        load_reg_var(s, tmp, rn);
+                        load_reg_var(s, tmp2, rm);
                         tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
                         gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
-                        dead_tmp(tmp3);
-                        dead_tmp(tmp2);
+                        tcg_temp_free_i32(tmp3);
+                        tcg_temp_free_i32(tmp2);
                         store_reg(s, rd, tmp);
+                        tcg_temp_free_i32(tmp);
                     } else if ((insn & 0x000003e0) == 0x00000060) {
-                        tmp = load_reg(s, rm);
+                        tmp = tcg_temp_new_i32();
+                        load_reg_var(s, tmp, rm);
                         shift = (insn >> 10) & 3;
                         /* ??? In many cases it's not neccessary to do a
                            rotate, a shift is sufficient.  */
@@ -6622,23 +6782,26 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                         default: goto illegal_op;
                         }
                         if (rn != 15) {
-                            tmp2 = load_reg(s, rn);
+                            tmp2 = tcg_temp_new_i32();
+                            load_reg_var(s, tmp2, rn);
                             if ((op1 & 3) == 0) {
                                 gen_add16(tmp, tmp2);
                             } else {
                                 tcg_gen_add_i32(tmp, tmp, tmp2);
-                                dead_tmp(tmp2);
                             }
+                            tcg_temp_free_i32(tmp2);
                         }
                         store_reg(s, rd, tmp);
+                        tcg_temp_free_i32(tmp);
                     } else if ((insn & 0x003f0f60) == 0x003f0f20) {
                         /* rev */
-                        tmp = load_reg(s, rm);
+                        tmp = tcg_temp_new_i32();
+                        load_reg_var(s, tmp, rm);
                         if (insn & (1 << 22)) {
                             if (insn & (1 << 7)) {
                                 gen_revsh(tmp);
                             } else {
-                                ARCH(6T2);
+                                ARCH(6T2); /* TODO: free tmp! */
                                 gen_helper_rbit(tmp, tmp);
                             }
                         } else {
@@ -6648,30 +6811,32 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                                 tcg_gen_bswap32_i32(tmp, tmp);
                         }
                         store_reg(s, rd, tmp);
+                        tcg_temp_free_i32(tmp);
                     } else {
                         goto illegal_op;
                     }
                     break;
                 case 2: /* Multiplies (Type 3).  */
-                    tmp = load_reg(s, rm);
-                    tmp2 = load_reg(s, rs);
+                    tmp = tcg_temp_new_i32();
+                    tmp2 = tcg_temp_new_i32();
+                    load_reg_var(s, tmp, rm);
+                    load_reg_var(s, tmp2, rs);
                     if (insn & (1 << 20)) {
                         /* Signed multiply most significant [accumulate].  */
-                        tmp64 = gen_muls_i64_i32(tmp, tmp2);
+                        tmp64 = tcg_temp_new_i64();
+                        gen_muls_i64_i32(tmp64, tmp, tmp2);
                         if (insn & (1 << 5))
                             tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
                         tcg_gen_shri_i64(tmp64, tmp64, 32);
-                        tmp = new_tmp();
                         tcg_gen_trunc_i64_i32(tmp, tmp64);
                         tcg_temp_free_i64(tmp64);
                         if (rd != 15) {
-                            tmp2 = load_reg(s, rd);
+                            load_reg_var(s, tmp2, rd);
                             if (insn & (1 << 6)) {
                                 tcg_gen_sub_i32(tmp, tmp, tmp2);
                             } else {
                                 tcg_gen_add_i32(tmp, tmp, tmp2);
                             }
-                            dead_tmp(tmp2);
                         }
                         store_reg(s, rn, tmp);
                     } else {
@@ -6684,42 +6849,42 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                         } else {
                             tcg_gen_add_i32(tmp, tmp, tmp2);
                         }
-                        dead_tmp(tmp2);
                         if (insn & (1 << 22)) {
                             /* smlald, smlsld */
                             tmp64 = tcg_temp_new_i64();
                             tcg_gen_ext_i32_i64(tmp64, tmp);
-                            dead_tmp(tmp);
                             gen_addq(s, tmp64, rd, rn);
                             gen_storeq_reg(s, rd, rn, tmp64);
                             tcg_temp_free_i64(tmp64);
                         } else {
                             /* smuad, smusd, smlad, smlsd */
-                            if (rd != 15)
-                              {
-                                tmp2 = load_reg(s, rd);
+                            if (rd != 15) {
+                                load_reg_var(s, tmp2, rd);
                                 gen_helper_add_setq(tmp, tmp, tmp2);
-                                dead_tmp(tmp2);
-                              }
+                            }
                             store_reg(s, rn, tmp);
                         }
                     }
+                    tcg_temp_free_i32(tmp2);
+                    tcg_temp_free_i32(tmp);
                     break;
                 case 3:
                     op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
                     switch (op1) {
                     case 0: /* Unsigned sum of absolute differences.  */
                         ARCH(6);
-                        tmp = load_reg(s, rm);
-                        tmp2 = load_reg(s, rs);
+                        tmp = tcg_temp_new_i32();
+                        tmp2 = tcg_temp_new_i32();
+                        load_reg_var(s, tmp, rm);
+                        load_reg_var(s, tmp2, rs);
                         gen_helper_usad8(tmp, tmp, tmp2);
-                        dead_tmp(tmp2);
                         if (rd != 15) {
-                            tmp2 = load_reg(s, rd);
+                            load_reg_var(s, tmp2, rd);
                             tcg_gen_add_i32(tmp, tmp, tmp2);
-                            dead_tmp(tmp2);
                         }
+                        tcg_temp_free_i32(tmp2);
                         store_reg(s, rn, tmp);
+                        tcg_temp_free_i32(tmp);
                         break;
                     case 0x20: case 0x24: case 0x28: case 0x2c:
                         /* Bitfield insert/clear.  */
@@ -6727,27 +6892,30 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                         shift = (insn >> 7) & 0x1f;
                         i = (insn >> 16) & 0x1f;
                         i = i + 1 - shift;
+                        tmp = tcg_temp_new_i32();
                         if (rm == 15) {
-                            tmp = new_tmp();
                             tcg_gen_movi_i32(tmp, 0);
                         } else {
-                            tmp = load_reg(s, rm);
+                            load_reg_var(s, tmp, rm);
                         }
                         if (i != 32) {
-                            tmp2 = load_reg(s, rd);
+                            tmp2 = tcg_temp_new_i32();
+                            load_reg_var(s, tmp2, rd);
                             gen_bfi(tmp, tmp2, tmp, shift, (1u << i) - 1);
-                            dead_tmp(tmp2);
+                            tcg_temp_free_i32(tmp2);
                         }
                         store_reg(s, rd, tmp);
+                        tcg_temp_free_i32(tmp);
                         break;
                     case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
                     case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
                         ARCH(6T2);
-                        tmp = load_reg(s, rm);
                         shift = (insn >> 7) & 0x1f;
                         i = ((insn >> 16) & 0x1f) + 1;
                         if (shift + i > 32)
                             goto illegal_op;
+                        tmp = tcg_temp_new_i32();
+                        load_reg_var(s, tmp, rm);
                         if (i < 32) {
                             if (op1 & 0x20) {
                                 gen_ubfx(tmp, shift, (1u << i) - 1);
@@ -6756,6 +6924,7 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                             }
                         }
                         store_reg(s, rd, tmp);
+                        tcg_temp_free_i32(tmp);
                         break;
                     default:
                         goto illegal_op;
@@ -6770,46 +6939,48 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
              * xxxx 0111 1111 xxxx  xxxx xxxx 1111 xxxx
              */
             sh = (0xf << 20) | (0xf << 4);
-            if (op1 == 0x7 && ((insn & sh) == sh))
-            {
+            if (op1 == 0x7 && ((insn & sh) == sh)) {
                 goto illegal_op;
             }
             /* load/store byte/word */
             rn = (insn >> 16) & 0xf;
             rd = (insn >> 12) & 0xf;
-            tmp2 = load_reg(s, rn);
+            tmp2 = tcg_temp_new_i32();
+            load_reg_var(s, tmp2, rn);
             i = (IS_USER(s) || (insn & 0x01200000) == 0x00200000);
             if (insn & (1 << 24))
                 gen_add_data_offset(s, insn, tmp2);
+            tmp = tcg_temp_new_i32();
             if (insn & (1 << 20)) {
                 /* load */
                 if (insn & (1 << 22)) {
-                    tmp = gen_ld8u(tmp2, i);
+                    gen_ld8u(tmp, tmp2, i);
                 } else {
-                    tmp = gen_ld32(tmp2, i);
+                    gen_ld32(tmp, tmp2, i);
                 }
             } else {
                 /* store */
-                tmp = load_reg(s, rd);
+                load_reg_var(s, tmp, rd);
                 if (insn & (1 << 22))
                     gen_st8(tmp, tmp2, i);
                 else
                     gen_st32(tmp, tmp2, i);
+                tcg_temp_free_i32(tmp);
             }
             if (!(insn & (1 << 24))) {
                 gen_add_data_offset(s, insn, tmp2);
                 store_reg(s, rn, tmp2);
             } else if (insn & (1 << 21)) {
                 store_reg(s, rn, tmp2);
-            } else {
-                dead_tmp(tmp2);
             }
+            tcg_temp_free_i32(tmp2);
             if (insn & (1 << 20)) {
                 /* Complete the load.  */
                 if (rd == 15)
                     gen_bx(s, tmp);
                 else
                     store_reg(s, rd, tmp);
+                tcg_temp_free_i32(tmp);
             }
             break;
         case 0x08:
@@ -6828,7 +6999,8 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                         user = 1;
                 }
                 rn = (insn >> 16) & 0xf;
-                addr = load_reg(s, rn);
+                addr = tcg_temp_new_i32();
+                load_reg_var(s, addr, rn);
 
                 /* compute total size */
                 loaded_base = 0;
@@ -6859,38 +7031,40 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                 j = 0;
                 for(i=0;i<16;i++) {
                     if (insn & (1 << i)) {
+                        tmp = tcg_temp_new_i32();
                         if (insn & (1 << 20)) {
                             /* load */
-                            tmp = gen_ld32(addr, IS_USER(s));
+                            gen_ld32(tmp, addr, IS_USER(s));
                             if (i == 15) {
                                 gen_bx(s, tmp);
+                                tcg_temp_free_i32(tmp);
                             } else if (user) {
                                 tmp2 = tcg_const_i32(i);
                                 gen_helper_set_user_reg(tmp2, tmp);
                                 tcg_temp_free_i32(tmp2);
-                                dead_tmp(tmp);
+                                tcg_temp_free_i32(tmp);
                             } else if (i == rn) {
                                 loaded_var = tmp;
                                 loaded_base = 1;
                             } else {
                                 store_reg(s, i, tmp);
+                                tcg_temp_free_i32(tmp);
                             }
                         } else {
                             /* store */
                             if (i == 15) {
                                 /* special case: r15 = PC + 8 */
                                 val = (long)s->pc + 4;
-                                tmp = new_tmp();
                                 tcg_gen_movi_i32(tmp, val);
                             } else if (user) {
-                                tmp = new_tmp();
                                 tmp2 = tcg_const_i32(i);
                                 gen_helper_get_user_reg(tmp, tmp2);
                                 tcg_temp_free_i32(tmp2);
                             } else {
-                                tmp = load_reg(s, i);
+                                load_reg_var(s, tmp, i);
                             }
                             gen_st32(tmp, addr, IS_USER(s));
+                            tcg_temp_free_i32(tmp);
                         }
                         j++;
                         /* no need to add after the last transfer */
@@ -6918,17 +7092,18 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                         }
                     }
                     store_reg(s, rn, addr);
-                } else {
-                    dead_tmp(addr);
                 }
+                tcg_temp_free_i32(addr);
                 if (loaded_base) {
                     store_reg(s, rn, loaded_var);
+                    tcg_temp_free_i32(loaded_var);
                 }
                 if ((insn & (1 << 22)) && !user) {
                     /* Restore CPSR from SPSR.  */
-                    tmp = load_cpu_field(spsr);
+                    tmp = tcg_temp_new_i32();
+                    load_cpu_field(tmp, spsr);
                     gen_set_cpsr(tmp, 0xffffffff);
-                    dead_tmp(tmp);
+                    tcg_temp_free_i32(tmp);
                     s->is_jmp = DISAS_UPDATE;
                 }
             }
@@ -6941,9 +7116,10 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
                 /* branch (and link) */
                 val = (int32_t)s->pc;
                 if (insn & (1 << 24)) {
-                    tmp = new_tmp();
+                    tmp = tcg_temp_new_i32();
                     tcg_gen_movi_i32(tmp, val);
                     store_reg(s, 14, tmp);
+                    tcg_temp_free_i32(tmp);
                 }
                 offset = (((int32_t)insn << 8) >> 8);
                 val += (offset << 2) + 4;
@@ -6974,8 +7150,7 @@  static void disas_arm_insn(CPUState * env, DisasContext *s)
 }
 
 /* Return true if this is a Thumb-2 logical op.  */
-static int
-thumb2_logic_op(int op)
+static inline int thumb2_logic_op(int op)
 {
     return (op < 8);
 }
@@ -6986,8 +7161,8 @@  thumb2_logic_op(int op)
    to the high bit of T1.
    Returns zero if the opcode is valid.  */
 
-static int
-gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, TCGv t0, TCGv t1)
+static int gen_thumb2_data_op(DisasContext *s, int op, int conds,
+                              uint32_t shifter_out, TCGv t0, TCGv t1)
 {
     int logic_cc;
 
@@ -7079,26 +7254,32 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
         if ((insn & (1 << 12)) == 0) {
             /* Second half of blx.  */
             offset = ((insn & 0x7ff) << 1);
-            tmp = load_reg(s, 14);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, 14);
             tcg_gen_addi_i32(tmp, tmp, offset);
             tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
 
-            tmp2 = new_tmp();
+            tmp2 = tcg_temp_new_i32();
             tcg_gen_movi_i32(tmp2, s->pc | 1);
             store_reg(s, 14, tmp2);
+            tcg_temp_free_i32(tmp2);
             gen_bx(s, tmp);
+            tcg_temp_free_i32(tmp);
             return 0;
         }
         if (insn & (1 << 11)) {
             /* Second half of bl.  */
             offset = ((insn & 0x7ff) << 1) | 1;
-            tmp = load_reg(s, 14);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, 14);
             tcg_gen_addi_i32(tmp, tmp, offset);
 
-            tmp2 = new_tmp();
+            tmp2 = tcg_temp_new_i32();
             tcg_gen_movi_i32(tmp2, s->pc | 1);
             store_reg(s, 14, tmp2);
+            tcg_temp_free_i32(tmp2);
             gen_bx(s, tmp);
+            tcg_temp_free_i32(tmp);
             return 0;
         }
         if ((s->pc & ~TARGET_PAGE_MASK) == 0) {
@@ -7133,11 +7314,11 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
             /* Other load/store, table branch.  */
             if (insn & 0x01200000) {
                 /* Load/store doubleword.  */
+                addr = tcg_temp_new_i32();
                 if (rn == 15) {
-                    addr = new_tmp();
                     tcg_gen_movi_i32(addr, s->pc & ~3);
                 } else {
-                    addr = load_reg(s, rn);
+                    load_reg_var(s, addr, rn);
                 }
                 offset = (insn & 0xff) * 4;
                 if ((insn & (1 << 23)) == 0)
@@ -7146,73 +7327,78 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     tcg_gen_addi_i32(addr, addr, offset);
                     offset = 0;
                 }
+                tmp = tcg_temp_new_i32();
                 if (insn & (1 << 20)) {
                     /* ldrd */
-                    tmp = gen_ld32(addr, IS_USER(s));
+                    gen_ld32(tmp, addr, IS_USER(s));
                     store_reg(s, rs, tmp);
                     tcg_gen_addi_i32(addr, addr, 4);
-                    tmp = gen_ld32(addr, IS_USER(s));
+                    gen_ld32(tmp, addr, IS_USER(s));
                     store_reg(s, rd, tmp);
                 } else {
                     /* strd */
-                    tmp = load_reg(s, rs);
+                    load_reg_var(s, tmp, rs);
                     gen_st32(tmp, addr, IS_USER(s));
                     tcg_gen_addi_i32(addr, addr, 4);
-                    tmp = load_reg(s, rd);
+                    load_reg_var(s, tmp, rd);
                     gen_st32(tmp, addr, IS_USER(s));
                 }
+                tcg_temp_free_i32(tmp);
                 if (insn & (1 << 21)) {
                     /* Base writeback.  */
-                    if (rn == 15)
+                    if (rn == 15) {
+                        tcg_temp_free_i32(addr);
                         goto illegal_op;
+                    }
                     tcg_gen_addi_i32(addr, addr, offset - 4);
                     store_reg(s, rn, addr);
-                } else {
-                    dead_tmp(addr);
                 }
+                tcg_temp_free_i32(addr);
             } else if ((insn & (1 << 23)) == 0) {
                 /* Load/store exclusive word.  */
                 addr = tcg_temp_local_new();
                 load_reg_var(s, addr, rn);
+                tmp = tcg_temp_new_i32();
                 if (insn & (1 << 20)) {
                     gen_helper_mark_exclusive(cpu_env, addr);
-                    tmp = gen_ld32(addr, IS_USER(s));
+                    gen_ld32(tmp, addr, IS_USER(s));
                     store_reg(s, rd, tmp);
                 } else {
                     int label = gen_new_label();
                     tmp2 = tcg_temp_local_new();
                     gen_helper_test_exclusive(tmp2, cpu_env, addr);
                     tcg_gen_brcondi_i32(TCG_COND_NE, tmp2, 0, label);
-                    tmp = load_reg(s, rs);
+                    load_reg_var(s, tmp, rs);
                     gen_st32(tmp, addr, IS_USER(s));
                     gen_set_label(label);
                     tcg_gen_mov_i32(cpu_R[rd], tmp2);
                     tcg_temp_free(tmp2);
                 }
+                tcg_temp_free_i32(tmp);
                 tcg_temp_free(addr);
             } else if ((insn & (1 << 6)) == 0) {
                 /* Table Branch.  */
+                addr = tcg_temp_new_i32();
                 if (rn == 15) {
-                    addr = new_tmp();
                     tcg_gen_movi_i32(addr, s->pc);
                 } else {
-                    addr = load_reg(s, rn);
+                    load_reg_var(s, addr, rn);
                 }
-                tmp = load_reg(s, rm);
+                tmp = tcg_temp_new_i32();
+                load_reg_var(s, tmp, rm);
                 tcg_gen_add_i32(addr, addr, tmp);
                 if (insn & (1 << 4)) {
                     /* tbh */
                     tcg_gen_add_i32(addr, addr, tmp);
-                    dead_tmp(tmp);
-                    tmp = gen_ld16u(addr, IS_USER(s));
+                    gen_ld16u(tmp, addr, IS_USER(s));
                 } else { /* tbb */
-                    dead_tmp(tmp);
-                    tmp = gen_ld8u(addr, IS_USER(s));
+                    gen_ld8u(tmp, addr, IS_USER(s));
                 }
-                dead_tmp(addr);
+                tcg_temp_free_i32(addr);
                 tcg_gen_shli_i32(tmp, tmp, 1);
                 tcg_gen_addi_i32(tmp, tmp, s->pc);
                 store_reg(s, 15, tmp);
+                tcg_temp_free_i32(tmp);
             } else {
                 /* Load/store exclusive byte/halfword/doubleword.  */
                 /* ??? These are not really atomic.  However we know
@@ -7223,29 +7409,35 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                 load_reg_var(s, addr, rn);
                 if (insn & (1 << 20)) {
                     gen_helper_mark_exclusive(cpu_env, addr);
+                    tmp = tcg_temp_new_i32();
                     switch (op) {
                     case 0:
-                        tmp = gen_ld8u(addr, IS_USER(s));
+                        gen_ld8u(tmp, addr, IS_USER(s));
                         break;
                     case 1:
-                        tmp = gen_ld16u(addr, IS_USER(s));
+                        gen_ld16u(tmp, addr, IS_USER(s));
                         break;
                     case 3:
-                        tmp = gen_ld32(addr, IS_USER(s));
+                        gen_ld32(tmp, addr, IS_USER(s));
                         tcg_gen_addi_i32(addr, addr, 4);
-                        tmp2 = gen_ld32(addr, IS_USER(s));
+                        tmp2 = tcg_temp_new_i32();
+                        gen_ld32(tmp2, addr, IS_USER(s));
                         store_reg(s, rd, tmp2);
+                        tcg_temp_free_i32(tmp2);
                         break;
                     default:
+                        tcg_temp_free_i32(tmp);
                         goto illegal_op;
                     }
                     store_reg(s, rs, tmp);
+                    tcg_temp_free_i32(tmp);
                 } else {
                     int label = gen_new_label();
                     tmp2 = tcg_temp_local_new();
                     gen_helper_test_exclusive(tmp2, cpu_env, addr);
                     tcg_gen_brcondi_i32(TCG_COND_NE, tmp2, 0, label);
-                    tmp = load_reg(s, rs);
+                    tmp = tcg_temp_new_i32();
+                    load_reg_var(s, tmp, rs);
                     switch (op) {
                     case 0:
                         gen_st8(tmp, addr, IS_USER(s));
@@ -7256,12 +7448,14 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     case 3:
                         gen_st32(tmp, addr, IS_USER(s));
                         tcg_gen_addi_i32(addr, addr, 4);
-                        tmp = load_reg(s, rd);
+                        load_reg_var(s, tmp, rd);
                         gen_st32(tmp, addr, IS_USER(s));
                         break;
                     default:
+                        tcg_temp_free_i32(tmp);
                         goto illegal_op;
                     }
+                    tcg_temp_free_i32(tmp);
                     gen_set_label(label);
                     tcg_gen_mov_i32(cpu_R[rm], tmp2);
                     tcg_temp_free(tmp2);
@@ -7276,13 +7470,16 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     goto illegal_op;
                 if (insn & (1 << 20)) {
                     /* rfe */
-                    addr = load_reg(s, rn);
+                    addr = tcg_temp_new_i32();
+                    load_reg_var(s, addr, rn);
                     if ((insn & (1 << 24)) == 0)
                         tcg_gen_addi_i32(addr, addr, -8);
                     /* Load PC into tmp and CPSR into tmp2.  */
-                    tmp = gen_ld32(addr, 0);
+                    tmp = tcg_temp_new_i32();
+                    gen_ld32(tmp, addr, 0);
                     tcg_gen_addi_i32(addr, addr, 4);
-                    tmp2 = gen_ld32(addr, 0);
+                    tmp2 = tcg_temp_new_i32();
+                    gen_ld32(tmp2, addr, 0);
                     if (insn & (1 << 21)) {
                         /* Base writeback.  */
                         if (insn & (1 << 24)) {
@@ -7291,17 +7488,18 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                             tcg_gen_addi_i32(addr, addr, -4);
                         }
                         store_reg(s, rn, addr);
-                    } else {
-                        dead_tmp(addr);
                     }
+                    tcg_temp_free_i32(addr);
                     gen_rfe(s, tmp, tmp2);
+                    tcg_temp_free_i32(tmp2);
+                    tcg_temp_free_i32(tmp);
                 } else {
                     /* srs */
                     op = (insn & 0x1f);
+                    addr = tcg_temp_new_i32();
                     if (op == (env->uncached_cpsr & CPSR_M)) {
-                        addr = load_reg(s, 13);
+                        load_reg_var(s, addr, 13);
                     } else {
-                        addr = new_tmp();
                         tmp = tcg_const_i32(op);
                         gen_helper_get_r13_banked(addr, cpu_env, tmp);
                         tcg_temp_free_i32(tmp);
@@ -7309,12 +7507,13 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     if ((insn & (1 << 24)) == 0) {
                         tcg_gen_addi_i32(addr, addr, -8);
                     }
-                    tmp = load_reg(s, 14);
+                    tmp = tcg_temp_new_i32();
+                    load_reg_var(s, tmp, 14);
                     gen_st32(tmp, addr, 0);
                     tcg_gen_addi_i32(addr, addr, 4);
-                    tmp = new_tmp();
                     gen_helper_cpsr_read(tmp);
                     gen_st32(tmp, addr, 0);
+                    tcg_temp_free_i32(tmp);
                     if (insn & (1 << 21)) {
                         if ((insn & (1 << 24)) == 0) {
                             tcg_gen_addi_i32(addr, addr, -4);
@@ -7328,14 +7527,14 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                             gen_helper_set_r13_banked(cpu_env, tmp, addr);
                             tcg_temp_free_i32(tmp);
                         }
-                    } else {
-                        dead_tmp(addr);
                     }
+                    tcg_temp_free_i32(addr);
                 }
             } else {
                 int i;
                 /* Load/store multiple.  */
-                addr = load_reg(s, rn);
+                addr = tcg_temp_new_i32();
+                load_reg_var(s, addr, rn);
                 offset = 0;
                 for (i = 0; i < 16; i++) {
                     if (insn & (1 << i))
@@ -7348,9 +7547,10 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                 for (i = 0; i < 16; i++) {
                     if ((insn & (1 << i)) == 0)
                         continue;
+                    tmp = tcg_temp_new_i32();
                     if (insn & (1 << 20)) {
                         /* Load.  */
-                        tmp = gen_ld32(addr, IS_USER(s));
+                        gen_ld32(tmp, addr, IS_USER(s));
                         if (i == 15) {
                             gen_bx(s, tmp);
                         } else {
@@ -7358,9 +7558,10 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                         }
                     } else {
                         /* Store.  */
-                        tmp = load_reg(s, i);
+                        load_reg_var(s, tmp, i);
                         gen_st32(tmp, addr, IS_USER(s));
                     }
+                    tcg_temp_free_i32(tmp);
                     tcg_gen_addi_i32(addr, addr, 4);
                 }
                 if (insn & (1 << 21)) {
@@ -7369,37 +7570,41 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                         tcg_gen_addi_i32(addr, addr, -offset);
                     }
                     /* Fault if writeback register is in register list.  */
-                    if (insn & (1 << rn))
+                    if (insn & (1 << rn)) {
+                        tcg_temp_free_i32(addr);
                         goto illegal_op;
+                    }
                     store_reg(s, rn, addr);
-                } else {
-                    dead_tmp(addr);
                 }
+                tcg_temp_free_i32(addr);
             }
         }
         break;
     case 5: /* Data processing register constant shift.  */
+        tmp = tcg_temp_new_i32();
         if (rn == 15) {
-            tmp = new_tmp();
             tcg_gen_movi_i32(tmp, 0);
         } else {
-            tmp = load_reg(s, rn);
+            load_reg_var(s, tmp, rn);
         }
-        tmp2 = load_reg(s, rm);
+        tmp2 = tcg_temp_new_i32();
+        load_reg_var(s, tmp2, rm);
         op = (insn >> 21) & 0xf;
         shiftop = (insn >> 4) & 3;
         shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
         conds = (insn & (1 << 20)) != 0;
         logic_cc = (conds && thumb2_logic_op(op));
         gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
-        if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
+        if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2)) {
+            tcg_temp_free_i32(tmp2);
+            tcg_temp_free_i32(tmp);
             goto illegal_op;
-        dead_tmp(tmp2);
+        }
+        tcg_temp_free_i32(tmp2);
         if (rd != 15) {
             store_reg(s, rd, tmp);
-        } else {
-            dead_tmp(tmp);
         }
+        tcg_temp_free_i32(tmp);
         break;
     case 13: /* Misc data processing.  */
         op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
@@ -7407,19 +7612,24 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
             goto illegal_op;
         switch (op) {
         case 0: /* Register controlled shift.  */
-            tmp = load_reg(s, rn);
-            tmp2 = load_reg(s, rm);
             if ((insn & 0x70) != 0)
                 goto illegal_op;
+            tmp = tcg_temp_new_i32();
+            tmp2 = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rn);
+            load_reg_var(s, tmp2, rm);
             op = (insn >> 21) & 3;
             logic_cc = (insn & (1 << 20)) != 0;
             gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
+            tcg_temp_free_i32(tmp2);
             if (logic_cc)
                 gen_logic_CC(tmp);
             store_reg_bx(env, s, rd, tmp);
+            tcg_temp_free_i32(tmp);
             break;
         case 1: /* Sign/zero extend.  */
-            tmp = load_reg(s, rm);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rm);
             shift = (insn >> 4) & 3;
             /* ??? In many cases it's not neccessary to do a
                rotate, a shift is sufficient.  */
@@ -7433,45 +7643,51 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
             case 3: gen_uxtb16(tmp); break;
             case 4: gen_sxtb(tmp);   break;
             case 5: gen_uxtb(tmp);   break;
-            default: goto illegal_op;
+            default: tcg_temp_free_i32(tmp);  goto illegal_op;
             }
             if (rn != 15) {
-                tmp2 = load_reg(s, rn);
+                tmp2 = tcg_temp_new_i32();
+                load_reg_var(s, tmp2, rn);
                 if ((op >> 1) == 1) {
                     gen_add16(tmp, tmp2);
                 } else {
                     tcg_gen_add_i32(tmp, tmp, tmp2);
-                    dead_tmp(tmp2);
                 }
+                tcg_temp_free_i32(tmp2);
             }
             store_reg(s, rd, tmp);
+            tcg_temp_free_i32(tmp);
             break;
         case 2: /* SIMD add/subtract.  */
             op = (insn >> 20) & 7;
             shift = (insn >> 4) & 7;
             if ((op & 3) == 3 || (shift & 3) == 3)
                 goto illegal_op;
-            tmp = load_reg(s, rn);
-            tmp2 = load_reg(s, rm);
+            tmp = tcg_temp_new_i32();
+            tmp2 = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rn);
+            load_reg_var(s, tmp2, rm);
             gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
-            dead_tmp(tmp2);
+            tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
+            tcg_temp_free_i32(tmp);
             break;
         case 3: /* Other data processing.  */
             op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rn);
             if (op < 4) {
                 /* Saturating add/subtract.  */
-                tmp = load_reg(s, rn);
-                tmp2 = load_reg(s, rm);
+                tmp2 = tcg_temp_new_i32();
+                load_reg_var(s, tmp2, rm);
                 if (op & 2)
                     gen_helper_double_saturate(tmp, tmp);
                 if (op & 1)
                     gen_helper_sub_saturate(tmp, tmp2, tmp);
                 else
                     gen_helper_add_saturate(tmp, tmp, tmp2);
-                dead_tmp(tmp2);
+                tcg_temp_free_i32(tmp2);
             } else {
-                tmp = load_reg(s, rn);
                 switch (op) {
                 case 0x0a: /* rbit */
                     gen_helper_rbit(tmp, tmp);
@@ -7486,46 +7702,47 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     gen_revsh(tmp);
                     break;
                 case 0x10: /* sel */
-                    tmp2 = load_reg(s, rm);
-                    tmp3 = new_tmp();
+                    tmp2 = tcg_temp_new_i32();
+                    tmp3 = tcg_temp_new_i32();
+                    load_reg_var(s, tmp2, rm);
                     tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
                     gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
-                    dead_tmp(tmp3);
-                    dead_tmp(tmp2);
+                    tcg_temp_free_i32(tmp3);
+                    tcg_temp_free_i32(tmp2);
                     break;
                 case 0x18: /* clz */
                     gen_helper_clz(tmp, tmp);
                     break;
                 default:
+                    tcg_temp_free_i32(tmp);
                     goto illegal_op;
                 }
             }
             store_reg(s, rd, tmp);
+            tcg_temp_free_i32(tmp);
             break;
         case 4: case 5: /* 32-bit multiply.  Sum of absolute differences.  */
             op = (insn >> 4) & 0xf;
-            tmp = load_reg(s, rn);
-            tmp2 = load_reg(s, rm);
+            tmp = tcg_temp_new_i32();
+            tmp2 = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rn);
+            load_reg_var(s, tmp2, rm);
             switch ((insn >> 20) & 7) {
             case 0: /* 32 x 32 -> 32 */
                 tcg_gen_mul_i32(tmp, tmp, tmp2);
-                dead_tmp(tmp2);
                 if (rs != 15) {
-                    tmp2 = load_reg(s, rs);
+                    load_reg_var(s, tmp2, rs);
                     if (op)
                         tcg_gen_sub_i32(tmp, tmp2, tmp);
                     else
                         tcg_gen_add_i32(tmp, tmp, tmp2);
-                    dead_tmp(tmp2);
                 }
                 break;
             case 1: /* 16 x 16 -> 32 */
                 gen_mulxy(tmp, tmp2, op & 2, op & 1);
-                dead_tmp(tmp2);
                 if (rs != 15) {
-                    tmp2 = load_reg(s, rs);
+                    load_reg_var(s, tmp2, rs);
                     gen_helper_add_setq(tmp, tmp, tmp2);
-                    dead_tmp(tmp2);
                 }
                 break;
             case 2: /* Dual multiply add.  */
@@ -7539,76 +7756,76 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                 } else {
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 }
-                dead_tmp(tmp2);
-                if (rs != 15)
-                  {
-                    tmp2 = load_reg(s, rs);
+                if (rs != 15) {
+                    load_reg_var(s, tmp2, rs);
                     gen_helper_add_setq(tmp, tmp, tmp2);
-                    dead_tmp(tmp2);
-                  }
+                }
                 break;
             case 3: /* 32 * 16 -> 32msb */
                 if (op)
                     tcg_gen_sari_i32(tmp2, tmp2, 16);
                 else
                     gen_sxth(tmp2);
-                tmp64 = gen_muls_i64_i32(tmp, tmp2);
+                tmp64 = tcg_temp_new_i64();
+                gen_muls_i64_i32(tmp64, tmp, tmp2);
                 tcg_gen_shri_i64(tmp64, tmp64, 16);
-                tmp = new_tmp();
                 tcg_gen_trunc_i64_i32(tmp, tmp64);
                 tcg_temp_free_i64(tmp64);
-                if (rs != 15)
-                  {
-                    tmp2 = load_reg(s, rs);
+                if (rs != 15) {
+                    load_reg_var(s, tmp2, rs);
                     gen_helper_add_setq(tmp, tmp, tmp2);
-                    dead_tmp(tmp2);
-                  }
+                }
                 break;
             case 5: case 6: /* 32 * 32 -> 32msb */
                 gen_imull(tmp, tmp2);
                 if (insn & (1 << 5)) {
                     gen_roundqd(tmp, tmp2);
-                    dead_tmp(tmp2);
                 } else {
-                    dead_tmp(tmp);
+                    tcg_temp_free_i32(tmp);
                     tmp = tmp2;
+                    tmp2 = tcg_temp_new_i32();
                 }
                 if (rs != 15) {
-                    tmp2 = load_reg(s, rs);
+                    load_reg_var(s, tmp2, rs);
                     if (insn & (1 << 21)) {
                         tcg_gen_add_i32(tmp, tmp, tmp2);
                     } else {
                         tcg_gen_sub_i32(tmp, tmp2, tmp);
                     }
-                    dead_tmp(tmp2);
                 }
                 break;
             case 7: /* Unsigned sum of absolute differences.  */
                 gen_helper_usad8(tmp, tmp, tmp2);
-                dead_tmp(tmp2);
                 if (rs != 15) {
-                    tmp2 = load_reg(s, rs);
+                    load_reg_var(s, tmp2, rs);
                     tcg_gen_add_i32(tmp, tmp, tmp2);
-                    dead_tmp(tmp2);
                 }
                 break;
             }
+            tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
+            tcg_temp_free_i32(tmp);
             break;
         case 6: case 7: /* 64-bit multiply, Divide.  */
             op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
-            tmp = load_reg(s, rn);
-            tmp2 = load_reg(s, rm);
+            tmp = tcg_temp_new_i32();
+            tmp2 = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rn);
+            load_reg_var(s, tmp2, rm);
             if ((op & 0x50) == 0x10) {
                 /* sdiv, udiv */
-                if (!arm_feature(env, ARM_FEATURE_DIV))
+                if (!arm_feature(env, ARM_FEATURE_DIV)) {
+                    tcg_temp_free_i32(tmp2);
+                    tcg_temp_free_i32(tmp);
                     goto illegal_op;
+                }
                 if (op & 0x20)
                     gen_helper_udiv(tmp, tmp, tmp2);
                 else
                     gen_helper_sdiv(tmp, tmp, tmp2);
-                dead_tmp(tmp2);
+                tcg_temp_free_i32(tmp2);
                 store_reg(s, rd, tmp);
+                tcg_temp_free_i32(tmp);
             } else if ((op & 0xe) == 0xc) {
                 /* Dual multiply accumulate long.  */
                 if (op & 1)
@@ -7619,31 +7836,31 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                 } else {
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 }
-                dead_tmp(tmp2);
+                tcg_temp_free_i32(tmp2);
                 /* BUGFIX */
                 tmp64 = tcg_temp_new_i64();
                 tcg_gen_ext_i32_i64(tmp64, tmp);
-                dead_tmp(tmp);
+                tcg_temp_free_i32(tmp);
                 gen_addq(s, tmp64, rs, rd);
                 gen_storeq_reg(s, rs, rd, tmp64);
                 tcg_temp_free_i64(tmp64);
             } else {
+                tmp64 = tcg_temp_new_i64();
                 if (op & 0x20) {
                     /* Unsigned 64-bit multiply  */
-                    tmp64 = gen_mulu_i64_i32(tmp, tmp2);
+                    gen_mulu_i64_i32(tmp64, tmp, tmp2);
                 } else {
                     if (op & 8) {
                         /* smlalxy */
                         gen_mulxy(tmp, tmp2, op & 2, op & 1);
-                        dead_tmp(tmp2);
-                        tmp64 = tcg_temp_new_i64();
                         tcg_gen_ext_i32_i64(tmp64, tmp);
-                        dead_tmp(tmp);
                     } else {
                         /* Signed 64-bit multiply  */
-                        tmp64 = gen_muls_i64_i32(tmp, tmp2);
+                        gen_muls_i64_i32(tmp64, tmp, tmp2);
                     }
                 }
+                tcg_temp_free_i32(tmp2);
+                tcg_temp_free_i32(tmp);
                 if (op & 4) {
                     /* umaal */
                     gen_addq_lo(s, tmp64, rs);
@@ -7714,11 +7931,12 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     switch (op) {
                     case 0: /* msr cpsr.  */
                         if (IS_M(env)) {
-                            tmp = load_reg(s, rn);
+                            tmp = tcg_temp_new_i32();
+                            load_reg_var(s, tmp, rn);
                             addr = tcg_const_i32(insn & 0xff);
                             gen_helper_v7m_msr(cpu_env, addr, tmp);
                             tcg_temp_free_i32(addr);
-                            dead_tmp(tmp);
+                            tcg_temp_free_i32(tmp);
                             gen_lookup_tb(s);
                             break;
                         }
@@ -7726,11 +7944,15 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     case 1: /* msr spsr.  */
                         if (IS_M(env))
                             goto illegal_op;
-                        tmp = load_reg(s, rn);
+                        tmp = tcg_temp_new_i32();
+                        load_reg_var(s, tmp, rn);
                         if (gen_set_psr(s,
                               msr_mask(env, s, (insn >> 8) & 0xf, op == 1),
-                              op == 1, tmp))
+                              op == 1, tmp)) {
+                            tcg_temp_free_i32(tmp);
                             goto illegal_op;
+                        }
+                        tcg_temp_free_i32(tmp);
                         break;
                     case 2: /* cps, nop-hint.  */
                         if (((insn >> 8) & 7) == 0) {
@@ -7777,14 +7999,16 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                         break;
                     case 4: /* bxj */
                         /* Trivial implementation equivalent to bx.  */
-                        tmp = load_reg(s, rn);
+                        tmp = tcg_temp_new_i32();
+                        load_reg_var(s, tmp, rn);
                         gen_bx(s, tmp);
+                        tcg_temp_free_i32(tmp);
                         break;
                     case 5: /* Exception return.  */
                         /* Unpredictable in user mode.  */
                         goto illegal_op;
                     case 6: /* mrs cpsr.  */
-                        tmp = new_tmp();
+                        tmp = tcg_temp_new_i32();
                         if (IS_M(env)) {
                             addr = tcg_const_i32(insn & 0xff);
                             gen_helper_v7m_mrs(tmp, cpu_env, addr);
@@ -7793,13 +8017,16 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                             gen_helper_cpsr_read(tmp);
                         }
                         store_reg(s, rd, tmp);
+                        tcg_temp_free_i32(tmp);
                         break;
                     case 7: /* mrs spsr.  */
                         /* Not accessible in user mode.  */
                         if (IS_USER(s) || IS_M(env))
                             goto illegal_op;
-                        tmp = load_cpu_field(spsr);
+                        tmp = tcg_temp_new_i32();
+                        load_cpu_field(tmp, spsr);
                         store_reg(s, rd, tmp);
+                        tcg_temp_free_i32(tmp);
                         break;
                     }
                 }
@@ -7835,38 +8062,46 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     op = (insn >> 21) & 7;
                     imm = insn & 0x1f;
                     shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
+                    tmp = tcg_temp_new_i32();
                     if (rn == 15) {
-                        tmp = new_tmp();
                         tcg_gen_movi_i32(tmp, 0);
                     } else {
-                        tmp = load_reg(s, rn);
+                        load_reg_var(s, tmp, rn);
                     }
                     switch (op) {
                     case 2: /* Signed bitfield extract.  */
                         imm++;
-                        if (shift + imm > 32)
+                        if (shift + imm > 32) {
+                            tcg_temp_free_i32(tmp);
                             goto illegal_op;
+                        }
                         if (imm < 32)
                             gen_sbfx(tmp, shift, imm);
                         break;
                     case 6: /* Unsigned bitfield extract.  */
                         imm++;
-                        if (shift + imm > 32)
+                        if (shift + imm > 32) {
+                            tcg_temp_free_i32(tmp);
                             goto illegal_op;
+                        }
                         if (imm < 32)
                             gen_ubfx(tmp, shift, (1u << imm) - 1);
                         break;
                     case 3: /* Bitfield insert/clear.  */
-                        if (imm < shift)
+                        if (imm < shift) {
+                            tcg_temp_free_i32(tmp);
                             goto illegal_op;
+                        }
                         imm = imm + 1 - shift;
                         if (imm != 32) {
-                            tmp2 = load_reg(s, rd);
+                            tmp2 = tcg_temp_new_i32();
+                            load_reg_var(s, tmp2, rd);
                             gen_bfi(tmp, tmp2, tmp, shift, (1u << imm) - 1);
-                            dead_tmp(tmp2);
+                            tcg_temp_free_i32(tmp2);
                         }
                         break;
                     case 7:
+                        tcg_temp_free_i32(tmp);
                         goto illegal_op;
                     default: /* Saturate.  */
                         if (shift) {
@@ -7893,20 +8128,21 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                         break;
                     }
                     store_reg(s, rd, tmp);
+                    tcg_temp_free_i32(tmp);
                 } else {
                     imm = ((insn & 0x04000000) >> 15)
                           | ((insn & 0x7000) >> 4) | (insn & 0xff);
+                    tmp = tcg_temp_new_i32();
                     if (insn & (1 << 22)) {
                         /* 16-bit immediate.  */
                         imm |= (insn >> 4) & 0xf000;
                         if (insn & (1 << 23)) {
                             /* movt */
-                            tmp = load_reg(s, rd);
+                            load_reg_var(s, tmp, rd);
                             tcg_gen_ext16u_i32(tmp, tmp);
                             tcg_gen_ori_i32(tmp, tmp, imm << 16);
                         } else {
                             /* movw */
-                            tmp = new_tmp();
                             tcg_gen_movi_i32(tmp, imm);
                         }
                     } else {
@@ -7917,10 +8153,9 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                                 offset -= imm;
                             else
                                 offset += imm;
-                            tmp = new_tmp();
                             tcg_gen_movi_i32(tmp, offset);
                         } else {
-                            tmp = load_reg(s, rn);
+                            load_reg_var(s, tmp, rn);
                             if (insn & (1 << 23))
                                 tcg_gen_subi_i32(tmp, tmp, imm);
                             else
@@ -7928,6 +8163,7 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                         }
                     }
                     store_reg(s, rd, tmp);
+                    tcg_temp_free_i32(tmp);
                 }
             } else {
                 int shifter_out = 0;
@@ -7956,137 +8192,150 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     shifter_out = 1;
                     break;
                 }
-                tmp2 = new_tmp();
+                tmp2 = tcg_temp_new_i32();
                 tcg_gen_movi_i32(tmp2, imm);
                 rn = (insn >> 16) & 0xf;
+                tmp = tcg_temp_new_i32();
                 if (rn == 15) {
-                    tmp = new_tmp();
                     tcg_gen_movi_i32(tmp, 0);
                 } else {
-                    tmp = load_reg(s, rn);
+                    load_reg_var(s, tmp, rn);
                 }
                 op = (insn >> 21) & 0xf;
                 if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
-                                       shifter_out, tmp, tmp2))
+                                       shifter_out, tmp, tmp2)) {
+                    tcg_temp_free_i32(tmp);
+                    tcg_temp_free_i32(tmp2);
                     goto illegal_op;
-                dead_tmp(tmp2);
+                }
+                tcg_temp_free_i32(tmp2);
                 rd = (insn >> 8) & 0xf;
                 if (rd != 15) {
                     store_reg(s, rd, tmp);
-                } else {
-                    dead_tmp(tmp);
                 }
+                tcg_temp_free_i32(tmp);
             }
         }
         break;
     case 12: /* Load/store single data item.  */
         {
-        int postinc = 0;
-        int writeback = 0;
-        int user;
-        if ((insn & 0x01100000) == 0x01000000) {
-            if (disas_neon_ls_insn(env, s, insn))
-                goto illegal_op;
-            break;
-        }
-        user = IS_USER(s);
-        if (rn == 15) {
-            addr = new_tmp();
-            /* PC relative.  */
-            /* s->pc has already been incremented by 4.  */
-            imm = s->pc & 0xfffffffc;
-            if (insn & (1 << 23))
-                imm += insn & 0xfff;
-            else
-                imm -= insn & 0xfff;
-            tcg_gen_movi_i32(addr, imm);
-        } else {
-            addr = load_reg(s, rn);
-            if (insn & (1 << 23)) {
-                /* Positive offset.  */
-                imm = insn & 0xfff;
-                tcg_gen_addi_i32(addr, addr, imm);
+            int postinc = 0;
+            int writeback = 0;
+            int user;
+            if ((insn & 0x01100000) == 0x01000000) {
+                if (disas_neon_ls_insn(env, s, insn))
+                    goto illegal_op;
+                break;
+            }
+            user = IS_USER(s);
+            addr = tcg_temp_new_i32();
+            if (rn == 15) {
+                /* PC relative.  */
+                /* s->pc has already been incremented by 4.  */
+                imm = s->pc & 0xfffffffc;
+                if (insn & (1 << 23))
+                    imm += insn & 0xfff;
+                else
+                    imm -= insn & 0xfff;
+                tcg_gen_movi_i32(addr, imm);
             } else {
-                op = (insn >> 8) & 7;
-                imm = insn & 0xff;
-                switch (op) {
-                case 0: case 8: /* Shifted Register.  */
-                    shift = (insn >> 4) & 0xf;
-                    if (shift > 3)
-                        goto illegal_op;
-                    tmp = load_reg(s, rm);
-                    if (shift)
-                        tcg_gen_shli_i32(tmp, tmp, shift);
-                    tcg_gen_add_i32(addr, addr, tmp);
-                    dead_tmp(tmp);
-                    break;
-                case 4: /* Negative offset.  */
-                    tcg_gen_addi_i32(addr, addr, -imm);
-                    break;
-                case 6: /* User privilege.  */
-                    tcg_gen_addi_i32(addr, addr, imm);
-                    user = 1;
-                    break;
-                case 1: /* Post-decrement.  */
-                    imm = -imm;
-                    /* Fall through.  */
-                case 3: /* Post-increment.  */
-                    postinc = 1;
-                    writeback = 1;
-                    break;
-                case 5: /* Pre-decrement.  */
-                    imm = -imm;
-                    /* Fall through.  */
-                case 7: /* Pre-increment.  */
+                load_reg_var(s, addr, rn);
+                if (insn & (1 << 23)) {
+                    /* Positive offset.  */
+                    imm = insn & 0xfff;
                     tcg_gen_addi_i32(addr, addr, imm);
-                    writeback = 1;
-                    break;
-                default:
-                    goto illegal_op;
+                } else {
+                    op = (insn >> 8) & 7;
+                    imm = insn & 0xff;
+                    switch (op) {
+                    case 0: case 8: /* Shifted Register.  */
+                        shift = (insn >> 4) & 0xf;
+                        if (shift > 3) {
+                            tcg_temp_free_i32(addr);
+                            goto illegal_op;
+                        }
+                        tmp = tcg_temp_new_i32();
+                        load_reg_var(s, tmp, rm);
+                        if (shift)
+                            tcg_gen_shli_i32(tmp, tmp, shift);
+                        tcg_gen_add_i32(addr, addr, tmp);
+                        tcg_temp_free_i32(tmp);
+                        break;
+                    case 4: /* Negative offset.  */
+                        tcg_gen_addi_i32(addr, addr, -imm);
+                        break;
+                    case 6: /* User privilege.  */
+                        tcg_gen_addi_i32(addr, addr, imm);
+                        user = 1;
+                        break;
+                    case 1: /* Post-decrement.  */
+                        imm = -imm;
+                        /* Fall through.  */
+                    case 3: /* Post-increment.  */
+                        postinc = 1;
+                        writeback = 1;
+                        break;
+                    case 5: /* Pre-decrement.  */
+                        imm = -imm;
+                        /* Fall through.  */
+                    case 7: /* Pre-increment.  */
+                        tcg_gen_addi_i32(addr, addr, imm);
+                        writeback = 1;
+                        break;
+                    default:
+                        tcg_temp_free_i32(addr);
+                        goto illegal_op;
+                    }
                 }
             }
-        }
-        op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
-        if (insn & (1 << 20)) {
-            /* Load.  */
-            if (rs == 15 && op != 2) {
-                if (op & 2)
-                    goto illegal_op;
-                /* Memory hint.  Implemented as NOP.  */
-            } else {
-                switch (op) {
-                case 0: tmp = gen_ld8u(addr, user); break;
-                case 4: tmp = gen_ld8s(addr, user); break;
-                case 1: tmp = gen_ld16u(addr, user); break;
-                case 5: tmp = gen_ld16s(addr, user); break;
-                case 2: tmp = gen_ld32(addr, user); break;
-                default: goto illegal_op;
+            op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
+            if (insn & (1 << 20)) {
+                /* Load.  */
+                if (rs == 15 && op != 2) {
+                    if (op & 2) {
+                        tcg_temp_free_i32(addr);
+                        goto illegal_op;
+                    }
+                    /* Memory hint.  Implemented as NOP.  */
+                } else {
+                    tmp = tcg_temp_new_i32();
+                    switch (op) {
+                    case 0: gen_ld8u(tmp, addr, user); break;
+                    case 4: gen_ld8s(tmp, addr, user); break;
+                    case 1: gen_ld16u(tmp, addr, user); break;
+                    case 5: gen_ld16s(tmp, addr, user); break;
+                    case 2: gen_ld32(tmp, addr, user); break;
+                    default: tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); goto illegal_op;
+                    }
+                    if (rs == 15) {
+                        gen_bx(s, tmp);
+                    } else {
+                        store_reg(s, rs, tmp);
+                    }
+                    tcg_temp_free_i32(tmp);
                 }
+            } else {
+                /* Store.  */
                 if (rs == 15) {
-                    gen_bx(s, tmp);
-                } else {
-                    store_reg(s, rs, tmp);
+                    tcg_temp_free_i32(addr);
+                    goto illegal_op;
                 }
+                tmp = tcg_temp_new_i32();
+                load_reg_var(s, tmp, rs);
+                switch (op) {
+                case 0: gen_st8(tmp, addr, user); break;
+                case 1: gen_st16(tmp, addr, user); break;
+                case 2: gen_st32(tmp, addr, user); break;
+                default: tcg_temp_free_i32(tmp); tcg_temp_free_i32(addr); goto illegal_op;
+                }
+                tcg_temp_free_i32(tmp);
             }
-        } else {
-            /* Store.  */
-            if (rs == 15)
-                goto illegal_op;
-            tmp = load_reg(s, rs);
-            switch (op) {
-            case 0: gen_st8(tmp, addr, user); break;
-            case 1: gen_st16(tmp, addr, user); break;
-            case 2: gen_st32(tmp, addr, user); break;
-            default: goto illegal_op;
+            if (postinc)
+                tcg_gen_addi_i32(addr, addr, imm);
+            if (writeback) {
+                store_reg(s, rn, addr);
             }
-        }
-        if (postinc)
-            tcg_gen_addi_i32(addr, addr, imm);
-        if (writeback) {
-            store_reg(s, rn, addr);
-        } else {
-            dead_tmp(addr);
-        }
+            tcg_temp_free_i32(addr);
         }
         break;
     default:
@@ -8121,18 +8370,19 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
 
         rd = insn & 7;
         op = (insn >> 11) & 3;
+        tmp = tcg_temp_new_i32();
         if (op == 3) {
             /* add/subtract */
             rn = (insn >> 3) & 7;
-            tmp = load_reg(s, rn);
+            load_reg_var(s, tmp, rn);
+            tmp2 = tcg_temp_new_i32();
             if (insn & (1 << 10)) {
                 /* immediate */
-                tmp2 = new_tmp();
                 tcg_gen_movi_i32(tmp2, (insn >> 6) & 7);
             } else {
                 /* reg */
                 rm = (insn >> 6) & 7;
-                tmp2 = load_reg(s, rm);
+                load_reg_var(s, tmp2, rm);
             }
             if (insn & (1 << 9)) {
                 if (s->condexec_mask)
@@ -8145,45 +8395,43 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
                 else
                     gen_helper_add_cc(tmp, tmp, tmp2);
             }
-            dead_tmp(tmp2);
+            tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
         } else {
             /* shift immediate */
             rm = (insn >> 3) & 7;
             shift = (insn >> 6) & 0x1f;
-            tmp = load_reg(s, rm);
+            load_reg_var(s, tmp, rm);
             gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
             if (!s->condexec_mask)
                 gen_logic_CC(tmp);
             store_reg(s, rd, tmp);
         }
+        tcg_temp_free_i32(tmp);
         break;
     case 2: case 3:
         /* arithmetic large immediate */
         op = (insn >> 11) & 3;
         rd = (insn >> 8) & 0x7;
+        tmp = tcg_temp_new_i32();
         if (op == 0) { /* mov */
-            tmp = new_tmp();
             tcg_gen_movi_i32(tmp, insn & 0xff);
             if (!s->condexec_mask)
                 gen_logic_CC(tmp);
             store_reg(s, rd, tmp);
         } else {
-            tmp = load_reg(s, rd);
-            tmp2 = new_tmp();
+            load_reg_var(s, tmp, rd);
+            tmp2 = tcg_temp_new_i32();
             tcg_gen_movi_i32(tmp2, insn & 0xff);
             switch (op) {
             case 1: /* cmp */
                 gen_helper_sub_cc(tmp, tmp, tmp2);
-                dead_tmp(tmp);
-                dead_tmp(tmp2);
                 break;
             case 2: /* add */
                 if (s->condexec_mask)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 else
                     gen_helper_add_cc(tmp, tmp, tmp2);
-                dead_tmp(tmp2);
                 store_reg(s, rd, tmp);
                 break;
             case 3: /* sub */
@@ -8191,11 +8439,12 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 else
                     gen_helper_sub_cc(tmp, tmp, tmp2);
-                dead_tmp(tmp2);
                 store_reg(s, rd, tmp);
                 break;
             }
+            tcg_temp_free_i32(tmp2);
         }
+        tcg_temp_free_i32(tmp);
         break;
     case 4:
         if (insn & (1 << 11)) {
@@ -8203,11 +8452,13 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
             /* load pc-relative.  Bit 1 of PC is ignored.  */
             val = s->pc + 2 + ((insn & 0xff) * 4);
             val &= ~(uint32_t)2;
-            addr = new_tmp();
+            addr = tcg_temp_new_i32();
             tcg_gen_movi_i32(addr, val);
-            tmp = gen_ld32(addr, IS_USER(s));
-            dead_tmp(addr);
+            tmp = tcg_temp_new_i32();
+            gen_ld32(tmp, addr, IS_USER(s));
+            tcg_temp_free_i32(addr);
             store_reg(s, rd, tmp);
+            tcg_temp_free_i32(tmp);
             break;
         }
         if (insn & (1 << 10)) {
@@ -8215,36 +8466,40 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
             rd = (insn & 7) | ((insn >> 4) & 8);
             rm = (insn >> 3) & 0xf;
             op = (insn >> 8) & 3;
+            tmp = tcg_temp_new_i32();
             switch (op) {
             case 0: /* add */
-                tmp = load_reg(s, rd);
-                tmp2 = load_reg(s, rm);
+                load_reg_var(s, tmp, rd);
+                tmp2 = tcg_temp_new_i32();
+                load_reg_var(s, tmp2, rm);
                 tcg_gen_add_i32(tmp, tmp, tmp2);
-                dead_tmp(tmp2);
+                tcg_temp_free_i32(tmp2);
                 store_reg(s, rd, tmp);
                 break;
             case 1: /* cmp */
-                tmp = load_reg(s, rd);
-                tmp2 = load_reg(s, rm);
+                load_reg_var(s, tmp, rd);
+                tmp2 = tcg_temp_new_i32();
+                load_reg_var(s, tmp2, rm);
                 gen_helper_sub_cc(tmp, tmp, tmp2);
-                dead_tmp(tmp2);
-                dead_tmp(tmp);
+                tcg_temp_free_i32(tmp2);
                 break;
             case 2: /* mov/cpy */
-                tmp = load_reg(s, rm);
+                load_reg_var(s, tmp, rm);
                 store_reg(s, rd, tmp);
                 break;
             case 3:/* branch [and link] exchange thumb register */
-                tmp = load_reg(s, rm);
+                load_reg_var(s, tmp, rm);
                 if (insn & (1 << 7)) {
                     val = (uint32_t)s->pc | 1;
-                    tmp2 = new_tmp();
+                    tmp2 = tcg_temp_new_i32();
                     tcg_gen_movi_i32(tmp2, val);
                     store_reg(s, 14, tmp2);
+                    tcg_temp_free_i32(tmp2);
                 }
                 gen_bx(s, tmp);
                 break;
             }
+            tcg_temp_free_i32(tmp);
             break;
         }
 
@@ -8263,15 +8518,17 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
         }
 
         if (op == 9) { /* neg */
-            tmp = new_tmp();
+            tmp = tcg_temp_new_i32();
             tcg_gen_movi_i32(tmp, 0);
         } else if (op != 0xf) { /* mvn doesn't read its first operand */
-            tmp = load_reg(s, rd);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rd);
         } else {
             TCGV_UNUSED(tmp);
         }
 
-        tmp2 = load_reg(s, rm);
+        tmp2 = tcg_temp_new_i32();
+        load_reg_var(s, tmp2, rm);
         switch (op) {
         case 0x0: /* and */
             tcg_gen_and_i32(tmp, tmp, tmp2);
@@ -8373,15 +8630,13 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
         if (rd != 16) {
             if (val) {
                 store_reg(s, rm, tmp2);
-                if (op != 0xf)
-                    dead_tmp(tmp);
             } else {
                 store_reg(s, rd, tmp);
-                dead_tmp(tmp2);
             }
-        } else {
-            dead_tmp(tmp);
-            dead_tmp(tmp2);
+        }
+        tcg_temp_free_i32(tmp2);
+        if (op != 0xf) {
+            tcg_temp_free_i32(tmp);
         }
         break;
 
@@ -8391,13 +8646,14 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
         rn = (insn >> 3) & 7;
         rm = (insn >> 6) & 7;
         op = (insn >> 9) & 7;
-        addr = load_reg(s, rn);
-        tmp = load_reg(s, rm);
+        addr = tcg_temp_new_i32();
+        load_reg_var(s, addr, rn);
+        tmp = tcg_temp_new_i32();
+        load_reg_var(s, tmp, rm);
         tcg_gen_add_i32(addr, addr, tmp);
-        dead_tmp(tmp);
 
         if (op < 3) /* store */
-            tmp = load_reg(s, rd);
+            load_reg_var(s, tmp, rd);
 
         switch (op) {
         case 0: /* str */
@@ -8410,119 +8666,133 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
             gen_st8(tmp, addr, IS_USER(s));
             break;
         case 3: /* ldrsb */
-            tmp = gen_ld8s(addr, IS_USER(s));
+            gen_ld8s(tmp, addr, IS_USER(s));
             break;
         case 4: /* ldr */
-            tmp = gen_ld32(addr, IS_USER(s));
+            gen_ld32(tmp, addr, IS_USER(s));
             break;
         case 5: /* ldrh */
-            tmp = gen_ld16u(addr, IS_USER(s));
+            gen_ld16u(tmp, addr, IS_USER(s));
             break;
         case 6: /* ldrb */
-            tmp = gen_ld8u(addr, IS_USER(s));
+            gen_ld8u(tmp, addr, IS_USER(s));
             break;
         case 7: /* ldrsh */
-            tmp = gen_ld16s(addr, IS_USER(s));
+            gen_ld16s(tmp, addr, IS_USER(s));
             break;
         }
         if (op >= 3) /* load */
             store_reg(s, rd, tmp);
-        dead_tmp(addr);
+        tcg_temp_free_i32(tmp);
+        tcg_temp_free_i32(addr);
         break;
 
     case 6:
         /* load/store word immediate offset */
         rd = insn & 7;
         rn = (insn >> 3) & 7;
-        addr = load_reg(s, rn);
+        addr = tcg_temp_new_i32();
+        load_reg_var(s, addr, rn);
         val = (insn >> 4) & 0x7c;
         tcg_gen_addi_i32(addr, addr, val);
 
+        tmp = tcg_temp_new_i32();
         if (insn & (1 << 11)) {
             /* load */
-            tmp = gen_ld32(addr, IS_USER(s));
+            gen_ld32(tmp, addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
-            tmp = load_reg(s, rd);
+            load_reg_var(s, tmp, rd);
             gen_st32(tmp, addr, IS_USER(s));
         }
-        dead_tmp(addr);
+        tcg_temp_free_i32(tmp);
+        tcg_temp_free_i32(addr);
         break;
 
     case 7:
         /* load/store byte immediate offset */
         rd = insn & 7;
         rn = (insn >> 3) & 7;
-        addr = load_reg(s, rn);
+        addr = tcg_temp_new_i32();
+        load_reg_var(s, addr, rn);
         val = (insn >> 6) & 0x1f;
         tcg_gen_addi_i32(addr, addr, val);
 
+        tmp = tcg_temp_new_i32();
         if (insn & (1 << 11)) {
             /* load */
-            tmp = gen_ld8u(addr, IS_USER(s));
+            gen_ld8u(tmp, addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
-            tmp = load_reg(s, rd);
+            load_reg_var(s, tmp, rd);
             gen_st8(tmp, addr, IS_USER(s));
         }
-        dead_tmp(addr);
+        tcg_temp_free_i32(tmp);
+        tcg_temp_free_i32(addr);
         break;
 
     case 8:
         /* load/store halfword immediate offset */
         rd = insn & 7;
         rn = (insn >> 3) & 7;
-        addr = load_reg(s, rn);
+        addr = tcg_temp_new_i32();
+        load_reg_var(s, addr, rn);
         val = (insn >> 5) & 0x3e;
         tcg_gen_addi_i32(addr, addr, val);
 
+        tmp = tcg_temp_new_i32();
         if (insn & (1 << 11)) {
             /* load */
-            tmp = gen_ld16u(addr, IS_USER(s));
+            gen_ld16u(tmp, addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
-            tmp = load_reg(s, rd);
+            load_reg_var(s, tmp, rd);
             gen_st16(tmp, addr, IS_USER(s));
         }
-        dead_tmp(addr);
+        tcg_temp_free_i32(tmp);
+        tcg_temp_free_i32(addr);
         break;
 
     case 9:
         /* load/store from stack */
         rd = (insn >> 8) & 7;
-        addr = load_reg(s, 13);
+        addr = tcg_temp_new_i32();
+        load_reg_var(s, addr, 13);
         val = (insn & 0xff) * 4;
         tcg_gen_addi_i32(addr, addr, val);
 
+        tmp = tcg_temp_new_i32();
         if (insn & (1 << 11)) {
             /* load */
-            tmp = gen_ld32(addr, IS_USER(s));
+            gen_ld32(tmp, addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
-            tmp = load_reg(s, rd);
+            load_reg_var(s, tmp, rd);
             gen_st32(tmp, addr, IS_USER(s));
         }
-        dead_tmp(addr);
+        tcg_temp_free_i32(tmp);
+        tcg_temp_free_i32(addr);
         break;
 
     case 10:
         /* add to high reg */
         rd = (insn >> 8) & 7;
+        tmp = tcg_temp_new_i32();
         if (insn & (1 << 11)) {
             /* SP */
-            tmp = load_reg(s, 13);
+            load_reg_var(s, tmp, 13);
         } else {
             /* PC. bit 1 is ignored.  */
-            tmp = new_tmp();
             tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
         }
         val = (insn & 0xff) * 4;
         tcg_gen_addi_i32(tmp, tmp, val);
         store_reg(s, rd, tmp);
+        tcg_temp_free_i32(tmp);
         break;
 
     case 11:
@@ -8531,19 +8801,22 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
         switch (op) {
         case 0:
             /* adjust stack pointer */
-            tmp = load_reg(s, 13);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, 13);
             val = (insn & 0x7f) * 4;
             if (insn & (1 << 7))
                 val = -(int32_t)val;
             tcg_gen_addi_i32(tmp, tmp, val);
             store_reg(s, 13, tmp);
+            tcg_temp_free_i32(tmp);
             break;
 
         case 2: /* sign/zero extend.  */
             ARCH(6);
             rd = insn & 7;
             rm = (insn >> 3) & 7;
-            tmp = load_reg(s, rm);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rm);
             switch ((insn >> 6) & 3) {
             case 0: gen_sxth(tmp); break;
             case 1: gen_sxtb(tmp); break;
@@ -8551,10 +8824,12 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
             case 3: gen_uxtb(tmp); break;
             }
             store_reg(s, rd, tmp);
+            tcg_temp_free_i32(tmp);
             break;
         case 4: case 5: case 0xc: case 0xd:
             /* push/pop */
-            addr = load_reg(s, 13);
+            addr = tcg_temp_new_i32();
+            load_reg_var(s, addr, 13);
             if (insn & (1 << 8))
                 offset = 4;
             else
@@ -8568,30 +8843,34 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
             }
             for (i = 0; i < 8; i++) {
                 if (insn & (1 << i)) {
+                    tmp = tcg_temp_new_i32();
                     if (insn & (1 << 11)) {
                         /* pop */
-                        tmp = gen_ld32(addr, IS_USER(s));
+                        gen_ld32(tmp, addr, IS_USER(s));
                         store_reg(s, i, tmp);
                     } else {
                         /* push */
-                        tmp = load_reg(s, i);
+                        load_reg_var(s, tmp, i);
                         gen_st32(tmp, addr, IS_USER(s));
                     }
+                    tcg_temp_free_i32(tmp);
                     /* advance to the next address.  */
                     tcg_gen_addi_i32(addr, addr, 4);
                 }
             }
             TCGV_UNUSED(tmp);
             if (insn & (1 << 8)) {
+                tmp = tcg_temp_new_i32();
                 if (insn & (1 << 11)) {
                     /* pop pc */
-                    tmp = gen_ld32(addr, IS_USER(s));
+                    gen_ld32(tmp, addr, IS_USER(s));
                     /* don't set the pc until the rest of the instruction
                        has completed */
                 } else {
                     /* push lr */
-                    tmp = load_reg(s, 14);
+                    load_reg_var(s, tmp, 14);
                     gen_st32(tmp, addr, IS_USER(s));
+                    tcg_temp_free_i32(tmp);
                 }
                 tcg_gen_addi_i32(addr, addr, 4);
             }
@@ -8600,21 +8879,25 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
             }
             /* write back the new stack pointer */
             store_reg(s, 13, addr);
+            tcg_temp_free_i32(addr);
             /* set the new PC value */
-            if ((insn & 0x0900) == 0x0900)
+            if ((insn & 0x0900) == 0x0900) {
                 gen_bx(s, tmp);
+                tcg_temp_free_i32(tmp);
+            }
             break;
 
         case 1: case 3: case 9: case 11: /* czb */
             rm = insn & 7;
-            tmp = load_reg(s, rm);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rm);
             s->condlabel = gen_new_label();
             s->condjmp = 1;
             if (insn & (1 << 11))
                 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
             else
                 tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
-            dead_tmp(tmp);
+            tcg_temp_free_i32(tmp);
             offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
             val = (uint32_t)s->pc + 2;
             val += offset;
@@ -8643,14 +8926,16 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
             ARCH(6);
             rn = (insn >> 3) & 0x7;
             rd = insn & 0x7;
-            tmp = load_reg(s, rn);
+            tmp = tcg_temp_new_i32();
+            load_reg_var(s, tmp, rn);
             switch ((insn >> 6) & 3) {
             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
             case 1: gen_rev16(tmp); break;
             case 3: gen_revsh(tmp); break;
-            default: goto illegal_op;
+            default: tcg_temp_free_i32(tmp); goto illegal_op;
             }
             store_reg(s, rd, tmp);
+            tcg_temp_free_i32(tmp);
             break;
 
         case 6: /* cps */
@@ -8690,18 +8975,21 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
     case 12:
         /* load/store multiple */
         rn = (insn >> 8) & 0x7;
-        addr = load_reg(s, rn);
+        addr = tcg_temp_new_i32();
+        load_reg_var(s, addr, rn);
         for (i = 0; i < 8; i++) {
             if (insn & (1 << i)) {
+                tmp = tcg_temp_new_i32();
                 if (insn & (1 << 11)) {
                     /* load */
-                    tmp = gen_ld32(addr, IS_USER(s));
+                    gen_ld32(tmp, addr, IS_USER(s));
                     store_reg(s, i, tmp);
                 } else {
                     /* store */
-                    tmp = load_reg(s, i);
+                    load_reg_var(s, tmp, i);
                     gen_st32(tmp, addr, IS_USER(s));
                 }
+                tcg_temp_free_i32(tmp);
                 /* advance to the next address */
                 tcg_gen_addi_i32(addr, addr, 4);
             }
@@ -8709,9 +8997,8 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
         /* Base register writeback.  */
         if ((insn & (1 << rn)) == 0) {
             store_reg(s, rn, addr);
-        } else {
-            dead_tmp(addr);
         }
+        tcg_temp_free_i32(addr);
         break;
 
     case 13:
@@ -8789,8 +9076,6 @@  static inline void gen_intermediate_code_internal(CPUState *env,
     int max_insns;
 
     /* generate intermediate code */
-    num_temps = 0;
-
     pc_start = tb->pc;
 
     dc->tb = tb;
@@ -8829,12 +9114,12 @@  static inline void gen_intermediate_code_internal(CPUState *env,
     gen_icount_start();
     /* Reset the conditional execution bits immediately. This avoids
        complications trying to do it at the end of the block.  */
-    if (env->condexec_bits)
-      {
-        TCGv tmp = new_tmp();
+    if (env->condexec_bits) {
+        TCGv tmp = tcg_temp_new_i32();
         tcg_gen_movi_i32(tmp, 0);
         store_cpu_field(tmp, condexec_bits);
-      }
+        tcg_temp_free_i32(tmp);
+    }
     do {
 #ifdef CONFIG_USER_ONLY
         /* Intercept jump to the magic kernel page.  */
@@ -8898,10 +9183,6 @@  static inline void gen_intermediate_code_internal(CPUState *env,
         } else {
             disas_arm_insn(env, dc);
         }
-        if (num_temps) {
-            fprintf(stderr, "Internal resource leak before %08x\n", dc->pc);
-            num_temps = 0;
-        }
 
         if (dc->condjmp && !dc->is_jmp) {
             gen_set_label(dc->condlabel);