diff mbox

[2/4] tcg/i386: Add shortcuts for registers used in L constraint

Message ID 1347557866-7256-3-git-send-email-sw@weilnetz.de
State Accepted
Headers show

Commit Message

Stefan Weil Sept. 13, 2012, 5:37 p.m. UTC
While 64 bit hosts use the first three registers which are also used
as function input parameters, 32 bit hosts use TCG_REG_EAX and
TCG_REG_EDX which are not used in parameter passing.

After defining new register macros for the registers used in L
constraint, the patch replaces most occurrences of
tcg_target_call_iarg_regs[0], tcg_target_call_iarg_regs[1] and
tcg_target_call_iarg_regs[2] by those new macros.

tcg_target_call_iarg_regs remains unchanged when it is used for input
arguments (only with 64 bit hosts) before tcg_out_calli.

A comment related to those registers was fixed, too.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 tcg/i386/tcg-target.c |   84 +++++++++++++++++++++++--------------------------
 1 file changed, 40 insertions(+), 44 deletions(-)

Comments

Aurelien Jarno Sept. 13, 2012, 9:21 p.m. UTC | #1
On Thu, Sep 13, 2012 at 07:37:44PM +0200, Stefan Weil wrote:
> While 64 bit hosts use the first three registers which are also used
> as function input parameters, 32 bit hosts use TCG_REG_EAX and
> TCG_REG_EDX which are not used in parameter passing.
> 
> After defining new register macros for the registers used in L
> constraint, the patch replaces most occurrences of
> tcg_target_call_iarg_regs[0], tcg_target_call_iarg_regs[1] and
> tcg_target_call_iarg_regs[2] by those new macros.
> 
> tcg_target_call_iarg_regs remains unchanged when it is used for input
> arguments (only with 64 bit hosts) before tcg_out_calli.
> 
> A comment related to those registers was fixed, too.
> 
> Signed-off-by: Stefan Weil <sw@weilnetz.de>
> ---
>  tcg/i386/tcg-target.c |   84 +++++++++++++++++++++++--------------------------
>  1 file changed, 40 insertions(+), 44 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 43b5572..ef63967 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -88,6 +88,16 @@ static const int tcg_target_call_oarg_regs[] = {
>  #endif
>  };
>  
> +/* Registers used with L constraint. */
> +#if TCG_TARGET_REG_BITS == 64
> +# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
> +# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
> +# define TCG_REG_L2 tcg_target_call_iarg_regs[2]
> +#else
> +# define TCG_REG_L0 TCG_REG_EAX
> +# define TCG_REG_L1 TCG_REG_EDX
> +#endif
> +
>  static uint8_t *tb_ret_addr;
>  
>  static void patch_reloc(uint8_t *code_ptr, int type,
> @@ -181,15 +191,15 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
>          ct->ct |= TCG_CT_REG;
>          if (TCG_TARGET_REG_BITS == 64) {
>              tcg_regset_set32(ct->u.regs, 0, 0xffff);
> -            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
> -            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
> +            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
> +            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
>  #ifdef CONFIG_TCG_PASS_AREG0
> -            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
> +            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L2);
>  #endif
>          } else {
>              tcg_regset_set32(ct->u.regs, 0, 0xff);
> -            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
> -            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
> +            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
> +            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
>          }
>          break;
>  
> @@ -1031,8 +1041,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
>                                      uint8_t **label_ptr, int which)
>  {
>      const int addrlo = args[addrlo_idx];
> -    const int r0 = tcg_target_call_iarg_regs[0];
> -    const int r1 = tcg_target_call_iarg_regs[1];
> +    const int r0 = TCG_REG_L0;
> +    const int r1 = TCG_REG_L1;
>      TCGType type = TCG_TYPE_I32;
>      int rexw = 0;
>  
> @@ -1194,8 +1204,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>                       label_ptr, offsetof(CPUTLBEntry, addr_read));
>  
>      /* TLB Hit.  */
> -    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
> -                           tcg_target_call_iarg_regs[0], 0, opc);
> +    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
>  
>      /* jmp label2 */
>      tcg_out8(s, OPC_JMP_short);
> @@ -1231,14 +1240,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>                   mem_index);
>  #ifdef CONFIG_TCG_PASS_AREG0
>      /* XXX/FIXME: suboptimal */
> -    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
> -                tcg_target_call_iarg_regs[2]);
> -    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
> -                tcg_target_call_iarg_regs[1]);
> -    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
> -                tcg_target_call_iarg_regs[0]);
> -    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
> -                TCG_AREG0);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
>  #endif
>  #endif
>  
> @@ -1305,11 +1310,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>                 use the ADDR32 prefix.  For now, do nothing.  */
>  
>              if (offset != GUEST_BASE) {
> -                tcg_out_movi(s, TCG_TYPE_I64,
> -                             tcg_target_call_iarg_regs[0], GUEST_BASE);
> -                tgen_arithr(s, ARITH_ADD + P_REXW,
> -                            tcg_target_call_iarg_regs[0], base);
> -                base = tcg_target_call_iarg_regs[0];
> +                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
> +                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
> +                base = TCG_REG_L0;
>                  offset = 0;
>              }
>          }
> @@ -1330,8 +1333,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
>      /* ??? Ideally we wouldn't need a scratch register.  For user-only,
>         we could perform the bswap twice to restore the original value
>         instead of moving to the scratch.  But as it is, the L constraint
> -       means that the second argument reg is definitely free here.  */
> -    int scratch = tcg_target_call_iarg_regs[1];
> +       means that TCG_REG_L1 is definitely free here.  */
> +    const int scratch = TCG_REG_L1;
>  
>      switch (sizeop) {
>      case 0:
> @@ -1404,8 +1407,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>                       label_ptr, offsetof(CPUTLBEntry, addr_write));
>  
>      /* TLB Hit.  */
> -    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
> -                           tcg_target_call_iarg_regs[0], 0, opc);
> +    tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
>  
>      /* jmp label2 */
>      tcg_out8(s, OPC_JMP_short);
> @@ -1442,19 +1444,15 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>  #endif
>  #else
>      tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
> -                tcg_target_call_iarg_regs[1], data_reg);
> -    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
> +                TCG_REG_L1, data_reg);
> +    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_L2, mem_index);
>      stack_adjust = 0;
>  #ifdef CONFIG_TCG_PASS_AREG0
>      /* XXX/FIXME: suboptimal */
> -    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
> -                tcg_target_call_iarg_regs[2]);
> -    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
> -                tcg_target_call_iarg_regs[1]);
> -    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
> -                tcg_target_call_iarg_regs[0]);
> -    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
> -                TCG_AREG0);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
> +    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
>  #endif
>  #endif
>  
> @@ -1482,11 +1480,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>                 use the ADDR32 prefix.  For now, do nothing.  */
>  
>              if (offset != GUEST_BASE) {
> -                tcg_out_movi(s, TCG_TYPE_I64,
> -                             tcg_target_call_iarg_regs[0], GUEST_BASE);
> -                tgen_arithr(s, ARITH_ADD + P_REXW,
> -                            tcg_target_call_iarg_regs[0], base);
> -                base = tcg_target_call_iarg_regs[0];
> +                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
> +                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
> +                base = TCG_REG_L0;
>                  offset = 0;
>              }
>          }
> @@ -2038,15 +2034,15 @@ static void tcg_target_qemu_prologue(TCGContext *s)
>  #if TCG_TARGET_REG_BITS == 32
>      tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
>                 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
> -    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
> +    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L1, TCG_REG_ESP,
>                 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
>  #else
> -    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
> +    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_L0);
>  #endif
>      tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
>  
>      /* jmp *tb.  */
> -    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
> +    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_L1);

I don't think this is correct here. This has nothing to do with the L
constraint; it really refers to the first and second arguments passed
to the prologue.

>  
>      /* TB epilogue */
>      tb_ret_addr = s->code_ptr;
> -- 
> 1.7.10
>
Richard Henderson Sept. 13, 2012, 9:30 p.m. UTC | #2
On 09/13/2012 02:21 PM, Aurelien Jarno wrote:
>> >  #if TCG_TARGET_REG_BITS == 32
>> >      tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
>> >                 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
>> > -    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
>> > +    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L1, TCG_REG_ESP,
>> >                 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
>> >  #else
>> > -    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
>> > +    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_L0);
>> >  #endif
>> >      tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
>> >  
>> >      /* jmp *tb.  */
>> > -    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
>> > +    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_L1);
> I don't think this is correct here. This has nothing to do with the L
> constraint, it's really refers to the first and seconds argument passed
> to the prologue.
> 

In the 32-bit case, our use of TCG_REG_L1 really is just a temporary.
We loaded it from the stack just above there.

For the 64-bit case you're right.  But that's exactly how we set up
the Ln macros, so I think that's ok.


r~
Aurelien Jarno Sept. 13, 2012, 9:47 p.m. UTC | #3
On Thu, Sep 13, 2012 at 02:30:36PM -0700, Richard Henderson wrote:
> On 09/13/2012 02:21 PM, Aurelien Jarno wrote:
> >> >  #if TCG_TARGET_REG_BITS == 32
> >> >      tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
> >> >                 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
> >> > -    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
> >> > +    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L1, TCG_REG_ESP,
> >> >                 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
> >> >  #else
> >> > -    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
> >> > +    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_L0);
> >> >  #endif
> >> >      tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
> >> >  
> >> >      /* jmp *tb.  */
> >> > -    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
> >> > +    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_L1);
> > I don't think this is correct here. This has nothing to do with the L
> > constraint, it's really refers to the first and seconds argument passed
> > to the prologue.
> > 
> 
> In the 32-bit case, our use of TCG_REG_L1 really is just a temporary.
> We loaded it from the stack just above there.

Yeah, I missed this one. This should probably be replaced directly by
the name of the register.

> For the 64-bit case you're right.  But that's exactly how we set up
> the Ln macros, so I think that's ok.
> 

Then we should change the name and especially the comment above it. They
are just #define for the first argument registers, and not related to
the 'L' constraint.
Peter Maydell Sept. 13, 2012, 10:03 p.m. UTC | #4
On 13 September 2012 22:47, Aurelien Jarno <aurelien@aurel32.net> wrote:
> On Thu, Sep 13, 2012 at 02:30:36PM -0700, Richard Henderson wrote:
>> On 09/13/2012 02:21 PM, Aurelien Jarno wrote:
>> >> >  #if TCG_TARGET_REG_BITS == 32
>> >> >      tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
>> >> >                 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
>> >> > -    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
>> >> > +    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L1, TCG_REG_ESP,
>> >> >                 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
>> >> >  #else
>> >> > -    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
>> >> > +    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_L0);
>> >> >  #endif
>> >> >      tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
>> >> >
>> >> >      /* jmp *tb.  */
>> >> > -    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
>> >> > +    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_L1);
>> > I don't think this is correct here. This has nothing to do with the L
>> > constraint, it's really refers to the first and seconds argument passed
>> > to the prologue.
>> >
>>
>> In the 32-bit case, our use of TCG_REG_L1 really is just a temporary.
>> We loaded it from the stack just above there.
>
> Yeah, I missed this one. This should probably be replaced directly by
> the name of the register.

That would mean you'd have to #ifdef it though, since the register you
want is "random temporary for 32 bits, but 2nd input argument register
for 64 bits". (Or you could just use TCG_REG_L1.)

>> For the 64-bit case you're right.  But that's exactly how we set up
>> the Ln macros, so I think that's ok.
>>
>
> Then we should change the name and especially the comment above it. They
> are just #define for the first argument registers, and not related to
> the 'L' constraint.

Hard to come up with a snappy name for "register which is the Nth
input argument if input args are in registers, but an arbitrary
temp reg otherwise, and which is in the forbidden list for the L
constraint"...

The alternative to that would be to pull out the 32 and 64 bit
cases rather than trying to share as much of the generation
code as we do at the moment. That might be clearer to read
but it would be a larger change.

-- PMM
Richard Henderson Sept. 13, 2012, 10:20 p.m. UTC | #5
On 09/13/2012 03:03 PM, Peter Maydell wrote:
> Hard to come up with a snappy name for "register which is the Nth
> input argument if input args are in registers, but an arbitrary
> temp reg otherwise, and which is in the forbidden list for the L
> constraint"...

I'm more than happy to let "register which is ..." be a comment
just before the definitions of TCG_REG_Ln at the top...



r~
Stefan Weil Sept. 14, 2012, 5:18 a.m. UTC | #6
Am 14.09.2012 00:20, schrieb Richard Henderson:
> On 09/13/2012 03:03 PM, Peter Maydell wrote:
>> Hard to come up with a snappy name for "register which is the Nth
>> input argument if input args are in registers, but an arbitrary
>> temp reg otherwise, and which is in the forbidden list for the L
>> constraint"...
> I'm more than happy to let "register which is ..." be a comment
> just before the definitions of TCG_REG_Ln at the top...
>
>
>
> r~
>

This looks like a pragmatic solution.

If everybody agrees, I'll send an updated patch.

Or Aurelien or Blue can commit this series and fix the comment
as suggested by Richard. Maybe a comment in the prologue code
would also be good.

Thanks to all reviewers.

Stefan
Aurelien Jarno Sept. 22, 2012, 2:55 p.m. UTC | #7
On Fri, Sep 14, 2012 at 07:18:17AM +0200, Stefan Weil wrote:
> Am 14.09.2012 00:20, schrieb Richard Henderson:
> >On 09/13/2012 03:03 PM, Peter Maydell wrote:
> >>Hard to come up with a snappy name for "register which is the Nth
> >>input argument if input args are in registers, but an arbitrary
> >>temp reg otherwise, and which is in the forbidden list for the L
> >>constraint"...
> >I'm more than happy to let "register which is ..." be a comment
> >just before the definitions of TCG_REG_Ln at the top...
> >
> >
> >
> >r~
> >
> 
> This looks like a pragmatic solution.
> 
> If everybody agrees, I'll send an updated patch.
> 
> Or Aurelien or Blue can commit this series and fix the comment
> as suggested by Richard. Maybe a comment in theprologue code
> would also be good.
> 
> Thanks to all reviewers.
> 

Your patch didn't even compile on i386. I have fixed it. I also changed
the prologue on i386 to not load the TB address through a register,
given i386 has so many addressing modes. This way the issue is solved.
diff mbox

Patch

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 43b5572..ef63967 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -88,6 +88,16 @@  static const int tcg_target_call_oarg_regs[] = {
 #endif
 };
 
+/* Registers used with L constraint. */
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
+# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
+# define TCG_REG_L2 tcg_target_call_iarg_regs[2]
+#else
+# define TCG_REG_L0 TCG_REG_EAX
+# define TCG_REG_L1 TCG_REG_EDX
+#endif
+
 static uint8_t *tb_ret_addr;
 
 static void patch_reloc(uint8_t *code_ptr, int type,
@@ -181,15 +191,15 @@  static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         ct->ct |= TCG_CT_REG;
         if (TCG_TARGET_REG_BITS == 64) {
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
-            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
-            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
 #ifdef CONFIG_TCG_PASS_AREG0
-            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L2);
 #endif
         } else {
             tcg_regset_set32(ct->u.regs, 0, 0xff);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
         }
         break;
 
@@ -1031,8 +1041,8 @@  static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                     uint8_t **label_ptr, int which)
 {
     const int addrlo = args[addrlo_idx];
-    const int r0 = tcg_target_call_iarg_regs[0];
-    const int r1 = tcg_target_call_iarg_regs[1];
+    const int r0 = TCG_REG_L0;
+    const int r1 = TCG_REG_L1;
     TCGType type = TCG_TYPE_I32;
     int rexw = 0;
 
@@ -1194,8 +1204,7 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                      label_ptr, offsetof(CPUTLBEntry, addr_read));
 
     /* TLB Hit.  */
-    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
-                           tcg_target_call_iarg_regs[0], 0, opc);
+    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
 
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1231,14 +1240,10 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                  mem_index);
 #ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
 #endif
 #endif
 
@@ -1305,11 +1310,9 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                use the ADDR32 prefix.  For now, do nothing.  */
 
             if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64,
-                             tcg_target_call_iarg_regs[0], GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW,
-                            tcg_target_call_iarg_regs[0], base);
-                base = tcg_target_call_iarg_regs[0];
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+                base = TCG_REG_L0;
                 offset = 0;
             }
         }
@@ -1330,8 +1333,8 @@  static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
     /* ??? Ideally we wouldn't need a scratch register.  For user-only,
        we could perform the bswap twice to restore the original value
        instead of moving to the scratch.  But as it is, the L constraint
-       means that the second argument reg is definitely free here.  */
-    int scratch = tcg_target_call_iarg_regs[1];
+       means that TCG_REG_L1 is definitely free here.  */
+    const int scratch = TCG_REG_L1;
 
     switch (sizeop) {
     case 0:
@@ -1404,8 +1407,7 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                      label_ptr, offsetof(CPUTLBEntry, addr_write));
 
     /* TLB Hit.  */
-    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
-                           tcg_target_call_iarg_regs[0], 0, opc);
+    tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
 
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1442,19 +1444,15 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 #endif
 #else
     tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-                tcg_target_call_iarg_regs[1], data_reg);
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
+                TCG_REG_L1, data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_L2, mem_index);
     stack_adjust = 0;
 #ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
 #endif
 #endif
 
@@ -1482,11 +1480,9 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                use the ADDR32 prefix.  For now, do nothing.  */
 
             if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64,
-                             tcg_target_call_iarg_regs[0], GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW,
-                            tcg_target_call_iarg_regs[0], base);
-                base = tcg_target_call_iarg_regs[0];
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+                base = TCG_REG_L0;
                 offset = 0;
             }
         }
@@ -2038,15 +2034,15 @@  static void tcg_target_qemu_prologue(TCGContext *s)
 #if TCG_TARGET_REG_BITS == 32
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
                (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
-    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L1, TCG_REG_ESP,
                (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
 #else
-    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_L0);
 #endif
     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
 
     /* jmp *tb.  */
-    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
+    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_L1);
 
     /* TB epilogue */
     tb_ret_addr = s->code_ptr;