diff mbox

[5/7] tcg-i386: Implement deposit operation.

Message ID 1294440183-885-6-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Jan. 7, 2011, 10:43 p.m. UTC
Special case deposits that are implementable with byte and word stores.
Otherwise implement with double-word shift plus rotates.

Expose tcg_reg_alloc to the backend for allocation of scratch registers.
There's an edge condition that cannot actually happen at the moment due
to a bug elsewhere in the register allocator, but it doesn't seem right
to leave that unfixed.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   68 ++++++++++++++++++++++++++++++++++++++++++++++--
 tcg/i386/tcg-target.h |    2 +
 tcg/tcg.c             |    2 +
 3 files changed, 69 insertions(+), 3 deletions(-)

Comments

Aurelien Jarno Jan. 9, 2011, 9:53 p.m. UTC | #1
On Fri, Jan 07, 2011 at 02:43:01PM -0800, Richard Henderson wrote:
> Special case deposits that are implementable with byte and word stores.
> Otherwise implement with double-word shift plus rotates.
> 
> Expose tcg_reg_alloc to the backend for allocation of scratch registers.
> There's an edge condition that cannot actually happen at the moment due
> to a bug elsewhere in the register allocator, but it doesn't seem right
> to leave that unfixed.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/i386/tcg-target.c |   68 ++++++++++++++++++++++++++++++++++++++++++++++--
>  tcg/i386/tcg-target.h |    2 +
>  tcg/tcg.c             |    2 +
>  3 files changed, 69 insertions(+), 3 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index bb19a95..cc7d266 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -258,7 +258,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  #define OPC_JMP_long	(0xe9)
>  #define OPC_JMP_short	(0xeb)
>  #define OPC_LEA         (0x8d)
> -#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
> +#define OPC_MOVB_EbGb	(0x88)		/* stores, more or less */
> +#define OPC_MOVB_GbEb   (0x8a)		/* loads, more or less */
>  #define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
>  #define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
>  #define OPC_MOVL_EvIz	(0xc7)
> @@ -277,6 +278,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  #define OPC_SHIFT_1	(0xd1)
>  #define OPC_SHIFT_Ib	(0xc1)
>  #define OPC_SHIFT_cl	(0xd3)
> +#define OPC_SHRD_Ib	(0xac | P_EXT)
>  #define OPC_TESTL	(0x85)
>  #define OPC_XCHG_ax_r32	(0x90)
>  
> @@ -710,6 +712,59 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
>      }
>  }
>  
> +static void tcg_out_deposit(TCGContext *s, int inout, int val,
> +                            unsigned ofs, unsigned len, int rexw)
> +{
> +    /* Look for MOVW special case.  */
> +    if (ofs == 0 && len == 16) {
> +        tcg_out_modrm(s, OPC_MOVL_GvEv + P_DATA16, inout, val);
> +        return;
> +    }
> +
> +    /* Look for MOVB w/ %reg_l special case.  */
> +    if (ofs == 0 && len == 8
> +        && (TCG_TARGET_REG_BITS == 64 || (inout < 4 && val < 4))) {
> +        tcg_out_modrm(s, OPC_MOVB_GbEb + P_REXB_R + P_REXB_RM, inout, val);
> +        return;
> +    }
> +
> +    /* Look for MOVB w/ %reg_h special case.  */
> +    if (ofs == 8 && len == 8 && inout < 4 && val < 4) {
> +        tcg_out_modrm(s, OPC_MOVB_GbEb, inout + 4, val);
> +        return;
> +    }
> +
> +    /* If we have a real deposit from self, we need a temporary.  */
> +    /* ??? There really ought to be a way to easily allocate a scratch.  */
> +    if (inout == val) {
> +        TCGType type = rexw ? TCG_TYPE_I64 : TCG_TYPE_I32;
> +        TCGRegSet inuse = s->reserved_regs;
> +
> +        tcg_regset_set_reg(inuse, inout);
> +        val = tcg_reg_alloc(s, tcg_target_available_regs[type], inuse);
> +
> +        tcg_out_mov(s, type, val, inout);

I am a bit worried by allocating a new register here, especially on the
i386 target, where the number of free registers is quite low, and often
0. We already had to tweak some code to avoid calls to tcg_abort() due
to missing registers.

> +    }
> +
> +    /* Arrange for the field to be at offset 0.  */
> +    if (ofs != 0) {
> +        tcg_out_shifti(s, SHIFT_ROR + rexw, inout, ofs);
> +    }
> +
> +    /* Shift the value into the top of the word.  This shifts the old
> +       field out of the bottom of the word and leaves us with the whole
> +       word rotated right by the size of the field.  */
> +    tcg_out_modrm(s, OPC_SHRD_Ib + rexw, val, inout);
> +    tcg_out8(s, len);
> +
> +    /* Restore the field to its proper location.  */
> +    ofs = (len + ofs) & (rexw ? 63 : 31);
> +    if (ofs != 0) {
> +        tcg_out_shifti(s, SHIFT_ROL + rexw, inout, ofs);
> +    }
> +}
> +
> +
>  /* Use SMALL != 0 to force a short forward branch.  */
>  static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
>  {
> @@ -1266,7 +1321,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
>  
>      switch (sizeop) {
>      case 0:
> -        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
> +        tcg_out_modrm_offset(s, OPC_MOVB_EbGb + P_REXB_R, datalo, base, ofs);
>          break;
>      case 1:
>          if (bswap) {
> @@ -1504,7 +1559,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          break;
>  
>      OP_32_64(st8):
> -        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
> +        tcg_out_modrm_offset(s, OPC_MOVB_EbGb | P_REXB_R,
>                               args[0], args[1], args[2]);
>          break;
>      OP_32_64(st16):
> @@ -1603,6 +1658,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          }
>          break;
>  
> +    OP_32_64(deposit):
> +        tcg_out_deposit(s, args[0], args[2],
> +                        args[3] >> 8, args[3] & 0xff, rexw);
> +        break;
> +
>      case INDEX_op_brcond_i32:
>          tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
>                           args[3], 0);
> @@ -1783,6 +1843,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
>      { INDEX_op_sar_i32, { "r", "0", "ci" } },
>      { INDEX_op_rotl_i32, { "r", "0", "ci" } },
>      { INDEX_op_rotr_i32, { "r", "0", "ci" } },
> +    { INDEX_op_deposit_i32, { "r", "0", "r" } },
>  
>      { INDEX_op_brcond_i32, { "r", "ri" } },
>  
> @@ -1835,6 +1896,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
>      { INDEX_op_sar_i64, { "r", "0", "ci" } },
>      { INDEX_op_rotl_i64, { "r", "0", "ci" } },
>      { INDEX_op_rotr_i64, { "r", "0", "ci" } },
> +    { INDEX_op_deposit_i64, { "r", "0", "r" } },
>  
>      { INDEX_op_brcond_i64, { "r", "re" } },
>      { INDEX_op_setcond_i64, { "r", "r", "re" } },
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index bfafbfc..9f90d17 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -77,6 +77,7 @@ enum {
>  /* optional instructions */
>  #define TCG_TARGET_HAS_div2_i32
>  #define TCG_TARGET_HAS_rot_i32
> +#define TCG_TARGET_HAS_deposit_i32
>  #define TCG_TARGET_HAS_ext8s_i32
>  #define TCG_TARGET_HAS_ext16s_i32
>  #define TCG_TARGET_HAS_ext8u_i32
> @@ -94,6 +95,7 @@ enum {
>  #if TCG_TARGET_REG_BITS == 64
>  #define TCG_TARGET_HAS_div2_i64
>  #define TCG_TARGET_HAS_rot_i64
> +#define TCG_TARGET_HAS_deposit_i64
>  #define TCG_TARGET_HAS_ext8s_i64
>  #define TCG_TARGET_HAS_ext16s_i64
>  #define TCG_TARGET_HAS_ext32s_i64
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index e95a42f..5ab9122 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -156,6 +156,8 @@ int gen_new_label(void)
>      return idx;
>  }
>  
> +static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2);
> +
>  #include "tcg-target.c"
>  
>  /* pool based memory allocation */
> -- 
> 1.7.2.3
> 
>
Richard Henderson Jan. 9, 2011, 10:55 p.m. UTC | #2
On 01/09/2011 01:53 PM, Aurelien Jarno wrote:
>> +    if (inout == val) {
>> +        TCGType type = rexw ? TCG_TYPE_I64 : TCG_TYPE_I32;
>> +        TCGRegSet inuse = s->reserved_regs;
>> +
>> +        tcg_regset_set_reg(inuse, inout);
>> +        val = tcg_reg_alloc(s, tcg_target_available_regs[type], inuse);
>> +
>> +        tcg_out_mov(s, type, val, inout);
> 
> I am a bit worried by allocating a new register here, especially on the
> i386 target, where the number of free registers is quite low, and often
> 0. We already had to tweak some code to avoid calls to tcg_abort() due
> to missing registers.

Well, as I said, this case can't actually trigger due to a bug in the
register allocator.  This can be seen in an insn like

	mov	%dl,%dh

where you would expect to see

	deposit	x,x,x,8,8

however, the matching constraint forces the destination and the matching
source into a new register:

                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_IARG(i - nb_oargs)) 
                    goto allocate_in_reg;

which means that we'll always see

	mov	y,x
	deposit y,y,x,8,8

So I could simply put a tcg_abort there.  It would be up to whoever
improves the register allocator to provide some mechanism for a
backend to allocate a scratch.  What do you think?


r~
Aurelien Jarno Jan. 10, 2011, 12:16 a.m. UTC | #3
On Sun, Jan 09, 2011 at 02:55:13PM -0800, Richard Henderson wrote:
> On 01/09/2011 01:53 PM, Aurelien Jarno wrote:
> >> +    if (inout == val) {
> >> +        TCGType type = rexw ? TCG_TYPE_I64 : TCG_TYPE_I32;
> >> +        TCGRegSet inuse = s->reserved_regs;
> >> +
> >> +        tcg_regset_set_reg(inuse, inout);
> >> +        val = tcg_reg_alloc(s, tcg_target_available_regs[type], inuse);
> >> +
> >> +        tcg_out_mov(s, type, val, inout);
> > 
> > I am a bit worried by allocating a new register here, especially on the
> > i386 target, where the number of free registers is quite low, and often
> > 0. We already had to tweak some code to avoid calls to tcg_abort() due
> > to missing registers.
> 
> Well, as I said, this case can't actually trigger due to a bug in the
> register allocator.  This can be seen in an insn like
> 
> 	mov	%dl,%dh
> 
> where you would expect to see
> 
> 	deposit	x,x,x,8,8
> 
> however, the matching constraint forces the destination and the matching
> source into a new register:
> 
>                 /* if the input is aliased to an output and if it is
>                    not dead after the instruction, we must allocate
>                    a new register and move it */
>                 if (!IS_DEAD_IARG(i - nb_oargs)) 
>                     goto allocate_in_reg;

I guess we should also add a case when the input argument and the output
argument are the same.

> which means that we'll always see
> 
> 	mov	y,x
> 	deposit y,y,x,8,8
> 
> So I could simply put a tcg_abort there.  It would be up to whoever
> improves the register allocator to provide some mechanism for a
> backend to allocate a scratch.  What do you think?
> 

The code being written now or latter doesn't change the question to know
if it is always possible to allocate one scratch register here on i386.

Up to know we didn't provide any function for allocating a scratch
register, as on CISC CPU the op directly match to an instruction whereas
on RISC CPUs where the op has to be emulated with multiple instructions,
they are enough registers available to define one or more permanent
scratch register.
Richard Henderson Jan. 10, 2011, 12:43 a.m. UTC | #4
On 01/09/2011 04:16 PM, Aurelien Jarno wrote:
> The code being written now or latter doesn't change the question to know
> if it is always possible to allocate one scratch register here on i386.

Yes.

Here there's only one register that needs to remain live.  In the 
worst case, we'll spill one live register.  Which we can always do
via a simple store via AREG0 + offset.


r~
Aurelien Jarno Jan. 10, 2011, 4:52 p.m. UTC | #5
On Sun, Jan 09, 2011 at 04:43:22PM -0800, Richard Henderson wrote:
> On 01/09/2011 04:16 PM, Aurelien Jarno wrote:
> > The code being written now or latter doesn't change the question to know
> > if it is always possible to allocate one scratch register here on i386.
> 
> Yes.
> 
> Here there's only one register that needs to remain live.  In the 
> worst case, we'll spill one live register.  Which we can always do
> via a simple store via AREG0 + offset.

Oh right correct we are inside the instruction and in a case we have
very few arguments, and only 32-bit.

If we allow the target to allocate scratch registers themselves, it's
probably better to do that via a function.
Aurelien Jarno Jan. 10, 2011, 6:37 p.m. UTC | #6
On Sun, Jan 09, 2011 at 02:55:13PM -0800, Richard Henderson wrote:
> On 01/09/2011 01:53 PM, Aurelien Jarno wrote:
> >> +    if (inout == val) {
> >> +        TCGType type = rexw ? TCG_TYPE_I64 : TCG_TYPE_I32;
> >> +        TCGRegSet inuse = s->reserved_regs;
> >> +
> >> +        tcg_regset_set_reg(inuse, inout);
> >> +        val = tcg_reg_alloc(s, tcg_target_available_regs[type], inuse);
> >> +
> >> +        tcg_out_mov(s, type, val, inout);
> > 
> > I am a bit worried by allocating a new register here, especially on the
> > i386 target, where the number of free registers is quite low, and often
> > 0. We already had to tweak some code to avoid calls to tcg_abort() due
> > to missing registers.
> 
> Well, as I said, this case can't actually trigger due to a bug in the
> register allocator.  This can be seen in an insn like
> 
> 	mov	%dl,%dh
> 
> where you would expect to see
> 
> 	deposit	x,x,x,8,8
> 
> however, the matching constraint forces the destination and the matching
> source into a new register:
> 
>                 /* if the input is aliased to an output and if it is
>                    not dead after the instruction, we must allocate
>                    a new register and move it */
>                 if (!IS_DEAD_IARG(i - nb_oargs)) 
>                     goto allocate_in_reg;

I have not been able to trigger this code path with a deposit
instruction.

> which means that we'll always see
> 
> 	mov	y,x
> 	deposit y,y,x,8,8
> 
> So I could simply put a tcg_abort there.  It would be up to whoever
> improves the register allocator to provide some mechanism for a
> backend to allocate a scratch.  What do you think?
> 

Do you have a way to trigger this problem? or a dump of the ops and asm
output?
Richard Henderson Jan. 10, 2011, 7:19 p.m. UTC | #7
On 01/10/2011 10:37 AM, Aurelien Jarno wrote:
>> 	mov	y,x
>> 	deposit y,y,x,8,8
>>
>> So I could simply put a tcg_abort there.  It would be up to whoever
>> improves the register allocator to provide some mechanism for a
>> backend to allocate a scratch.  What do you think?
>>
> 
> Do you have a way to trigger this problem? or a dump of the ops and asm
> output?

IN: 
0x408120c4:  rlwimi  r4,r4,8,16,23

OP:
 ---- 0x408120c4
 deposit_i32 r4,r4,r4,8,8
 goto_tb $0x0
 movi_i32 nip,$0x408120c8
 exit_tb $0x7fbb00ca5758

OUT: [size=52]
0x60294380:  mov    0x10(%r14),%ebp
0x60294384:  mov    %ebp,%ebx
0x60294386:  ror    $0x8,%ebp
0x60294389:  shrd   $0x8,%ebx,%ebp
0x6029438d:  rol    $0x10,%ebp
0x60294390:  mov    %ebp,0x10(%r14)
0x60294394:  jmpq   0x60294399
0x60294399:  mov    $0x408120c8,%ebp
0x6029439e:  mov    %ebp,0x25c(%r14)
0x602943a5:  mov    $0x7fbb00ca5758,%rax
0x602943af:  jmpq   0x622b772e

That should do it.  This is present in linux-user-test ppc/ls.
This output still contains that allocate-a-scratch path.


r~
diff mbox

Patch

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index bb19a95..cc7d266 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -258,7 +258,8 @@  static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_JMP_long	(0xe9)
 #define OPC_JMP_short	(0xeb)
 #define OPC_LEA         (0x8d)
-#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
+#define OPC_MOVB_EbGb	(0x88)		/* stores, more or less */
+#define OPC_MOVB_GbEb   (0x8a)		/* loads, more or less */
 #define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
 #define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
 #define OPC_MOVL_EvIz	(0xc7)
@@ -277,6 +278,7 @@  static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_SHIFT_1	(0xd1)
 #define OPC_SHIFT_Ib	(0xc1)
 #define OPC_SHIFT_cl	(0xd3)
+#define OPC_SHRD_Ib	(0xac | P_EXT)
 #define OPC_TESTL	(0x85)
 #define OPC_XCHG_ax_r32	(0x90)
 
@@ -710,6 +712,59 @@  static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
     }
 }
 
+static void tcg_out_deposit(TCGContext *s, int inout, int val,
+                            unsigned ofs, unsigned len, int rexw)
+{
+    /* Look for MOVW special case.  */
+    if (ofs == 0 && len == 16) {
+        tcg_out_modrm(s, OPC_MOVL_GvEv + P_DATA16, inout, val);
+        return;
+    }
+
+    /* Look for MOVB w/ %reg_l special case.  */
+    if (ofs == 0 && len == 8
+        && (TCG_TARGET_REG_BITS == 64 || (inout < 4 && val < 4))) {
+        tcg_out_modrm(s, OPC_MOVB_GbEb + P_REXB_R + P_REXB_RM, inout, val);
+        return;
+    }
+
+    /* Look for MOVB w/ %reg_h special case.  */
+    if (ofs == 8 && len == 8 && inout < 4 && val < 4) {
+        tcg_out_modrm(s, OPC_MOVB_GbEb, inout + 4, val);
+        return;
+    }
+
+    /* If we have a real deposit from self, we need a temporary.  */
+    /* ??? There really ought to be a way to easily allocate a scratch.  */
+    if (inout == val) {
+        TCGType type = rexw ? TCG_TYPE_I64 : TCG_TYPE_I32;
+        TCGRegSet inuse = s->reserved_regs;
+
+        tcg_regset_set_reg(inuse, inout);
+        val = tcg_reg_alloc(s, tcg_target_available_regs[type], inuse);
+
+        tcg_out_mov(s, type, val, inout);
+    }
+
+    /* Arrange for the field to be at offset 0.  */
+    if (ofs != 0) {
+        tcg_out_shifti(s, SHIFT_ROR + rexw, inout, ofs);
+    }
+
+    /* Shift the value into the top of the word.  This shifts the old
+       field out of the bottom of the word and leaves us with the whole
+       word rotated right by the size of the field.  */
+    tcg_out_modrm(s, OPC_SHRD_Ib + rexw, val, inout);
+    tcg_out8(s, len);
+
+    /* Restore the field to its proper location.  */
+    ofs = (len + ofs) & (rexw ? 63 : 31);
+    if (ofs != 0) {
+        tcg_out_shifti(s, SHIFT_ROL + rexw, inout, ofs);
+    }
+}
+
+
 /* Use SMALL != 0 to force a short forward branch.  */
 static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 {
@@ -1266,7 +1321,7 @@  static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
 
     switch (sizeop) {
     case 0:
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVB_EbGb + P_REXB_R, datalo, base, ofs);
         break;
     case 1:
         if (bswap) {
@@ -1504,7 +1559,7 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     OP_32_64(st8):
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
+        tcg_out_modrm_offset(s, OPC_MOVB_EbGb | P_REXB_R,
                              args[0], args[1], args[2]);
         break;
     OP_32_64(st16):
@@ -1603,6 +1658,11 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
 
+    OP_32_64(deposit):
+        tcg_out_deposit(s, args[0], args[2],
+                        args[3] >> 8, args[3] & 0xff, rexw);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                          args[3], 0);
@@ -1783,6 +1843,7 @@  static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
     { INDEX_op_rotl_i32, { "r", "0", "ci" } },
     { INDEX_op_rotr_i32, { "r", "0", "ci" } },
+    { INDEX_op_deposit_i32, { "r", "0", "r" } },
 
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
@@ -1835,6 +1896,7 @@  static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_sar_i64, { "r", "0", "ci" } },
     { INDEX_op_rotl_i64, { "r", "0", "ci" } },
     { INDEX_op_rotr_i64, { "r", "0", "ci" } },
+    { INDEX_op_deposit_i64, { "r", "0", "r" } },
 
     { INDEX_op_brcond_i64, { "r", "re" } },
     { INDEX_op_setcond_i64, { "r", "r", "re" } },
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index bfafbfc..9f90d17 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -77,6 +77,7 @@  enum {
 /* optional instructions */
 #define TCG_TARGET_HAS_div2_i32
 #define TCG_TARGET_HAS_rot_i32
+#define TCG_TARGET_HAS_deposit_i32
 #define TCG_TARGET_HAS_ext8s_i32
 #define TCG_TARGET_HAS_ext16s_i32
 #define TCG_TARGET_HAS_ext8u_i32
@@ -94,6 +95,7 @@  enum {
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64
 #define TCG_TARGET_HAS_rot_i64
+#define TCG_TARGET_HAS_deposit_i64
 #define TCG_TARGET_HAS_ext8s_i64
 #define TCG_TARGET_HAS_ext16s_i64
 #define TCG_TARGET_HAS_ext32s_i64
diff --git a/tcg/tcg.c b/tcg/tcg.c
index e95a42f..5ab9122 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -156,6 +156,8 @@  int gen_new_label(void)
     return idx;
 }
 
+static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2);
+
 #include "tcg-target.c"
 
 /* pool based memory allocation */