Patchwork [06/11] TCG/x86: use stack for TCG temps

login
register
mail settings
Submitter Blue Swirl
Date May 14, 2011, 7:38 p.m.
Message ID <BANLkTi=WFTufHML4ikeSJjU4BG=Qq7vNjQ@mail.gmail.com>
Download mbox | patch
Permalink /patch/95578/
State New
Headers show

Comments

Blue Swirl - May 14, 2011, 7:38 p.m.
Use stack instead of temp_buf array in CPUState for TCG
temps.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 tcg/i386/tcg-target.c |   19 ++++++++++---------
 1 files changed, 10 insertions(+), 9 deletions(-)
Richard Henderson - May 17, 2011, 2:57 p.m.
On 05/14/2011 12:38 PM, Blue Swirl wrote:
> -    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
> +    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE +
> +        CPU_TEMP_BUF_NLONGS * sizeof(long);
>      frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
>          ~(TCG_TARGET_STACK_ALIGN - 1);
>      stack_addend = frame_size - push_size;
> +    tcg_set_frame(s, TCG_REG_ESP, 0, CPU_TEMP_BUF_NLONGS * sizeof(long));
> +
> +    /* Save all callee saved registers.  */
> +    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
> +        tcg_out_push(s, tcg_target_callee_save_regs[i]);
> +    }
> +
>      tcg_out_addi(s, TCG_REG_ESP, -stack_addend);

Wrong argument to tcg_set_frame.  The temps need to be above the
outgoing call arguments, i.e. offset TCG_STATIC_CALL_ARGS_SIZE.


r~
Aurelien Jarno - May 17, 2011, 6:46 p.m.
On Sat, May 14, 2011 at 10:38:40PM +0300, Blue Swirl wrote:
> Use stack instead of temp_buf array in CPUState for TCG
> temps.
> 
> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
> ---
>  tcg/i386/tcg-target.c |   19 ++++++++++---------
>  1 files changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 01747f3..0e168ea 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -1918,19 +1918,22 @@ static void tcg_target_qemu_prologue(TCGContext *s)
> 
>      /* TB prologue */
> 
> -    /* Save all callee saved registers.  */
> -    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
> -        tcg_out_push(s, tcg_target_callee_save_regs[i]);
> -    }
> -
> -    /* Reserve some stack space.  */
> +    /* Reserve some stack space, also for TCG temps.  */
>      push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
>      push_size *= TCG_TARGET_REG_BITS / 8;
> 
> -    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
> +    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE +
> +        CPU_TEMP_BUF_NLONGS * sizeof(long);
>      frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
>          ~(TCG_TARGET_STACK_ALIGN - 1);
>      stack_addend = frame_size - push_size;
> +    tcg_set_frame(s, TCG_REG_ESP, 0, CPU_TEMP_BUF_NLONGS * sizeof(long));

You should probably use TCG_REG_CALL_STACK instead of hardcoading the
register.

> +
> +    /* Save all callee saved registers.  */
> +    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
> +        tcg_out_push(s, tcg_target_callee_save_regs[i]);
> +    }
> +
>      tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
> 
>      /* jmp *tb.  */
> @@ -1979,6 +1982,4 @@ static void tcg_target_init(TCGContext *s)
>      tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);
> 
>      tcg_add_target_add_op_defs(x86_op_defs);
> -    tcg_set_frame(s, TCG_AREG0, offsetof(CPUState, temp_buf),
> -                  CPU_TEMP_BUF_NLONGS * sizeof(long));
>  }

Note that this patch is likely to break calls to helpers which need
parameters on the stack, by judging at the current code (I haven't 
tested it in practice):

|     if (allocate_args) {
|         tcg_out_addi(s, TCG_REG_CALL_STACK, -STACK_DIR(call_stack_size));
|     }

The stack register (esp) is decreased.

|     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
|     for(i = nb_regs; i < nb_params; i++) {
|         arg = args[nb_oargs + i];
| #ifdef TCG_TARGET_STACK_GROWSUP
|         stack_offset -= sizeof(tcg_target_long);
| #endif
|         if (arg != TCG_CALL_DUMMY_ARG) {
|             ts = &s->temps[arg];
|             if (ts->val_type == TEMP_VAL_REG) {
|                 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
|             } else if (ts->val_type == TEMP_VAL_MEM) {
|                 reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
|                                     s->reserved_regs);

tcg_reg_alloc() may spill some register, and save them in the allocated
frame. If it is referenced by the stack pointer, given it has been
changed, it won't be save at the write place.


|                 /* XXX: not correct if reading values from the stack */
|                 tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);

As the comment said, if ts->mem_reg is the stack register (like with
this patch), given it has been increased above, the wrong value will be
read.

|                 tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
|             } else if (ts->val_type == TEMP_VAL_CONST) {
|                 reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
|                                     s->reserved_regs);
|                 /* XXX: sign extend may be needed on some targets */
|                 tcg_out_movi(s, ts->type, reg, ts->val);
|                 tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
|             } else {
|                 tcg_abort();
|             }
Blue Swirl - May 17, 2011, 8:08 p.m.
On Tue, May 17, 2011 at 9:46 PM, Aurelien Jarno <aurelien@aurel32.net> wrote:
> On Sat, May 14, 2011 at 10:38:40PM +0300, Blue Swirl wrote:
>> Use stack instead of temp_buf array in CPUState for TCG
>> temps.
>>
>> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
>> ---
>>  tcg/i386/tcg-target.c |   19 ++++++++++---------
>>  1 files changed, 10 insertions(+), 9 deletions(-)
>>
>> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
>> index 01747f3..0e168ea 100644
>> --- a/tcg/i386/tcg-target.c
>> +++ b/tcg/i386/tcg-target.c
>> @@ -1918,19 +1918,22 @@ static void tcg_target_qemu_prologue(TCGContext *s)
>>
>>      /* TB prologue */
>>
>> -    /* Save all callee saved registers.  */
>> -    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
>> -        tcg_out_push(s, tcg_target_callee_save_regs[i]);
>> -    }
>> -
>> -    /* Reserve some stack space.  */
>> +    /* Reserve some stack space, also for TCG temps.  */
>>      push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
>>      push_size *= TCG_TARGET_REG_BITS / 8;
>>
>> -    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
>> +    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE +
>> +        CPU_TEMP_BUF_NLONGS * sizeof(long);
>>      frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
>>          ~(TCG_TARGET_STACK_ALIGN - 1);
>>      stack_addend = frame_size - push_size;
>> +    tcg_set_frame(s, TCG_REG_ESP, 0, CPU_TEMP_BUF_NLONGS * sizeof(long));
>
> You should probably use TCG_REG_CALL_STACK instead of hardcoading the
> register.

OK.

>> +
>> +    /* Save all callee saved registers.  */
>> +    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
>> +        tcg_out_push(s, tcg_target_callee_save_regs[i]);
>> +    }
>> +
>>      tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
>>
>>      /* jmp *tb.  */
>> @@ -1979,6 +1982,4 @@ static void tcg_target_init(TCGContext *s)
>>      tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);
>>
>>      tcg_add_target_add_op_defs(x86_op_defs);
>> -    tcg_set_frame(s, TCG_AREG0, offsetof(CPUState, temp_buf),
>> -                  CPU_TEMP_BUF_NLONGS * sizeof(long));
>>  }
>
> Note that this patch is likely to break calls to helpers which need
> parameters on the stack, by judging at the current code (I haven't
> tested it in practice):
>
> |     if (allocate_args) {
> |         tcg_out_addi(s, TCG_REG_CALL_STACK, -STACK_DIR(call_stack_size));
> |     }
>
> The stack register (esp) is decreased.
>
> |     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
> |     for(i = nb_regs; i < nb_params; i++) {
> |         arg = args[nb_oargs + i];
> | #ifdef TCG_TARGET_STACK_GROWSUP
> |         stack_offset -= sizeof(tcg_target_long);
> | #endif
> |         if (arg != TCG_CALL_DUMMY_ARG) {
> |             ts = &s->temps[arg];
> |             if (ts->val_type == TEMP_VAL_REG) {
> |                 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
> |             } else if (ts->val_type == TEMP_VAL_MEM) {
> |                 reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
> |                                     s->reserved_regs);
>
> tcg_reg_alloc() may spill some register, and save them in the allocated
> frame. If it is referenced by the stack pointer, given it has been
> changed, it won't be save at the write place.
>
>
> |                 /* XXX: not correct if reading values from the stack */
> |                 tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
>
> As the comment said, if ts->mem_reg is the stack register (like with
> this patch), given it has been increased above, the wrong value will be
> read.
>
> |                 tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
> |             } else if (ts->val_type == TEMP_VAL_CONST) {
> |                 reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
> |                                     s->reserved_regs);
> |                 /* XXX: sign extend may be needed on some targets */
> |                 tcg_out_movi(s, ts->type, reg, ts->val);
> |                 tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
> |             } else {
> |                 tcg_abort();
> |             }

Maybe the frame pointer register could be set up and used instead, but
that would cost one extra register.

Alternatively it may be possible to avoid changing stack pointer. We
know in advance all possible helpers and the number of their
parameters, so the stack could be preallocated as suggested by a
comment.
Richard Henderson - May 18, 2011, 4:09 p.m.
On 05/17/2011 11:46 AM, Aurelien Jarno wrote:
>> -    tcg_set_frame(s, TCG_AREG0, offsetof(CPUState, temp_buf),
>> -                  CPU_TEMP_BUF_NLONGS * sizeof(long));
>>  }
> 
> Note that this patch is likely to break calls to helpers which need
> parameters on the stack, by judging at the current code (I haven't 
> tested it in practice):
> 
> |     if (allocate_args) {
> |         tcg_out_addi(s, TCG_REG_CALL_STACK, -STACK_DIR(call_stack_size));
> |     }
> 
> The stack register (esp) is decreased.

I don't think this ever happens in practice, given that we've already
allocated TCG_STATIC_CALL_ARGS_SIZE worth of stack for calls.  For 
i386, that's 32 int-sized arguments, well more than any helper needs.

This code in tcg.c is way too simplistic to actually work on targets
with non-trivial stack allocation policies, e.g. ppc64.  The fact 
that the target works at present is testament to the fact that this
code doesn't actually trigger.  I would be just as happy to remove
this dynamic stack allocation code and replace it with an assert.



r~

Patch

From ebc14537b3abd4a28360d25e42e3dbe1c9b2cb14 Mon Sep 17 00:00:00 2001
Message-Id: <ebc14537b3abd4a28360d25e42e3dbe1c9b2cb14.1305401750.git.blauwirbel@gmail.com>
In-Reply-To: <6e21df8e369388a3152dcc7da30431c672e1ee37.1305401750.git.blauwirbel@gmail.com>
References: <6e21df8e369388a3152dcc7da30431c672e1ee37.1305401750.git.blauwirbel@gmail.com>
From: Blue Swirl <blauwirbel@gmail.com>
Date: Sat, 14 May 2011 13:14:45 +0000
Subject: [PATCH 06/11] TCG/x86: use stack for TCG temps

Use stack instead of temp_buf array in CPUState for TCG
temps.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 tcg/i386/tcg-target.c |   19 ++++++++++---------
 1 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 01747f3..0e168ea 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1918,19 +1918,22 @@  static void tcg_target_qemu_prologue(TCGContext *s)
 
     /* TB prologue */
 
-    /* Save all callee saved registers.  */
-    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
-        tcg_out_push(s, tcg_target_callee_save_regs[i]);
-    }
-
-    /* Reserve some stack space.  */
+    /* Reserve some stack space, also for TCG temps.  */
     push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
     push_size *= TCG_TARGET_REG_BITS / 8;
 
-    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
+    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE +
+        CPU_TEMP_BUF_NLONGS * sizeof(long);
     frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
         ~(TCG_TARGET_STACK_ALIGN - 1);
     stack_addend = frame_size - push_size;
+    tcg_set_frame(s, TCG_REG_ESP, 0, CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+    /* Save all callee saved registers.  */
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_push(s, tcg_target_callee_save_regs[i]);
+    }
+
     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
 
     /* jmp *tb.  */
@@ -1979,6 +1982,4 @@  static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);
 
     tcg_add_target_add_op_defs(x86_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUState, temp_buf),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
-- 
1.7.2.5