diff mbox

target-tilegx: Finish decoding the first TB block.

Message ID 54E7F5EB.60402@sunrus.com.cn
State New
Headers show

Commit Message

Chen Gang Feb. 21, 2015, 3:05 a.m. UTC
This patch finishes decoding the first TB block, _start. The y1 and x1
pipes are always decoded last, since they may contain jump instructions.

The related disassembly code is below which is the same as the objdump:

  y0: 00000000500bfdb4  move r52, r54
  y2: 0208000007600000  ld r1, r54
  y1: 1c06400000000000  fnop
  x0: 0000000051483000  fnop
  x1: 180f86c600000000  addi r12, r54, -16
  x0: 00000000403f8336  andi r54, r12, -8
  x1: 286af00680000000  lnk r13
  y0: 00000000500bf005  move r5, r0
  y2: 03f8000007600000  st r54, r63
  y1: 040046c600000000  addi r12, r54, 8
  y0: 00000000500bfff7  move r55, r63
  y2: 03f8000004c00000  st r12, r63
  y1: 0400468100000000  addi r2, r52, 8
  x0: 0000000040110d86  addi r6, r54, 16
  x1: 07ffffe000000000  moveli r0, -1
  x0: 000000007ffff000  shl16insli r0, r0, -1
  x1: 000007e180000000  moveli r3, 0
  x0: 000000007ffa8000  shl16insli r0, r0, -88
  x1: 3800006180000000  shl16insli r3, r3, 0
  x0: 00000000500cd000  add r0, r0, r13
  x1: 3877406180000000  shl16insli r3, r3, 3816
  x0: 0000000010000fcc  moveli r12, 0
  x1: 2806686180000000  add r3, r3, r13
  x0: 000000007000030c  shl16insli r12, r12, 0
  x1: 000007e200000000  moveli r4, 0
  x0: 000000007039030c  shl16insli r12, r12, 912
  x1: 3800008200000000  shl16insli r4, r4, 0
  x0: 00000000500cd30c  add r12, r12, r13
  x1: 3881808200000000  shl16insli r4, r4, 4144
  x0: 00000000500cd104  add r4, r4, r13
  x1: 286a718000000000  jr r12

Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
---
 target-tilegx/cpu-qom.h   |   2 +
 target-tilegx/cpu.c       |   4 -
 target-tilegx/cpu.h       |   1 +
 target-tilegx/translate.c | 437 +++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 433 insertions(+), 11 deletions(-)

Comments

Chen Gang Feb. 21, 2015, 7:16 a.m. UTC | #1
On 2/21/15 11:05, Chen Gang S wrote:
> At present finish decoding the first TB block: _start. Always let y1 and
> x1 pipe at the last which may content jump instructions.
> 
> The related disassembly code is below which is the same as the objdump:
> 
>   y0: 00000000500bfdb4  move r52, r54
>   y2: 0208000007600000  ld r1, r54
>   y1: 1c06400000000000  fnop
>   x0: 0000000051483000  fnop
>   x1: 180f86c600000000  addi r12, r54, -16
>   x0: 00000000403f8336  andi r54, r12, -8
>   x1: 286af00680000000  lnk r13
>   y0: 00000000500bf005  move r5, r0
>   y2: 03f8000007600000  st r54, r63
>   y1: 040046c600000000  addi r12, r54, 8
>   y0: 00000000500bfff7  move r55, r63
>   y2: 03f8000004c00000  st r12, r63
>   y1: 0400468100000000  addi r2, r52, 8
>   x0: 0000000040110d86  addi r6, r54, 16
>   x1: 07ffffe000000000  moveli r0, -1
>   x0: 000000007ffff000  shl16insli r0, r0, -1
>   x1: 000007e180000000  moveli r3, 0
>   x0: 000000007ffa8000  shl16insli r0, r0, -88
>   x1: 3800006180000000  shl16insli r3, r3, 0
>   x0: 00000000500cd000  and r0, r0, r13
>   x1: 3877406180000000  shl16insli r3, r3, 3816
>   x0: 0000000010000fcc  moveli r12, 0
>   x1: 2806686180000000  add r3, r3, r13
>   x0: 000000007000030c  shl16insli r12, r12, 0
>   x1: 000007e200000000  moveli r4, 0
>   x0: 000000007039030c  shl16insli r12, r12, 912
>   x1: 3800008200000000  shl16insli r4, r4, 0
>   x0: 00000000500cd30c  and r12, r12, r13
>   x1: 3881808200000000  shl16insli r4, r4, 4144
>   x0: 00000000500cd104  and r4, r4, r13
>   x1: 286a718000000000  jr r12
>

Oh, sorry, there is no 'and' operation here; it should be 'add' (3 places need
to be modified). It is just a typo in the printed mnemonic.

[...] 
> +    case 0x0000000050000000ULL:
> +        switch (TILEGX_CODE_X0_18(bundle)) {
> +        /* add Dest, SrcA, SrcB */
> +        case 0x00000000000c0000ULL:
> +            rdest = (unsigned char)(bundle & TILEGX_DATA_REGISTER);
> +            rsrc = (unsigned char)((bundle >> 6) & TILEGX_DATA_REGISTER);
> +            rsrcb = (unsigned char)((bundle >> 12) & TILEGX_DATA_REGISTER);
> +            qemu_log("and r%d, r%d, r%d", rdest, rsrc, rsrcb);

It should print "add", not "and".

Thanks.
Chen Gang Feb. 21, 2015, 3:31 p.m. UTC | #2
Oh, we cannot simply assume that y1 and x1 are executed last in a bundle,
e.g. in __libc_start_main:

   11330:       c6f106c685928d8a        { addi r10, sp, 40 ; addi r13, sp, 32 ; st r25, r30 }
   11338:       2862014010000fca        { moveli r10, 0 ; st r10, r0 }

In this case, r10 will be overwritten. I have to use tcg temporary
variables for it in each bundle:

 - We can still use the original pipes order: "y0, y2, y1" and "x0, x1".

 - y0, y2, and x0 need to use tcg temporary variables, but y1 and x1 can
   still use real variables.

 - y1 and x1 need to flush the temporary variables, and they also need to
   consider the jump cases for tcg code (flush the tcg temporary variables
   after comparing and before jumping).


Any ideas, suggestions or additions are welcome.

Thanks.

On 2/21/15 15:16, Chen Gang S wrote:
> On 2/21/15 11:05, Chen Gang S wrote:
>> At present finish decoding the first TB block: _start. Always let y1 and
>> x1 pipe at the last which may content jump instructions.
>>
>> The related disassembly code is below which is the same as the objdump:
>>
>>   y0: 00000000500bfdb4  move r52, r54
>>   y2: 0208000007600000  ld r1, r54
>>   y1: 1c06400000000000  fnop
>>   x0: 0000000051483000  fnop
>>   x1: 180f86c600000000  addi r12, r54, -16
>>   x0: 00000000403f8336  andi r54, r12, -8
>>   x1: 286af00680000000  lnk r13
>>   y0: 00000000500bf005  move r5, r0
>>   y2: 03f8000007600000  st r54, r63
>>   y1: 040046c600000000  addi r12, r54, 8
>>   y0: 00000000500bfff7  move r55, r63
>>   y2: 03f8000004c00000  st r12, r63
>>   y1: 0400468100000000  addi r2, r52, 8
>>   x0: 0000000040110d86  addi r6, r54, 16
>>   x1: 07ffffe000000000  moveli r0, -1
>>   x0: 000000007ffff000  shl16insli r0, r0, -1
>>   x1: 000007e180000000  moveli r3, 0
>>   x0: 000000007ffa8000  shl16insli r0, r0, -88
>>   x1: 3800006180000000  shl16insli r3, r3, 0
>>   x0: 00000000500cd000  and r0, r0, r13
>>   x1: 3877406180000000  shl16insli r3, r3, 3816
>>   x0: 0000000010000fcc  moveli r12, 0
>>   x1: 2806686180000000  add r3, r3, r13
>>   x0: 000000007000030c  shl16insli r12, r12, 0
>>   x1: 000007e200000000  moveli r4, 0
>>   x0: 000000007039030c  shl16insli r12, r12, 912
>>   x1: 3800008200000000  shl16insli r4, r4, 0
>>   x0: 00000000500cd30c  and r12, r12, r13
>>   x1: 3881808200000000  shl16insli r4, r4, 4144
>>   x0: 00000000500cd104  and r4, r4, r13
>>   x1: 286a718000000000  jr r12
>>
> 
> Oh, sorry, it has no 'and' operation, it should be 'add' (3 places need
> be modified). It is a printing typo issue.
> 
> [...] 
>> +    case 0x0000000050000000ULL:
>> +        switch (TILEGX_CODE_X0_18(bundle)) {
>> +        /* add Dest, SrcA, SrcB */
>> +        case 0x00000000000c0000ULL:
>> +            rdest = (unsigned char)(bundle & TILEGX_DATA_REGISTER);
>> +            rsrc = (unsigned char)((bundle >> 6) & TILEGX_DATA_REGISTER);
>> +            rsrcb = (unsigned char)((bundle >> 12) & TILEGX_DATA_REGISTER);
>> +            qemu_log("and r%d, r%d, r%d", rdest, rsrc, rsrcb);
> 
> It should be print "add", not "and".
> 
> Thanks.
>
Chen Gang Feb. 21, 2015, 4:02 p.m. UTC | #3
I guess y2 does not need tcg temporary variables either, since it is
only for ld/st operations (all memory operations must be processed only
by y2 or x1).

At present, I can generate the tcg code for the _start TB block, and can
execute up to the 2nd TB block, __libc_start_main (although I don't know
whether the execution is correct or not).

But I guess I have to fix the current issue first, before continuing.

Thanks.

On 2/21/15 23:31, Chen Gang S wrote:
> 
> Oh, we can not only assume y1 and x1 is the last execution in a bundle,
> e.g. in __libc_start_main:
> 
>    11330:       c6f106c685928d8a        { addi r10, sp, 40 ; addi r13, sp, 32 ; st r25, r30 }
>    11338:       2862014010000fca        { moveli r10, 0 ; st r10, r0 }
> 
> In this case, r10 will be over written. I have to use tcg temporary
> variables for it in each bundle:
> 
>  - We can still use the original pipes order: "y0, y2, y1" and "x0, x1".
> 
>  - y0, y2, and x0 need to use tcg temporary variables, but y1 and x1 can
>    still use real variables.
> 
>  - y1 and x1 need to flush the temporary variables, they also need to
>    consider about jump cases for tcg code (flush tcg temporary variables
>    after comparing and before jump).
> 
> 
> Welcome any ideas, suggestions or completions.
> 
> Thanks.
> 
> On 2/21/15 15:16, Chen Gang S wrote:
>> On 2/21/15 11:05, Chen Gang S wrote:
>>> At present finish decoding the first TB block: _start. Always let y1 and
>>> x1 pipe at the last which may content jump instructions.
>>>
>>> The related disassembly code is below which is the same as the objdump:
>>>
>>>   y0: 00000000500bfdb4  move r52, r54
>>>   y2: 0208000007600000  ld r1, r54
>>>   y1: 1c06400000000000  fnop
>>>   x0: 0000000051483000  fnop
>>>   x1: 180f86c600000000  addi r12, r54, -16
>>>   x0: 00000000403f8336  andi r54, r12, -8
>>>   x1: 286af00680000000  lnk r13
>>>   y0: 00000000500bf005  move r5, r0
>>>   y2: 03f8000007600000  st r54, r63
>>>   y1: 040046c600000000  addi r12, r54, 8
>>>   y0: 00000000500bfff7  move r55, r63
>>>   y2: 03f8000004c00000  st r12, r63
>>>   y1: 0400468100000000  addi r2, r52, 8
>>>   x0: 0000000040110d86  addi r6, r54, 16
>>>   x1: 07ffffe000000000  moveli r0, -1
>>>   x0: 000000007ffff000  shl16insli r0, r0, -1
>>>   x1: 000007e180000000  moveli r3, 0
>>>   x0: 000000007ffa8000  shl16insli r0, r0, -88
>>>   x1: 3800006180000000  shl16insli r3, r3, 0
>>>   x0: 00000000500cd000  and r0, r0, r13
>>>   x1: 3877406180000000  shl16insli r3, r3, 3816
>>>   x0: 0000000010000fcc  moveli r12, 0
>>>   x1: 2806686180000000  add r3, r3, r13
>>>   x0: 000000007000030c  shl16insli r12, r12, 0
>>>   x1: 000007e200000000  moveli r4, 0
>>>   x0: 000000007039030c  shl16insli r12, r12, 912
>>>   x1: 3800008200000000  shl16insli r4, r4, 0
>>>   x0: 00000000500cd30c  and r12, r12, r13
>>>   x1: 3881808200000000  shl16insli r4, r4, 4144
>>>   x0: 00000000500cd104  and r4, r4, r13
>>>   x1: 286a718000000000  jr r12
>>>
>>
>> Oh, sorry, it has no 'and' operation, it should be 'add' (3 places need
>> be modified). It is a printing typo issue.
>>
>> [...] 
>>> +    case 0x0000000050000000ULL:
>>> +        switch (TILEGX_CODE_X0_18(bundle)) {
>>> +        /* add Dest, SrcA, SrcB */
>>> +        case 0x00000000000c0000ULL:
>>> +            rdest = (unsigned char)(bundle & TILEGX_DATA_REGISTER);
>>> +            rsrc = (unsigned char)((bundle >> 6) & TILEGX_DATA_REGISTER);
>>> +            rsrcb = (unsigned char)((bundle >> 12) & TILEGX_DATA_REGISTER);
>>> +            qemu_log("and r%d, r%d, r%d", rdest, rsrc, rsrcb);
>>
>> It should be print "add", not "and".
>>
>> Thanks.
>>
>
Richard Henderson Feb. 21, 2015, 4:33 p.m. UTC | #4
On 02/21/2015 07:31 AM, Chen Gang S wrote:
> 
> Oh, we can not only assume y1 and x1 is the last execution in a bundle,
> e.g. in __libc_start_main:
> 
>    11330:       c6f106c685928d8a        { addi r10, sp, 40 ; addi r13, sp, 32 ; st r25, r30 }
>    11338:       2862014010000fca        { moveli r10, 0 ; st r10, r0 }
> 
> In this case, r10 will be over written. I have to use tcg temporary
> variables for it in each bundle:

Yes.

>  - We can still use the original pipes order: "y0, y2, y1" and "x0, x1".

I guess, sure, though I don't think that'll help as much as you imagine.

>  - y0, y2, and x0 need to use tcg temporary variables, but y1 and x1 can
>    still use real variables.

Possibly, but I wouldn't structure it that way.  I'd have all of the pipes
write to temporaries.

>  - y1 and x1 need to flush the temporary variables, they also need to
>    consider about jump cases for tcg code (flush tcg temporary variables
>    after comparing and before jump).

I wouldn't do that either.  I'd have y1/x1 record that a branch occurred, and
the condition, but delay the flushing and branching to common code.

For instance:

/* Outcome of translating one bundle; stored in DisasContext.exit.  */
typedef enum ExitStatus {
    NO_EXIT,         /* Fall through to next bundle */
    EXIT_JUMP,       /* Normal branch exit */
    EXIT_NORETURN    /* Exception taken (e.g. syscall) */
} ExitStatus;

/*
 * Translation-time state.  Everything from "exit" downwards is reset at the
 * start of every bundle by translate_one_bundle().
 */
typedef struct DisasContext {
    struct TranslationBlock *tb;
    int mem_idx;

    /* NOTE(review): presumably the address of the bundle being translated;
       the fall-through target is computed as pc + 8 -- confirm.  */
    uint64_t pc;
    /* How this bundle ended: NO_EXIT unless a pipe or the branch
       write-back changes it.  */
    ExitStatus exit;

    /* One slot per pipe: destination register number, or -1 if that pipe
       produced no register result ...  */
    int result_regs[3];
    /* ... and the temporary holding the value to copy into that register.  */
    TCGv result_vals[3];

    /* TCG_COND_NEVER: no branch in this bundle; TCG_COND_ALWAYS:
       unconditional branch; anything else: conditional branch.  */
    TCGCond branch_cond;
    /* Branch target, moved into cpu_pc when the branch is taken.  */
    TCGv branch_dest;
    /* The two operands compared with branch_cond (conditional case only).  */
    TCGv branch_val1;
    TCGv branch_val2;
} DisasContext;


/*
 * Translate one bundle with "all pipes together" semantics.
 *
 * Every pipe records its register result (and, for a branch, the condition,
 * operands and target) in DC instead of touching the CPU state directly.
 * Only after all pipes have been decoded are the results written back:
 * registers first, then the program counter.
 *
 * On return dc->exit is:
 *   NO_EXIT        - no branch; continue with the next bundle,
 *   EXIT_JUMP      - cpu_pc has been written by a branch,
 *   EXIT_NORETURN  - some pipe raised an exception; nothing was written back.
 */
static void translate_one_bundle(DisasContext *dc, uint64_t bundle)
{
    int i;

    /* Initialize the per-bundle state of DC.  */
    dc->exit = NO_EXIT;
    for (i = 0; i < 3; ++i) {
        dc->result_regs[i] = -1;
        TCGV_UNUSED_I64(dc->result_vals[i]);
    }
    dc->branch_cond = TCG_COND_NEVER;
    TCGV_UNUSED_I64(dc->branch_dest);
    TCGV_UNUSED_I64(dc->branch_val1);
    TCGV_UNUSED_I64(dc->branch_val2);

    /* Decode all pipes, writing results into DC.  */

    /* If some pipe raises an exception, nothing left to do.  */
    if (dc->exit == EXIT_NORETURN) {
        return;
    }

    /* Write back register results from all pipes.  A slot whose register
       number is still -1 produced no result and owns no temporary.  */
    for (i = 0; i < 3; ++i) {
        int r = dc->result_regs[i];
        if (r >= 0) {
            tcg_gen_mov_i64(cpu_regs[r], dc->result_vals[i]);
            tcg_temp_free_i64(dc->result_vals[i]);
        }
    }

    /* Write back branch results, i.e. take the branch now.  This is done
       after the register write-back so that the PC update is the very last
       effect of the bundle.  */
    if (dc->branch_cond != TCG_COND_NEVER) {
        if (dc->branch_cond == TCG_COND_ALWAYS) {
            /* Unconditional branch */
            tcg_gen_mov_i64(cpu_pc, dc->branch_dest);
        } else {
            /* Conditional branch: select between the target and the
               fall-through address (the bundle 8 bytes further on).  */
            TCGv next_pc = tcg_const_i64(dc->pc + 8);
            tcg_gen_movcond_i64(dc->branch_cond, cpu_pc,
                                dc->branch_val1, dc->branch_val2,
                                dc->branch_dest, next_pc);
            tcg_temp_free_i64(dc->branch_val1);
            tcg_temp_free_i64(dc->branch_val2);
            tcg_temp_free_i64(next_pc);
        }
        tcg_temp_free_i64(dc->branch_dest);
        dc->exit = EXIT_JUMP;
    }
}

This mostly ignores the use of tcg_gen_goto_tb for now.  It's slightly more
complicated to use, and it makes debugging execution traces a bit harder.
Neither of which do you really want while bringing up the decoder.  However,
the eventual use of goto_tb is why we want to delay performing the branch until
after writing back the registers, rather than simply writing to cpu_pc right away.

I hope this is enough to get started properly.


r~
Chen Gang Feb. 22, 2015, 12:25 a.m. UTC | #5
On 2/22/15 00:33, Richard Henderson wrote:
> On 02/21/2015 07:31 AM, Chen Gang S wrote:
>>
>>  - We can still use the original pipes order: "y0, y2, y1" and "x0, x1".
> 
> I guess, sure, though I don't think that'll help as much as you imagine.
> 
>>  - y0, y2, and x0 need to use tcg temporary variables, but y1 and x1 can
>>    still use real variables.
> 
> Possibly, but I wouldn't structure it that way.  I'd have all of the pipes
> write to temporaries.
> 
>>  - y1 and x1 need to flush the temporary variables, they also need to
>>    consider about jump cases for tcg code (flush tcg temporary variables
>>    after comparing and before jump).
> 
> I wouldn't do that either.  I'd have y1/x1 record that a branch occurred, and
> the condition, but delay the flushing and branching to common code.
>

OK, thanks. For me, your idea is OK; it is simpler (although it uses
more tcg temporary variables).

At present, performance is not very important, so I shall implement
the translation in the way you describe below, thanks.

> For instance:
> 

The demo code below is very valuable to me, but I guess it can be
improved too (the related replies are inline below):

> typedef enum ExitStatus {
>     NO_EXIT,         /* Fall through to next bundle */
>     EXIT_JUMP,       /* Normal branch exit */
>     EXIT_NORETURN    /* Exception taken (e.g. syscall) */
> } ExitStatus;
> 
> typedef struct DisasContext {
>     struct TranslationBlock *tb;
>     int mem_idx;
> 
>     uint64_t pc;
>     ExitStatus exit;
> 
>     int result_regs[3];
>     TCGv result_vals[3];
> 
>     TCGCond branch_cond;
>     TCGv branch_dest;
>     TCGv branch_val1;
>     TCGv branch_val2;
> } DisasContext;
> 
> 
> static void translate_one_bundle(DisasContext *dc, uint64_t bundle)
> {
>     int i;
> 
>     /* Initialize the per-bundle state of DC.  */
>     dc->exit = NO_EXIT;
>     for (i = 0; i < 3; ++i) {
>         dc->result_regs[i] = -1;
>         TCGV_UNUSED_I64(dc->result_vals[i]);
>     }
>     dc->branch_cond = TCG_COND_NEVER;
>     dc->branch_desti = -1;

It is branch_dest, not branch_desti.

>     TCGV_UNUSED_I64(dc->branch_dest);
>     TCGV_UNUSED_I64(dc->branch_val1);
>     TCGV_UNUSED_I64(dc->branch_val2);
> 
>     /* Decode all pipes, writing results into DC.  */
> 
>     /* If some pipe raises an exception, nothing left to do.  */
>     if (dc->exit == EXIT_NORETURN) {
>         return;
>     }
> 
>     /* Write back register results from all pipes.  */
>     for (i = 0; i < 3; ++i) {
>         int r = dc->result_regs[i];
>         if (r >= 0)
>             tcg_gen_mov_i64(cpu_regs[r], dc->result_vals[i]);
>             tcg_temp_free_i64(dc->result_vals[i]);
>         }
>     }
> 
>     /* Write back branch results, i.e. take the branch now.  */
>     if (dc->branch_cond != TCG_COND_NEVER) {
>         if (dc->branch_cond == TCG_COND_ALWAYS) {
>             /* Unconditional branch */
>             tcg_gen_mov_i64(cpu_pc, dc->branch_dest);
>         } else {
>             /* Conditional branch */
>             TCGv next_pc = tcg_const_i64(dc->pc + 8);
>             tcg_gen_movcond_i64(dc->branch_cond, cpu_pc,
>                                 dc->branch_val1, dc->branch_val2,
>                                 dc->branch_dest, next_pc);
>             tcg_temp_free_i64(dc->branch_val1);
>             tcg_temp_free_i64(dc->branch_val2);
>             tcg_temp_free_i64(next_pc);
>         }
>         tcg_temp_free_i64(dc->branch_dest);
>         dc->exit = EXIT_JUMP;

Do we need tcg_gen_exit_tb(0)? At present, to keep things simple, I always
use tcg_gen_exit_tb(0) at the end of a TB block.

>     }
> }
> 
> This mostly ignores the use of tcg_gen_goto_tb for now.  It's slightly more
> complicated to use, and it makes debugging execution traces a bit harder.
> Neither of which do you really want while bringing up the decoder.  However,
> the eventual use of goto_tb is why we want to delay performing the branch until
> after writing back the registers, rather than simply writing to cpu_pc right away.
>

Excuse me, I am not very familiar with tcg_gen_goto_tb()/tcg_gen_exit_tb(); at
present, I still don't understand their parameters.

I guess they are for performance (tb chaining), so for now I simply use only
tcg_gen_exit_tb(0).

More information about tcg_gen_goto_tb() and tcg_gen_exit_tb() is welcome.


> I hope this is enough to get started properly.
> 

Yeah, it is really enough.

Thanks.
Chris Metcalf Feb. 22, 2015, 12:25 a.m. UTC | #6
On 2/21/2015 7:25 PM, Chen Gang S wrote:
> On 2/22/15 00:33, Richard Henderson wrote:
>> >On 02/21/2015 07:31 AM, Chen Gang S wrote:
>>> >>
>>> >>  - We can still use the original pipes order: "y0, y2, y1" and "x0, x1".
>> >
>> >I guess, sure, though I don't think that'll help as much as you imagine.
>> >
>>
> OK, thanks. For me, your idea is OK, it is more simpler (although with
> more tcg temporary variables).

Richard pretty much said all I wanted to say, but I just wanted to reinforce
that the semantics of the multiple pipes is always "all together". So if you have
{ move r1, r2; move r2, r1 } then that swaps r1 and r2.  Or if you have
{ ld r1, sp; jrp r1 } then you are restoring r1 but jumping to wherever its
previous value said you needed to go.  Etc.

Similarly, if any pipeline takes an exception (a TLB fault from a memory op,
a GPV fault from an illegal mfspr, etc) then no pipeline completes its action.
Chen Gang Feb. 22, 2015, 1:08 a.m. UTC | #7
On 2/22/15 08:25, Chris Metcalf wrote:
> On 2/21/2015 7:25 PM, Chen Gang S wrote:
>> On 2/22/15 00:33, Richard Henderson wrote:
>>> >On 02/21/2015 07:31 AM, Chen Gang S wrote:
>>>> >>
>>>> >>  - We can still use the original pipes order: "y0, y2, y1" and "x0, x1".
>>> >
>>> >I guess, sure, though I don't think that'll help as much as you imagine.
>>> >
>>>
>> OK, thanks. For me, your idea is OK, it is more simpler (although with
>> more tcg temporary variables).
> 
> Richard pretty much said all I wanted to say, but I just wanted to reinforce
> that the semantics of the multiple pipes is always "all together".

OK, thanks.

>                                                                    So if you have
> { move r1, r2; move r2, r1 } then that swaps r1 and r2.

So the first pipe must use a temporary variable.

>                                                          Or if you have
> { ld r1, sp; jrp r1 } then you are restoring r1 but jumping to wherever its
> previous value said you needed to go.  Etc.
 
Excuse me, I am not quite sure whether ld/st can be in pipe x0, y0, or
y1.

But I guess you mean that y2 must use a temporary variable, or it
will cause issues.

> Similarly, if any pipeline takes an exception (a TLB fault from a memory op,
> a GPV fault from an illegal mfspr, etc) then no pipeline completes its action.
> 

Oh, really!! And I guess Richard's code cannot guarantee that: a memory
write operation (e.g. st) is not buffered. If my guess is correct, this
issue is not easy for me to fix.

I also guess that, for now, we need to think it over more before continuing.

Thanks.
Chen Gang Feb. 22, 2015, 4:42 a.m. UTC | #8
On 2/22/15 09:08, Chen Gang S wrote:
> On 2/22/15 08:25, Chris Metcalf wrote:

[...]

>> Similarly, if any pipeline takes an exception (a TLB fault from a memory op,
>> a GPV fault from an illegal mfspr, etc) then no pipeline completes its action.
>>
> 
> Oh, really !! And I guess, Richard's code can not be sure of it: memory
> write operand (e.g st) is not buffered. If what I guess is correct, for
> me, it is not quite easy to fix this issue.
> 
> I also guess, at present, we need think of more before continue.
> 

Oh, Richard's code is still OK, as long as we make sure that y2 and x1 are
always the last pipes executed in the bundle:

 - Execute y0/y1/x0, which save their changes in temporaries: they only
   affect registers and have no effect on anything else (e.g. memory
   values). Temporary variables can also be used for SPRs, so y1 is also OK.

 - Execute y2/x1; if they fail, just generate the exception, and as a
   result we can say "the whole bundle is not executed".

 - After finishing y2/x1, we write back the register temporary variables, then
   write back the branch temporary variables.


Thanks.
diff mbox

Patch

diff --git a/target-tilegx/cpu-qom.h b/target-tilegx/cpu-qom.h
index e15a8b8..866a77d 100644
--- a/target-tilegx/cpu-qom.h
+++ b/target-tilegx/cpu-qom.h
@@ -69,4 +69,6 @@  static inline TilegxCPU *tilegx_env_get_cpu(CPUTLState *env)
 
 #define ENV_GET_CPU(e) CPU(tilegx_env_get_cpu(e))
 
+#define ENV_OFFSET offsetof(TilegxCPU, env)
+
 #endif
diff --git a/target-tilegx/cpu.c b/target-tilegx/cpu.c
index 3dd66b5..a10cc24 100644
--- a/target-tilegx/cpu.c
+++ b/target-tilegx/cpu.c
@@ -69,10 +69,6 @@  static void tilegx_cpu_realizefn(DeviceState *dev, Error **errp)
     mcc->parent_realize(dev, errp);
 }
 
-static void tilegx_tcg_init(void)
-{
-}
-
 static void tilegx_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
diff --git a/target-tilegx/cpu.h b/target-tilegx/cpu.h
index 09a2b26..439c14f 100644
--- a/target-tilegx/cpu.h
+++ b/target-tilegx/cpu.h
@@ -54,6 +54,7 @@  typedef struct CPUTLState {
 
 #include "exec/cpu-all.h"
 
+void tilegx_tcg_init(void);
 int cpu_tilegx_exec(CPUTLState *s);
 int cpu_tilegx_signal_handler(int host_signum, void *pinfo, void *puc);
 
diff --git a/target-tilegx/translate.c b/target-tilegx/translate.c
index 5131fa7..254f439 100644
--- a/target-tilegx/translate.c
+++ b/target-tilegx/translate.c
@@ -25,17 +25,440 @@ 
 #include "exec/cpu_ldst.h"
 #include "exec/helper-gen.h"
 
+#define TILEGX_BUNDLE_SIZE            8  /* Size of one bundle, in bytes */
+#define TILEGX_BUNDLE_INSNS           3  /* Maximum insns per bundle */
+#define TILEGX_BUNDLE_OPCS            10 /* Assumed maximum opcs per bundle */
+
+/* Bundle type: Y if either of the top two bits is set, otherwise X */
+#define TILEGX_BUNDLE_TYPE_MASK       0xc000000000000000ULL
+#define TILEGX_BUNDLE_TYPE_Y(bundle)  ((bundle) & TILEGX_BUNDLE_TYPE_MASK)
+#define TILEGX_BUNDLE_TYPE_X(bundle)  (!TILEGX_BUNDLE_TYPE_Y(bundle))
+
+/* Per-pipe bit masks; none of them includes the two bundle type bits */
+#define TILEGX_PIPE_X0(bundle)        ((bundle) & 0x000000007fffffffULL)
+#define TILEGX_PIPE_X1(bundle)        ((bundle) & 0x3fffffff80000000ULL)
+#define TILEGX_PIPE_Y0(bundle)        ((bundle) & 0x00000000780fffffULL)
+#define TILEGX_PIPE_Y1(bundle)        ((bundle) & 0x3c07ffff80000000ULL)
+#define TILEGX_PIPE_Y2(bundle)        ((bundle) & 0x03f8000007f00000ULL)
+
+/* Opcode masks per pipe; a numeric suffix is the low bit of a sub-opcode */
+#define TILEGX_CODE_X0(bundle)        ((bundle) & 0x0000000070000000ULL)
+#define TILEGX_CODE_X0_18(bundle)     ((bundle) & 0x000000000ffc0000ULL)
+#define TILEGX_CODE_X0_20(bundle)     ((bundle) & 0x000000000ff00000ULL)
+#define TILEGX_CODE_X1(bundle)        ((bundle) & 0x3800000000000000ULL)
+#define TILEGX_CODE_X1_49(bundle)     ((bundle) & 0x07fe000000000000ULL)
+#define TILEGX_CODE_X1_49_43(bundle)  ((bundle) & 0x0001f80000000000ULL)
+#define TILEGX_CODE_X1_51(bundle)     ((bundle) & 0x07f8000000000000ULL)
+#define TILEGX_CODE_Y0(bundle)        ((bundle) & 0x0000000078000000ULL)
+#define TILEGX_CODE_Y0_E(bundle)      ((bundle) & 0x00000000000c0000ULL)
+#define TILEGX_CODE_Y1(bundle)        ((bundle) & 0x3c00000000000000ULL)
+#define TILEGX_CODE_Y1_E(bundle)      ((bundle) & 0x0006000000000000ULL)
+#define TILEGX_CODE_Y2(bundle)        ((bundle) & 0x0200000004000000ULL)
+/* No Y2_E */
+
+/* (F)NOP encodings; each one only has an effect within its own pipe */
+#define TILEGX_OPCX0_FNOP             0x0000000051483000ULL
+#define TILEGX_OPCX0_NOP              0x0000000051485000ULL
+#define TILEGX_OPCX1_FNOP             0x286a300000000000ULL
+#define TILEGX_OPCX1_NOP              0x286b080000000000ULL
+#define TILEGX_OPCY0_FNOP             0x00000000300c3000ULL
+#define TILEGX_OPCY0_NOP              0x00000000300c5000ULL
+#define TILEGX_OPCY1_FNOP             0x1c06400000000000ULL
+#define TILEGX_OPCY1_NOP              0x1c06780000000000ULL
+/* No Y2 (F)NOP */
+
+/* Operand field width masks, applied after shifting the field down */
+#define TILEGX_DATA_REGISTER          0x3f    /* Register data width mask */
+#define TILEGX_DATA_IMM8              0xff    /* Imm8 data width mask */
+#define TILEGX_DATA_IMM16             0xffff  /* Imm16 data width mask */
+
+static TCGv_ptr cpu_env; /* TCG global "env"; set by tilegx_tcg_init() */
+
+#include "exec/gen-icount.h"
+/* Create the "env" TCG global on TCG_AREG0 and keep it in cpu_env. */
+void tilegx_tcg_init(void)
+{
+    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
+}
+
+/* Decoder state; it only exists while code is being translated.  */
+struct DisasContext {
+    uint64_t pc;     /* NOTE(review): presumably the guest PC -- confirm */
+
+    int is_jmp;      /* set to DISAS_JUMP when the X1 decoder sees a jr */
+    unsigned int cpustate_changed;
+};
+
+static int translate_x0_bundle(struct DisasContext *dc, uint64_t bundle)
+{
+    unsigned char rdest, rsrc, rsrcb;
+    int8_t im8;     /* signed by type; plain char may be unsigned */
+    int16_t im16;
+    /* Log the decoded X0 insn; returns 0, or -1 if it is not recognised. */
+    qemu_log("\nx0: %16.16llx\t", (unsigned long long)bundle);
+    if (bundle == TILEGX_OPCX0_FNOP) {
+        qemu_log("fnop");
+        return 0;
+    }
+
+    if (bundle == TILEGX_OPCX0_NOP) {
+        qemu_log("nop");
+        return 0;
+    }
+
+    switch (TILEGX_CODE_X0(bundle)) {
+    case 0x0000000010000000ULL: /* moveli Dest, Imm16 */
+        rdest = (unsigned char)(bundle & TILEGX_DATA_REGISTER);
+        rsrc = (unsigned char)((bundle >> 6) & TILEGX_DATA_REGISTER);
+        im16 = (int16_t)((bundle >> 12) & TILEGX_DATA_IMM16);
+        if (rsrc != 0x3f) {
+            /* FIXME: raise an exception for invalid instruction */
+            qemu_log("invalid moveli src value: %x", rsrc);
+            return -1;
+        }
+        qemu_log("moveli r%d, %d", rdest, im16);
+        break;
+
+    case 0x0000000040000000ULL:
+        switch (TILEGX_CODE_X0_20(bundle)) {
+        /* andi Dest, SrcA, Imm8 */
+        case 0x0000000000300000ULL:
+            rdest = (unsigned char)(bundle & TILEGX_DATA_REGISTER);
+            rsrc = (unsigned char)((bundle >> 6) & TILEGX_DATA_REGISTER);
+            im8 = (int8_t)((bundle >> 12) & TILEGX_DATA_IMM8);
+            qemu_log("andi r%d, r%d, %d", rdest, rsrc, im8);
+            break;
+
+        /* addi Dest, SrcA, Imm8 */
+        case 0x0000000000100000ULL:
+            rdest = (unsigned char)(bundle & TILEGX_DATA_REGISTER);
+            rsrc = (unsigned char)((bundle >> 6) & TILEGX_DATA_REGISTER);
+            im8 = (int8_t)((bundle >> 12) & TILEGX_DATA_IMM8);
+            qemu_log("addi r%d, r%d, %d", rdest, rsrc, im8);
+            break;
+
+        default:
+            /* FIXME: raise an exception for invalid instruction */
+            qemu_log("20 bundle value: %16.16llx", TILEGX_CODE_X0_20(bundle));
+            return -1;
+        }
+        break;
+
+    case 0x0000000050000000ULL:
+        switch (TILEGX_CODE_X0_18(bundle)) {
+        /* add Dest, SrcA, SrcB */
+        case 0x00000000000c0000ULL:
+            rdest = (unsigned char)(bundle & TILEGX_DATA_REGISTER);
+            rsrc = (unsigned char)((bundle >> 6) & TILEGX_DATA_REGISTER);
+            rsrcb = (unsigned char)((bundle >> 12) & TILEGX_DATA_REGISTER);
+            qemu_log("add r%d, r%d, r%d", rdest, rsrc, rsrcb);
+            break;
+
+        default:
+            /* FIXME: raise an exception for invalid instruction */
+            qemu_log("18 bundle value: %16.16llx", TILEGX_CODE_X0_18(bundle));
+            return -1;
+        }
+        break;
+
+    /* shl16insli Dest, SrcA, Imm16 */
+    case 0x0000000070000000ULL:
+        rdest = (unsigned char)(bundle & TILEGX_DATA_REGISTER);
+        rsrc = (unsigned char)((bundle >> 6) & TILEGX_DATA_REGISTER);
+        im16 = (int16_t)((bundle >> 12) & TILEGX_DATA_IMM16);
+        qemu_log("shl16insli r%d, r%d, %d", rdest, rsrc, im16);
+        break;
+
+    default:
+        /* FIXME: raise an exception for invalid instruction */
+        qemu_log("bundle value: %16.16llx", TILEGX_CODE_X0(bundle));
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Decode the instruction held in the X1 pipe of an X-mode bundle and log
+ * its disassembly with qemu_log().
+ *
+ * @dc:     translation context; is_jmp is set to DISAS_JUMP when a "jr"
+ *          instruction is decoded.
+ * @bundle: the bundle bits selected for the X1 pipe by the caller.
+ *
+ * Returns 0 when the instruction was recognised, -1 otherwise.
+ */
+static int translate_x1_bundle(struct DisasContext *dc, uint64_t bundle)
+{
+    unsigned char rdest, rsrc, rsrcb;
+    /*
+     * Use fixed-width signed types for the immediates: plain "char" is
+     * unsigned on several hosts, which would print negative immediates
+     * as large positive values.
+     */
+    int8_t im8;
+    int16_t im16;
+
+    /* bundle is uint64_t, so it must be printed with PRIx64, not %lx. */
+    qemu_log("\nx1: %16.16" PRIx64 "\t", bundle);
+    if (bundle == TILEGX_OPCX1_FNOP) {
+        qemu_log("fnop");
+        return 0;
+    }
+
+    if (bundle == TILEGX_OPCX1_NOP) {
+        qemu_log("nop");
+        return 0;
+    }
+
+    switch (TILEGX_CODE_X1(bundle)) {
+    /* moveli Dest, Imm16 */
+    case 0x0000000000000000ULL:
+        rdest = (unsigned char)((bundle >> 31) & TILEGX_DATA_REGISTER);
+        rsrc = (unsigned char)((bundle >> 37) & TILEGX_DATA_REGISTER);
+        im16 = (int16_t)((bundle >> 43) & TILEGX_DATA_IMM16);
+        /* moveli is only accepted when the source field is 0x3f. */
+        if (rsrc != 0x3f) {
+            /* FIXME: raise an exception for invalid instruction */
+            qemu_log("invalid moveli src value: %x", rsrc);
+            return -1;
+        }
+        qemu_log("moveli r%d, %d", rdest, im16);
+        break;
+
+    case 0x1800000000000000ULL:
+        switch (TILEGX_CODE_X1_51(bundle)) {
+        /* addi Dest, SrcA, Imm8 */
+        case 0x0008000000000000ULL:
+            rdest = (unsigned char)((bundle >> 31) & TILEGX_DATA_REGISTER);
+            rsrc = (unsigned char)((bundle >> 37) & TILEGX_DATA_REGISTER);
+            im8 = (int8_t)((bundle >> 43) & TILEGX_DATA_IMM8);
+            qemu_log("addi r%d, r%d, %d", rdest, rsrc, im8);
+            break;
+        default:
+            /* FIXME: raise an exception for invalid instruction */
+            qemu_log("51 bundle value: %llx", TILEGX_CODE_X1_51(bundle));
+            return -1;
+        }
+        break;
+
+    case 0x2800000000000000ULL:
+        switch (TILEGX_CODE_X1_49(bundle)) {
+        /* add Dest, SrcA, SrcB */
+        case 0x0006000000000000ULL:
+            rdest = (unsigned char)((bundle >> 31) & TILEGX_DATA_REGISTER);
+            rsrc = (unsigned char)((bundle >> 37) & TILEGX_DATA_REGISTER);
+            rsrcb = (unsigned char)((bundle >> 43) & TILEGX_DATA_REGISTER);
+            qemu_log("add r%d, r%d, r%d", rdest, rsrc, rsrcb);
+            break;
+
+        case 0x006a000000000000ULL:
+            switch (TILEGX_CODE_X1_49_43(bundle)) {
+            /* lnk Dest */
+            case 0x0000f00000000000ULL:
+                rdest = (unsigned char)((bundle >> 31) & TILEGX_DATA_REGISTER);
+                qemu_log("lnk r%d", rdest);
+                break;
+            /* jr SrcA */
+            case 0x0000700000000000ULL:
+                rsrc = (unsigned char)((bundle >> 37) & TILEGX_DATA_REGISTER);
+                qemu_log("jr r%d", rsrc);
+                /* Tell the translation loop that this bundle jumps. */
+                dc->is_jmp = DISAS_JUMP;
+                break;
+
+            default:
+                /* FIXME: raise an exception for invalid instruction */
+                qemu_log("49_43 bundle value: %16.16llx",
+                       TILEGX_CODE_X1_49_43(bundle));
+                return -1;
+            }
+            break;
+        default:
+            /* FIXME: raise an exception for invalid instruction */
+            qemu_log("49 bundle value: %16.16llx", TILEGX_CODE_X1_49(bundle));
+            return -1;
+        }
+        break;
+
+    /* shl16insli Dest, SrcA, Imm16 */
+    case 0x3800000000000000ULL:
+        rdest = (unsigned char)((bundle >> 31) & TILEGX_DATA_REGISTER);
+        rsrc = (unsigned char)((bundle >> 37) & TILEGX_DATA_REGISTER);
+        im16 = (int16_t)((bundle >> 43) & TILEGX_DATA_IMM16);
+        qemu_log("shl16insli r%d, r%d, %d", rdest, rsrc, im16);
+        break;
+
+    default:
+        /* FIXME: raise an exception for invalid instruction */
+        qemu_log("bundle value: %16.16llx", TILEGX_CODE_X1(bundle));
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Decode the instruction held in the Y0 pipe of a Y-mode bundle and log
+ * its disassembly with qemu_log().
+ *
+ * @dc:     translation context (not modified by the instructions decoded
+ *          here).
+ * @bundle: the bundle bits selected for the Y0 pipe by the caller.
+ *
+ * Returns 0 when the instruction was recognised, -1 otherwise.
+ */
+static int translate_y0_bundle(struct DisasContext *dc, uint64_t bundle)
+{
+    unsigned char rsrc, rdst;
+
+    /* bundle is uint64_t, so it must be printed with PRIx64, not %lx. */
+    qemu_log("\ny0: %16.16" PRIx64 "\t", bundle);
+    if (bundle == TILEGX_OPCY0_FNOP) {
+        qemu_log("fnop");
+        return 0;
+    }
+
+    if (bundle == TILEGX_OPCY0_NOP) {
+        qemu_log("nop");
+        return 0;
+    }
+
+    switch (TILEGX_CODE_Y0(bundle)) {
+    case 0x0000000050000000ULL:
+        switch (TILEGX_CODE_Y0_E(bundle)) {
+        /* move Dest, SrcA */
+        case 0x0000000000080000ULL:
+            rdst = (unsigned char)(bundle & TILEGX_DATA_REGISTER);
+            rsrc = (unsigned char)((bundle >> 6) & TILEGX_DATA_REGISTER);
+            qemu_log("move r%d, r%d", rdst, rsrc);
+            break;
+        default:
+            /* FIXME: raise an exception for invalid instruction */
+            /* Log the failing sub-opcode like every other decode error. */
+            qemu_log("E bundle value: %16.16llx",
+                     (unsigned long long)TILEGX_CODE_Y0_E(bundle));
+            return -1;
+        }
+        break;
+
+    default:
+        /* FIXME: raise an exception for invalid instruction */
+        qemu_log("bundle value: %16.16llx", TILEGX_CODE_Y0(bundle));
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Decode the instruction held in the Y1 pipe of a Y-mode bundle and log
+ * its disassembly with qemu_log().
+ *
+ * @dc:     translation context (not modified by the instructions decoded
+ *          here).
+ * @bundle: the bundle bits selected for the Y1 pipe by the caller.
+ *
+ * Returns 0 when the instruction was recognised, -1 otherwise.
+ */
+static int translate_y1_bundle(struct DisasContext *dc, uint64_t bundle)
+{
+    unsigned char rdest, rsrc;
+    /*
+     * Fixed-width signed type: plain "char" is unsigned on several hosts,
+     * which would print a negative immediate as a large positive value.
+     */
+    int8_t im8;
+
+    /* bundle is uint64_t, so it must be printed with PRIx64, not %lx. */
+    qemu_log("\ny1: %16.16" PRIx64 "\t", bundle);
+    if (bundle == TILEGX_OPCY1_FNOP) {
+        qemu_log("fnop");
+        return 0;
+    }
+
+    if (bundle == TILEGX_OPCY1_NOP) {
+        qemu_log("nop");
+        return 0;
+    }
+
+    switch (TILEGX_CODE_Y1(bundle)) {
+    /* addi Dest, SrcA, Imm8 */
+    case 0x0400000000000000ULL:
+        rdest = (unsigned char)((bundle >> 31) & TILEGX_DATA_REGISTER);
+        rsrc = (unsigned char)((bundle >> 37) & TILEGX_DATA_REGISTER);
+        im8 = (int8_t)((bundle >> 43) & TILEGX_DATA_IMM8);
+        qemu_log("addi r%d, r%d, %d", rdest, rsrc, im8);
+        break;
+
+    default:
+        /* FIXME: raise an exception for invalid instruction */
+        qemu_log("bundle value: %16.16llx", TILEGX_CODE_Y1(bundle));
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Decode the instruction held in the Y2 pipe of a Y-mode bundle and log
+ * its disassembly with qemu_log().
+ *
+ * @dc:     translation context (not modified by the instructions decoded
+ *          here).
+ * @bundle: the bundle bits selected for the Y2 pipe by the caller.
+ * @mode:   the bundle-type bits of the whole bundle; they select between
+ *          "ld" and "st" for the opcode handled here.
+ *
+ * Returns 0 when the instruction was recognised, -1 otherwise.
+ */
+static int translate_y2_bundle(struct DisasContext *dc, uint64_t bundle,
+                               uint64_t mode)
+{
+    unsigned char rsrc, rdest;
+
+    /* bundle is uint64_t, so it must be printed with PRIx64, not %lx. */
+    qemu_log("\ny2: %16.16" PRIx64 "\t", bundle);
+    switch (TILEGX_CODE_Y2(bundle)) {
+    case 0x0200000004000000ULL:
+        switch (mode) {
+        /* ld Dest, Src */
+        case 0x8000000000000000ULL:
+            rdest = (unsigned char)((bundle >> 51) & TILEGX_DATA_REGISTER);
+            rsrc = (unsigned char)((bundle >> 20) & TILEGX_DATA_REGISTER);
+            qemu_log("ld r%d, r%d", rdest, rsrc);
+            break;
+        /* st Dest, Src */
+        case 0xc000000000000000ULL:
+            /* Same two fields as "ld", with the roles swapped. */
+            rdest = (unsigned char)((bundle >> 20) & TILEGX_DATA_REGISTER);
+            rsrc = (unsigned char)((bundle >> 51) & TILEGX_DATA_REGISTER);
+            qemu_log("st r%d, r%d", rdest, rsrc);
+            break;
+        default:
+            /* FIXME: raise an exception for invalid instruction */
+            qemu_log("bundle value: %16.16llx,  mode: %16.16" PRIx64,
+                   TILEGX_CODE_Y2(bundle), mode);
+            return -1;
+        }
+        break;
+
+    default:
+        /* FIXME: raise an exception for invalid instruction */
+        qemu_log("bundle value: %16.16llx", TILEGX_CODE_Y2(bundle));
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Decode every pipe of one 64-bit bundle.
+ *
+ * A Y-type bundle is decoded as y0, y2, y1; any other bundle as x0, x1.
+ * The y1/x1 pipe is always handled last because it may hold a jump.
+ * Decoding stops at the first pipe that fails, and that pipe's non-zero
+ * result is returned; 0 means every pipe was decoded.
+ */
+static int translate_one_bundle(struct DisasContext *dc, uint64_t bundle)
+{
+    int err;
+
+    if (!TILEGX_BUNDLE_TYPE_Y(bundle)) {
+        /* X mode: x0 first, then x1 (which may jump). */
+        err = translate_x0_bundle(dc, TILEGX_PIPE_X0(bundle));
+        if (err) {
+            return err;
+        }
+        return translate_x1_bundle(dc, TILEGX_PIPE_X1(bundle));
+    }
+
+    /* Y mode: y0, then y2, then y1 (which may jump). */
+    err = translate_y0_bundle(dc, TILEGX_PIPE_Y0(bundle));
+    if (!err) {
+        err = translate_y2_bundle(dc, TILEGX_PIPE_Y2(bundle),
+                                  bundle & TILEGX_BUNDLE_TYPE_MASK);
+    }
+    if (!err) {
+        err = translate_y1_bundle(dc, TILEGX_PIPE_Y1(bundle));
+    }
+    return err;
+}
+
 static inline void gen_intermediate_code_internal(TilegxCPU *cpu,
                                                   TranslationBlock *tb,
                                                   bool search_pc)
 {
-    /*
-     * FIXME: after load elf64 tilegx binary successfully, it will quit, at
-     * present, and will implement the related features next.
-     */
-    fprintf(stderr, "\nLoad elf64 tilegx successfully\n");
-    fprintf(stderr, "reach code start position: [" TARGET_FMT_lx "] %s\n\n",
-            tb->pc, lookup_symbol(tb->pc));
+    int ret = 0;
+    struct DisasContext ctx;
+    struct DisasContext *dc = &ctx;
+
+    CPUTLState *env = &cpu->env;
+    uint64_t pc_start = tb->pc;
+    uint64_t next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
+    int num_insns = 0;
+    int max_insns = tb->cflags & CF_COUNT_MASK;
+
+    /*
+     * Decode the bundles of one translation block, starting at tb->pc and
+     * reading one 8-byte bundle per loop iteration; each bundle is handed
+     * to translate_one_bundle(), whose per-pipe helpers log the decoded
+     * instructions.
+     *
+     * FIXME: do not consider about search_pc firstly (the search_pc
+     * argument is not used in this function yet).
+     */
+
+    dc->is_jmp = DISAS_NEXT;
+    dc->pc = pc_start;
+    dc->cpustate_changed = 0;
+
+    if (!max_insns) {
+        max_insns = CF_COUNT_MASK;  /* no limit in cflags: use the maximum */
+    }
+
+    gen_tb_start(tb);
+    do {
+        ret = translate_one_bundle(dc, cpu_ldq_data(env, dc->pc));
+        if (ret) {
+            goto err;               /* undecodable bundle: stop translating */
+        }
+        num_insns++;
+        dc->pc += 8;                /* advance to the next 64-bit bundle */
+    } while (tcg_op_buf_count() <= OPC_MAX_SIZE - TILEGX_BUNDLE_OPCS
+             && num_insns <= max_insns - TILEGX_BUNDLE_INSNS
+             && dc->pc <= next_page_start - TILEGX_BUNDLE_SIZE
+             && !dc->is_jmp         /* the x1 decoder sets this for "jr" */
+             && !dc->cpustate_changed);
+    gen_tb_end(tb, num_insns);
+
+    /*
+     * FIXME: Only decode once. Both the normal path and the err path reach
+     * the exit(0) below, so the process terminates after the first
+     * translation block has been decoded (or has failed to decode); the
+     * err path additionally skips gen_tb_end().
+     */
+err:
+    qemu_log("\n\nfinish tb translation.\n");
     exit(0);
 }