Message ID | 1378051658-3393-4-git-send-email-rth@twiddle.net |
---|---|
State | New |
Headers | show |
On 09/01/2013 06:07 PM, Richard Henderson wrote: > Drop the ld/st_trampolines, loading the return address into a > parameter register directly. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > include/exec/exec-all.h | 4 +- > tcg/ppc/tcg-target.c | 220 +++++++++++++++++++----------------------------- > 2 files changed, 86 insertions(+), 138 deletions(-) > > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index beb4149..a81e805 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -324,9 +324,7 @@ extern uintptr_t tci_tb_ptr; > In some implementations, we pass the "logical" return address manually; > in others, we must infer the logical return from the true return. */ > #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) > -# if defined (_ARCH_PPC) && !defined (_ARCH_PPC64) > -# define GETRA_LDST(RA) (*(int32_t *)((RA) - 4)) > -# elif defined(__arm__) > +# if defined(__arm__) > /* We define two insns between the return address and the branch back to > straight-line. Find and decode that branch insn. */ > # define GETRA_LDST(RA) tcg_getra_ldst(RA) > diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c > index b0fbc54..a890319 100644 > --- a/tcg/ppc/tcg-target.c > +++ b/tcg/ppc/tcg-target.c > @@ -551,27 +551,26 @@ static void add_qemu_ldst_label (TCGContext *s, > label->label_ptr[0] = label_ptr; > } > > -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, > - int mmu_idx) */ > +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, > + * int mmu_idx, uintptr_t ra) > + */ > static const void * const qemu_ld_helpers[4] = { > - helper_ldb_mmu, > - helper_ldw_mmu, > - helper_ldl_mmu, > - helper_ldq_mmu, > + helper_ret_ldub_mmu, > + helper_ret_lduw_mmu, > + helper_ret_ldul_mmu, > + helper_ret_ldq_mmu, > }; > > -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, > - uintxx_t val, int mmu_idx) */ > +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, > + * uintxx_t val, int mmu_idx, uintptr_t ra) > + */ > static const void * const qemu_st_helpers[4] = { > - helper_stb_mmu, > - helper_stw_mmu, > - helper_stl_mmu, > - helper_stq_mmu, > + helper_ret_stb_mmu, > + helper_ret_stw_mmu, > + helper_ret_stl_mmu, > + helper_ret_stq_mmu, > }; > > -static void *ld_trampolines[4]; > -static void *st_trampolines[4]; > - > static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2, > int addr_reg, int addr_reg2, int s_bits, > int offset1, int offset2, uint8_t **label_ptr) > @@ -608,9 +607,14 @@ static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2, > tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1)); > tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ)); > #endif > + > + /* Use a conditional branch-and-link so that we load a pointer to > + somewhere within the current opcode, for passing on to the helper. > + This address cannot be used for a tail call, but it's shorter > + than forming an address from scratch. */ > *label_ptr = s->code_ptr; > retranst = ((uint16_t *) s->code_ptr)[1] & ~3; > - tcg_out32 (s, BC | BI (7, CR_EQ) | retranst | BO_COND_FALSE); > + tcg_out32(s, BC | BI(7, CR_EQ) | retranst | BO_COND_FALSE | LK); > > /* r0 now contains &env->tlb_table[mem_index][index].addr_x */ > tcg_out32 (s, (LWZ > @@ -833,132 +837,99 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) > } > > #if defined(CONFIG_SOFTMMU) > -static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label) > +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) > { > - int s_bits; > - int ir; > - int opc = label->opc; > - int mem_index = label->mem_index; > - int data_reg = label->datalo_reg; > - int data_reg2 = label->datahi_reg; > - int addr_reg = label->addrlo_reg; > - uint8_t *raddr = label->raddr; > - uint8_t **label_ptr = &label->label_ptr[0]; > + TCGReg ir, datalo, datahi; > + int opc = lb->opc; > > - s_bits = opc & 3; > + reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); > > - /* resolve label address */ > - reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr); > + ir = TCG_REG_R3; > + tcg_out_mov(s, TCG_TYPE_PTR, ir++, TCG_AREG0); > > - /* slow path */ > - ir = 4; > -#if TARGET_LONG_BITS == 32 > - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); > -#else > + if (TARGET_LONG_BITS == 32) { > + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg); > + } else { > #ifdef TCG_TARGET_CALL_ALIGN_ARGS > - ir |= 1; > -#endif > - tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg); > - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); > + ir |= 1; > #endif > - tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index); > - tcg_out_call (s, (tcg_target_long) ld_trampolines[s_bits], 1); > - tcg_out32 (s, (tcg_target_long) raddr); > + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrhi_reg); > + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg); > + } > + > + tcg_out_movi(s, TCG_TYPE_I32, ir++, lb->mem_index); > + tcg_out32(s, MFSPR | RT(ir++) | LR); > + > + tcg_out_call(s, (uintptr_t)qemu_ld_helpers[opc & 3], 1); > + > + datalo = lb->datalo_reg; > switch (opc) { > case 0|4: > - tcg_out32 (s, EXTSB | RA (data_reg) | RS (3)); > + tcg_out32(s, EXTSB | RA(datalo) | RS(TCG_REG_R3)); > break; > case 1|4: > - tcg_out32 (s, EXTSH | RA (data_reg) | RS (3)); > + tcg_out32(s, EXTSH | RA(datalo) | RS(TCG_REG_R3)); > break; > - case 0: > - case 1: > - case 2: > - if (data_reg != 3) > - tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3); > + > + default: > + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R3); > break; > + > case 3: > - if (data_reg == 3) { > - if (data_reg2 == 4) { > - tcg_out_mov (s, TCG_TYPE_I32, 0, 4); > - tcg_out_mov (s, TCG_TYPE_I32, 4, 3); > - tcg_out_mov (s, TCG_TYPE_I32, 3, 0); > - } > - else { > - tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3); > - tcg_out_mov (s, TCG_TYPE_I32, 3, 4); > - } > - } > - else { > - if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4); > - if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3); > + datahi = lb->datahi_reg; > + if (datalo != TCG_REG_R3) { > + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4); > + tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); > + } else if (datahi != TCG_REG_R4) { > + tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); > + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4); > + } else { > + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, TCG_REG_R4); > + tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); > + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R0); > } > break; > } > + > /* Jump to the code corresponding to next IR of qemu_st */ > - tcg_out_b (s, 0, (tcg_target_long) raddr); > + tcg_out_b(s, 0, (uintptr_t)lb->raddr); > } > > -static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label) > +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) > { > - int ir; > - int opc = label->opc; > - int mem_index = label->mem_index; > - int data_reg = label->datalo_reg; > - int data_reg2 = label->datahi_reg; > - int addr_reg = label->addrlo_reg; > - uint8_t *raddr = label->raddr; > - uint8_t **label_ptr = &label->label_ptr[0]; > - > - /* resolve label address */ > - reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr); > - > - /* slow path */ > - ir = 4; > -#if TARGET_LONG_BITS == 32 > - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); > -#else > + TCGReg ir; > + > + reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); > + > + ir = TCG_REG_R3; > + tcg_out_mov(s, TCG_TYPE_PTR, ir++, TCG_AREG0); > + > + if (TARGET_LONG_BITS == 32) { > + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg); > + } else { > #ifdef TCG_TARGET_CALL_ALIGN_ARGS > - ir |= 1; > -#endif > - tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg); > - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); > + ir |= 1; > #endif > + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrhi_reg); > + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg); > + } > > - switch (opc) { > - case 0: > - tcg_out32 (s, (RLWINM > - | RA (ir) > - | RS (data_reg) > - | SH (0) > - | MB (24) > - | ME (31))); > - break; > - case 1: > - tcg_out32 (s, (RLWINM > - | RA (ir) > - | RS (data_reg) > - | SH (0) > - | MB (16) > - | ME (31))); > - break; > - case 2: > - tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg); > - break; > - case 3: > + if (lb->opc != 3) { > + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->datalo_reg); > + } else { > #ifdef TCG_TARGET_CALL_ALIGN_ARGS > ir |= 1; > #endif > - tcg_out_mov (s, TCG_TYPE_I32, ir++, data_reg2); > - tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg); > - break; > + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->datahi_reg); > + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->datalo_reg); > } > - ir++; > > - tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index); > - tcg_out_call (s, (tcg_target_long) st_trampolines[opc], 1); > - tcg_out32 (s, (tcg_target_long) raddr); > - tcg_out_b (s, 0, (tcg_target_long) raddr); > + tcg_out_movi(s, TCG_TYPE_I32, ir++, lb->mem_index); > + tcg_out32(s, MFSPR | RT(ir++) | LR); > + > + tcg_out_call(s, (uintptr_t)qemu_st_helpers[lb->opc], 1); > + > + tcg_out_b(s, 0, (uintptr_t)lb->raddr); > } > > void tcg_out_tb_finalize(TCGContext *s) > @@ -979,17 +950,6 @@ void tcg_out_tb_finalize(TCGContext *s) > } > #endif > > -#ifdef CONFIG_SOFTMMU > -static void emit_ldst_trampoline (TCGContext *s, const void *ptr) > -{ > - tcg_out32 (s, MFSPR | RT (3) | LR); > - tcg_out32 (s, ADDI | RT (3) | RA (3) | 4); > - tcg_out32 (s, MTSPR | RS (3) | LR); > - tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0); > - tcg_out_b (s, 0, (tcg_target_long) ptr); > -} > -#endif > - > static void tcg_target_qemu_prologue (TCGContext *s) > { > int i, frame_size; > @@ -1050,16 +1010,6 @@ static void tcg_target_qemu_prologue (TCGContext *s) > tcg_out32 (s, MTSPR | RS (0) | LR); > tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size); > tcg_out32 (s, BCLR | BO_ALWAYS); > - > -#ifdef CONFIG_SOFTMMU > - for (i = 0; i < 4; ++i) { > - ld_trampolines[i] = s->code_ptr; > - emit_ldst_trampoline (s, qemu_ld_helpers[i]); > - > - st_trampolines[i] = s->code_ptr; > - emit_ldst_trampoline (s, qemu_st_helpers[i]); > - } > -#endif > } > > static void tcg_out_ld (TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, > Bad news... with this patch, either with or without patch 2, trying to execute sieve.flat from kvm-unit-tests (it doesn't matter if it is compiled as 32-bit or 64-bit, and with both i386-softmmu and x86_64-softmmu targets) fails as follows on my PowerBook: qemu: fatal: Trying to execute code outside RAM or ROM at 0x70360000 EAX=00006d20 EBX=00007025 ECX=00000000 EDX=00000000 ESI=07fd7bd0 EDI=000f1930 EBP=07fd7b00 ESP=00006e0c EIP=70270000 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 00000000 ffffffff 008f9300 CS =f000 000f0000 ffffffff 008f9b00 SS =0000 00000000 ffffffff 008f9300 DS =0000 00000000 ffffffff 008f9300 FS =0000 00000000 ffffffff 008f9300 GS =0000 00000000 ffffffff 008f9300 LDT=0000 00000000 0000ffff 00008200 TR =0000 00000000 0000ffff 00008b00 GDT= 000f69e0 00000037 IDT= 00000000 000003ff CR0=00000010 CR2=00000000 CR3=00000000 CR4=00000000 DR0=00000000 DR1=00000000 DR2=00000000 DR3=00000000 DR6=ffff0ff0 DR7=00000400 CCS=00000044 CCD=00006df8 CCO=ADDL EFER=0000000000000000 FCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80 FPR0=0000000000000000 0000 FPR1=0000000000000000 0000 FPR2=0000000000000000 0000 FPR3=0000000000000000 0000 FPR4=0000000000000000 0000 FPR5=0000000000000000 0000 FPR6=0000000000000000 0000 FPR7=0000000000000000 0000 XMM00=00000000000000000000000000000000 XMM01=00000000000000000000000000000000 XMM02=00000000000000000000000000000000 XMM03=00000000000000000000000000000000 XMM04=00000000000000000000000000000000 XMM05=00000000000000000000000000000000 XMM06=00000000000000000000000000000000 XMM07=00000000000000000000000000000000 Aborted The failure happens as soon as the first hardware interrupt is serviced: Servicing hardware INT=0x08 Servicing hardware INT=0x09 ---------------- IN: 0x000fe98f: push %es 0x000fe990: push %ebp 0x000fe992: push %edi 0x000fe994: push %esi 0x000fe996: push %ebx 0x000fe998: sub $0x44,%esp 0x000fe99c: mov $0x40,%eax 0x000fe9a2: mov %ax,%es 0x000fe9a4: mov %es:0x6c,%edx 0x000fe9aa: inc %edx 0x000fe9ac: cmp $0x1800af,%edx 0x000fe9b3: jbe 0xfe9c6 ---------------- IN: 0x000f77f4: xor %edx,%edx 0x000f77f7: calll *%ecx qemu: fatal: Trying to execute code outside RAM or ROM at 0x70360000 Command line: i386-softmmu/qemu-system-i386 -kernel sieve32.flat -serial stdio -device isa-debug-exit,iobase=0xf4 -nographic My two patches + 2/4 from this series work. I didn't try 4/4 because it doesn't apply cleanly on top of my patches. Paolo
On 08/19/2013 12:42 PM, Paolo Bonzini wrote: > Bad news... with this patch, either with or without patch 2, trying to execute > sieve.flat from kvm-unit-tests (it doesn't matter if it is compiled as 32-bit > or 64-bit, and with both i386-softmmu and x86_64-softmmu targets) fails as > follows on my PowerBook: > > qemu: fatal: Trying to execute code outside RAM or ROM at 0x70360000 Hum. Are you sure it's anything related to the ppc backend at all? This test doesn't work with an x86_64 host either. qemu: fatal: Trying to execute code outside RAM or ROM at 0x004001ba EAX=80000011 EBX=00009500 ECX=c0000080 EDX=00000000 ESI=00000000 EDI=00542000 EBP=00000000 ESP=0044abbc EIP=004001ba EFL=00000046 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0010 00000000 ffffffff 00cf9300 DPL=0 DS [-WA] CS =0008 00000000 ffffffff 00cf9a00 DPL=0 CS32 [-R-] SS =0010 00000000 ffffffff 00cf9300 DPL=0 DS [-WA] DS =0010 00000000 ffffffff 00cf9300 DPL=0 DS [-WA] FS =0010 00000000 ffffffff 00cf9300 DPL=0 DS [-WA] GS =0010 00000000 ffffffff 00cf9300 DPL=0 DS [-WA] LDT=0000 00000000 0000ffff 00008200 DPL=0 LDT TR =0000 00000000 0000ffff 00008b00 DPL=0 TSS32-busy GDT= 0040800a 00000447 IDT= 00000000 000003ff CR0=80000011 CR2=00000000 CR3=00407000 CR4=00000020 DR0=00000000 DR1=00000000 DR2=00000000 DR3=00000000 DR6=ffff0ff0 DR7=00000400 CCS=00000000 CCD=00000000 CCO=SARL EFER=0000000000000000 FCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80 FPR0=0000000000000000 0000 FPR1=0000000000000000 0000 FPR2=0000000000000000 0000 FPR3=0000000000000000 0000 FPR4=0000000000000000 0000 FPR5=0000000000000000 0000 FPR6=0000000000000000 0000 FPR7=0000000000000000 0000 XMM00=00000000000000000000000000000000 XMM01=00000000000000000000000000000000 XMM02=00000000000000000000000000000000 XMM03=00000000000000000000000000000000 XMM04=00000000000000000000000000000000 XMM05=00000000000000000000000000000000 XMM06=00000000000000000000000000000000 XMM07=00000000000000000000000000000000 Aborted (core dumped) This happens after one of the writes to %cr0. Of course, the test works with kvm enabled, so I don't blame the test so much as the target-i386 front end... This is not new breakage, either. I've checked back through 1.4.0 and I can't make it work with any version of TCG. r~
Il 09/09/2013 19:42, Richard Henderson ha scritto: > On 08/19/2013 12:42 PM, Paolo Bonzini wrote: >> Bad news... with this patch, either with or without patch 2, trying to execute >> sieve.flat from kvm-unit-tests (it doesn't matter if it is compiled as 32-bit >> or 64-bit, and with both i386-softmmu and x86_64-softmmu targets) fails as >> follows on my PowerBook: >> >> qemu: fatal: Trying to execute code outside RAM or ROM at 0x70360000 > > Hum. Are you sure it's anything related to the ppc backend at all? This > test doesn't work with an x86_64 host either. > > qemu: fatal: Trying to execute code outside RAM or ROM at 0x004001ba > > EAX=80000011 EBX=00009500 ECX=c0000080 EDX=00000000 > ESI=00000000 EDI=00542000 EBP=00000000 ESP=0044abbc > EIP=004001ba EFL=00000046 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 > ES =0010 00000000 ffffffff 00cf9300 DPL=0 DS [-WA] > CS =0008 00000000 ffffffff 00cf9a00 DPL=0 CS32 [-R-] > SS =0010 00000000 ffffffff 00cf9300 DPL=0 DS [-WA] > DS =0010 00000000 ffffffff 00cf9300 DPL=0 DS [-WA] > FS =0010 00000000 ffffffff 00cf9300 DPL=0 DS [-WA] > GS =0010 00000000 ffffffff 00cf9300 DPL=0 DS [-WA] > LDT=0000 00000000 0000ffff 00008200 DPL=0 LDT > TR =0000 00000000 0000ffff 00008b00 DPL=0 TSS32-busy > GDT= 0040800a 00000447 > IDT= 00000000 000003ff > CR0=80000011 CR2=00000000 CR3=00407000 CR4=00000020 > DR0=00000000 DR1=00000000 DR2=00000000 DR3=00000000 > DR6=ffff0ff0 DR7=00000400 > CCS=00000000 CCD=00000000 CCO=SARL > EFER=0000000000000000 > FCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80 > FPR0=0000000000000000 0000 FPR1=0000000000000000 0000 > FPR2=0000000000000000 0000 FPR3=0000000000000000 0000 > FPR4=0000000000000000 0000 FPR5=0000000000000000 0000 > FPR6=0000000000000000 0000 FPR7=0000000000000000 0000 > XMM00=00000000000000000000000000000000 XMM01=00000000000000000000000000000000 > XMM02=00000000000000000000000000000000 XMM03=00000000000000000000000000000000 > XMM04=00000000000000000000000000000000 XMM05=00000000000000000000000000000000 > XMM06=00000000000000000000000000000000 XMM07=00000000000000000000000000000000 > Aborted (core dumped) > > This happens after one of the writes to %cr0. Of course, the test works with > kvm enabled, so I don't blame the test so much as the target-i386 front end... > > This is not new breakage, either. I've checked back through 1.4.0 and I can't > make it work with any version of TCG. Strange... works here with 1.6.0 from Fedora $ time qemu-system-x86_64 -device isa-debug-exit,iobase=0xf4 -serial stdio -kernel sieve64.flat enabling apic starting sieve static:78498 out of 1000000 paging enabled cr0 = 80010011 cr3 = 7fff000 cr4 = 20 mapped:78498 out of 1000000 virtual:5761455 out of 100000000 virtual:5761455 out of 100000000 virtual:5761455 out of 100000000 real 0m50.056s user 0m49.467s sys 0m0.415s I sent you my binaries offlist. Paolo
On 09/09/2013 10:49 AM, Paolo Bonzini wrote:
> I sent you my binaries offlist.
And apparently there was something wrong with the binaries I built myself, as
yours work. I'll now look at my ppc32 changes and see what's what.
r~
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index beb4149..a81e805 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -324,9 +324,7 @@ extern uintptr_t tci_tb_ptr; In some implementations, we pass the "logical" return address manually; in others, we must infer the logical return from the true return. */ #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) -# if defined (_ARCH_PPC) && !defined (_ARCH_PPC64) -# define GETRA_LDST(RA) (*(int32_t *)((RA) - 4)) -# elif defined(__arm__) +# if defined(__arm__) /* We define two insns between the return address and the branch back to straight-line. Find and decode that branch insn. */ # define GETRA_LDST(RA) tcg_getra_ldst(RA) diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index b0fbc54..a890319 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -551,27 +551,26 @@ static void add_qemu_ldst_label (TCGContext *s, label->label_ptr[0] = label_ptr; } -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * int mmu_idx, uintptr_t ra) + */ static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, + helper_ret_ldub_mmu, + helper_ret_lduw_mmu, + helper_ret_ldul_mmu, + helper_ret_ldq_mmu, }; -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, + helper_ret_stb_mmu, + helper_ret_stw_mmu, + helper_ret_stl_mmu, + helper_ret_stq_mmu, }; -static void *ld_trampolines[4]; -static void *st_trampolines[4]; - static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2, int addr_reg, int addr_reg2, int s_bits, int offset1, int offset2, uint8_t **label_ptr) @@ -608,9 +607,14 @@ static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2, tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1)); tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ)); #endif + + /* Use a conditional branch-and-link so that we load a pointer to + somewhere within the current opcode, for passing on to the helper. + This address cannot be used for a tail call, but it's shorter + than forming an address from scratch. */ *label_ptr = s->code_ptr; retranst = ((uint16_t *) s->code_ptr)[1] & ~3; - tcg_out32 (s, BC | BI (7, CR_EQ) | retranst | BO_COND_FALSE); + tcg_out32(s, BC | BI(7, CR_EQ) | retranst | BO_COND_FALSE | LK); /* r0 now contains &env->tlb_table[mem_index][index].addr_x */ tcg_out32 (s, (LWZ @@ -833,132 +837,99 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) } #if defined(CONFIG_SOFTMMU) -static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label) +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - int s_bits; - int ir; - int opc = label->opc; - int mem_index = label->mem_index; - int data_reg = label->datalo_reg; - int data_reg2 = label->datahi_reg; - int addr_reg = label->addrlo_reg; - uint8_t *raddr = label->raddr; - uint8_t **label_ptr = &label->label_ptr[0]; + TCGReg ir, datalo, datahi; + int opc = lb->opc; - s_bits = opc & 3; + reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); - /* resolve label address */ - reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr); + ir = TCG_REG_R3; + tcg_out_mov(s, TCG_TYPE_PTR, ir++, TCG_AREG0); - /* slow path */ - ir = 4; -#if TARGET_LONG_BITS == 32 - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); -#else + if (TARGET_LONG_BITS == 32) { + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg); + } else { #ifdef TCG_TARGET_CALL_ALIGN_ARGS - ir |= 1; -#endif - tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg); - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); + ir |= 1; #endif - tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index); - tcg_out_call (s, (tcg_target_long) ld_trampolines[s_bits], 1); - tcg_out32 (s, (tcg_target_long) raddr); + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrhi_reg); + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg); + } + + tcg_out_movi(s, TCG_TYPE_I32, ir++, lb->mem_index); + tcg_out32(s, MFSPR | RT(ir++) | LR); + + tcg_out_call(s, (uintptr_t)qemu_ld_helpers[opc & 3], 1); + + datalo = lb->datalo_reg; switch (opc) { case 0|4: - tcg_out32 (s, EXTSB | RA (data_reg) | RS (3)); + tcg_out32(s, EXTSB | RA(datalo) | RS(TCG_REG_R3)); break; case 1|4: - tcg_out32 (s, EXTSH | RA (data_reg) | RS (3)); + tcg_out32(s, EXTSH | RA(datalo) | RS(TCG_REG_R3)); break; - case 0: - case 1: - case 2: - if (data_reg != 3) - tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3); + + default: + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R3); break; + case 3: - if (data_reg == 3) { - if (data_reg2 == 4) { - tcg_out_mov (s, TCG_TYPE_I32, 0, 4); - tcg_out_mov (s, TCG_TYPE_I32, 4, 3); - tcg_out_mov (s, TCG_TYPE_I32, 3, 0); - } - else { - tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3); - tcg_out_mov (s, TCG_TYPE_I32, 3, 4); - } - } - else { - if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4); - if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3); + datahi = lb->datahi_reg; + if (datalo != TCG_REG_R3) { + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4); + tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); + } else if (datahi != TCG_REG_R4) { + tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4); + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, TCG_REG_R4); + tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R0); } break; } + /* Jump to the code corresponding to next IR of qemu_st */ - tcg_out_b (s, 0, (tcg_target_long) raddr); + tcg_out_b(s, 0, (uintptr_t)lb->raddr); } -static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label) +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - int ir; - int opc = label->opc; - int mem_index = label->mem_index; - int data_reg = label->datalo_reg; - int data_reg2 = label->datahi_reg; - int addr_reg = label->addrlo_reg; - uint8_t *raddr = label->raddr; - uint8_t **label_ptr = &label->label_ptr[0]; - - /* resolve label address */ - reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr); - - /* slow path */ - ir = 4; -#if TARGET_LONG_BITS == 32 - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); -#else + TCGReg ir; + + reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); + + ir = TCG_REG_R3; + tcg_out_mov(s, TCG_TYPE_PTR, ir++, TCG_AREG0); + + if (TARGET_LONG_BITS == 32) { + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg); + } else { #ifdef TCG_TARGET_CALL_ALIGN_ARGS - ir |= 1; -#endif - tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg); - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); + ir |= 1; #endif + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrhi_reg); + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg); + } - switch (opc) { - case 0: - tcg_out32 (s, (RLWINM - | RA (ir) - | RS (data_reg) - | SH (0) - | MB (24) - | ME (31))); - break; - case 1: - tcg_out32 (s, (RLWINM - | RA (ir) - | RS (data_reg) - | SH (0) - | MB (16) - | ME (31))); - break; - case 2: - tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg); - break; - case 3: + if (lb->opc != 3) { + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->datalo_reg); + } else { #ifdef TCG_TARGET_CALL_ALIGN_ARGS ir |= 1; #endif - tcg_out_mov (s, TCG_TYPE_I32, ir++, data_reg2); - tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg); - break; + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->datahi_reg); + tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->datalo_reg); } - ir++; - tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index); - tcg_out_call (s, (tcg_target_long) st_trampolines[opc], 1); - tcg_out32 (s, (tcg_target_long) raddr); - tcg_out_b (s, 0, (tcg_target_long) raddr); + tcg_out_movi(s, TCG_TYPE_I32, ir++, lb->mem_index); + tcg_out32(s, MFSPR | RT(ir++) | LR); + + tcg_out_call(s, (uintptr_t)qemu_st_helpers[lb->opc], 1); + + tcg_out_b(s, 0, (uintptr_t)lb->raddr); } void tcg_out_tb_finalize(TCGContext *s) @@ -979,17 +950,6 @@ void tcg_out_tb_finalize(TCGContext *s) } #endif -#ifdef CONFIG_SOFTMMU -static void emit_ldst_trampoline (TCGContext *s, const void *ptr) -{ - tcg_out32 (s, MFSPR | RT (3) | LR); - tcg_out32 (s, ADDI | RT (3) | RA (3) | 4); - tcg_out32 (s, MTSPR | RS (3) | LR); - tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0); - tcg_out_b (s, 0, (tcg_target_long) ptr); -} -#endif - static void tcg_target_qemu_prologue (TCGContext *s) { int i, frame_size; @@ -1050,16 +1010,6 @@ static void tcg_target_qemu_prologue (TCGContext *s) tcg_out32 (s, MTSPR | RS (0) | LR); tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size); tcg_out32 (s, BCLR | BO_ALWAYS); - -#ifdef CONFIG_SOFTMMU - for (i = 0; i < 4; ++i) { - ld_trampolines[i] = s->code_ptr; - emit_ldst_trampoline (s, qemu_ld_helpers[i]); - - st_trampolines[i] = s->code_ptr; - emit_ldst_trampoline (s, qemu_st_helpers[i]); - } -#endif } static void tcg_out_ld (TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
Drop the ld/st_trampolines, loading the return address into a parameter register directly. Signed-off-by: Richard Henderson <rth@twiddle.net> --- include/exec/exec-all.h | 4 +- tcg/ppc/tcg-target.c | 220 +++++++++++++++++++----------------------------- 2 files changed, 86 insertions(+), 138 deletions(-)