From patchwork Tue Mar 16 22:10:49 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 47983 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 3B2D1B7D10 for ; Thu, 18 Mar 2010 12:06:54 +1100 (EST) Received: from localhost ([127.0.0.1]:55550 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Ns45n-0005Zc-Jt for incoming@patchwork.ozlabs.org; Wed, 17 Mar 2010 21:00:07 -0400 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1Ns3t0-00006J-Ic for qemu-devel@nongnu.org; Wed, 17 Mar 2010 20:46:54 -0400 Received: from [199.232.76.173] (port=52452 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Ns3sz-000056-4U for qemu-devel@nongnu.org; Wed, 17 Mar 2010 20:46:53 -0400 Received: from Debian-exim by monty-python.gnu.org with spam-scanned (Exim 4.60) (envelope-from ) id 1Ns3sw-0000HY-3N for qemu-devel@nongnu.org; Wed, 17 Mar 2010 20:46:52 -0400 Received: from are.twiddle.net ([75.149.56.221]:38373) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1Ns3st-0000HG-SK for qemu-devel@nongnu.org; Wed, 17 Mar 2010 20:46:48 -0400 Received: by are.twiddle.net (Postfix, from userid 5000) id 65E8EB0E; Wed, 17 Mar 2010 17:46:45 -0700 (PDT) Message-Id: In-Reply-To: References: From: Richard Henderson Date: Tue, 16 Mar 2010 15:10:49 -0700 To: qemu-devel@nongnu.org X-detected-operating-system: by monty-python.gnu.org: GNU/Linux 2.6 (newer, 2) Subject: [Qemu-devel] [PATCH 8/8] target-alpha: Emit goto_tb opcodes. 
X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Use an ExitStatus enumeration instead of magic numbers as the return value from translate_one. Emit goto_tb opcodes when ending a TB via a direct branch. Signed-off-by: Richard Henderson --- target-alpha/translate.c | 339 ++++++++++++++++++++++++++-------------------- 1 files changed, 193 insertions(+), 146 deletions(-) diff --git a/target-alpha/translate.c b/target-alpha/translate.c index b845094..d312939 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -43,12 +43,13 @@ typedef struct DisasContext DisasContext; struct DisasContext { + struct TranslationBlock *tb; + CPUAlphaState *env; uint64_t pc; int mem_idx; #if !defined (CONFIG_USER_ONLY) int pal_mode; #endif - CPUAlphaState *env; uint32_t amask; /* Current rounding mode for this TB. */ @@ -57,6 +58,25 @@ struct DisasContext { int tb_ftz; }; +/* Return values from translate_one, indicating the state of the TB. + Note that zero indicates that we are not exiting the TB. */ + +typedef enum { + NO_EXIT, + + /* We have emitted one or more goto_tb. No fixup required. */ + EXIT_GOTO_TB, + + /* We are not using a goto_tb (for whatever reason), but have updated + the PC (for whatever reason), so there's no need to do it again on + exiting the TB. */ + EXIT_PC_UPDATED, + + /* We are exiting the TB, but have neither emitted a goto_tb, nor + updated the PC for the next instruction to be executed. 
 */ + EXIT_PC_STALE +} ExitStatus; + /* global register indexes */ static TCGv_ptr cpu_env; static TCGv cpu_ir[31]; @@ -300,77 +320,126 @@ static inline void gen_store_mem(DisasContext *ctx, tcg_temp_free(addr); } -static void gen_bcond_pcload(DisasContext *ctx, int32_t disp, int lab_true) +static int use_goto_tb(DisasContext *ctx, uint64_t dest) { - int lab_over = gen_new_label(); + /* Check for the dest on the same page as the start of the TB. We + also want to suppress goto_tb in the case of single-stepping and IO. */ + return (((ctx->tb->pc ^ dest) & TARGET_PAGE_MASK) == 0 + && !ctx->env->singlestep_enabled + && !(ctx->tb->cflags & CF_LAST_IO)); +} - tcg_gen_movi_i64(cpu_pc, ctx->pc); - tcg_gen_br(lab_over); - gen_set_label(lab_true); - tcg_gen_movi_i64(cpu_pc, ctx->pc + (int64_t)(disp << 2)); - gen_set_label(lab_over); +static ExitStatus gen_bdirect(DisasContext *ctx, int ra, int32_t disp) +{ + uint64_t dest = ctx->pc + (disp << 2); + + if (ra != 31) { + tcg_gen_movi_i64(cpu_ir[ra], ctx->pc); + } + + /* Notice branch-to-next; used to initialize RA with the PC. 
*/ + if (disp == 0) { + return 0; + } else if (use_goto_tb(ctx, dest)) { + tcg_gen_goto_tb(0); + tcg_gen_movi_i64(cpu_pc, dest); + tcg_gen_exit_tb((long)ctx->tb); + return EXIT_GOTO_TB; + } else { + tcg_gen_movi_i64(cpu_pc, dest); + return EXIT_PC_UPDATED; + } } -static void gen_bcond(DisasContext *ctx, TCGCond cond, int ra, - int32_t disp, int mask) +static ExitStatus gen_bcond_internal(DisasContext *ctx, TCGCond cond, + TCGv cmp, int32_t disp) { + uint64_t dest = ctx->pc + (disp << 2); int lab_true = gen_new_label(); - if (likely(ra != 31)) { + if (use_goto_tb(ctx, dest)) { + tcg_gen_brcondi_i64(cond, cmp, 0, lab_true); + + tcg_gen_goto_tb(0); + tcg_gen_movi_i64(cpu_pc, ctx->pc); + tcg_gen_exit_tb((long)ctx->tb); + + gen_set_label(lab_true); + tcg_gen_goto_tb(1); + tcg_gen_movi_i64(cpu_pc, dest); + tcg_gen_exit_tb((long)ctx->tb + 1); + + return EXIT_GOTO_TB; + } else { + int lab_over = gen_new_label(); + + /* ??? Consider using either + movi pc, next + addi tmp, pc, disp + movcond pc, cond, 0, tmp, pc + or + setcond tmp, cond, 0 + movi pc, next + neg tmp, tmp + andi tmp, tmp, disp + add pc, pc, tmp + The current diamond subgraph surely isn't efficient. */ + + tcg_gen_brcondi_i64(cond, cmp, 0, lab_true); + tcg_gen_movi_i64(cpu_pc, ctx->pc); + tcg_gen_br(lab_over); + gen_set_label(lab_true); + tcg_gen_movi_i64(cpu_pc, dest); + gen_set_label(lab_over); + + return EXIT_PC_UPDATED; + } +} + +static ExitStatus gen_bcond(DisasContext *ctx, TCGCond cond, int ra, + int32_t disp, int mask) +{ + TCGv cmp_tmp; + + if (unlikely(ra == 31)) { + cmp_tmp = tcg_const_i64(0); + } else { + cmp_tmp = tcg_temp_new(); if (mask) { - TCGv tmp = tcg_temp_new(); - tcg_gen_andi_i64(tmp, cpu_ir[ra], 1); - tcg_gen_brcondi_i64(cond, tmp, 0, lab_true); - tcg_temp_free(tmp); + tcg_gen_andi_i64(cmp_tmp, cpu_ir[ra], 1); } else { - tcg_gen_brcondi_i64(cond, cpu_ir[ra], 0, lab_true); + tcg_gen_mov_i64(cmp_tmp, cpu_ir[ra]); } - } else { - /* Very uncommon case - Do not bother to optimize. 
*/ - TCGv tmp = tcg_const_i64(0); - tcg_gen_brcondi_i64(cond, tmp, 0, lab_true); - tcg_temp_free(tmp); } - gen_bcond_pcload(ctx, disp, lab_true); + + return gen_bcond_internal(ctx, cond, cmp_tmp, disp); } -/* Generate a forward TCG branch to LAB_TRUE if RA cmp 0.0. - This is complicated by the fact that -0.0 compares the same as +0.0. */ +/* Fold -0.0 for comparison with COND. */ -static void gen_fbcond_internal(TCGCond cond, TCGv src, int lab_true) +static void gen_fold_mzero(TCGCond cond, TCGv dest, TCGv src) { - int lab_false = -1; uint64_t mzero = 1ull << 63; - TCGv tmp; switch (cond) { case TCG_COND_LE: case TCG_COND_GT: /* For <= or >, the -0.0 value directly compares the way we want. */ - tcg_gen_brcondi_i64(cond, src, 0, lab_true); + tcg_gen_mov_i64(dest, src); break; case TCG_COND_EQ: case TCG_COND_NE: /* For == or !=, we can simply mask off the sign bit and compare. */ - /* ??? Assume that the temporary is reclaimed at the branch. */ - tmp = tcg_temp_new(); - tcg_gen_andi_i64(tmp, src, mzero - 1); - tcg_gen_brcondi_i64(cond, tmp, 0, lab_true); + tcg_gen_andi_i64(dest, src, mzero - 1); break; case TCG_COND_GE: - /* For >=, emit two branches to the destination. */ - tcg_gen_brcondi_i64(cond, src, 0, lab_true); - tcg_gen_brcondi_i64(TCG_COND_EQ, src, mzero, lab_true); - break; - case TCG_COND_LT: - /* For <, first filter out -0.0 to what will be the fallthru. */ - lab_false = gen_new_label(); - tcg_gen_brcondi_i64(TCG_COND_EQ, src, mzero, lab_false); - tcg_gen_brcondi_i64(cond, src, 0, lab_true); - gen_set_label(lab_false); + /* For >= or <, map -0.0 to +0.0 via comparison and mask. 
*/ + tcg_gen_setcondi_i64(TCG_COND_NE, dest, src, mzero); + tcg_gen_neg_i64(dest, dest); + tcg_gen_and_i64(dest, dest, src); break; default: @@ -378,24 +447,24 @@ static void gen_fbcond_internal(TCGCond cond, TCGv src, int lab_true) } } -static void gen_fbcond(DisasContext *ctx, TCGCond cond, int ra, int32_t disp) +static ExitStatus gen_fbcond(DisasContext *ctx, TCGCond cond, int ra, + int32_t disp) { - int lab_true; + TCGv cmp_tmp; if (unlikely(ra == 31)) { /* Very uncommon case, but easier to optimize it to an integer comparison than continuing with the floating point comparison. */ - gen_bcond(ctx, cond, ra, disp, 0); - return; + return gen_bcond(ctx, cond, ra, disp, 0); } - lab_true = gen_new_label(); - gen_fbcond_internal(cond, cpu_fir[ra], lab_true); - gen_bcond_pcload(ctx, disp, lab_true); + cmp_tmp = tcg_temp_new(); + gen_fold_mzero(cond, cmp_tmp, cpu_fir[ra]); + return gen_bcond_internal(ctx, cond, cmp_tmp, disp); } static void gen_cmov(TCGCond cond, int ra, int rb, int rc, - int islit, uint8_t lit, int mask) + int islit, uint8_t lit, int mask) { TCGCond inv_cond = tcg_invert_cond(cond); int l1; @@ -429,18 +498,23 @@ static void gen_cmov(TCGCond cond, int ra, int rb, int rc, static void gen_fcmov(TCGCond cond, int ra, int rb, int rc) { - TCGv va = cpu_fir[ra]; + TCGv cmp_tmp; int l1; - if (unlikely(rc == 31)) + if (unlikely(rc == 31)) { return; + } + + cmp_tmp = tcg_temp_new(); if (unlikely(ra == 31)) { - /* ??? Assume that the temporary is reclaimed at the branch. 
*/ - va = tcg_const_i64(0); + tcg_gen_movi_i64(cmp_tmp, 0); + } else { + gen_fold_mzero(cond, cmp_tmp, cpu_fir[ra]); } l1 = gen_new_label(); - gen_fbcond_internal(tcg_invert_cond(cond), va, l1); + tcg_gen_brcondi_i64(tcg_invert_cond(cond), cmp_tmp, 0, l1); + tcg_temp_free(cmp_tmp); if (rb != 31) tcg_gen_mov_i64(cpu_fir[rc], cpu_fir[rb]); @@ -1332,14 +1406,14 @@ static void gen_rx(int ra, int set) tcg_temp_free_i32(tmp); } -static inline int translate_one(DisasContext *ctx, uint32_t insn) +static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) { uint32_t palcode; int32_t disp21, disp16, disp12; uint16_t fn11, fn16; uint8_t opc, ra, rb, rc, sbz, fpfn, fn7, fn2, islit, real_islit; uint8_t lit; - int ret; + ExitStatus ret; /* Decode all instruction fields */ opc = insn >> 26; @@ -1362,10 +1436,10 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) fpfn = fn11 & 0x3F; fn7 = (insn >> 5) & 0x0000007F; fn2 = (insn >> 5) & 0x00000003; - ret = 0; LOG_DISAS("opc %02x ra %2d rb %2d rc %2d disp16 %6d\n", opc, ra, rb, rc, disp16); + ret = NO_EXIT; switch (opc) { case 0x00: /* CALL_PAL */ @@ -1383,7 +1457,8 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) if (palcode >= 0x80 && palcode < 0xC0) { /* Unprivileged PAL call */ gen_excp(ctx, EXCP_CALL_PAL + ((palcode & 0x3F) << 6), 0); - ret = 3; + /* PC updated by gen_excp. */ + ret = EXIT_PC_UPDATED; break; } #ifndef CONFIG_USER_ONLY @@ -1392,7 +1467,8 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) if (ctx->mem_idx & 1) goto invalid_opc; gen_excp(ctx, EXCP_CALL_PALP + ((palcode & 0x3F) << 6), 0); - ret = 3; + /* PC updated by gen_excp. */ + ret = EXIT_PC_UPDATED; } #endif /* Invalid PAL call */ @@ -2395,13 +2471,11 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) switch ((uint16_t)disp16) { case 0x0000: /* TRAPB */ - /* No-op. Just exit from the current tb */ - ret = 2; + /* No-op. */ break; case 0x0400: /* EXCB */ - /* No-op. 
Just exit from the current tb */ - ret = 2; + /* No-op. */ break; case 0x4000: /* MB */ @@ -2465,21 +2539,7 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) if (ra != 31) tcg_gen_movi_i64(cpu_ir[ra], ctx->pc); /* Those four jumps only differ by the branch prediction hint */ - switch (fn2) { - case 0x0: - /* JMP */ - break; - case 0x1: - /* JSR */ - break; - case 0x2: - /* RET */ - break; - case 0x3: - /* JSR_COROUTINE */ - break; - } - ret = 1; + ret = EXIT_PC_UPDATED; break; case 0x1B: /* HW_LD (PALcode) */ @@ -2770,7 +2830,7 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) tcg_temp_free(tmp2); } tcg_temp_free(tmp1); - ret = 2; + ret = EXIT_PC_STALE; } break; #endif @@ -2795,7 +2855,7 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) gen_helper_hw_ret(tmp); tcg_temp_free(tmp); } - ret = 2; + ret = EXIT_PC_UPDATED; break; #endif case 0x1F: @@ -2956,85 +3016,66 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) break; case 0x30: /* BR */ - if (ra != 31) - tcg_gen_movi_i64(cpu_ir[ra], ctx->pc); - tcg_gen_movi_i64(cpu_pc, ctx->pc + (int64_t)(disp21 << 2)); - ret = 1; + ret = gen_bdirect(ctx, ra, disp21); break; case 0x31: /* FBEQ */ - gen_fbcond(ctx, TCG_COND_EQ, ra, disp21); - ret = 1; + ret = gen_fbcond(ctx, TCG_COND_EQ, ra, disp21); break; case 0x32: /* FBLT */ - gen_fbcond(ctx, TCG_COND_LT, ra, disp21); - ret = 1; + ret = gen_fbcond(ctx, TCG_COND_LT, ra, disp21); break; case 0x33: /* FBLE */ - gen_fbcond(ctx, TCG_COND_LE, ra, disp21); - ret = 1; + ret = gen_fbcond(ctx, TCG_COND_LE, ra, disp21); break; case 0x34: /* BSR */ - if (ra != 31) - tcg_gen_movi_i64(cpu_ir[ra], ctx->pc); - tcg_gen_movi_i64(cpu_pc, ctx->pc + (int64_t)(disp21 << 2)); - ret = 1; + ret = gen_bdirect(ctx, ra, disp21); break; case 0x35: /* FBNE */ - gen_fbcond(ctx, TCG_COND_NE, ra, disp21); - ret = 1; + ret = gen_fbcond(ctx, TCG_COND_NE, ra, disp21); break; case 0x36: /* FBGE */ - gen_fbcond(ctx, TCG_COND_GE, ra, 
disp21); - ret = 1; + ret = gen_fbcond(ctx, TCG_COND_GE, ra, disp21); break; case 0x37: /* FBGT */ - gen_fbcond(ctx, TCG_COND_GT, ra, disp21); - ret = 1; + ret = gen_fbcond(ctx, TCG_COND_GT, ra, disp21); break; case 0x38: /* BLBC */ - gen_bcond(ctx, TCG_COND_EQ, ra, disp21, 1); - ret = 1; + ret = gen_bcond(ctx, TCG_COND_EQ, ra, disp21, 1); break; case 0x39: /* BEQ */ - gen_bcond(ctx, TCG_COND_EQ, ra, disp21, 0); - ret = 1; + ret = gen_bcond(ctx, TCG_COND_EQ, ra, disp21, 0); break; case 0x3A: /* BLT */ - gen_bcond(ctx, TCG_COND_LT, ra, disp21, 0); - ret = 1; + ret = gen_bcond(ctx, TCG_COND_LT, ra, disp21, 0); break; case 0x3B: /* BLE */ - gen_bcond(ctx, TCG_COND_LE, ra, disp21, 0); - ret = 1; + ret = gen_bcond(ctx, TCG_COND_LE, ra, disp21, 0); break; case 0x3C: /* BLBS */ - gen_bcond(ctx, TCG_COND_NE, ra, disp21, 1); - ret = 1; + ret = gen_bcond(ctx, TCG_COND_NE, ra, disp21, 1); break; case 0x3D: /* BNE */ - gen_bcond(ctx, TCG_COND_NE, ra, disp21, 0); - ret = 1; + ret = gen_bcond(ctx, TCG_COND_NE, ra, disp21, 0); break; case 0x3E: /* BGE */ - gen_bcond(ctx, TCG_COND_GE, ra, disp21, 0); - ret = 1; + ret = gen_bcond(ctx, TCG_COND_GE, ra, disp21, 0); break; case 0x3F: /* BGT */ - gen_bcond(ctx, TCG_COND_GT, ra, disp21, 0); - ret = 1; + ret = gen_bcond(ctx, TCG_COND_GT, ra, disp21, 0); break; invalid_opc: gen_invalid(ctx); - ret = 3; + /* PC updated by gen_excp. 
*/ + ret = EXIT_PC_UPDATED; break; } @@ -3051,15 +3092,17 @@ static inline void gen_intermediate_code_internal(CPUState *env, uint16_t *gen_opc_end; CPUBreakpoint *bp; int j, lj = -1; - int ret; + ExitStatus ret; int num_insns; int max_insns; pc_start = tb->pc; gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; + + ctx.tb = tb; + ctx.env = env; ctx.pc = pc_start; ctx.amask = env->amask; - ctx.env = env; #if defined (CONFIG_USER_ONLY) ctx.mem_idx = 0; #else @@ -3083,7 +3126,7 @@ static inline void gen_intermediate_code_internal(CPUState *env, max_insns = CF_COUNT_MASK; gen_icount_start(); - for (ret = 0; ret == 0;) { + do { if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) { QTAILQ_FOREACH(bp, &env->breakpoints, entry) { if (bp->pc == ctx.pc) { @@ -3114,36 +3157,39 @@ static inline void gen_intermediate_code_internal(CPUState *env, ctx.pc += 4; ret = translate_one(ctxp, insn); - if (ret != 0) - break; - /* if we reach a page boundary or are single stepping, stop - * generation - */ - if (env->singlestep_enabled) { - gen_excp(&ctx, EXCP_DEBUG, 0); - break; - } - if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0) - break; - - if (gen_opc_ptr >= gen_opc_end) - break; - - if (num_insns >= max_insns) - break; - - if (singlestep) { - break; + if (ret == NO_EXIT) { + /* If we reach a page boundary, are single stepping, + or exhaust instruction count, stop generation. 
*/ + if (env->singlestep_enabled) { + gen_excp(&ctx, EXCP_DEBUG, 0); + ret = EXIT_PC_UPDATED; + } else if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0 + || gen_opc_ptr >= gen_opc_end + || num_insns >= max_insns + || singlestep) { + ret = EXIT_PC_STALE; + } } + } while (ret == NO_EXIT); + + if (tb->cflags & CF_LAST_IO) { + gen_io_end(); } - if (ret != 1 && ret != 3) { + + switch (ret) { + case EXIT_GOTO_TB: + break; + case EXIT_PC_STALE: tcg_gen_movi_i64(cpu_pc, ctx.pc); + /* FALLTHRU */ + case EXIT_PC_UPDATED: + tcg_gen_exit_tb(0); + break; + default: + abort(); } - if (tb->cflags & CF_LAST_IO) - gen_io_end(); - /* Generate the return instruction */ - tcg_gen_exit_tb(0); + gen_icount_end(tb, num_insns); *gen_opc_ptr = INDEX_op_end; if (search_pc) { @@ -3155,6 +3201,7 @@ static inline void gen_intermediate_code_internal(CPUState *env, tb->size = ctx.pc - pc_start; tb->icount = num_insns; } + #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { qemu_log("IN: %s\n", lookup_symbol(pc_start));