From patchwork Mon Nov 2 15:16:44 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ulrich Hecht X-Patchwork-Id: 37426 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 6A7B9B7BFE for ; Tue, 3 Nov 2009 02:16:36 +1100 (EST) Received: from localhost ([127.0.0.1]:34943 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1N4ye1-0004Mt-Gw for incoming@patchwork.ozlabs.org; Mon, 02 Nov 2009 10:16:33 -0500 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1N4ydT-0004Mg-Ll for qemu-devel@nongnu.org; Mon, 02 Nov 2009 10:15:59 -0500 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1N4ydP-0004KR-Qa for qemu-devel@nongnu.org; Mon, 02 Nov 2009 10:15:59 -0500 Received: from [199.232.76.173] (port=44043 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1N4ydP-0004KM-NQ for qemu-devel@nongnu.org; Mon, 02 Nov 2009 10:15:55 -0500 Received: from cantor2.suse.de ([195.135.220.15]:43281 helo=mx2.suse.de) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1N4ydP-0002qw-8V for qemu-devel@nongnu.org; Mon, 02 Nov 2009 10:15:55 -0500 Received: from relay2.suse.de (relay-ext.suse.de [195.135.221.8]) by mx2.suse.de (Postfix) with ESMTP id 7DDD68726A; Mon, 2 Nov 2009 16:15:52 +0100 (CET) From: Ulrich Hecht Organization: SuSE Linux Products GmbH To: Aurelien Jarno Subject: Re: [Qemu-devel] [PATCH 2/9] S/390 CPU emulation Date: Mon, 2 Nov 2009 17:16:44 +0200 User-Agent: KMail/1.9.10 References: <1255696735-21396-1-git-send-email-uli@suse.de> <200910191917.18972.uli@suse.de> <20091022212854.GW1883@hall.aurel32.net> In-Reply-To: <20091022212854.GW1883@hall.aurel32.net> MIME-Version: 1.0 Message-Id: <200911021616.45102.uli@suse.de> X-detected-operating-system: by monty-python.gnu.org: GNU/Linux 2.4-2.6 Cc: qemu-devel@nongnu.org, agraf@suse.de X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org On Thursday 22 October 2009, Aurelien Jarno wrote: > Probably the second. Changing the instruction pointer in the helper > instead of using the proper goto_tb TCG op prevents TB chaining, and > therefore as a huge impact on performance. > > It's something not difficult to implement, and that I would definitely > want to see in the patch before getting it merged. OK, I implemented it, and the surprising result is that performance does not get any better; in fact it even suffers a little bit. (My standard quick test, the polarssl test suite, shows about a 2% performance impact when profiled with cachegrind). Could there be anything I overlooked? I modelled my implementation after those in the existing targets. (See the attached patch that goes on top of my other S/390 patches.) CU Uli diff --git a/target-s390/helpers.h b/target-s390/helpers.h index 0d16760..6009312 100644 --- a/target-s390/helpers.h +++ b/target-s390/helpers.h @@ -15,7 +15,6 @@ DEF_HELPER_FLAGS_1(set_cc_comp_s64, TCG_CALL_PURE|TCG_CALL_CONST, i32, s64) DEF_HELPER_FLAGS_1(set_cc_nz_u32, TCG_CALL_PURE|TCG_CALL_CONST, i32, i32) DEF_HELPER_FLAGS_1(set_cc_nz_u64, TCG_CALL_PURE|TCG_CALL_CONST, i32, i64) DEF_HELPER_FLAGS_2(set_cc_icm, TCG_CALL_PURE|TCG_CALL_CONST, i32, i32, i32) -DEF_HELPER_4(brc, void, i32, i32, i64, s32) DEF_HELPER_3(brctg, void, i64, i64, s32) DEF_HELPER_3(brct, void, i32, i64, s32) DEF_HELPER_4(brcl, void, i32, i32, i64, s64) diff --git a/target-s390/op_helper.c b/target-s390/op_helper.c index 637d22f..f7f52ba 100644 --- a/target-s390/op_helper.c +++ b/target-s390/op_helper.c @@ -218,17 +218,6 @@ uint32_t HELPER(set_cc_icm)(uint32_t mask, uint32_t val) return cc; } -/* relative conditional branch */ -void HELPER(brc)(uint32_t cc, uint32_t mask, uint64_t pc, int32_t offset) -{ - if ( mask & ( 1 << (3 - cc) ) ) { - env->psw.addr = pc + offset; - } - else { - env->psw.addr = pc + 4; - } -} - /* branch relative on 64-bit count (condition is computed inline, this only does the branch */ void HELPER(brctg)(uint64_t flag, uint64_t pc, int32_t offset) diff --git a/target-s390/translate.c b/target-s390/translate.c index 9ffa7bd..5a7cfe7 100644 --- a/target-s390/translate.c +++ b/target-s390/translate.c @@ -49,6 +49,7 @@ struct DisasContext { uint64_t pc; int is_jmp; CPUS390XState *env; + struct TranslationBlock *tb; }; #define DISAS_EXCP 4 @@ -359,23 +360,55 @@ static void gen_bcr(uint32_t mask, int tr, uint64_t offset) tcg_temp_free(target); } -static void gen_brc(uint32_t mask, uint64_t pc, int32_t offset) +static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong pc) { - TCGv p; - TCGv_i32 m, o; + TranslationBlock *tb; + + tb = s->tb; + /* NOTE: we handle the case where the TB spans two pages here */ + if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) || + (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) { + /* jump to same page: we can use a direct jump */ + tcg_gen_mov_i32(global_cc, cc); + tcg_gen_goto_tb(tb_num); + tcg_gen_movi_i64(psw_addr, pc); + tcg_gen_exit_tb((long)tb + tb_num); + } else { + /* jump to another page: currently not optimized */ + tcg_gen_movi_i64(psw_addr, pc); + tcg_gen_mov_i32(global_cc, cc); + tcg_gen_exit_tb(0); + } +} + +static void gen_brc(uint32_t mask, DisasContext *s, int32_t offset) +{ + TCGv_i32 r; + TCGv_i32 tmp, tmp2; + int skip; if (mask == 0xf) { /* unconditional */ - tcg_gen_movi_i64(psw_addr, pc + offset); + //tcg_gen_movi_i64(psw_addr, s->pc + offset); + gen_goto_tb(s, 0, s->pc + offset); } else { - m = tcg_const_i32(mask); - p = tcg_const_i64(pc); - o = tcg_const_i32(offset); - gen_helper_brc(cc, m, p, o); - tcg_temp_free(m); - tcg_temp_free(p); - tcg_temp_free(o); + tmp = tcg_const_i32(3); + tcg_gen_sub_i32(tmp, tmp, cc); /* 3 - cc */ + tmp2 = tcg_const_i32(1); + tcg_gen_shl_i32(tmp2, tmp2, tmp); /* 1 << (3 - cc) */ + r = tcg_const_i32(mask); + tcg_gen_and_i32(r, r, tmp2); /* mask & (1 << (3 - cc)) */ + tcg_temp_free(tmp); + tcg_temp_free(tmp2); + skip = gen_new_label(); + tcg_gen_brcondi_i32(TCG_COND_EQ, r, 0, skip); + gen_goto_tb(s, 0, s->pc + offset); + gen_set_label(skip); + gen_goto_tb(s, 1, s->pc + 4); + tcg_gen_mov_i32(global_cc, cc); + tcg_temp_free(r); } + s->is_jmp = DISAS_TB_JUMP; } static void gen_set_cc_add64(TCGv v1, TCGv v2, TCGv vr) @@ -1143,9 +1176,7 @@ static void disas_a7(DisasContext *s, int op, int r1, int i2) tcg_temp_free(tmp2); break; case 0x4: /* brc m1, i2 */ - /* FIXME: optimize m1 == 0xf (unconditional) case */ - gen_brc(r1, s->pc, i2 * 2); - s->is_jmp = DISAS_JUMP; + gen_brc(r1, s, i2 * 2); return; case 0x5: /* BRAS R1,I2 [RI] */ tmp = tcg_const_i64(s->pc + 4); @@ -2739,6 +2770,7 @@ static inline void gen_intermediate_code_internal (CPUState *env, dc.env = env; dc.pc = pc_start; dc.is_jmp = DISAS_NEXT; + dc.tb = tb; gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; @@ -2778,8 +2810,11 @@ static inline void gen_intermediate_code_internal (CPUState *env, num_insns++; } while (!dc.is_jmp && gen_opc_ptr < gen_opc_end && dc.pc < next_page_start && num_insns < max_insns && !env->singlestep_enabled); - tcg_gen_mov_i32(global_cc, cc); - tcg_temp_free(cc); + + if (dc.is_jmp != DISAS_TB_JUMP) { + tcg_gen_mov_i32(global_cc, cc); + tcg_temp_free(cc); + } if (!dc.is_jmp) { tcg_gen_st_i64(tcg_const_i64(dc.pc), cpu_env, offsetof(CPUState, psw.addr)); @@ -2794,7 +2829,9 @@ static inline void gen_intermediate_code_internal (CPUState *env, if (tb->cflags & CF_LAST_IO) gen_io_end(); /* Generate the return instruction */ - tcg_gen_exit_tb(0); + if (dc.is_jmp != DISAS_TB_JUMP) { + tcg_gen_exit_tb(0); + } gen_icount_end(tb, num_insns); *gen_opc_ptr = INDEX_op_end; if (search_pc) {