From patchwork Mon Sep 2 16:28:46 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 271990 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id E490C2C009F for ; Tue, 3 Sep 2013 02:30:10 +1000 (EST) Received: from localhost ([::1]:40946 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1VGX0z-0002Kq-2p for incoming@patchwork.ozlabs.org; Mon, 02 Sep 2013 12:30:09 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:39729) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1VGX0J-0002Jr-FM for qemu-devel@nongnu.org; Mon, 02 Sep 2013 12:29:32 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1VGX0D-0005g0-Uo for qemu-devel@nongnu.org; Mon, 02 Sep 2013 12:29:27 -0400 Received: from mail-pd0-x22f.google.com ([2607:f8b0:400e:c02::22f]:49468) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1VGX0D-0005fh-Eh for qemu-devel@nongnu.org; Mon, 02 Sep 2013 12:29:21 -0400 Received: by mail-pd0-f175.google.com with SMTP id q10so4901630pdj.20 for ; Mon, 02 Sep 2013 09:29:20 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=sender:from:to:cc:subject:date:message-id:in-reply-to:references; bh=PnTvUEOv/SLIy3AJLjeEzCJa/wAbEz0qvC4Bd3xYCq0=; b=xO7sS8m1SH+nmoD0kHY9f0YU6Vp+qoiuRzYq4juEWprX3KDug3AyqJQkJu5Xl52b5t nHI7ypxmQ7skna6VsvgO1UNxd4jBNx0re74JQzWaLywS7u0OT7NPZC3EUsk6jMWYivda M68iUl7m34LVLmxoBrcEALN4OVLXTqwbNIpwOrDFNtNMjwSUusWgSiPkqf19I0T3WXXj az0v1NYSnBnYQsPuPT4af96bpBVTPqEX40twbZG6B0NcmXPDGvT1CFUQ/yQ93asxsZHg PiVqLOb8vqnjSOJWlGbDiUcRp9HR72Rjc1iAANHU2Wd07ygw4D1ipdlcgPLiDW8F4p8M WvkQ== X-Received: by 10.68.131.133 with SMTP id om5mr4089939pbb.148.1378139360300; Mon, 02 Sep 2013 09:29:20 -0700 (PDT) Received: from anchor.twiddle.net (50-194-63-110-static.hfc.comcastbusiness.net. [50.194.63.110]) by mx.google.com with ESMTPSA id ia5sm16821148pbc.42.1969.12.31.16.00.00 (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Mon, 02 Sep 2013 09:29:19 -0700 (PDT) From: Richard Henderson To: qemu-devel@nongnu.org Date: Mon, 2 Sep 2013 09:28:46 -0700 Message-Id: <1378139354-28602-2-git-send-email-rth@twiddle.net> X-Mailer: git-send-email 1.8.1.4 In-Reply-To: <1378139354-28602-1-git-send-email-rth@twiddle.net> References: <1378139354-28602-1-git-send-email-rth@twiddle.net> X-detected-operating-system: by eggs.gnu.org: Error: Malformed IPv6 address (bad octet value). X-Received-From: 2607:f8b0:400e:c02::22f Cc: aurelien@aurel32.net, anthony@codemonkey.ws Subject: [Qemu-devel] [PULL 01/29] tcg: Add muluh and mulsh opcodes X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Use them in places where mulu2 and muls2 are used. Optimize mulx2 with dead low part to mulxh. Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.h | 4 ++++ tcg/arm/tcg-target.h | 2 ++ tcg/hppa/tcg-target.h | 2 ++ tcg/i386/tcg-target.h | 4 ++++ tcg/ia64/tcg-target.h | 4 ++++ tcg/mips/tcg-target.h | 2 ++ tcg/optimize.c | 20 ++++++++++++++++++++ tcg/ppc/tcg-target.h | 2 ++ tcg/ppc64/tcg-target.h | 4 ++++ tcg/s390/tcg-target.h | 4 ++++ tcg/sparc/tcg-target.h | 4 ++++ tcg/tcg-op.h | 40 ++++++++++++++++++++++++++++++++++++---- tcg/tcg-opc.h | 4 ++++ tcg/tcg.c | 36 ++++++++++++++++++++++++++++++------ tcg/tcg.h | 2 ++ tcg/tci/tcg-target.h | 5 ++++- 16 files changed, 128 insertions(+), 11 deletions(-) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 51e5092..26ee28b 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -61,6 +61,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i64 0 #define TCG_TARGET_HAS_rem_i64 0 @@ -87,6 +89,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 enum { TCG_AREG0 = TCG_REG_X19, diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 5cd9d6a..ed48092 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -80,6 +80,8 @@ extern bool use_idiv_instructions; #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i32 use_idiv_instructions #define TCG_TARGET_HAS_rem_i32 0 diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h index 25467bd..0f6f2ff 100644 --- a/tcg/hppa/tcg-target.h +++ b/tcg/hppa/tcg-target.h @@ -100,6 +100,8 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */ diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index e3f6bb9..b7d1a55 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -96,6 +96,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div2_i64 1 @@ -122,6 +124,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muls2_i64 1 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 #endif #define TCG_TARGET_deposit_i32_valid(ofs, len) \ diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index f32d519..ee6b2c8 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -146,6 +146,10 @@ typedef enum { #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index a438950..6cb7c2f 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -89,6 +89,8 @@ typedef enum { #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 /* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */ #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ diff --git a/tcg/optimize.c b/tcg/optimize.c index b35868a..e8dedf3 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -198,6 +198,8 @@ static TCGOpcode op_to_mov(TCGOpcode op) static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) { + uint64_t l64, h64; + switch (op) { CASE_OP_32_64(add): return x + y; @@ -290,6 +292,18 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) case INDEX_op_ext32u_i64: return (uint32_t)x; + case INDEX_op_muluh_i32: + return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; + case INDEX_op_mulsh_i32: + return ((int64_t)(int32_t)x * (int32_t)y) >> 32; + + case INDEX_op_muluh_i64: + mulu64(&l64, &h64, x, y); + return h64; + case INDEX_op_mulsh_i64: + muls64(&l64, &h64, x, y); + return h64; + default: fprintf(stderr, "Unrecognized operation %d in do_constant_folding.\n", op); @@ -531,6 +545,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(eqv): CASE_OP_32_64(nand): CASE_OP_32_64(nor): + CASE_OP_32_64(muluh): + CASE_OP_32_64(mulsh): swap_commutative(args[0], &args[1], &args[2]); break; CASE_OP_32_64(brcond): @@ -771,6 +787,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, switch (op) { CASE_OP_32_64(and): CASE_OP_32_64(mul): + CASE_OP_32_64(muluh): + CASE_OP_32_64(mulsh): if ((temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0)) { s->gen_opc_buf[op_index] = op_to_movi(op); @@ -882,6 +900,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(eqv): CASE_OP_32_64(nand): CASE_OP_32_64(nor): + CASE_OP_32_64(muluh): + CASE_OP_32_64(mulsh): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { s->gen_opc_buf[op_index] = op_to_movi(op); diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index b42d97c..613c5ff 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -96,6 +96,8 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_AREG0 TCG_REG_R27 diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h index 48fc6e2..0789daf 100644 --- a/tcg/ppc64/tcg-target.h +++ b/tcg/ppc64/tcg-target.h @@ -95,6 +95,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 @@ -118,6 +120,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muls2_i64 1 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_AREG0 TCG_REG_R27 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 42ca36c..b02f170 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -69,6 +69,8 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 @@ -94,6 +96,8 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 extern bool tcg_target_deposit_valid(int ofs, int len); #define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index dab52d7..1a696bc 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -107,6 +107,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div_i64 1 @@ -134,6 +136,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 #endif #define TCG_AREG0 TCG_REG_I0 diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 364964d..3de7545 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -1039,10 +1039,18 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i32(); - tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0), - TCGV_LOW(arg1), TCGV_LOW(arg2)); - /* Allow the optimizer room to replace mulu2 with two moves. */ - tcg_gen_op0(INDEX_op_nop); + if (TCG_TARGET_HAS_mulu2_i32) { + tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0), + TCGV_LOW(arg1), TCGV_LOW(arg2)); + /* Allow the optimizer room to replace mulu2 with two moves. */ + tcg_gen_op0(INDEX_op_nop); + } else { + tcg_debug_assert(TCG_TARGET_HAS_muluh_i32); + tcg_gen_op3_i32(INDEX_op_mul_i32, TCGV_LOW(t0), + TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_op3_i32(INDEX_op_muluh_i32, TCGV_HIGH(t0), + TCGV_LOW(arg1), TCGV_LOW(arg2)); + } tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2)); tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1); @@ -2401,6 +2409,12 @@ static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); /* Allow the optimizer room to replace mulu2 with two moves. */ tcg_gen_op0(INDEX_op_nop); + } else if (TCG_TARGET_HAS_muluh_i32) { + TCGv_i32 t = tcg_temp_new_i32(); + tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2); + tcg_gen_mov_i32(rl, t); + tcg_temp_free_i32(t); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -2420,6 +2434,12 @@ static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); /* Allow the optimizer room to replace muls2 with two moves. */ tcg_gen_op0(INDEX_op_nop); + } else if (TCG_TARGET_HAS_mulsh_i32) { + TCGv_i32 t = tcg_temp_new_i32(); + tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2); + tcg_gen_mov_i32(rl, t); + tcg_temp_free_i32(t); } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); @@ -2499,6 +2519,12 @@ static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); /* Allow the optimizer room to replace mulu2 with two moves. */ tcg_gen_op0(INDEX_op_nop); + } else if (TCG_TARGET_HAS_muluh_i64) { + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2); + tcg_gen_mov_i64(rl, t); + tcg_temp_free_i64(t); } else if (TCG_TARGET_HAS_mulu2_i64) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -2540,6 +2566,12 @@ static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); /* Allow the optimizer room to replace muls2 with two moves. */ tcg_gen_op0(INDEX_op_nop); + } else if (TCG_TARGET_HAS_mulsh_i64) { + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2); + tcg_gen_mov_i64(rl, t); + tcg_temp_free_i64(t); } else { TCGv_i64 t0 = tcg_temp_new_i64(); int sizemask = 0; diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index a8af5b9..a75c29d 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -91,6 +91,8 @@ DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32)) DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32)) DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32)) DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32)) +DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32)) +DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32)) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32)) DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32)) @@ -167,6 +169,8 @@ DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64)) DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64)) DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64)) DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64)) +DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64)) +DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64)) /* QEMU specific */ #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS diff --git a/tcg/tcg.c b/tcg/tcg.c index 19bd5a3..541a442 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1252,12 +1252,13 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, static void tcg_liveness_analysis(TCGContext *s) { int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops; - TCGOpcode op, op_new; + TCGOpcode op, op_new, op_new2; TCGArg *args; const TCGOpDef *def; uint8_t *dead_temps, *mem_temps; uint16_t dead_args; uint8_t sync_args; + bool have_op_new2; s->gen_opc_ptr++; /* skip end */ @@ -1394,29 +1395,52 @@ static void tcg_liveness_analysis(TCGContext *s) goto do_not_remove; case INDEX_op_mulu2_i32: + op_new = INDEX_op_mul_i32; + op_new2 = INDEX_op_muluh_i32; + have_op_new2 = TCG_TARGET_HAS_muluh_i32; + goto do_mul2; case INDEX_op_muls2_i32: op_new = INDEX_op_mul_i32; + op_new2 = INDEX_op_mulsh_i32; + have_op_new2 = TCG_TARGET_HAS_mulsh_i32; goto do_mul2; case INDEX_op_mulu2_i64: + op_new = INDEX_op_mul_i64; + op_new2 = INDEX_op_muluh_i64; + have_op_new2 = TCG_TARGET_HAS_muluh_i64; + goto do_mul2; case INDEX_op_muls2_i64: op_new = INDEX_op_mul_i64; + op_new2 = INDEX_op_mulsh_i64; + have_op_new2 = TCG_TARGET_HAS_mulsh_i64; + goto do_mul2; do_mul2: args -= 4; nb_iargs = 2; nb_oargs = 2; - /* Likewise, test for the high part of the operation dead. */ if (dead_temps[args[1]] && !mem_temps[args[1]]) { if (dead_temps[args[0]] && !mem_temps[args[0]]) { + /* Both parts of the operation are dead. */ goto do_remove; } + /* The high part of the operation is dead; generate the low. */ s->gen_opc_buf[op_index] = op = op_new; args[1] = args[2]; args[2] = args[3]; - assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); - tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1); - /* Fall through and mark the single-word operation live. */ - nb_oargs = 1; + } else if (have_op_new2 && dead_temps[args[0]] + && !mem_temps[args[0]]) { + /* The low part of the operation is dead; generate the high. */ + s->gen_opc_buf[op_index] = op = op_new2; + args[0] = args[1]; + args[1] = args[2]; + args[2] = args[3]; + } else { + goto do_not_remove; } + assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); + tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1); + /* Mark the single-word operation live. */ + nb_oargs = 1; goto do_not_remove; default: diff --git a/tcg/tcg.h b/tcg/tcg.h index f3f9889..3f869dd 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -85,6 +85,8 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 /* Turn some undef macros into true macros. */ #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index d7fc14e..ff12b4b 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -76,6 +76,8 @@ #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_movcond_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_bswap16_i64 1 @@ -100,13 +102,14 @@ #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_movcond_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 - #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 #endif /* TCG_TARGET_REG_BITS == 64 */ /* Number of registers available.