From patchwork Tue Jan 19 12:27:58 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Juha.Riihimaki@nokia.com X-Patchwork-Id: 43188 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id E65F0B7CD5 for ; Tue, 19 Jan 2010 23:29:37 +1100 (EST) Received: from localhost ([127.0.0.1]:36677 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1NXDCs-0002EN-SE for incoming@patchwork.ozlabs.org; Tue, 19 Jan 2010 07:29:14 -0500 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1NXDCC-0002EI-Je for qemu-devel@nongnu.org; Tue, 19 Jan 2010 07:28:32 -0500 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1NXDC3-0002Da-Cx for qemu-devel@nongnu.org; Tue, 19 Jan 2010 07:28:31 -0500 Received: from [199.232.76.173] (port=50783 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1NXDC3-0002DX-8S for qemu-devel@nongnu.org; Tue, 19 Jan 2010 07:28:23 -0500 Received: from smtp.nokia.com ([192.100.105.134]:22967 helo=mgw-mx09.nokia.com) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1NXDC1-0003Ki-IT for qemu-devel@nongnu.org; Tue, 19 Jan 2010 07:28:23 -0500 Received: from vaebh106.NOE.Nokia.com (vaebh106.europe.nokia.com [10.160.244.32]) by mgw-mx09.nokia.com (Switch-3.3.3/Switch-3.3.3) with ESMTP id o0JCS90w030600 for ; Tue, 19 Jan 2010 06:28:16 -0600 Received: from esebh102.NOE.Nokia.com ([172.21.138.183]) by vaebh106.NOE.Nokia.com with Microsoft SMTPSVC(6.0.3790.3959); Tue, 19 Jan 2010 14:27:57 +0200 Received: from mgw-da01.ext.nokia.com ([147.243.128.24]) by esebh102.NOE.Nokia.com over TLS secured channel with Microsoft 
SMTPSVC(6.0.3790.3959); Tue, 19 Jan 2010 14:27:56 +0200 Received: from localhost.localdomain (r1a5lc01.research.nokia.com [172.21.41.16]) by mgw-da01.ext.nokia.com (Switch-3.3.3/Switch-3.3.3) with ESMTP id o0JCRq6K025594 for ; Tue, 19 Jan 2010 14:27:52 +0200 From: juha.riihimaki@nokia.com To: qemu-devel@nongnu.org Date: Tue, 19 Jan 2010 14:27:58 +0200 Message-Id: <1263904078-63327-1-git-send-email-juha.riihimaki@nokia.com> X-Mailer: git-send-email 1.6.5 MIME-Version: 1.0 X-OriginalArrivalTime: 19 Jan 2010 12:27:56.0416 (UTC) FILETIME=[D44B2800:01CA9902] X-Nokia-AV: Clean X-MIME-Autoconverted: from 8bit to quoted-printable by mgw-mx09.nokia.com id o0JCS90w030600 X-detected-operating-system: by monty-python.gnu.org: GNU/Linux 2.6 (newer, 1) Subject: [Qemu-devel] [PATCH] target-arm: neon emulation enhancements X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org From: Juha Riihimäki This patch improves the detection of undefined NEON data instruction encodings, fixes a bug in vcvt instruction decoding, adds an implementation for 64bit wide vsli and vsri instructions and finally aligns the formatting of the source code inside the disas_neon_data_insn function. 
Signed-off-by: Juha Riihimäki --- target-arm/translate.c | 1095 ++++++++++++++++++++++++++++-------------------- 1 files changed, 640 insertions(+), 455 deletions(-) diff --git a/target-arm/translate.c b/target-arm/translate.c index 5cf3e06..50a2440 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -4136,8 +4136,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) TCGv tmp, tmp2, tmp3, tmp4, tmp5; TCGv_i64 tmp64; - if (!vfp_enabled(env)) - return 1; + if (!vfp_enabled(env)) { + return 1; + } q = (insn & (1 << 6)) != 0; u = (insn >> 24) & 1; VFP_DREG_D(rd, insn); @@ -4147,73 +4148,82 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if ((insn & (1 << 23)) == 0) { /* Three register same length. */ op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); - if (size == 3 && (op == 1 || op == 5 || op == 8 || op == 9 - || op == 10 || op == 11 || op == 16)) { - /* 64-bit element instructions. */ - for (pass = 0; pass < (q ? 
2 : 1); pass++) { - neon_load_reg64(cpu_V0, rn + pass); - neon_load_reg64(cpu_V1, rm + pass); - switch (op) { - case 1: /* VQADD */ - if (u) { - gen_helper_neon_add_saturate_u64(CPU_V001); - } else { - gen_helper_neon_add_saturate_s64(CPU_V001); - } - break; - case 5: /* VQSUB */ - if (u) { - gen_helper_neon_sub_saturate_u64(CPU_V001); - } else { - gen_helper_neon_sub_saturate_s64(CPU_V001); - } - break; - case 8: /* VSHL */ - if (u) { - gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0); - } else { - gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0); - } - break; - case 9: /* VQSHL */ - if (u) { - gen_helper_neon_qshl_u64(cpu_V0, cpu_env, - cpu_V0, cpu_V0); - } else { - gen_helper_neon_qshl_s64(cpu_V1, cpu_env, - cpu_V1, cpu_V0); - } - break; - case 10: /* VRSHL */ - if (u) { - gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0); - } else { - gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0); - } - break; - case 11: /* VQRSHL */ - if (u) { - gen_helper_neon_qrshl_u64(cpu_V0, cpu_env, - cpu_V1, cpu_V0); - } else { - gen_helper_neon_qrshl_s64(cpu_V0, cpu_env, - cpu_V1, cpu_V0); - } - break; - case 16: - if (u) { - tcg_gen_sub_i64(CPU_V001); - } else { - tcg_gen_add_i64(CPU_V001); + if (op == 24 || op == 25 || (q && ((rd | rn | rm) & 1))) { + return 1; + } + if (size == 3) { + if (op == 1 || op == 5 || op == 8 || op == 9 || op == 10 + || op == 11 || op == 16) { + /* 64-bit element instructions. */ + for (pass = 0; pass < (q ? 
2 : 1); pass++) { + neon_load_reg64(cpu_V0, rn + pass); + neon_load_reg64(cpu_V1, rm + pass); + switch (op) { + case 1: /* VQADD */ + if (u) { + gen_helper_neon_add_saturate_u64(CPU_V001); + } else { + gen_helper_neon_add_saturate_s64(CPU_V001); + } + break; + case 5: /* VQSUB */ + if (u) { + gen_helper_neon_sub_saturate_u64(CPU_V001); + } else { + gen_helper_neon_sub_saturate_s64(CPU_V001); + } + break; + case 8: /* VSHL */ + if (u) { + gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0); + } else { + gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0); + } + break; + case 9: /* VQSHL */ + if (u) { + gen_helper_neon_qshl_u64(cpu_V0, cpu_env, + cpu_V0, cpu_V0); + } else { + gen_helper_neon_qshl_s64(cpu_V1, cpu_env, + cpu_V1, cpu_V0); + } + break; + case 10: /* VRSHL */ + if (u) { + gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0); + } else { + gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0); + } + break; + case 11: /* VQRSHL */ + if (u) { + gen_helper_neon_qrshl_u64(cpu_V0, cpu_env, + cpu_V1, cpu_V0); + } else { + gen_helper_neon_qrshl_s64(cpu_V0, cpu_env, + cpu_V1, cpu_V0); + } + break; + case 16: /* VADD, VSUB */ + if (u) { + tcg_gen_sub_i64(CPU_V001); + } else { + tcg_gen_add_i64(CPU_V001); + } + break; + default: + abort(); } - break; - default: - abort(); + neon_store_reg64(cpu_V0, rd + pass); } - neon_store_reg64(cpu_V0, rd + pass); + return 0; + } + if (op != 3) { + return 1; } - return 0; } + pairwise = 0; switch (op) { case 8: /* VSHL */ case 9: /* VQSHL */ @@ -4225,290 +4235,333 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) rtmp = rn; rn = rm; rm = rtmp; - pairwise = 0; } break; + case 19: /* VMUL */ + if (u && size) { + return 1; + } + break; + case 23: /* VPADD */ + if (u) { + return 1; + } + /* fall through */ case 20: /* VPMAX */ case 21: /* VPMIN */ - case 23: /* VPADD */ pairwise = 1; break; - case 26: /* VPADD (float) */ + case 22: /* VQDMULH/VQRDMULH */ + if (!size) { + return 1; + } + break; + case 26: /* 
VADD/VSUB/VPADD/VABD (float) */ pairwise = (u && size < 2); + /* fall through */ + case 27: /* VMLA/VMLS/VMUL (float) */ + if (size & 1) { + return 1; + } + break; + case 28: /* VCEQ/VCGE/VCGT (float) */ + if ((!u && size) || (size & 1)) { + return 1; + } + break; + case 29: /* VACGE/VACGT (float) */ + if (!u || (size & 1)) { + return 1; + } break; case 30: /* VPMIN/VPMAX (float) */ pairwise = u; + if (size & 1) { + return 1; + } + break; + case 31: /* VRECPS/VRSQRTS */ + if (u || (size & 1)) { + return 1; + } break; default: - pairwise = 0; break; } + if (pairwise && q) { + return 1; + } for (pass = 0; pass < (q ? 4 : 2); pass++) { - - if (pairwise) { - /* Pairwise. */ - if (q) - n = (pass & 1) * 2; - else - n = 0; - if (pass < q + 1) { - tmp = neon_load_reg(rn, n); - tmp2 = neon_load_reg(rn, n + 1); + if (pairwise) { + /* Pairwise. */ + if (!pass) { + tmp = neon_load_reg(rn, 0); + tmp2 = neon_load_reg(rn, 1); + } else { + tmp = neon_load_reg(rm, 0); + tmp2 = neon_load_reg(rm, 1); + } } else { - tmp = neon_load_reg(rm, n); - tmp2 = neon_load_reg(rm, n + 1); + /* Elementwise. */ + tmp = neon_load_reg(rn, pass); + tmp2 = neon_load_reg(rm, pass); } - } else { - /* Elementwise. */ - tmp = neon_load_reg(rn, pass); - tmp2 = neon_load_reg(rm, pass); - } - switch (op) { - case 0: /* VHADD */ - GEN_NEON_INTEGER_OP(hadd); - break; - case 1: /* VQADD */ - GEN_NEON_INTEGER_OP_ENV(qadd); - break; - case 2: /* VRHADD */ - GEN_NEON_INTEGER_OP(rhadd); - break; - case 3: /* Logic ops. */ - switch ((u << 2) | size) { - case 0: /* VAND */ - tcg_gen_and_i32(tmp, tmp, tmp2); + switch (op) { + case 0: /* VHADD */ + GEN_NEON_INTEGER_OP(hadd); break; - case 1: /* BIC */ - tcg_gen_andc_i32(tmp, tmp, tmp2); + case 1: /* VQADD */ + GEN_NEON_INTEGER_OP_ENV(qadd); break; - case 2: /* VORR */ - tcg_gen_or_i32(tmp, tmp, tmp2); + case 2: /* VRHADD */ + GEN_NEON_INTEGER_OP(rhadd); break; - case 3: /* VORN */ - tcg_gen_orc_i32(tmp, tmp, tmp2); + case 3: /* Logic ops. 
*/ + switch ((u << 2) | size) { + case 0: /* VAND */ + tcg_gen_and_i32(tmp, tmp, tmp2); + break; + case 1: /* VBIC */ + tcg_gen_andc_i32(tmp, tmp, tmp2); + break; + case 2: /* VORR, VMOV */ + tcg_gen_or_i32(tmp, tmp, tmp2); + break; + case 3: /* VORN */ + tcg_gen_orc_i32(tmp, tmp, tmp2); + break; + case 4: /* VEOR */ + tcg_gen_xor_i32(tmp, tmp, tmp2); + break; + case 5: /* VBSL */ + tmp3 = neon_load_reg(rd, pass); + gen_neon_bsl(tmp, tmp, tmp2, tmp3); + dead_tmp(tmp3); + break; + case 6: /* VBIT */ + tmp3 = neon_load_reg(rd, pass); + gen_neon_bsl(tmp, tmp, tmp3, tmp2); + dead_tmp(tmp3); + break; + case 7: /* VBIF */ + tmp3 = neon_load_reg(rd, pass); + gen_neon_bsl(tmp, tmp3, tmp, tmp2); + dead_tmp(tmp3); + break; + } break; - case 4: /* VEOR */ - tcg_gen_xor_i32(tmp, tmp, tmp2); + case 4: /* VHSUB */ + GEN_NEON_INTEGER_OP(hsub); break; - case 5: /* VBSL */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp, tmp2, tmp3); - dead_tmp(tmp3); + case 5: /* VQSUB */ + GEN_NEON_INTEGER_OP_ENV(qsub); break; - case 6: /* VBIT */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp, tmp3, tmp2); - dead_tmp(tmp3); + case 6: /* VCGT */ + GEN_NEON_INTEGER_OP(cgt); break; - case 7: /* VBIF */ - tmp3 = neon_load_reg(rd, pass); - gen_neon_bsl(tmp, tmp3, tmp, tmp2); - dead_tmp(tmp3); + case 7: /* VCGE */ + GEN_NEON_INTEGER_OP(cge); break; - } - break; - case 4: /* VHSUB */ - GEN_NEON_INTEGER_OP(hsub); - break; - case 5: /* VQSUB */ - GEN_NEON_INTEGER_OP_ENV(qsub); - break; - case 6: /* VCGT */ - GEN_NEON_INTEGER_OP(cgt); - break; - case 7: /* VCGE */ - GEN_NEON_INTEGER_OP(cge); - break; - case 8: /* VSHL */ - GEN_NEON_INTEGER_OP(shl); - break; - case 9: /* VQSHL */ - GEN_NEON_INTEGER_OP_ENV(qshl); - break; - case 10: /* VRSHL */ - GEN_NEON_INTEGER_OP(rshl); - break; - case 11: /* VQRSHL */ - GEN_NEON_INTEGER_OP_ENV(qrshl); - break; - case 12: /* VMAX */ - GEN_NEON_INTEGER_OP(max); - break; - case 13: /* VMIN */ - GEN_NEON_INTEGER_OP(min); - break; - case 14: /* VABD */ - 
GEN_NEON_INTEGER_OP(abd); - break; - case 15: /* VABA */ - GEN_NEON_INTEGER_OP(abd); - dead_tmp(tmp2); - tmp2 = neon_load_reg(rd, pass); - gen_neon_add(size, tmp, tmp2); - break; - case 16: - if (!u) { /* VADD */ - if (gen_neon_add(size, tmp, tmp2)) - return 1; - } else { /* VSUB */ - switch (size) { - case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break; - default: return 1; - } - } - break; - case 17: - if (!u) { /* VTST */ - switch (size) { - case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break; - default: return 1; + case 8: /* VSHL */ + GEN_NEON_INTEGER_OP(shl); + break; + case 9: /* VQSHL */ + GEN_NEON_INTEGER_OP_ENV(qshl); + break; + case 10: /* VRSHL */ + GEN_NEON_INTEGER_OP(rshl); + break; + case 11: /* VQRSHL */ + GEN_NEON_INTEGER_OP_ENV(qrshl); + break; + case 12: /* VMAX */ + GEN_NEON_INTEGER_OP(max); + break; + case 13: /* VMIN */ + GEN_NEON_INTEGER_OP(min); + break; + case 14: /* VABD */ + GEN_NEON_INTEGER_OP(abd); + break; + case 15: /* VABA */ + GEN_NEON_INTEGER_OP(abd); + dead_tmp(tmp2); + tmp2 = neon_load_reg(rd, pass); + gen_neon_add(size, tmp, tmp2); + break; + case 16: + if (!u) { /* VADD */ + if (gen_neon_add(size, tmp, tmp2)) { + abort(); /* size == 3 is handled earlier */ + } + } else { /* VSUB */ + switch (size) { + case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break; + case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break; + default: abort(); /* size == 3 is handled earlier */ + } } - } else { /* VCEQ */ - switch (size) { - case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; - default: return 1; + break; + case 17: + if (!u) { /* VTST */ + switch 
(size) { + case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break; + default: abort(); /* size == 3 is handled earlier */ + } + } else { /* VCEQ */ + switch (size) { + case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; + default: abort(); /* size == 3 is handled earlier */ + } } - } - break; - case 18: /* Multiply. */ - switch (size) { - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: return 1; - } - dead_tmp(tmp2); - tmp2 = neon_load_reg(rd, pass); - if (u) { /* VMLS */ - gen_neon_rsb(size, tmp, tmp2); - } else { /* VMLA */ - gen_neon_add(size, tmp, tmp2); - } - break; - case 19: /* VMUL */ - if (u) { /* polynomial */ - gen_helper_neon_mul_p8(tmp, tmp, tmp2); - } else { /* Integer */ + break; + case 18: /* Multiply. */ switch (size) { case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: return 1; + default: abort(); /* size == 3 is handled earlier */ } - } - break; - case 20: /* VPMAX */ - GEN_NEON_INTEGER_OP(pmax); - break; - case 21: /* VPMIN */ - GEN_NEON_INTEGER_OP(pmin); - break; - case 22: /* Hultiply high. 
*/ - if (!u) { /* VQDMULH */ - switch (size) { - case 1: gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); break; - case 2: gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); break; - default: return 1; + dead_tmp(tmp2); + tmp2 = neon_load_reg(rd, pass); + if (u) { /* VMLS */ + gen_neon_rsb(size, tmp, tmp2); + } else { /* VMLA */ + gen_neon_add(size, tmp, tmp2); } - } else { /* VQRDHMUL */ + break; + case 19: /* VMUL */ + if (u) { /* polynomial */ + gen_helper_neon_mul_p8(tmp, tmp, tmp2); + } else { /* Integer */ + switch (size) { + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; + default: abort(); /* size == 3 is handled earlier */ + } + } + break; + case 20: /* VPMAX */ + GEN_NEON_INTEGER_OP(pmax); + break; + case 21: /* VPMIN */ + GEN_NEON_INTEGER_OP(pmin); + break; + case 22: /* Multiply high. */ + if (!u) { /* VQDMULH */ + switch (size) { + case 1: + gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); + break; + case 2: + gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); + break; + default: + abort(); /* size == 0,3 is handled earlier */ + } + } else { /* VQRDHMUL */ + switch (size) { + case 1: + gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); + break; + case 2: + gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); + break; + default: + abort(); /* size == 0,3 is handled earlier */ + } + } + break; + case 23: /* VPADD */ switch (size) { - case 1: gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); break; - case 2: gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); break; - default: return 1; + case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break; + case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break; + default: abort(); /* size == 3 is handled earlier */ } - } - break; - case 23: /* VPADD */ - if (u) - return 1; - switch (size) { - case 0: gen_helper_neon_padd_u8(tmp, 
tmp, tmp2); break; - case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break; - default: return 1; - } - break; - case 26: /* Floating point arithnetic. */ - switch ((u << 2) | size) { - case 0: /* VADD */ - gen_helper_neon_add_f32(tmp, tmp, tmp2); break; - case 2: /* VSUB */ - gen_helper_neon_sub_f32(tmp, tmp, tmp2); + case 26: /* Floating point arithmetic. */ + switch ((u << 2) | size) { + case 0: /* VADD */ + gen_helper_neon_add_f32(tmp, tmp, tmp2); + break; + case 2: /* VSUB */ + gen_helper_neon_sub_f32(tmp, tmp, tmp2); + break; + case 4: /* VPADD */ + gen_helper_neon_add_f32(tmp, tmp, tmp2); + break; + case 6: /* VABD */ + gen_helper_neon_abd_f32(tmp, tmp, tmp2); + break; + default: + abort(); /* other values are handled earlier */ + } break; - case 4: /* VPADD */ - gen_helper_neon_add_f32(tmp, tmp, tmp2); + case 27: /* Float multiply. */ + gen_helper_neon_mul_f32(tmp, tmp, tmp2); + if (!u) { + dead_tmp(tmp2); + tmp2 = neon_load_reg(rd, pass); + if (size == 0) { + gen_helper_neon_add_f32(tmp, tmp, tmp2); + } else { + gen_helper_neon_sub_f32(tmp, tmp2, tmp); + } + } break; - case 6: /* VABD */ - gen_helper_neon_abd_f32(tmp, tmp, tmp2); + case 28: /* Float compare. */ + if (!u) { + gen_helper_neon_ceq_f32(tmp, tmp, tmp2); + } else { + if (size == 0) { + gen_helper_neon_cge_f32(tmp, tmp, tmp2); + } else { + gen_helper_neon_cgt_f32(tmp, tmp, tmp2); + } + } break; - default: - return 1; - } - break; - case 27: /* Float multiply. */ - gen_helper_neon_mul_f32(tmp, tmp, tmp2); - if (!u) { - dead_tmp(tmp2); - tmp2 = neon_load_reg(rd, pass); + case 29: /* Float compare absolute. */ if (size == 0) { - gen_helper_neon_add_f32(tmp, tmp, tmp2); + gen_helper_neon_acge_f32(tmp, tmp, tmp2); + } else { + gen_helper_neon_acgt_f32(tmp, tmp, tmp2); + } + break; + case 30: /* Float min/max. 
*/ + if (size == 0) { + gen_helper_neon_max_f32(tmp, tmp, tmp2); } else { - gen_helper_neon_sub_f32(tmp, tmp2, tmp); + gen_helper_neon_min_f32(tmp, tmp, tmp2); } + break; + case 31: + if (size == 0) { + gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); + } else { + gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env); + } + break; + default: + abort(); } - break; - case 28: /* Float compare. */ - if (!u) { - gen_helper_neon_ceq_f32(tmp, tmp, tmp2); + dead_tmp(tmp2); + + /* Save the result. For elementwise operations we can put it + straight into the destination register. For pairwise operations + we have to be careful to avoid clobbering the source operands.*/ + if (pairwise && rd == rm) { + neon_store_scratch(pass, tmp); } else { - if (size == 0) - gen_helper_neon_cge_f32(tmp, tmp, tmp2); - else - gen_helper_neon_cgt_f32(tmp, tmp, tmp2); + neon_store_reg(rd, pass, tmp); } - break; - case 29: /* Float compare absolute. */ - if (!u) - return 1; - if (size == 0) - gen_helper_neon_acge_f32(tmp, tmp, tmp2); - else - gen_helper_neon_acgt_f32(tmp, tmp, tmp2); - break; - case 30: /* Float min/max. */ - if (size == 0) - gen_helper_neon_max_f32(tmp, tmp, tmp2); - else - gen_helper_neon_min_f32(tmp, tmp, tmp2); - break; - case 31: - if (size == 0) - gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); - else - gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env); - break; - default: - abort(); - } - dead_tmp(tmp2); - - /* Save the result. For elementwise operations we can put it - straight into the destination register. For pairwise operations - we have to be careful to avoid clobbering the source operands. */ - if (pairwise && rd == rm) { - neon_store_scratch(pass, tmp); - } else { - neon_store_reg(rd, pass, tmp); - } - } /* for pass */ if (pairwise && rd == rm) { for (pass = 0; pass < (q ? 4 : 2); pass++) { @@ -4522,23 +4575,32 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) /* Two registers and shift. 
*/ op = (insn >> 8) & 0xf; if (insn & (1 << 7)) { + if (op & 8) { + return 1; + } /* 64-bit shift. */ size = 3; } else { size = 2; - while ((insn & (1 << (size + 19))) == 0) + while ((insn & (1 << (size + 19))) == 0) { size--; + } } shift = (insn >> 16) & ((1 << (3 + size)) - 1); /* To avoid excessive dumplication of ops we implement shift by immediate using the variable shift operations. */ if (op < 8) { /* Shift by immediate: - VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ + VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VSLI, VQSHL, VQSHLU */ /* Right shifts are encoded as N - shift, where N is the element size in bits. */ - if (op <= 4) + if ((q && ((rd | rm) & 1)) + || (!u && (op == 4 || op == 6))) { + return 1; + } + if (op <= 4) { shift = shift - (1 << (size + 3)); + } if (size == 3) { count = q + 1; } else { @@ -4569,34 +4631,42 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) switch (op) { case 0: /* VSHR */ case 1: /* VSRA */ - if (u) - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); - else - gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1); + if (u) { + gen_helper_neon_shl_u64(cpu_V0, cpu_V0, + cpu_V1); + } else { + gen_helper_neon_shl_s64(cpu_V0, cpu_V0, + cpu_V1); + } break; case 2: /* VRSHR */ case 3: /* VRSRA */ - if (u) - gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1); - else - gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1); + if (u) { + gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, + cpu_V1); + } else { + gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, + cpu_V1); + } break; case 4: /* VSRI */ - if (!u) - return 1; gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); break; case 5: /* VSHL, VSLI */ gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); break; - case 6: /* VQSHL */ - if (u) - gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1); - else - gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1); + case 6: /* VQSHLU */ + gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, + cpu_V1); break; - case 7: /* VQSHLU */ - 
gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1); + case 7: /* VQSHL/VQSHLU */ + if (u) { + gen_helper_neon_qshl_u64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); + } else { + gen_helper_neon_qshl_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); + } break; } if (op == 1 || op == 3) { @@ -4605,7 +4675,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1); } else if (op == 4 || (op == 5 && u)) { /* Insert */ - cpu_abort(env, "VS[LR]I.64 not implemented"); + neon_load_reg64(cpu_V0, rd + pass); + uint64_t mask; + if (op == 4) { + mask = 0xffffffffffffffffLL >> -shift; + } else { + mask = 0xffffffffffffffffLL << shift; + } + tcg_gen_andi_i64(cpu_V1, cpu_V1, mask); + tcg_gen_andi_i64(cpu_V0, cpu_V0, ~mask); + tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); } neon_store_reg64(cpu_V0, rd + pass); } else { /* size < 3 */ @@ -4623,27 +4702,40 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) GEN_NEON_INTEGER_OP(rshl); break; case 4: /* VSRI */ - if (!u) - return 1; - GEN_NEON_INTEGER_OP(shl); - break; case 5: /* VSHL, VSLI */ switch (size) { - case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break; - default: return 1; + case 0: + gen_helper_neon_shl_u8(tmp, tmp, tmp2); + break; + case 1: + gen_helper_neon_shl_u16(tmp, tmp, tmp2); + break; + case 2: + gen_helper_neon_shl_u32(tmp, tmp, tmp2); + break; + default: + abort(); /* size == 3 is handled earlier */ } break; - case 6: /* VQSHL */ + case 6: /* VQSHLU */ GEN_NEON_INTEGER_OP_ENV(qshl); break; case 7: /* VQSHLU */ switch (size) { - case 0: gen_helper_neon_qshl_u8(tmp, cpu_env, tmp, tmp2); break; - case 1: gen_helper_neon_qshl_u16(tmp, cpu_env, tmp, tmp2); break; - case 2: gen_helper_neon_qshl_u32(tmp, cpu_env, tmp, tmp2); break; - default: return 1; + case 0: + gen_helper_neon_qshl_u8(tmp, cpu_env, tmp, + tmp2); + break; + 
case 1: + gen_helper_neon_qshl_u16(tmp, cpu_env, tmp, + tmp2); + break; + case 2: + gen_helper_neon_qshl_u32(tmp, cpu_env, tmp, + tmp2); + break; + default: + abort(); /* size == 3 is handled earlier */ } break; } @@ -4658,32 +4750,35 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) /* Insert */ switch (size) { case 0: - if (op == 4) + if (op == 4) { mask = 0xff >> -shift; - else + } else { mask = (uint8_t)(0xff << shift); + } mask |= mask << 8; mask |= mask << 16; break; case 1: - if (op == 4) + if (op == 4) { mask = 0xffff >> -shift; - else + } else { mask = (uint16_t)(0xffff << shift); + } mask |= mask << 16; break; case 2: if (shift < -31 || shift > 31) { mask = 0; } else { - if (op == 4) + if (op == 4) { mask = 0xffffffffu >> -shift; - else + } else { mask = 0xffffffffu << shift; + } } break; default: - abort(); + abort(); /* size == 3 is handled earlier */ } tmp2 = neon_load_reg(rd, pass); tcg_gen_andi_i32(tmp, tmp, mask); @@ -4697,6 +4792,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } else if (op < 10) { /* Shift by immediate and narrow: VSHRN, VRSHRN, VQSHRN, VQRSHRN. 
*/ + if (rm & 1) { + return 1; + } shift = shift - (1 << (size + 3)); size++; switch (size) { @@ -4723,15 +4821,21 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (size == 3) { neon_load_reg64(cpu_V0, rm + pass); if (q) { - if (u) - gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64); - else - gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64); + if (u) { + gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, + tmp64); + } else { + gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, + tmp64); + } } else { - if (u) - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp64); - else - gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64); + if (u) { + gen_helper_neon_shl_u64(cpu_V0, cpu_V0, + tmp64); + } else { + gen_helper_neon_shl_s64(cpu_V0, cpu_V0, + tmp64); + } } } else { tmp = neon_load_reg(rm + pass, 0); @@ -4746,10 +4850,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (op == 8 && !u) { gen_neon_narrow(size - 1, tmp, cpu_V0); } else { - if (op == 8) + if (op == 8) { gen_neon_narrow_sats(size - 1, tmp, cpu_V0); - else + } else { gen_neon_narrow_satu(size - 1, tmp, cpu_V0); + } } neon_store_reg(rd, pass, tmp); } /* for pass */ @@ -4760,14 +4865,15 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } else if (op == 10) { /* VSHLL */ - if (q || size == 3) + if (q) { return 1; + } tmp = neon_load_reg(rm, 0); tmp2 = neon_load_reg(rm, 1); for (pass = 0; pass < 2; pass++) { - if (pass == 1) + if (pass == 1) { tmp = tmp2; - + } gen_neon_widen(cpu_V0, tmp, size, u); if (shift != 0) { @@ -4788,22 +4894,29 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } neon_store_reg64(cpu_V0, rd + pass); } - } else if (op == 15 || op == 16) { + } else if (op == 14 || op == 15) { /* VCVT fixed-point. */ + if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) { + return 1; + } for (pass = 0; pass < (q ? 
4 : 2); pass++) { - tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass)); + tcg_gen_ld_f32(cpu_F0s, cpu_env, + neon_reg_offset(rm, pass)); if (op & 1) { - if (u) + if (u) { gen_vfp_ulto(0, shift); - else + } else { gen_vfp_slto(0, shift); + } } else { - if (u) + if (u) { gen_vfp_toul(0, shift); - else + } else { gen_vfp_tosl(0, shift); + } } - tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass)); + tcg_gen_st_f32(cpu_F0s, cpu_env, + neon_reg_offset(rd, pass)); } } else { return 1; @@ -4842,10 +4955,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) break; case 14: imm |= (imm << 8) | (imm << 16) | (imm << 24); - if (invert) + if (invert) { imm = ~imm; + } break; case 15: + if (invert) { + return 1; + } imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); break; @@ -4884,36 +5001,22 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } else { /* (insn & 0x00800010 == 0x00800000) */ if (size != 3) { op = (insn >> 8) & 0xf; - if ((insn & (1 << 6)) == 0) { + if (!q) { /* Three registers of different lengths. 
*/ - int src1_wide; - int src2_wide; - int prewiden; - /* prewiden, src1_wide, src2_wide */ - static const int neon_3reg_wide[16][3] = { - {1, 0, 0}, /* VADDL */ - {1, 1, 0}, /* VADDW */ - {1, 0, 0}, /* VSUBL */ - {1, 1, 0}, /* VSUBW */ - {0, 1, 1}, /* VADDHN */ - {0, 0, 0}, /* VABAL */ - {0, 1, 1}, /* VSUBHN */ - {0, 0, 0}, /* VABDL */ - {0, 0, 0}, /* VMLAL */ - {0, 0, 0}, /* VQDMLAL */ - {0, 0, 0}, /* VMLSL */ - {0, 0, 0}, /* VQDMLSL */ - {0, 0, 0}, /* Integer VMULL */ - {0, 0, 0}, /* VQDMULL */ - {0, 0, 0} /* Polynomial VMULL */ - }; - - prewiden = neon_3reg_wide[op][0]; - src1_wide = neon_3reg_wide[op][1]; - src2_wide = neon_3reg_wide[op][2]; - - if (size == 0 && (op == 9 || op == 11 || op == 13)) + + if (op == 15 + || (op < 4 && ((rd & 1) || ((op & 1) && (rn & 1)))) + || ((op == 4 || op == 6) && ((rn | rm) & 1)) + || ((op == 5 || op >= 7) && (rd & 1)) + || ((op == 9 || op == 11) && (u || size == 0)) + || (op == 13 && size == 0) + || (op == 14 && (u || size))) { return 1; + } + + int src1_wide = (op == 1 || op == 3 || op == 4 || op == 6); + int src2_wide = (op == 4 || op == 6); + int prewiden = (op < 4); /* Avoid overlapping operands. Wide source operands are always aligned so will never overlap with wide @@ -4980,7 +5083,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 5: gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2); break; - default: abort(); + default: abort(); /* size == 3 is handled earlier */ } dead_tmp(tmp2); dead_tmp(tmp); @@ -4995,7 +5098,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) cpu_abort(env, "Polynomial VMULL not implemented"); default: /* 15 is RESERVED. */ - return 1; + abort(); /* op == 15 is handled earlier */ } if (op == 5 || op == 13 || (op >= 8 && op <= 11)) { /* Accumulate. 
*/ @@ -5038,7 +5141,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); tcg_gen_trunc_i64_i32(tmp, cpu_V0); break; - default: abort(); + default: abort(); /* size == 3 is handled earlier */ } } else { switch (size) { @@ -5053,7 +5156,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); tcg_gen_trunc_i64_i32(tmp, cpu_V0); break; - default: abort(); + default: abort(); /* size == 3 is handled earlier */ } } if (pass == 0) { @@ -5076,8 +5179,15 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 5: /* Floating point VMLS scalar */ case 8: /* Integer VMUL scalar */ case 9: /* Floating point VMUL scalar */ + if (size <= (op & 1)) { + return 1; + } + /* fall through */ case 12: /* VQDMULH scalar */ case 13: /* VQRDMULH scalar */ + if (u && ((rd | rn) & 1)) { + return 1; + } tmp = neon_get_scalar(size, rm); neon_store_scratch(0, tmp); for (pass = 0; pass < (u ? 4 : 2); pass++) { @@ -5085,24 +5195,35 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp2 = neon_load_reg(rn, pass); if (op == 12) { if (size == 1) { - gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); - } else { - gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); + gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, + tmp2); + } else { /* TODO: what happens when size == 0? */ + gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, + tmp2); } } else if (op == 13) { if (size == 1) { - gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); - } else { - gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); + gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, + tmp2); + } else { /* TODO: what happens when size == 0? 
*/ + gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, + tmp2); } } else if (op & 1) { gen_helper_neon_mul_f32(tmp, tmp, tmp2); } else { switch (size) { - case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; - default: return 1; + case 0: + gen_helper_neon_mul_u8(tmp, tmp, tmp2); + break; + case 1: + gen_helper_neon_mul_u16(tmp, tmp, tmp2); + break; + case 2: + tcg_gen_mul_i32(tmp, tmp, tmp2); + break; + default: + abort(); /* size == 3 is handled earlier */ } } dead_tmp(tmp2); @@ -5123,21 +5244,26 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_helper_neon_sub_f32(tmp, tmp2, tmp); break; default: - abort(); + abort(); /* size == 3 is handled earlier */ } dead_tmp(tmp2); } neon_store_reg(rd, pass, tmp); } break; - case 2: /* VMLAL sclar */ case 3: /* VQDMLAL scalar */ - case 6: /* VMLSL scalar */ case 7: /* VQDMLSL scalar */ - case 10: /* VMULL scalar */ case 11: /* VQDMULL scalar */ - if (size == 0 && (op == 3 || op == 7 || op == 11)) + if (u) { return 1; + } + /* fall through */ + case 2: /* VMLAL scalar */ + case 6: /* VMLSL scalar */ + case 10: /* VMULL scalar */ + if (size == 0 || (rd & 1)) { + return 1; + } tmp2 = neon_get_scalar(size, rm); tmp3 = neon_load_reg(rn, 1); @@ -5189,8 +5315,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) imm = (insn >> 8) & 0xf; count = q + 1; - if (imm > 7 && !q) + if ((imm > 7 && !q) + || (q && ((rd | rn | rm) & 1))) { return 1; + } if (imm == 0) { neon_load_reg64(cpu_V0, rn); @@ -5240,10 +5368,15 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) /* Two register misc.
*/ op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); size = (insn >> 18) & 3; + if ((q && (op < 36 || op > 46) && ((rd | rm) & 1)) + || (op >= 56 && size != 2)) { + return 1; + } switch (op) { case 0: /* VREV64 */ - if (size == 3) + if (size == 3) { return 1; + } for (pass = 0; pass < (q ? 2 : 1); pass++) { tmp = neon_load_reg(rm, pass * 2); tmp2 = neon_load_reg(rm, pass * 2 + 1); @@ -5268,8 +5401,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) break; case 4: case 5: /* VPADDL */ case 12: case 13: /* VPADAL */ - if (size == 3) + if (size == 3) { return 1; + } for (pass = 0; pass < q + 1; pass++) { tmp = neon_load_reg(rm, pass * 2); gen_neon_widen(cpu_V0, tmp, size, op & 1); @@ -5290,15 +5424,19 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } break; case 33: /* VTRN */ - if (size == 2) { + switch (size) { + case 0: case 1: + goto elementwise; + case 2: for (n = 0; n < (q ? 4 : 2); n += 2) { tmp = neon_load_reg(rm, n); tmp2 = neon_load_reg(rd, n + 1); neon_store_reg(rm, n, tmp2); neon_store_reg(rd, n + 1, tmp); } - } else { - goto elementwise; + break; + default: + return 1; } break; case 34: /* VUZP */ @@ -5306,8 +5444,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) Rd A3 A2 A1 A0 B2 B0 A2 A0 Rm B3 B2 B1 B0 B3 B1 A3 A1 */ - if (size == 3) + if (size == 3 || (!q && size == 2)) { return 1; + } gen_neon_unzip(rd, q, 0, size); gen_neon_unzip(rm, q, 4, size); if (q) { @@ -5333,8 +5472,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) Rd A3 A2 A1 A0 B1 A1 B0 A0 Rm B3 B2 B1 B0 B3 A3 B2 A2 */ - if (size == 3) + if (size == 3 || (!q && size == 2)) { return 1; + } count = (q ? 
4 : 2); for (n = 0; n < count; n++) { tmp = neon_load_reg(rd, n); @@ -5355,8 +5495,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } break; case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */ - if (size == 3) + if (size == 3 || (rm & 1)) { return 1; + } TCGV_UNUSED(tmp2); for (pass = 0; pass < 2; pass++) { neon_load_reg64(cpu_V0, rm + pass); @@ -5377,20 +5518,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } break; case 38: /* VSHLL */ - if (q || size == 3) + if (q || size == 3 || (rd & 1)) { return 1; + } tmp = neon_load_reg(rm, 0); tmp2 = neon_load_reg(rm, 1); for (pass = 0; pass < 2; pass++) { - if (pass == 1) + if (pass == 1) { tmp = tmp2; + } gen_neon_widen(cpu_V0, tmp, size, 1); neon_store_reg64(cpu_V0, rd + pass); } break; case 44: /* VCVT.F16.F32 */ - if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) - return 1; + if (!arm_feature(env, ARM_FEATURE_VFP_FP16) + || q || size != 1 || (rm & 1)) { + return 1; + } tmp = new_tmp(); tmp2 = new_tmp(); tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0)); @@ -5411,8 +5556,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) dead_tmp(tmp); break; case 46: /* VCVT.F32.F16 */ - if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) - return 1; + if (!arm_feature(env, ARM_FEATURE_VFP_FP16) + || q || size != 1 || (rd & 1)) { + return 1; + } tmp3 = new_tmp(); tmp = neon_load_reg(rm, 0); tmp2 = neon_load_reg(rm, 1); @@ -5447,38 +5594,44 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) switch (size) { case 0: tcg_gen_bswap32_i32(tmp, tmp); break; case 1: gen_swap_half(tmp); break; - default: return 1; + default: dead_tmp(tmp); return 1; } break; case 2: /* VREV16 */ - if (size != 0) + if (size != 0) { + dead_tmp(tmp); return 1; + } gen_rev16(tmp); break; - case 8: /* CLS */ + case 8: /* VCLS */ switch (size) { case 0: gen_helper_neon_cls_s8(tmp, tmp); break; case 1: gen_helper_neon_cls_s16(tmp, tmp); 
break; case 2: gen_helper_neon_cls_s32(tmp, tmp); break; - default: return 1; + default: dead_tmp(tmp); return 1; } break; - case 9: /* CLZ */ + case 9: /* VCLZ */ switch (size) { case 0: gen_helper_neon_clz_u8(tmp, tmp); break; case 1: gen_helper_neon_clz_u16(tmp, tmp); break; case 2: gen_helper_clz(tmp, tmp); break; - default: return 1; + default: dead_tmp(tmp); return 1; } break; - case 10: /* CNT */ - if (size != 0) + case 10: /* VCNT */ + if (size != 0) { + dead_tmp(tmp); return 1; + } gen_helper_neon_cnt_u8(tmp, tmp); break; case 11: /* VNOT */ - if (size != 0) + if (size != 0) { + dead_tmp(tmp); return 1; + } tcg_gen_not_i32(tmp, tmp); break; case 14: /* VQABS */ @@ -5486,7 +5639,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 0: gen_helper_neon_qabs_s8(tmp, cpu_env, tmp); break; case 1: gen_helper_neon_qabs_s16(tmp, cpu_env, tmp); break; case 2: gen_helper_neon_qabs_s32(tmp, cpu_env, tmp); break; - default: return 1; + default: dead_tmp(tmp); return 1; } break; case 15: /* VQNEG */ @@ -5494,7 +5647,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 0: gen_helper_neon_qneg_s8(tmp, cpu_env, tmp); break; case 1: gen_helper_neon_qneg_s16(tmp, cpu_env, tmp); break; case 2: gen_helper_neon_qneg_s32(tmp, cpu_env, tmp); break; - default: return 1; + default: dead_tmp(tmp); return 1; } break; case 16: case 19: /* VCGT #0, VCLE #0 */ @@ -5503,11 +5656,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break; - default: return 1; + default: tcg_temp_free_i32(tmp2); dead_tmp(tmp); return 1; } tcg_temp_free(tmp2); - if (op == 19) + if (op == 19) { tcg_gen_not_i32(tmp, tmp); + } break; case 17: case 20: /* VCGE #0, VCLT #0 */ tmp2 = tcg_const_i32(0); @@ -5515,11 +5669,12 @@ static int 
disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break; - default: return 1; + default: tcg_temp_free_i32(tmp2); dead_tmp(tmp); return 1; } tcg_temp_free(tmp2); - if (op == 20) + if (op == 20) { tcg_gen_not_i32(tmp, tmp); + } break; case 18: /* VCEQ #0 */ tmp2 = tcg_const_i32(0); @@ -5527,7 +5682,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; - default: return 1; + default: tcg_temp_free_i32(tmp2); dead_tmp(tmp); return 1; } tcg_temp_free(tmp2); break; @@ -5536,42 +5691,68 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 0: gen_helper_neon_abs_s8(tmp, tmp); break; case 1: gen_helper_neon_abs_s16(tmp, tmp); break; case 2: tcg_gen_abs_i32(tmp, tmp); break; - default: return 1; + default: dead_tmp(tmp); return 1; } break; case 23: /* VNEG */ - if (size == 3) + if (size == 3) { + dead_tmp(tmp); return 1; + } tmp2 = tcg_const_i32(0); gen_neon_rsb(size, tmp, tmp2); tcg_temp_free(tmp2); break; case 24: case 27: /* Float VCGT #0, Float VCLE #0 */ + if (size != 2) { + dead_tmp(tmp); + return 1; + } tmp2 = tcg_const_i32(0); gen_helper_neon_cgt_f32(tmp, tmp, tmp2); tcg_temp_free(tmp2); - if (op == 27) + if (op == 27) { tcg_gen_not_i32(tmp, tmp); + } break; case 25: case 28: /* Float VCGE #0, Float VCLT #0 */ + if (size != 2) { + dead_tmp(tmp); + return 1; + } tmp2 = tcg_const_i32(0); gen_helper_neon_cge_f32(tmp, tmp, tmp2); tcg_temp_free(tmp2); - if (op == 28) + if (op == 28) { tcg_gen_not_i32(tmp, tmp); + } break; case 26: /* Float VCEQ #0 */ + if (size != 2) { + dead_tmp(tmp); + return 1; + } tmp2 = tcg_const_i32(0); gen_helper_neon_ceq_f32(tmp, tmp, tmp2); 
tcg_temp_free(tmp2); break; case 30: /* Float VABS */ + if (size != 2) { + return 1; + } gen_vfp_abs(0); break; case 31: /* Float VNEG */ + if (size != 2) { + return 1; + } gen_vfp_neg(0); break; case 32: /* VSWP */ + if (size != 0) { + dead_tmp(tmp); + return 1; + } tmp2 = neon_load_reg(rd, pass); neon_store_reg(rm, pass, tmp2); break; @@ -5580,8 +5761,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) switch (size) { case 0: gen_neon_trn_u8(tmp, tmp2); break; case 1: gen_neon_trn_u16(tmp, tmp2); break; - case 2: abort(); - default: return 1; + default: abort(); /* size == 2,3 is handled earlier */ } neon_store_reg(rm, pass, tmp2); break; @@ -5610,7 +5790,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_vfp_uito(0); break; default: - /* Reserved: 21, 29, 39-56 */ + /* Reserved: 3, 6, 7, 21, 29, 39-43, 45, 47-55 */ + dead_tmp(tmp); return 1; } if (op == 30 || op == 31 || op >= 58) { @@ -5625,7 +5806,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } else if ((insn & (1 << 10)) == 0) { /* VTBL, VTBX. 
*/ n = ((insn >> 5) & 0x18) + 8; - if (insn & (1 << 6)) { + if (q) { tmp = neon_load_reg(rd, 0); } else { tmp = new_tmp(); @@ -5636,7 +5817,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp5 = tcg_const_i32(n); gen_helper_neon_tbl(tmp2, tmp2, tmp, tmp4, tmp5); dead_tmp(tmp); - if (insn & (1 << 6)) { + if (q) { tmp = neon_load_reg(rd, 1); } else { tmp = new_tmp(); @@ -5651,6 +5832,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) dead_tmp(tmp); } else if ((insn & 0x380) == 0) { /* VDUP */ + if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { + return 1; + } if (insn & (1 << 19)) { tmp = neon_load_reg(rm, 1); } else { @@ -5659,10 +5843,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (insn & (1 << 16)) { gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8); } else if (insn & (1 << 17)) { - if ((insn >> 18) & 1) + if ((insn >> 18) & 1) { gen_neon_dup_high16(tmp); - else + } else { gen_neon_dup_low16(tmp); + } } for (pass = 0; pass < (q ? 4 : 2); pass++) { tmp2 = new_tmp(); @@ -7827,7 +8012,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) /* Coprocessor. */ if (((insn >> 24) & 3) == 3) { /* Translate into the equivalent ARM encoding. */ - insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4); + insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28); if (disas_neon_data_insn(env, s, insn)) goto illegal_op; } else {