Patchwork [3/4] target-arm: neon emulation enhancements

login
register
mail settings
Submitter Riku Voipio
Date Feb. 5, 2010, 3:52 p.m.
Message ID <1265385151-11024-4-git-send-email-riku.voipio@iki.fi>
Download mbox | patch
Permalink /patch/44652/
State New
Headers show

Comments

Riku Voipio - Feb. 5, 2010, 3:52 p.m.
From: Juha Riihimäki <juha.riihimaki@nokia.com>

This patch improves the detection of undefined NEON data instruction
encodings, fixes bugs in some of the instruction decodings and adds an
implementation for 64bit wide vsli and vsri instructions.

Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com>
Signed-off-by: Riku Voipio <riku.voipio@nokia.com>
---
 target-arm/translate.c |  450 +++++++++++++++++++++++++++++-------------------
 1 files changed, 276 insertions(+), 174 deletions(-)

Patch

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 537d9d6..743b846 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4147,73 +4147,82 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
     if ((insn & (1 << 23)) == 0) {
         /* Three register same length.  */
         op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
-        if (size == 3 && (op == 1 || op == 5 || op == 8 || op == 9
-                          || op == 10 || op  == 11 || op == 16)) {
-            /* 64-bit element instructions.  */
-            for (pass = 0; pass < (q ? 2 : 1); pass++) {
-                neon_load_reg64(cpu_V0, rn + pass);
-                neon_load_reg64(cpu_V1, rm + pass);
-                switch (op) {
-                case 1: /* VQADD */
-                    if (u) {
-                        gen_helper_neon_add_saturate_u64(CPU_V001);
-                    } else {
-                        gen_helper_neon_add_saturate_s64(CPU_V001);
-                    }
-                    break;
-                case 5: /* VQSUB */
-                    if (u) {
-                        gen_helper_neon_sub_saturate_u64(CPU_V001);
-                    } else {
-                        gen_helper_neon_sub_saturate_s64(CPU_V001);
-                    }
-                    break;
-                case 8: /* VSHL */
-                    if (u) {
-                        gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
-                    } else {
-                        gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
-                    }
-                    break;
-                case 9: /* VQSHL */
-                    if (u) {
-                        gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
-                                                 cpu_V0, cpu_V0);
-                    } else {
-                        gen_helper_neon_qshl_s64(cpu_V1, cpu_env,
-                                                 cpu_V1, cpu_V0);
-                    }
-                    break;
-                case 10: /* VRSHL */
-                    if (u) {
-                        gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
-                    } else {
-                        gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
-                    }
-                    break;
-                case 11: /* VQRSHL */
-                    if (u) {
-                        gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
-                                                  cpu_V1, cpu_V0);
-                    } else {
-                        gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
-                                                  cpu_V1, cpu_V0);
-                    }
-                    break;
-                case 16:
-                    if (u) {
-                        tcg_gen_sub_i64(CPU_V001);
-                    } else {
-                        tcg_gen_add_i64(CPU_V001);
+        if (op == 24 || op == 25 || (q && ((rd | rn | rm) & 1))) {
+            return 1;
+        }
+        if (size == 3) {
+            if (op == 1 || op == 5 || op == 8 || op == 9 || op == 10
+                || op == 11 || op == 16) {
+                /* 64-bit element instructions.  */
+                for (pass = 0; pass < (q ? 2 : 1); pass++) {
+                    neon_load_reg64(cpu_V0, rn + pass);
+                    neon_load_reg64(cpu_V1, rm + pass);
+                    switch (op) {
+                    case 1: /* VQADD */
+                        if (u) {
+                            gen_helper_neon_add_saturate_u64(CPU_V001);
+                        } else {
+                            gen_helper_neon_add_saturate_s64(CPU_V001);
+                        }
+                        break;
+                    case 5: /* VQSUB */
+                        if (u) {
+                            gen_helper_neon_sub_saturate_u64(CPU_V001);
+                        } else {
+                            gen_helper_neon_sub_saturate_s64(CPU_V001);
+                        }
+                        break;
+                    case 8: /* VSHL */
+                        if (u) {
+                            gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
+                        } else {
+                            gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
+                        }
+                        break;
+                    case 9: /* VQSHL */
+                        if (u) {
+                            gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
+                                                     cpu_V0, cpu_V0);
+                        } else {
+                            gen_helper_neon_qshl_s64(cpu_V1, cpu_env,
+                                                     cpu_V1, cpu_V0);
+                        }
+                        break;
+                    case 10: /* VRSHL */
+                        if (u) {
+                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
+                        } else {
+                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
+                        }
+                        break;
+                    case 11: /* VQRSHL */
+                        if (u) {
+                            gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
+                                                      cpu_V1, cpu_V0);
+                        } else {
+                            gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
+                                                      cpu_V1, cpu_V0);
+                        }
+                        break;
+                    case 16: /* VADD, VSUB */
+                        if (u) {
+                            tcg_gen_sub_i64(CPU_V001);
+                        } else {
+                            tcg_gen_add_i64(CPU_V001);
+                        }
+                        break;
+                    default:
+                        abort();
                     }
-                    break;
-                default:
-                    abort();
+                    neon_store_reg64(cpu_V0, rd + pass);
                 }
-                neon_store_reg64(cpu_V0, rd + pass);
+                return 0;
+            }
+            if (op != 3) {
+                return 1;
             }
-            return 0;
         }
+        pairwise = 0;
         switch (op) {
         case 8: /* VSHL */
         case 9: /* VQSHL */
@@ -4225,39 +4234,72 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 rtmp = rn;
                 rn = rm;
                 rm = rtmp;
-                pairwise = 0;
             }
             break;
+        case 19: /* VMUL */
+            if (u && size) {
+                return 1;
+            }
+            break;
+        case 23: /* VPADD */
+            if (u) {
+                return 1;
+            }
+            /* fall through */
         case 20: /* VPMAX */
         case 21: /* VPMIN */
-        case 23: /* VPADD */
             pairwise = 1;
             break;
-        case 26: /* VPADD (float) */
+        case 22: /* VQDMULH/VQRDMULH */
+            if (!size) {
+                return 1;
+            }
+            break;
+        case 26: /* VADD/VSUB/VPADD/VABD (float) */
             pairwise = (u && size < 2);
+            /* fall through */
+        case 27: /* VMLA/VMLS/VMUL (float) */
+            if (size & 1) {
+                return 1;
+            }
+            break;
+        case 28: /* VCEQ/VCGE/VCGT (float) */
+            if ((!u && size) || (size & 1)) {
+                return 1;
+            }
+            break;
+        case 29: /* VACGE/VACGT (float) */
+            if (!u || (size & 1)) {
+                return 1;
+            }
             break;
         case 30: /* VPMIN/VPMAX (float) */
             pairwise = u;
+            if (size & 1) {
+                return 1;
+            }
+            break;
+        case 31: /* VRECPS/VRSQRTS */
+            if (u || (size & 1)) {
+                return 1;
+            }
             break;
         default:
-            pairwise = 0;
             break;
         }
+        if (pairwise && q) {
+            return 1;
+        }
 
         for (pass = 0; pass < (q ? 4 : 2); pass++) {
-
         if (pairwise) {
             /* Pairwise.  */
-            if (q)
-                n = (pass & 1) * 2;
-            else
-                n = 0;
-            if (pass < q + 1) {
-                tmp = neon_load_reg(rn, n);
-                tmp2 = neon_load_reg(rn, n + 1);
+            if (!pass) {
+                tmp = neon_load_reg(rn, 0);
+                tmp2 = neon_load_reg(rn, 1);
             } else {
-                tmp = neon_load_reg(rm, n);
-                tmp2 = neon_load_reg(rm, n + 1);
+                tmp = neon_load_reg(rm, 0);
+                tmp2 = neon_load_reg(rm, 1);
             }
         } else {
             /* Elementwise.  */
@@ -4350,13 +4392,13 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
         case 16:
             if (!u) { /* VADD */
                 if (gen_neon_add(size, tmp, tmp2))
-                    return 1;
+                    abort(); /* size == 3 is handled earlier */
             } else { /* VSUB */
                 switch (size) {
                 case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
                 case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
                 case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 3 is handled earlier */
                 }
             }
             break;
@@ -4366,14 +4408,14 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
                 case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
                 case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 3 is handled earlier */
                 }
             } else { /* VCEQ */
                 switch (size) {
                 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
                 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
                 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 3 is handled earlier */
                 }
             }
             break;
@@ -4382,7 +4424,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-            default: return 1;
+            default: abort(); /* size == 3 is handled earlier */
             }
             dead_tmp(tmp2);
             tmp2 = neon_load_reg(rd, pass);
@@ -4400,7 +4442,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
                 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
                 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 3 is handled earlier */
                 }
             }
             break;
@@ -4415,24 +4457,22 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 switch (size) {
                 case 1: gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); break;
                 case 2: gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 0,3 is handled earlier */
                 }
             } else { /* VQRDHMUL */
                 switch (size) {
                 case 1: gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); break;
                 case 2: gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 0,3 is handled earlier */
                 }
             }
             break;
         case 23: /* VPADD */
-            if (u)
-                return 1;
             switch (size) {
             case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
             case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
             case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
-            default: return 1;
+            default: abort(); /* size == 3 is handled earlier */
             }
             break;
         case 26: /* Floating point arithnetic.  */
@@ -4450,7 +4490,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 gen_helper_neon_abd_f32(tmp, tmp, tmp2);
                 break;
             default:
-                return 1;
+                abort(); /* other values are handled earlier */
             }
             break;
         case 27: /* Float multiply.  */
@@ -4476,8 +4516,6 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             }
             break;
         case 29: /* Float compare absolute.  */
-            if (!u)
-                return 1;
             if (size == 0)
                 gen_helper_neon_acge_f32(tmp, tmp, tmp2);
             else
@@ -4522,11 +4560,14 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             /* Two registers and shift.  */
             op = (insn >> 8) & 0xf;
             if (insn & (1 << 7)) {
+                if (op & 8) {
+                    return 1;
+                }
                 /* 64-bit shift.   */
                 size = 3;
             } else {
                 size = 2;
-                while ((insn & (1 << (size + 19))) == 0)
+                while (size && (insn & (1 << (size + 19))) == 0)
                     size--;
             }
             shift = (insn >> 16) & ((1 << (3 + size)) - 1);
@@ -4534,9 +4575,13 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                by immediate using the variable shift operations.  */
             if (op < 8) {
                 /* Shift by immediate:
-                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
+                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VSLI, VQSHL, VQSHLU */
                 /* Right shifts are encoded as N - shift, where N is the
                    element size in bits.  */
+                if ((q && ((rd | rm) & 1))
+                    || (!u && (op == 4 || op == 6))) {
+                    return 1;
+                }
                 if (op <= 4)
                     shift = shift - (1 << (size + 3));
                 if (size == 3) {
@@ -4582,22 +4627,21 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                                 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
                             break;
                         case 4: /* VSRI */
-                            if (!u)
-                                return 1;
                             gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
                             break;
                         case 5: /* VSHL, VSLI */
                             gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
                             break;
-                        case 6: /* VQSHL */
+                        case 6: /* VQSHLU */
+                            gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0,
+                                                     cpu_V1);
+                            break;
+                        case 7: /* VQSHL/VQSHLU */
                             if (u)
                                 gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
                             else
                                 gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
                             break;
-                        case 7: /* VQSHLU */
-                            gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
-                            break;
                         }
                         if (op == 1 || op == 3) {
                             /* Accumulate.  */
@@ -4605,7 +4649,16 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
                         } else if (op == 4 || (op == 5 && u)) {
                             /* Insert */
-                            cpu_abort(env, "VS[LR]I.64 not implemented");
+                            neon_load_reg64(cpu_V0, rd + pass);
+                            uint64_t mask;
+                            if (op == 4) {
+                                mask = 0xffffffffffffffffLL >> -shift;
+                            } else {
+                                mask = 0xffffffffffffffffLL << shift;
+                            }
+                            tcg_gen_andi_i64(cpu_V1, cpu_V1, mask);
+                            tcg_gen_andi_i64(cpu_V0, cpu_V0, ~mask);
+                            tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
                         }
                         neon_store_reg64(cpu_V0, rd + pass);
                     } else { /* size < 3 */
@@ -4623,16 +4676,12 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             GEN_NEON_INTEGER_OP(rshl);
                             break;
                         case 4: /* VSRI */
-                            if (!u)
-                                return 1;
-                            GEN_NEON_INTEGER_OP(shl);
-                            break;
                         case 5: /* VSHL, VSLI */
                             switch (size) {
                             case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: abort(); /* size == 3 is handled earlier */
                             }
                             break;
                         case 6: /* VQSHL */
@@ -4643,7 +4692,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_qshl_u8(tmp, cpu_env, tmp, tmp2); break;
                             case 1: gen_helper_neon_qshl_u16(tmp, cpu_env, tmp, tmp2); break;
                             case 2: gen_helper_neon_qshl_u32(tmp, cpu_env, tmp, tmp2); break;
-                            default: return 1;
+                            default: abort(); /* size == 3 is handled earlier */
                             }
                             break;
                         }
@@ -4697,6 +4746,9 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             } else if (op < 10) {
                 /* Shift by immediate and narrow:
                    VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
+                if (rm & 1) {
+                    return 1;
+                }
                 shift = shift - (1 << (size + 3));
                 size++;
                 switch (size) {
@@ -4760,7 +4812,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 }
             } else if (op == 10) {
                 /* VSHLL */
-                if (q || size == 3)
+                if (q)
                     return 1;
                 tmp = neon_load_reg(rm, 0);
                 tmp2 = neon_load_reg(rm, 1);
@@ -4788,8 +4840,11 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     neon_store_reg64(cpu_V0, rd + pass);
                 }
-            } else if (op == 15 || op == 16) {
+            } else if (op == 14 || op == 15) {
                 /* VCVT fixed-point.  */
+                if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
+                    return 1;
+                }
                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
                     if (op & 1) {
@@ -4846,6 +4901,9 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     imm = ~imm;
                 break;
             case 15:
+                if (invert) {
+                    return 1;
+                }
                 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
                       | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
                 break;
@@ -4884,36 +4942,22 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
     } else { /* (insn & 0x00800010 == 0x00800000) */
         if (size != 3) {
             op = (insn >> 8) & 0xf;
-            if ((insn & (1 << 6)) == 0) {
+            if (!q) {
                 /* Three registers of different lengths.  */
-                int src1_wide;
-                int src2_wide;
-                int prewiden;
-                /* prewiden, src1_wide, src2_wide */
-                static const int neon_3reg_wide[16][3] = {
-                    {1, 0, 0}, /* VADDL */
-                    {1, 1, 0}, /* VADDW */
-                    {1, 0, 0}, /* VSUBL */
-                    {1, 1, 0}, /* VSUBW */
-                    {0, 1, 1}, /* VADDHN */
-                    {0, 0, 0}, /* VABAL */
-                    {0, 1, 1}, /* VSUBHN */
-                    {0, 0, 0}, /* VABDL */
-                    {0, 0, 0}, /* VMLAL */
-                    {0, 0, 0}, /* VQDMLAL */
-                    {0, 0, 0}, /* VMLSL */
-                    {0, 0, 0}, /* VQDMLSL */
-                    {0, 0, 0}, /* Integer VMULL */
-                    {0, 0, 0}, /* VQDMULL */
-                    {0, 0, 0}  /* Polynomial VMULL */
-                };
-
-                prewiden = neon_3reg_wide[op][0];
-                src1_wide = neon_3reg_wide[op][1];
-                src2_wide = neon_3reg_wide[op][2];
-
-                if (size == 0 && (op == 9 || op == 11 || op == 13))
+
+                if (op == 15
+                    || (op < 4 && ((rd & 1) || ((op & 1) && (rn & 1))))
+                    || ((op == 4 || op == 6) && ((rn | rm) & 1))
+                    || ((op == 5 || op >= 7) && (rd & 1))
+                    || ((op == 9 || op == 11) && (u || size == 0))
+                    || (op == 13 && size == 0)
+                    || (op == 14 && (u || size))) {
                     return 1;
+                }
+
+                int src1_wide = (op == 1 || op == 3 || op == 4 || op == 6);
+                int src2_wide = (op == 4 || op == 6);
+                int prewiden = (op < 4);
 
                 /* Avoid overlapping operands.  Wide source operands are
                    always aligned so will never overlap with wide
@@ -4995,7 +5039,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         cpu_abort(env, "Polynomial VMULL not implemented");
 
                     default: /* 15 is RESERVED.  */
-                        return 1;
+                        abort(); /* op == 15 is handled earlier */
                     }
                     if (op == 5 || op == 13 || (op >= 8 && op <= 11)) {
                         /* Accumulate.  */
@@ -5076,8 +5120,15 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 case 5: /* Floating point VMLS scalar */
                 case 8: /* Integer VMUL scalar */
                 case 9: /* Floating point VMUL scalar */
+                    if (size <= (op & 1)) {
+                        return 1;
+                    }
+                    /* fall through */
                 case 12: /* VQDMULH scalar */
                 case 13: /* VQRDMULH scalar */
+                    if (u && ((rd | rn) & 1)) {
+                        return 1;
+                    }
                     tmp = neon_get_scalar(size, rm);
                     neon_store_scratch(0, tmp);
                     for (pass = 0; pass < (u ? 4 : 2); pass++) {
@@ -5086,13 +5137,13 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         if (op == 12) {
                             if (size == 1) {
                                 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
-                            } else {
+                            } else { /* TODO: what happens when size == 0? */
                                 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
                             }
                         } else if (op == 13) {
                             if (size == 1) {
                                 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
-                            } else {
+                            } else { /* TODO: what happens when size == 0? */
                                 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
                             }
                         } else if (op & 1) {
@@ -5102,7 +5153,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
                             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: abort(); /* size == 3 is handled earlier */
                             }
                         }
                         dead_tmp(tmp2);
@@ -5130,14 +5181,19 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         neon_store_reg(rd, pass, tmp);
                     }
                     break;
-                case 2: /* VMLAL sclar */
                 case 3: /* VQDMLAL scalar */
-                case 6: /* VMLSL scalar */
                 case 7: /* VQDMLSL scalar */
-                case 10: /* VMULL scalar */
                 case 11: /* VQDMULL scalar */
-                    if (size == 0 && (op == 3 || op == 7 || op == 11))
+                    if (u) {
                         return 1;
+                    }
+                    /* fall through */
+                case 2: /* VMLAL sclar */
+                case 6: /* VMLSL scalar */
+                case 10: /* VMULL scalar */
+                    if (size == 0 || (rd & 1)) {
+                        return 1;
+                    }
 
                     tmp2 = neon_get_scalar(size, rm);
                     tmp3 = neon_load_reg(rn, 1);
@@ -5189,8 +5245,10 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 imm = (insn >> 8) & 0xf;
                 count = q + 1;
 
-                if (imm > 7 && !q)
+                if ((imm > 7 && !q)
+                    || (q && ((rd | rn | rm) & 1))) {
                     return 1;
+                }
 
                 if (imm == 0) {
                     neon_load_reg64(cpu_V0, rn);
@@ -5240,6 +5298,10 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 /* Two register misc.  */
                 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
                 size = (insn >> 18) & 3;
+                if ((q && (op < 36 || op > 46) && ((rd | rm) & 1))
+                    || (op >= 56 && size != 2)) {
+                    return 1;
+                }
                 switch (op) {
                 case 0: /* VREV64 */
                     if (size == 3)
@@ -5290,15 +5352,19 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     break;
                 case 33: /* VTRN */
-                    if (size == 2) {
+                    switch (size) {
+                    case 0: case 1:
+                        goto elementwise;
+                    case 2:
                         for (n = 0; n < (q ? 4 : 2); n += 2) {
                             tmp = neon_load_reg(rm, n);
                             tmp2 = neon_load_reg(rd, n + 1);
                             neon_store_reg(rm, n, tmp2);
                             neon_store_reg(rd, n + 1, tmp);
                         }
-                    } else {
-                        goto elementwise;
+                        break;
+                    default:
+                        return 1;
                     }
                     break;
                 case 34: /* VUZP */
@@ -5306,7 +5372,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                        Rd   A3 A2 A1 A0  B2 B0 A2 A0
                        Rm   B3 B2 B1 B0  B3 B1 A3 A1
                      */
-                    if (size == 3)
+                    if (size == 3 || (!q && size == 2))
                         return 1;
                     gen_neon_unzip(rd, q, 0, size);
                     gen_neon_unzip(rm, q, 4, size);
@@ -5333,7 +5399,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                        Rd   A3 A2 A1 A0  B1 A1 B0 A0
                        Rm   B3 B2 B1 B0  B3 A3 B2 A2
                      */
-                    if (size == 3)
+                    if (size == 3 || (!q && size == 2))
                         return 1;
                     count = (q ? 4 : 2);
                     for (n = 0; n < count; n++) {
@@ -5355,7 +5421,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     break;
                 case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
-                    if (size == 3)
+                    if (size == 3 || (rm & 1))
                         return 1;
                     TCGV_UNUSED(tmp2);
                     for (pass = 0; pass < 2; pass++) {
@@ -5377,7 +5443,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     break;
                 case 38: /* VSHLL */
-                    if (q || size == 3)
+                    if (q || size == 3 || (rd & 1))
                         return 1;
                     tmp = neon_load_reg(rm, 0);
                     tmp2 = neon_load_reg(rm, 1);
@@ -5390,7 +5456,8 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     break;
                 case 44: /* VCVT.F16.F32 */
-                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
+                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16)
+                        || q || size != 1 || (rm & 1))
                       return 1;
                     tmp = new_tmp();
                     tmp2 = new_tmp();
@@ -5412,7 +5479,8 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     dead_tmp(tmp);
                     break;
                 case 46: /* VCVT.F32.F16 */
-                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
+                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16)
+                        || q || size != 1 || (rd & 1))
                       return 1;
                     tmp3 = new_tmp();
                     tmp = neon_load_reg(rm, 0);
@@ -5448,12 +5516,14 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             switch (size) {
                             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
                             case 1: gen_swap_half(tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 2: /* VREV16 */
-                            if (size != 0)
+                            if (size != 0) {
+                                dead_tmp(tmp);
                                 return 1;
+                            }
                             gen_rev16(tmp);
                             break;
                         case 8: /* CLS */
@@ -5461,7 +5531,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
                             case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
                             case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 9: /* CLZ */
@@ -5469,17 +5539,21 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
                             case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
                             case 2: gen_helper_clz(tmp, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 10: /* CNT */
-                            if (size != 0)
+                            if (size != 0) {
+                                dead_tmp(tmp);
                                 return 1;
+                            }
                             gen_helper_neon_cnt_u8(tmp, tmp);
                             break;
                         case 11: /* VNOT */
-                            if (size != 0)
+                            if (size != 0) {
+                                dead_tmp(tmp);
                                 return 1;
+                            }
                             tcg_gen_not_i32(tmp, tmp);
                             break;
                         case 14: /* VQABS */
@@ -5487,7 +5561,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_qabs_s8(tmp, cpu_env, tmp); break;
                             case 1: gen_helper_neon_qabs_s16(tmp, cpu_env, tmp); break;
                             case 2: gen_helper_neon_qabs_s32(tmp, cpu_env, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 15: /* VQNEG */
@@ -5495,7 +5569,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_qneg_s8(tmp, cpu_env, tmp); break;
                             case 1: gen_helper_neon_qneg_s16(tmp, cpu_env, tmp); break;
                             case 2: gen_helper_neon_qneg_s32(tmp, cpu_env, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 16: case 19: /* VCGT #0, VCLE #0 */
@@ -5504,7 +5578,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp2); dead_tmp(tmp); return 1;
                             }
                             tcg_temp_free(tmp2);
                             if (op == 19)
@@ -5516,7 +5590,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp2); dead_tmp(tmp); return 1;
                             }
                             tcg_temp_free(tmp2);
                             if (op == 20)
@@ -5528,7 +5602,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp2); dead_tmp(tmp); return 1;
                             }
                             tcg_temp_free(tmp2);
                             break;
@@ -5537,24 +5611,35 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
                             case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
                             case 2: tcg_gen_abs_i32(tmp, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 23: /* VNEG */
-                            if (size == 3)
+                            if (size == 3) {
+                                dead_tmp(tmp);
                                 return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_neon_rsb(size, tmp, tmp2);
                             tcg_temp_free(tmp2);
                             break;
                         case 24: case 27: /* Float VCGT #0, Float VCLE #0 */
+                            if (size != 2) {
+                                dead_tmp(tmp);
+                                return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
                             tcg_temp_free(tmp2);
-                            if (op == 27)
+                            if (op == 27) {
                                 tcg_gen_not_i32(tmp, tmp);
+                            }
                             break;
                         case 25: case 28: /* Float VCGE #0, Float VCLT #0 */
+                            if (size != 2) {
+                                dead_tmp(tmp);
+                                return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_helper_neon_cge_f32(tmp, tmp, tmp2);
                             tcg_temp_free(tmp2);
@@ -5562,17 +5647,31 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                                 tcg_gen_not_i32(tmp, tmp);
                             break;
                         case 26: /* Float VCEQ #0 */
+                            if (size != 2) {
+                                dead_tmp(tmp);
+                                return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
                             tcg_temp_free(tmp2);
                             break;
                         case 30: /* Float VABS */
+                            if (size != 2) {
+                                return 1;
+                            }
                             gen_vfp_abs(0);
                             break;
                         case 31: /* Float VNEG */
+                            if (size != 2) {
+                                return 1;
+                            }
                             gen_vfp_neg(0);
                             break;
                         case 32: /* VSWP */
+                            if (size != 0) {
+                                dead_tmp(tmp);
+                                return 1;
+                            }
                             tmp2 = neon_load_reg(rd, pass);
                             neon_store_reg(rm, pass, tmp2);
                             break;
@@ -5581,8 +5680,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             switch (size) {
                             case 0: gen_neon_trn_u8(tmp, tmp2); break;
                             case 1: gen_neon_trn_u16(tmp, tmp2); break;
-                            case 2: abort();
-                            default: return 1;
+                            default: abort(); /* size == 2,3 is handled earlier */
                             }
                             neon_store_reg(rm, pass, tmp2);
                             break;
@@ -5611,7 +5709,8 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             gen_vfp_uito(0);
                             break;
                         default:
-                            /* Reserved: 21, 29, 39-56 */
+                            /* Reserved: 3, 6, 7, 21, 29, 39-43, 45, 47-55 */
+                            dead_tmp(tmp);
                             return 1;
                         }
                         if (op == 30 || op == 31 || op >= 58) {
@@ -5626,7 +5725,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             } else if ((insn & (1 << 10)) == 0) {
                 /* VTBL, VTBX.  */
                 n = ((insn >> 5) & 0x18) + 8;
-                if (insn & (1 << 6)) {
+                if (q) {
                     tmp = neon_load_reg(rd, 0);
                 } else {
                     tmp = new_tmp();
@@ -5637,7 +5736,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 tmp5 = tcg_const_i32(n);
                 gen_helper_neon_tbl(tmp2, tmp2, tmp, tmp4, tmp5);
                 dead_tmp(tmp);
-                if (insn & (1 << 6)) {
+                if (q) {
                     tmp = neon_load_reg(rd, 1);
                 } else {
                     tmp = new_tmp();
@@ -5652,6 +5751,9 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 dead_tmp(tmp);
             } else if ((insn & 0x380) == 0) {
                 /* VDUP */
+                if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
+                    return 1;
+                }
                 if (insn & (1 << 19)) {
                     tmp = neon_load_reg(rm, 1);
                 } else {
@@ -7828,7 +7930,7 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
         /* Coprocessor.  */
         if (((insn >> 24) & 3) == 3) {
             /* Translate into the equivalent ARM encoding.  */
-            insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4);
+            insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
             if (disas_neon_data_insn(env, s, insn))
                 goto illegal_op;
         } else {