diff --git a/target-arm/translate.c b/target-arm/translate.c
index 537d9d6..743b846 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4147,73 +4147,82 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
     if ((insn & (1 << 23)) == 0) {
         /* Three register same length.  */
         op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
-        if (size == 3 && (op == 1 || op == 5 || op == 8 || op == 9
-                          || op == 10 || op  == 11 || op == 16)) {
-            /* 64-bit element instructions.  */
-            for (pass = 0; pass < (q ? 2 : 1); pass++) {
-                neon_load_reg64(cpu_V0, rn + pass);
-                neon_load_reg64(cpu_V1, rm + pass);
-                switch (op) {
-                case 1: /* VQADD */
-                    if (u) {
-                        gen_helper_neon_add_saturate_u64(CPU_V001);
-                    } else {
-                        gen_helper_neon_add_saturate_s64(CPU_V001);
-                    }
-                    break;
-                case 5: /* VQSUB */
-                    if (u) {
-                        gen_helper_neon_sub_saturate_u64(CPU_V001);
-                    } else {
-                        gen_helper_neon_sub_saturate_s64(CPU_V001);
-                    }
-                    break;
-                case 8: /* VSHL */
-                    if (u) {
-                        gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
-                    } else {
-                        gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
-                    }
-                    break;
-                case 9: /* VQSHL */
-                    if (u) {
-                        gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
-                                                 cpu_V0, cpu_V0);
-                    } else {
-                        gen_helper_neon_qshl_s64(cpu_V1, cpu_env,
-                                                 cpu_V1, cpu_V0);
-                    }
-                    break;
-                case 10: /* VRSHL */
-                    if (u) {
-                        gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
-                    } else {
-                        gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
-                    }
-                    break;
-                case 11: /* VQRSHL */
-                    if (u) {
-                        gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
-                                                  cpu_V1, cpu_V0);
-                    } else {
-                        gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
-                                                  cpu_V1, cpu_V0);
-                    }
-                    break;
-                case 16:
-                    if (u) {
-                        tcg_gen_sub_i64(CPU_V001);
-                    } else {
-                        tcg_gen_add_i64(CPU_V001);
+        if (op == 24 || op == 25 || (q && ((rd | rn | rm) & 1))) {
+            return 1;
+        }
+        if (size == 3) {
+            if (op == 1 || op == 5 || op == 8 || op == 9 || op == 10
+                || op == 11 || op == 16) {
+                /* 64-bit element instructions.  */
+                for (pass = 0; pass < (q ? 2 : 1); pass++) {
+                    neon_load_reg64(cpu_V0, rn + pass);
+                    neon_load_reg64(cpu_V1, rm + pass);
+                    switch (op) {
+                    case 1: /* VQADD */
+                        if (u) {
+                            gen_helper_neon_add_saturate_u64(CPU_V001);
+                        } else {
+                            gen_helper_neon_add_saturate_s64(CPU_V001);
+                        }
+                        break;
+                    case 5: /* VQSUB */
+                        if (u) {
+                            gen_helper_neon_sub_saturate_u64(CPU_V001);
+                        } else {
+                            gen_helper_neon_sub_saturate_s64(CPU_V001);
+                        }
+                        break;
+                    case 8: /* VSHL */
+                        if (u) {
+                            gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
+                        } else {
+                            gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
+                        }
+                        break;
+                    case 9: /* VQSHL */
+                        if (u) {
+                            gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
+                                                     cpu_V0, cpu_V0);
+                        } else {
+                            gen_helper_neon_qshl_s64(cpu_V1, cpu_env,
+                                                     cpu_V1, cpu_V0);
+                        }
+                        break;
+                    case 10: /* VRSHL */
+                        if (u) {
+                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
+                        } else {
+                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
+                        }
+                        break;
+                    case 11: /* VQRSHL */
+                        if (u) {
+                            gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
+                                                      cpu_V1, cpu_V0);
+                        } else {
+                            gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
+                                                      cpu_V1, cpu_V0);
+                        }
+                        break;
+                    case 16: /* VADD, VSUB */
+                        if (u) {
+                            tcg_gen_sub_i64(CPU_V001);
+                        } else {
+                            tcg_gen_add_i64(CPU_V001);
+                        }
+                        break;
+                    default:
+                        abort();
                     }
-                    break;
-                default:
-                    abort();
+                    neon_store_reg64(cpu_V0, rd + pass);
                 }
-                neon_store_reg64(cpu_V0, rd + pass);
+                return 0;
+            }
+            if (op != 3) {
+                return 1;
             }
-            return 0;
         }
+        pairwise = 0;
         switch (op) {
         case 8: /* VSHL */
         case 9: /* VQSHL */
@@ -4225,39 +4234,72 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 rtmp = rn;
                 rn = rm;
                 rm = rtmp;
-                pairwise = 0;
             }
             break;
+        case 19: /* VMUL */
+            if (u && size) {
+                return 1;
+            }
+            break;
+        case 23: /* VPADD */
+            if (u) {
+                return 1;
+            }
+            /* fall through */
         case 20: /* VPMAX */
         case 21: /* VPMIN */
-        case 23: /* VPADD */
             pairwise = 1;
             break;
-        case 26: /* VPADD (float) */
+        case 22: /* VQDMULH/VQRDMULH */
+            if (!size) {
+                return 1;
+            }
+            break;
+        case 26: /* VADD/VSUB/VPADD/VABD (float) */
             pairwise = (u && size < 2);
+            /* fall through */
+        case 27: /* VMLA/VMLS/VMUL (float) */
+            if (size & 1) {
+                return 1;
+            }
+            break;
+        case 28: /* VCEQ/VCGE/VCGT (float) */
+            if ((!u && size) || (size & 1)) {
+                return 1;
+            }
+            break;
+        case 29: /* VACGE/VACGT (float) */
+            if (!u || (size & 1)) {
+                return 1;
+            }
             break;
         case 30: /* VPMIN/VPMAX (float) */
             pairwise = u;
+            if (size & 1) {
+                return 1;
+            }
+            break;
+        case 31: /* VRECPS/VRSQRTS */
+            if (u || (size & 1)) {
+                return 1;
+            }
             break;
         default:
-            pairwise = 0;
             break;
         }
+        if (pairwise && q) {
+            return 1;
+        }
 
         for (pass = 0; pass < (q ? 4 : 2); pass++) {
-
         if (pairwise) {
             /* Pairwise.  */
-            if (q)
-                n = (pass & 1) * 2;
-            else
-                n = 0;
-            if (pass < q + 1) {
-                tmp = neon_load_reg(rn, n);
-                tmp2 = neon_load_reg(rn, n + 1);
+            if (!pass) {
+                tmp = neon_load_reg(rn, 0);
+                tmp2 = neon_load_reg(rn, 1);
             } else {
-                tmp = neon_load_reg(rm, n);
-                tmp2 = neon_load_reg(rm, n + 1);
+                tmp = neon_load_reg(rm, 0);
+                tmp2 = neon_load_reg(rm, 1);
             }
         } else {
             /* Elementwise.  */
@@ -4350,13 +4392,13 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
         case 16:
             if (!u) { /* VADD */
                 if (gen_neon_add(size, tmp, tmp2))
-                    return 1;
+                    abort(); /* size == 3 is handled earlier */
             } else { /* VSUB */
                 switch (size) {
                 case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
                 case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
                 case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 3 is handled earlier */
                 }
             }
             break;
@@ -4366,14 +4408,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
                 case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
                 case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 3 is handled earlier */
                 }
             } else { /* VCEQ */
                 switch (size) {
                 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
                 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
                 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 3 is handled earlier */
                 }
             }
             break;
@@ -4382,7 +4424,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-            default: return 1;
+            default: abort(); /* size == 3 is handled earlier */
             }
             dead_tmp(tmp2);
             tmp2 = neon_load_reg(rd, pass);
@@ -4400,7 +4442,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
                 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
                 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 3 is handled earlier */
                 }
             }
             break;
@@ -4415,24 +4457,22 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 switch (size) {
                 case 1: gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); break;
                 case 2: gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 0,3 is handled earlier */
                 }
             } else { /* VQRDHMUL */
                 switch (size) {
                 case 1: gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); break;
                 case 2: gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 0,3 is handled earlier */
                 }
             }
             break;
         case 23: /* VPADD */
-            if (u)
-                return 1;
             switch (size) {
             case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
             case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
             case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
-            default: return 1;
+            default: abort(); /* size == 3 is handled earlier */
             }
             break;
         case 26: /* Floating point arithnetic.  */
@@ -4450,7 +4490,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 gen_helper_neon_abd_f32(tmp, tmp, tmp2);
                 break;
             default:
-                return 1;
+                abort(); /* other values are handled earlier */
             }
             break;
         case 27: /* Float multiply.  */
@@ -4476,8 +4516,6 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             }
             break;
         case 29: /* Float compare absolute.  */
-            if (!u)
-                return 1;
             if (size == 0)
                 gen_helper_neon_acge_f32(tmp, tmp, tmp2);
             else
@@ -4522,11 +4560,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             /* Two registers and shift.  */
             op = (insn >> 8) & 0xf;
             if (insn & (1 << 7)) {
+                if (op & 8) {
+                    return 1;
+                }
                 /* 64-bit shift.   */
                 size = 3;
             } else {
                 size = 2;
-                while ((insn & (1 << (size + 19))) == 0)
+                while (size && (insn & (1 << (size + 19))) == 0)
                     size--;
             }
             shift = (insn >> 16) & ((1 << (3 + size)) - 1);
@@ -4534,9 +4575,13 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                by immediate using the variable shift operations.  */
             if (op < 8) {
                 /* Shift by immediate:
-                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
+                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VSLI, VQSHL, VQSHLU */
                 /* Right shifts are encoded as N - shift, where N is the
                    element size in bits.  */
+                if ((q && ((rd | rm) & 1))
+                    || (!u && (op == 4 || op == 6))) {
+                    return 1;
+                }
                 if (op <= 4)
                     shift = shift - (1 << (size + 3));
                 if (size == 3) {
@@ -4582,22 +4627,21 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                                 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
                             break;
                         case 4: /* VSRI */
-                            if (!u)
-                                return 1;
                             gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
                             break;
                         case 5: /* VSHL, VSLI */
                             gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
                             break;
-                        case 6: /* VQSHL */
+                        case 6: /* VQSHLU */
+                            gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0,
+                                                     cpu_V1);
+                            break;
+                        case 7: /* VQSHL/VQSHLU */
                             if (u)
                                 gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
                             else
                                 gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
                             break;
-                        case 7: /* VQSHLU */
-                            gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
-                            break;
                         }
                         if (op == 1 || op == 3) {
                             /* Accumulate.  */
@@ -4605,7 +4649,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
                         } else if (op == 4 || (op == 5 && u)) {
                             /* Insert */
-                            cpu_abort(env, "VS[LR]I.64 not implemented");
+                            neon_load_reg64(cpu_V0, rd + pass);
+                            uint64_t mask;
+                            if (op == 4) {
+                                mask = 0xffffffffffffffffLL >> -shift;
+                            } else {
+                                mask = 0xffffffffffffffffLL << shift;
+                            }
+                            tcg_gen_andi_i64(cpu_V1, cpu_V1, mask);
+                            tcg_gen_andi_i64(cpu_V0, cpu_V0, ~mask);
+                            tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
                         }
                         neon_store_reg64(cpu_V0, rd + pass);
                     } else { /* size < 3 */
@@ -4623,16 +4676,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             GEN_NEON_INTEGER_OP(rshl);
                             break;
                         case 4: /* VSRI */
-                            if (!u)
-                                return 1;
-                            GEN_NEON_INTEGER_OP(shl);
-                            break;
                         case 5: /* VSHL, VSLI */
                             switch (size) {
                             case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: abort(); /* size == 3 is handled earlier */
                             }
                             break;
                         case 6: /* VQSHL */
@@ -4643,7 +4692,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_qshl_u8(tmp, cpu_env, tmp, tmp2); break;
                             case 1: gen_helper_neon_qshl_u16(tmp, cpu_env, tmp, tmp2); break;
                             case 2: gen_helper_neon_qshl_u32(tmp, cpu_env, tmp, tmp2); break;
-                            default: return 1;
+                            default: abort(); /* size == 3 is handled earlier */
                             }
                             break;
                         }
@@ -4697,6 +4746,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             } else if (op < 10) {
                 /* Shift by immediate and narrow:
                    VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
+                if (rm & 1) {
+                    return 1;
+                }
                 shift = shift - (1 << (size + 3));
                 size++;
                 switch (size) {
@@ -4760,7 +4812,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 }
             } else if (op == 10) {
                 /* VSHLL */
-                if (q || size == 3)
+                if (q)
                     return 1;
                 tmp = neon_load_reg(rm, 0);
                 tmp2 = neon_load_reg(rm, 1);
@@ -4788,8 +4840,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     neon_store_reg64(cpu_V0, rd + pass);
                 }
-            } else if (op == 15 || op == 16) {
+            } else if (op == 14 || op == 15) {
                 /* VCVT fixed-point.  */
+                if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
+                    return 1;
+                }
                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
                     if (op & 1) {
@@ -4846,6 +4901,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     imm = ~imm;
                 break;
             case 15:
+                if (invert) {
+                    return 1;
+                }
                 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
                       | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
                 break;
@@ -4884,36 +4942,22 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
     } else { /* (insn & 0x00800010 == 0x00800000) */
         if (size != 3) {
             op = (insn >> 8) & 0xf;
-            if ((insn & (1 << 6)) == 0) {
+            if (!q) {
                 /* Three registers of different lengths.  */
-                int src1_wide;
-                int src2_wide;
-                int prewiden;
-                /* prewiden, src1_wide, src2_wide */
-                static const int neon_3reg_wide[16][3] = {
-                    {1, 0, 0}, /* VADDL */
-                    {1, 1, 0}, /* VADDW */
-                    {1, 0, 0}, /* VSUBL */
-                    {1, 1, 0}, /* VSUBW */
-                    {0, 1, 1}, /* VADDHN */
-                    {0, 0, 0}, /* VABAL */
-                    {0, 1, 1}, /* VSUBHN */
-                    {0, 0, 0}, /* VABDL */
-                    {0, 0, 0}, /* VMLAL */
-                    {0, 0, 0}, /* VQDMLAL */
-                    {0, 0, 0}, /* VMLSL */
-                    {0, 0, 0}, /* VQDMLSL */
-                    {0, 0, 0}, /* Integer VMULL */
-                    {0, 0, 0}, /* VQDMULL */
-                    {0, 0, 0}  /* Polynomial VMULL */
-                };
-
-                prewiden = neon_3reg_wide[op][0];
-                src1_wide = neon_3reg_wide[op][1];
-                src2_wide = neon_3reg_wide[op][2];
-
-                if (size == 0 && (op == 9 || op == 11 || op == 13))
+
+                if (op == 15
+                    || (op < 4 && ((rd & 1) || ((op & 1) && (rn & 1))))
+                    || ((op == 4 || op == 6) && ((rn | rm) & 1))
+                    || ((op == 5 || op >= 7) && (rd & 1))
+                    || ((op == 9 || op == 11) && (u || size == 0))
+                    || (op == 13 && size == 0)
+                    || (op == 14 && (u || size))) {
                     return 1;
+                }
+
+                int src1_wide = (op == 1 || op == 3 || op == 4 || op == 6);
+                int src2_wide = (op == 4 || op == 6);
+                int prewiden = (op < 4);
 
                 /* Avoid overlapping operands.  Wide source operands are
                    always aligned so will never overlap with wide
@@ -4995,7 +5039,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         cpu_abort(env, "Polynomial VMULL not implemented");
 
                     default: /* 15 is RESERVED.  */
-                        return 1;
+                        abort(); /* op == 15 is handled earlier */
                     }
                     if (op == 5 || op == 13 || (op >= 8 && op <= 11)) {
                         /* Accumulate.  */
@@ -5076,8 +5120,15 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 case 5: /* Floating point VMLS scalar */
                 case 8: /* Integer VMUL scalar */
                 case 9: /* Floating point VMUL scalar */
+                    if (size <= (op & 1)) {
+                        return 1;
+                    }
+                    /* fall through */
                 case 12: /* VQDMULH scalar */
                 case 13: /* VQRDMULH scalar */
+                    if (u && ((rd | rn) & 1)) {
+                        return 1;
+                    }
                     tmp = neon_get_scalar(size, rm);
                     neon_store_scratch(0, tmp);
                     for (pass = 0; pass < (u ? 4 : 2); pass++) {
@@ -5086,13 +5137,13 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         if (op == 12) {
                             if (size == 1) {
                                 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
-                            } else {
+                            } else { /* TODO: what happens when size == 0? */
                                 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
                             }
                         } else if (op == 13) {
                             if (size == 1) {
                                 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
-                            } else {
+                            } else { /* TODO: what happens when size == 0? */
                                 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
                             }
                         } else if (op & 1) {
@@ -5102,7 +5153,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
                             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: abort(); /* size == 3 is handled earlier */
                             }
                         }
                         dead_tmp(tmp2);
@@ -5130,14 +5181,19 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         neon_store_reg(rd, pass, tmp);
                     }
                     break;
-                case 2: /* VMLAL sclar */
                 case 3: /* VQDMLAL scalar */
-                case 6: /* VMLSL scalar */
                 case 7: /* VQDMLSL scalar */
-                case 10: /* VMULL scalar */
                 case 11: /* VQDMULL scalar */
-                    if (size == 0 && (op == 3 || op == 7 || op == 11))
+                    if (u) {
                         return 1;
+                    }
+                    /* fall through */
+                case 2: /* VMLAL sclar */
+                case 6: /* VMLSL scalar */
+                case 10: /* VMULL scalar */
+                    if (size == 0 || (rd & 1)) {
+                        return 1;
+                    }
 
                     tmp2 = neon_get_scalar(size, rm);
                     tmp3 = neon_load_reg(rn, 1);
@@ -5189,8 +5245,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 imm = (insn >> 8) & 0xf;
                 count = q + 1;
 
-                if (imm > 7 && !q)
+                if ((imm > 7 && !q)
+                    || (q && ((rd | rn | rm) & 1))) {
                     return 1;
+                }
 
                 if (imm == 0) {
                     neon_load_reg64(cpu_V0, rn);
@@ -5240,6 +5298,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 /* Two register misc.  */
                 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
                 size = (insn >> 18) & 3;
+                if ((q && (op < 36 || op > 46) && ((rd | rm) & 1))
+                    || (op >= 56 && size != 2)) {
+                    return 1;
+                }
                 switch (op) {
                 case 0: /* VREV64 */
                     if (size == 3)
@@ -5290,15 +5352,19 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     break;
                 case 33: /* VTRN */
-                    if (size == 2) {
+                    switch (size) {
+                    case 0: case 1:
+                        goto elementwise;
+                    case 2:
                         for (n = 0; n < (q ? 4 : 2); n += 2) {
                             tmp = neon_load_reg(rm, n);
                             tmp2 = neon_load_reg(rd, n + 1);
                             neon_store_reg(rm, n, tmp2);
                             neon_store_reg(rd, n + 1, tmp);
                         }
-                    } else {
-                        goto elementwise;
+                        break;
+                    default:
+                        return 1;
                     }
                     break;
                 case 34: /* VUZP */
@@ -5306,7 +5372,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                        Rd   A3 A2 A1 A0  B2 B0 A2 A0
                        Rm   B3 B2 B1 B0  B3 B1 A3 A1
                      */
-                    if (size == 3)
+                    if (size == 3 || (!q && size == 2))
                         return 1;
                     gen_neon_unzip(rd, q, 0, size);
                     gen_neon_unzip(rm, q, 4, size);
@@ -5333,7 +5399,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                        Rd   A3 A2 A1 A0  B1 A1 B0 A0
                        Rm   B3 B2 B1 B0  B3 A3 B2 A2
                      */
-                    if (size == 3)
+                    if (size == 3 || (!q && size == 2))
                         return 1;
                     count = (q ? 4 : 2);
                     for (n = 0; n < count; n++) {
@@ -5355,7 +5421,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     break;
                 case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
-                    if (size == 3)
+                    if (size == 3 || (rm & 1))
                         return 1;
                     TCGV_UNUSED(tmp2);
                     for (pass = 0; pass < 2; pass++) {
@@ -5377,7 +5443,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     break;
                 case 38: /* VSHLL */
-                    if (q || size == 3)
+                    if (q || size == 3 || (rd & 1))
                         return 1;
                     tmp = neon_load_reg(rm, 0);
                     tmp2 = neon_load_reg(rm, 1);
@@ -5390,7 +5456,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     break;
                 case 44: /* VCVT.F16.F32 */
-                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
+                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16)
+                        || q || size != 1 || (rm & 1))
                       return 1;
                     tmp = new_tmp();
                     tmp2 = new_tmp();
@@ -5412,7 +5479,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     dead_tmp(tmp);
                     break;
                 case 46: /* VCVT.F32.F16 */
-                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
+                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16)
+                        || q || size != 1 || (rd & 1))
                       return 1;
                     tmp3 = new_tmp();
                     tmp = neon_load_reg(rm, 0);
@@ -5448,12 +5516,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             switch (size) {
                             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
                             case 1: gen_swap_half(tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 2: /* VREV16 */
-                            if (size != 0)
+                            if (size != 0) {
+                                dead_tmp(tmp);
                                 return 1;
+                            }
                             gen_rev16(tmp);
                             break;
                         case 8: /* CLS */
@@ -5461,7 +5531,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
                             case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
                             case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 9: /* CLZ */
@@ -5469,17 +5539,21 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
                             case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
                             case 2: gen_helper_clz(tmp, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 10: /* CNT */
-                            if (size != 0)
+                            if (size != 0) {
+                                dead_tmp(tmp);
                                 return 1;
+                            }
                             gen_helper_neon_cnt_u8(tmp, tmp);
                             break;
                         case 11: /* VNOT */
-                            if (size != 0)
+                            if (size != 0) {
+                                dead_tmp(tmp);
                                 return 1;
+                            }
                             tcg_gen_not_i32(tmp, tmp);
                             break;
                         case 14: /* VQABS */
@@ -5487,7 +5561,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_qabs_s8(tmp, cpu_env, tmp); break;
                             case 1: gen_helper_neon_qabs_s16(tmp, cpu_env, tmp); break;
                             case 2: gen_helper_neon_qabs_s32(tmp, cpu_env, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 15: /* VQNEG */
@@ -5495,7 +5569,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_qneg_s8(tmp, cpu_env, tmp); break;
                             case 1: gen_helper_neon_qneg_s16(tmp, cpu_env, tmp); break;
                             case 2: gen_helper_neon_qneg_s32(tmp, cpu_env, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 16: case 19: /* VCGT #0, VCLE #0 */
@@ -5504,7 +5578,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp2); dead_tmp(tmp); return 1;
                             }
                             tcg_temp_free(tmp2);
                             if (op == 19)
@@ -5516,7 +5590,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp2); dead_tmp(tmp); return 1;
                             }
                             tcg_temp_free(tmp2);
                             if (op == 20)
@@ -5528,7 +5602,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp2); dead_tmp(tmp); return 1;
                             }
                             tcg_temp_free(tmp2);
                             break;
@@ -5537,24 +5611,35 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
                             case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
                             case 2: tcg_gen_abs_i32(tmp, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 23: /* VNEG */
-                            if (size == 3)
+                            if (size == 3) {
+                                dead_tmp(tmp);
                                 return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_neon_rsb(size, tmp, tmp2);
                             tcg_temp_free(tmp2);
                             break;
                         case 24: case 27: /* Float VCGT #0, Float VCLE #0 */
+                            if (size != 2) {
+                                dead_tmp(tmp);
+                                return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
                             tcg_temp_free(tmp2);
-                            if (op == 27)
+                            if (op == 27) {
                                 tcg_gen_not_i32(tmp, tmp);
+                            }
                             break;
                         case 25: case 28: /* Float VCGE #0, Float VCLT #0 */
+                            if (size != 2) {
+                                dead_tmp(tmp);
+                                return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_helper_neon_cge_f32(tmp, tmp, tmp2);
                             tcg_temp_free(tmp2);
@@ -5562,17 +5647,31 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                                 tcg_gen_not_i32(tmp, tmp);
                             break;
                         case 26: /* Float VCEQ #0 */
+                            if (size != 2) {
+                                dead_tmp(tmp);
+                                return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
                             tcg_temp_free(tmp2);
                             break;
                         case 30: /* Float VABS */
+                            if (size != 2) {
+                                return 1;
+                            }
                             gen_vfp_abs(0);
                             break;
                         case 31: /* Float VNEG */
+                            if (size != 2) {
+                                return 1;
+                            }
                             gen_vfp_neg(0);
                             break;
                         case 32: /* VSWP */
+                            if (size != 0) {
+                                dead_tmp(tmp);
+                                return 1;
+                            }
                             tmp2 = neon_load_reg(rd, pass);
                             neon_store_reg(rm, pass, tmp2);
                             break;
@@ -5581,8 +5680,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             switch (size) {
                             case 0: gen_neon_trn_u8(tmp, tmp2); break;
                             case 1: gen_neon_trn_u16(tmp, tmp2); break;
-                            case 2: abort();
-                            default: return 1;
+                            default: abort(); /* size == 2,3 is handled earlier */
                             }
                             neon_store_reg(rm, pass, tmp2);
                             break;
@@ -5611,7 +5709,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             gen_vfp_uito(0);
                             break;
                         default:
-                            /* Reserved: 21, 29, 39-56 */
+                            /* Reserved: 3, 6, 7, 21, 29, 39-43, 45, 47-55 */
+                            dead_tmp(tmp);
                             return 1;
                         }
                         if (op == 30 || op == 31 || op >= 58) {
@@ -5626,7 +5725,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             } else if ((insn & (1 << 10)) == 0) {
                 /* VTBL, VTBX.  */
                 n = ((insn >> 5) & 0x18) + 8;
-                if (insn & (1 << 6)) {
+                if (q) {
                     tmp = neon_load_reg(rd, 0);
                 } else {
                     tmp = new_tmp();
@@ -5637,7 +5736,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 tmp5 = tcg_const_i32(n);
                 gen_helper_neon_tbl(tmp2, tmp2, tmp, tmp4, tmp5);
                 dead_tmp(tmp);
-                if (insn & (1 << 6)) {
+                if (q) {
                     tmp = neon_load_reg(rd, 1);
                 } else {
                     tmp = new_tmp();
@@ -5652,6 +5751,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 dead_tmp(tmp);
             } else if ((insn & 0x380) == 0) {
                 /* VDUP */
+                if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
+                    return 1;
+                }
                 if (insn & (1 << 19)) {
                     tmp = neon_load_reg(rm, 1);
                 } else {
@@ -7828,7 +7930,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
         /* Coprocessor.  */
         if (((insn >> 24) & 3) == 3) {
             /* Translate into the equivalent ARM encoding.  */
-            insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4);
+            insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
             if (disas_neon_data_insn(env, s, insn))
                 goto illegal_op;
         } else {
