Patchwork target-arm: neon emulation enhancements

login
register
mail settings
Submitter Juha.Riihimaki@nokia.com
Date Jan. 19, 2010, 12:27 p.m.
Message ID <1263904078-63327-1-git-send-email-juha.riihimaki@nokia.com>
Download mbox | patch
Permalink /patch/43188/
State New
Headers show

Comments

Juha.Riihimaki@nokia.com - Jan. 19, 2010, 12:27 p.m.
From: Juha Riihimäki <juha.riihimaki@nokia.com>

This patch improves the detection of undefined NEON data instruction
encodings, fixes a bug in vcvt instruction decoding, adds an
implementation for 64bit wide vsli and vsri instructions and finally
aligns the formatting of the source code inside the disas_neon_data_insn
function.

Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com>
---
 target-arm/translate.c | 1095 ++++++++++++++++++++++++++++--------------------
 1 files changed, 640 insertions(+), 455 deletions(-)

Patch

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 5cf3e06..50a2440 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4136,8 +4136,9 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
     TCGv tmp, tmp2, tmp3, tmp4, tmp5;
     TCGv_i64 tmp64;
 
-    if (!vfp_enabled(env))
-      return 1;
+    if (!vfp_enabled(env)) {
+        return 1;
+    }
     q = (insn & (1 << 6)) != 0;
     u = (insn >> 24) & 1;
     VFP_DREG_D(rd, insn);
@@ -4147,73 +4148,82 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
     if ((insn & (1 << 23)) == 0) {
         /* Three register same length.  */
         op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
-        if (size == 3 && (op == 1 || op == 5 || op == 8 || op == 9
-                          || op == 10 || op  == 11 || op == 16)) {
-            /* 64-bit element instructions.  */
-            for (pass = 0; pass < (q ? 2 : 1); pass++) {
-                neon_load_reg64(cpu_V0, rn + pass);
-                neon_load_reg64(cpu_V1, rm + pass);
-                switch (op) {
-                case 1: /* VQADD */
-                    if (u) {
-                        gen_helper_neon_add_saturate_u64(CPU_V001);
-                    } else {
-                        gen_helper_neon_add_saturate_s64(CPU_V001);
-                    }
-                    break;
-                case 5: /* VQSUB */
-                    if (u) {
-                        gen_helper_neon_sub_saturate_u64(CPU_V001);
-                    } else {
-                        gen_helper_neon_sub_saturate_s64(CPU_V001);
-                    }
-                    break;
-                case 8: /* VSHL */
-                    if (u) {
-                        gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
-                    } else {
-                        gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
-                    }
-                    break;
-                case 9: /* VQSHL */
-                    if (u) {
-                        gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
-                                                 cpu_V0, cpu_V0);
-                    } else {
-                        gen_helper_neon_qshl_s64(cpu_V1, cpu_env,
-                                                 cpu_V1, cpu_V0);
-                    }
-                    break;
-                case 10: /* VRSHL */
-                    if (u) {
-                        gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
-                    } else {
-                        gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
-                    }
-                    break;
-                case 11: /* VQRSHL */
-                    if (u) {
-                        gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
-                                                  cpu_V1, cpu_V0);
-                    } else {
-                        gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
-                                                  cpu_V1, cpu_V0);
-                    }
-                    break;
-                case 16:
-                    if (u) {
-                        tcg_gen_sub_i64(CPU_V001);
-                    } else {
-                        tcg_gen_add_i64(CPU_V001);
+        if (op == 24 || op == 25 || (q && ((rd | rn | rm) & 1))) {
+            return 1;
+        }
+        if (size == 3) {
+            if (op == 1 || op == 5 || op == 8 || op == 9 || op == 10
+                || op == 11 || op == 16) {
+                /* 64-bit element instructions.  */
+                for (pass = 0; pass < (q ? 2 : 1); pass++) {
+                    neon_load_reg64(cpu_V0, rn + pass);
+                    neon_load_reg64(cpu_V1, rm + pass);
+                    switch (op) {
+                    case 1: /* VQADD */
+                        if (u) {
+                            gen_helper_neon_add_saturate_u64(CPU_V001);
+                        } else {
+                            gen_helper_neon_add_saturate_s64(CPU_V001);
+                        }
+                        break;
+                    case 5: /* VQSUB */
+                        if (u) {
+                            gen_helper_neon_sub_saturate_u64(CPU_V001);
+                        } else {
+                            gen_helper_neon_sub_saturate_s64(CPU_V001);
+                        }
+                        break;
+                    case 8: /* VSHL */
+                        if (u) {
+                            gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
+                        } else {
+                            gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
+                        }
+                        break;
+                    case 9: /* VQSHL */
+                        if (u) {
+                            gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
+                                                     cpu_V0, cpu_V0);
+                        } else {
+                            gen_helper_neon_qshl_s64(cpu_V1, cpu_env,
+                                                     cpu_V1, cpu_V0);
+                        }
+                        break;
+                    case 10: /* VRSHL */
+                        if (u) {
+                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
+                        } else {
+                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
+                        }
+                        break;
+                    case 11: /* VQRSHL */
+                        if (u) {
+                            gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
+                                                      cpu_V1, cpu_V0);
+                        } else {
+                            gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
+                                                      cpu_V1, cpu_V0);
+                        }
+                        break;
+                    case 16: /* VADD, VSUB */
+                        if (u) {
+                            tcg_gen_sub_i64(CPU_V001);
+                        } else {
+                            tcg_gen_add_i64(CPU_V001);
+                        }
+                        break;
+                    default:
+                        abort();
                     }
-                    break;
-                default:
-                    abort();
+                    neon_store_reg64(cpu_V0, rd + pass);
                 }
-                neon_store_reg64(cpu_V0, rd + pass);
+                return 0;
+            }
+            if (op != 3) {
+                return 1;
             }
-            return 0;
         }
+        pairwise = 0;
         switch (op) {
         case 8: /* VSHL */
         case 9: /* VQSHL */
@@ -4225,290 +4235,333 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 rtmp = rn;
                 rn = rm;
                 rm = rtmp;
-                pairwise = 0;
             }
             break;
+        case 19: /* VMUL */
+            if (u && size) {
+                return 1;
+            }
+            break;
+        case 23: /* VPADD */
+            if (u) {
+                return 1;
+            }
+            /* fall through */
         case 20: /* VPMAX */
         case 21: /* VPMIN */
-        case 23: /* VPADD */
             pairwise = 1;
             break;
-        case 26: /* VPADD (float) */
+        case 22: /* VQDMULH/VQRDMULH */
+            if (!size) {
+                return 1;
+            }
+            break;
+        case 26: /* VADD/VSUB/VPADD/VABD (float) */
             pairwise = (u && size < 2);
+            /* fall through */
+        case 27: /* VMLA/VMLS/VMUL (float) */
+            if (size & 1) {
+                return 1;
+            }
+            break;
+        case 28: /* VCEQ/VCGE/VCGT (float) */
+            if ((!u && size) || (size & 1)) {
+                return 1;
+            }
+            break;
+        case 29: /* VACGE/VACGT (float) */
+            if (!u || (size & 1)) {
+                return 1;
+            }
             break;
         case 30: /* VPMIN/VPMAX (float) */
             pairwise = u;
+            if (size & 1) {
+                return 1;
+            }
+            break;
+        case 31: /* VRECPS/VRSQRTS */
+            if (u || (size & 1)) {
+                return 1;
+            }
             break;
         default:
-            pairwise = 0;
             break;
         }
+        if (pairwise && q) {
+            return 1;
+        }
 
         for (pass = 0; pass < (q ? 4 : 2); pass++) {
-
-        if (pairwise) {
-            /* Pairwise.  */
-            if (q)
-                n = (pass & 1) * 2;
-            else
-                n = 0;
-            if (pass < q + 1) {
-                tmp = neon_load_reg(rn, n);
-                tmp2 = neon_load_reg(rn, n + 1);
+            if (pairwise) {
+                /* Pairwise.  */
+                if (!pass) {
+                    tmp = neon_load_reg(rn, 0);
+                    tmp2 = neon_load_reg(rn, 1);
+                } else {
+                    tmp = neon_load_reg(rm, 0);
+                    tmp2 = neon_load_reg(rm, 1);
+                }
             } else {
-                tmp = neon_load_reg(rm, n);
-                tmp2 = neon_load_reg(rm, n + 1);
+                /* Elementwise.  */
+                tmp = neon_load_reg(rn, pass);
+                tmp2 = neon_load_reg(rm, pass);
             }
-        } else {
-            /* Elementwise.  */
-            tmp = neon_load_reg(rn, pass);
-            tmp2 = neon_load_reg(rm, pass);
-        }
-        switch (op) {
-        case 0: /* VHADD */
-            GEN_NEON_INTEGER_OP(hadd);
-            break;
-        case 1: /* VQADD */
-            GEN_NEON_INTEGER_OP_ENV(qadd);
-            break;
-        case 2: /* VRHADD */
-            GEN_NEON_INTEGER_OP(rhadd);
-            break;
-        case 3: /* Logic ops.  */
-            switch ((u << 2) | size) {
-            case 0: /* VAND */
-                tcg_gen_and_i32(tmp, tmp, tmp2);
+            switch (op) {
+            case 0: /* VHADD */
+                GEN_NEON_INTEGER_OP(hadd);
                 break;
-            case 1: /* BIC */
-                tcg_gen_andc_i32(tmp, tmp, tmp2);
+            case 1: /* VQADD */
+                GEN_NEON_INTEGER_OP_ENV(qadd);
                 break;
-            case 2: /* VORR */
-                tcg_gen_or_i32(tmp, tmp, tmp2);
+            case 2: /* VRHADD */
+                GEN_NEON_INTEGER_OP(rhadd);
                 break;
-            case 3: /* VORN */
-                tcg_gen_orc_i32(tmp, tmp, tmp2);
+            case 3: /* Logic ops.  */
+                switch ((u << 2) | size) {
+                case 0: /* VAND */
+                    tcg_gen_and_i32(tmp, tmp, tmp2);
+                    break;
+                case 1: /* VBIC */
+                    tcg_gen_andc_i32(tmp, tmp, tmp2);
+                    break;
+                case 2: /* VORR, VMOV */
+                    tcg_gen_or_i32(tmp, tmp, tmp2);
+                    break;
+                case 3: /* VORN */
+                    tcg_gen_orc_i32(tmp, tmp, tmp2);
+                    break;
+                case 4: /* VEOR */
+                    tcg_gen_xor_i32(tmp, tmp, tmp2);
+                    break;
+                case 5: /* VBSL */
+                    tmp3 = neon_load_reg(rd, pass);
+                    gen_neon_bsl(tmp, tmp, tmp2, tmp3);
+                    dead_tmp(tmp3);
+                    break;
+                case 6: /* VBIT */
+                    tmp3 = neon_load_reg(rd, pass);
+                    gen_neon_bsl(tmp, tmp, tmp3, tmp2);
+                    dead_tmp(tmp3);
+                    break;
+                case 7: /* VBIF */
+                    tmp3 = neon_load_reg(rd, pass);
+                    gen_neon_bsl(tmp, tmp3, tmp, tmp2);
+                    dead_tmp(tmp3);
+                    break;
+                }
                 break;
-            case 4: /* VEOR */
-                tcg_gen_xor_i32(tmp, tmp, tmp2);
+            case 4: /* VHSUB */
+                GEN_NEON_INTEGER_OP(hsub);
                 break;
-            case 5: /* VBSL */
-                tmp3 = neon_load_reg(rd, pass);
-                gen_neon_bsl(tmp, tmp, tmp2, tmp3);
-                dead_tmp(tmp3);
+            case 5: /* VQSUB */
+                GEN_NEON_INTEGER_OP_ENV(qsub);
                 break;
-            case 6: /* VBIT */
-                tmp3 = neon_load_reg(rd, pass);
-                gen_neon_bsl(tmp, tmp, tmp3, tmp2);
-                dead_tmp(tmp3);
+            case 6: /* VCGT */
+                GEN_NEON_INTEGER_OP(cgt);
                 break;
-            case 7: /* VBIF */
-                tmp3 = neon_load_reg(rd, pass);
-                gen_neon_bsl(tmp, tmp3, tmp, tmp2);
-                dead_tmp(tmp3);
+            case 7: /* VCGE */
+                GEN_NEON_INTEGER_OP(cge);
                 break;
-            }
-            break;
-        case 4: /* VHSUB */
-            GEN_NEON_INTEGER_OP(hsub);
-            break;
-        case 5: /* VQSUB */
-            GEN_NEON_INTEGER_OP_ENV(qsub);
-            break;
-        case 6: /* VCGT */
-            GEN_NEON_INTEGER_OP(cgt);
-            break;
-        case 7: /* VCGE */
-            GEN_NEON_INTEGER_OP(cge);
-            break;
-        case 8: /* VSHL */
-            GEN_NEON_INTEGER_OP(shl);
-            break;
-        case 9: /* VQSHL */
-            GEN_NEON_INTEGER_OP_ENV(qshl);
-            break;
-        case 10: /* VRSHL */
-            GEN_NEON_INTEGER_OP(rshl);
-            break;
-        case 11: /* VQRSHL */
-            GEN_NEON_INTEGER_OP_ENV(qrshl);
-            break;
-        case 12: /* VMAX */
-            GEN_NEON_INTEGER_OP(max);
-            break;
-        case 13: /* VMIN */
-            GEN_NEON_INTEGER_OP(min);
-            break;
-        case 14: /* VABD */
-            GEN_NEON_INTEGER_OP(abd);
-            break;
-        case 15: /* VABA */
-            GEN_NEON_INTEGER_OP(abd);
-            dead_tmp(tmp2);
-            tmp2 = neon_load_reg(rd, pass);
-            gen_neon_add(size, tmp, tmp2);
-            break;
-        case 16:
-            if (!u) { /* VADD */
-                if (gen_neon_add(size, tmp, tmp2))
-                    return 1;
-            } else { /* VSUB */
-                switch (size) {
-                case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
-                case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
-                case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
-                default: return 1;
-                }
-            }
-            break;
-        case 17:
-            if (!u) { /* VTST */
-                switch (size) {
-                case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
-                case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
-                case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
-                default: return 1;
+            case 8: /* VSHL */
+                GEN_NEON_INTEGER_OP(shl);
+                break;
+            case 9: /* VQSHL */
+                GEN_NEON_INTEGER_OP_ENV(qshl);
+                break;
+            case 10: /* VRSHL */
+                GEN_NEON_INTEGER_OP(rshl);
+                break;
+            case 11: /* VQRSHL */
+                GEN_NEON_INTEGER_OP_ENV(qrshl);
+                break;
+            case 12: /* VMAX */
+                GEN_NEON_INTEGER_OP(max);
+                break;
+            case 13: /* VMIN */
+                GEN_NEON_INTEGER_OP(min);
+                break;
+            case 14: /* VABD */
+                GEN_NEON_INTEGER_OP(abd);
+                break;
+            case 15: /* VABA */
+                GEN_NEON_INTEGER_OP(abd);
+                dead_tmp(tmp2);
+                tmp2 = neon_load_reg(rd, pass);
+                gen_neon_add(size, tmp, tmp2);
+                break;
+            case 16:
+                if (!u) { /* VADD */
+                    if (gen_neon_add(size, tmp, tmp2)) {
+                        abort(); /* size == 3 is handled earlier */
+                    }
+                } else { /* VSUB */
+                    switch (size) {
+                    case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
+                    case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
+                    case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
+                    default: abort(); /* size == 3 is handled earlier */
+                    }
                 }
-            } else { /* VCEQ */
-                switch (size) {
-                case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
-                case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
-                case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
-                default: return 1;
+                break;
+            case 17:
+                if (!u) { /* VTST */
+                    switch (size) {
+                    case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
+                    case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
+                    case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
+                    default: abort(); /* size == 3 is handled earlier */
+                    }
+                } else { /* VCEQ */
+                    switch (size) {
+                    case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
+                    case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
+                    case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
+                    default: abort(); /* size == 3 is handled earlier */
+                    }
                 }
-            }
-            break;
-        case 18: /* Multiply.  */
-            switch (size) {
-            case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
-            case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
-            case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-            default: return 1;
-            }
-            dead_tmp(tmp2);
-            tmp2 = neon_load_reg(rd, pass);
-            if (u) { /* VMLS */
-                gen_neon_rsb(size, tmp, tmp2);
-            } else { /* VMLA */
-                gen_neon_add(size, tmp, tmp2);
-            }
-            break;
-        case 19: /* VMUL */
-            if (u) { /* polynomial */
-                gen_helper_neon_mul_p8(tmp, tmp, tmp2);
-            } else { /* Integer */
+                break;
+            case 18: /* Multiply.  */
                 switch (size) {
                 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
                 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
                 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-                default: return 1;
+                default: abort(); /* size == 3 is handled earlier */
                 }
-            }
-            break;
-        case 20: /* VPMAX */
-            GEN_NEON_INTEGER_OP(pmax);
-            break;
-        case 21: /* VPMIN */
-            GEN_NEON_INTEGER_OP(pmin);
-            break;
-        case 22: /* Hultiply high.  */
-            if (!u) { /* VQDMULH */
-                switch (size) {
-                case 1: gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); break;
-                case 2: gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); break;
-                default: return 1;
+                dead_tmp(tmp2);
+                tmp2 = neon_load_reg(rd, pass);
+                if (u) { /* VMLS */
+                    gen_neon_rsb(size, tmp, tmp2);
+                } else { /* VMLA */
+                    gen_neon_add(size, tmp, tmp2);
                 }
-            } else { /* VQRDHMUL */
+                break;
+            case 19: /* VMUL */
+                if (u) { /* polynomial */
+                    gen_helper_neon_mul_p8(tmp, tmp, tmp2);
+                } else { /* Integer */
+                    switch (size) {
+                    case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
+                    case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
+                    case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
+                    default: abort(); /* size == 3 is handled earlier */
+                    }
+                }
+                break;
+            case 20: /* VPMAX */
+                GEN_NEON_INTEGER_OP(pmax);
+                break;
+            case 21: /* VPMIN */
+                GEN_NEON_INTEGER_OP(pmin);
+                break;
+            case 22: /* Multiply high.  */
+                if (!u) { /* VQDMULH */
+                    switch (size) {
+                    case 1:
+                        gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
+                        break;
+                    case 2:
+                        gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
+                        break;
+                    default:
+                        abort(); /* size == 0,3 is handled earlier */
+                    }
+                } else { /* VQRDHMUL */
+                    switch (size) {
+                    case 1:
+                        gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
+                        break;
+                    case 2:
+                        gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
+                        break;
+                    default:
+                        abort(); /* size == 0,3 is handled earlier */
+                    }
+                }
+                break;
+            case 23: /* VPADD */
                 switch (size) {
-                case 1: gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); break;
-                case 2: gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); break;
-                default: return 1;
+                case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
+                case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
+                case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
+                default: abort(); /* size == 3 is handled earlier */
                 }
-            }
-            break;
-        case 23: /* VPADD */
-            if (u)
-                return 1;
-            switch (size) {
-            case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
-            case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
-            case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
-            default: return 1;
-            }
-            break;
-        case 26: /* Floating point arithnetic.  */
-            switch ((u << 2) | size) {
-            case 0: /* VADD */
-                gen_helper_neon_add_f32(tmp, tmp, tmp2);
                 break;
-            case 2: /* VSUB */
-                gen_helper_neon_sub_f32(tmp, tmp, tmp2);
+            case 26: /* Floating point arithmetic.  */
+                switch ((u << 2) | size) {
+                case 0: /* VADD */
+                    gen_helper_neon_add_f32(tmp, tmp, tmp2);
+                    break;
+                case 2: /* VSUB */
+                    gen_helper_neon_sub_f32(tmp, tmp, tmp2);
+                    break;
+                case 4: /* VPADD */
+                    gen_helper_neon_add_f32(tmp, tmp, tmp2);
+                    break;
+                case 6: /* VABD */
+                    gen_helper_neon_abd_f32(tmp, tmp, tmp2);
+                    break;
+                default:
+                    abort(); /* other values are handled earlier */
+                }
                 break;
-            case 4: /* VPADD */
-                gen_helper_neon_add_f32(tmp, tmp, tmp2);
+            case 27: /* Float multiply.  */
+                gen_helper_neon_mul_f32(tmp, tmp, tmp2);
+                if (!u) {
+                    dead_tmp(tmp2);
+                    tmp2 = neon_load_reg(rd, pass);
+                    if (size == 0) {
+                        gen_helper_neon_add_f32(tmp, tmp, tmp2);
+                    } else {
+                        gen_helper_neon_sub_f32(tmp, tmp2, tmp);
+                    }
+                }
                 break;
-            case 6: /* VABD */
-                gen_helper_neon_abd_f32(tmp, tmp, tmp2);
+            case 28: /* Float compare.  */
+                if (!u) {
+                    gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
+                } else {
+                    if (size == 0) {
+                        gen_helper_neon_cge_f32(tmp, tmp, tmp2);
+                    } else {
+                        gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
+                    }
+                }
                 break;
-            default:
-                return 1;
-            }
-            break;
-        case 27: /* Float multiply.  */
-            gen_helper_neon_mul_f32(tmp, tmp, tmp2);
-            if (!u) {
-                dead_tmp(tmp2);
-                tmp2 = neon_load_reg(rd, pass);
+            case 29: /* Float compare absolute.  */
                 if (size == 0) {
-                    gen_helper_neon_add_f32(tmp, tmp, tmp2);
+                    gen_helper_neon_acge_f32(tmp, tmp, tmp2);
+                } else {
+                    gen_helper_neon_acgt_f32(tmp, tmp, tmp2);
+                }
+                break;
+            case 30: /* Float min/max.  */
+                if (size == 0) {
+                    gen_helper_neon_max_f32(tmp, tmp, tmp2);
                 } else {
-                    gen_helper_neon_sub_f32(tmp, tmp2, tmp);
+                    gen_helper_neon_min_f32(tmp, tmp, tmp2);
                 }
+                break;
+            case 31:
+                if (size == 0) {
+                    gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
+                } else {
+                    gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
+                }
+                break;
+            default:
+                abort();
             }
-            break;
-        case 28: /* Float compare.  */
-            if (!u) {
-                gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
+            dead_tmp(tmp2);
+
+            /* Save the result.  For elementwise operations we can put it
+               straight into the destination register.  For pairwise operations
+               we have to be careful to avoid clobbering the source operands.*/
+            if (pairwise && rd == rm) {
+                neon_store_scratch(pass, tmp);
             } else {
-                if (size == 0)
-                    gen_helper_neon_cge_f32(tmp, tmp, tmp2);
-                else
-                    gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
+                neon_store_reg(rd, pass, tmp);
             }
-            break;
-        case 29: /* Float compare absolute.  */
-            if (!u)
-                return 1;
-            if (size == 0)
-                gen_helper_neon_acge_f32(tmp, tmp, tmp2);
-            else
-                gen_helper_neon_acgt_f32(tmp, tmp, tmp2);
-            break;
-        case 30: /* Float min/max.  */
-            if (size == 0)
-                gen_helper_neon_max_f32(tmp, tmp, tmp2);
-            else
-                gen_helper_neon_min_f32(tmp, tmp, tmp2);
-            break;
-        case 31:
-            if (size == 0)
-                gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
-            else
-                gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
-            break;
-        default:
-            abort();
-        }
-        dead_tmp(tmp2);
-
-        /* Save the result.  For elementwise operations we can put it
-           straight into the destination register.  For pairwise operations
-           we have to be careful to avoid clobbering the source operands.  */
-        if (pairwise && rd == rm) {
-            neon_store_scratch(pass, tmp);
-        } else {
-            neon_store_reg(rd, pass, tmp);
-        }
-
         } /* for pass */
         if (pairwise && rd == rm) {
             for (pass = 0; pass < (q ? 4 : 2); pass++) {
@@ -4522,23 +4575,32 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             /* Two registers and shift.  */
             op = (insn >> 8) & 0xf;
             if (insn & (1 << 7)) {
+                if (op & 8) {
+                    return 1;
+                }
                 /* 64-bit shift.   */
                 size = 3;
             } else {
                 size = 2;
-                while ((insn & (1 << (size + 19))) == 0)
+                while ((insn & (1 << (size + 19))) == 0) {
                     size--;
+                }
             }
             shift = (insn >> 16) & ((1 << (3 + size)) - 1);
             /* To avoid excessive dumplication of ops we implement shift
                by immediate using the variable shift operations.  */
             if (op < 8) {
                 /* Shift by immediate:
-                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
+                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VSLI, VQSHL, VQSHLU */
                 /* Right shifts are encoded as N - shift, where N is the
                    element size in bits.  */
-                if (op <= 4)
+                if ((q && ((rd | rm) & 1))
+                    || (!u && (op == 4 || op == 6))) {
+                    return 1;
+                }
+                if (op <= 4) {
                     shift = shift - (1 << (size + 3));
+                }
                 if (size == 3) {
                     count = q + 1;
                 } else {
@@ -4569,34 +4631,42 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         switch (op) {
                         case 0:  /* VSHR */
                         case 1:  /* VSRA */
-                            if (u)
-                                gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
-                            else
-                                gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
+                            if (u) {
+                                gen_helper_neon_shl_u64(cpu_V0, cpu_V0,
+                                                        cpu_V1);
+                            } else {
+                                gen_helper_neon_shl_s64(cpu_V0, cpu_V0,
+                                                        cpu_V1);
+                            }
                             break;
                         case 2: /* VRSHR */
                         case 3: /* VRSRA */
-                            if (u)
-                                gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
-                            else
-                                gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
+                            if (u) {
+                                gen_helper_neon_rshl_u64(cpu_V0, cpu_V0,
+                                                         cpu_V1);
+                            } else {
+                                gen_helper_neon_rshl_s64(cpu_V0, cpu_V0,
+                                                         cpu_V1);
+                            }
                             break;
                         case 4: /* VSRI */
-                            if (!u)
-                                return 1;
                             gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
                             break;
                         case 5: /* VSHL, VSLI */
                             gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
                             break;
-                        case 6: /* VQSHL */
-                            if (u)
-                                gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
-                            else
-                                gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
+                        case 6: /* VQSHLU */
+                            gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0,
+                                                     cpu_V1);
                             break;
-                        case 7: /* VQSHLU */
-                            gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
+                        case 7: /* VQSHL/VQSHLU */
+                            if (u) {
+                                gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
+                                                         cpu_V0, cpu_V1);
+                            } else {
+                                gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
+                                                         cpu_V0, cpu_V1);
+                            }
                             break;
                         }
                         if (op == 1 || op == 3) {
@@ -4605,7 +4675,16 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
                         } else if (op == 4 || (op == 5 && u)) {
                             /* Insert */
-                            cpu_abort(env, "VS[LR]I.64 not implemented");
+                            neon_load_reg64(cpu_V0, rd + pass);
+                            uint64_t mask;
+                            if (op == 4) {
+                                mask = 0xffffffffffffffffLL >> -shift;
+                            } else {
+                                mask = 0xffffffffffffffffLL << shift;
+                            }
+                            tcg_gen_andi_i64(cpu_V1, cpu_V1, mask);
+                            tcg_gen_andi_i64(cpu_V0, cpu_V0, ~mask);
+                            tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
                         }
                         neon_store_reg64(cpu_V0, rd + pass);
                     } else { /* size < 3 */
@@ -4623,27 +4702,40 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             GEN_NEON_INTEGER_OP(rshl);
                             break;
                         case 4: /* VSRI */
-                            if (!u)
-                                return 1;
-                            GEN_NEON_INTEGER_OP(shl);
-                            break;
                         case 5: /* VSHL, VSLI */
                             switch (size) {
-                            case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
-                            case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
-                            case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            case 0:
+                                gen_helper_neon_shl_u8(tmp, tmp, tmp2);
+                                break;
+                            case 1:
+                                gen_helper_neon_shl_u16(tmp, tmp, tmp2);
+                                break;
+                            case 2:
+                                gen_helper_neon_shl_u32(tmp, tmp, tmp2);
+                                break;
+                            default:
+                                abort(); /* size == 3 is handled earlier */
                             }
                             break;
-                        case 6: /* VQSHL */
+                        case 6: /* VQSHLU */
                             GEN_NEON_INTEGER_OP_ENV(qshl);
                             break;
                         case 7: /* VQSHLU */
                             switch (size) {
-                            case 0: gen_helper_neon_qshl_u8(tmp, cpu_env, tmp, tmp2); break;
-                            case 1: gen_helper_neon_qshl_u16(tmp, cpu_env, tmp, tmp2); break;
-                            case 2: gen_helper_neon_qshl_u32(tmp, cpu_env, tmp, tmp2); break;
-                            default: return 1;
+                            case 0:
+                                gen_helper_neon_qshl_u8(tmp, cpu_env, tmp,
+                                                        tmp2);
+                                break;
+                            case 1:
+                                gen_helper_neon_qshl_u16(tmp, cpu_env, tmp,
+                                                         tmp2);
+                                break;
+                            case 2:
+                                gen_helper_neon_qshl_u32(tmp, cpu_env, tmp,
+                                                         tmp2);
+                                break;
+                            default:
+                                abort(); /* size == 3 is handled earlier */
                             }
                             break;
                         }
@@ -4658,32 +4750,35 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             /* Insert */
                             switch (size) {
                             case 0:
-                                if (op == 4)
+                                if (op == 4) {
                                     mask = 0xff >> -shift;
-                                else
+                                } else {
                                     mask = (uint8_t)(0xff << shift);
+                                }
                                 mask |= mask << 8;
                                 mask |= mask << 16;
                                 break;
                             case 1:
-                                if (op == 4)
+                                if (op == 4) {
                                     mask = 0xffff >> -shift;
-                                else
+                                } else {
                                     mask = (uint16_t)(0xffff << shift);
+                                }
                                 mask |= mask << 16;
                                 break;
                             case 2:
                                 if (shift < -31 || shift > 31) {
                                     mask = 0;
                                 } else {
-                                    if (op == 4)
+                                    if (op == 4) {
                                         mask = 0xffffffffu >> -shift;
-                                    else
+                                    } else {
                                         mask = 0xffffffffu << shift;
+                                    }
                                 }
                                 break;
                             default:
-                                abort();
+                                abort(); /* size == 3 is handled earlier */
                             }
                             tmp2 = neon_load_reg(rd, pass);
                             tcg_gen_andi_i32(tmp, tmp, mask);
@@ -4697,6 +4792,9 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             } else if (op < 10) {
                 /* Shift by immediate and narrow:
                    VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
+                if (rm & 1) {
+                    return 1;
+                }
                 shift = shift - (1 << (size + 3));
                 size++;
                 switch (size) {
@@ -4723,15 +4821,21 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     if (size == 3) {
                         neon_load_reg64(cpu_V0, rm + pass);
                         if (q) {
-                          if (u)
-                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64);
-                          else
-                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64);
+                            if (u) {
+                                gen_helper_neon_rshl_u64(cpu_V0, cpu_V0,
+                                                         tmp64);
+                            } else {
+                                gen_helper_neon_rshl_s64(cpu_V0, cpu_V0,
+                                                         tmp64);
+                            }
                         } else {
-                          if (u)
-                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp64);
-                          else
-                            gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64);
+                            if (u) {
+                                gen_helper_neon_shl_u64(cpu_V0, cpu_V0,
+                                                        tmp64);
+                            } else {
+                                gen_helper_neon_shl_s64(cpu_V0, cpu_V0,
+                                                        tmp64);
+                            }
                         }
                     } else {
                         tmp = neon_load_reg(rm + pass, 0);
@@ -4746,10 +4850,11 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     if (op == 8 && !u) {
                         gen_neon_narrow(size - 1, tmp, cpu_V0);
                     } else {
-                        if (op == 8)
+                        if (op == 8) {
                             gen_neon_narrow_sats(size - 1, tmp, cpu_V0);
-                        else
+                        } else {
                             gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
+                        }
                     }
                     neon_store_reg(rd, pass, tmp);
                 } /* for pass */
@@ -4760,14 +4865,15 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 }
             } else if (op == 10) {
                 /* VSHLL */
-                if (q || size == 3)
+                if (q) {
                     return 1;
+                }
                 tmp = neon_load_reg(rm, 0);
                 tmp2 = neon_load_reg(rm, 1);
                 for (pass = 0; pass < 2; pass++) {
-                    if (pass == 1)
+                    if (pass == 1) {
                         tmp = tmp2;
-
+                    }
                     gen_neon_widen(cpu_V0, tmp, size, u);
 
                     if (shift != 0) {
@@ -4788,22 +4894,29 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     neon_store_reg64(cpu_V0, rd + pass);
                 }
-            } else if (op == 15 || op == 16) {
+            } else if (op == 14 || op == 15) {
                 /* VCVT fixed-point.  */
+                if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
+                    return 1;
+                }
                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
-                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
+                    tcg_gen_ld_f32(cpu_F0s, cpu_env,
+                                   neon_reg_offset(rm, pass));
                     if (op & 1) {
-                        if (u)
+                        if (u) {
                             gen_vfp_ulto(0, shift);
-                        else
+                        } else {
                             gen_vfp_slto(0, shift);
+                        }
                     } else {
-                        if (u)
+                        if (u) {
                             gen_vfp_toul(0, shift);
-                        else
+                        } else {
                             gen_vfp_tosl(0, shift);
+                        }
                     }
-                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
+                    tcg_gen_st_f32(cpu_F0s, cpu_env,
+                                   neon_reg_offset(rd, pass));
                 }
             } else {
                 return 1;
@@ -4842,10 +4955,14 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 break;
             case 14:
                 imm |= (imm << 8) | (imm << 16) | (imm << 24);
-                if (invert)
+                if (invert) {
                     imm = ~imm;
+                }
                 break;
             case 15:
+                if (invert) {
+                    return 1;
+                }
                 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
                       | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
                 break;
@@ -4884,36 +5001,22 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
     } else { /* (insn & 0x00800010 == 0x00800000) */
         if (size != 3) {
             op = (insn >> 8) & 0xf;
-            if ((insn & (1 << 6)) == 0) {
+            if (!q) {
                 /* Three registers of different lengths.  */
-                int src1_wide;
-                int src2_wide;
-                int prewiden;
-                /* prewiden, src1_wide, src2_wide */
-                static const int neon_3reg_wide[16][3] = {
-                    {1, 0, 0}, /* VADDL */
-                    {1, 1, 0}, /* VADDW */
-                    {1, 0, 0}, /* VSUBL */
-                    {1, 1, 0}, /* VSUBW */
-                    {0, 1, 1}, /* VADDHN */
-                    {0, 0, 0}, /* VABAL */
-                    {0, 1, 1}, /* VSUBHN */
-                    {0, 0, 0}, /* VABDL */
-                    {0, 0, 0}, /* VMLAL */
-                    {0, 0, 0}, /* VQDMLAL */
-                    {0, 0, 0}, /* VMLSL */
-                    {0, 0, 0}, /* VQDMLSL */
-                    {0, 0, 0}, /* Integer VMULL */
-                    {0, 0, 0}, /* VQDMULL */
-                    {0, 0, 0}  /* Polynomial VMULL */
-                };
-
-                prewiden = neon_3reg_wide[op][0];
-                src1_wide = neon_3reg_wide[op][1];
-                src2_wide = neon_3reg_wide[op][2];
-
-                if (size == 0 && (op == 9 || op == 11 || op == 13))
+
+                if (op == 15
+                    || (op < 4 && ((rd & 1) || ((op & 1) && (rn & 1))))
+                    || ((op == 4 || op == 6) && ((rn | rm) & 1))
+                    || ((op == 5 || op >= 7) && (rd & 1))
+                    || ((op == 9 || op == 11) && (u || size == 0))
+                    || (op == 13 && size == 0)
+                    || (op == 14 && (u || size))) {
                     return 1;
+                }
+
+                int src1_wide = (op == 1 || op == 3 || op == 4 || op == 6);
+                int src2_wide = (op == 4 || op == 6);
+                int prewiden = (op < 4);
 
                 /* Avoid overlapping operands.  Wide source operands are
                    always aligned so will never overlap with wide
@@ -4980,7 +5083,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         case 5:
                             gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
                             break;
-                        default: abort();
+                        default: abort(); /* size == 3 is handled earlier */
                         }
                         dead_tmp(tmp2);
                         dead_tmp(tmp);
@@ -4995,7 +5098,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         cpu_abort(env, "Polynomial VMULL not implemented");
 
                     default: /* 15 is RESERVED.  */
-                        return 1;
+                        abort(); /* op == 15 is handled earlier */
                     }
                     if (op == 5 || op == 13 || (op >= 8 && op <= 11)) {
                         /* Accumulate.  */
@@ -5038,7 +5141,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
                                 tcg_gen_trunc_i64_i32(tmp, cpu_V0);
                                 break;
-                            default: abort();
+                            default: abort(); /* size == 3 is handled earlier */
                             }
                         } else {
                             switch (size) {
@@ -5053,7 +5156,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
                                 tcg_gen_trunc_i64_i32(tmp, cpu_V0);
                                 break;
-                            default: abort();
+                            default: abort(); /* size == 3 is handled earlier */
                             }
                         }
                         if (pass == 0) {
@@ -5076,8 +5179,15 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 case 5: /* Floating point VMLS scalar */
                 case 8: /* Integer VMUL scalar */
                 case 9: /* Floating point VMUL scalar */
+                    if (size <= (op & 1)) {
+                        return 1;
+                    }
+                    /* fall through */
                 case 12: /* VQDMULH scalar */
                 case 13: /* VQRDMULH scalar */
+                    if (u && ((rd | rn) & 1)) {
+                        return 1;
+                    }
                     tmp = neon_get_scalar(size, rm);
                     neon_store_scratch(0, tmp);
                     for (pass = 0; pass < (u ? 4 : 2); pass++) {
@@ -5085,24 +5195,35 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         tmp2 = neon_load_reg(rn, pass);
                         if (op == 12) {
                             if (size == 1) {
-                                gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
-                            } else {
-                                gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
+                                gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp,
+                                                           tmp2);
+                            } else { /* TODO: what happens when size == 0? */
+                                gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp,
+                                                           tmp2);
                             }
                         } else if (op == 13) {
                             if (size == 1) {
-                                gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
-                            } else {
-                                gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
+                                gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp,
+                                                            tmp2);
+                            } else { /* TODO: what happens when size == 0? */
+                                gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp,
+                                                            tmp2);
                             }
                         } else if (op & 1) {
                             gen_helper_neon_mul_f32(tmp, tmp, tmp2);
                         } else {
                             switch (size) {
-                            case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
-                            case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
-                            case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            case 0:
+                                gen_helper_neon_mul_u8(tmp, tmp, tmp2);
+                                break;
+                            case 1:
+                                gen_helper_neon_mul_u16(tmp, tmp, tmp2);
+                                break;
+                            case 2:
+                                tcg_gen_mul_i32(tmp, tmp, tmp2);
+                                break;
+                            default:
+                                abort(); /* size == 3 is handled earlier */
                             }
                         }
                         dead_tmp(tmp2);
@@ -5123,21 +5244,26 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                                 gen_helper_neon_sub_f32(tmp, tmp2, tmp);
                                 break;
                             default:
-                                abort();
+                                abort(); /* size == 3 is handled earlier */
                             }
                             dead_tmp(tmp2);
                         }
                         neon_store_reg(rd, pass, tmp);
                     }
                     break;
-                case 2: /* VMLAL sclar */
                 case 3: /* VQDMLAL scalar */
-                case 6: /* VMLSL scalar */
                 case 7: /* VQDMLSL scalar */
-                case 10: /* VMULL scalar */
                 case 11: /* VQDMULL scalar */
-                    if (size == 0 && (op == 3 || op == 7 || op == 11))
+                    if (u) {
                         return 1;
+                    }
+                    /* fall through */
+                case 2: /* VMLAL sclar */
+                case 6: /* VMLSL scalar */
+                case 10: /* VMULL scalar */
+                    if (size == 0 || (rd & 1)) {
+                        return 1;
+                    }
 
                     tmp2 = neon_get_scalar(size, rm);
                     tmp3 = neon_load_reg(rn, 1);
@@ -5189,8 +5315,10 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 imm = (insn >> 8) & 0xf;
                 count = q + 1;
 
-                if (imm > 7 && !q)
+                if ((imm > 7 && !q)
+                    || (q && ((rd | rn | rm) & 1))) {
                     return 1;
+                }
 
                 if (imm == 0) {
                     neon_load_reg64(cpu_V0, rn);
@@ -5240,10 +5368,15 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 /* Two register misc.  */
                 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
                 size = (insn >> 18) & 3;
+                if ((q && (op < 36 || op > 46) && ((rd | rm) & 1))
+                    || (op >= 56 && size != 2)) {
+                    return 1;
+                }
                 switch (op) {
                 case 0: /* VREV64 */
-                    if (size == 3)
+                    if (size == 3) {
                         return 1;
+                    }
                     for (pass = 0; pass < (q ? 2 : 1); pass++) {
                         tmp = neon_load_reg(rm, pass * 2);
                         tmp2 = neon_load_reg(rm, pass * 2 + 1);
@@ -5268,8 +5401,9 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     break;
                 case 4: case 5: /* VPADDL */
                 case 12: case 13: /* VPADAL */
-                    if (size == 3)
+                    if (size == 3) {
                         return 1;
+                    }
                     for (pass = 0; pass < q + 1; pass++) {
                         tmp = neon_load_reg(rm, pass * 2);
                         gen_neon_widen(cpu_V0, tmp, size, op & 1);
@@ -5290,15 +5424,19 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     break;
                 case 33: /* VTRN */
-                    if (size == 2) {
+                    switch (size) {
+                    case 0: case 1:
+                        goto elementwise;
+                    case 2:
                         for (n = 0; n < (q ? 4 : 2); n += 2) {
                             tmp = neon_load_reg(rm, n);
                             tmp2 = neon_load_reg(rd, n + 1);
                             neon_store_reg(rm, n, tmp2);
                             neon_store_reg(rd, n + 1, tmp);
                         }
-                    } else {
-                        goto elementwise;
+                        break;
+                    default:
+                        return 1;
                     }
                     break;
                 case 34: /* VUZP */
@@ -5306,8 +5444,9 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                        Rd   A3 A2 A1 A0  B2 B0 A2 A0
                        Rm   B3 B2 B1 B0  B3 B1 A3 A1
                      */
-                    if (size == 3)
+                    if (size == 3 || (!q && size == 2)) {
                         return 1;
+                    }
                     gen_neon_unzip(rd, q, 0, size);
                     gen_neon_unzip(rm, q, 4, size);
                     if (q) {
@@ -5333,8 +5472,9 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                        Rd   A3 A2 A1 A0  B1 A1 B0 A0
                        Rm   B3 B2 B1 B0  B3 A3 B2 A2
                      */
-                    if (size == 3)
+                    if (size == 3 || (!q && size == 2)) {
                         return 1;
+                    }
                     count = (q ? 4 : 2);
                     for (n = 0; n < count; n++) {
                         tmp = neon_load_reg(rd, n);
@@ -5355,8 +5495,9 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     break;
                 case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
-                    if (size == 3)
+                    if (size == 3 || (rm & 1)) {
                         return 1;
+                    }
                     TCGV_UNUSED(tmp2);
                     for (pass = 0; pass < 2; pass++) {
                         neon_load_reg64(cpu_V0, rm + pass);
@@ -5377,20 +5518,24 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     break;
                 case 38: /* VSHLL */
-                    if (q || size == 3)
+                    if (q || size == 3 || (rd & 1)) {
                         return 1;
+                    }
                     tmp = neon_load_reg(rm, 0);
                     tmp2 = neon_load_reg(rm, 1);
                     for (pass = 0; pass < 2; pass++) {
-                        if (pass == 1)
+                        if (pass == 1) {
                             tmp = tmp2;
+                        }
                         gen_neon_widen(cpu_V0, tmp, size, 1);
                         neon_store_reg64(cpu_V0, rd + pass);
                     }
                     break;
                 case 44: /* VCVT.F16.F32 */
-                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
-                      return 1;
+                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16)
+                        || q || size != 1 || (rm & 1)) {
+                        return 1;
+                    }
                     tmp = new_tmp();
                     tmp2 = new_tmp();
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
@@ -5411,8 +5556,10 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     dead_tmp(tmp);
                     break;
                 case 46: /* VCVT.F32.F16 */
-                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
-                      return 1;
+                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16)
+                        || q || size != 1 || (rd & 1)) {
+                        return 1;
+                    }
                     tmp3 = new_tmp();
                     tmp = neon_load_reg(rm, 0);
                     tmp2 = neon_load_reg(rm, 1);
@@ -5447,38 +5594,44 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             switch (size) {
                             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
                             case 1: gen_swap_half(tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 2: /* VREV16 */
-                            if (size != 0)
+                            if (size != 0) {
+                                dead_tmp(tmp);
                                 return 1;
+                            }
                             gen_rev16(tmp);
                             break;
-                        case 8: /* CLS */
+                        case 8: /* VCLS */
                             switch (size) {
                             case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
                             case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
                             case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
-                        case 9: /* CLZ */
+                        case 9: /* VCLZ */
                             switch (size) {
                             case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
                             case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
                             case 2: gen_helper_clz(tmp, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
-                        case 10: /* CNT */
-                            if (size != 0)
+                        case 10: /* VCNT */
+                            if (size != 0) {
+                                dead_tmp(tmp);
                                 return 1;
+                            }
                             gen_helper_neon_cnt_u8(tmp, tmp);
                             break;
                         case 11: /* VNOT */
-                            if (size != 0)
+                            if (size != 0) {
+                                dead_tmp(tmp);
                                 return 1;
+                            }
                             tcg_gen_not_i32(tmp, tmp);
                             break;
                         case 14: /* VQABS */
@@ -5486,7 +5639,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_qabs_s8(tmp, cpu_env, tmp); break;
                             case 1: gen_helper_neon_qabs_s16(tmp, cpu_env, tmp); break;
                             case 2: gen_helper_neon_qabs_s32(tmp, cpu_env, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 15: /* VQNEG */
@@ -5494,7 +5647,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_qneg_s8(tmp, cpu_env, tmp); break;
                             case 1: gen_helper_neon_qneg_s16(tmp, cpu_env, tmp); break;
                             case 2: gen_helper_neon_qneg_s32(tmp, cpu_env, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 16: case 19: /* VCGT #0, VCLE #0 */
@@ -5503,11 +5656,12 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp2); dead_tmp(tmp); return 1;
                             }
                             tcg_temp_free(tmp2);
-                            if (op == 19)
+                            if (op == 19) {
                                 tcg_gen_not_i32(tmp, tmp);
+                            }
                             break;
                         case 17: case 20: /* VCGE #0, VCLT #0 */
                             tmp2 = tcg_const_i32(0);
@@ -5515,11 +5669,12 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp2); dead_tmp(tmp); return 1;
                             }
                             tcg_temp_free(tmp2);
-                            if (op == 20)
+                            if (op == 20) {
                                 tcg_gen_not_i32(tmp, tmp);
+                            }
                             break;
                         case 18: /* VCEQ #0 */
                             tmp2 = tcg_const_i32(0);
@@ -5527,7 +5682,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
                             case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
                             case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
-                            default: return 1;
+                            default: tcg_temp_free_i32(tmp2); dead_tmp(tmp); return 1;
                             }
                             tcg_temp_free(tmp2);
                             break;
@@ -5536,42 +5691,68 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
                             case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
                             case 2: tcg_gen_abs_i32(tmp, tmp); break;
-                            default: return 1;
+                            default: dead_tmp(tmp); return 1;
                             }
                             break;
                         case 23: /* VNEG */
-                            if (size == 3)
+                            if (size == 3) {
+                                dead_tmp(tmp);
                                 return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_neon_rsb(size, tmp, tmp2);
                             tcg_temp_free(tmp2);
                             break;
                         case 24: case 27: /* Float VCGT #0, Float VCLE #0 */
+                            if (size != 2) {
+                                dead_tmp(tmp);
+                                return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
                             tcg_temp_free(tmp2);
-                            if (op == 27)
+                            if (op == 27) {
                                 tcg_gen_not_i32(tmp, tmp);
+                            }
                             break;
                         case 25: case 28: /* Float VCGE #0, Float VCLT #0 */
+                            if (size != 2) {
+                                dead_tmp(tmp);
+                                return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_helper_neon_cge_f32(tmp, tmp, tmp2);
                             tcg_temp_free(tmp2);
-                            if (op == 28)
+                            if (op == 28) {
                                 tcg_gen_not_i32(tmp, tmp);
+                            }
                             break;
                         case 26: /* Float VCEQ #0 */
+                            if (size != 2) {
+                                dead_tmp(tmp);
+                                return 1;
+                            }
                             tmp2 = tcg_const_i32(0);
                             gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
                             tcg_temp_free(tmp2);
                             break;
                         case 30: /* Float VABS */
+                            if (size != 2) {
+                                return 1;
+                            }
                             gen_vfp_abs(0);
                             break;
                         case 31: /* Float VNEG */
+                            if (size != 2) {
+                                return 1;
+                            }
                             gen_vfp_neg(0);
                             break;
                         case 32: /* VSWP */
+                            if (size != 0) {
+                                dead_tmp(tmp);
+                                return 1;
+                            }
                             tmp2 = neon_load_reg(rd, pass);
                             neon_store_reg(rm, pass, tmp2);
                             break;
@@ -5580,8 +5761,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             switch (size) {
                             case 0: gen_neon_trn_u8(tmp, tmp2); break;
                             case 1: gen_neon_trn_u16(tmp, tmp2); break;
-                            case 2: abort();
-                            default: return 1;
+                            default: abort(); /* size == 2,3 is handled earlier */
                             }
                             neon_store_reg(rm, pass, tmp2);
                             break;
@@ -5610,7 +5790,8 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             gen_vfp_uito(0);
                             break;
                         default:
-                            /* Reserved: 21, 29, 39-56 */
+                            /* Reserved: 3, 6, 7, 21, 29, 39-43, 45, 47-55 */
+                            dead_tmp(tmp);
                             return 1;
                         }
                         if (op == 30 || op == 31 || op >= 58) {
@@ -5625,7 +5806,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
             } else if ((insn & (1 << 10)) == 0) {
                 /* VTBL, VTBX.  */
                 n = ((insn >> 5) & 0x18) + 8;
-                if (insn & (1 << 6)) {
+                if (q) {
                     tmp = neon_load_reg(rd, 0);
                 } else {
                     tmp = new_tmp();
@@ -5636,7 +5817,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 tmp5 = tcg_const_i32(n);
                 gen_helper_neon_tbl(tmp2, tmp2, tmp, tmp4, tmp5);
                 dead_tmp(tmp);
-                if (insn & (1 << 6)) {
+                if (q) {
                     tmp = neon_load_reg(rd, 1);
                 } else {
                     tmp = new_tmp();
@@ -5651,6 +5832,9 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 dead_tmp(tmp);
             } else if ((insn & 0x380) == 0) {
                 /* VDUP */
+                if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
+                    return 1;
+                }
                 if (insn & (1 << 19)) {
                     tmp = neon_load_reg(rm, 1);
                 } else {
@@ -5659,10 +5843,11 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 if (insn & (1 << 16)) {
                     gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
                 } else if (insn & (1 << 17)) {
-                    if ((insn >> 18) & 1)
+                    if ((insn >> 18) & 1) {
                         gen_neon_dup_high16(tmp);
-                    else
+                    } else {
                         gen_neon_dup_low16(tmp);
+                    }
                 }
                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
                     tmp2 = new_tmp();
@@ -7827,7 +8012,7 @@  static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
         /* Coprocessor.  */
         if (((insn >> 24) & 3) == 3) {
             /* Translate into the equivalent ARM encoding.  */
-            insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4);
+            insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
             if (disas_neon_data_insn(env, s, insn))
                 goto illegal_op;
         } else {