diff mbox

[11/12] target-arm: optimize neon vld/vst ops

Message ID D1FE725F-C260-49B5-9E87-543B280DBDAF@nokia.com
State New
Headers show

Commit Message

Juha.Riihimaki@nokia.com Oct. 21, 2009, 10:18 a.m. UTC
Reduce the number of TCG ops generated for NEON vld/vst instructions
by simplifying the code generation.

Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com>
---
        return 1;
@@ -3710,6 +3711,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
          interleave = neon_ls_element_type[op].interleave;
          load_reg_var(s, addr, rn);
          stride = (1 << size) * interleave;
+        stride_v = tcg_const_i32(stride);
          for (reg = 0; reg < nregs; reg++) {
              if (interleave > 2 || (interleave == 2 && nregs == 2)) {
                  load_reg_var(s, addr, rn);
@@ -3728,7 +3730,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                      neon_load_reg64(tmp64, rd);
                      gen_st64(tmp64, addr, IS_USER(s));
                  }
-                tcg_gen_addi_i32(addr, addr, stride);
+                tcg_gen_add_i32(addr, addr, stride_v);
              } else {
                  for (pass = 0; pass < 2; pass++) {
                      if (size == 2) {
@@ -3739,58 +3741,57 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                              tmp = neon_load_reg(rd, pass);
                              gen_st32(tmp, addr, IS_USER(s));
                          }
-                        tcg_gen_addi_i32(addr, addr, stride);
+                        tcg_gen_add_i32(addr, addr, stride_v);
                      } else if (size == 1) {
                          if (load) {
                              tmp = gen_ld16u(addr, IS_USER(s));
                              tcg_gen_addi_i32(addr, addr, stride);
                              tmp2 = gen_ld16u(addr, IS_USER(s));
-                            tcg_gen_addi_i32(addr, addr, stride);
-                            gen_bfi(tmp, tmp, tmp2, 16, 0xffff);
+                            tcg_gen_add_i32(addr, addr, stride_v);
+                            tcg_gen_shli_i32(tmp2, tmp2, 16);
+                            tcg_gen_or_i32(tmp, tmp, tmp2);
                              dead_tmp(tmp2);
                              neon_store_reg(rd, pass, tmp);
                          } else {
                              tmp = neon_load_reg(rd, pass);
-                            tmp2 = new_tmp();
-                            tcg_gen_shri_i32(tmp2, tmp, 16);
-                            gen_st16(tmp, addr, IS_USER(s));
-                            tcg_gen_addi_i32(addr, addr, stride);
-                            gen_st16(tmp2, addr, IS_USER(s));
-                            tcg_gen_addi_i32(addr, addr, stride);
+                            tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+                            tcg_gen_add_i32(addr, addr, stride_v);
+                            tcg_gen_shri_i32(tmp, tmp, 16);
+                            tcg_gen_qemu_st16(tmp, addr, IS_USER(s));
+                            tcg_gen_add_i32(addr, addr, stride_v);
+                            dead_tmp(tmp);
                          }
                      } else /* size == 0 */ {
                          if (load) {
-                            TCGV_UNUSED(tmp2);
-                            for (n = 0; n < 4; n++) {
-                                tmp = gen_ld8u(addr, IS_USER(s));
-                                tcg_gen_addi_i32(addr, addr, stride);
-                                if (n == 0) {
-                                    tmp2 = tmp;
-                                } else {
-                                    gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff);
-                                    dead_tmp(tmp);
-                                }
+                            tmp = gen_ld8u(addr, IS_USER(s));
+                            tcg_gen_add_i32(addr, addr, stride_v);
+                            for (n = 1; n < 4; n++) {
+                                tmp2 = gen_ld8u(addr, IS_USER(s));
+                                tcg_gen_add_i32(addr, addr, stride_v);
+                                tcg_gen_shli_i32(tmp2, tmp2, n * 8);
+                                tcg_gen_or_i32(tmp, tmp, tmp2);
+                                dead_tmp(tmp2);
                              }
-                            neon_store_reg(rd, pass, tmp2);
+                            neon_store_reg(rd, pass, tmp);
                          } else {
-                            tmp2 = neon_load_reg(rd, pass);
-                            for (n = 0; n < 4; n++) {
-                                tmp = new_tmp();
-                                if (n == 0) {
-                                    tcg_gen_mov_i32(tmp, tmp2);
-                                } else {
-                                    tcg_gen_shri_i32(tmp, tmp2, n * 8);
-                                }
-                                gen_st8(tmp, addr, IS_USER(s));
-                                tcg_gen_addi_i32(addr, addr, stride);
+                            tmp2 = tcg_const_i32(8);
+                            tmp = neon_load_reg(rd, pass);
+                            for (n = 0; n < 3; n++) {
+                                tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+                                tcg_gen_add_i32(addr, addr, stride_v);
+                                tcg_gen_shr_i32(tmp, tmp, tmp2);
                              }
-                            dead_tmp(tmp2);
+                            tcg_gen_qemu_st8(tmp, addr, IS_USER(s));
+                            tcg_gen_add_i32(addr, addr, stride_v);
+                            dead_tmp(tmp);
+                            tcg_temp_free_i32(tmp2);
                          }
                      }
                  }
              }
              rd += neon_ls_element_type[op].spacing;
          }
+        tcg_temp_free_i32(stride_v);
          stride = nregs * 8;
      } else {
          size = (insn >> 10) & 3;
diff mbox

Patch

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 1734fae..fa03df8 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -3692,6 +3692,7 @@  static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
      TCGv tmp;
      TCGv tmp2;
      TCGv_i64 tmp64;
+    TCGv stride_v;

      if (!vfp_enabled(env))