Patchwork target-arm: Fix garbage collection of temporaries in Neon emulation.

login
register
mail settings
Submitter Christophe LYON
Date Jan. 19, 2011, 2:37 p.m.
Message ID <4D36F746.30307@st.com>
Download mbox | patch
Permalink /patch/79478/
State New
Headers show

Comments

Christophe LYON - Jan. 19, 2011, 2:37 p.m.
Here is an updated patch which will hopefully not be mangled by my mailer.

Fix garbage collection of temporaries in Neon emulation.


Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/translate.c |   18 +++++++++++++-----
 1 files changed, 13 insertions(+), 5 deletions(-)
Peter Maydell - Jan. 19, 2011, 7:12 p.m.
On 19 January 2011 14:37, Christophe Lyon <christophe.lyon@st.com> wrote:
> Here is an updated patch which will hopefully not be mangled by my mailer.
>
> Fix garbage collection of temporaries in Neon emulation.

I've tested this patch and it does indeed fix the problems with VMULL
and friends (I was seeing assertions/hangs). I've tested with random
instruction sequence generation, and with this patch the non-scalar
forms of VMLAL, VMLSL, VQDMLAL, VQDMLSL, VMULL and VQDMULL
now all pass. The scalar forms also pass random-sequence testing
once a patch from the qemu-meego tree is applied on top. Since I have
effectively just tested that meego patch, I'll post it to the list in
a moment.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

I would personally prefer slightly less terse commit messages
(for instance, it might be nice to list the affected instructions in
this case). The convention is also to prefix the summary line
with the file or directory affected, i.e. "target-arm: Fix garbage
collection of temporaries in Neon emulation".

-- PMM
Aurelien Jarno - Jan. 26, 2011, 1:34 p.m.
On Wed, Jan 19, 2011 at 03:37:58PM +0100, Christophe Lyon wrote:
> Here is an updated patch which will hopefully not be mangled by my mailer.
> 
> Fix garbage collection of temporaries in Neon emulation.
> 
> 
> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
> ---
>  target-arm/translate.c |   18 +++++++++++++-----
>  1 files changed, 13 insertions(+), 5 deletions(-)

Thanks, applied.

> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 57664bc..b3e3d70 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -4176,6 +4176,13 @@ static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
>          break;
>      default: abort();
>      }
> +
> +    /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
> +       Don't forget to clean them now.  */
> +    if (size < 2) {
> +      dead_tmp(a);
> +      dead_tmp(b);
> +    }
>  }
>  
>  /* Translate a NEON data processing instruction.  Return nonzero if the
> @@ -4840,7 +4847,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                  if (size == 3) {
>                      tcg_temp_free_i64(tmp64);
>                  } else {
> -                    dead_tmp(tmp2);
> +                    tcg_temp_free_i32(tmp2);
>                  }
>              } else if (op == 10) {
>                  /* VSHLL */
> @@ -5076,8 +5083,6 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                      case 8: case 9: case 10: case 11: case 12: case 13:
>                          /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
>                          gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
> -                        dead_tmp(tmp2);
> -                        dead_tmp(tmp);
>                          break;
>                      case 14: /* Polynomial VMULL */
>                          cpu_abort(env, "Polynomial VMULL not implemented");
> @@ -5228,6 +5233,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                          return 1;
>  
>                      tmp2 = neon_get_scalar(size, rm);
> +                    /* We need a copy of tmp2 because gen_neon_mull
> +                     * deletes it during pass 0.  */
> +                    tmp4 = new_tmp();
> +                    tcg_gen_mov_i32(tmp4, tmp2);
>                      tmp3 = neon_load_reg(rn, 1);
>  
>                      for (pass = 0; pass < 2; pass++) {
> @@ -5235,9 +5244,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                              tmp = neon_load_reg(rn, 0);
>                          } else {
>                              tmp = tmp3;
> +                            tmp2 = tmp4;
>                          }
>                          gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
> -                        dead_tmp(tmp);
>                          if (op == 6 || op == 7) {
>                              gen_neon_negl(cpu_V0, size);
>                          }
> @@ -5264,7 +5273,6 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                          neon_store_reg64(cpu_V0, rd + pass);
>                      }
>  
> -                    dead_tmp(tmp2);
>  
>                      break;
>                  default: /* 14 and 15 are RESERVED */
> -- 
> 1.7.2.3
> 
> 
>

Patch

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 57664bc..b3e3d70 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4176,6 +4176,13 @@  static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
         break;
     default: abort();
     }
+
+    /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
+       Don't forget to clean them now.  */
+    if (size < 2) {
+      dead_tmp(a);
+      dead_tmp(b);
+    }
 }
 
 /* Translate a NEON data processing instruction.  Return nonzero if the
@@ -4840,7 +4847,7 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 if (size == 3) {
                     tcg_temp_free_i64(tmp64);
                 } else {
-                    dead_tmp(tmp2);
+                    tcg_temp_free_i32(tmp2);
                 }
             } else if (op == 10) {
                 /* VSHLL */
@@ -5076,8 +5083,6 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     case 8: case 9: case 10: case 11: case 12: case 13:
                         /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
-                        dead_tmp(tmp2);
-                        dead_tmp(tmp);
                         break;
                     case 14: /* Polynomial VMULL */
                         cpu_abort(env, "Polynomial VMULL not implemented");
@@ -5228,6 +5233,10 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         return 1;
 
                     tmp2 = neon_get_scalar(size, rm);
+                    /* We need a copy of tmp2 because gen_neon_mull
+                     * deletes it during pass 0.  */
+                    tmp4 = new_tmp();
+                    tcg_gen_mov_i32(tmp4, tmp2);
                     tmp3 = neon_load_reg(rn, 1);
 
                     for (pass = 0; pass < 2; pass++) {
@@ -5235,9 +5244,9 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             tmp = neon_load_reg(rn, 0);
                         } else {
                             tmp = tmp3;
+                            tmp2 = tmp4;
                         }
                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
-                        dead_tmp(tmp);
                         if (op == 6 || op == 7) {
                             gen_neon_negl(cpu_V0, size);
                         }
@@ -5264,7 +5273,6 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         neon_store_reg64(cpu_V0, rd + pass);
                     }
 
-                    dead_tmp(tmp2);
 
                     break;
                 default: /* 14 and 15 are RESERVED */