Patchwork [10/10] target-arm: Fix shift by immediate and narrow where src, dest overlap

login
register
mail settings
Submitter Peter Maydell
Date Feb. 15, 2011, 1:44 p.m.
Message ID <1297777490-5323-11-git-send-email-peter.maydell@linaro.org>
Download mbox | patch
Permalink /patch/83248/
State New
Headers show

Comments

Peter Maydell - Feb. 15, 2011, 1:44 p.m.
For Neon shifts by immediate and narrow, correctly handle the case
where the source registers and the destination registers overlap
(the second pass should use the original register contents, not the
results of the first pass).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/translate.c |   38 +++++++++++++++++++++++++++-----------
 1 files changed, 27 insertions(+), 11 deletions(-)
Aurelien Jarno - Feb. 20, 2011, 4:52 p.m.
On Tue, Feb 15, 2011 at 01:44:50PM +0000, Peter Maydell wrote:
> For Neon shifts by immediate and narrow, correctly handle the case
> where the source registers and the destination registers overlap
> (the second pass should use the original register contents, not the
> results of the first pass).
> 
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
>  target-arm/translate.c |   38 +++++++++++++++++++++++++++-----------
>  1 files changed, 27 insertions(+), 11 deletions(-)

That looks correct, but it makes GCC (tested 4.3 to 4.6) complain:

| cc1: warnings being treated as errors
| qemu/target-arm/translate.c: In function ‘disas_neon_data_insn’:
| qemu/target-arm/translate.c:4185: error: ‘tmp4’ may be used uninitialized in this function
| qemu/target-arm/translate.c:4185: error: ‘tmp5’ may be used uninitialized in this function

At a quick look, it seems to be a GCC issue, but we have no other
choice than to work around it.

> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index a02b20f..4d5d305 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -4839,31 +4839,47 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                      abort();
>                  }
>  
> +                if (size == 3) {
> +                    neon_load_reg64(cpu_V0, rm);
> +                    neon_load_reg64(cpu_V1, rm + 1);
> +                } else {
> +                    tmp4 = neon_load_reg(rm + 1, 0);
> +                    tmp5 = neon_load_reg(rm + 1, 1);
> +                }
>                  for (pass = 0; pass < 2; pass++) {
>                      if (size == 3) {
> -                        neon_load_reg64(cpu_V0, rm + pass);
> +                        TCGv_i64 in;
> +                        if (pass == 0) {
> +                            in = cpu_V0;
> +                        } else {
> +                            in = cpu_V1;
> +                        }
>                          if (q) {
>                              if (input_unsigned) {
> -                                gen_helper_neon_rshl_u64(cpu_V0, cpu_V0,
> -                                                         tmp64);
> +                                gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
>                              } else {
> -                                gen_helper_neon_rshl_s64(cpu_V0, cpu_V0,
> -                                                         tmp64);
> +                                gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
>                              }
>                          } else {
>                              if (input_unsigned) {
> -                                gen_helper_neon_shl_u64(cpu_V0, cpu_V0,
> -                                                        tmp64);
> +                                gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
>                              } else {
> -                                gen_helper_neon_shl_s64(cpu_V0, cpu_V0,
> -                                                        tmp64);
> +                                gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
>                              }
>                          }
>                      } else {
> -                        tmp = neon_load_reg(rm + pass, 0);
> +                        if (pass == 0) {
> +                            tmp = neon_load_reg(rm, 0);
> +                        } else {
> +                            tmp = tmp4;
> +                        }
>                          gen_neon_shift_narrow(size, tmp, tmp2, q,
>                                                input_unsigned);
> -                        tmp3 = neon_load_reg(rm + pass, 1);
> +                        if (pass == 0) {
> +                            tmp3 = neon_load_reg(rm, 1);
> +                        } else {
> +                            tmp3 = tmp5;
> +                        }
>                          gen_neon_shift_narrow(size, tmp3, tmp2, q,
>                                                input_unsigned);
>                          tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
> -- 
> 1.7.1
> 
> 
>

Patch

diff --git a/target-arm/translate.c b/target-arm/translate.c
index a02b20f..4d5d305 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4839,31 +4839,47 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     abort();
                 }
 
+                if (size == 3) {
+                    neon_load_reg64(cpu_V0, rm);
+                    neon_load_reg64(cpu_V1, rm + 1);
+                } else {
+                    tmp4 = neon_load_reg(rm + 1, 0);
+                    tmp5 = neon_load_reg(rm + 1, 1);
+                }
                 for (pass = 0; pass < 2; pass++) {
                     if (size == 3) {
-                        neon_load_reg64(cpu_V0, rm + pass);
+                        TCGv_i64 in;
+                        if (pass == 0) {
+                            in = cpu_V0;
+                        } else {
+                            in = cpu_V1;
+                        }
                         if (q) {
                             if (input_unsigned) {
-                                gen_helper_neon_rshl_u64(cpu_V0, cpu_V0,
-                                                         tmp64);
+                                gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
                             } else {
-                                gen_helper_neon_rshl_s64(cpu_V0, cpu_V0,
-                                                         tmp64);
+                                gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
                             }
                         } else {
                             if (input_unsigned) {
-                                gen_helper_neon_shl_u64(cpu_V0, cpu_V0,
-                                                        tmp64);
+                                gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
                             } else {
-                                gen_helper_neon_shl_s64(cpu_V0, cpu_V0,
-                                                        tmp64);
+                                gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
                             }
                         }
                     } else {
-                        tmp = neon_load_reg(rm + pass, 0);
+                        if (pass == 0) {
+                            tmp = neon_load_reg(rm, 0);
+                        } else {
+                            tmp = tmp4;
+                        }
                         gen_neon_shift_narrow(size, tmp, tmp2, q,
                                               input_unsigned);
-                        tmp3 = neon_load_reg(rm + pass, 1);
+                        if (pass == 0) {
+                            tmp3 = neon_load_reg(rm, 1);
+                        } else {
+                            tmp3 = tmp5;
+                        }
                         gen_neon_shift_narrow(size, tmp3, tmp2, q,
                                               input_unsigned);
                         tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);