Patchwork [02/12] target-arm: optimize thumb 32bit multiply

login
register
mail settings
Submitter Juha.Riihimaki@nokia.com
Date Oct. 21, 2009, 10:17 a.m.
Message ID <82376447-482D-43AA-99FA-73C4A7484615@nokia.com>
Download mbox | patch
Permalink /patch/36517/
State New
Headers show

Comments

Juha.Riihimaki@nokia.com - Oct. 21, 2009, 10:17 a.m.
The current implementation of the Thumb mul instruction performs a
32x32->64 multiply and then uses only the 32 least significant bits of
the result. Replace it with a simple 32x32->32 multiply.

Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com>
---
              break;
Laurent Desnogues - Oct. 21, 2009, 10:27 a.m.
On Wed, Oct 21, 2009 at 12:17 PM,  <Juha.Riihimaki@nokia.com> wrote:
> Current implementation of thumb mul instruction is implemented as a
> 32x32->64 multiply which then uses only 32 least significant bits of
> the result. Replace that with a simple 32x32->32 multiply.
>
> Signed-off-by: Juha Riihimäki <juha.riihimaki@nokia.com>

Acked-by: Laurent Desnogues <laurent.desnogues@gmail.com>

> ---
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index bda105e..3ea9d51 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -310,22 +310,6 @@ static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
>      return tmp1;
>  }
>
> -/* Unsigned 32x32->64 multiply.  */
> -static void gen_mull(TCGv a, TCGv b)
> -{
> -    TCGv_i64 tmp1 = tcg_temp_new_i64();
> -    TCGv_i64 tmp2 = tcg_temp_new_i64();
> -
> -    tcg_gen_extu_i32_i64(tmp1, a);
> -    tcg_gen_extu_i32_i64(tmp2, b);
> -    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
> -    tcg_temp_free_i64(tmp2);
> -    tcg_gen_trunc_i64_i32(a, tmp1);
> -    tcg_gen_shri_i64(tmp1, tmp1, 32);
> -    tcg_gen_trunc_i64_i32(b, tmp1);
> -    tcg_temp_free_i64(tmp1);
> -}
> -
>  /* Signed 32x32->64 multiply.  */
>  static void gen_imull(TCGv a, TCGv b)
>  {
> @@ -8358,7 +8342,7 @@ static void disas_thumb_insn(CPUState *env,
> DisasContext *s)
>                  gen_logic_CC(tmp);
>              break;
>          case 0xd: /* mul */
> -            gen_mull(tmp, tmp2);
> +            tcg_gen_mul_i32(tmp, tmp, tmp2);
>              if (!s->condexec_mask)
>                  gen_logic_CC(tmp);
>              break;
>

Patch

diff --git a/target-arm/translate.c b/target-arm/translate.c
index bda105e..3ea9d51 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -310,22 +310,6 @@  static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
      return tmp1;
  }

-/* Unsigned 32x32->64 multiply.  */
-static void gen_mull(TCGv a, TCGv b)
-{
-    TCGv_i64 tmp1 = tcg_temp_new_i64();
-    TCGv_i64 tmp2 = tcg_temp_new_i64();
-
-    tcg_gen_extu_i32_i64(tmp1, a);
-    tcg_gen_extu_i32_i64(tmp2, b);
-    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
-    tcg_temp_free_i64(tmp2);
-    tcg_gen_trunc_i64_i32(a, tmp1);
-    tcg_gen_shri_i64(tmp1, tmp1, 32);
-    tcg_gen_trunc_i64_i32(b, tmp1);
-    tcg_temp_free_i64(tmp1);
-}
-
  /* Signed 32x32->64 multiply.  */
  static void gen_imull(TCGv a, TCGv b)
  {
@@ -8358,7 +8342,7 @@  static void disas_thumb_insn(CPUState *env, DisasContext *s)
                  gen_logic_CC(tmp);
              break;
          case 0xd: /* mul */
-            gen_mull(tmp, tmp2);
+            tcg_gen_mul_i32(tmp, tmp, tmp2);
              if (!s->condexec_mask)
                  gen_logic_CC(tmp);