
[1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree.

Message ID 20240416063927.99428-2-rathc@linux.ibm.com
State New
Series target/ppc: Move fixed-point insns to decodetree.

Commit Message

Chinmay Rath April 16, 2024, 6:39 a.m. UTC
Moving the following instructions to the decodetree specification:
	mulli                   	: D-form
	mul{lw, lwo, hw, hwu}[.]	: XO-form

The changes were verified by checking that the TCG ops generated by these
instructions, captured with the '-d in_asm,op' flag, remain the same.

Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
 target/ppc/insn32.decode                   |  9 +++
 target/ppc/translate.c                     | 89 ----------------------
 target/ppc/translate/fixedpoint-impl.c.inc | 71 +++++++++++++++++
 3 files changed, 80 insertions(+), 89 deletions(-)

Comments

Richard Henderson April 16, 2024, 5:56 p.m. UTC | #1
On 4/15/24 23:39, Chinmay Rath wrote:
> Moving the following instructions to decodetree specification :
> 	mulli                   	: D-form
> 	mul{lw, lwo, hw, hwu}[.]	: XO-form
> 
> The changes were verified by validating that the tcg ops generated by those
> instructions remain the same, which were captured with the '-d in_asm,op' flag.
> 
> Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
> ---
>   target/ppc/insn32.decode                   |  9 +++
>   target/ppc/translate.c                     | 89 ----------------------
>   target/ppc/translate/fixedpoint-impl.c.inc | 71 +++++++++++++++++
>   3 files changed, 80 insertions(+), 89 deletions(-)

This is an accurate reorg of the current code, so
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

However, as follow-up, the code generation could be cleaned up:


> +static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a)
> +{
> +#if defined(TARGET_PPC64)
> +    TCGv_i64 t0, t1;
> +    t0 = tcg_temp_new_i64();
> +    t1 = tcg_temp_new_i64();
> +    tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]);
> +    tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]);
> +    tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1);
> +#else
> +    tcg_gen_mul_i32(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]);
> +#endif
> +    if (unlikely(a->rc)) {
> +        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
> +    }
> +    return true;
> +}

Without ifdefs:

     TCGv t0 = tcg_temp_new();
     TCGv t1 = tcg_temp_new();

     tcg_gen_ext32s_tl(t0, ra);
     tcg_gen_ext32s_tl(t1, rb);
     tcg_gen_mul_tl(rt, t0, t1);

For ppc32, ext32s_tl will turn into a mov, which will be optimized away.  So ideal code 
generation for both modes.
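
For reference, the whole trans_MULLW could then be written as follows (a sketch of the suggested follow-up, not the code as posted; it reuses the cpu_gpr[] and gen_set_Rc0 helpers already used by the patch):

static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a)
{
    /*
     * Sketch only: sign-extend both 32-bit operands and multiply at target
     * width.  On ppc32 the ext32s_tl ops degenerate to movs and are
     * optimized away, so both modes get the ideal code.
     */
    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();

    tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]);
    tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]);
    tcg_gen_mul_tl(cpu_gpr[a->rt], t0, t1);
    if (unlikely(a->rc)) {
        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
    }
    return true;
}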


> +static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a)
> +{
> +    TCGv_i32 t0 = tcg_temp_new_i32();
> +    TCGv_i32 t1 = tcg_temp_new_i32();
> +
> +    tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]);
> +    tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]);
> +    tcg_gen_muls2_i32(t0, t1, t0, t1);
> +#if defined(TARGET_PPC64)
> +    tcg_gen_concat_i32_i64(cpu_gpr[a->rt], t0, t1);
> +#else
> +    tcg_gen_mov_i32(cpu_gpr[a->rt], t0);
> +#endif
> +
> +    tcg_gen_sari_i32(t0, t0, 31);
> +    tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
> +    tcg_gen_extu_i32_tl(cpu_ov, t0);

Usually hosts need to create the full 64-bit product and then break it apart for 
tcg_gen_muls2_i32, so split followed immediately by concatenate isn't great.


     TCGv t0 = tcg_temp_new();
     TCGv t1 = tcg_temp_new();

#ifdef TARGET_PPC64
     tcg_gen_ext32s_i64(t0, ra);
     tcg_gen_ext32s_i64(t1, rb);
     tcg_gen_mul_i64(rt, t0, t1);
     tcg_gen_sextract_i64(t0, rt, 31, 1);
     tcg_gen_sari_i64(t1, rt, 32);
#else
     tcg_gen_muls2_i32(rt, t1, ra, rb);
     tcg_gen_sari_i32(t0, rt, 31);
#endif
     tcg_gen_setcond_tl(TCG_COND_NE, cpu_ov, t0, t1);


> +    if (is_isa300(ctx)) {
> +        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
> +    }
> +    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
> +
> +    if (unlikely(a->rc)) {
> +        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
> +    }
> +    return true;
> +}
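
Putting the suggestion together with the unchanged OV32/SO/Rc0 tail quoted above, the follow-up trans_MULLWO might then read (again only a sketch, not the posted code):

static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a)
{
    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();

#if defined(TARGET_PPC64)
    /* Form the full 64-bit product once, then derive the overflow inputs. */
    tcg_gen_ext32s_i64(t0, cpu_gpr[a->ra]);
    tcg_gen_ext32s_i64(t1, cpu_gpr[a->rb]);
    tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1);
    tcg_gen_sextract_i64(t0, cpu_gpr[a->rt], 31, 1);
    tcg_gen_sari_i64(t1, cpu_gpr[a->rt], 32);
#else
    tcg_gen_muls2_i32(cpu_gpr[a->rt], t1, cpu_gpr[a->ra], cpu_gpr[a->rb]);
    tcg_gen_sari_i32(t0, cpu_gpr[a->rt], 31);
#endif
    /* Overflow iff the high half is not the sign extension of the low half. */
    tcg_gen_setcond_tl(TCG_COND_NE, cpu_ov, t0, t1);
    if (is_isa300(ctx)) {
        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
    }
    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);

    if (unlikely(a->rc)) {
        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
    }
    return true;
}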


r~
Chinmay Rath April 19, 2024, 9:08 a.m. UTC | #2
Hi Richard,

On 4/16/24 23:26, Richard Henderson wrote:
> On 4/15/24 23:39, Chinmay Rath wrote:
>> Moving the following instructions to decodetree specification :
>>     mulli                       : D-form
>>     mul{lw, lwo, hw, hwu}[.]    : XO-form
>>
>> The changes were verified by validating that the tcg ops generated by 
>> those
>> instructions remain the same, which were captured with the '-d 
>> in_asm,op' flag.
>>
>> Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
>> ---
>>   target/ppc/insn32.decode                   |  9 +++
>>   target/ppc/translate.c                     | 89 ----------------------
>>   target/ppc/translate/fixedpoint-impl.c.inc | 71 +++++++++++++++++
>>   3 files changed, 80 insertions(+), 89 deletions(-)
>
> This is an accurate reorg of the current code, so
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

Thank you.
>
> However, as follow-up, the code generation could be cleaned up:
>
>
>> +static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a)
>> +{
>> +#if defined(TARGET_PPC64)
>> +    TCGv_i64 t0, t1;
>> +    t0 = tcg_temp_new_i64();
>> +    t1 = tcg_temp_new_i64();
>> +    tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]);
>> +    tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]);
>> +    tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1);
>> +#else
>> +    tcg_gen_mul_i32(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]);
>> +#endif
>> +    if (unlikely(a->rc)) {
>> +        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
>> +    }
>> +    return true;
>> +}
>
> Without ifdefs:
>
>     TCGv t0 = tcg_temp_new();
>     TCGv t1 = tcg_temp_new();
>
>     tcg_gen_ext32s_tl(t0, ra);
>     tcg_gen_ext32s_tl(t1, rb);
>     tcg_gen_mul_tl(rt, t0, t1);
>
> For ppc32, ext32s_tl will turn into a mov, which will be optimized 
> away.  So ideal code generation for both modes.
>
>
>> +static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a)
>> +{
>> +    TCGv_i32 t0 = tcg_temp_new_i32();
>> +    TCGv_i32 t1 = tcg_temp_new_i32();
>> +
>> +    tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]);
>> +    tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]);
>> +    tcg_gen_muls2_i32(t0, t1, t0, t1);
>> +#if defined(TARGET_PPC64)
>> +    tcg_gen_concat_i32_i64(cpu_gpr[a->rt], t0, t1);
>> +#else
>> +    tcg_gen_mov_i32(cpu_gpr[a->rt], t0);
>> +#endif
>> +
>> +    tcg_gen_sari_i32(t0, t0, 31);
>> +    tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
>> +    tcg_gen_extu_i32_tl(cpu_ov, t0);
>
> Usually hosts need to create the full 64-bit product and then break it 
> apart for tcg_gen_muls2_i32, so split followed immediately by 
> concatenate isn't great.
>
>
>     TCGv t0 = tcg_temp_new();
>     TCGv t1 = tcg_temp_new();
>
> #ifdef TARGET_PPC64
>     tcg_gen_ext32s_i64(t0, ra);
>     tcg_gen_ext32s_i64(t1, rb);
>     tcg_gen_mul_i64(rt, t0, t1);
>     tcg_gen_sextract_i64(t0, rt, 31, 1);
>     tcg_gen_sari_i64(t1, rt, 32);
> #else
>     tcg_gen_muls2_i32(rt, t1, ra, rb);
>     tcg_gen_sari_i32(t0, rt, 31);
> #endif
>     tcg_gen_setcond_tl(TCG_COND_NE, cpu_ov, t0, t1);
>
Sure, will update in v2.
Thanks,
Chinmay
>
>> +    if (is_isa300(ctx)) {
>> +        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
>> +    }
>> +    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
>> +
>> +    if (unlikely(a->rc)) {
>> +        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
>> +    }
>> +    return true;
>> +}
>
>
> r~
>

Patch

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index eada59f59f..0184680db8 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -193,6 +193,9 @@ 
 &XO_ta          rt ra oe:bool rc:bool
 @XO_ta          ...... rt:5 ra:5 ..... oe:1 ......... rc:1      &XO_ta
 
+&XO_tab_rc      rt ra rb rc:bool
+@XO_tab_rc      ...... rt:5 ra:5 rb:5 . ......... rc:1          &XO_tab_rc
+
 %xx_xt          0:1 21:5
 %xx_xb          1:1 11:5
 %xx_xa          2:1 16:5
@@ -353,6 +356,12 @@  SUBFE           011111 ..... ..... ..... . 010001000 .  @XO
 SUBFME          011111 ..... ..... ----- . 011101000 .  @XO_ta
 SUBFZE          011111 ..... ..... ----- . 011001000 .  @XO_ta
 
+MULLI           000111 ..... ..... ................     @D
+MULLW           011111 ..... ..... ..... 0 011101011 .  @XO_tab_rc
+MULLWO          011111 ..... ..... ..... 1 011101011 .  @XO_tab_rc
+MULHW           011111 ..... ..... ..... - 001001011 .  @XO_tab_rc
+MULHWU          011111 ..... ..... ..... - 000001011 .  @XO_tab_rc
+
 ## Fixed-Point Logical Instructions
 
 CFUGED          011111 ..... ..... ..... 0011011100 -   @X
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 93ffec787c..c45547a770 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -1948,90 +1948,6 @@  GEN_INT_ARITH_MODD(modud, 0x08, 0);
 GEN_INT_ARITH_MODD(modsd, 0x18, 1);
 #endif
 
-/* mulhw  mulhw. */
-static void gen_mulhw(DisasContext *ctx)
-{
-    TCGv_i32 t0 = tcg_temp_new_i32();
-    TCGv_i32 t1 = tcg_temp_new_i32();
-
-    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_muls2_i32(t0, t1, t0, t1);
-    tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1);
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-
-/* mulhwu  mulhwu.  */
-static void gen_mulhwu(DisasContext *ctx)
-{
-    TCGv_i32 t0 = tcg_temp_new_i32();
-    TCGv_i32 t1 = tcg_temp_new_i32();
-
-    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_mulu2_i32(t0, t1, t0, t1);
-    tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1);
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-
-/* mullw  mullw. */
-static void gen_mullw(DisasContext *ctx)
-{
-#if defined(TARGET_PPC64)
-    TCGv_i64 t0, t1;
-    t0 = tcg_temp_new_i64();
-    t1 = tcg_temp_new_i64();
-    tcg_gen_ext32s_tl(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_ext32s_tl(t1, cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_mul_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
-#else
-    tcg_gen_mul_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
-                    cpu_gpr[rB(ctx->opcode)]);
-#endif
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-
-/* mullwo  mullwo. */
-static void gen_mullwo(DisasContext *ctx)
-{
-    TCGv_i32 t0 = tcg_temp_new_i32();
-    TCGv_i32 t1 = tcg_temp_new_i32();
-
-    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_muls2_i32(t0, t1, t0, t1);
-#if defined(TARGET_PPC64)
-    tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
-#else
-    tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], t0);
-#endif
-
-    tcg_gen_sari_i32(t0, t0, 31);
-    tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
-    tcg_gen_extu_i32_tl(cpu_ov, t0);
-    if (is_isa300(ctx)) {
-        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
-    }
-    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
-
-    if (unlikely(Rc(ctx->opcode) != 0)) {
-        gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
-    }
-}
-
-/* mulli */
-static void gen_mulli(DisasContext *ctx)
-{
-    tcg_gen_muli_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
-                    SIMM(ctx->opcode));
-}
-
 #if defined(TARGET_PPC64)
 /* mulhd  mulhd. */
 static void gen_mulhd(DisasContext *ctx)
@@ -6430,11 +6346,6 @@  GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205),
 GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL),
-GEN_HANDLER(mulhw, 0x1F, 0x0B, 0x02, 0x00000400, PPC_INTEGER),
-GEN_HANDLER(mulhwu, 0x1F, 0x0B, 0x00, 0x00000400, PPC_INTEGER),
-GEN_HANDLER(mullw, 0x1F, 0x0B, 0x07, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(mullwo, 0x1F, 0x0B, 0x17, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(mulli, 0x07, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(mulld, 0x1F, 0x09, 0x07, 0x00000000, PPC_64B),
 #endif
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
index 0c66465d96..e12e533c67 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -395,6 +395,77 @@  TRANS(SUBFE, do_subf_XO, true, true)
 TRANS(SUBFME, do_subf_const_XO, tcg_constant_tl(-1LL), true, true)
 TRANS(SUBFZE, do_subf_const_XO, tcg_constant_tl(0), true, true)
 
+static bool trans_MULLI(DisasContext *ctx, arg_MULLI *a)
+{
+    tcg_gen_muli_tl(cpu_gpr[a->rt], cpu_gpr[a->ra], a->si);
+    return true;
+}
+
+static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a)
+{
+#if defined(TARGET_PPC64)
+    TCGv_i64 t0, t1;
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+    tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]);
+    tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]);
+    tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1);
+#else
+    tcg_gen_mul_i32(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]);
+#endif
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+    }
+    return true;
+}
+
+static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a)
+{
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
+
+    tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]);
+    tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]);
+    tcg_gen_muls2_i32(t0, t1, t0, t1);
+#if defined(TARGET_PPC64)
+    tcg_gen_concat_i32_i64(cpu_gpr[a->rt], t0, t1);
+#else
+    tcg_gen_mov_i32(cpu_gpr[a->rt], t0);
+#endif
+
+    tcg_gen_sari_i32(t0, t0, 31);
+    tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
+    tcg_gen_extu_i32_tl(cpu_ov, t0);
+    if (is_isa300(ctx)) {
+        tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+    }
+    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
+
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+    }
+    return true;
+}
+
+static bool do_mulhw(DisasContext *ctx, arg_XO_tab_rc *a,
+                     void (*helper)(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1,
+                                    TCGv_i32 arg2))
+{
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]);
+    tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]);
+    helper(t0, t1, t0, t1);
+    tcg_gen_extu_i32_tl(cpu_gpr[a->rt], t1);
+    if (unlikely(a->rc)) {
+        gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+    }
+    return true;
+}
+
+TRANS(MULHW, do_mulhw, tcg_gen_muls2_i32)
+TRANS(MULHWU, do_mulhw, tcg_gen_mulu2_i32)
+
 static bool trans_INVALID(DisasContext *ctx, arg_INVALID *a)
 {
     gen_invalid(ctx);