Patchwork tcg: Optimize some forms of deposit.

login
register
mail settings
Submitter Richard Henderson
Date Oct. 27, 2011, 9:15 p.m.
Message ID <1319750100-7592-1-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/122250/
State New
Headers show

Comments

Richard Henderson - Oct. 27, 2011, 9:15 p.m.
If the deposit replaces the entire word, optimize to a move.

If we're inserting to the top of the word, avoid the mask of arg2
as we'll be shifting out all of the garbage and shifting in zeros.

If the host is 32-bit, reduce a 64-bit deposit to a 32-bit deposit
when possible.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/tcg-op.h |   65 +++++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 51 insertions(+), 14 deletions(-)

	V2: checkpatch errors fixed.
Blue Swirl - Oct. 30, 2011, 11:01 a.m.
Thanks, applied.

On Thu, Oct 27, 2011 at 21:15, Richard Henderson <rth@twiddle.net> wrote:
> If the deposit replaces the entire word, optimize to a move.
>
> If we're inserting to the top of the word, avoid the mask of arg2
> as we'll be shifting out all of the garbage and shifting in zeros.
>
> If the host is 32-bit, reduce a 64-bit deposit to a 32-bit deposit
> when possible.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/tcg-op.h |   65 +++++++++++++++++++++++++++++++++++++++++++++------------
>  1 files changed, 51 insertions(+), 14 deletions(-)
>
>        V2: checkpatch errors fixed.
>
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index fea5983..24ec7fc 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -2045,38 +2045,75 @@ static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1,
>                                       TCGv_i32 arg2, unsigned int ofs,
>                                       unsigned int len)
>  {
> +    uint32_t mask;
> +    TCGv_i32 t1;
> +
> +    if (ofs == 0 && len == 32) {
> +        tcg_gen_mov_i32(ret, arg2);
> +        return;
> +    }
>     if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
>         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
> -    } else {
> -        uint32_t mask = (1u << len) - 1;
> -        TCGv_i32 t1 = tcg_temp_new_i32 ();
> +        return;
> +    }
> +
> +    mask = (1u << len) - 1;
> +    t1 = tcg_temp_new_i32();
>
> +    if (ofs + len < 32) {
>         tcg_gen_andi_i32(t1, arg2, mask);
>         tcg_gen_shli_i32(t1, t1, ofs);
> -        tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
> -        tcg_gen_or_i32(ret, ret, t1);
> -
> -        tcg_temp_free_i32(t1);
> +    } else {
> +        tcg_gen_shli_i32(t1, arg2, ofs);
>     }
> +    tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
> +    tcg_gen_or_i32(ret, ret, t1);
> +
> +    tcg_temp_free_i32(t1);
>  }
>
>  static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
>                                       TCGv_i64 arg2, unsigned int ofs,
>                                       unsigned int len)
>  {
> +    uint64_t mask;
> +    TCGv_i64 t1;
> +
> +    if (ofs == 0 && len == 64) {
> +        tcg_gen_mov_i64(ret, arg2);
> +        return;
> +    }
>     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
>         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
> -    } else {
> -        uint64_t mask = (1ull << len) - 1;
> -        TCGv_i64 t1 = tcg_temp_new_i64 ();
> +        return;
> +    }
>
> +#if TCG_TARGET_REG_BITS == 32
> +    if (ofs >= 32) {
> +        tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
> +                            TCGV_LOW(arg2), ofs - 32, len);
> +        return;
> +    }
> +    if (ofs + len <= 32) {
> +        tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
> +                            TCGV_LOW(arg2), ofs, len);
> +        return;
> +    }
> +#endif
> +
> +    mask = (1ull << len) - 1;
> +    t1 = tcg_temp_new_i64();
> +
> +    if (ofs + len < 64) {
>         tcg_gen_andi_i64(t1, arg2, mask);
>         tcg_gen_shli_i64(t1, t1, ofs);
> -        tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
> -        tcg_gen_or_i64(ret, ret, t1);
> -
> -        tcg_temp_free_i64(t1);
> +    } else {
> +        tcg_gen_shli_i64(t1, arg2, ofs);
>     }
> +    tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
> +    tcg_gen_or_i64(ret, ret, t1);
> +
> +    tcg_temp_free_i64(t1);
>  }
>
>  /***************************************/
> --
> 1.7.4.4
>
>
Alexander Graf - Oct. 31, 2011, 3:47 a.m.
On 27.10.2011, at 23:15, Richard Henderson wrote:

> If the deposit replaces the entire word, optimize to a move.
> 
> If we're inserting to the top of the word, avoid the mask of arg2
> as we'll be shifting out all of the garbage and shifting in zeros.
> 
> If the host is 32-bit, reduce a 64-bit deposit to a 32-bit deposit
> when possible.

This patch breaks qemu-system-ppc64 on ppc32 hosts:

IN: 
0x00000000fff08618:  mfmsr   r0
0x00000000fff0861c:  ori     r0,r0,48
0x00000000fff08620:  mtmsr   r0

OP:
 ---- 0xfff08618
 mov_i32 r0_0,msr_0
 mov_i32 r0_1,msr_1

 ---- 0xfff0861c
 movi_i32 tmp0,$0x30
 or_i32 r0_0,r0_0,tmp0

 ---- 0xfff08620
 movi_i32 nip_0,$0xfff08624
 movi_i32 nip_1,$0x0
 mov_i32 tmp1,r0_0
 movi_i32 tmp0,$store_msr
 call tmp0,$0x0,$0,tmp2,tmp1
 movi_i32 nip_0,$0xfff08624
 movi_i32 nip_1,$0x0
 exit_tb $0x0

OP after liveness analysis:
 ---- 0xfff08618
 mov_i32 r0_0,msr_0
 mov_i32 r0_1,msr_1

 ---- 0xfff0861c
 movi_i32 tmp0,$0x30
 or_i32 r0_0,r0_0,tmp0

 ---- 0xfff08620
 movi_i32 nip_0,$0xfff08624
 movi_i32 nip_1,$0x0
 mov_i32 tmp1,r0_0
 movi_i32 tmp0,$store_msr
 call tmp0,$0x0,$0,tmp2,tmp1
 movi_i32 nip_0,$0xfff08624
 movi_i32 nip_1,$0x0
 exit_tb $0x0
 end 


agraf@lychee:/home/agraf/release/qemu> ./ppc64-softmmu/qemu-system-ppc64 -kernel /boot/vmlinux -initrd /boot/initrd -nographic -d in_asm,cpu,int,op,op_opt,out_asm
/home/agraf/release/qemu/tcg/tcg.c:1929: tcg fatal error
Aborted



Alex
Blue Swirl - Nov. 1, 2011, 7:26 p.m.
On Mon, Oct 31, 2011 at 03:47, Alexander Graf <agraf@suse.de> wrote:
>
> On 27.10.2011, at 23:15, Richard Henderson wrote:
>
>> If the deposit replaces the entire word, optimize to a move.
>>
>> If we're inserting to the top of the word, avoid the mask of arg2
>> as we'll be shifting out all of the garbage and shifting in zeros.
>>
>> If the host is 32-bit, reduce a 64-bit deposit to a 32-bit deposit
>> when possible.
>
> This patch breaks qemu-system-ppc64 on ppc32 hosts:
>
> IN:
> 0x00000000fff08618:  mfmsr   r0
> 0x00000000fff0861c:  ori     r0,r0,48
> 0x00000000fff08620:  mtmsr   r0
>
> OP:
>  ---- 0xfff08618
>  mov_i32 r0_0,msr_0
>  mov_i32 r0_1,msr_1
>
>  ---- 0xfff0861c
>  movi_i32 tmp0,$0x30
>  or_i32 r0_0,r0_0,tmp0
>
>  ---- 0xfff08620
>  movi_i32 nip_0,$0xfff08624
>  movi_i32 nip_1,$0x0
>  mov_i32 tmp1,r0_0
>  movi_i32 tmp0,$store_msr
>  call tmp0,$0x0,$0,tmp2,tmp1

tmp2 is not defined. Where does it come from?

The patch still looks fine to me. Maybe the problem is with the
optimizer, or a different bug is exposed by one of these. Can you check
whether #undefining USE_TCG_OPTIMIZATIONS changes anything?

>  movi_i32 nip_0,$0xfff08624
>  movi_i32 nip_1,$0x0
>  exit_tb $0x0
>
> OP after liveness analysis:
>  ---- 0xfff08618
>  mov_i32 r0_0,msr_0
>  mov_i32 r0_1,msr_1
>
>  ---- 0xfff0861c
>  movi_i32 tmp0,$0x30
>  or_i32 r0_0,r0_0,tmp0
>
>  ---- 0xfff08620
>  movi_i32 nip_0,$0xfff08624
>  movi_i32 nip_1,$0x0
>  mov_i32 tmp1,r0_0
>  movi_i32 tmp0,$store_msr
>  call tmp0,$0x0,$0,tmp2,tmp1
>  movi_i32 nip_0,$0xfff08624
>  movi_i32 nip_1,$0x0
>  exit_tb $0x0
>  end
>
>
> agraf@lychee:/home/agraf/release/qemu> ./ppc64-softmmu/qemu-system-ppc64 -kernel /boot/vmlinux -initrd /boot/initrd -nographic -d in_asm,cpu,int,op,op_opt,out_asm
> /home/agraf/release/qemu/tcg/tcg.c:1929: tcg fatal error
> Aborted
>
>
>
> Alex
>
>

Patch

diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index fea5983..24ec7fc 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2045,38 +2045,75 @@  static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1,
 				       TCGv_i32 arg2, unsigned int ofs,
 				       unsigned int len)
 {
+    uint32_t mask;
+    TCGv_i32 t1;
+
+    if (ofs == 0 && len == 32) {
+        tcg_gen_mov_i32(ret, arg2);
+        return;
+    }
     if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
-    } else {
-        uint32_t mask = (1u << len) - 1;
-        TCGv_i32 t1 = tcg_temp_new_i32 ();
+        return;
+    }
+
+    mask = (1u << len) - 1;
+    t1 = tcg_temp_new_i32();
 
+    if (ofs + len < 32) {
         tcg_gen_andi_i32(t1, arg2, mask);
         tcg_gen_shli_i32(t1, t1, ofs);
-        tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
-        tcg_gen_or_i32(ret, ret, t1);
-
-        tcg_temp_free_i32(t1);
+    } else {
+        tcg_gen_shli_i32(t1, arg2, ofs);
     }
+    tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
+    tcg_gen_or_i32(ret, ret, t1);
+
+    tcg_temp_free_i32(t1);
 }
 
 static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
 				       TCGv_i64 arg2, unsigned int ofs,
 				       unsigned int len)
 {
+    uint64_t mask;
+    TCGv_i64 t1;
+
+    if (ofs == 0 && len == 64) {
+        tcg_gen_mov_i64(ret, arg2);
+        return;
+    }
     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
-    } else {
-        uint64_t mask = (1ull << len) - 1;
-        TCGv_i64 t1 = tcg_temp_new_i64 ();
+        return;
+    }
 
+#if TCG_TARGET_REG_BITS == 32
+    if (ofs >= 32) {
+        tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
+                            TCGV_LOW(arg2), ofs - 32, len);
+        return;
+    }
+    if (ofs + len <= 32) {
+        tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
+                            TCGV_LOW(arg2), ofs, len);
+        return;
+    }
+#endif
+
+    mask = (1ull << len) - 1;
+    t1 = tcg_temp_new_i64();
+
+    if (ofs + len < 64) {
         tcg_gen_andi_i64(t1, arg2, mask);
         tcg_gen_shli_i64(t1, t1, ofs);
-        tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
-        tcg_gen_or_i64(ret, ret, t1);
-
-        tcg_temp_free_i64(t1);
+    } else {
+        tcg_gen_shli_i64(t1, arg2, ofs);
     }
+    tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
+    tcg_gen_or_i64(ret, ret, t1);
+
+    tcg_temp_free_i64(t1);
 }
 
 /***************************************/