diff mbox

[007/126] target-s390: Use TCG registers for FPR

Message ID 1347224784-19472-8-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Sept. 9, 2012, 9:04 p.m. UTC
At the same time, tidy other usages of tcg_gen_deposit_i64.
In some cases we can "type cast" rather than extend, and in
others we can allow tcg_gen_deposit_i64 itself to optimize
the HOST_LONG_BITS==32 case.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-s390x/translate.c | 68 ++++++++++++++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 28 deletions(-)

Comments

Aurelien Jarno Sept. 10, 2012, 2:34 p.m. UTC | #1
On Sun, Sep 09, 2012 at 02:04:25PM -0700, Richard Henderson wrote:
> At the same time, tidy other usages of tcg_gen_deposit_i64.
> In some cases we can "type cast" rather than extend, and in
> others we can allow tcg_gen_deposit_i64 itself to optimize
> the HOST_LONG_BITS==32 case.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target-s390x/translate.c | 68 ++++++++++++++++++++++++++++--------------------
>  1 file changed, 40 insertions(+), 28 deletions(-)
> 
> diff --git a/target-s390x/translate.c b/target-s390x/translate.c
> index 3080cef..bf35a65 100644
> --- a/target-s390x/translate.c
> +++ b/target-s390x/translate.c
> @@ -89,7 +89,7 @@ void cpu_dump_state(CPUS390XState *env, FILE *f, fprintf_function cpu_fprintf,
>      }
>  
>      for (i = 0; i < 16; i++) {
> -        cpu_fprintf(f, "F%02d=%016" PRIx64, i, *(uint64_t *)&env->fregs[i]);
> +        cpu_fprintf(f, "F%02d=%016" PRIx64, i, env->fregs[i].ll);
>          if ((i % 4) == 3) {
>              cpu_fprintf(f, "\n");
>          } else {
> @@ -136,21 +136,22 @@ static TCGv_i64 cc_src;
>  static TCGv_i64 cc_dst;
>  static TCGv_i64 cc_vr;
>  
> -static char cpu_reg_names[10*3 + 6*4];
> +static char cpu_reg_names[32][4];
>  static TCGv_i64 regs[16];
> +static TCGv_i64 fregs[16];
>  
>  static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
>  
>  void s390x_translate_init(void)
>  {
>      int i;
> -    size_t cpu_reg_names_size = sizeof(cpu_reg_names);
> -    char *p;
>  
>      cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
> -    psw_addr = tcg_global_mem_new_i64(TCG_AREG0, offsetof(CPUS390XState, psw.addr),
> +    psw_addr = tcg_global_mem_new_i64(TCG_AREG0,
> +                                      offsetof(CPUS390XState, psw.addr),
>                                        "psw_addr");
> -    psw_mask = tcg_global_mem_new_i64(TCG_AREG0, offsetof(CPUS390XState, psw.mask),
> +    psw_mask = tcg_global_mem_new_i64(TCG_AREG0,
> +                                      offsetof(CPUS390XState, psw.mask),
>                                        "psw_mask");
>  
>      cc_op = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUS390XState, cc_op),
> @@ -162,13 +163,18 @@ void s390x_translate_init(void)
>      cc_vr = tcg_global_mem_new_i64(TCG_AREG0, offsetof(CPUS390XState, cc_vr),
>                                     "cc_vr");
>  
> -    p = cpu_reg_names;
>      for (i = 0; i < 16; i++) {
> -        snprintf(p, cpu_reg_names_size, "r%d", i);
> +        snprintf(cpu_reg_names[i], sizeof(cpu_reg_names[0]), "r%d", i);
>          regs[i] = tcg_global_mem_new(TCG_AREG0,
> -                                     offsetof(CPUS390XState, regs[i]), p);
> -        p += (i < 10) ? 3 : 4;
> -        cpu_reg_names_size -= (i < 10) ? 3 : 4;
> +                                     offsetof(CPUS390XState, regs[i]),
> +                                     cpu_reg_names[i]);
> +    }
> +
> +    for (i = 0; i < 16; i++) {
> +        snprintf(cpu_reg_names[i + 16], sizeof(cpu_reg_names[0]), "f%d", i);
> +        fregs[i] = tcg_global_mem_new(TCG_AREG0,
> +                                      offsetof(CPUS390XState, fregs[i].d),
> +                                      cpu_reg_names[i + 16]);
>      }
>  }
>  
> @@ -182,14 +188,18 @@ static inline TCGv_i64 load_reg(int reg)
>  static inline TCGv_i64 load_freg(int reg)
>  {
>      TCGv_i64 r = tcg_temp_new_i64();
> -    tcg_gen_ld_i64(r, cpu_env, offsetof(CPUS390XState, fregs[reg].d));
> +    tcg_gen_mov_i64(r, fregs[reg]);
>      return r;
>  }
>  
>  static inline TCGv_i32 load_freg32(int reg)
>  {
>      TCGv_i32 r = tcg_temp_new_i32();
> -    tcg_gen_ld_i32(r, cpu_env, offsetof(CPUS390XState, fregs[reg].l.upper));
> +#if HOST_LONG_BITS == 32
> +    tcg_gen_mov_i32(r, TCGV_HIGH(fregs[reg]));
> +#else
> +    tcg_gen_shri_i64(MAKE_TCGV_I64(GET_TCGV_I32(r)), fregs[reg], 32);
> +#endif
>      return r;
>  }
>  
> @@ -214,39 +224,35 @@ static inline void store_reg(int reg, TCGv_i64 v)
>  
>  static inline void store_freg(int reg, TCGv_i64 v)
>  {
> -    tcg_gen_st_i64(v, cpu_env, offsetof(CPUS390XState, fregs[reg].d));
> +    tcg_gen_mov_i64(fregs[reg], v);
>  }
>  
>  static inline void store_reg32(int reg, TCGv_i32 v)
>  {
> +    /* 32 bit register writes keep the upper half */
>  #if HOST_LONG_BITS == 32
>      tcg_gen_mov_i32(TCGV_LOW(regs[reg]), v);
>  #else
> -    TCGv_i64 tmp = tcg_temp_new_i64();
> -    tcg_gen_extu_i32_i64(tmp, v);
> -    /* 32 bit register writes keep the upper half */
> -    tcg_gen_deposit_i64(regs[reg], regs[reg], tmp, 0, 32);
> -    tcg_temp_free_i64(tmp);
> +    tcg_gen_deposit_i64(regs[reg], regs[reg],
> +                        MAKE_TCGV_I64(GET_TCGV_I32(v)), 0, 32);
>  #endif
>  }
>  
>  static inline void store_reg32_i64(int reg, TCGv_i64 v)
>  {
>      /* 32 bit register writes keep the upper half */
> -#if HOST_LONG_BITS == 32
> -    tcg_gen_mov_i32(TCGV_LOW(regs[reg]), TCGV_LOW(v));
> -#else
>      tcg_gen_deposit_i64(regs[reg], regs[reg], v, 0, 32);
> -#endif
>  }
>  
>  static inline void store_reg16(int reg, TCGv_i32 v)
>  {
> -    TCGv_i64 tmp = tcg_temp_new_i64();
> -    tcg_gen_extu_i32_i64(tmp, v);
>      /* 16 bit register writes keep the upper bytes */
> -    tcg_gen_deposit_i64(regs[reg], regs[reg], tmp, 0, 16);
> -    tcg_temp_free_i64(tmp);
> +#if HOST_LONG_BITS == 32
> +    tcg_gen_deposit_i32(TCGV_LOW(regs[reg]), TCGV_LOW(regs[reg]), v, 0, 16);
> +#else
> +    tcg_gen_deposit_i64(regs[reg], regs[reg],
> +                        MAKE_TCGV_I64(GET_TCGV_I32(v)), 0, 16);
> +#endif
>  }
>  
>  static inline void store_reg8(int reg, TCGv_i64 v)
> @@ -257,7 +263,13 @@ static inline void store_reg8(int reg, TCGv_i64 v)
>  
>  static inline void store_freg32(int reg, TCGv_i32 v)
>  {
> -    tcg_gen_st_i32(v, cpu_env, offsetof(CPUS390XState, fregs[reg].l.upper));
> +    /* 32 bit register writes keep the lower half */
> +#if HOST_LONG_BITS == 32
> +    tcg_gen_mov_i32(TCGV_HIGH(fregs[reg]), v);
> +#else
> +    tcg_gen_deposit_i64(fregs[reg], fregs[reg],
> +                        MAKE_TCGV_I64(GET_TCGV_I32(v)), 32, 32);
> +#endif
>  }
>  

I am not sure we want to start mixing different layers, and especially
having some assumptions about the host inside the target. That's why I
don't think we should have things like HOST_LONG_BITS in target-*/ and
even less MAKE_TCGV_I64() and GET_TCGV_I32().

Your code take the assumption that it's possible to do moves between 32
and 64-bit registers, which might not be guaranteed on some hosts. It's
fine taking such assumptions (as long as there is also a comment) in the
TCG code.

To handle conversion between 32- and 64-bit registers we have functions
like:
- concat_i32_i64
- extu_i32_i64
- ext_i32_i64
- trunc_i64_i32

These functions work on both 32- and 64-bit hosts, accessing directly
the high and low part on 32-bit hosts.

If it is not possible to implement your FPR code using these functions,
we might want to add some more, but I really thing it's a bad idea to
have this code in the targets.
Richard Henderson Sept. 10, 2012, 2:45 p.m. UTC | #2
On 09/10/2012 07:34 AM, Aurelien Jarno wrote:
> If it is not possible to implement your FPR code using these functions,
> we might want to add some more, but I really thing it's a bad idea to
> have this code in the targets.

I thought I got rid of all this in the various rebasing.  Certainly it's
all gone by the end of the patch series.

In particular, the optimization that I was looking for is handled inside
tcg_gen_deposit_i64, so there's no point in doing this in target code too.

I'll make sure this is gone from the next revision.


r~
Aurelien Jarno Sept. 10, 2012, 2:52 p.m. UTC | #3
On Mon, Sep 10, 2012 at 07:45:04AM -0700, Richard Henderson wrote:
> On 09/10/2012 07:34 AM, Aurelien Jarno wrote:
> > If it is not possible to implement your FPR code using these functions,
> > we might want to add some more, but I really thing it's a bad idea to
> > have this code in the targets.
> 
> I thought I got rid of all this in the various rebasing.  Certainly it's
> all gone by the end of the patch series.
> 
> In particular, the optimization that I was looking for is handled inside
> tcg_gen_deposit_i64, so there's no point in doing this in target code too.
> 
> I'll make sure this is gone from the next revision.

Ok, I have to say the series being quite long, I haven't looked at it in
details. If they are gone at the end, it's pretty fine for me.
diff mbox

Patch

diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 3080cef..bf35a65 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -89,7 +89,7 @@  void cpu_dump_state(CPUS390XState *env, FILE *f, fprintf_function cpu_fprintf,
     }
 
     for (i = 0; i < 16; i++) {
-        cpu_fprintf(f, "F%02d=%016" PRIx64, i, *(uint64_t *)&env->fregs[i]);
+        cpu_fprintf(f, "F%02d=%016" PRIx64, i, env->fregs[i].ll);
         if ((i % 4) == 3) {
             cpu_fprintf(f, "\n");
         } else {
@@ -136,21 +136,22 @@  static TCGv_i64 cc_src;
 static TCGv_i64 cc_dst;
 static TCGv_i64 cc_vr;
 
-static char cpu_reg_names[10*3 + 6*4];
+static char cpu_reg_names[32][4];
 static TCGv_i64 regs[16];
+static TCGv_i64 fregs[16];
 
 static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
 
 void s390x_translate_init(void)
 {
     int i;
-    size_t cpu_reg_names_size = sizeof(cpu_reg_names);
-    char *p;
 
     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
-    psw_addr = tcg_global_mem_new_i64(TCG_AREG0, offsetof(CPUS390XState, psw.addr),
+    psw_addr = tcg_global_mem_new_i64(TCG_AREG0,
+                                      offsetof(CPUS390XState, psw.addr),
                                       "psw_addr");
-    psw_mask = tcg_global_mem_new_i64(TCG_AREG0, offsetof(CPUS390XState, psw.mask),
+    psw_mask = tcg_global_mem_new_i64(TCG_AREG0,
+                                      offsetof(CPUS390XState, psw.mask),
                                       "psw_mask");
 
     cc_op = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUS390XState, cc_op),
@@ -162,13 +163,18 @@  void s390x_translate_init(void)
     cc_vr = tcg_global_mem_new_i64(TCG_AREG0, offsetof(CPUS390XState, cc_vr),
                                    "cc_vr");
 
-    p = cpu_reg_names;
     for (i = 0; i < 16; i++) {
-        snprintf(p, cpu_reg_names_size, "r%d", i);
+        snprintf(cpu_reg_names[i], sizeof(cpu_reg_names[0]), "r%d", i);
         regs[i] = tcg_global_mem_new(TCG_AREG0,
-                                     offsetof(CPUS390XState, regs[i]), p);
-        p += (i < 10) ? 3 : 4;
-        cpu_reg_names_size -= (i < 10) ? 3 : 4;
+                                     offsetof(CPUS390XState, regs[i]),
+                                     cpu_reg_names[i]);
+    }
+
+    for (i = 0; i < 16; i++) {
+        snprintf(cpu_reg_names[i + 16], sizeof(cpu_reg_names[0]), "f%d", i);
+        fregs[i] = tcg_global_mem_new(TCG_AREG0,
+                                      offsetof(CPUS390XState, fregs[i].d),
+                                      cpu_reg_names[i + 16]);
     }
 }
 
@@ -182,14 +188,18 @@  static inline TCGv_i64 load_reg(int reg)
 static inline TCGv_i64 load_freg(int reg)
 {
     TCGv_i64 r = tcg_temp_new_i64();
-    tcg_gen_ld_i64(r, cpu_env, offsetof(CPUS390XState, fregs[reg].d));
+    tcg_gen_mov_i64(r, fregs[reg]);
     return r;
 }
 
 static inline TCGv_i32 load_freg32(int reg)
 {
     TCGv_i32 r = tcg_temp_new_i32();
-    tcg_gen_ld_i32(r, cpu_env, offsetof(CPUS390XState, fregs[reg].l.upper));
+#if HOST_LONG_BITS == 32
+    tcg_gen_mov_i32(r, TCGV_HIGH(fregs[reg]));
+#else
+    tcg_gen_shri_i64(MAKE_TCGV_I64(GET_TCGV_I32(r)), fregs[reg], 32);
+#endif
     return r;
 }
 
@@ -214,39 +224,35 @@  static inline void store_reg(int reg, TCGv_i64 v)
 
 static inline void store_freg(int reg, TCGv_i64 v)
 {
-    tcg_gen_st_i64(v, cpu_env, offsetof(CPUS390XState, fregs[reg].d));
+    tcg_gen_mov_i64(fregs[reg], v);
 }
 
 static inline void store_reg32(int reg, TCGv_i32 v)
 {
+    /* 32 bit register writes keep the upper half */
 #if HOST_LONG_BITS == 32
     tcg_gen_mov_i32(TCGV_LOW(regs[reg]), v);
 #else
-    TCGv_i64 tmp = tcg_temp_new_i64();
-    tcg_gen_extu_i32_i64(tmp, v);
-    /* 32 bit register writes keep the upper half */
-    tcg_gen_deposit_i64(regs[reg], regs[reg], tmp, 0, 32);
-    tcg_temp_free_i64(tmp);
+    tcg_gen_deposit_i64(regs[reg], regs[reg],
+                        MAKE_TCGV_I64(GET_TCGV_I32(v)), 0, 32);
 #endif
 }
 
 static inline void store_reg32_i64(int reg, TCGv_i64 v)
 {
     /* 32 bit register writes keep the upper half */
-#if HOST_LONG_BITS == 32
-    tcg_gen_mov_i32(TCGV_LOW(regs[reg]), TCGV_LOW(v));
-#else
     tcg_gen_deposit_i64(regs[reg], regs[reg], v, 0, 32);
-#endif
 }
 
 static inline void store_reg16(int reg, TCGv_i32 v)
 {
-    TCGv_i64 tmp = tcg_temp_new_i64();
-    tcg_gen_extu_i32_i64(tmp, v);
     /* 16 bit register writes keep the upper bytes */
-    tcg_gen_deposit_i64(regs[reg], regs[reg], tmp, 0, 16);
-    tcg_temp_free_i64(tmp);
+#if HOST_LONG_BITS == 32
+    tcg_gen_deposit_i32(TCGV_LOW(regs[reg]), TCGV_LOW(regs[reg]), v, 0, 16);
+#else
+    tcg_gen_deposit_i64(regs[reg], regs[reg],
+                        MAKE_TCGV_I64(GET_TCGV_I32(v)), 0, 16);
+#endif
 }
 
 static inline void store_reg8(int reg, TCGv_i64 v)
@@ -257,7 +263,13 @@  static inline void store_reg8(int reg, TCGv_i64 v)
 
 static inline void store_freg32(int reg, TCGv_i32 v)
 {
-    tcg_gen_st_i32(v, cpu_env, offsetof(CPUS390XState, fregs[reg].l.upper));
+    /* 32 bit register writes keep the lower half */
+#if HOST_LONG_BITS == 32
+    tcg_gen_mov_i32(TCGV_HIGH(fregs[reg]), v);
+#else
+    tcg_gen_deposit_i64(fregs[reg], fregs[reg],
+                        MAKE_TCGV_I64(GET_TCGV_I32(v)), 32, 32);
+#endif
 }
 
 static inline void update_psw_addr(DisasContext *s)