Patchwork [2/4] target-ppc: Extend FPU state for newer POWER CPUs

login
register
mail settings
Submitter David Gibson
Date Oct. 9, 2012, 4:17 a.m.
Message ID <1349756259-12975-3-git-send-email-david@gibson.dropbear.id.au>
Download mbox | patch
Permalink /patch/190218/
State New
Headers show

Comments

David Gibson - Oct. 9, 2012, 4:17 a.m.
This patch adds some extra FPU state to CPUPPCState.  Specifically,
fpscr is extended to a target_ulong bits, since some recent (64 bit)
CPUs now have more status bits than fit inside 32 bits.  Also, we add
the 32 VSR registers present on CPUs with VSX (these extend the
standard FP regs, which together with the Altivec/VMX registers form a
64 x 128bit register file for VSX).

We don't actually support the instructions using these extra registers
in TCG yet, but we still need a place to store the state so we can
sync it with KVM and savevm/loadvm it.  This patch updates the savevm
code to not fail on the extended state, but also does not actually
save it - that's a project for another patch.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---

v2:
 * Used target_ulong instead of uint64_t, since the extended state is used
only on ppc64 targets.
 * Fixed the TCG mapping of fpscr to match the new type.
---
 target-ppc/cpu.h       |    4 +++-
 target-ppc/machine.c   |    8 ++++++--
 target-ppc/translate.c |    4 ++--
 3 files changed, 11 insertions(+), 5 deletions(-)
Alexander Graf - Oct. 9, 2012, 11:38 a.m.
On 09.10.2012, at 06:17, David Gibson wrote:

> This patch adds some extra FPU state to CPUPPCState.  Specifically,
> fpscr is extended to a target_ulong bits, since some recent (64 bit)
> CPUs now have more status bits than fit inside 32 bits.  Also, we add
> the 32 VSR registers present on CPUs with VSX (these extend the
> standard FP regs, which together with the Altivec/VMX registers form a
> 64 x 128bit register file for VSX).
> 
> We don't actually support the instructions using these extra registers
> in TCG yet, but we still need a place to store the state so we can
> sync it with KVM and savevm/loadvm it.  This patch updates the savevm
> code to not fail on the extended state, but also does not actually
> save it - that's a project for another patch.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> 
> v2:
> * Used target_ulong instead of uint64_t, since the extended state is used
> only on ppc64 targets.
> * Fixed the TCG mapping of fpscr to match the new type.
> ---
> target-ppc/cpu.h       |    4 +++-
> target-ppc/machine.c   |    8 ++++++--
> target-ppc/translate.c |    4 ++--
> 3 files changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index faf4404..7627722 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -963,7 +963,7 @@ struct CPUPPCState {
>     /* floating point registers */
>     float64 fpr[32];
>     /* floating point status and control register */
> -    uint32_t fpscr;
> +    target_ulong fpscr;

This will still break TCG for qemu-system-ppc64, no?


Alex

> 
>     /* Next instruction pointer */
>     target_ulong nip;
> @@ -1014,6 +1014,8 @@ struct CPUPPCState {
>     /* Altivec registers */
>     ppc_avr_t avr[32];
>     uint32_t vscr;
> +    /* VSX registers */
> +    uint64_t vsr[32];
>     /* SPE registers */
>     uint64_t spe_acc;
>     uint32_t spe_fscr;
> diff --git a/target-ppc/machine.c b/target-ppc/machine.c
> index 21ce757..5e7bc00 100644
> --- a/target-ppc/machine.c
> +++ b/target-ppc/machine.c
> @@ -6,6 +6,7 @@ void cpu_save(QEMUFile *f, void *opaque)
> {
>     CPUPPCState *env = (CPUPPCState *)opaque;
>     unsigned int i, j;
> +    uint32_t fpscr;
> 
>     for (i = 0; i < 32; i++)
>         qemu_put_betls(f, &env->gpr[i]);
> @@ -30,7 +31,8 @@ void cpu_save(QEMUFile *f, void *opaque)
>         u.d = env->fpr[i];
>         qemu_put_be64(f, u.l);
>     }
> -    qemu_put_be32s(f, &env->fpscr);
> +    fpscr = env->fpscr;
> +    qemu_put_be32s(f, &fpscr);
>     qemu_put_sbe32s(f, &env->access_type);
> #if defined(TARGET_PPC64)
>     qemu_put_betls(f, &env->asr);
> @@ -90,6 +92,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
>     CPUPPCState *env = (CPUPPCState *)opaque;
>     unsigned int i, j;
>     target_ulong sdr1;
> +    uint32_t fpscr;
> 
>     for (i = 0; i < 32; i++)
>         qemu_get_betls(f, &env->gpr[i]);
> @@ -114,7 +117,8 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
>         u.l = qemu_get_be64(f);
>         env->fpr[i] = u.d;
>     }
> -    qemu_get_be32s(f, &env->fpscr);
> +    qemu_get_be32s(f, &fpscr);
> +    env->fpscr = fpscr;
>     qemu_get_sbe32s(f, &env->access_type);
> #if defined(TARGET_PPC64)
>     qemu_get_betls(f, &env->asr);
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 1042268..01c2907 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -68,7 +68,7 @@ static TCGv cpu_cfar;
> #endif
> static TCGv cpu_xer;
> static TCGv cpu_reserve;
> -static TCGv_i32 cpu_fpscr;
> +static TCGv cpu_fpscr;
> static TCGv_i32 cpu_access_type;
> 
> #include "gen-icount.h"
> @@ -9463,7 +9463,7 @@ void cpu_dump_state (CPUPPCState *env, FILE *f, fprintf_function cpu_fprintf,
>         if ((i & (RFPL - 1)) == (RFPL - 1))
>             cpu_fprintf(f, "\n");
>     }
> -    cpu_fprintf(f, "FPSCR %08x\n", env->fpscr);
> +    cpu_fprintf(f, "FPSCR " TARGET_FMT_lx "\n", env->fpscr);
> #if !defined(CONFIG_USER_ONLY)
>     cpu_fprintf(f, " SRR0 " TARGET_FMT_lx "  SRR1 " TARGET_FMT_lx
>                    "    PVR " TARGET_FMT_lx " VRSAVE " TARGET_FMT_lx "\n",
> -- 
> 1.7.10.4
>
Alexander Graf - Oct. 9, 2012, 11:41 a.m.
On 09.10.2012, at 13:38, Alexander Graf wrote:

> 
> On 09.10.2012, at 06:17, David Gibson wrote:
> 
>> This patch adds some extra FPU state to CPUPPCState.  Specifically,
>> fpscr is extended to a target_ulong bits, since some recent (64 bit)
>> CPUs now have more status bits than fit inside 32 bits.  Also, we add
>> the 32 VSR registers present on CPUs with VSX (these extend the
>> standard FP regs, which together with the Altivec/VMX registers form a
>> 64 x 128bit register file for VSX).
>> 
>> We don't actually support the instructions using these extra registers
>> in TCG yet, but we still need a place to store the state so we can
>> sync it with KVM and savevm/loadvm it.  This patch updates the savevm
>> code to not fail on the extended state, but also does not actually
>> save it - that's a project for another patch.
>> 
>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>> ---
>> 
>> v2:
>> * Used target_ulong instead of uint64_t, since the extended state is used
>> only on ppc64 targets.
>> * Fixed the TCG mapping of fpscr to match the new type.
>> ---
>> target-ppc/cpu.h       |    4 +++-
>> target-ppc/machine.c   |    8 ++++++--
>> target-ppc/translate.c |    4 ++--
>> 3 files changed, 11 insertions(+), 5 deletions(-)
>> 
>> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
>> index faf4404..7627722 100644
>> --- a/target-ppc/cpu.h
>> +++ b/target-ppc/cpu.h
>> @@ -963,7 +963,7 @@ struct CPUPPCState {
>>    /* floating point registers */
>>    float64 fpr[32];
>>    /* floating point status and control register */
>> -    uint32_t fpscr;
>> +    target_ulong fpscr;
> 
> This will still break TCG for qemu-system-ppc64, no?

To be more precise:

agraf@lychee:/home/agraf/release/qemu> grep -R cpu_fpscr target-ppc
target-ppc/translate.c:static TCGv_i32 cpu_fpscr;
target-ppc/translate.c:    cpu_fpscr = tcg_global_mem_new_i32(TCG_AREG0,
target-ppc/translate.c:    tcg_gen_shri_i32(cpu_crf[crfD(ctx->opcode)], cpu_fpscr, bfa);
target-ppc/translate.c:    tcg_gen_andi_i32(cpu_fpscr, cpu_fpscr, ~(0xF << bfa));
target-ppc/translate.c:    tcg_gen_extu_i32_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpscr);
target-ppc/translate.c:        tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX);
target-ppc/translate.c:        tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX);
target-ppc/translate.c:        tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX);
target-ppc/translate.c:        tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX);

All those functions assume cpu_fpscr is a TCGv32. They need to be adjusted to work on tl instead.

Please test compile your code with configure --enable-tcg-debug.


Alex
David Gibson - Oct. 9, 2012, 12:50 p.m.
On Tue, Oct 09, 2012 at 01:41:02PM +0200, Alexander Graf wrote:
> On 09.10.2012, at 13:38, Alexander Graf wrote:
> > On 09.10.2012, at 06:17, David Gibson wrote:
[snip]
> > This will still break TCG for qemu-system-ppc64, no?
> 
> To be more precise:
> 
> agraf@lychee:/home/agraf/release/qemu> grep -R cpu_fpscr target-ppc
> target-ppc/translate.c:static TCGv_i32 cpu_fpscr;

I did update the type of cpu_fpscr..

> target-ppc/translate.c:    cpu_fpscr = tcg_global_mem_new_i32(TCG_AREG0,
> target-ppc/translate.c:    tcg_gen_shri_i32(cpu_crf[crfD(ctx->opcode)], cpu_fpscr, bfa);
> target-ppc/translate.c:    tcg_gen_andi_i32(cpu_fpscr, cpu_fpscr, ~(0xF << bfa));
> target-ppc/translate.c:    tcg_gen_extu_i32_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpscr);
> target-ppc/translate.c:        tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX);
> target-ppc/translate.c:        tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX);
> target-ppc/translate.c:        tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX);
> target-ppc/translate.c:        tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX);
> 
> All those functions assume cpu_fpscr is a TCGv32. They need to be
> adjusted to work on tl instead.

But I didn't spot the type dependent calls.  I figured type checking
would catch that sort of thing, but apparently not.

> Please test compile your code with configure --enable-tcg-debug.

Ok, will do.
Alexander Graf - Oct. 9, 2012, 3:03 p.m.
On 10/09/2012 02:50 PM, David Gibson wrote:
> On Tue, Oct 09, 2012 at 01:41:02PM +0200, Alexander Graf wrote:
>> On 09.10.2012, at 13:38, Alexander Graf wrote:
>>> On 09.10.2012, at 06:17, David Gibson wrote:
> [snip]
>>> This will still break TCG for qemu-system-ppc64, no?
>> To be more precise:
>>
>> agraf@lychee:/home/agraf/release/qemu>  grep -R cpu_fpscr target-ppc
>> target-ppc/translate.c:static TCGv_i32 cpu_fpscr;
> I did update the type of cpu_fpscr..
>
>> target-ppc/translate.c:    cpu_fpscr = tcg_global_mem_new_i32(TCG_AREG0,
>> target-ppc/translate.c:    tcg_gen_shri_i32(cpu_crf[crfD(ctx->opcode)], cpu_fpscr, bfa);
>> target-ppc/translate.c:    tcg_gen_andi_i32(cpu_fpscr, cpu_fpscr, ~(0xF<<  bfa));
>> target-ppc/translate.c:    tcg_gen_extu_i32_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpscr);
>> target-ppc/translate.c:        tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX);
>> target-ppc/translate.c:        tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX);
>> target-ppc/translate.c:        tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX);
>> target-ppc/translate.c:        tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX);
>>
>> All those functions assume cpu_fpscr is a TCGv32. They need to be
>> adjusted to work on tl instead.
> But I didn't spot the type dependent calls.  I figured type checking
> would catch that sort of thing, but apparently not.

Type checking only happens with enable-tcg-debug :)


Alex

Patch

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index faf4404..7627722 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -963,7 +963,7 @@  struct CPUPPCState {
     /* floating point registers */
     float64 fpr[32];
     /* floating point status and control register */
-    uint32_t fpscr;
+    target_ulong fpscr;
 
     /* Next instruction pointer */
     target_ulong nip;
@@ -1014,6 +1014,8 @@  struct CPUPPCState {
     /* Altivec registers */
     ppc_avr_t avr[32];
     uint32_t vscr;
+    /* VSX registers */
+    uint64_t vsr[32];
     /* SPE registers */
     uint64_t spe_acc;
     uint32_t spe_fscr;
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index 21ce757..5e7bc00 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -6,6 +6,7 @@  void cpu_save(QEMUFile *f, void *opaque)
 {
     CPUPPCState *env = (CPUPPCState *)opaque;
     unsigned int i, j;
+    uint32_t fpscr;
 
     for (i = 0; i < 32; i++)
         qemu_put_betls(f, &env->gpr[i]);
@@ -30,7 +31,8 @@  void cpu_save(QEMUFile *f, void *opaque)
         u.d = env->fpr[i];
         qemu_put_be64(f, u.l);
     }
-    qemu_put_be32s(f, &env->fpscr);
+    fpscr = env->fpscr;
+    qemu_put_be32s(f, &fpscr);
     qemu_put_sbe32s(f, &env->access_type);
 #if defined(TARGET_PPC64)
     qemu_put_betls(f, &env->asr);
@@ -90,6 +92,7 @@  int cpu_load(QEMUFile *f, void *opaque, int version_id)
     CPUPPCState *env = (CPUPPCState *)opaque;
     unsigned int i, j;
     target_ulong sdr1;
+    uint32_t fpscr;
 
     for (i = 0; i < 32; i++)
         qemu_get_betls(f, &env->gpr[i]);
@@ -114,7 +117,8 @@  int cpu_load(QEMUFile *f, void *opaque, int version_id)
         u.l = qemu_get_be64(f);
         env->fpr[i] = u.d;
     }
-    qemu_get_be32s(f, &env->fpscr);
+    qemu_get_be32s(f, &fpscr);
+    env->fpscr = fpscr;
     qemu_get_sbe32s(f, &env->access_type);
 #if defined(TARGET_PPC64)
     qemu_get_betls(f, &env->asr);
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 1042268..01c2907 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -68,7 +68,7 @@  static TCGv cpu_cfar;
 #endif
 static TCGv cpu_xer;
 static TCGv cpu_reserve;
-static TCGv_i32 cpu_fpscr;
+static TCGv cpu_fpscr;
 static TCGv_i32 cpu_access_type;
 
 #include "gen-icount.h"
@@ -9463,7 +9463,7 @@  void cpu_dump_state (CPUPPCState *env, FILE *f, fprintf_function cpu_fprintf,
         if ((i & (RFPL - 1)) == (RFPL - 1))
             cpu_fprintf(f, "\n");
     }
-    cpu_fprintf(f, "FPSCR %08x\n", env->fpscr);
+    cpu_fprintf(f, "FPSCR " TARGET_FMT_lx "\n", env->fpscr);
 #if !defined(CONFIG_USER_ONLY)
     cpu_fprintf(f, " SRR0 " TARGET_FMT_lx "  SRR1 " TARGET_FMT_lx
                    "    PVR " TARGET_FMT_lx " VRSAVE " TARGET_FMT_lx "\n",