diff mbox

[5/6] target-ppc: support for 32-bit carry and overflow

Message ID 1486636445-24109-6-git-send-email-nikunj@linux.vnet.ibm.com
State New
Headers show

Commit Message

Nikunj A Dadhania Feb. 9, 2017, 10:34 a.m. UTC
POWER ISA 3.0 adds CA32 and OV32 status in 64-bit mode. Add the flags
and corresponding defines. Moreover, CA32 is set when CA is set and
OV32 is set when OV is set, so there is no need to have new
fields in the CPUPPCState structure.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target/ppc/cpu.h       | 26 ++++++++++++++++++++++++++
 target/ppc/translate.c |  6 ++++++
 2 files changed, 32 insertions(+)

Comments

David Gibson Feb. 10, 2017, 12:10 a.m. UTC | #1
On Thu, Feb 09, 2017 at 04:04:04PM +0530, Nikunj A Dadhania wrote:
> POWER ISA 3.0 adds CA32 and OV32 status in 64-bit mode. Add the flags
> and corresponding defines. Moreover, CA32 is set when CA is set and
> OV32 is set when OV is set, there is no need to have a new
> fields in the CPUPPCState structure.
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>

Um.. I don't quite understand this.  If CA always has the same value
as CA32, what's the point?

> ---
>  target/ppc/cpu.h       | 26 ++++++++++++++++++++++++++
>  target/ppc/translate.c |  6 ++++++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index bc2a2ce..181919b 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -1354,11 +1354,15 @@ int ppc_compat_max_threads(PowerPCCPU *cpu);
>  #define XER_SO  31
>  #define XER_OV  30
>  #define XER_CA  29
> +#define XER_OV32  19
> +#define XER_CA32  18
>  #define XER_CMP  8
>  #define XER_BC   0
>  #define xer_so  (env->so)
>  #define xer_ov  (env->ov)
>  #define xer_ca  (env->ca)
> +#define xer_ov32  (env->ov)
> +#define xer_ca32  (env->ca)
>  #define xer_cmp ((env->xer >> XER_CMP) & 0xFF)
>  #define xer_bc  ((env->xer >> XER_BC)  & 0x7F)
>  
> @@ -2325,11 +2329,21 @@ enum {
>  
>  /*****************************************************************************/
>  
> +#ifndef TARGET_PPC64
>  static inline target_ulong cpu_read_xer(CPUPPCState *env)
>  {
>      return env->xer | (env->so << XER_SO) | (env->ov << XER_OV) | (env->ca << XER_CA);
>  }
> +#else
> +static inline target_ulong cpu_read_xer(CPUPPCState *env)
> +{
> +    return env->xer | (env->so << XER_SO) |
> +        (env->ov << XER_OV) | (env->ca << XER_CA) |
> +        (env->ov << XER_OV32) | (env->ca << XER_CA32);
> +}
> +#endif
>  
> +#ifndef TARGET_PPC64
>  static inline void cpu_write_xer(CPUPPCState *env, target_ulong xer)
>  {
>      env->so = (xer >> XER_SO) & 1;
> @@ -2337,6 +2351,18 @@ static inline void cpu_write_xer(CPUPPCState *env, target_ulong xer)
>      env->ca = (xer >> XER_CA) & 1;
>      env->xer = xer & ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA));
>  }
> +#else
> +static inline void cpu_write_xer(CPUPPCState *env, target_ulong xer)
> +{
> +    env->so = (xer >> XER_SO) & 1;
> +    env->ov = ((xer >> XER_OV) & 1) | ((xer >> XER_OV32) & 1);
> +    env->ca = ((xer >> XER_CA) & 1) | ((xer >> XER_CA32) & 1);
> +    env->xer = xer & ~((1ul << XER_SO) |
> +                       (1ul << XER_OV) | (1ul << XER_CA) |
> +                       (1ul << XER_OV32) | (1ul << XER_CA32));
> +}
> +#endif
> +
>  
>  static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc,
>                                          target_ulong *cs_base, uint32_t *flags)
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index 3ba2616..724ad17 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -3715,6 +3715,12 @@ static void gen_read_xer(TCGv dst)
>      tcg_gen_or_tl(t0, t0, t1);
>      tcg_gen_or_tl(dst, dst, t2);
>      tcg_gen_or_tl(dst, dst, t0);
> +#ifdef TARGET_PPC64
> +    tcg_gen_shli_tl(t0, cpu_ov, XER_OV32);
> +    tcg_gen_or_tl(dst, dst, t0);
> +    tcg_gen_shli_tl(t0, cpu_ca, XER_CA32);
> +    tcg_gen_or_tl(dst, dst, t0);
> +#endif
>      tcg_temp_free(t0);
>      tcg_temp_free(t1);
>      tcg_temp_free(t2);
Nikunj A Dadhania Feb. 10, 2017, 4:19 a.m. UTC | #2
David Gibson <david@gibson.dropbear.id.au> writes:

> [ Unknown signature status ]
> On Thu, Feb 09, 2017 at 04:04:04PM +0530, Nikunj A Dadhania wrote:
>> POWER ISA 3.0 adds CA32 and OV32 status in 64-bit mode. Add the flags
>> and corresponding defines. Moreover, CA32 is set when CA is set and
>> OV32 is set when OV is set, there is no need to have a new
>> fields in the CPUPPCState structure.
>> 
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>
> Um.. I don't quite understand this.  If CA always has the same value
> as CA32, what's the point?

I am not clear either. I think that as CA was set for both 32/64-bit
mode, that couldn't be changed for backward compatibility. CA32 should
have affected only the instructions working on word variants.

Re-scanning the ISA 3.0, found this in 3.3.9 Fixed-Point Arithmetic
Instructions:

=================================================================
addic, addic., subfic, addc, subfc, adde, subfe,
addme, subfme, addze, and subfze always set CA, to
reflect the carry out of bit 0 in 64-bit mode and out of bit
32 in 32-bit mode. These instructions also always set
CA32 to reflect the carry out of bit 32.
=================================================================

Which conflicts with what is said in 3.2.2 Fixed-Point Exception
Register:
=================================================================
Carry32 (CA32)
CA32 is set whenever CA is set, and is set to
the same value that CA is defined to be set to
in 32-bit mode.
=================================================================

Regards
Nikunj
David Gibson Feb. 13, 2017, 1:54 a.m. UTC | #3
On Fri, Feb 10, 2017 at 09:49:17AM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > [ Unknown signature status ]
> > On Thu, Feb 09, 2017 at 04:04:04PM +0530, Nikunj A Dadhania wrote:
> >> POWER ISA 3.0 adds CA32 and OV32 status in 64-bit mode. Add the flags
> >> and corresponding defines. Moreover, CA32 is set when CA is set and
> >> OV32 is set when OV is set, there is no need to have a new
> >> fields in the CPUPPCState structure.
> >> 
> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> >
> > Um.. I don't quite understand this.  If CA always has the same value
> > as CA32, what's the point?
> 
> I am not clear either. I think that as CA was set for both 32/64-bit
> mode, that couldn't be changed for backward compatibility. CA32 should
> have affected only the instructions working one word variants.
> 
> Re-scanning the ISA 3.0, found this in 3.3.9 Fixed-Point Arithmetic
> Instructions:
> 
> =================================================================
> addic, addic., subfic, addc, subfc, adde, subfe,
> addme, subfme, addze, and subfze always set CA, to
> reflect the carry out of bit 0 in 64-bit mode and out of bit
> 32 in 32-bit mode. These instructions also always set
> CA32 to reflect the carry out of bit 32.
> =================================================================
> 
> Which is conflicting to what is said in 3.2.2 Fixed-Point Exception
> Register:
> =================================================================
> Carry32 (CA32)
> CA32 is set whenever CA is set, and is set to
> the same value that CA is defined to be set to
> in 32-bit mode.
> =================================================================

Well, that's certainly confusing.

Can you try and find out what's going on here within IBM, and repost
these patches once there's a straight story about how this bit works.
David Gibson Feb. 14, 2017, 2:43 a.m. UTC | #4
On Fri, Feb 10, 2017 at 09:49:17AM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > [ Unknown signature status ]
> > On Thu, Feb 09, 2017 at 04:04:04PM +0530, Nikunj A Dadhania wrote:
> >> POWER ISA 3.0 adds CA32 and OV32 status in 64-bit mode. Add the flags
> >> and corresponding defines. Moreover, CA32 is set when CA is set and
> >> OV32 is set when OV is set, there is no need to have a new
> >> fields in the CPUPPCState structure.
> >> 
> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> >
> > Um.. I don't quite understand this.  If CA always has the same value
> > as CA32, what's the point?
> 
> I am not clear either. I think that as CA was set for both 32/64-bit
> mode, that couldn't be changed for backward compatibility. CA32 should
> have affected only the instructions working one word variants.
> 
> Re-scanning the ISA 3.0, found this in 3.3.9 Fixed-Point Arithmetic
> Instructions:
> 
> =================================================================
> addic, addic., subfic, addc, subfc, adde, subfe,
> addme, subfme, addze, and subfze always set CA, to
> reflect the carry out of bit 0 in 64-bit mode and out of bit
> 32 in 32-bit mode. These instructions also always set
> CA32 to reflect the carry out of bit 32.
> =================================================================
> 
> Which is conflicting to what is said in 3.2.2 Fixed-Point Exception
> Register:
> =================================================================
> Carry32 (CA32)
> CA32 is set whenever CA is set, and is set to
> the same value that CA is defined to be set to
> in 32-bit mode.
> =================================================================

Ok, I've had a look at the ISA and discussed this with Michael
Ellerman.  We think what's going on here is that it's using some
unfortunately unclear wording.  When it says "OV32 is set when OV is
set" we think that means "OV32 is updated when OV is updated", not
that "OV32 is set to the same value as OV".

So although they're updated at the same time, the 32-bit variants can
have different values and will need real representation in the CPU
model.  Well, at least in 64-bit mode.  When the CPU is in 32-bit
mode, I believe they really will have the same values.

That would make your implementation suggested here incorrect.
Nikunj A Dadhania Feb. 14, 2017, 3:05 a.m. UTC | #5
David Gibson <david@gibson.dropbear.id.au> writes:

> [ Unknown signature status ]
> On Fri, Feb 10, 2017 at 09:49:17AM +0530, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>> > [ Unknown signature status ]
>> > On Thu, Feb 09, 2017 at 04:04:04PM +0530, Nikunj A Dadhania wrote:
>> >> POWER ISA 3.0 adds CA32 and OV32 status in 64-bit mode. Add the flags
>> >> and corresponding defines. Moreover, CA32 is set when CA is set and
>> >> OV32 is set when OV is set, there is no need to have a new
>> >> fields in the CPUPPCState structure.
>> >> 
>> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> >
>> > Um.. I don't quite understand this.  If CA always has the same value
>> > as CA32, what's the point?
>> 
>> I am not clear either. I think that as CA was set for both 32/64-bit
>> mode, that couldn't be changed for backward compatibility. CA32 should
>> have affected only the instructions working one word variants.
>> 
>> Re-scanning the ISA 3.0, found this in 3.3.9 Fixed-Point Arithmetic
>> Instructions:
>> 
>> =================================================================
>> addic, addic., subfic, addc, subfc, adde, subfe,
>> addme, subfme, addze, and subfze always set CA, to
>> reflect the carry out of bit 0 in 64-bit mode and out of bit
>> 32 in 32-bit mode. These instructions also always set
>> CA32 to reflect the carry out of bit 32.
>> =================================================================
>> 
>> Which is conflicting to what is said in 3.2.2 Fixed-Point Exception
>> Register:
>> =================================================================
>> Carry32 (CA32)
>> CA32 is set whenever CA is set, and is set to
>> the same value that CA is defined to be set to
>> in 32-bit mode.
>> =================================================================
>
> Ok, I've had a look at the ISA and discussed this with Michael
> Ellerman.  We think what's going on here is that it's using some
> unfortunately unclear wording.  When it says "OV32 is set when OV is
> set" we think that means "OV32 is updated when OV is updated", not
> that "OV32 is set to the same value as OV".
>
> So although they're updated at the same time, the 32-bit variants can
> have different values and will need real representation in the CPU
> model.  Well, at least in 64-bit mode.  When the CPU is in 32-bit
> mode, I believe they really will have the same values.
>
> That would make your implementation suggested here incorrect.

Yes, you are right. I had a discussion with Paul Mackerras yesterday, he
explained to me in detail about the bits. I am working on the revised
implementation. Will detail it in the commit message.

Regards
Nikunj
Richard Henderson Feb. 14, 2017, 3:21 a.m. UTC | #6
On 02/14/2017 02:05 PM, Nikunj A Dadhania wrote:
> Yes, you are right. I had a discussion with Paul Mackerras yesterday, he
> explained to me in detail about the bits. I am working on the revised
> implementation. Will detail it in the commit message.

As you're working on this, consider changing the definition of cpu_ov such that 
the MSB is OV and bit 31 is OV32.

E.g.


  static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
                                             TCGv arg1, TCGv arg2, int sub)
  {
      TCGv t0 = tcg_temp_new();

      tcg_gen_xor_tl(cpu_ov, arg0, arg2);
      tcg_gen_xor_tl(t0, arg1, arg2);
      if (sub) {
          tcg_gen_and_tl(cpu_ov, cpu_ov, t0);
      } else {
          tcg_gen_andc_tl(cpu_ov, cpu_ov, t0);
      }
      tcg_temp_free(t0);
      if (NARROW_MODE(ctx)) {
          tcg_gen_ext32s_tl(cpu_ov, cpu_ov);
      }
-    tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1);
      tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
  }


is all that is required for arithmetic to compute OV and OV32 into those two bits.

Obviously more is required for multiplication and division, and you'd also have 
to change cpu_so to examine the MSB as well.


r~
Nikunj A Dadhania Feb. 16, 2017, 5:08 a.m. UTC | #7
Richard Henderson <rth@twiddle.net> writes:

> On 02/14/2017 02:05 PM, Nikunj A Dadhania wrote:
>> Yes, you are right. I had a discussion with Paul Mackerras yesterday, he
>> explained to me in detail about the bits. I am working on the revised
>> implementation. Will detail it in the commit message.
>
> As you're working on this, consider changing the definition of cpu_ov such that 
> the MSB is OV and bit 31 is OV32.
>
> E.g.
>
>
>   static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
>                                              TCGv arg1, TCGv arg2, int sub)
>   {
>       TCGv t0 = tcg_temp_new();
>
>       tcg_gen_xor_tl(cpu_ov, arg0, arg2);
>       tcg_gen_xor_tl(t0, arg1, arg2);
>       if (sub) {
>           tcg_gen_and_tl(cpu_ov, cpu_ov, t0);
>       } else {
>           tcg_gen_andc_tl(cpu_ov, cpu_ov, t0);
>       }
>       tcg_temp_free(t0);
>       if (NARROW_MODE(ctx)) {
>           tcg_gen_ext32s_tl(cpu_ov, cpu_ov);
>       }
> -    tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1);
>       tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
>   }
>
>
> is all that is required for arithmetic to compute OV and OV32 into those two bits.

How about the below?

@@ -809,10 +809,11 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
         tcg_gen_andc_tl(cpu_ov, cpu_ov, t0);
     }
     tcg_temp_free(t0);
+    tcg_gen_extract_tl(cpu_ov32, cpu_ov, 31, 1);
+    tcg_gen_extract_tl(cpu_ov, cpu_ov, 63, 1);
     if (NARROW_MODE(ctx)) {
-        tcg_gen_ext32s_tl(cpu_ov, cpu_ov);
+        tcg_gen_mov_tl(cpu_ov, cpu_ov32);
     }
-    tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1);
     tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
 }

Regards
Nikunj
Richard Henderson Feb. 16, 2017, 8:52 p.m. UTC | #8
On 02/16/2017 04:08 PM, Nikunj A Dadhania wrote:
> Richard Henderson <rth@twiddle.net> writes:
>
>> On 02/14/2017 02:05 PM, Nikunj A Dadhania wrote:
>>> Yes, you are right. I had a discussion with Paul Mackerras yesterday, he
>>> explained to me in detail about the bits. I am working on the revised
>>> implementation. Will detail it in the commit message.
>>
>> As you're working on this, consider changing the definition of cpu_ov such that
>> the MSB is OV and bit 31 is OV32.
>>
>> E.g.
>>
>>
>>   static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
>>                                              TCGv arg1, TCGv arg2, int sub)
>>   {
>>       TCGv t0 = tcg_temp_new();
>>
>>       tcg_gen_xor_tl(cpu_ov, arg0, arg2);
>>       tcg_gen_xor_tl(t0, arg1, arg2);
>>       if (sub) {
>>           tcg_gen_and_tl(cpu_ov, cpu_ov, t0);
>>       } else {
>>           tcg_gen_andc_tl(cpu_ov, cpu_ov, t0);
>>       }
>>       tcg_temp_free(t0);
>>       if (NARROW_MODE(ctx)) {
>>           tcg_gen_ext32s_tl(cpu_ov, cpu_ov);
>>       }
>> -    tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1);
>>       tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
>>   }
>>
>>
>> is all that is required for arithmetic to compute OV and OV32 into those two bits.
>
> How about the below?
>
> @@ -809,10 +809,11 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
>          tcg_gen_andc_tl(cpu_ov, cpu_ov, t0);
>      }
>      tcg_temp_free(t0);
> +    tcg_gen_extract_tl(cpu_ov32, cpu_ov, 31, 1);
> +    tcg_gen_extract_tl(cpu_ov, cpu_ov, 63, 1);
>      if (NARROW_MODE(ctx)) {
> -        tcg_gen_ext32s_tl(cpu_ov, cpu_ov);
> +        tcg_gen_mov_tl(cpu_ov, cpu_ov32);
>      }
> -    tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1);
>      tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
>  }

Why do you want to extract these bits?


r~
Nikunj A Dadhania Feb. 17, 2017, 4:47 a.m. UTC | #9
Richard Henderson <rth@twiddle.net> writes:

> On 02/16/2017 04:08 PM, Nikunj A Dadhania wrote:
>> Richard Henderson <rth@twiddle.net> writes:
>>
>>> On 02/14/2017 02:05 PM, Nikunj A Dadhania wrote:
>>>> Yes, you are right. I had a discussion with Paul Mackerras yesterday, he
>>>> explained to me in detail about the bits. I am working on the revised
>>>> implementation. Will detail it in the commit message.
>>>
>>> As you're working on this, consider changing the definition of cpu_ov such that
>>> the MSB is OV and bit 31 is OV32.
>>>
>>> E.g.
>>>
>>>
>>>   static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
>>>                                              TCGv arg1, TCGv arg2, int sub)
>>>   {
>>>       TCGv t0 = tcg_temp_new();
>>>
>>>       tcg_gen_xor_tl(cpu_ov, arg0, arg2);
>>>       tcg_gen_xor_tl(t0, arg1, arg2);
>>>       if (sub) {
>>>           tcg_gen_and_tl(cpu_ov, cpu_ov, t0);
>>>       } else {
>>>           tcg_gen_andc_tl(cpu_ov, cpu_ov, t0);
>>>       }
>>>       tcg_temp_free(t0);
>>>       if (NARROW_MODE(ctx)) {
>>>           tcg_gen_ext32s_tl(cpu_ov, cpu_ov);
>>>       }
>>> -    tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1);
>>>       tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
>>>   }
>>>
>>>
>>> is all that is required for arithmetic to compute OV and OV32 into those two bits.
>>
>> How about the below?
>>
>> @@ -809,10 +809,11 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
>>          tcg_gen_andc_tl(cpu_ov, cpu_ov, t0);
>>      }
>>      tcg_temp_free(t0);
>> +    tcg_gen_extract_tl(cpu_ov32, cpu_ov, 31, 1);
>> +    tcg_gen_extract_tl(cpu_ov, cpu_ov, 63, 1);
>>      if (NARROW_MODE(ctx)) {
>> -        tcg_gen_ext32s_tl(cpu_ov, cpu_ov);
>> +        tcg_gen_mov_tl(cpu_ov, cpu_ov32);
>>      }
>> -    tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1);
>>      tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
>>  }
>
> Why do you want to extract these bits?

Convenient to copy that to XER later.

Regards
Nikunj
Richard Henderson Feb. 17, 2017, 7:33 p.m. UTC | #10
On 02/17/2017 03:47 PM, Nikunj A Dadhania wrote:
>> Why do you want to extract these bits?
>
> Convenient to copy that to XER later.

Ideally you make the most common operation cheapest, and the more rare 
operation more expensive.  That said, I suppose even using ADDO is rare in the 
first place.  So it probably doesn't much matter.


r~
diff mbox

Patch

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index bc2a2ce..181919b 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1354,11 +1354,15 @@  int ppc_compat_max_threads(PowerPCCPU *cpu);
 #define XER_SO  31
 #define XER_OV  30
 #define XER_CA  29
+#define XER_OV32  19
+#define XER_CA32  18
 #define XER_CMP  8
 #define XER_BC   0
 #define xer_so  (env->so)
 #define xer_ov  (env->ov)
 #define xer_ca  (env->ca)
+#define xer_ov32  (env->ov)
+#define xer_ca32  (env->ca)
 #define xer_cmp ((env->xer >> XER_CMP) & 0xFF)
 #define xer_bc  ((env->xer >> XER_BC)  & 0x7F)
 
@@ -2325,11 +2329,21 @@  enum {
 
 /*****************************************************************************/
 
+#ifndef TARGET_PPC64
 static inline target_ulong cpu_read_xer(CPUPPCState *env)
 {
     return env->xer | (env->so << XER_SO) | (env->ov << XER_OV) | (env->ca << XER_CA);
 }
+#else
+static inline target_ulong cpu_read_xer(CPUPPCState *env)
+{
+    return env->xer | (env->so << XER_SO) |
+        (env->ov << XER_OV) | (env->ca << XER_CA) |
+        (env->ov << XER_OV32) | (env->ca << XER_CA32);
+}
+#endif
 
+#ifndef TARGET_PPC64
 static inline void cpu_write_xer(CPUPPCState *env, target_ulong xer)
 {
     env->so = (xer >> XER_SO) & 1;
@@ -2337,6 +2351,18 @@  static inline void cpu_write_xer(CPUPPCState *env, target_ulong xer)
     env->ca = (xer >> XER_CA) & 1;
     env->xer = xer & ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA));
 }
+#else
+static inline void cpu_write_xer(CPUPPCState *env, target_ulong xer)
+{
+    env->so = (xer >> XER_SO) & 1;
+    env->ov = ((xer >> XER_OV) & 1) | ((xer >> XER_OV32) & 1);
+    env->ca = ((xer >> XER_CA) & 1) | ((xer >> XER_CA32) & 1);
+    env->xer = xer & ~((1ul << XER_SO) |
+                       (1ul << XER_OV) | (1ul << XER_CA) |
+                       (1ul << XER_OV32) | (1ul << XER_CA32));
+}
+#endif
+
 
 static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc,
                                         target_ulong *cs_base, uint32_t *flags)
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 3ba2616..724ad17 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -3715,6 +3715,12 @@  static void gen_read_xer(TCGv dst)
     tcg_gen_or_tl(t0, t0, t1);
     tcg_gen_or_tl(dst, dst, t2);
     tcg_gen_or_tl(dst, dst, t0);
+#ifdef TARGET_PPC64
+    tcg_gen_shli_tl(t0, cpu_ov, XER_OV32);
+    tcg_gen_or_tl(dst, dst, t0);
+    tcg_gen_shli_tl(t0, cpu_ca, XER_CA32);
+    tcg_gen_or_tl(dst, dst, t0);
+#endif
     tcg_temp_free(t0);
     tcg_temp_free(t1);
     tcg_temp_free(t2);