diff mbox

[3/5] target-ppc: Build error log

Message ID 20140825134535.2361.37728.stgit@aravindap
State New
Headers show

Commit Message

Aravinda Prasad Aug. 25, 2014, 1:45 p.m. UTC
Whenever there is a physical memory error due to bit
flips, which cannot be corrected by hardware, the error
is passed on to the kernel. If the memory address in
error belongs to guest address space then guest kernel
is responsible to take action. Hence the error is passed
on to guest via KVM by invoking 0x200 NMI vector.

However, guest OS, as per PAPR, expects an error log
upon such error. This patch registers a new hcall
which is issued from 0x200 interrupt vector and builds
the error log, copies the error log to rtas space and
passes the address of the error log to guest

Enhancement to KVM to perform above functionality is
already in upstream kernel.

Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
---
 hw/ppc/spapr_hcall.c   |  147 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h |    4 +
 2 files changed, 150 insertions(+), 1 deletion(-)

Comments

David Gibson Aug. 26, 2014, 5:47 a.m. UTC | #1
On Mon, Aug 25, 2014 at 07:15:35PM +0530, Aravinda Prasad wrote:
> Whenever there is a physical memory error due to bit
> flips, which cannot be corrected by hardware, the error
> is passed on to the kernel. If the memory address in
> error belongs to guest address space then guest kernel
> is responsible to take action. Hence the error is passed
> on to guest via KVM by invoking 0x200 NMI vector.
> 
> However, guest OS, as per PAPR, expects an error log
> upon such error. This patch registers a new hcall
> which is issued from 0x200 interrupt vector and builds
> the error log, copies the error log to rtas space and
> passes the address of the error log to guest
> 
> Enhancement to KVM to perform above functionality is
> already in upstream kernel.
> 
> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr_hcall.c   |  147 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/spapr.h |    4 +
>  2 files changed, 150 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 118ee77..be063f4 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -14,6 +14,86 @@ struct SPRSyncState {
>      target_ulong mask;
>  };
>  
> +#define SEVERITY_SHIFT         0x5
> +#define DISPOSITION_SHIFT      0x3
> +#define INITIATOR_SHIFT        0x4
> +
> +/*
> + * Only required RTAS event severity, disposition, initiator
> + * target and type are copied from arch/powerpc/include/asm/rtas.h
> + */
> +
> +/* RTAS event severity */
> +#define RTAS_SEVERITY_ERROR_SYNC    0x3
> +
> +/* RTAS event disposition */
> +#define RTAS_DISP_NOT_RECOVERED     0x2
> +
> +/* RTAS event initiator */
> +#define RTAS_INITIATOR_MEMORY       0x4
> +
> +/* RTAS event target */
> +#define RTAS_TARGET_MEMORY          0x4
> +
> +/* RTAS event type */
> +#define RTAS_TYPE_ECC_UNCORR        0x09
> +
> +/*
> + * Currently KVM only passes on the uncorrected machine
> + * check memory error to guest. Other machine check errors
> + * such as SLB multi-hit and TLB multi-hit are recovered
> + * in KVM and are not passed on to guest.
> + *
> + * DSISR Bit for uncorrected machine check error. Copied
> + * from arch/powerpc/include/asm/mce.h
> + */
> +#define PPC_BITLSHIFT(be)           (64 - 1 - (be))
> +#define PPC_BIT(bit)                (1UL << PPC_BITLSHIFT(bit))

Just opencode the BITLSHIFT macro into the BIT macro, it doesn't
improve either brevity or clarity as it is.

Or alternatively define PPC_BIT as (0x8000000000000000ULL >> bit)
which might be clearer.

> +#define P7_DSISR_MC_UE              (PPC_BIT(48))  /* P8 too */
> +
> +/* Adopted from kernel source arch/powerpc/include/asm/rtas.h */
> +struct rtas_error_log {
> +    /* Byte 0 */
> +    uint8_t     byte0;          /* Architectural version */
> +
> +    /* Byte 1 */
> +    uint8_t     byte1;
> +    /* XXXXXXXX
> +     * XXX      3: Severity level of error
> +     *    XX    2: Degree of recovery
> +     *      X   1: Extended log present?
> +     *       XX 2: Reserved
> +     */
> +
> +    /* Byte 2 */
> +    uint8_t     byte2;
> +    /* XXXXXXXX
> +     * XXXX     4: Initiator of event
> +     *     XXXX 4: Target of failed operation
> +     */
> +    uint8_t     byte3;          /* General event or error*/
> +};
> +
> +/*
> + * Data format in RTAS-Blob
> + *
> + * This structure contains error information related to Machine
> + * Check exception. This is filled up and copied to rtas-blob
> + * upon machine check exception.
> + */
> +struct rtas_mc_log {
> +    target_ulong srr0;
> +    target_ulong srr1;
> +    /*
> +     * Beginning of error log address. This is properly
> +     * populated and passed on to OS registered machine
> +     * check notification routine upon machine check
> +     * exception
> +     */
> +    target_ulong r3;
> +    struct rtas_error_log err_log;
> +};
> +
>  static void do_spr_sync(void *arg)
>  {
>      struct SPRSyncState *s = arg;
> @@ -586,6 +666,72 @@ static target_ulong h_rtas_update(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>      return 0;
>  }
>  
> +static target_ulong h_report_mc_err(PowerPCCPU *cpu, sPAPREnvironment *spapr,
> +                                 target_ulong opcode, target_ulong *args)
> +{
> +    struct rtas_mc_log mc_log;
> +    CPUPPCState *env = &cpu->env;
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> +
> +    /*
> +     * We save the original r3 register in SPRG2 in 0x200 vector,
> +     * which is patched during call to ibm.nmi-register. Original
> +     * r3 is required to be included in error log
> +     */
> +    mc_log.r3 = env->spr[SPR_SPRG2];
> +
> +    /*
> +     * SRR0 and SRR1 contains nip and msr at the time of exception
> +     * and are clobbered when we return from this hcall
> +     * and hence need to be properly restored. We save srr0
> +     * and srr1 in rtas blob and restore it in 0x200 vector
> +     * before branching to OS registered machine check handler
> +     */
> +    mc_log.srr0 = env->spr[SPR_SRR0];
> +    mc_log.srr1 = env->spr[SPR_SRR1];
> +
> +    /* Set error log fields */
> +    mc_log.err_log.byte0 = 0x00;
> +    mc_log.err_log.byte1 = (RTAS_SEVERITY_ERROR_SYNC << SEVERITY_SHIFT);
> +    mc_log.err_log.byte1 |= (RTAS_DISP_NOT_RECOVERED << DISPOSITION_SHIFT);
> +    mc_log.err_log.byte2 = (RTAS_INITIATOR_MEMORY << INITIATOR_SHIFT);
> +    mc_log.err_log.byte2 |= RTAS_TARGET_MEMORY;
> +
> +    if (env->spr[SPR_DSISR] & P7_DSISR_MC_UE) {
> +        mc_log.err_log.byte3 = RTAS_TYPE_ECC_UNCORR;
> +    } else {
> +        mc_log.err_log.byte3 = 0x0;
> +    }
> +
> +    /* Handle all Host/Guest LE/BE combinations */
> +    if ((*pcc->interrupts_big_endian)(cpu)) {
> +        mc_log.srr0 = cpu_to_be64(mc_log.srr0);
> +        mc_log.srr1 = cpu_to_be64(mc_log.srr1);
> +        mc_log.r3 = cpu_to_be64(mc_log.r3);
> +    } else {
> +        mc_log.srr0 = cpu_to_le64(mc_log.srr0);
> +        mc_log.srr1 = cpu_to_le64(mc_log.srr1);
> +        mc_log.r3 = cpu_to_le64(mc_log.r3);
> +    }
> +
> +    cpu_physical_memory_write(spapr->rtas_addr, &mc_log,
> +                              sizeof(mc_log));
> +
> +    /*
> +     * spapr->rtas_addr now contains srr0, srr1, original r3, followed
> +     * by the error log structure. The address of the error log
> +     * should be passed on to guest's machine check notification
> +     * routine. As this hcall is directly called from 0x200 interrupt
> +     * vector and returns to assembly routine, we return spapr->rtas_addr
> +     * instead of H_SUCCESS. Upon return, We restore srr0 and srr1, increment
> +     * r3 to point to the error log and branch to machine check notification
> +     * routine in 0x200. r3 containing the error address is now argument
> +     * to OS registered machine check notification routine. This way we
> +     * also avoids clobbering additional registers in 0x200 vector.
> +     */
> +    return spapr->rtas_addr;
> +}
> +
>  static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>                                     target_ulong opcode, target_ulong *args)
>  {
> @@ -1011,6 +1157,7 @@ static void hypercall_register_types(void)
>      /* qemu/KVM-PPC specific hcalls */
>      spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);
>      spapr_register_hypercall(KVMPPC_H_RTAS_UPDATE, h_rtas_update);
> +    spapr_register_hypercall(KVMPPC_H_REPORT_MC_ERR, h_report_mc_err);
>  
>      spapr_register_hypercall(H_SET_MODE, h_set_mode);
>  
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index fb8cc63..57199f5 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -310,7 +310,9 @@ typedef struct sPAPREnvironment {
>  /* Client Architecture support */
>  #define KVMPPC_H_CAS            (KVMPPC_HCALL_BASE + 0x2)
>  #define KVMPPC_H_RTAS_UPDATE    (KVMPPC_HCALL_BASE + 0x3)
> -#define KVMPPC_HCALL_MAX        KVMPPC_H_RTAS_UPDATE
> +/* Report Machine Check error */
> +#define KVMPPC_H_REPORT_MC_ERR  (KVMPPC_HCALL_BASE + 0x4)
> +#define KVMPPC_HCALL_MAX        KVMPPC_H_REPORT_MC_ERR
>  
>  extern sPAPREnvironment *spapr;
>  
> 
>
Aravinda Prasad Aug. 26, 2014, 6:40 a.m. UTC | #2
On Tuesday 26 August 2014 11:17 AM, David Gibson wrote:
> On Mon, Aug 25, 2014 at 07:15:35PM +0530, Aravinda Prasad wrote:
>> Whenever there is a physical memory error due to bit
>> flips, which cannot be corrected by hardware, the error
>> is passed on to the kernel. If the memory address in
>> error belongs to guest address space then guest kernel
>> is responsible to take action. Hence the error is passed
>> on to guest via KVM by invoking 0x200 NMI vector.
>>
>> However, guest OS, as per PAPR, expects an error log
>> upon such error. This patch registers a new hcall
>> which is issued from 0x200 interrupt vector and builds
>> the error log, copies the error log to rtas space and
>> passes the address of the error log to guest
>>
>> Enhancement to KVM to perform above functionality is
>> already in upstream kernel.
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>> ---
>>  hw/ppc/spapr_hcall.c   |  147 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/ppc/spapr.h |    4 +
>>  2 files changed, 150 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
>> index 118ee77..be063f4 100644
>> --- a/hw/ppc/spapr_hcall.c
>> +++ b/hw/ppc/spapr_hcall.c
>> @@ -14,6 +14,86 @@ struct SPRSyncState {
>>      target_ulong mask;
>>  };
>>  
>> +#define SEVERITY_SHIFT         0x5
>> +#define DISPOSITION_SHIFT      0x3
>> +#define INITIATOR_SHIFT        0x4
>> +
>> +/*
>> + * Only required RTAS event severity, disposition, initiator
>> + * target and type are copied from arch/powerpc/include/asm/rtas.h
>> + */
>> +
>> +/* RTAS event severity */
>> +#define RTAS_SEVERITY_ERROR_SYNC    0x3
>> +
>> +/* RTAS event disposition */
>> +#define RTAS_DISP_NOT_RECOVERED     0x2
>> +
>> +/* RTAS event initiator */
>> +#define RTAS_INITIATOR_MEMORY       0x4
>> +
>> +/* RTAS event target */
>> +#define RTAS_TARGET_MEMORY          0x4
>> +
>> +/* RTAS event type */
>> +#define RTAS_TYPE_ECC_UNCORR        0x09
>> +
>> +/*
>> + * Currently KVM only passes on the uncorrected machine
>> + * check memory error to guest. Other machine check errors
>> + * such as SLB multi-hit and TLB multi-hit are recovered
>> + * in KVM and are not passed on to guest.
>> + *
>> + * DSISR Bit for uncorrected machine check error. Copied
>> + * from arch/powerpc/include/asm/mce.h
>> + */
>> +#define PPC_BITLSHIFT(be)           (64 - 1 - (be))
>> +#define PPC_BIT(bit)                (1UL << PPC_BITLSHIFT(bit))
> 
> Just opencode the BITLSHIFT macro into the BIT macro, it doesn't
> improve either brevity or clarity as it is.
> 
> Or alternatively define PPC_BIT as (0x8000000000000000ULL >> bit)
> which might be clearer.

sure.

Regards,
Aravinda

> 
>> +#define P7_DSISR_MC_UE              (PPC_BIT(48))  /* P8 too */
>> +
>> +/* Adopted from kernel source arch/powerpc/include/asm/rtas.h */
>> +struct rtas_error_log {
>> +    /* Byte 0 */
>> +    uint8_t     byte0;          /* Architectural version */
>> +
>> +    /* Byte 1 */
>> +    uint8_t     byte1;
>> +    /* XXXXXXXX
>> +     * XXX      3: Severity level of error
>> +     *    XX    2: Degree of recovery
>> +     *      X   1: Extended log present?
>> +     *       XX 2: Reserved
>> +     */
>> +
>> +    /* Byte 2 */
>> +    uint8_t     byte2;
>> +    /* XXXXXXXX
>> +     * XXXX     4: Initiator of event
>> +     *     XXXX 4: Target of failed operation
>> +     */
>> +    uint8_t     byte3;          /* General event or error*/
>> +};
>> +
>> +/*
>> + * Data format in RTAS-Blob
>> + *
>> + * This structure contains error information related to Machine
>> + * Check exception. This is filled up and copied to rtas-blob
>> + * upon machine check exception.
>> + */
>> +struct rtas_mc_log {
>> +    target_ulong srr0;
>> +    target_ulong srr1;
>> +    /*
>> +     * Beginning of error log address. This is properly
>> +     * populated and passed on to OS registered machine
>> +     * check notification routine upon machine check
>> +     * exception
>> +     */
>> +    target_ulong r3;
>> +    struct rtas_error_log err_log;
>> +};
>> +
>>  static void do_spr_sync(void *arg)
>>  {
>>      struct SPRSyncState *s = arg;
>> @@ -586,6 +666,72 @@ static target_ulong h_rtas_update(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>>      return 0;
>>  }
>>  
>> +static target_ulong h_report_mc_err(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>> +                                 target_ulong opcode, target_ulong *args)
>> +{
>> +    struct rtas_mc_log mc_log;
>> +    CPUPPCState *env = &cpu->env;
>> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
>> +
>> +    /*
>> +     * We save the original r3 register in SPRG2 in 0x200 vector,
>> +     * which is patched during call to ibm.nmi-register. Original
>> +     * r3 is required to be included in error log
>> +     */
>> +    mc_log.r3 = env->spr[SPR_SPRG2];
>> +
>> +    /*
>> +     * SRR0 and SRR1 contains nip and msr at the time of exception
>> +     * and are clobbered when we return from this hcall
>> +     * and hence need to be properly restored. We save srr0
>> +     * and srr1 in rtas blob and restore it in 0x200 vector
>> +     * before branching to OS registered machine check handler
>> +     */
>> +    mc_log.srr0 = env->spr[SPR_SRR0];
>> +    mc_log.srr1 = env->spr[SPR_SRR1];
>> +
>> +    /* Set error log fields */
>> +    mc_log.err_log.byte0 = 0x00;
>> +    mc_log.err_log.byte1 = (RTAS_SEVERITY_ERROR_SYNC << SEVERITY_SHIFT);
>> +    mc_log.err_log.byte1 |= (RTAS_DISP_NOT_RECOVERED << DISPOSITION_SHIFT);
>> +    mc_log.err_log.byte2 = (RTAS_INITIATOR_MEMORY << INITIATOR_SHIFT);
>> +    mc_log.err_log.byte2 |= RTAS_TARGET_MEMORY;
>> +
>> +    if (env->spr[SPR_DSISR] & P7_DSISR_MC_UE) {
>> +        mc_log.err_log.byte3 = RTAS_TYPE_ECC_UNCORR;
>> +    } else {
>> +        mc_log.err_log.byte3 = 0x0;
>> +    }
>> +
>> +    /* Handle all Host/Guest LE/BE combinations */
>> +    if ((*pcc->interrupts_big_endian)(cpu)) {
>> +        mc_log.srr0 = cpu_to_be64(mc_log.srr0);
>> +        mc_log.srr1 = cpu_to_be64(mc_log.srr1);
>> +        mc_log.r3 = cpu_to_be64(mc_log.r3);
>> +    } else {
>> +        mc_log.srr0 = cpu_to_le64(mc_log.srr0);
>> +        mc_log.srr1 = cpu_to_le64(mc_log.srr1);
>> +        mc_log.r3 = cpu_to_le64(mc_log.r3);
>> +    }
>> +
>> +    cpu_physical_memory_write(spapr->rtas_addr, &mc_log,
>> +                              sizeof(mc_log));
>> +
>> +    /*
>> +     * spapr->rtas_addr now contains srr0, srr1, original r3, followed
>> +     * by the error log structure. The address of the error log
>> +     * should be passed on to guest's machine check notification
>> +     * routine. As this hcall is directly called from 0x200 interrupt
>> +     * vector and returns to assembly routine, we return spapr->rtas_addr
>> +     * instead of H_SUCCESS. Upon return, We restore srr0 and srr1, increment
>> +     * r3 to point to the error log and branch to machine check notification
>> +     * routine in 0x200. r3 containing the error address is now argument
>> +     * to OS registered machine check notification routine. This way we
>> +     * also avoids clobbering additional registers in 0x200 vector.
>> +     */
>> +    return spapr->rtas_addr;
>> +}
>> +
>>  static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>>                                     target_ulong opcode, target_ulong *args)
>>  {
>> @@ -1011,6 +1157,7 @@ static void hypercall_register_types(void)
>>      /* qemu/KVM-PPC specific hcalls */
>>      spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);
>>      spapr_register_hypercall(KVMPPC_H_RTAS_UPDATE, h_rtas_update);
>> +    spapr_register_hypercall(KVMPPC_H_REPORT_MC_ERR, h_report_mc_err);
>>  
>>      spapr_register_hypercall(H_SET_MODE, h_set_mode);
>>  
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index fb8cc63..57199f5 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -310,7 +310,9 @@ typedef struct sPAPREnvironment {
>>  /* Client Architecture support */
>>  #define KVMPPC_H_CAS            (KVMPPC_HCALL_BASE + 0x2)
>>  #define KVMPPC_H_RTAS_UPDATE    (KVMPPC_HCALL_BASE + 0x3)
>> -#define KVMPPC_HCALL_MAX        KVMPPC_H_RTAS_UPDATE
>> +/* Report Machine Check error */
>> +#define KVMPPC_H_REPORT_MC_ERR  (KVMPPC_HCALL_BASE + 0x4)
>> +#define KVMPPC_HCALL_MAX        KVMPPC_H_REPORT_MC_ERR
>>  
>>  extern sPAPREnvironment *spapr;
>>  
>>
>>
>
Alexander Graf Aug. 27, 2014, 9:50 a.m. UTC | #3
On 25.08.14 15:45, Aravinda Prasad wrote:
> Whenever there is a physical memory error due to bit
> flips, which cannot be corrected by hardware, the error
> is passed on to the kernel. If the memory address in
> error belongs to guest address space then guest kernel
> is responsible to take action. Hence the error is passed
> on to guest via KVM by invoking 0x200 NMI vector.
> 
> However, guest OS, as per PAPR, expects an error log
> upon such error. This patch registers a new hcall
> which is issued from 0x200 interrupt vector and builds
> the error log, copies the error log to rtas space and
> passes the address of the error log to guest
> 
> Enhancement to KVM to perform above functionality is
> already in upstream kernel.
> 
> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>

Why do we have to maintain the error log inside the rtas blob? Can't we
just create a fixed MMIO region that is backed by an error log device in
QEMU?

Then this call would just always return that specific address and we'd
also have a single file that deals with all of the error reporting.


Alex
Aravinda Prasad Aug. 28, 2014, 6:12 a.m. UTC | #4
On Wednesday 27 August 2014 03:20 PM, Alexander Graf wrote:
> 
> 
> On 25.08.14 15:45, Aravinda Prasad wrote:
>> Whenever there is a physical memory error due to bit
>> flips, which cannot be corrected by hardware, the error
>> is passed on to the kernel. If the memory address in
>> error belongs to guest address space then guest kernel
>> is responsible to take action. Hence the error is passed
>> on to guest via KVM by invoking 0x200 NMI vector.
>>
>> However, guest OS, as per PAPR, expects an error log
>> upon such error. This patch registers a new hcall
>> which is issued from 0x200 interrupt vector and builds
>> the error log, copies the error log to rtas space and
>> passes the address of the error log to guest
>>
>> Enhancement to KVM to perform above functionality is
>> already in upstream kernel.
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> 
> Why do we have to maintain the error log inside the rtas blob? Can't we
> just create a fixed MMIO region that is backed by an error log device in
> QEMU?
> 
> Then this call would just always return that specific address and we'd
> also have a single file that deals with all of the error reporting.

PAPR requires error log to be inside rtas space. Hence guest OS
explicitly checks if error log is inside rtas space.

VALID_FWNMI_BUFFER does this check inside fwnmi_get_errinfo() in the kernel.

Regards,
Aravinda

> 
> 
> Alex
>
Alexander Graf Aug. 28, 2014, 8:36 a.m. UTC | #5
On 28.08.14 08:12, Aravinda Prasad wrote:
> 
> 
> On Wednesday 27 August 2014 03:20 PM, Alexander Graf wrote:
>>
>>
>> On 25.08.14 15:45, Aravinda Prasad wrote:
>>> Whenever there is a physical memory error due to bit
>>> flips, which cannot be corrected by hardware, the error
>>> is passed on to the kernel. If the memory address in
>>> error belongs to guest address space then guest kernel
>>> is responsible to take action. Hence the error is passed
>>> on to guest via KVM by invoking 0x200 NMI vector.
>>>
>>> However, guest OS, as per PAPR, expects an error log
>>> upon such error. This patch registers a new hcall
>>> which is issued from 0x200 interrupt vector and builds
>>> the error log, copies the error log to rtas space and
>>> passes the address of the error log to guest
>>>
>>> Enhancement to KVM to perform above functionality is
>>> already in upstream kernel.
>>>
>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>
>> Why do we have to maintain the error log inside the rtas blob? Can't we
>> just create a fixed MMIO region that is backed by an error log device in
>> QEMU?
>>
>> Then this call would just always return that specific address and we'd
>> also have a single file that deals with all of the error reporting.
> 
> PAPR requires error log to be inside rtas space. Hence guest OS
> explicitly checks if error log is inside rtas space.
> 
> VALID_FWNMI_BUFFER does this check inside fwnmi_get_errinfo() in the kernel.

So why not put it at 0x7000 then?


Alex
Benjamin Herrenschmidt Aug. 28, 2014, 10:21 a.m. UTC | #6
On Thu, 2014-08-28 at 10:36 +0200, Alexander Graf wrote:

> So why not put it at 0x7000 then?

Because PAPR says it has to be inside RTAS iirc

Cheers,
Ben.
Alexander Graf Aug. 28, 2014, 10:29 a.m. UTC | #7
On 28.08.14 12:21, Benjamin Herrenschmidt wrote:
> On Thu, 2014-08-28 at 10:36 +0200, Alexander Graf wrote:
> 
>> So why not put it at 0x7000 then?
> 
> Because PAPR says it has to be inside RTAS iirc

Please show me the section of PAPR that does say so.


Alex
Benjamin Herrenschmidt Aug. 28, 2014, 10:33 a.m. UTC | #8
On Thu, 2014-08-28 at 12:29 +0200, Alexander Graf wrote:
> 
> On 28.08.14 12:21, Benjamin Herrenschmidt wrote:
> > On Thu, 2014-08-28 at 10:36 +0200, Alexander Graf wrote:
> > 
> >> So why not put it at 0x7000 then?
> > 
> > Because PAPR says it has to be inside RTAS iirc
> 
> Please show me the section of PAPR that does say so.

Actually, we should probably support both options:

PAPR V2.7

7.3.14 Firmware Assisted Non-Maskable Interrupts Option (FWNMI)

<<
The difference between ibm,nmi-register and ibm,nmi-register-2 is that
ibm,nmi-register allocates the error reporting
structure in RTAS space while ibm,nmi-register-2 places the error
reporting structure in real page 7. New OS designs
should use ibm,nmi-register since support for ibm,nmi-register-2 will be
terminated at some future date.
>>

Cheers,
Ben.
Benjamin Herrenschmidt Aug. 28, 2014, 10:34 a.m. UTC | #9
On Thu, 2014-08-28 at 20:33 +1000, Benjamin Herrenschmidt wrote:
> On Thu, 2014-08-28 at 12:29 +0200, Alexander Graf wrote:
> > 
> > On 28.08.14 12:21, Benjamin Herrenschmidt wrote:
> > > On Thu, 2014-08-28 at 10:36 +0200, Alexander Graf wrote:
> > > 
> > >> So why not put it at 0x7000 then?
> > > 
> > > Because PAPR says it has to be inside RTAS iirc
> > 
> > Please show me the section of PAPR that does say so.
> 
> Actually, we should probably support both options:
> 
> PAPR V2.7
> 
> 7.3.14 Firmware Assisted Non-Maskable Interrupts Option (FWNMI)
> 
> <<
> The difference between ibm,nmi-register and ibm,nmi-register-2 is that
> ibm,nmi-register allocates the error reporting
> structure in RTAS space while ibm,nmi-register-2 places the error
> reporting structure in real page 7. New OS designs
> should use ibm,nmi-register since support for ibm,nmi-register-2 will be
> terminated at some future date.
> >>

Also of interest:

<<
As with all first level interrupt service routines, the SPRG-2 register
is used to save the state of one general purpose
register while the processor computes the location of its state save
area.
>>

Cheers,
Ben.

> 
> Cheers,
> Ben.
>
Aravinda Prasad Aug. 28, 2014, 5:17 p.m. UTC | #10
On Thursday 28 August 2014 04:03 PM, Benjamin Herrenschmidt wrote:
> On Thu, 2014-08-28 at 12:29 +0200, Alexander Graf wrote:
>>
>> On 28.08.14 12:21, Benjamin Herrenschmidt wrote:
>>> On Thu, 2014-08-28 at 10:36 +0200, Alexander Graf wrote:
>>>
>>>> So why not put it at 0x7000 then?
>>>
>>> Because PAPR says it has to be inside RTAS iirc
>>
>> Please show me the section of PAPR that does say so.
> 
> Actually, we should probably support both options:
> 
> PAPR V2.7
> 
> 7.3.14 Firmware Assisted Non-Maskable Interrupts Option (FWNMI)
> 
> <<
> The difference between ibm,nmi-register and ibm,nmi-register-2 is that
> ibm,nmi-register allocates the error reporting
> structure in RTAS space while ibm,nmi-register-2 places the error
> reporting structure in real page 7. New OS designs
> should use ibm,nmi-register since support for ibm,nmi-register-2 will be
> terminated at some future date.

Should we avoid having error log in 0x7000? As per above only
ibm,nmi-register-2 places error log in 0x7000 which will be terminated
in future?

>>>
> 
> Cheers,
> Ben.
> 
> 
>
Benjamin Herrenschmidt Aug. 28, 2014, 8:07 p.m. UTC | #11
On Thu, 2014-08-28 at 22:47 +0530, Aravinda Prasad wrote:
> Should we avoid having error log in 0x7000? As per above only
> ibm,nmi-register-2 places error log in 0x7000 which will be terminated
> in future?

The question is which variant Linux uses and which variant are other
OSes we might be interested in use ?

We might have to implement both, but I would probably start with just
ibm,nmi-register

Cheers,
Ben.
Aravinda Prasad Aug. 30, 2014, 8:06 a.m. UTC | #12
On Friday 29 August 2014 01:37 AM, Benjamin Herrenschmidt wrote:
> On Thu, 2014-08-28 at 22:47 +0530, Aravinda Prasad wrote:
>> Should we avoid having error log in 0x7000? As per above only
>> ibm,nmi-register-2 places error log in 0x7000 which will be terminated
>> in future?
> 
> The question is which variant Linux uses and which variant are other
> OSes we might be interested in use ?
> 
> We might have to implement both, but I would probably start with just
> ibm,nmi-register

sure.

> 
> Cheers,
> Ben.
> 
>
diff mbox

Patch

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 118ee77..be063f4 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -14,6 +14,86 @@  struct SPRSyncState {
     target_ulong mask;
 };
 
+#define SEVERITY_SHIFT         0x5
+#define DISPOSITION_SHIFT      0x3
+#define INITIATOR_SHIFT        0x4
+
+/*
+ * Only required RTAS event severity, disposition, initiator
+ * target and type are copied from arch/powerpc/include/asm/rtas.h
+ */
+
+/* RTAS event severity */
+#define RTAS_SEVERITY_ERROR_SYNC    0x3
+
+/* RTAS event disposition */
+#define RTAS_DISP_NOT_RECOVERED     0x2
+
+/* RTAS event initiator */
+#define RTAS_INITIATOR_MEMORY       0x4
+
+/* RTAS event target */
+#define RTAS_TARGET_MEMORY          0x4
+
+/* RTAS event type */
+#define RTAS_TYPE_ECC_UNCORR        0x09
+
+/*
+ * Currently KVM only passes on the uncorrected machine
+ * check memory error to guest. Other machine check errors
+ * such as SLB multi-hit and TLB multi-hit are recovered
+ * in KVM and are not passed on to guest.
+ *
+ * DSISR Bit for uncorrected machine check error. Copied
+ * from arch/powerpc/include/asm/mce.h
+ */
+#define PPC_BITLSHIFT(be)           (64 - 1 - (be))
+#define PPC_BIT(bit)                (1UL << PPC_BITLSHIFT(bit))
+#define P7_DSISR_MC_UE              (PPC_BIT(48))  /* P8 too */
+
+/* Adopted from kernel source arch/powerpc/include/asm/rtas.h */
+struct rtas_error_log {
+    /* Byte 0 */
+    uint8_t     byte0;          /* Architectural version */
+
+    /* Byte 1 */
+    uint8_t     byte1;
+    /* XXXXXXXX
+     * XXX      3: Severity level of error
+     *    XX    2: Degree of recovery
+     *      X   1: Extended log present?
+     *       XX 2: Reserved
+     */
+
+    /* Byte 2 */
+    uint8_t     byte2;
+    /* XXXXXXXX
+     * XXXX     4: Initiator of event
+     *     XXXX 4: Target of failed operation
+     */
+    uint8_t     byte3;          /* General event or error*/
+};
+
+/*
+ * Data format in RTAS-Blob
+ *
+ * This structure contains error information related to Machine
+ * Check exception. This is filled up and copied to rtas-blob
+ * upon machine check exception.
+ */
+struct rtas_mc_log {
+    target_ulong srr0;
+    target_ulong srr1;
+    /*
+     * Beginning of error log address. This is properly
+     * populated and passed on to OS registered machine
+     * check notification routine upon machine check
+     * exception
+     */
+    target_ulong r3;
+    struct rtas_error_log err_log;
+};
+
 static void do_spr_sync(void *arg)
 {
     struct SPRSyncState *s = arg;
@@ -586,6 +666,72 @@  static target_ulong h_rtas_update(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     return 0;
 }
 
+static target_ulong h_report_mc_err(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+                                 target_ulong opcode, target_ulong *args)
+{
+    struct rtas_mc_log mc_log;
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+
+    /*
+     * We save the original r3 register in SPRG2 in 0x200 vector,
+     * which is patched during call to ibm.nmi-register. Original
+     * r3 is required to be included in error log
+     */
+    mc_log.r3 = env->spr[SPR_SPRG2];
+
+    /*
+     * SRR0 and SRR1 contains nip and msr at the time of exception
+     * and are clobbered when we return from this hcall
+     * and hence need to be properly restored. We save srr0
+     * and srr1 in rtas blob and restore it in 0x200 vector
+     * before branching to OS registered machine check handler
+     */
+    mc_log.srr0 = env->spr[SPR_SRR0];
+    mc_log.srr1 = env->spr[SPR_SRR1];
+
+    /* Set error log fields */
+    mc_log.err_log.byte0 = 0x00;
+    mc_log.err_log.byte1 = (RTAS_SEVERITY_ERROR_SYNC << SEVERITY_SHIFT);
+    mc_log.err_log.byte1 |= (RTAS_DISP_NOT_RECOVERED << DISPOSITION_SHIFT);
+    mc_log.err_log.byte2 = (RTAS_INITIATOR_MEMORY << INITIATOR_SHIFT);
+    mc_log.err_log.byte2 |= RTAS_TARGET_MEMORY;
+
+    if (env->spr[SPR_DSISR] & P7_DSISR_MC_UE) {
+        mc_log.err_log.byte3 = RTAS_TYPE_ECC_UNCORR;
+    } else {
+        mc_log.err_log.byte3 = 0x0;
+    }
+
+    /* Handle all Host/Guest LE/BE combinations */
+    if ((*pcc->interrupts_big_endian)(cpu)) {
+        mc_log.srr0 = cpu_to_be64(mc_log.srr0);
+        mc_log.srr1 = cpu_to_be64(mc_log.srr1);
+        mc_log.r3 = cpu_to_be64(mc_log.r3);
+    } else {
+        mc_log.srr0 = cpu_to_le64(mc_log.srr0);
+        mc_log.srr1 = cpu_to_le64(mc_log.srr1);
+        mc_log.r3 = cpu_to_le64(mc_log.r3);
+    }
+
+    cpu_physical_memory_write(spapr->rtas_addr, &mc_log,
+                              sizeof(mc_log));
+
+    /*
+     * spapr->rtas_addr now contains srr0, srr1, original r3, followed
+     * by the error log structure. The address of the error log
+     * should be passed on to guest's machine check notification
+     * routine. As this hcall is directly called from 0x200 interrupt
+     * vector and returns to assembly routine, we return spapr->rtas_addr
+     * instead of H_SUCCESS. Upon return, We restore srr0 and srr1, increment
+     * r3 to point to the error log and branch to machine check notification
+     * routine in 0x200. r3 containing the error address is now argument
+     * to OS registered machine check notification routine. This way we
+     * also avoids clobbering additional registers in 0x200 vector.
+     */
+    return spapr->rtas_addr;
+}
+
 static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                    target_ulong opcode, target_ulong *args)
 {
@@ -1011,6 +1157,7 @@  static void hypercall_register_types(void)
     /* qemu/KVM-PPC specific hcalls */
     spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);
     spapr_register_hypercall(KVMPPC_H_RTAS_UPDATE, h_rtas_update);
+    spapr_register_hypercall(KVMPPC_H_REPORT_MC_ERR, h_report_mc_err);
 
     spapr_register_hypercall(H_SET_MODE, h_set_mode);
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index fb8cc63..57199f5 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -310,7 +310,9 @@  typedef struct sPAPREnvironment {
 /* Client Architecture support */
 #define KVMPPC_H_CAS            (KVMPPC_HCALL_BASE + 0x2)
 #define KVMPPC_H_RTAS_UPDATE    (KVMPPC_HCALL_BASE + 0x3)
-#define KVMPPC_HCALL_MAX        KVMPPC_H_RTAS_UPDATE
+/* Report Machine Check error */
+#define KVMPPC_H_REPORT_MC_ERR  (KVMPPC_HCALL_BASE + 0x4)
+#define KVMPPC_HCALL_MAX        KVMPPC_H_REPORT_MC_ERR
 
 extern sPAPREnvironment *spapr;