diff mbox

[v13,2/3] sPAPR: Implement EEH RTAS calls

Message ID 1418602508-30845-3-git-send-email-gwshan@linux.vnet.ibm.com
State New
Headers show

Commit Message

Gavin Shan Dec. 15, 2014, 12:15 a.m. UTC
The emulation for EEH RTAS requests from guest isn't covered
by QEMU yet and the patch implements them.

The patch defines constants used by EEH RTAS calls and adds
callback sPAPRPHBClass::eeh_handler, which is going to be used
this way:

  * RTAS calls are received in spapr_pci.c, sanity check is done
    there.
  * RTAS handlers handle what they can. If there is something they
    cannot handle and the sPAPRPHBClass::eeh_handler callback is defined,
    that callback is called.
  * sPAPRPHBClass::eeh_handler is only implemented for VFIO now. It
    does ioctl() to the IOMMU container fd to complete the call. Error
    codes from that ioctl() are transferred back to the guest.

[aik: defined RTAS tokens for EEH RTAS calls]
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 hw/ppc/spapr_pci.c          | 246 ++++++++++++++++++++++++++++++++++++++++++++
 include/hw/pci-host/spapr.h |   7 ++
 include/hw/ppc/spapr.h      |  43 +++++++-
 3 files changed, 294 insertions(+), 2 deletions(-)

Comments

Alexander Graf Dec. 15, 2014, 2:52 p.m. UTC | #1
On 15.12.14 01:15, Gavin Shan wrote:
> The emulation for EEH RTAS requests from guest isn't covered
> by QEMU yet and the patch implements them.
> 
> The patch defines constants used by EEH RTAS calls and adds
> callback sPAPRPHBClass::eeh_handler, which is going to be used
> this way:
> 
>   * RTAS calls are received in spapr_pci.c, sanity check is done
>     there.
>   * RTAS handlers handle what they can. If there is something it
>     cannot handle and sPAPRPHBClass::eeh_handler callback is defined,
>     it is called.
>   * sPAPRPHBClass::eeh_handler is only implemented for VFIO now. It
>     does ioctl() to the IOMMU container fd to complete the call. Error
>     codes from that ioctl() are transferred back to the guest.
> 
> [aik: defined RTAS tokens for EEH RTAS calls]
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr_pci.c          | 246 ++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/pci-host/spapr.h |   7 ++
>  include/hw/ppc/spapr.h      |  43 +++++++-
>  3 files changed, 294 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 3d70efe..3bb1971 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -406,6 +406,233 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
>      rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
>  }
>  
> +static int rtas_handle_eeh_request(sPAPREnvironment *spapr,
> +                                   uint64_t buid, uint32_t req, uint32_t opt)
> +{
> +    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
> +    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);

What happens when you try to cast NULL? Could a guest process invoke a
host assert() through this and abort the whole VM?

> +
> +    if (!sphb || !info->eeh_handler) {
> +        return -ENOENT;
> +    }
> +
> +    return info->eeh_handler(sphb, req, opt);
> +}
> +
> +static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
> +                                    sPAPREnvironment *spapr,
> +                                    uint32_t token, uint32_t nargs,
> +                                    target_ulong args, uint32_t nret,
> +                                    target_ulong rets)
> +{
> +    uint32_t addr, option;
> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
> +    int ret;
> +
> +    if ((nargs != 4) || (nret != 1)) {
> +        goto param_error_exit;
> +    }
> +
> +    addr = rtas_ld(args, 0);
> +    option = rtas_ld(args, 3);
> +    switch (option) {
> +    case RTAS_EEH_ENABLE:
> +        if (!spapr_pci_find_dev(spapr, buid, addr)) {
> +            goto param_error_exit;
> +        }
> +        break;
> +    case RTAS_EEH_DISABLE:
> +    case RTAS_EEH_THAW_IO:
> +    case RTAS_EEH_THAW_DMA:

So these don't use the addr hint?


Alex
Gavin Shan Dec. 15, 2014, 11:08 p.m. UTC | #2
On Mon, Dec 15, 2014 at 03:52:17PM +0100, Alexander Graf wrote:
>On 15.12.14 01:15, Gavin Shan wrote:
>> The emulation for EEH RTAS requests from guest isn't covered
>> by QEMU yet and the patch implements them.
>> 
>> The patch defines constants used by EEH RTAS calls and adds
>> callback sPAPRPHBClass::eeh_handler, which is going to be used
>> this way:
>> 
>>   * RTAS calls are received in spapr_pci.c, sanity check is done
>>     there.
>>   * RTAS handlers handle what they can. If there is something it
>>     cannot handle and sPAPRPHBClass::eeh_handler callback is defined,
>>     it is called.
>>   * sPAPRPHBClass::eeh_handler is only implemented for VFIO now. It
>>     does ioctl() to the IOMMU container fd to complete the call. Error
>>     codes from that ioctl() are transferred back to the guest.
>> 
>> [aik: defined RTAS tokens for EEH RTAS calls]
>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> ---
>>  hw/ppc/spapr_pci.c          | 246 ++++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/pci-host/spapr.h |   7 ++
>>  include/hw/ppc/spapr.h      |  43 +++++++-
>>  3 files changed, 294 insertions(+), 2 deletions(-)
>> 
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index 3d70efe..3bb1971 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -406,6 +406,233 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
>>      rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
>>  }
>>  
>> +static int rtas_handle_eeh_request(sPAPREnvironment *spapr,
>> +                                   uint64_t buid, uint32_t req, uint32_t opt)
>> +{
>> +    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
>> +    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
>
>What happens when you try to cast NULL? Could a guest process invoke a
>host assert() through this and abort the whole VM?
>

Yes, it would cause a core dump. I ran an experiment that forced "sphb"
to NULL before doing the cast, and the whole VM was aborted. So I
guess you'd be happy with something like the following. Unless you're
suggesting something else, I'll update the code as follows in the next version:

	sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
	sPAPRPHBClass *info;

	if (!sphb) {
	    return -ENODEV;
	}

	info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
	if (!info->eeh_handler) {
	    return -ENOENT;
	}

	return info->eeh_handler(sphb, req, opt);

>> +
>> +    if (!sphb || !info->eeh_handler) {
>> +        return -ENOENT;
>> +    }
>> +
>> +    return info->eeh_handler(sphb, req, opt);
>> +}
>> +
>> +static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
>> +                                    sPAPREnvironment *spapr,
>> +                                    uint32_t token, uint32_t nargs,
>> +                                    target_ulong args, uint32_t nret,
>> +                                    target_ulong rets)
>> +{
>> +    uint32_t addr, option;
>> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
>> +    int ret;
>> +
>> +    if ((nargs != 4) || (nret != 1)) {
>> +        goto param_error_exit;
>> +    }
>> +
>> +    addr = rtas_ld(args, 0);
>> +    option = rtas_ld(args, 3);
>> +    switch (option) {
>> +    case RTAS_EEH_ENABLE:
>> +        if (!spapr_pci_find_dev(spapr, buid, addr)) {
>> +            goto param_error_exit;
>> +        }
>> +        break;
>> +    case RTAS_EEH_DISABLE:
>> +    case RTAS_EEH_THAW_IO:
>> +    case RTAS_EEH_THAW_DMA:
>
>So these don't use the addr hint?
>

No, there's no address as an argument of this RTAS call "ibm,set-eeh-option".
The RTAS call has 4 arguments, all of them 32 bits wide: BUID high part, BUID
low part, PE address, option. The option can be one of: enable/disable EEH
functionality, enable IO path, enable DMA path.

Thanks,
Gavin

>
>Alex
>
Alexander Graf Dec. 15, 2014, 11:13 p.m. UTC | #3
On 16.12.14 00:08, Gavin Shan wrote:
> On Mon, Dec 15, 2014 at 03:52:17PM +0100, Alexander Graf wrote:
>> On 15.12.14 01:15, Gavin Shan wrote:
>>> The emulation for EEH RTAS requests from guest isn't covered
>>> by QEMU yet and the patch implements them.
>>>
>>> The patch defines constants used by EEH RTAS calls and adds
>>> callback sPAPRPHBClass::eeh_handler, which is going to be used
>>> this way:
>>>
>>>   * RTAS calls are received in spapr_pci.c, sanity check is done
>>>     there.
>>>   * RTAS handlers handle what they can. If there is something it
>>>     cannot handle and sPAPRPHBClass::eeh_handler callback is defined,
>>>     it is called.
>>>   * sPAPRPHBClass::eeh_handler is only implemented for VFIO now. It
>>>     does ioctl() to the IOMMU container fd to complete the call. Error
>>>     codes from that ioctl() are transferred back to the guest.
>>>
>>> [aik: defined RTAS tokens for EEH RTAS calls]
>>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>> ---
>>>  hw/ppc/spapr_pci.c          | 246 ++++++++++++++++++++++++++++++++++++++++++++
>>>  include/hw/pci-host/spapr.h |   7 ++
>>>  include/hw/ppc/spapr.h      |  43 +++++++-
>>>  3 files changed, 294 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>> index 3d70efe..3bb1971 100644
>>> --- a/hw/ppc/spapr_pci.c
>>> +++ b/hw/ppc/spapr_pci.c
>>> @@ -406,6 +406,233 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
>>>      rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
>>>  }
>>>  
>>> +static int rtas_handle_eeh_request(sPAPREnvironment *spapr,
>>> +                                   uint64_t buid, uint32_t req, uint32_t opt)
>>> +{
>>> +    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
>>> +    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
>>
>> What happens when you try to cast NULL? Could a guest process invoke a
>> host assert() through this and abort the whole VM?
>>
> 
> Yes, it would cause core dump. I had one experiment to force assigning
> NULL to "sphb" before doing the cast, the whole VM is aborted. So I
> guess you're happy to have something as follows. If you're not suggesting
> something else, I'll update the code as follows in next version:
> 
> 	sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
> 	sPAPRPHBClass *info;
> 
> 	if (!sphb) {
> 	    return -ENODEV;
> 	}
> 
> 	info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
> 	if (!info->eeh_handler) {
> 	    return -ENOENT;
> 	}
> 
> 	return info->eeh_handler(sphb, req, opt);

Yes, I think this is a lot safer. And yes, the other patch looks sane to me.

> 
>>> +
>>> +    if (!sphb || !info->eeh_handler) {
>>> +        return -ENOENT;
>>> +    }
>>> +
>>> +    return info->eeh_handler(sphb, req, opt);
>>> +}
>>> +
>>> +static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
>>> +                                    sPAPREnvironment *spapr,
>>> +                                    uint32_t token, uint32_t nargs,
>>> +                                    target_ulong args, uint32_t nret,
>>> +                                    target_ulong rets)
>>> +{
>>> +    uint32_t addr, option;
>>> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
>>> +    int ret;
>>> +
>>> +    if ((nargs != 4) || (nret != 1)) {
>>> +        goto param_error_exit;
>>> +    }
>>> +
>>> +    addr = rtas_ld(args, 0);
>>> +    option = rtas_ld(args, 3);
>>> +    switch (option) {
>>> +    case RTAS_EEH_ENABLE:
>>> +        if (!spapr_pci_find_dev(spapr, buid, addr)) {
>>> +            goto param_error_exit;
>>> +        }
>>> +        break;
>>> +    case RTAS_EEH_DISABLE:
>>> +    case RTAS_EEH_THAW_IO:
>>> +    case RTAS_EEH_THAW_DMA:
>>
>> So these don't use the addr hint?
>>
> 
> No, there're no address as argument of this RTAS call "ibm,set-eeh-option".
> The RTAS call has 4 arguments, all of them are 32-bits: BUID high part, BUID
> low part, PE address, option. The option could be one of: enable/disable EEH
> functionality, enable IO path, enable DMA path.

Well, I'm just wondering why ENABLE wants to make sure there's a device
and the others don't.


Alex
Gavin Shan Dec. 15, 2014, 11:29 p.m. UTC | #4
On Tue, Dec 16, 2014 at 12:13:03AM +0100, Alexander Graf wrote:
>On 16.12.14 00:08, Gavin Shan wrote:
>> On Mon, Dec 15, 2014 at 03:52:17PM +0100, Alexander Graf wrote:
>>> On 15.12.14 01:15, Gavin Shan wrote:
>>>> The emulation for EEH RTAS requests from guest isn't covered
>>>> by QEMU yet and the patch implements them.
>>>>
>>>> The patch defines constants used by EEH RTAS calls and adds
>>>> callback sPAPRPHBClass::eeh_handler, which is going to be used
>>>> this way:
>>>>
>>>>   * RTAS calls are received in spapr_pci.c, sanity check is done
>>>>     there.
>>>>   * RTAS handlers handle what they can. If there is something it
>>>>     cannot handle and sPAPRPHBClass::eeh_handler callback is defined,
>>>>     it is called.
>>>>   * sPAPRPHBClass::eeh_handler is only implemented for VFIO now. It
>>>>     does ioctl() to the IOMMU container fd to complete the call. Error
>>>>     codes from that ioctl() are transferred back to the guest.
>>>>
>>>> [aik: defined RTAS tokens for EEH RTAS calls]
>>>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>>> ---
>>>>  hw/ppc/spapr_pci.c          | 246 ++++++++++++++++++++++++++++++++++++++++++++
>>>>  include/hw/pci-host/spapr.h |   7 ++
>>>>  include/hw/ppc/spapr.h      |  43 +++++++-
>>>>  3 files changed, 294 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>>> index 3d70efe..3bb1971 100644
>>>> --- a/hw/ppc/spapr_pci.c
>>>> +++ b/hw/ppc/spapr_pci.c
>>>> @@ -406,6 +406,233 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
>>>>      rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
>>>>  }
>>>>  
>>>> +static int rtas_handle_eeh_request(sPAPREnvironment *spapr,
>>>> +                                   uint64_t buid, uint32_t req, uint32_t opt)
>>>> +{
>>>> +    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
>>>> +    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
>>>
>>> What happens when you try to cast NULL? Could a guest process invoke a
>>> host assert() through this and abort the whole VM?
>>>
>> 
>> Yes, it would cause core dump. I had one experiment to force assigning
>> NULL to "sphb" before doing the cast, the whole VM is aborted. So I
>> guess you're happy to have something as follows. If you're not suggesting
>> something else, I'll update the code as follows in next version:
>> 
>> 	sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
>> 	sPAPRPHBClass *info;
>> 
>> 	if (!sphb) {
>> 	    return -ENODEV;
>> 	}
>> 
>> 	info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
>> 	if (!info->eeh_handler) {
>> 	    return -ENOENT;
>> 	}
>> 
>> 	return info->eeh_handler(sphb, req, opt);
>
>Yes, I think this is a lot safer. And yes, the other patch looks sane to me.
>

Thank you for your time reviewing this. Will update in next version.

>> 
>>>> +
>>>> +    if (!sphb || !info->eeh_handler) {
>>>> +        return -ENOENT;
>>>> +    }
>>>> +
>>>> +    return info->eeh_handler(sphb, req, opt);
>>>> +}
>>>> +
>>>> +static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
>>>> +                                    sPAPREnvironment *spapr,
>>>> +                                    uint32_t token, uint32_t nargs,
>>>> +                                    target_ulong args, uint32_t nret,
>>>> +                                    target_ulong rets)
>>>> +{
>>>> +    uint32_t addr, option;
>>>> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
>>>> +    int ret;
>>>> +
>>>> +    if ((nargs != 4) || (nret != 1)) {
>>>> +        goto param_error_exit;
>>>> +    }
>>>> +
>>>> +    addr = rtas_ld(args, 0);
>>>> +    option = rtas_ld(args, 3);
>>>> +    switch (option) {
>>>> +    case RTAS_EEH_ENABLE:
>>>> +        if (!spapr_pci_find_dev(spapr, buid, addr)) {
>>>> +            goto param_error_exit;
>>>> +        }
>>>> +        break;
>>>> +    case RTAS_EEH_DISABLE:
>>>> +    case RTAS_EEH_THAW_IO:
>>>> +    case RTAS_EEH_THAW_DMA:
>>>
>>> So these don't use the addr hint?
>>>
>> 
>> No, there're no address as argument of this RTAS call "ibm,set-eeh-option".
>> The RTAS call has 4 arguments, all of them are 32-bits: BUID high part, BUID
>> low part, PE address, option. The option could be one of: enable/disable EEH
>> functionality, enable IO path, enable DMA path.
>
>Well, I'm just wondering that ENABLE wants to make sure there's a device
>and the others don't.
>

Oh, I misunderstood your question. Yes, you're correct. All options
except ENABLE will have their address checked in rtas_handle_eeh_request(),
where we check the "BUID", since each PHB and PE have a one-to-one
relationship.

ENABLE and the other options use different addresses: ENABLE
uses the config address of one specific device, but the other options use
the PE address. From the guest's side, EEH capability is enabled on a
per-PCI-device basis, and the remaining options are supported on a per-PE
basis. Each PE can contain one or multiple PCI devices.

The DISABLE option hasn't been used so far.

Thanks,
Gavin

>
>Alex
>
Alexander Graf Dec. 16, 2014, 12:08 a.m. UTC | #5
On 16.12.14 00:29, Gavin Shan wrote:
> On Tue, Dec 16, 2014 at 12:13:03AM +0100, Alexander Graf wrote:
>> On 16.12.14 00:08, Gavin Shan wrote:
>>> On Mon, Dec 15, 2014 at 03:52:17PM +0100, Alexander Graf wrote:
>>>> On 15.12.14 01:15, Gavin Shan wrote:
>>>>> The emulation for EEH RTAS requests from guest isn't covered
>>>>> by QEMU yet and the patch implements them.
>>>>>
>>>>> The patch defines constants used by EEH RTAS calls and adds
>>>>> callback sPAPRPHBClass::eeh_handler, which is going to be used
>>>>> this way:
>>>>>
>>>>>   * RTAS calls are received in spapr_pci.c, sanity check is done
>>>>>     there.
>>>>>   * RTAS handlers handle what they can. If there is something it
>>>>>     cannot handle and sPAPRPHBClass::eeh_handler callback is defined,
>>>>>     it is called.
>>>>>   * sPAPRPHBClass::eeh_handler is only implemented for VFIO now. It
>>>>>     does ioctl() to the IOMMU container fd to complete the call. Error
>>>>>     codes from that ioctl() are transferred back to the guest.
>>>>>
>>>>> [aik: defined RTAS tokens for EEH RTAS calls]
>>>>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>>>> ---
>>>>>  hw/ppc/spapr_pci.c          | 246 ++++++++++++++++++++++++++++++++++++++++++++
>>>>>  include/hw/pci-host/spapr.h |   7 ++
>>>>>  include/hw/ppc/spapr.h      |  43 +++++++-
>>>>>  3 files changed, 294 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>>>> index 3d70efe..3bb1971 100644
>>>>> --- a/hw/ppc/spapr_pci.c
>>>>> +++ b/hw/ppc/spapr_pci.c
>>>>> @@ -406,6 +406,233 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
>>>>>      rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
>>>>>  }
>>>>>  
>>>>> +static int rtas_handle_eeh_request(sPAPREnvironment *spapr,
>>>>> +                                   uint64_t buid, uint32_t req, uint32_t opt)
>>>>> +{
>>>>> +    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
>>>>> +    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
>>>>
>>>> What happens when you try to cast NULL? Could a guest process invoke a
>>>> host assert() through this and abort the whole VM?
>>>>
>>>
>>> Yes, it would cause core dump. I had one experiment to force assigning
>>> NULL to "sphb" before doing the cast, the whole VM is aborted. So I
>>> guess you're happy to have something as follows. If you're not suggesting
>>> something else, I'll update the code as follows in next version:
>>>
>>> 	sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
>>> 	sPAPRPHBClass *info;
>>>
>>> 	if (!sphb) {
>>> 	    return -ENODEV;
>>> 	}
>>>
>>> 	info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
>>> 	if (!info->eeh_handler) {
>>> 	    return -ENOENT;
>>> 	}
>>>
>>> 	return info->eeh_handler(sphb, req, opt);
>>
>> Yes, I think this is a lot safer. And yes, the other patch looks sane to me.
>>
> 
> Thank you for your time reviewing this. Will update in next version.
> 
>>>
>>>>> +
>>>>> +    if (!sphb || !info->eeh_handler) {
>>>>> +        return -ENOENT;
>>>>> +    }
>>>>> +
>>>>> +    return info->eeh_handler(sphb, req, opt);
>>>>> +}
>>>>> +
>>>>> +static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
>>>>> +                                    sPAPREnvironment *spapr,
>>>>> +                                    uint32_t token, uint32_t nargs,
>>>>> +                                    target_ulong args, uint32_t nret,
>>>>> +                                    target_ulong rets)
>>>>> +{
>>>>> +    uint32_t addr, option;
>>>>> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
>>>>> +    int ret;
>>>>> +
>>>>> +    if ((nargs != 4) || (nret != 1)) {
>>>>> +        goto param_error_exit;
>>>>> +    }
>>>>> +
>>>>> +    addr = rtas_ld(args, 0);
>>>>> +    option = rtas_ld(args, 3);
>>>>> +    switch (option) {
>>>>> +    case RTAS_EEH_ENABLE:
>>>>> +        if (!spapr_pci_find_dev(spapr, buid, addr)) {
>>>>> +            goto param_error_exit;
>>>>> +        }
>>>>> +        break;
>>>>> +    case RTAS_EEH_DISABLE:
>>>>> +    case RTAS_EEH_THAW_IO:
>>>>> +    case RTAS_EEH_THAW_DMA:
>>>>
>>>> So these don't use the addr hint?
>>>>
>>>
>>> No, there're no address as argument of this RTAS call "ibm,set-eeh-option".
>>> The RTAS call has 4 arguments, all of them are 32-bits: BUID high part, BUID
>>> low part, PE address, option. The option could be one of: enable/disable EEH
>>> functionality, enable IO path, enable DMA path.
>>
>> Well, I'm just wondering that ENABLE wants to make sure there's a device
>> and the others don't.
>>
> 
> Oh, I misunderstood your question. Yes, you're correct. All options
> except ENABLE will have address check in rtas_handle_eeh_request()
> where we check on "BUID" since each PHB and PE have one-to-one
> relationship.
> 
> ENABLE and other options are using different address: enable
> uses config address of one specific device, but other options use PE
> address. From guest's sides, it enables EEH capability on basis
> of PCI device and left options are supported on basis of PE. Each
> PE could contain one or multiple PCI devices.
> 
> DISABLE option isn't used until now.

So would DISABLE also take effect on devfn basis or would it ignore the
first parameter?

If it behaves the same as ENABLE (which would be logical), please move
it into the same case group.


Alex
Gavin Shan Dec. 16, 2014, 12:31 a.m. UTC | #6
On Tue, Dec 16, 2014 at 01:08:52AM +0100, Alexander Graf wrote:
>
>
>On 16.12.14 00:29, Gavin Shan wrote:
>> On Tue, Dec 16, 2014 at 12:13:03AM +0100, Alexander Graf wrote:
>>> On 16.12.14 00:08, Gavin Shan wrote:
>>>> On Mon, Dec 15, 2014 at 03:52:17PM +0100, Alexander Graf wrote:
>>>>> On 15.12.14 01:15, Gavin Shan wrote:
>>>>>> The emulation for EEH RTAS requests from guest isn't covered
>>>>>> by QEMU yet and the patch implements them.
>>>>>>
>>>>>> The patch defines constants used by EEH RTAS calls and adds
>>>>>> callback sPAPRPHBClass::eeh_handler, which is going to be used
>>>>>> this way:
>>>>>>
>>>>>>   * RTAS calls are received in spapr_pci.c, sanity check is done
>>>>>>     there.
>>>>>>   * RTAS handlers handle what they can. If there is something it
>>>>>>     cannot handle and sPAPRPHBClass::eeh_handler callback is defined,
>>>>>>     it is called.
>>>>>>   * sPAPRPHBClass::eeh_handler is only implemented for VFIO now. It
>>>>>>     does ioctl() to the IOMMU container fd to complete the call. Error
>>>>>>     codes from that ioctl() are transferred back to the guest.
>>>>>>
>>>>>> [aik: defined RTAS tokens for EEH RTAS calls]
>>>>>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>>>>> ---
>>>>>>  hw/ppc/spapr_pci.c          | 246 ++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  include/hw/pci-host/spapr.h |   7 ++
>>>>>>  include/hw/ppc/spapr.h      |  43 +++++++-
>>>>>>  3 files changed, 294 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>>>>> index 3d70efe..3bb1971 100644
>>>>>> --- a/hw/ppc/spapr_pci.c
>>>>>> +++ b/hw/ppc/spapr_pci.c
>>>>>> @@ -406,6 +406,233 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
>>>>>>      rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
>>>>>>  }
>>>>>>  
>>>>>> +static int rtas_handle_eeh_request(sPAPREnvironment *spapr,
>>>>>> +                                   uint64_t buid, uint32_t req, uint32_t opt)
>>>>>> +{
>>>>>> +    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
>>>>>> +    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
>>>>>
>>>>> What happens when you try to cast NULL? Could a guest process invoke a
>>>>> host assert() through this and abort the whole VM?
>>>>>
>>>>
>>>> Yes, it would cause core dump. I had one experiment to force assigning
>>>> NULL to "sphb" before doing the cast, the whole VM is aborted. So I
>>>> guess you're happy to have something as follows. If you're not suggesting
>>>> something else, I'll update the code as follows in next version:
>>>>
>>>> 	sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
>>>> 	sPAPRPHBClass *info;
>>>>
>>>> 	if (!sphb) {
>>>> 	    return -ENODEV;
>>>> 	}
>>>>
>>>> 	info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
>>>> 	if (!info->eeh_handler) {
>>>> 	    return -ENOENT;
>>>> 	}
>>>>
>>>> 	return info->eeh_handler(sphb, req, opt);
>>>
>>> Yes, I think this is a lot safer. And yes, the other patch looks sane to me.
>>>
>> 
>> Thank you for your time reviewing this. Will update in next version.
>> 
>>>>
>>>>>> +
>>>>>> +    if (!sphb || !info->eeh_handler) {
>>>>>> +        return -ENOENT;
>>>>>> +    }
>>>>>> +
>>>>>> +    return info->eeh_handler(sphb, req, opt);
>>>>>> +}
>>>>>> +
>>>>>> +static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
>>>>>> +                                    sPAPREnvironment *spapr,
>>>>>> +                                    uint32_t token, uint32_t nargs,
>>>>>> +                                    target_ulong args, uint32_t nret,
>>>>>> +                                    target_ulong rets)
>>>>>> +{
>>>>>> +    uint32_t addr, option;
>>>>>> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    if ((nargs != 4) || (nret != 1)) {
>>>>>> +        goto param_error_exit;
>>>>>> +    }
>>>>>> +
>>>>>> +    addr = rtas_ld(args, 0);
>>>>>> +    option = rtas_ld(args, 3);
>>>>>> +    switch (option) {
>>>>>> +    case RTAS_EEH_ENABLE:
>>>>>> +        if (!spapr_pci_find_dev(spapr, buid, addr)) {
>>>>>> +            goto param_error_exit;
>>>>>> +        }
>>>>>> +        break;
>>>>>> +    case RTAS_EEH_DISABLE:
>>>>>> +    case RTAS_EEH_THAW_IO:
>>>>>> +    case RTAS_EEH_THAW_DMA:
>>>>>
>>>>> So these don't use the addr hint?
>>>>>
>>>>
>>>> No, there're no address as argument of this RTAS call "ibm,set-eeh-option".
>>>> The RTAS call has 4 arguments, all of them are 32-bits: BUID high part, BUID
>>>> low part, PE address, option. The option could be one of: enable/disable EEH
>>>> functionality, enable IO path, enable DMA path.
>>>
>>> Well, I'm just wondering that ENABLE wants to make sure there's a device
>>> and the others don't.
>>>
>> 
>> Oh, I misunderstood your question. Yes, you're correct. All options
>> except ENABLE will have address check in rtas_handle_eeh_request()
>> where we check on "BUID" since each PHB and PE have one-to-one
>> relationship.
>> 
>> ENABLE and other options are using different address: enable
>> uses config address of one specific device, but other options use PE
>> address. From guest's sides, it enables EEH capability on basis
>> of PCI device and left options are supported on basis of PE. Each
>> PE could contain one or multiple PCI devices.
>> 
>> DISABLE option isn't used until now.
>
>So would DISABLE also take effect on devfn basis or would it ignore the
>first parameter?
>
>If it behaves the same as ENABLE (which would be logical), please move
>it into the same case group.
>

DISABLE takes effect on a per-PE basis. We don't have a devfn in that case, so
the code doesn't need to change. Here's the concise procedure if DISABLE needs
to be involved:

    - ENABLE on basis of PCI device
    - RTAS call "ibm,get-config-addr-info2" to get the PE address
    - DISABLE on basis of PE, with PE address as argument.

If you don't have anything else, I would like to change the code and
send a new version to you. Thanks again for your time.

Thanks,
Gavin
Alexander Graf Dec. 16, 2014, 12:54 a.m. UTC | #7
On 16.12.14 01:31, Gavin Shan wrote:
> On Tue, Dec 16, 2014 at 01:08:52AM +0100, Alexander Graf wrote:
>>
>>
>> On 16.12.14 00:29, Gavin Shan wrote:
>>> On Tue, Dec 16, 2014 at 12:13:03AM +0100, Alexander Graf wrote:
>>>> On 16.12.14 00:08, Gavin Shan wrote:
>>>>> On Mon, Dec 15, 2014 at 03:52:17PM +0100, Alexander Graf wrote:
>>>>>> On 15.12.14 01:15, Gavin Shan wrote:
>>>>>>> The emulation for EEH RTAS requests from guest isn't covered
>>>>>>> by QEMU yet and the patch implements them.
>>>>>>>
>>>>>>> The patch defines constants used by EEH RTAS calls and adds
>>>>>>> callback sPAPRPHBClass::eeh_handler, which is going to be used
>>>>>>> this way:
>>>>>>>
>>>>>>>   * RTAS calls are received in spapr_pci.c, sanity check is done
>>>>>>>     there.
>>>>>>>   * RTAS handlers handle what they can. If there is something it
>>>>>>>     cannot handle and sPAPRPHBClass::eeh_handler callback is defined,
>>>>>>>     it is called.
>>>>>>>   * sPAPRPHBClass::eeh_handler is only implemented for VFIO now. It
>>>>>>>     does ioctl() to the IOMMU container fd to complete the call. Error
>>>>>>>     codes from that ioctl() are transferred back to the guest.
>>>>>>>
>>>>>>> [aik: defined RTAS tokens for EEH RTAS calls]
>>>>>>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>>>>>> ---
>>>>>>>  hw/ppc/spapr_pci.c          | 246 ++++++++++++++++++++++++++++++++++++++++++++
>>>>>>>  include/hw/pci-host/spapr.h |   7 ++
>>>>>>>  include/hw/ppc/spapr.h      |  43 +++++++-
>>>>>>>  3 files changed, 294 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>>>>>> index 3d70efe..3bb1971 100644
>>>>>>> --- a/hw/ppc/spapr_pci.c
>>>>>>> +++ b/hw/ppc/spapr_pci.c
>>>>>>> @@ -406,6 +406,233 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
>>>>>>>      rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
>>>>>>>  }
>>>>>>>  
>>>>>>> +static int rtas_handle_eeh_request(sPAPREnvironment *spapr,
>>>>>>> +                                   uint64_t buid, uint32_t req, uint32_t opt)
>>>>>>> +{
>>>>>>> +    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
>>>>>>> +    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
>>>>>>
>>>>>> What happens when you try to cast NULL? Could a guest process invoke a
>>>>>> host assert() through this and abort the whole VM?
>>>>>>
>>>>>
>>>>> Yes, it would cause core dump. I had one experiment to force assigning
>>>>> NULL to "sphb" before doing the cast, the whole VM is aborted. So I
>>>>> guess you're happy to have something as follows. If you're not suggesting
>>>>> something else, I'll update the code as follows in next version:
>>>>>
>>>>> 	sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
>>>>> 	sPAPRPHBClass *info;
>>>>>
>>>>> 	if (!sphb) {
>>>>> 	    return -ENODEV;
>>>>> 	}
>>>>>
>>>>> 	info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
>>>>> 	if (!info->eeh_handler) {
>>>>> 	    return -ENOENT;
>>>>> 	}
>>>>>
>>>>> 	return info->eeh_handler(sphb, req, opt);
>>>>
>>>> Yes, I think this is a lot safer. And yes, the other patch looks sane to me.
>>>>
>>>
>>> Thank you for your time reviewing this. Will update in next version.
>>>
>>>>>
>>>>>>> +
>>>>>>> +    if (!sphb || !info->eeh_handler) {
>>>>>>> +        return -ENOENT;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return info->eeh_handler(sphb, req, opt);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
>>>>>>> +                                    sPAPREnvironment *spapr,
>>>>>>> +                                    uint32_t token, uint32_t nargs,
>>>>>>> +                                    target_ulong args, uint32_t nret,
>>>>>>> +                                    target_ulong rets)
>>>>>>> +{
>>>>>>> +    uint32_t addr, option;
>>>>>>> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
>>>>>>> +    int ret;
>>>>>>> +
>>>>>>> +    if ((nargs != 4) || (nret != 1)) {
>>>>>>> +        goto param_error_exit;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    addr = rtas_ld(args, 0);
>>>>>>> +    option = rtas_ld(args, 3);
>>>>>>> +    switch (option) {
>>>>>>> +    case RTAS_EEH_ENABLE:
>>>>>>> +        if (!spapr_pci_find_dev(spapr, buid, addr)) {
>>>>>>> +            goto param_error_exit;
>>>>>>> +        }
>>>>>>> +        break;
>>>>>>> +    case RTAS_EEH_DISABLE:
>>>>>>> +    case RTAS_EEH_THAW_IO:
>>>>>>> +    case RTAS_EEH_THAW_DMA:
>>>>>>
>>>>>> So these don't use the addr hint?
>>>>>>
>>>>>
>>>>> No, there is no address argument to this RTAS call, "ibm,set-eeh-option".
>>>>> The RTAS call has 4 arguments, all of them 32 bits wide: BUID high part, BUID
>>>>> low part, PE address, option. The option can be one of: enable/disable EEH
>>>>> functionality, enable IO path, enable DMA path.
>>>>
>>>> Well, I'm just wondering why ENABLE wants to make sure there's a device
>>>> and the others don't.
>>>>
>>>
>>> Oh, I misunderstood your question. Yes, you're correct. All options
>>> except ENABLE have their address checked in rtas_handle_eeh_request(),
>>> where we check the "BUID", since PHBs and PEs have a one-to-one
>>> relationship.
>>>
>>> ENABLE and the other options use different addresses: ENABLE
>>> uses the config address of one specific device, but the other options use
>>> the PE address. From the guest's side, EEH capability is enabled on a
>>> per-PCI-device basis, and the remaining options are supported on a per-PE
>>> basis. Each PE can contain one or multiple PCI devices.
>>>
>>> The DISABLE option hasn't been used so far.
>>
>> So would DISABLE also take effect on devfn basis or would it ignore the
>> first parameter?
>>
>> If it behaves the same as ENABLE (which would be logical), please move
>> it into the same case group.
>>
> 
> DISABLE takes effect on a per-PE basis. We don't have a devfn in that case, and
> the code needn't change. Here's the concise procedure if DISABLE needs to be
> involved:
> 
>     - ENABLE on basis of PCI device
>     - RTAS call "ibm,get-config-addr-info2" to get the PE address
>     - DISABLE on basis of PE, with PE address as argument.
> 
> If you don't have anything else, I would like to change the code and
> send a new version to you. Thanks again for your time.

Sounds great :). Just wanted to double-check that the above was correct.


Alex
David Gibson Dec. 23, 2014, 4:22 a.m. UTC | #8
On Mon, Dec 15, 2014 at 11:15:07AM +1100, Gavin Shan wrote:
> The emulation for EEH RTAS requests from guest isn't covered
> by QEMU yet and the patch implements them.
> 
> The patch defines constants used by EEH RTAS calls and adds
> callback sPAPRPHBClass::eeh_handler, which is going to be used
> this way:
> 
>   * RTAS calls are received in spapr_pci.c, sanity check is done
>     there.
>   * RTAS handlers handle what they can. If there is something it
>     cannot handle and sPAPRPHBClass::eeh_handler callback is defined,
>     it is called.
>   * sPAPRPHBClass::eeh_handler is only implemented for VFIO now. It
>     does ioctl() to the IOMMU container fd to complete the call. Error
>     codes from that ioctl() are transferred back to the guest.
> 
> [aik: defined RTAS tokens for EEH RTAS calls]
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr_pci.c          | 246 ++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/pci-host/spapr.h |   7 ++
>  include/hw/ppc/spapr.h      |  43 +++++++-
>  3 files changed, 294 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 3d70efe..3bb1971 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -406,6 +406,233 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
>      rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
>  }
>  
> +static int rtas_handle_eeh_request(sPAPREnvironment *spapr,
> +                                   uint64_t buid, uint32_t req, uint32_t opt)
> +{
> +    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
> +    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
> +
> +    if (!sphb || !info->eeh_handler) {
> +        return -ENOENT;
> +    }
> +
> +    return info->eeh_handler(sphb, req, opt);
> +}
> +
> +static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
> +                                    sPAPREnvironment *spapr,
> +                                    uint32_t token, uint32_t nargs,
> +                                    target_ulong args, uint32_t nret,
> +                                    target_ulong rets)
> +{
> +    uint32_t addr, option;
> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);

You're dereferencing RTAS parameters here before you've checked the
number of parameters, which isn't safe.  Similar problem in the other
entry points as well.

> +    int ret;
> +
> +    if ((nargs != 4) || (nret != 1)) {
> +        goto param_error_exit;
> +    }
> +
> +    addr = rtas_ld(args, 0);
> +    option = rtas_ld(args, 3);
> +    switch (option) {
> +    case RTAS_EEH_ENABLE:
> +        if (!spapr_pci_find_dev(spapr, buid, addr)) {
> +            goto param_error_exit;
> +        }
> +        break;
> +    case RTAS_EEH_DISABLE:
> +    case RTAS_EEH_THAW_IO:
> +    case RTAS_EEH_THAW_DMA:
> +        break;
> +    default:
> +        goto param_error_exit;
> +    }
> +
> +    ret = rtas_handle_eeh_request(spapr, buid,
> +                                  RTAS_EEH_REQ_SET_OPTION, option);
> +    if (ret >= 0) {
> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +        return;
> +    }

The fall through here means that any failure in
rtas_handle_eeh_request will be reported as RTAS_OUT_PARAM_ERROR,
which doesn't sound like it would always be the right error code.
Similar in the other entry points.
Gavin Shan Dec. 25, 2014, 3:17 a.m. UTC | #9
On Tue, Dec 23, 2014 at 03:22:06PM +1100, David Gibson wrote:
>On Mon, Dec 15, 2014 at 11:15:07AM +1100, Gavin Shan wrote:
>> The emulation for EEH RTAS requests from guest isn't covered
>> by QEMU yet and the patch implements them.
>> 
>> The patch defines constants used by EEH RTAS calls and adds
>> callback sPAPRPHBClass::eeh_handler, which is going to be used
>> this way:
>> 
>>   * RTAS calls are received in spapr_pci.c, sanity check is done
>>     there.
>>   * RTAS handlers handle what they can. If there is something it
>>     cannot handle and sPAPRPHBClass::eeh_handler callback is defined,
>>     it is called.
>>   * sPAPRPHBClass::eeh_handler is only implemented for VFIO now. It
>>     does ioctl() to the IOMMU container fd to complete the call. Error
>>     codes from that ioctl() are transferred back to the guest.
>> 
>> [aik: defined RTAS tokens for EEH RTAS calls]
>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> ---
>>  hw/ppc/spapr_pci.c          | 246 ++++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/pci-host/spapr.h |   7 ++
>>  include/hw/ppc/spapr.h      |  43 +++++++-
>>  3 files changed, 294 insertions(+), 2 deletions(-)
>> 
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index 3d70efe..3bb1971 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -406,6 +406,233 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
>>      rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
>>  }
>>  
>> +static int rtas_handle_eeh_request(sPAPREnvironment *spapr,
>> +                                   uint64_t buid, uint32_t req, uint32_t opt)
>> +{
>> +    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
>> +    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
>> +
>> +    if (!sphb || !info->eeh_handler) {
>> +        return -ENOENT;
>> +    }
>> +
>> +    return info->eeh_handler(sphb, req, opt);
>> +}
>> +
>> +static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
>> +                                    sPAPREnvironment *spapr,
>> +                                    uint32_t token, uint32_t nargs,
>> +                                    target_ulong args, uint32_t nret,
>> +                                    target_ulong rets)
>> +{
>> +    uint32_t addr, option;
>> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
>
>You're dereferencing RTAS parameters here before you've checked the
>number of parameters, which isn't safe.  Similar problem in the other
>entry points as well.
>

Yep, I'll fix it in the next version. Thanks for the review and for pointing
it out.

>> +    int ret;
>> +
>> +    if ((nargs != 4) || (nret != 1)) {
>> +        goto param_error_exit;
>> +    }
>> +
>> +    addr = rtas_ld(args, 0);
>> +    option = rtas_ld(args, 3);
>> +    switch (option) {
>> +    case RTAS_EEH_ENABLE:
>> +        if (!spapr_pci_find_dev(spapr, buid, addr)) {
>> +            goto param_error_exit;
>> +        }
>> +        break;
>> +    case RTAS_EEH_DISABLE:
>> +    case RTAS_EEH_THAW_IO:
>> +    case RTAS_EEH_THAW_DMA:
>> +        break;
>> +    default:
>> +        goto param_error_exit;
>> +    }
>> +
>> +    ret = rtas_handle_eeh_request(spapr, buid,
>> +                                  RTAS_EEH_REQ_SET_OPTION, option);
>> +    if (ret >= 0) {
>> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +        return;
>> +    }
>
>The fall through here means that any failure in
>rtas_handle_eeh_request will be reported as RTAS_OUT_PARAM_ERROR,
>which doesn't sound like it would always be the right error code.
>Similar in the other entry points.
>

Yes, distinct error codes to indicate the different failure cases would
be better. I'll check the PAPR spec again and return more precise
error codes in the next version.

Thanks,
Gavin

>-- 
>David Gibson			| I'll have my music baroque, and my code
>david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
>				| _way_ _around_!
>http://www.ozlabs.org/~dgibson
diff mbox

Patch

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 3d70efe..3bb1971 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -406,6 +406,233 @@  static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
     rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
 }
 
+static int rtas_handle_eeh_request(sPAPREnvironment *spapr,
+                                   uint64_t buid, uint32_t req, uint32_t opt)
+{
+    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
+    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
+
+    if (!sphb || !info->eeh_handler) {
+        return -ENOENT;
+    }
+
+    return info->eeh_handler(sphb, req, opt);
+}
+
+static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
+                                    sPAPREnvironment *spapr,
+                                    uint32_t token, uint32_t nargs,
+                                    target_ulong args, uint32_t nret,
+                                    target_ulong rets)
+{
+    uint32_t addr, option;
+    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    int ret;
+
+    if ((nargs != 4) || (nret != 1)) {
+        goto param_error_exit;
+    }
+
+    addr = rtas_ld(args, 0);
+    option = rtas_ld(args, 3);
+    switch (option) {
+    case RTAS_EEH_ENABLE:
+        if (!spapr_pci_find_dev(spapr, buid, addr)) {
+            goto param_error_exit;
+        }
+        break;
+    case RTAS_EEH_DISABLE:
+    case RTAS_EEH_THAW_IO:
+    case RTAS_EEH_THAW_DMA:
+        break;
+    default:
+        goto param_error_exit;
+    }
+
+    ret = rtas_handle_eeh_request(spapr, buid,
+                                  RTAS_EEH_REQ_SET_OPTION, option);
+    if (ret >= 0) {
+        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+        return;
+    }
+
+param_error_exit:
+    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+}
+
+static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
+                                           sPAPREnvironment *spapr,
+                                           uint32_t token, uint32_t nargs,
+                                           target_ulong args, uint32_t nret,
+                                           target_ulong rets)
+{
+    uint32_t addr, option;
+    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
+    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
+    PCIDevice *pdev;
+
+    if (!sphb || !info->eeh_handler) {
+        goto param_error_exit;
+    }
+
+    if ((nargs != 4) || (nret != 2)) {
+        goto param_error_exit;
+    }
+
+    addr = rtas_ld(args, 0);
+    option = rtas_ld(args, 3);
+    if (option != RTAS_GET_PE_ADDR && option != RTAS_GET_PE_MODE) {
+        goto param_error_exit;
+    }
+
+    pdev = spapr_pci_find_dev(spapr, buid, addr);
+    if (!pdev) {
+        goto param_error_exit;
+    }
+
+    /*
+     * For now, we always have bus level PE whose address
+     * has format "00BBSS00". The guest OS might regard
+     * PE address 0 as invalid. We avoid that simply by
+     * extending it with one.
+     */
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+    if (option == RTAS_GET_PE_ADDR) {
+        rtas_st(rets, 1, (pci_bus_num(pdev->bus) << 16) + 1);
+    } else {
+        rtas_st(rets, 1, RTAS_PE_MODE_SHARED);
+    }
+
+    return;
+
+param_error_exit:
+    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+}
+
+static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
+                                            sPAPREnvironment *spapr,
+                                            uint32_t token, uint32_t nargs,
+                                            target_ulong args, uint32_t nret,
+                                            target_ulong rets)
+{
+    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    int ret;
+
+    if ((nargs != 3) || (nret != 4 && nret != 5)) {
+        goto param_error_exit;
+    }
+
+    ret = rtas_handle_eeh_request(spapr, buid, RTAS_EEH_REQ_GET_STATE, 0);
+    if (ret >= 0) {
+        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+        rtas_st(rets, 1, ret);
+        rtas_st(rets, 2, RTAS_EEH_SUPPORT);
+        rtas_st(rets, 3, RTAS_EEH_PE_UNAVAIL_INFO);
+        if (nret >= 5) {
+            rtas_st(rets, 4, RTAS_EEH_PE_RECOVER_INFO);
+        }
+
+        return;
+    }
+
+param_error_exit:
+    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+}
+
+static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
+                                    sPAPREnvironment *spapr,
+                                    uint32_t token, uint32_t nargs,
+                                    target_ulong args, uint32_t nret,
+                                    target_ulong rets)
+{
+    uint32_t option;
+    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    int ret;
+
+    if ((nargs != 4) || (nret != 1)) {
+        goto param_error_exit;
+    }
+
+    option = rtas_ld(args, 3);
+    switch (option) {
+    case RTAS_SLOT_RESET_DEACTIVATE:
+    case RTAS_SLOT_RESET_HOT:
+    case RTAS_SLOT_RESET_FUNDAMENTAL:
+        break;
+    default:
+        goto param_error_exit;
+    }
+
+    ret = rtas_handle_eeh_request(spapr, buid, RTAS_EEH_REQ_RESET, option);
+    if (ret >= 0) {
+        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+        return;
+    }
+
+param_error_exit:
+    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+}
+
+static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
+                                  sPAPREnvironment *spapr,
+                                  uint32_t token, uint32_t nargs,
+                                  target_ulong args, uint32_t nret,
+                                  target_ulong rets)
+{
+    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    int ret;
+
+    if ((nargs != 3) || (nret != 1)) {
+        goto param_error_exit;
+    }
+
+    ret = rtas_handle_eeh_request(spapr, buid, RTAS_EEH_REQ_CONFIGURE, 0);
+    if (ret >= 0) {
+        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+        return;
+    }
+
+param_error_exit:
+    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+}
+
+/* To support it later */
+static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
+                                       sPAPREnvironment *spapr,
+                                       uint32_t token, uint32_t nargs,
+                                       target_ulong args, uint32_t nret,
+                                       target_ulong rets)
+{
+    int option;
+    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
+    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
+
+    if (!sphb || !info->eeh_handler) {
+        goto param_error_exit;
+    }
+
+    if ((nargs != 8) || (nret != 1)) {
+        goto param_error_exit;
+    }
+
+    option = rtas_ld(args, 7);
+    switch (option) {
+    case RTAS_SLOT_TEMP_ERR_LOG:
+    case RTAS_SLOT_PERM_ERR_LOG:
+        break;
+    default:
+        goto param_error_exit;
+    }
+
+    rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
+    return;
+
+param_error_exit:
+    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+}
+
 static int pci_spapr_swizzle(int slot, int pin)
 {
     return (slot + pin) % PCI_NUM_PINS;
@@ -958,6 +1185,25 @@  void spapr_pci_rtas_init(void)
         spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi",
                             rtas_ibm_change_msi);
     }
+
+    spapr_rtas_register(RTAS_IBM_SET_EEH_OPTION,
+                        "ibm,set-eeh-option",
+                        rtas_ibm_set_eeh_option);
+    spapr_rtas_register(RTAS_IBM_GET_CONFIG_ADDR_INFO2,
+                        "ibm,get-config-addr-info2",
+                        rtas_ibm_get_config_addr_info2);
+    spapr_rtas_register(RTAS_IBM_READ_SLOT_RESET_STATE2,
+                        "ibm,read-slot-reset-state2",
+                        rtas_ibm_read_slot_reset_state2);
+    spapr_rtas_register(RTAS_IBM_SET_SLOT_RESET,
+                        "ibm,set-slot-reset",
+                        rtas_ibm_set_slot_reset);
+    spapr_rtas_register(RTAS_IBM_CONFIGURE_PE,
+                        "ibm,configure-pe",
+                        rtas_ibm_configure_pe);
+    spapr_rtas_register(RTAS_IBM_SLOT_ERROR_DETAIL,
+                        "ibm,slot-error-detail",
+                        rtas_ibm_slot_error_detail);
 }
 
 static void spapr_pci_register_types(void)
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 3892f1a..8b6b37f 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -49,6 +49,7 @@  struct sPAPRPHBClass {
     PCIHostBridgeClass parent_class;
 
     void (*finish_realize)(sPAPRPHBState *sphb, Error **errp);
+    int (*eeh_handler)(sPAPRPHBState *sphb, int req, int opt);
 };
 
 typedef struct spapr_pci_msi {
@@ -107,6 +108,12 @@  struct sPAPRPHBVFIOState {
 
 #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL
 
+/* EEH related requests */
+#define RTAS_EEH_REQ_SET_OPTION      0
+#define RTAS_EEH_REQ_GET_STATE       1
+#define RTAS_EEH_REQ_RESET           2
+#define RTAS_EEH_REQ_CONFIGURE       3
+
 static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin)
 {
     return xics_get_qirq(spapr->icp, phb->lsi_table[pin].irq);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 749daf4..e4a2c11 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -337,6 +337,39 @@  target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
 int spapr_allocate_irq(int hint, bool lsi);
 int spapr_allocate_irq_block(int num, bool lsi, bool msi);
 
+/* ibm,set-eeh-option */
+#define RTAS_EEH_DISABLE                 0
+#define RTAS_EEH_ENABLE                  1
+#define RTAS_EEH_THAW_IO                 2
+#define RTAS_EEH_THAW_DMA                3
+
+/* ibm,get-config-addr-info2 */
+#define RTAS_GET_PE_ADDR                 0
+#define RTAS_GET_PE_MODE                 1
+#define RTAS_PE_MODE_NONE                0
+#define RTAS_PE_MODE_NOT_SHARED          1
+#define RTAS_PE_MODE_SHARED              2
+
+/* ibm,read-slot-reset-state2 */
+#define RTAS_EEH_PE_STATE_NORMAL         0
+#define RTAS_EEH_PE_STATE_RESET          1
+#define RTAS_EEH_PE_STATE_STOPPED_IO_DMA 2
+#define RTAS_EEH_PE_STATE_STOPPED_DMA    4
+#define RTAS_EEH_PE_STATE_UNAVAIL        5
+#define RTAS_EEH_NOT_SUPPORT             0
+#define RTAS_EEH_SUPPORT                 1
+#define RTAS_EEH_PE_UNAVAIL_INFO         1000
+#define RTAS_EEH_PE_RECOVER_INFO         0
+
+/* ibm,set-slot-reset */
+#define RTAS_SLOT_RESET_DEACTIVATE       0
+#define RTAS_SLOT_RESET_HOT              1
+#define RTAS_SLOT_RESET_FUNDAMENTAL      3
+
+/* ibm,slot-error-detail */
+#define RTAS_SLOT_TEMP_ERR_LOG           1
+#define RTAS_SLOT_PERM_ERR_LOG           2
+
 /* RTAS return codes */
 #define RTAS_OUT_SUCCESS            0
 #define RTAS_OUT_NO_ERRORS_FOUND    1
@@ -381,8 +414,14 @@  int spapr_allocate_irq_block(int num, bool lsi, bool msi);
 #define RTAS_GET_SENSOR_STATE                   (RTAS_TOKEN_BASE + 0x1D)
 #define RTAS_IBM_CONFIGURE_CONNECTOR            (RTAS_TOKEN_BASE + 0x1E)
 #define RTAS_IBM_OS_TERM                        (RTAS_TOKEN_BASE + 0x1F)
-
-#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x20)
+#define RTAS_IBM_SET_EEH_OPTION                 (RTAS_TOKEN_BASE + 0x20)
+#define RTAS_IBM_GET_CONFIG_ADDR_INFO2          (RTAS_TOKEN_BASE + 0x21)
+#define RTAS_IBM_READ_SLOT_RESET_STATE2         (RTAS_TOKEN_BASE + 0x22)
+#define RTAS_IBM_SET_SLOT_RESET                 (RTAS_TOKEN_BASE + 0x23)
+#define RTAS_IBM_CONFIGURE_PE                   (RTAS_TOKEN_BASE + 0x24)
+#define RTAS_IBM_SLOT_ERROR_DETAIL              (RTAS_TOKEN_BASE + 0x25)
+
+#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x26)
 
 /* RTAS ibm,get-system-parameter token values */
 #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20