Patchwork [2/2] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

login
register
mail settings
Submitter Pandarathil, Vijaymohan R
Date Jan. 9, 2013, 6:26 a.m.
Message ID <F9E001219150CB45BEDC82A650F360C901496034@G9W0717.americas.hpqcorp.net>
Download mbox | patch
Permalink /patch/210630/
State New
Headers show

Comments

Pandarathil, Vijaymohan R - Jan. 9, 2013, 6:26 a.m.
- Create eventfd per vfio device assigned to a guest and register an
          event handler

	- This fd is passed to the vfio_pci driver through a new ioctl

	- When the device encounters an error, the eventfd is signaled
          and the qemu eventfd handler gets invoked.

	- In the handler decide what action to take. Current action taken
          is to terminate the guest.

Signed-off-by: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com>
---
 hw/vfio_pci.c              | 56 ++++++++++++++++++++++++++++++++++++++++++++++
 linux-headers/linux/vfio.h |  9 ++++++++
 2 files changed, 65 insertions(+)
Alex Williamson - Jan. 9, 2013, 6:07 p.m.
On Wed, 2013-01-09 at 06:26 +0000, Pandarathil, Vijaymohan R wrote:
> 	- Create eventfd per vfio device assigned to a guest and register an
>           event handler
> 
> 	- This fd is passed to the vfio_pci driver through a new ioctl
> 
> 	- When the device encounters an error, the eventfd is signaled
>           and the qemu eventfd handler gets invoked.
> 
> 	- In the handler decide what action to take. Current action taken
>           is to terminate the guest.
> 
> Signed-off-by: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com>
> ---
>  hw/vfio_pci.c              | 56 ++++++++++++++++++++++++++++++++++++++++++++++
>  linux-headers/linux/vfio.h |  9 ++++++++
>  2 files changed, 65 insertions(+)
> 
> diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
> index 28c8303..9c3c28b 100644
> --- a/hw/vfio_pci.c
> +++ b/hw/vfio_pci.c
> @@ -38,6 +38,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/queue.h"
>  #include "qemu/range.h"
> +#include "sysemu/sysemu.h"
>  
>  /* #define DEBUG_VFIO */
>  #ifdef DEBUG_VFIO
> @@ -130,6 +131,8 @@ typedef struct VFIODevice {
>      QLIST_ENTRY(VFIODevice) next;
>      struct VFIOGroup *group;
>      bool reset_works;
> +    EventNotifier errfd;

Misleading name, it's an EventNotifier not an fd.

> +    __u32 dev_info_flags;
>  } VFIODevice;
>  
>  typedef struct VFIOGroup {
> @@ -1805,6 +1808,8 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
>      DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,
>              dev_info.flags, dev_info.num_regions, dev_info.num_irqs);
>  
> +    vdev->dev_info_flags = dev_info.flags;
> +

We test the VFIO_DEVICE_FLAGS_RESET elsewhere by caching the bit into a
variable, why not do that here too?

>      if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {
>          error_report("vfio: Um, this isn't a PCI device\n");
>          goto error;
> @@ -1900,6 +1905,55 @@ static void vfio_put_device(VFIODevice *vdev)
>      }
>  }
>  
> +static void vfio_errfd_handler(void *opaque)
> +{
> +    VFIODevice *vdev = opaque;
> +
> +    if (!event_notifier_test_and_clear(&vdev->errfd)) {
> +        return;
> +    }
> +
> +    /*
> +     * TBD. Retrieve the error details and decide what action
> +     * needs to be taken. One of the actions could be to pass
> +     * the error to the guest and have the guest driver recover
> +     * the error. This requires that PCIe capabilities be
> +     * exposed to the guest. At present, we just terminate the
> +     * guest to contain the error.
> +     */
> +    error_report("%s(%04x:%02x:%02x.%x) "
> +        "Unrecoverable error detected... Terminating guest\n",
> +        __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,
> +        vdev->host.function);
> +
> +    qemu_system_shutdown_request();

I would have figured hw_error

> +    return;
> +}
> +
> +static void vfio_register_errfd(VFIODevice *vdev)
> +{
> +    int32_t pfd;

"pfd" is used elsewhere in vfio as "Pointer to FD", this is just a fd.

> +    int ret;
> +
> +    if (!(vdev->dev_info_flags & VFIO_DEVICE_FLAGS_AER_NOTIFY)) {
> +        error_report("vfio: Warning: Error notification not supported for the device\n");

This should probably just be a debug print.

> +        return;
> +    }
> +    if (event_notifier_init(&vdev->errfd, 0)) {
> +        error_report("vfio: Warning: Unable to init event notifier for error detection\n");
> +        return;
> +    }
> +    pfd = event_notifier_get_fd(&vdev->errfd);
> +    qemu_set_fd_handler(pfd, vfio_errfd_handler, NULL, vdev);
> +
> +    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_ERRFD, pfd);
> +    if (ret) {
> +        error_report("vfio: Warning: Failed to setup error fd: %d\n", ret);
> +        qemu_set_fd_handler(pfd, NULL, NULL, vdev);
> +        event_notifier_cleanup(&vdev->errfd);
> +    }
> +    return;
> +}
>  static int vfio_initfn(PCIDevice *pdev)
>  {
>      VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
> @@ -2010,6 +2064,8 @@ static int vfio_initfn(PCIDevice *pdev)
>          }
>      }
>  
> +    vfio_register_errfd(vdev);
> +

No cleanup in exitfn?!  Thanks,

Alex

>      return 0;
>  
>  out_teardown:
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 4758d1b..0ca4eeb 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -147,6 +147,7 @@ struct vfio_device_info {
>  	__u32	flags;
>  #define VFIO_DEVICE_FLAGS_RESET	(1 << 0)	/* Device supports reset */
>  #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
> +#define VFIO_DEVICE_FLAGS_AER_NOTIFY (1 << 2)   /* Supports aer notification */
>  	__u32	num_regions;	/* Max region index + 1 */
>  	__u32	num_irqs;	/* Max IRQ index + 1 */
>  };
> @@ -288,6 +289,14 @@ struct vfio_irq_set {
>   */
>  #define VFIO_DEVICE_RESET		_IO(VFIO_TYPE, VFIO_BASE + 11)
>  
> +/**
> + * VFIO_DEVICE_SET_ERRFD - _IO(VFIO_TYPE, VFIO_BASE + 12)
> + *
> + * Pass the eventfd to the vfio-pci driver for signalling any device
> + * error notifications
> + */
> +#define VFIO_DEVICE_SET_ERRFD           _IO(VFIO_TYPE, VFIO_BASE + 12)
> +
>  /*
>   * The VFIO-PCI bus driver makes use of the following fixed region and
>   * IRQ index mapping.  Unimplemented regions return a size of zero.
Blue Swirl - Jan. 9, 2013, 9:22 p.m.
On Wed, Jan 9, 2013 at 6:26 AM, Pandarathil, Vijaymohan R
<vijaymohan.pandarathil@hp.com> wrote:
>         - Create eventfd per vfio device assigned to a guest and register an
>           event handler
>
>         - This fd is passed to the vfio_pci driver through a new ioctl
>
>         - When the device encounters an error, the eventfd is signaled
>           and the qemu eventfd handler gets invoked.
>
>         - In the handler decide what action to take. Current action taken
>           is to terminate the guest.
>
> Signed-off-by: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com>
> ---
>  hw/vfio_pci.c              | 56 ++++++++++++++++++++++++++++++++++++++++++++++
>  linux-headers/linux/vfio.h |  9 ++++++++
>  2 files changed, 65 insertions(+)
>
> diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
> index 28c8303..9c3c28b 100644
> --- a/hw/vfio_pci.c
> +++ b/hw/vfio_pci.c
> @@ -38,6 +38,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/queue.h"
>  #include "qemu/range.h"
> +#include "sysemu/sysemu.h"
>
>  /* #define DEBUG_VFIO */
>  #ifdef DEBUG_VFIO
> @@ -130,6 +131,8 @@ typedef struct VFIODevice {
>      QLIST_ENTRY(VFIODevice) next;
>      struct VFIOGroup *group;
>      bool reset_works;
> +    EventNotifier errfd;
> +    __u32 dev_info_flags;

QEMU is not kernel code, please use uint32_t.

>  } VFIODevice;
>
>  typedef struct VFIOGroup {
> @@ -1805,6 +1808,8 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
>      DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,
>              dev_info.flags, dev_info.num_regions, dev_info.num_irqs);
>
> +    vdev->dev_info_flags = dev_info.flags;
> +
>      if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {
>          error_report("vfio: Um, this isn't a PCI device\n");
>          goto error;
> @@ -1900,6 +1905,55 @@ static void vfio_put_device(VFIODevice *vdev)
>      }
>  }
>
> +static void vfio_errfd_handler(void *opaque)
> +{
> +    VFIODevice *vdev = opaque;
> +
> +    if (!event_notifier_test_and_clear(&vdev->errfd)) {
> +        return;
> +    }
> +
> +    /*
> +     * TBD. Retrieve the error details and decide what action
> +     * needs to be taken. One of the actions could be to pass
> +     * the error to the guest and have the guest driver recover
> +     * the error. This requires that PCIe capabilities be
> +     * exposed to the guest. At present, we just terminate the
> +     * guest to contain the error.
> +     */
> +    error_report("%s(%04x:%02x:%02x.%x) "
> +        "Unrecoverable error detected... Terminating guest\n",
> +        __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,
> +        vdev->host.function);
> +
> +    qemu_system_shutdown_request();
> +    return;
> +}
> +
> +static void vfio_register_errfd(VFIODevice *vdev)
> +{
> +    int32_t pfd;
> +    int ret;
> +
> +    if (!(vdev->dev_info_flags & VFIO_DEVICE_FLAGS_AER_NOTIFY)) {
> +        error_report("vfio: Warning: Error notification not supported for the device\n");
> +        return;
> +    }
> +    if (event_notifier_init(&vdev->errfd, 0)) {
> +        error_report("vfio: Warning: Unable to init event notifier for error detection\n");
> +        return;
> +    }
> +    pfd = event_notifier_get_fd(&vdev->errfd);
> +    qemu_set_fd_handler(pfd, vfio_errfd_handler, NULL, vdev);
> +
> +    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_ERRFD, pfd);
> +    if (ret) {
> +        error_report("vfio: Warning: Failed to setup error fd: %d\n", ret);
> +        qemu_set_fd_handler(pfd, NULL, NULL, vdev);
> +        event_notifier_cleanup(&vdev->errfd);
> +    }
> +    return;
> +}
>  static int vfio_initfn(PCIDevice *pdev)
>  {
>      VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
> @@ -2010,6 +2064,8 @@ static int vfio_initfn(PCIDevice *pdev)
>          }
>      }
>
> +    vfio_register_errfd(vdev);
> +
>      return 0;
>
>  out_teardown:
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 4758d1b..0ca4eeb 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -147,6 +147,7 @@ struct vfio_device_info {
>         __u32   flags;
>  #define VFIO_DEVICE_FLAGS_RESET        (1 << 0)        /* Device supports reset */
>  #define VFIO_DEVICE_FLAGS_PCI  (1 << 1)        /* vfio-pci device */
> +#define VFIO_DEVICE_FLAGS_AER_NOTIFY (1 << 2)   /* Supports aer notification */
>         __u32   num_regions;    /* Max region index + 1 */
>         __u32   num_irqs;       /* Max IRQ index + 1 */

These are verbatim copies of kernel headers so it's OK here.

>  };
> @@ -288,6 +289,14 @@ struct vfio_irq_set {
>   */
>  #define VFIO_DEVICE_RESET              _IO(VFIO_TYPE, VFIO_BASE + 11)
>
> +/**
> + * VFIO_DEVICE_SET_ERRFD - _IO(VFIO_TYPE, VFIO_BASE + 12)
> + *
> + * Pass the eventfd to the vfio-pci driver for signalling any device
> + * error notifications
> + */
> +#define VFIO_DEVICE_SET_ERRFD           _IO(VFIO_TYPE, VFIO_BASE + 12)
> +
>  /*
>   * The VFIO-PCI bus driver makes use of the following fixed region and
>   * IRQ index mapping.  Unimplemented regions return a size of zero.
> --
> 1.7.11.3
>
>
Pandarathil, Vijaymohan R - Jan. 11, 2013, 8:53 a.m.
> -----Original Message-----

> From: Alex Williamson [mailto:alex.williamson@redhat.com]

> Sent: Wednesday, January 09, 2013 10:08 AM

> To: Pandarathil, Vijaymohan R

> Cc: Bjorn Helgaas; Gleb Natapov; kvm@vger.kernel.org; qemu-

> devel@nongnu.org; linux-pci@vger.kernel.org; linux-kernel@vger.kernel.org

> Subject: Re: [PATCH 2/2] QEMU-AER: Qemu changes to support AER for VFIO-PCI

> devices

> 

> On Wed, 2013-01-09 at 06:26 +0000, Pandarathil, Vijaymohan R wrote:

> > 	- Create eventfd per vfio device assigned to a guest and register an

> >           event handler

> >

> > 	- This fd is passed to the vfio_pci driver through a new ioctl

> >

> > 	- When the device encounters an error, the eventfd is signaled

> >           and the qemu eventfd handler gets invoked.

> >

> > 	- In the handler decide what action to take. Current action taken

> >           is to terminate the guest.

> >

> > Signed-off-by: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com>

> > ---

> >  hw/vfio_pci.c              | 56

> ++++++++++++++++++++++++++++++++++++++++++++++

> >  linux-headers/linux/vfio.h |  9 ++++++++

> >  2 files changed, 65 insertions(+)

> >

> > diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c

> > index 28c8303..9c3c28b 100644

> > --- a/hw/vfio_pci.c

> > +++ b/hw/vfio_pci.c

> > @@ -38,6 +38,7 @@

> >  #include "qemu/error-report.h"

> >  #include "qemu/queue.h"

> >  #include "qemu/range.h"

> > +#include "sysemu/sysemu.h"

> >

> >  /* #define DEBUG_VFIO */

> >  #ifdef DEBUG_VFIO

> > @@ -130,6 +131,8 @@ typedef struct VFIODevice {

> >      QLIST_ENTRY(VFIODevice) next;

> >      struct VFIOGroup *group;

> >      bool reset_works;

> > +    EventNotifier errfd;

> 

> Misleading name, it's an EventNotifier not an fd.


Will make the change.

> 

> > +    __u32 dev_info_flags;

> >  } VFIODevice;

> >

> >  typedef struct VFIOGroup {

> > @@ -1805,6 +1808,8 @@ static int vfio_get_device(VFIOGroup *group, const

> char *name, VFIODevice *vdev)

> >      DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,

> >              dev_info.flags, dev_info.num_regions, dev_info.num_irqs);

> >

> > +    vdev->dev_info_flags = dev_info.flags;

> > +

> 

> We test the VFIO_DEVICE_FLAGS_RESET elsewhere by caching the bit into a

> variable, why not do that here too?


I was wondering if there was any specific reason to cache the bit into a separate variable. Wouldn't a flags field which can contain the specific bit be more apt ?

> 

> >      if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {

> >          error_report("vfio: Um, this isn't a PCI device\n");

> >          goto error;

> > @@ -1900,6 +1905,55 @@ static void vfio_put_device(VFIODevice *vdev)

> >      }

> >  }

> >

> > +static void vfio_errfd_handler(void *opaque)

> > +{

> > +    VFIODevice *vdev = opaque;

> > +

> > +    if (!event_notifier_test_and_clear(&vdev->errfd)) {

> > +        return;

> > +    }

> > +

> > +    /*

> > +     * TBD. Retrieve the error details and decide what action

> > +     * needs to be taken. One of the actions could be to pass

> > +     * the error to the guest and have the guest driver recover

> > +     * the error. This requires that PCIe capabilities be

> > +     * exposed to the guest. At present, we just terminate the

> > +     * guest to contain the error.

> > +     */

> > +    error_report("%s(%04x:%02x:%02x.%x) "

> > +        "Unrecoverable error detected... Terminating guest\n",

> > +        __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,

> > +        vdev->host.function);

> > +

> > +    qemu_system_shutdown_request();

> 

> I would have figured hw_error


Hw_error() is probably more appropriate. Will make the change.

> 

> > +    return;

> > +}

> > +

> > +static void vfio_register_errfd(VFIODevice *vdev)

> > +{

> > +    int32_t pfd;

> 

> "pfd" is used elsewhere in vfio as "Pointer to FD", this is just a fd.


Will change.

> 

> > +    int ret;

> > +

> > +    if (!(vdev->dev_info_flags & VFIO_DEVICE_FLAGS_AER_NOTIFY)) {

> > +        error_report("vfio: Warning: Error notification not supported

> for the device\n");

> 

> This should probably just be a debug print.


Okay.

> 

> > +        return;

> > +    }

> > +    if (event_notifier_init(&vdev->errfd, 0)) {

> > +        error_report("vfio: Warning: Unable to init event notifier for

> error detection\n");

> > +        return;

> > +    }

> > +    pfd = event_notifier_get_fd(&vdev->errfd);

> > +    qemu_set_fd_handler(pfd, vfio_errfd_handler, NULL, vdev);

> > +

> > +    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_ERRFD, pfd);

> > +    if (ret) {

> > +        error_report("vfio: Warning: Failed to setup error fd: %d\n",

> ret);

> > +        qemu_set_fd_handler(pfd, NULL, NULL, vdev);

> > +        event_notifier_cleanup(&vdev->errfd);

> > +    }

> > +    return;

> > +}

> >  static int vfio_initfn(PCIDevice *pdev)

> >  {

> >      VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);

> > @@ -2010,6 +2064,8 @@ static int vfio_initfn(PCIDevice *pdev)

> >          }

> >      }

> >

> > +    vfio_register_errfd(vdev);

> > +

> 

> No cleanup in exitfn?!  Thanks,


Missed that. Will fix.

Vijay

> 

> Alex

> 

> >      return 0;

> >

> >  out_teardown:

> > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h

> > index 4758d1b..0ca4eeb 100644

> > --- a/linux-headers/linux/vfio.h

> > +++ b/linux-headers/linux/vfio.h

> > @@ -147,6 +147,7 @@ struct vfio_device_info {

> >  	__u32	flags;

> >  #define VFIO_DEVICE_FLAGS_RESET	(1 << 0)	/* Device supports reset */

> >  #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */

> > +#define VFIO_DEVICE_FLAGS_AER_NOTIFY (1 << 2)   /* Supports aer

> notification */

> >  	__u32	num_regions;	/* Max region index + 1 */

> >  	__u32	num_irqs;	/* Max IRQ index + 1 */

> >  };

> > @@ -288,6 +289,14 @@ struct vfio_irq_set {

> >   */

> >  #define VFIO_DEVICE_RESET		_IO(VFIO_TYPE, VFIO_BASE + 11)

> >

> > +/**

> > + * VFIO_DEVICE_SET_ERRFD - _IO(VFIO_TYPE, VFIO_BASE + 12)

> > + *

> > + * Pass the eventfd to the vfio-pci driver for signalling any device

> > + * error notifications

> > + */

> > +#define VFIO_DEVICE_SET_ERRFD           _IO(VFIO_TYPE, VFIO_BASE + 12)

> > +

> >  /*

> >   * The VFIO-PCI bus driver makes use of the following fixed region and

> >   * IRQ index mapping.  Unimplemented regions return a size of zero.

> 

>
Pandarathil, Vijaymohan R - Jan. 11, 2013, 8:55 a.m.
> -----Original Message-----

> From: Blue Swirl [mailto:blauwirbel@gmail.com]

> Sent: Wednesday, January 09, 2013 1:23 PM

> To: Pandarathil, Vijaymohan R

> Cc: Alex Williamson; Bjorn Helgaas; Gleb Natapov; linux-

> pci@vger.kernel.org; qemu-devel@nongnu.org; kvm@vger.kernel.org; linux-

> kernel@vger.kernel.org

> Subject: Re: [Qemu-devel] [PATCH 2/2] QEMU-AER: Qemu changes to support AER

> for VFIO-PCI devices

> 

> On Wed, Jan 9, 2013 at 6:26 AM, Pandarathil, Vijaymohan R

> <vijaymohan.pandarathil@hp.com> wrote:

> >         - Create eventfd per vfio device assigned to a guest and register

> an

> >           event handler

> >

> >         - This fd is passed to the vfio_pci driver through a new ioctl

> >

> >         - When the device encounters an error, the eventfd is signaled

> >           and the qemu eventfd handler gets invoked.

> >

> >         - In the handler decide what action to take. Current action taken

> >           is to terminate the guest.

> >

> > Signed-off-by: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com>

> > ---

> >  hw/vfio_pci.c              | 56

> ++++++++++++++++++++++++++++++++++++++++++++++

> >  linux-headers/linux/vfio.h |  9 ++++++++

> >  2 files changed, 65 insertions(+)

> >

> > diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c

> > index 28c8303..9c3c28b 100644

> > --- a/hw/vfio_pci.c

> > +++ b/hw/vfio_pci.c

> > @@ -38,6 +38,7 @@

> >  #include "qemu/error-report.h"

> >  #include "qemu/queue.h"

> >  #include "qemu/range.h"

> > +#include "sysemu/sysemu.h"

> >

> >  /* #define DEBUG_VFIO */

> >  #ifdef DEBUG_VFIO

> > @@ -130,6 +131,8 @@ typedef struct VFIODevice {

> >      QLIST_ENTRY(VFIODevice) next;

> >      struct VFIOGroup *group;

> >      bool reset_works;

> > +    EventNotifier errfd;

> > +    __u32 dev_info_flags;

> 

> QEMU is not kernel code, please use uint32_t.


As you saw later in the code, I was using the same type in the structure 
copied from the kernel. Will change this to uint32_t.

Thanks

Vijay

> 

> >  } VFIODevice;

> >

> >  typedef struct VFIOGroup {

> > @@ -1805,6 +1808,8 @@ static int vfio_get_device(VFIOGroup *group, const

> char *name, VFIODevice *vdev)

> >      DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,

> >              dev_info.flags, dev_info.num_regions, dev_info.num_irqs);

> >

> > +    vdev->dev_info_flags = dev_info.flags;

> > +

> >      if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {

> >          error_report("vfio: Um, this isn't a PCI device\n");

> >          goto error;

> > @@ -1900,6 +1905,55 @@ static void vfio_put_device(VFIODevice *vdev)

> >      }

> >  }

> >

> > +static void vfio_errfd_handler(void *opaque)

> > +{

> > +    VFIODevice *vdev = opaque;

> > +

> > +    if (!event_notifier_test_and_clear(&vdev->errfd)) {

> > +        return;

> > +    }

> > +

> > +    /*

> > +     * TBD. Retrieve the error details and decide what action

> > +     * needs to be taken. One of the actions could be to pass

> > +     * the error to the guest and have the guest driver recover

> > +     * the error. This requires that PCIe capabilities be

> > +     * exposed to the guest. At present, we just terminate the

> > +     * guest to contain the error.

> > +     */

> > +    error_report("%s(%04x:%02x:%02x.%x) "

> > +        "Unrecoverable error detected... Terminating guest\n",

> > +        __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,

> > +        vdev->host.function);

> > +

> > +    qemu_system_shutdown_request();

> > +    return;

> > +}

> > +

> > +static void vfio_register_errfd(VFIODevice *vdev)

> > +{

> > +    int32_t pfd;

> > +    int ret;

> > +

> > +    if (!(vdev->dev_info_flags & VFIO_DEVICE_FLAGS_AER_NOTIFY)) {

> > +        error_report("vfio: Warning: Error notification not supported

> for the device\n");

> > +        return;

> > +    }

> > +    if (event_notifier_init(&vdev->errfd, 0)) {

> > +        error_report("vfio: Warning: Unable to init event notifier for

> error detection\n");

> > +        return;

> > +    }

> > +    pfd = event_notifier_get_fd(&vdev->errfd);

> > +    qemu_set_fd_handler(pfd, vfio_errfd_handler, NULL, vdev);

> > +

> > +    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_ERRFD, pfd);

> > +    if (ret) {

> > +        error_report("vfio: Warning: Failed to setup error fd: %d\n",

> ret);

> > +        qemu_set_fd_handler(pfd, NULL, NULL, vdev);

> > +        event_notifier_cleanup(&vdev->errfd);

> > +    }

> > +    return;

> > +}

> >  static int vfio_initfn(PCIDevice *pdev)

> >  {

> >      VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);

> > @@ -2010,6 +2064,8 @@ static int vfio_initfn(PCIDevice *pdev)

> >          }

> >      }

> >

> > +    vfio_register_errfd(vdev);

> > +

> >      return 0;

> >

> >  out_teardown:

> > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h

> > index 4758d1b..0ca4eeb 100644

> > --- a/linux-headers/linux/vfio.h

> > +++ b/linux-headers/linux/vfio.h

> > @@ -147,6 +147,7 @@ struct vfio_device_info {

> >         __u32   flags;

> >  #define VFIO_DEVICE_FLAGS_RESET        (1 << 0)        /* Device

> supports reset */

> >  #define VFIO_DEVICE_FLAGS_PCI  (1 << 1)        /* vfio-pci device */

> > +#define VFIO_DEVICE_FLAGS_AER_NOTIFY (1 << 2)   /* Supports aer

> notification */

> >         __u32   num_regions;    /* Max region index + 1 */

> >         __u32   num_irqs;       /* Max IRQ index + 1 */

> 

> These are verbatim copies of kernel headers so it's OK here.

> 

> >  };

> > @@ -288,6 +289,14 @@ struct vfio_irq_set {

> >   */

> >  #define VFIO_DEVICE_RESET              _IO(VFIO_TYPE, VFIO_BASE + 11)

> >

> > +/**

> > + * VFIO_DEVICE_SET_ERRFD - _IO(VFIO_TYPE, VFIO_BASE + 12)

> > + *

> > + * Pass the eventfd to the vfio-pci driver for signalling any device

> > + * error notifications

> > + */

> > +#define VFIO_DEVICE_SET_ERRFD           _IO(VFIO_TYPE, VFIO_BASE + 12)

> > +

> >  /*

> >   * The VFIO-PCI bus driver makes use of the following fixed region and

> >   * IRQ index mapping.  Unimplemented regions return a size of zero.

> > --

> > 1.7.11.3

> >

> >
Alex Williamson - Jan. 11, 2013, 3:53 p.m.
On Fri, 2013-01-11 at 08:53 +0000, Pandarathil, Vijaymohan R wrote:
> 
> > -----Original Message-----
> > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > Sent: Wednesday, January 09, 2013 10:08 AM
> > To: Pandarathil, Vijaymohan R
> > Cc: Bjorn Helgaas; Gleb Natapov; kvm@vger.kernel.org; qemu-
> > devel@nongnu.org; linux-pci@vger.kernel.org; linux-kernel@vger.kernel.org
> > Subject: Re: [PATCH 2/2] QEMU-AER: Qemu changes to support AER for VFIO-PCI
> > devices
> > 
> > On Wed, 2013-01-09 at 06:26 +0000, Pandarathil, Vijaymohan R wrote:
> > > 	- Create eventfd per vfio device assigned to a guest and register an
> > >           event handler
> > >
> > > 	- This fd is passed to the vfio_pci driver through a new ioctl
> > >
> > > 	- When the device encounters an error, the eventfd is signaled
> > >           and the qemu eventfd handler gets invoked.
> > >
> > > 	- In the handler decide what action to take. Current action taken
> > >           is to terminate the guest.
> > >
> > > Signed-off-by: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com>
> > > ---
> > >  hw/vfio_pci.c              | 56
> > ++++++++++++++++++++++++++++++++++++++++++++++
> > >  linux-headers/linux/vfio.h |  9 ++++++++
> > >  2 files changed, 65 insertions(+)
> > >
> > > diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
> > > index 28c8303..9c3c28b 100644
> > > --- a/hw/vfio_pci.c
> > > +++ b/hw/vfio_pci.c
> > > @@ -38,6 +38,7 @@
> > >  #include "qemu/error-report.h"
> > >  #include "qemu/queue.h"
> > >  #include "qemu/range.h"
> > > +#include "sysemu/sysemu.h"
> > >
> > >  /* #define DEBUG_VFIO */
> > >  #ifdef DEBUG_VFIO
> > > @@ -130,6 +131,8 @@ typedef struct VFIODevice {
> > >      QLIST_ENTRY(VFIODevice) next;
> > >      struct VFIOGroup *group;
> > >      bool reset_works;
> > > +    EventNotifier errfd;
> > 
> > Misleading name, it's an EventNotifier not an fd.
> 
> Will make the change.
> 
> > 
> > > +    __u32 dev_info_flags;
> > >  } VFIODevice;
> > >
> > >  typedef struct VFIOGroup {
> > > @@ -1805,6 +1808,8 @@ static int vfio_get_device(VFIOGroup *group, const
> > char *name, VFIODevice *vdev)
> > >      DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,
> > >              dev_info.flags, dev_info.num_regions, dev_info.num_irqs);
> > >
> > > +    vdev->dev_info_flags = dev_info.flags;
> > > +
> > 
> > We test the VFIO_DEVICE_FLAGS_RESET elsewhere by caching the bit into a
> > variable, why not do that here too?
> 
> I was wondering if there was any specific reason to cache the bit into
> a separate variable. Wouldn't a flags field which can contain the
> specific bit be more apt ?

Then we have to mask it out ever time we want to reference it.  Qemu
doesn't seem to like macros or inlines for that, so using a new variable
keeps things neat.  Caching two fields to bools is still more space
efficient than saving the whole thing and we can easily switch to named
bits if we get enough to start caring about the space.  Thanks,

Alex
 
> > >      if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {
> > >          error_report("vfio: Um, this isn't a PCI device\n");
> > >          goto error;
> > > @@ -1900,6 +1905,55 @@ static void vfio_put_device(VFIODevice *vdev)
> > >      }
> > >  }
> > >
> > > +static void vfio_errfd_handler(void *opaque)
> > > +{
> > > +    VFIODevice *vdev = opaque;
> > > +
> > > +    if (!event_notifier_test_and_clear(&vdev->errfd)) {
> > > +        return;
> > > +    }
> > > +
> > > +    /*
> > > +     * TBD. Retrieve the error details and decide what action
> > > +     * needs to be taken. One of the actions could be to pass
> > > +     * the error to the guest and have the guest driver recover
> > > +     * the error. This requires that PCIe capabilities be
> > > +     * exposed to the guest. At present, we just terminate the
> > > +     * guest to contain the error.
> > > +     */
> > > +    error_report("%s(%04x:%02x:%02x.%x) "
> > > +        "Unrecoverable error detected... Terminating guest\n",
> > > +        __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,
> > > +        vdev->host.function);
> > > +
> > > +    qemu_system_shutdown_request();
> > 
> > I would have figured hw_error
> 
> Hw_error() is probably more appropriate. Will make the change.
> 
> > 
> > > +    return;
> > > +}
> > > +
> > > +static void vfio_register_errfd(VFIODevice *vdev)
> > > +{
> > > +    int32_t pfd;
> > 
> > "pfd" is used elsewhere in vfio as "Pointer to FD", this is just a fd.
> 
> Will change.
> 
> > 
> > > +    int ret;
> > > +
> > > +    if (!(vdev->dev_info_flags & VFIO_DEVICE_FLAGS_AER_NOTIFY)) {
> > > +        error_report("vfio: Warning: Error notification not supported
> > for the device\n");
> > 
> > This should probably just be a debug print.
> 
> Okay.
> 
> > 
> > > +        return;
> > > +    }
> > > +    if (event_notifier_init(&vdev->errfd, 0)) {
> > > +        error_report("vfio: Warning: Unable to init event notifier for
> > error detection\n");
> > > +        return;
> > > +    }
> > > +    pfd = event_notifier_get_fd(&vdev->errfd);
> > > +    qemu_set_fd_handler(pfd, vfio_errfd_handler, NULL, vdev);
> > > +
> > > +    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_ERRFD, pfd);
> > > +    if (ret) {
> > > +        error_report("vfio: Warning: Failed to setup error fd: %d\n",
> > ret);
> > > +        qemu_set_fd_handler(pfd, NULL, NULL, vdev);
> > > +        event_notifier_cleanup(&vdev->errfd);
> > > +    }
> > > +    return;
> > > +}
> > >  static int vfio_initfn(PCIDevice *pdev)
> > >  {
> > >      VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
> > > @@ -2010,6 +2064,8 @@ static int vfio_initfn(PCIDevice *pdev)
> > >          }
> > >      }
> > >
> > > +    vfio_register_errfd(vdev);
> > > +
> > 
> > No cleanup in exitfn?!  Thanks,
> 
> Missed that. Will fix.
> 
> Vijay
> 
> > 
> > Alex
> > 
> > >      return 0;
> > >
> > >  out_teardown:
> > > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> > > index 4758d1b..0ca4eeb 100644
> > > --- a/linux-headers/linux/vfio.h
> > > +++ b/linux-headers/linux/vfio.h
> > > @@ -147,6 +147,7 @@ struct vfio_device_info {
> > >  	__u32	flags;
> > >  #define VFIO_DEVICE_FLAGS_RESET	(1 << 0)	/* Device supports reset */
> > >  #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
> > > +#define VFIO_DEVICE_FLAGS_AER_NOTIFY (1 << 2)   /* Supports aer
> > notification */
> > >  	__u32	num_regions;	/* Max region index + 1 */
> > >  	__u32	num_irqs;	/* Max IRQ index + 1 */
> > >  };
> > > @@ -288,6 +289,14 @@ struct vfio_irq_set {
> > >   */
> > >  #define VFIO_DEVICE_RESET		_IO(VFIO_TYPE, VFIO_BASE + 11)
> > >
> > > +/**
> > > + * VFIO_DEVICE_SET_ERRFD - _IO(VFIO_TYPE, VFIO_BASE + 12)
> > > + *
> > > + * Pass the eventfd to the vfio-pci driver for signalling any device
> > > + * error notifications
> > > + */
> > > +#define VFIO_DEVICE_SET_ERRFD           _IO(VFIO_TYPE, VFIO_BASE + 12)
> > > +
> > >  /*
> > >   * The VFIO-PCI bus driver makes use of the following fixed region and
> > >   * IRQ index mapping.  Unimplemented regions return a size of zero.
> > 
> > 
>

Patch

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 28c8303..9c3c28b 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -38,6 +38,7 @@ 
 #include "qemu/error-report.h"
 #include "qemu/queue.h"
 #include "qemu/range.h"
+#include "sysemu/sysemu.h"
 
 /* #define DEBUG_VFIO */
 #ifdef DEBUG_VFIO
@@ -130,6 +131,8 @@  typedef struct VFIODevice {
     QLIST_ENTRY(VFIODevice) next;
     struct VFIOGroup *group;
     bool reset_works;
+    EventNotifier errfd;
+    __u32 dev_info_flags;
 } VFIODevice;
 
 typedef struct VFIOGroup {
@@ -1805,6 +1808,8 @@  static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
     DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,
             dev_info.flags, dev_info.num_regions, dev_info.num_irqs);
 
+    vdev->dev_info_flags = dev_info.flags;
+
     if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {
         error_report("vfio: Um, this isn't a PCI device\n");
         goto error;
@@ -1900,6 +1905,55 @@  static void vfio_put_device(VFIODevice *vdev)
     }
 }
 
+static void vfio_errfd_handler(void *opaque)
+{
+    VFIODevice *vdev = opaque;
+
+    if (!event_notifier_test_and_clear(&vdev->errfd)) {
+        return;
+    }
+
+    /*
+     * TBD. Retrieve the error details and decide what action
+     * needs to be taken. One of the actions could be to pass
+     * the error to the guest and have the guest driver recover
+     * the error. This requires that PCIe capabilities be
+     * exposed to the guest. At present, we just terminate the
+     * guest to contain the error.
+     */
+    error_report("%s(%04x:%02x:%02x.%x) "
+        "Unrecoverable error detected... Terminating guest\n",
+        __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,
+        vdev->host.function);
+
+    qemu_system_shutdown_request();
+    return;
+}
+
+static void vfio_register_errfd(VFIODevice *vdev)
+{
+    int32_t pfd;
+    int ret;
+
+    if (!(vdev->dev_info_flags & VFIO_DEVICE_FLAGS_AER_NOTIFY)) {
+        error_report("vfio: Warning: Error notification not supported for the device\n");
+        return;
+    }
+    if (event_notifier_init(&vdev->errfd, 0)) {
+        error_report("vfio: Warning: Unable to init event notifier for error detection\n");
+        return;
+    }
+    pfd = event_notifier_get_fd(&vdev->errfd);
+    qemu_set_fd_handler(pfd, vfio_errfd_handler, NULL, vdev);
+
+    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_ERRFD, pfd);
+    if (ret) {
+        error_report("vfio: Warning: Failed to setup error fd: %d\n", ret);
+        qemu_set_fd_handler(pfd, NULL, NULL, vdev);
+        event_notifier_cleanup(&vdev->errfd);
+    }
+    return;
+}
 static int vfio_initfn(PCIDevice *pdev)
 {
     VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
@@ -2010,6 +2064,8 @@  static int vfio_initfn(PCIDevice *pdev)
         }
     }
 
+    vfio_register_errfd(vdev);
+
     return 0;
 
 out_teardown:
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 4758d1b..0ca4eeb 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -147,6 +147,7 @@  struct vfio_device_info {
 	__u32	flags;
 #define VFIO_DEVICE_FLAGS_RESET	(1 << 0)	/* Device supports reset */
 #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
+#define VFIO_DEVICE_FLAGS_AER_NOTIFY (1 << 2)   /* Supports aer notification */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 };
@@ -288,6 +289,14 @@  struct vfio_irq_set {
  */
 #define VFIO_DEVICE_RESET		_IO(VFIO_TYPE, VFIO_BASE + 11)
 
+/**
+ * VFIO_DEVICE_SET_ERRFD - _IO(VFIO_TYPE, VFIO_BASE + 12)
+ *
+ * Pass the eventfd to the vfio-pci driver for signalling any device
+ * error notifications
+ */
+#define VFIO_DEVICE_SET_ERRFD           _IO(VFIO_TYPE, VFIO_BASE + 12)
+
 /*
  * The VFIO-PCI bus driver makes use of the following fixed region and
  * IRQ index mapping.  Unimplemented regions return a size of zero.