diff mbox

Xen: Use the ioreq-server API when available

Message ID 1412949407-16779-1-git-send-email-paul.durrant@citrix.com
State New
Headers show

Commit Message

Paul Durrant Oct. 10, 2014, 1:56 p.m. UTC
The ioreq-server API added to Xen 4.5 offers better security than
the existing Xen/QEMU interface because the shared pages that are
used to pass emulation requests/results back and forth are removed
from the guest's memory space before any requests are serviced.
This prevents the guest from mapping these pages (they are in a
well known location) and attempting to attack QEMU by synthesizing
its own request structures. Hence, this patch modifies configure
to detect whether the API is available, and adds the necessary
code to use the API if it is.

The ioreq-server API does require that PCI device models explicitly
register with Xen for config space accesses, so to use the API the
code in xen-hvm.c needs to be informed as PCI devices are added or
removed from PCI buses. This patch therefore also adds a PCI bus
listener interface akin to the memory listener interface to fulfil
this need.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Michael Tokarev <mjt@tls.msk.ru>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Stefan Weil <sw@weilnetz.de>
Cc: Andreas Faerber <afaerber@suse.de>
Cc: Thomas Huth <thuth@linux.vnet.ibm.com>
Cc: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: Alexander Graf <agraf@suse.de>
---
 configure               |   29 ++++++
 hw/pci/pci.c            |   65 ++++++++++++
 include/hw/pci/pci.h    |    9 ++
 include/qemu/typedefs.h |    1 +
 trace-events            |    8 ++
 xen-hvm.c               |  251 +++++++++++++++++++++++++++++++++++++++++++----
 6 files changed, 345 insertions(+), 18 deletions(-)

Comments

Stefano Stabellini Oct. 10, 2014, 2:39 p.m. UTC | #1
On Fri, 10 Oct 2014, Paul Durrant wrote:
> The ioreq-server API added to Xen 4.5 offers better security than
> the existing Xen/QEMU interface because the shared pages that are
> used to pass emulation request/results back and forth are removed
> from the guest's memory space before any requests are serviced.
> This prevents the guest from mapping these pages (they are in a
> well known location) and attempting to attack QEMU by synthesizing
> its own request structures. Hence, this patch modifies configure
> to detect whether the API is available, and adds the necessary
> code to use the API if it is.
> 
> The ioreq-server API does require that PCI device models explicitly
> register with Xen for config space accesses, so to use the API the
> code in xen-hvm.c needs to be informed as PCI devices are added or
> removed from PCI buses. This patch therefore also adds a PCI bus
> listener interface akin to the memory listener interface to fulfil
> this need.
> 
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> Cc: Peter Maydell <peter.maydell@linaro.org>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Michael Tokarev <mjt@tls.msk.ru>
> Cc: Stefan Hajnoczi <stefanha@redhat.com>
> Cc: Stefan Weil <sw@weilnetz.de>
> Cc: Andreas Faerber" <afaerber@suse.de>
> Cc: Thomas Huth <thuth@linux.vnet.ibm.com>
> Cc: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
> Cc: Eduardo Habkost <ehabkost@redhat.com>
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
> Cc: Alexander Graf <agraf@suse.de>
> ---
>  configure               |   29 ++++++
>  hw/pci/pci.c            |   65 ++++++++++++
>  include/hw/pci/pci.h    |    9 ++
>  include/qemu/typedefs.h |    1 +
>  trace-events            |    8 ++
>  xen-hvm.c               |  251 +++++++++++++++++++++++++++++++++++++++++++----
>  6 files changed, 345 insertions(+), 18 deletions(-)

Please split this patch into two: one to introduce the PCI listener
stuff and another for the Xen-specific changes.


> diff --git a/configure b/configure
> index 9ac2600..c2db574 100755
> --- a/configure
> +++ b/configure
> @@ -1876,6 +1876,32 @@ int main(void) {
>    xc_gnttab_open(NULL, 0);
>    xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
>    xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
> +  xc_hvm_create_ioreq_server(xc, 0, 0, NULL);
> +  return 0;
> +}
> +EOF
> +      compile_prog "" "$xen_libs"
> +    then
> +    xen_ctrl_version=450
> +    xen=yes
> +
> +  elif
> +      cat > $TMPC <<EOF &&
> +#include <xenctrl.h>
> +#include <xenstore.h>
> +#include <stdint.h>
> +#include <xen/hvm/hvm_info_table.h>
> +#if !defined(HVM_MAX_VCPUS)
> +# error HVM_MAX_VCPUS not defined
> +#endif
> +int main(void) {
> +  xc_interface *xc;
> +  xs_daemon_open();
> +  xc = xc_interface_open(0, 0, 0);
> +  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
> +  xc_gnttab_open(NULL, 0);
> +  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
> +  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
>    return 0;
>  }
>  EOF
> @@ -4282,6 +4308,9 @@ if test -n "$sparc_cpu"; then
>      echo "Target Sparc Arch $sparc_cpu"
>  fi
>  echo "xen support       $xen"
> +if test "$xen" = "yes" ; then
> +  echo "xen ctrl version  $xen_ctrl_version"
> +fi
>  echo "brlapi support    $brlapi"
>  echo "bluez  support    $bluez"
>  echo "Documentation     $docs"
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 6ce75aa..53c955d 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -122,6 +122,66 @@ static uint16_t pci_default_sub_device_id = PCI_SUBDEVICE_ID_QEMU;
>  
>  static QLIST_HEAD(, PCIHostState) pci_host_bridges;
>  
> +static QTAILQ_HEAD(pci_listeners, PCIListener) pci_listeners
> +    = QTAILQ_HEAD_INITIALIZER(pci_listeners);
> +
> +enum ListenerDirection { Forward, Reverse };
> +
> +#define PCI_LISTENER_CALL(_callback, _direction, _args...)      \
> +    do {                                                        \
> +        PCIListener *_listener;                                 \
> +                                                                \
> +        switch (_direction) {                                   \
> +        case Forward:                                           \
> +            QTAILQ_FOREACH(_listener, &pci_listeners, link) {   \
> +                if (_listener->_callback) {                     \
> +                    _listener->_callback(_listener, ##_args);   \
> +                }                                               \
> +            }                                                   \
> +            break;                                              \
> +        case Reverse:                                           \
> +            QTAILQ_FOREACH_REVERSE(_listener, &pci_listeners,   \
> +                                   pci_listeners, link) {       \
> +                if (_listener->_callback) {                     \
> +                    _listener->_callback(_listener, ##_args);   \
> +                }                                               \
> +            }                                                   \
> +            break;                                              \
> +        default:                                                \
> +            abort();                                            \
> +        }                                                       \
> +    } while (0)
> +
> +static int pci_listener_add(DeviceState *dev, void *opaque)
> +{
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
> +        PCIDevice *pci_dev = PCI_DEVICE(dev);
> +
> +        PCI_LISTENER_CALL(device_add, Forward, pci_dev);
> +    }
> +
> +    return 0;
> +}
> +
> +void pci_listener_register(PCIListener *listener)
> +{
> +    PCIHostState *host;
> +
> +    QTAILQ_INSERT_TAIL(&pci_listeners, listener, link);
> +
> +    QLIST_FOREACH(host, &pci_host_bridges, next) {
> +        PCIBus *bus = host->bus;
> +
> +        qbus_walk_children(&bus->qbus, NULL, NULL, pci_listener_add,
> +                           NULL, NULL);
> +    }
> +}
> +
> +void pci_listener_unregister(PCIListener *listener)
> +{
> +    QTAILQ_REMOVE(&pci_listeners, listener, link);
> +}
> +
>  static int pci_bar(PCIDevice *d, int reg)
>  {
>      uint8_t type;
> @@ -795,6 +855,8 @@ static void pci_config_free(PCIDevice *pci_dev)
>  
>  static void do_pci_unregister_device(PCIDevice *pci_dev)
>  {
> +    PCI_LISTENER_CALL(device_del, Reverse, pci_dev);
> +
>      pci_dev->bus->devices[pci_dev->devfn] = NULL;
>      pci_config_free(pci_dev);
>  
> @@ -878,6 +940,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>      pci_dev->config_write = config_write;
>      bus->devices[devfn] = pci_dev;
>      pci_dev->version_id = 2; /* Current pci device vmstate version */
> +
> +    PCI_LISTENER_CALL(device_add, Forward, pci_dev);
> +
>      return pci_dev;
>  }
>  
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index c352c7b..6c21b37 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -303,6 +303,15 @@ struct PCIDevice {
>      MSIVectorPollNotifier msix_vector_poll_notifier;
>  };
>  
> +struct PCIListener {
> +    void (*device_add)(PCIListener *listener, PCIDevice *pci_dev);
> +    void (*device_del)(PCIListener *listener, PCIDevice *pci_dev);
> +    QTAILQ_ENTRY(PCIListener) link;
> +};
> +
> +void pci_listener_register(PCIListener *listener);
> +void pci_listener_unregister(PCIListener *listener);
> +
>  void pci_register_bar(PCIDevice *pci_dev, int region_num,
>                        uint8_t attr, MemoryRegion *memory);
>  void pci_register_vga(PCIDevice *pci_dev, MemoryRegion *mem,
> diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
> index 04df51b..2b974c6 100644
> --- a/include/qemu/typedefs.h
> +++ b/include/qemu/typedefs.h
> @@ -54,6 +54,7 @@ typedef struct PCIHostState PCIHostState;
>  typedef struct PCIExpressHost PCIExpressHost;
>  typedef struct PCIBus PCIBus;
>  typedef struct PCIDevice PCIDevice;
> +typedef struct PCIListener PCIListener;
>  typedef struct PCIExpressDevice PCIExpressDevice;
>  typedef struct PCIBridge PCIBridge;
>  typedef struct PCIEAERMsg PCIEAERMsg;
> diff --git a/trace-events b/trace-events
> index 011d105..3efcff7 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -895,6 +895,14 @@ pvscsi_tx_rings_num_pages(const char* label, uint32_t num) "Number of %s pages:
>  # xen-hvm.c
>  xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: %#lx, size %#lx"
>  xen_client_set_memory(uint64_t start_addr, unsigned long size, bool log_dirty) "%#"PRIx64" size %#lx, log_dirty %i"
> +xen_ioreq_server_create(uint32_t id) "id: %u"
> +xen_ioreq_server_destroy(uint32_t id) "id: %u"
> +xen_map_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
> +xen_unmap_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
> +xen_map_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
> +xen_unmap_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
> +xen_map_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x"
> +xen_unmap_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x"
>  
>  # xen-mapcache.c
>  xen_map_cache(uint64_t phys_addr) "want %#"PRIx64
> diff --git a/xen-hvm.c b/xen-hvm.c
> index 05e522c..031edd9 100644
> --- a/xen-hvm.c
> +++ b/xen-hvm.c
> @@ -78,6 +78,7 @@ typedef struct XenPhysmap {
>  } XenPhysmap;
>  
>  typedef struct XenIOState {
> +    ioservid_t ioservid;
>      shared_iopage_t *shared_page;
>      buffered_iopage_t *buffered_io_page;
>      QEMUTimer *buffered_io_timer;
> @@ -92,6 +93,8 @@ typedef struct XenIOState {
>  
>      struct xs_handle *xenstore;
>      MemoryListener memory_listener;
> +    MemoryListener io_listener;
> +    PCIListener pci_listener;
>      QLIST_HEAD(, XenPhysmap) physmap;
>      hwaddr free_phys_offset;
>      const XenPhysmap *log_for_dirtybit;
> @@ -480,6 +483,20 @@ static void xen_region_add(MemoryListener *listener,
>                             MemoryRegionSection *section)
>  {
>      memory_region_ref(section->mr);
> +
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450

Please use include/hw/xen/xen_common.h for compat stuff.  In this case
you could provide an empty implementation of
xc_hvm_map_io_range_to_ioreq_server. Try to remove ifdefs from
xen-hvm.c.


> +    if (section->mr != &ram_memory) {
> +        XenIOState *state = container_of(listener, XenIOState, memory_listener);
> +        hwaddr start_addr = section->offset_within_address_space;
> +        ram_addr_t size = int128_get64(section->size);
> +        hwaddr end_addr = start_addr + size - 1;
> +
> +        trace_xen_map_mmio_range(state->ioservid, start_addr, end_addr);
> +        xc_hvm_map_io_range_to_ioreq_server(xen_xc, xen_domid, state->ioservid,
> +                                            1, start_addr, end_addr);
> +    }
> +#endif
> +
>      xen_set_memory(listener, section, true);
>  }
>  
> @@ -487,9 +504,81 @@ static void xen_region_del(MemoryListener *listener,
>                             MemoryRegionSection *section)
>  {
>      xen_set_memory(listener, section, false);
> +
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
> +    if (section->mr != &ram_memory) {
> +        XenIOState *state = container_of(listener, XenIOState, memory_listener);
> +        hwaddr start_addr = section->offset_within_address_space;
> +        ram_addr_t size = int128_get64(section->size);
> +        hwaddr end_addr = start_addr + size - 1;
> +
> +        trace_xen_unmap_mmio_range(state->ioservid, start_addr, end_addr);
> +        xc_hvm_unmap_io_range_from_ioreq_server(xen_xc, xen_domid, state->ioservid,
> +                                                1, start_addr, end_addr);
> +    }
> +#endif
> +
> +    memory_region_unref(section->mr);
> +}
> +
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
> +static void xen_io_add(MemoryListener *listener,
> +		       MemoryRegionSection *section)
> +{
> +    XenIOState *state = container_of(listener, XenIOState, io_listener);
> +    hwaddr start_addr = section->offset_within_address_space;
> +    ram_addr_t size = int128_get64(section->size);
> +    hwaddr end_addr = start_addr + size - 1;
> +
> +    trace_xen_map_portio_range(state->ioservid, start_addr, end_addr);
> +    xc_hvm_map_io_range_to_ioreq_server(xen_xc, xen_domid, state->ioservid,
> +                                        0, start_addr, end_addr);
> +            
> +    memory_region_ref(section->mr);
> +}
> +
> +static void xen_io_del(MemoryListener *listener,
> +		       MemoryRegionSection *section)
> +{
> +    XenIOState *state = container_of(listener, XenIOState, io_listener);
> +    hwaddr start_addr = section->offset_within_address_space;
> +    ram_addr_t size = int128_get64(section->size);
> +    hwaddr end_addr = start_addr + size - 1;
> +
> +    trace_xen_unmap_portio_range(state->ioservid, start_addr, end_addr);
> +    xc_hvm_unmap_io_range_from_ioreq_server(xen_xc, xen_domid, state->ioservid,
> +                                            0, start_addr, end_addr);
> +            
>      memory_region_unref(section->mr);
>  }
>  
> +static void xen_pci_add(PCIListener *listener,
> +			PCIDevice *pci_dev)
> +{
> +    XenIOState *state = container_of(listener, XenIOState, pci_listener);
> +
> +    trace_xen_map_pcidev(state->ioservid, pci_bus_num(pci_dev->bus),
> +                         PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn));
> +    xc_hvm_map_pcidev_to_ioreq_server(xen_xc, xen_domid, state->ioservid,
> +                                      0, pci_bus_num(pci_dev->bus),
> +                                      PCI_SLOT(pci_dev->devfn),
> +                                      PCI_FUNC(pci_dev->devfn));
> +}
> +
> +static void xen_pci_del(PCIListener *listener,
> +			PCIDevice *pci_dev)
> +{
> +    XenIOState *state = container_of(listener, XenIOState, pci_listener);
> +
> +    trace_xen_unmap_pcidev(state->ioservid, pci_bus_num(pci_dev->bus),
> +                           PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn));
> +    xc_hvm_unmap_pcidev_from_ioreq_server(xen_xc, xen_domid, state->ioservid,
> +                                          0, pci_bus_num(pci_dev->bus),
> +                                          PCI_SLOT(pci_dev->devfn),
> +                                          PCI_FUNC(pci_dev->devfn));
> +}
> +#endif
> +
>  static void xen_sync_dirty_bitmap(XenIOState *state,
>                                    hwaddr start_addr,
>                                    ram_addr_t size)
> @@ -590,6 +679,19 @@ static MemoryListener xen_memory_listener = {
>      .priority = 10,
>  };
>  
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
> +static MemoryListener xen_io_listener = {
> +    .region_add = xen_io_add,
> +    .region_del = xen_io_del,
> +    .priority = 10,
> +};
> +
> +static PCIListener xen_pci_listener = {
> +    .device_add = xen_pci_add,
> +    .device_del = xen_pci_del,
> +};
> +#endif
> +
>  /* get the ioreq packets from share mem */
>  static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
>  {
> @@ -792,6 +894,29 @@ static void handle_ioreq(ioreq_t *req)
>          case IOREQ_TYPE_INVALIDATE:
>              xen_invalidate_map_cache();
>              break;
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
> +        case IOREQ_TYPE_PCI_CONFIG: {
> +            uint32_t sbdf = req->addr >> 32;
> +            uint32_t val;
> +
> +            /* Fake a write to port 0xCF8 so that
> +             * the config space access will target the
> +             * correct device model.
> +             */
> +            val = (1u << 31) |
> +                  ((req->addr & 0x0f00) << 16) |
> +                  ((sbdf & 0xffff) << 8) |
> +                  (req->addr & 0xfc);
> +            do_outp(0xcf8, 4, val);
> +
> +            /* Now issue the config space access via
> +             * port 0xCFC
> +             */
> +            req->addr = 0xcfc | (req->addr & 0x03);
> +            cpu_ioreq_pio(req);
> +            break;
> +        }
> +#endif
>          default:
>              hw_error("Invalid ioreq type 0x%x\n", req->type);
>      }
> @@ -979,13 +1104,39 @@ static void xen_wakeup_notifier(Notifier *notifier, void *data)
>      xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0);
>  }
>  
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
> +static void xen_hvm_pre_save(void *opaque)
> +{
> +    XenIOState *state = opaque;
> +
> +    /* Stop servicing emulation requests */
> +    trace_xen_ioreq_server_destroy(state->ioservid);
> +    xc_hvm_set_ioreq_server_state(xen_xc, xen_domid, state->ioservid, 0);
> +    xc_hvm_destroy_ioreq_server(xen_xc, xen_domid, state->ioservid);
> +}
> +
> +static const VMStateDescription vmstate_xen_hvm = {
> +    .name = "xen-hvm",
> +    .version_id = 4,
> +    .minimum_version_id = 4,
> +    .pre_save = xen_hvm_pre_save,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +#endif
> +
>  /* return 0 means OK, or -1 means critical issue -- will exit(1) */
>  int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
>                   MemoryRegion **ram_memory)
>  {
>      int i, rc;
> -    unsigned long ioreq_pfn;
> -    unsigned long bufioreq_evtchn;
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 450
> +    unsigned long param;
> +#endif
> +    xen_pfn_t ioreq_pfn;
> +    xen_pfn_t bufioreq_pfn;
> +    evtchn_port_t bufioreq_evtchn;
>      XenIOState *state;
>  
>      state = g_malloc0(sizeof (XenIOState));
> @@ -1002,6 +1153,16 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
>          return -1;
>      }
>  
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
> +    rc = xc_hvm_create_ioreq_server(xen_xc, xen_domid, 1, &state->ioservid);
> +    if (rc < 0) {
> +        perror("xen: ioreq server create");
> +        return -1;
> +    }
> +
> +    trace_xen_ioreq_server_create(state->ioservid);
> +#endif
> +
>      state->exit.notify = xen_exit_notifier;
>      qemu_add_exit_notifier(&state->exit);
>  
> @@ -1011,23 +1172,71 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
>      state->wakeup.notify = xen_wakeup_notifier;
>      qemu_register_wakeup_notifier(&state->wakeup);
>  
> -    xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn);
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
> +    rc = xc_hvm_get_ioreq_server_info(xen_xc, xen_domid, state->ioservid,
> +				      &ioreq_pfn, &bufioreq_pfn,
> +				      &bufioreq_evtchn);
> +    if (rc < 0) {
> +        hw_error("failed to get ioreq server info: error %d handle=" XC_INTERFACE_FMT,
> +                 errno, xen_xc);
> +    }
> +#else
> +    rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &param);
> +    if (rc < 0) {
> +        fprintf(stderr, "failed to get HVM_PARAM_IOREQ_PFN\n");
> +        return -1;
> +    }
> +
> +    ioreq_pfn = param;
> +
> +    rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_PFN, &param);
> +    if (rc < 0) {
> +        fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_PFN\n");
> +        return -1;
> +    }
> +
> +    bufioreq_pfn = param;
> +
> +    rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_EVTCHN,
> +                          &param);
> +    if (rc < 0) {
> +        fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_EVTCHN\n");
> +        return -1;
> +    }
> +
> +    bufioreq_evtchn = param;
> +#endif
> +
>      DPRINTF("shared page at pfn %lx\n", ioreq_pfn);
> -    state->shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE,
> -                                              PROT_READ|PROT_WRITE, ioreq_pfn);
> +    DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn);
> +    DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn);
> +
> +    state->shared_page = xc_map_foreign_range(xen_xc, xen_domid,
> +                                              XC_PAGE_SIZE,
> +                                              PROT_READ|PROT_WRITE,
> +                                              ioreq_pfn);
>      if (state->shared_page == NULL) {
>          hw_error("map shared IO page returned error %d handle=" XC_INTERFACE_FMT,
>                   errno, xen_xc);
>      }
>  
> -    xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn);
> -    DPRINTF("buffered io page at pfn %lx\n", ioreq_pfn);
> -    state->buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE,
> -                                                   PROT_READ|PROT_WRITE, ioreq_pfn);
> +    state->buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid,
> +                                                   XC_PAGE_SIZE,
> +                                                   PROT_READ|PROT_WRITE,
> +                                                   bufioreq_pfn);
>      if (state->buffered_io_page == NULL) {
>          hw_error("map buffered IO page returned error %d", errno);
>      }
>  
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
> +    rc = xc_hvm_set_ioreq_server_state(xen_xc, xen_domid, state->ioservid,
> +				       1);
> +    if (rc < 0) {
> +        hw_error("failed to enable ioreq server info: error %d handle=" XC_INTERFACE_FMT,
> +                 errno, xen_xc);
> +    }
> +#endif
> +
>      state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t));
>  
>      /* FIXME: how about if we overflow the page here? */
> @@ -1035,22 +1244,16 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
>          rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid,
>                                          xen_vcpu_eport(state->shared_page, i));
>          if (rc == -1) {
> -            fprintf(stderr, "bind interdomain ioctl error %d\n", errno);
> +            fprintf(stderr, "shared evtchn %d bind error %d\n", i, errno);
>              return -1;
>          }
>          state->ioreq_local_port[i] = rc;
>      }
>  
> -    rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_EVTCHN,
> -            &bufioreq_evtchn);
> -    if (rc < 0) {
> -        fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_EVTCHN\n");
> -        return -1;
> -    }
>      rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid,
> -            (uint32_t)bufioreq_evtchn);
> +                                    bufioreq_evtchn);
>      if (rc == -1) {
> -        fprintf(stderr, "bind interdomain ioctl error %d\n", errno);
> +        fprintf(stderr, "buffered evtchn bind error %d\n", errno);
>          return -1;
>      }
>      state->bufioreq_local_port = rc;
> @@ -1061,11 +1264,23 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
>  
>      qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
>  
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
> +    vmstate_register(NULL, 0, &vmstate_xen_hvm, state);
> +#endif
> +
>      state->memory_listener = xen_memory_listener;
>      QLIST_INIT(&state->physmap);
>      memory_listener_register(&state->memory_listener, &address_space_memory);
>      state->log_for_dirtybit = NULL;
>  
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
> +    state->io_listener = xen_io_listener;
> +    memory_listener_register(&state->io_listener, &address_space_io);
> +
> +    state->pci_listener = xen_pci_listener;
> +    pci_listener_register(&state->pci_listener);
> +#endif
> +
>      /* Initialize backend core & drivers */
>      if (xen_be_init() != 0) {
>          fprintf(stderr, "%s: xen backend core setup failed\n", __FUNCTION__);
> -- 
> 1.7.10.4
>
Paul Durrant Oct. 10, 2014, 2:52 p.m. UTC | #2
> -----Original Message-----
> From: Stefano Stabellini [mailto:stefano.stabellini@eu.citrix.com]
> Sent: 10 October 2014 15:40
> To: Paul Durrant
> Cc: qemu-devel@nongnu.org; xen-devel@lists.xenproject.org; Michael S.
> Tsirkin; Stefano Stabellini; Peter Maydell; Paolo Bonzini; Michael Tokarev;
> Stefan Hajnoczi; Stefan Weil; Andreas Faerber"; Thomas Huth; Peter
> Crosthwaite; Eduardo Habkost; Gerd Hoffmann; Alexey Kardashevskiy;
> Alexander Graf
> Subject: Re: [PATCH] Xen: Use the ioreq-server API when available
> 
> On Fri, 10 Oct 2014, Paul Durrant wrote:
> > The ioreq-server API added to Xen 4.5 offers better security than
> > the existing Xen/QEMU interface because the shared pages that are
> > used to pass emulation request/results back and forth are removed
> > from the guest's memory space before any requests are serviced.
> > This prevents the guest from mapping these pages (they are in a
> > well known location) and attempting to attack QEMU by synthesizing
> > its own request structures. Hence, this patch modifies configure
> > to detect whether the API is available, and adds the necessary
> > code to use the API if it is.
> >
> > The ioreq-server API does require that PCI device models explicitly
> > register with Xen for config space accesses, so to use the API the
> > code in xen-hvm.c needs to be informed as PCI devices are added or
> > removed from PCI buses. This patch therefore also adds a PCI bus
> > listener interface akin to the memory listener interface to fulfil
> > this need.
> >
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > Cc: Michael S. Tsirkin <mst@redhat.com>
> > Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> > Cc: Peter Maydell <peter.maydell@linaro.org>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Michael Tokarev <mjt@tls.msk.ru>
> > Cc: Stefan Hajnoczi <stefanha@redhat.com>
> > Cc: Stefan Weil <sw@weilnetz.de>
> > Cc: Andreas Faerber" <afaerber@suse.de>
> > Cc: Thomas Huth <thuth@linux.vnet.ibm.com>
> > Cc: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
> > Cc: Eduardo Habkost <ehabkost@redhat.com>
> > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
> > Cc: Alexander Graf <agraf@suse.de>
> > ---
> >  configure               |   29 ++++++
> >  hw/pci/pci.c            |   65 ++++++++++++
> >  include/hw/pci/pci.h    |    9 ++
> >  include/qemu/typedefs.h |    1 +
> >  trace-events            |    8 ++
> >  xen-hvm.c               |  251
> +++++++++++++++++++++++++++++++++++++++++++----
> >  6 files changed, 345 insertions(+), 18 deletions(-)
> 
> Please split this patch into two: one to introduce the PCI listener
> stuff and another for the Xen specific changes.
> 

Originally it was, but I was uneasy about introducing an interface with no consumers. Is that generally acceptable?

[snip]
> > diff --git a/xen-hvm.c b/xen-hvm.c
> > index 05e522c..031edd9 100644
> > --- a/xen-hvm.c
> > +++ b/xen-hvm.c
> > @@ -78,6 +78,7 @@ typedef struct XenPhysmap {
> >  } XenPhysmap;
> >
> >  typedef struct XenIOState {
> > +    ioservid_t ioservid;
> >      shared_iopage_t *shared_page;
> >      buffered_iopage_t *buffered_io_page;
> >      QEMUTimer *buffered_io_timer;
> > @@ -92,6 +93,8 @@ typedef struct XenIOState {
> >
> >      struct xs_handle *xenstore;
> >      MemoryListener memory_listener;
> > +    MemoryListener io_listener;
> > +    PCIListener pci_listener;
> >      QLIST_HEAD(, XenPhysmap) physmap;
> >      hwaddr free_phys_offset;
> >      const XenPhysmap *log_for_dirtybit;
> > @@ -480,6 +483,20 @@ static void xen_region_add(MemoryListener
> *listener,
> >                             MemoryRegionSection *section)
> >  {
> >      memory_region_ref(section->mr);
> > +
> > +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
> 
> Please use include/hw/xen/xen_common.h for compat stuff.  In this case
> you could provide an empty implementation of
> xc_hvm_map_io_range_to_ioreq_server. Try to remove ifdefs from
> xen-hvm.c.
>

Ok, I can do it that way; I was only following suit with the already #ifdefed code in xen-hvm.c.

  Paul
Paolo Bonzini Oct. 10, 2014, 3:04 p.m. UTC | #3
Il 10/10/2014 16:52, Paul Durrant ha scritto:
>> -----Original Message-----
>> From: Stefano Stabellini [mailto:stefano.stabellini@eu.citrix.com]
>> Sent: 10 October 2014 15:40
>> To: Paul Durrant
>> Cc: qemu-devel@nongnu.org; xen-devel@lists.xenproject.org; Michael S.
>> Tsirkin; Stefano Stabellini; Peter Maydell; Paolo Bonzini; Michael Tokarev;
>> Stefan Hajnoczi; Stefan Weil; Andreas Faerber"; Thomas Huth; Peter
>> Crosthwaite; Eduardo Habkost; Gerd Hoffmann; Alexey Kardashevskiy;
>> Alexander Graf
>> Subject: Re: [PATCH] Xen: Use the ioreq-server API when available
>>
>> On Fri, 10 Oct 2014, Paul Durrant wrote:
>>> The ioreq-server API added to Xen 4.5 offers better security than
>>> the existing Xen/QEMU interface because the shared pages that are
>>> used to pass emulation request/results back and forth are removed
>>> from the guest's memory space before any requests are serviced.
>>> This prevents the guest from mapping these pages (they are in a
>>> well known location) and attempting to attack QEMU by synthesizing
>>> its own request structures. Hence, this patch modifies configure
>>> to detect whether the API is available, and adds the necessary
>>> code to use the API if it is.
>>>
>>> The ioreq-server API does require that PCI device models explicitly
>>> register with Xen for config space accesses, so to use the API the
>>> code in xen-hvm.c needs to be informed as PCI devices are added or
>>> removed from PCI buses. This patch therefore also adds a PCI bus
>>> listener interface akin to the memory listener interface to fulfil
>>> this need.
>>>
>>> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
>>> Cc: Michael S. Tsirkin <mst@redhat.com>
>>> Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
>>> Cc: Peter Maydell <peter.maydell@linaro.org>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Cc: Michael Tokarev <mjt@tls.msk.ru>
>>> Cc: Stefan Hajnoczi <stefanha@redhat.com>
>>> Cc: Stefan Weil <sw@weilnetz.de>
>>> Cc: Andreas Faerber" <afaerber@suse.de>
>>> Cc: Thomas Huth <thuth@linux.vnet.ibm.com>
>>> Cc: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
>>> Cc: Eduardo Habkost <ehabkost@redhat.com>
>>> Cc: Gerd Hoffmann <kraxel@redhat.com>
>>> Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
>>> Cc: Alexander Graf <agraf@suse.de>
>>> ---
>>>  configure               |   29 ++++++
>>>  hw/pci/pci.c            |   65 ++++++++++++
>>>  include/hw/pci/pci.h    |    9 ++
>>>  include/qemu/typedefs.h |    1 +
>>>  trace-events            |    8 ++
>>>  xen-hvm.c               |  251
>> +++++++++++++++++++++++++++++++++++++++++++----
>>>  6 files changed, 345 insertions(+), 18 deletions(-)
>>
>> Please split this patch into two: one to introduce the PCI listener
>> stuff and another for the Xen specific changes.
>>
> 
> Originally it was, but I was uneasy about introducing an interface with no consumers. Is that generally acceptable?

It is the suggested way as long as the two patches are part of the same
series.

Paolo
diff mbox

Patch

diff --git a/configure b/configure
index 9ac2600..c2db574 100755
--- a/configure
+++ b/configure
@@ -1876,6 +1876,32 @@  int main(void) {
   xc_gnttab_open(NULL, 0);
   xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
   xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
+  xc_hvm_create_ioreq_server(xc, 0, 0, NULL);
+  return 0;
+}
+EOF
+      compile_prog "" "$xen_libs"
+    then
+    xen_ctrl_version=450
+    xen=yes
+
+  elif
+      cat > $TMPC <<EOF &&
+#include <xenctrl.h>
+#include <xenstore.h>
+#include <stdint.h>
+#include <xen/hvm/hvm_info_table.h>
+#if !defined(HVM_MAX_VCPUS)
+# error HVM_MAX_VCPUS not defined
+#endif
+int main(void) {
+  xc_interface *xc;
+  xs_daemon_open();
+  xc = xc_interface_open(0, 0, 0);
+  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
+  xc_gnttab_open(NULL, 0);
+  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
+  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
   return 0;
 }
 EOF
@@ -4282,6 +4308,9 @@  if test -n "$sparc_cpu"; then
     echo "Target Sparc Arch $sparc_cpu"
 fi
 echo "xen support       $xen"
+if test "$xen" = "yes" ; then
+  echo "xen ctrl version  $xen_ctrl_version"
+fi
 echo "brlapi support    $brlapi"
 echo "bluez  support    $bluez"
 echo "Documentation     $docs"
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 6ce75aa..53c955d 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -122,6 +122,66 @@  static uint16_t pci_default_sub_device_id = PCI_SUBDEVICE_ID_QEMU;
 
 static QLIST_HEAD(, PCIHostState) pci_host_bridges;
 
+static QTAILQ_HEAD(pci_listeners, PCIListener) pci_listeners
+    = QTAILQ_HEAD_INITIALIZER(pci_listeners);
+
+enum ListenerDirection { Forward, Reverse };
+
+#define PCI_LISTENER_CALL(_callback, _direction, _args...)      \
+    do {                                                        \
+        PCIListener *_listener;                                 \
+                                                                \
+        switch (_direction) {                                   \
+        case Forward:                                           \
+            QTAILQ_FOREACH(_listener, &pci_listeners, link) {   \
+                if (_listener->_callback) {                     \
+                    _listener->_callback(_listener, ##_args);   \
+                }                                               \
+            }                                                   \
+            break;                                              \
+        case Reverse:                                           \
+            QTAILQ_FOREACH_REVERSE(_listener, &pci_listeners,   \
+                                   pci_listeners, link) {       \
+                if (_listener->_callback) {                     \
+                    _listener->_callback(_listener, ##_args);   \
+                }                                               \
+            }                                                   \
+            break;                                              \
+        default:                                                \
+            abort();                                            \
+        }                                                       \
+    } while (0)
+
+static int pci_listener_add(DeviceState *dev, void *opaque)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        PCIDevice *pci_dev = PCI_DEVICE(dev);
+
+        PCI_LISTENER_CALL(device_add, Forward, pci_dev);
+    }
+
+    return 0;
+}
+
+void pci_listener_register(PCIListener *listener)
+{
+    PCIHostState *host;
+
+    QTAILQ_INSERT_TAIL(&pci_listeners, listener, link);
+
+    QLIST_FOREACH(host, &pci_host_bridges, next) {
+        PCIBus *bus = host->bus;
+
+        qbus_walk_children(&bus->qbus, NULL, NULL, pci_listener_add,
+                           NULL, NULL);
+    }
+}
+
+void pci_listener_unregister(PCIListener *listener)
+{
+    QTAILQ_REMOVE(&pci_listeners, listener, link);
+}
+
 static int pci_bar(PCIDevice *d, int reg)
 {
     uint8_t type;
@@ -795,6 +855,8 @@  static void pci_config_free(PCIDevice *pci_dev)
 
 static void do_pci_unregister_device(PCIDevice *pci_dev)
 {
+    PCI_LISTENER_CALL(device_del, Reverse, pci_dev);
+
     pci_dev->bus->devices[pci_dev->devfn] = NULL;
     pci_config_free(pci_dev);
 
@@ -878,6 +940,9 @@  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pci_dev->config_write = config_write;
     bus->devices[devfn] = pci_dev;
     pci_dev->version_id = 2; /* Current pci device vmstate version */
+
+    PCI_LISTENER_CALL(device_add, Forward, pci_dev);
+
     return pci_dev;
 }
 
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index c352c7b..6c21b37 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -303,6 +303,15 @@  struct PCIDevice {
     MSIVectorPollNotifier msix_vector_poll_notifier;
 };
 
+struct PCIListener {
+    void (*device_add)(PCIListener *listener, PCIDevice *pci_dev);
+    void (*device_del)(PCIListener *listener, PCIDevice *pci_dev);
+    QTAILQ_ENTRY(PCIListener) link;
+};
+
+void pci_listener_register(PCIListener *listener);
+void pci_listener_unregister(PCIListener *listener);
+
 void pci_register_bar(PCIDevice *pci_dev, int region_num,
                       uint8_t attr, MemoryRegion *memory);
 void pci_register_vga(PCIDevice *pci_dev, MemoryRegion *mem,
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 04df51b..2b974c6 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -54,6 +54,7 @@  typedef struct PCIHostState PCIHostState;
 typedef struct PCIExpressHost PCIExpressHost;
 typedef struct PCIBus PCIBus;
 typedef struct PCIDevice PCIDevice;
+typedef struct PCIListener PCIListener;
 typedef struct PCIExpressDevice PCIExpressDevice;
 typedef struct PCIBridge PCIBridge;
 typedef struct PCIEAERMsg PCIEAERMsg;
diff --git a/trace-events b/trace-events
index 011d105..3efcff7 100644
--- a/trace-events
+++ b/trace-events
@@ -895,6 +895,14 @@  pvscsi_tx_rings_num_pages(const char* label, uint32_t num) "Number of %s pages:
 # xen-hvm.c
 xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: %#lx, size %#lx"
 xen_client_set_memory(uint64_t start_addr, unsigned long size, bool log_dirty) "%#"PRIx64" size %#lx, log_dirty %i"
+xen_ioreq_server_create(uint32_t id) "id: %u"
+xen_ioreq_server_destroy(uint32_t id) "id: %u"
+xen_map_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
+xen_unmap_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
+xen_map_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
+xen_unmap_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
+xen_map_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x"
+xen_unmap_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x"
 
 # xen-mapcache.c
 xen_map_cache(uint64_t phys_addr) "want %#"PRIx64
diff --git a/xen-hvm.c b/xen-hvm.c
index 05e522c..031edd9 100644
--- a/xen-hvm.c
+++ b/xen-hvm.c
@@ -78,6 +78,7 @@  typedef struct XenPhysmap {
 } XenPhysmap;
 
 typedef struct XenIOState {
+    ioservid_t ioservid;
     shared_iopage_t *shared_page;
     buffered_iopage_t *buffered_io_page;
     QEMUTimer *buffered_io_timer;
@@ -92,6 +93,8 @@  typedef struct XenIOState {
 
     struct xs_handle *xenstore;
     MemoryListener memory_listener;
+    MemoryListener io_listener;
+    PCIListener pci_listener;
     QLIST_HEAD(, XenPhysmap) physmap;
     hwaddr free_phys_offset;
     const XenPhysmap *log_for_dirtybit;
@@ -480,6 +483,20 @@  static void xen_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
     memory_region_ref(section->mr);
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
+    if (section->mr != &ram_memory) {
+        XenIOState *state = container_of(listener, XenIOState, memory_listener);
+        hwaddr start_addr = section->offset_within_address_space;
+        ram_addr_t size = int128_get64(section->size);
+        hwaddr end_addr = start_addr + size - 1;
+
+        trace_xen_map_mmio_range(state->ioservid, start_addr, end_addr);
+        xc_hvm_map_io_range_to_ioreq_server(xen_xc, xen_domid, state->ioservid,
+                                            1, start_addr, end_addr);
+    }
+#endif
+
     xen_set_memory(listener, section, true);
 }
 
@@ -487,9 +504,81 @@  static void xen_region_del(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
     xen_set_memory(listener, section, false);
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
+    if (section->mr != &ram_memory) {
+        XenIOState *state = container_of(listener, XenIOState, memory_listener);
+        hwaddr start_addr = section->offset_within_address_space;
+        ram_addr_t size = int128_get64(section->size);
+        hwaddr end_addr = start_addr + size - 1;
+
+        trace_xen_unmap_mmio_range(state->ioservid, start_addr, end_addr);
+        xc_hvm_unmap_io_range_from_ioreq_server(xen_xc, xen_domid, state->ioservid,
+                                                1, start_addr, end_addr);
+    }
+#endif
+
+    memory_region_unref(section->mr);
+}
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
+static void xen_io_add(MemoryListener *listener,
+		       MemoryRegionSection *section)
+{
+    XenIOState *state = container_of(listener, XenIOState, io_listener);
+    hwaddr start_addr = section->offset_within_address_space;
+    ram_addr_t size = int128_get64(section->size);
+    hwaddr end_addr = start_addr + size - 1;
+
+    trace_xen_map_portio_range(state->ioservid, start_addr, end_addr);
+    xc_hvm_map_io_range_to_ioreq_server(xen_xc, xen_domid, state->ioservid,
+                                        0, start_addr, end_addr);
+            
+    memory_region_ref(section->mr);
+}
+
+static void xen_io_del(MemoryListener *listener,
+		       MemoryRegionSection *section)
+{
+    XenIOState *state = container_of(listener, XenIOState, io_listener);
+    hwaddr start_addr = section->offset_within_address_space;
+    ram_addr_t size = int128_get64(section->size);
+    hwaddr end_addr = start_addr + size - 1;
+
+    trace_xen_unmap_portio_range(state->ioservid, start_addr, end_addr);
+    xc_hvm_unmap_io_range_from_ioreq_server(xen_xc, xen_domid, state->ioservid,
+                                            0, start_addr, end_addr);
+            
     memory_region_unref(section->mr);
 }
 
+static void xen_pci_add(PCIListener *listener,
+			PCIDevice *pci_dev)
+{
+    XenIOState *state = container_of(listener, XenIOState, pci_listener);
+
+    trace_xen_map_pcidev(state->ioservid, pci_bus_num(pci_dev->bus),
+                         PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn));
+    xc_hvm_map_pcidev_to_ioreq_server(xen_xc, xen_domid, state->ioservid,
+                                      0, pci_bus_num(pci_dev->bus),
+                                      PCI_SLOT(pci_dev->devfn),
+                                      PCI_FUNC(pci_dev->devfn));
+}
+
+static void xen_pci_del(PCIListener *listener,
+			PCIDevice *pci_dev)
+{
+    XenIOState *state = container_of(listener, XenIOState, pci_listener);
+
+    trace_xen_unmap_pcidev(state->ioservid, pci_bus_num(pci_dev->bus),
+                           PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn));
+    xc_hvm_unmap_pcidev_from_ioreq_server(xen_xc, xen_domid, state->ioservid,
+                                          0, pci_bus_num(pci_dev->bus),
+                                          PCI_SLOT(pci_dev->devfn),
+                                          PCI_FUNC(pci_dev->devfn));
+}
+#endif
+
 static void xen_sync_dirty_bitmap(XenIOState *state,
                                   hwaddr start_addr,
                                   ram_addr_t size)
@@ -590,6 +679,19 @@  static MemoryListener xen_memory_listener = {
     .priority = 10,
 };
 
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
+static MemoryListener xen_io_listener = {
+    .region_add = xen_io_add,
+    .region_del = xen_io_del,
+    .priority = 10,
+};
+
+static PCIListener xen_pci_listener = {
+    .device_add = xen_pci_add,
+    .device_del = xen_pci_del,
+};
+#endif
+
 /* get the ioreq packets from share mem */
 static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
 {
@@ -792,6 +894,29 @@  static void handle_ioreq(ioreq_t *req)
         case IOREQ_TYPE_INVALIDATE:
             xen_invalidate_map_cache();
             break;
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
+        case IOREQ_TYPE_PCI_CONFIG: {
+            uint32_t sbdf = req->addr >> 32;
+            uint32_t val;
+
+            /* Fake a write to port 0xCF8 so that
+             * the config space access will target the
+             * correct device model.
+             */
+            val = (1u << 31) |
+                  ((req->addr & 0x0f00) << 16) |
+                  ((sbdf & 0xffff) << 8) |
+                  (req->addr & 0xfc);
+            do_outp(0xcf8, 4, val);
+
+            /* Now issue the config space access via
+             * port 0xCFC
+             */
+            req->addr = 0xcfc | (req->addr & 0x03);
+            cpu_ioreq_pio(req);
+            break;
+        }
+#endif
         default:
             hw_error("Invalid ioreq type 0x%x\n", req->type);
     }
@@ -979,13 +1104,39 @@  static void xen_wakeup_notifier(Notifier *notifier, void *data)
     xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0);
 }
 
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
+static void xen_hvm_pre_save(void *opaque)
+{
+    XenIOState *state = opaque;
+
+    /* Stop servicing emulation requests */
+    trace_xen_ioreq_server_destroy(state->ioservid);
+    xc_hvm_set_ioreq_server_state(xen_xc, xen_domid, state->ioservid, 0);
+    xc_hvm_destroy_ioreq_server(xen_xc, xen_domid, state->ioservid);
+}
+
+static const VMStateDescription vmstate_xen_hvm = {
+    .name = "xen-hvm",
+    .version_id = 4,
+    .minimum_version_id = 4,
+    .pre_save = xen_hvm_pre_save,
+    .fields = (VMStateField[]) {
+        VMSTATE_END_OF_LIST()
+    },
+};
+#endif
+
 /* return 0 means OK, or -1 means critical issue -- will exit(1) */
 int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
                  MemoryRegion **ram_memory)
 {
     int i, rc;
-    unsigned long ioreq_pfn;
-    unsigned long bufioreq_evtchn;
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 450
+    unsigned long param;
+#endif
+    xen_pfn_t ioreq_pfn;
+    xen_pfn_t bufioreq_pfn;
+    evtchn_port_t bufioreq_evtchn;
     XenIOState *state;
 
     state = g_malloc0(sizeof (XenIOState));
@@ -1002,6 +1153,16 @@  int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
         return -1;
     }
 
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
+    rc = xc_hvm_create_ioreq_server(xen_xc, xen_domid, 1, &state->ioservid);
+    if (rc < 0) {
+        perror("xen: ioreq server create");
+        return -1;
+    }
+
+    trace_xen_ioreq_server_create(state->ioservid);
+#endif
+
     state->exit.notify = xen_exit_notifier;
     qemu_add_exit_notifier(&state->exit);
 
@@ -1011,23 +1172,71 @@  int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
     state->wakeup.notify = xen_wakeup_notifier;
     qemu_register_wakeup_notifier(&state->wakeup);
 
-    xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn);
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
+    rc = xc_hvm_get_ioreq_server_info(xen_xc, xen_domid, state->ioservid,
+				      &ioreq_pfn, &bufioreq_pfn,
+				      &bufioreq_evtchn);
+    if (rc < 0) {
+        hw_error("failed to get ioreq server info: error %d handle=" XC_INTERFACE_FMT,
+                 errno, xen_xc);
+    }
+#else
+    rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &param);
+    if (rc < 0) {
+        fprintf(stderr, "failed to get HVM_PARAM_IOREQ_PFN\n");
+        return -1;
+    }
+
+    ioreq_pfn = param;
+
+    rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_PFN, &param);
+    if (rc < 0) {
+        fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_PFN\n");
+        return -1;
+    }
+
+    bufioreq_pfn = param;
+
+    rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_EVTCHN,
+                          &param);
+    if (rc < 0) {
+        fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_EVTCHN\n");
+        return -1;
+    }
+
+    bufioreq_evtchn = param;
+#endif
+
     DPRINTF("shared page at pfn %lx\n", ioreq_pfn);
-    state->shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE,
-                                              PROT_READ|PROT_WRITE, ioreq_pfn);
+    DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn);
+    DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn);
+
+    state->shared_page = xc_map_foreign_range(xen_xc, xen_domid,
+                                              XC_PAGE_SIZE,
+                                              PROT_READ|PROT_WRITE,
+                                              ioreq_pfn);
     if (state->shared_page == NULL) {
         hw_error("map shared IO page returned error %d handle=" XC_INTERFACE_FMT,
                  errno, xen_xc);
     }
 
-    xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn);
-    DPRINTF("buffered io page at pfn %lx\n", ioreq_pfn);
-    state->buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE,
-                                                   PROT_READ|PROT_WRITE, ioreq_pfn);
+    state->buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid,
+                                                   XC_PAGE_SIZE,
+                                                   PROT_READ|PROT_WRITE,
+                                                   bufioreq_pfn);
     if (state->buffered_io_page == NULL) {
         hw_error("map buffered IO page returned error %d", errno);
     }
 
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
+    rc = xc_hvm_set_ioreq_server_state(xen_xc, xen_domid, state->ioservid,
+				       1);
+    if (rc < 0) {
+        hw_error("failed to enable ioreq server info: error %d handle=" XC_INTERFACE_FMT,
+                 errno, xen_xc);
+    }
+#endif
+
     state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t));
 
     /* FIXME: how about if we overflow the page here? */
@@ -1035,22 +1244,16 @@  int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
         rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid,
                                         xen_vcpu_eport(state->shared_page, i));
         if (rc == -1) {
-            fprintf(stderr, "bind interdomain ioctl error %d\n", errno);
+            fprintf(stderr, "shared evtchn %d bind error %d\n", i, errno);
             return -1;
         }
         state->ioreq_local_port[i] = rc;
     }
 
-    rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_EVTCHN,
-            &bufioreq_evtchn);
-    if (rc < 0) {
-        fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_EVTCHN\n");
-        return -1;
-    }
     rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid,
-            (uint32_t)bufioreq_evtchn);
+                                    bufioreq_evtchn);
     if (rc == -1) {
-        fprintf(stderr, "bind interdomain ioctl error %d\n", errno);
+        fprintf(stderr, "buffered evtchn bind error %d\n", errno);
         return -1;
     }
     state->bufioreq_local_port = rc;
@@ -1061,11 +1264,23 @@  int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size,
 
     qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
 
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
+    vmstate_register(NULL, 0, &vmstate_xen_hvm, state);
+#endif
+
     state->memory_listener = xen_memory_listener;
     QLIST_INIT(&state->physmap);
     memory_listener_register(&state->memory_listener, &address_space_memory);
     state->log_for_dirtybit = NULL;
 
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450
+    state->io_listener = xen_io_listener;
+    memory_listener_register(&state->io_listener, &address_space_io);
+
+    state->pci_listener = xen_pci_listener;
+    pci_listener_register(&state->pci_listener);
+#endif
+
     /* Initialize backend core & drivers */
     if (xen_be_init() != 0) {
         fprintf(stderr, "%s: xen backend core setup failed\n", __FUNCTION__);