Patchwork [RFC,V3,10/12] xen: Initialize event channels and io rings

login
register
mail settings
Submitter Anthony PERARD
Date Sept. 17, 2010, 11:15 a.m.
Message ID <1284722107-28550-11-git-send-email-anthony.perard@citrix.com>
Download mbox | patch
Permalink /patch/65067/
State New
Headers show

Comments

Anthony PERARD - Sept. 17, 2010, 11:15 a.m.
From: Anthony PERARD <anthony.perard@citrix.com>

Open and bind event channels; map ioreq and buffered ioreq rings.

Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 hw/xen_common.h |    1 +
 xen-all.c       |  381 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 382 insertions(+), 0 deletions(-)
Blue Swirl - Sept. 17, 2010, 7:27 p.m.
On Fri, Sep 17, 2010 at 11:15 AM,  <anthony.perard@citrix.com> wrote:
> From: Anthony PERARD <anthony.perard@citrix.com>
>
> Open and bind event channels; map ioreq and buffered ioreq rings.

In general, because of CPUState accesses and cpu_in/out use, this
looks like CPU code, specifically x86. Could this belong to
target-i386/xen.c instead, much like target-i386/kvm.c vs ./kvm-all.c?
Do other CPU types use this stuff?

>
> Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> ---
>  hw/xen_common.h |    1 +
>  xen-all.c       |  381 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 382 insertions(+), 0 deletions(-)
>
> diff --git a/hw/xen_common.h b/hw/xen_common.h
> index dd54063..96cfad7 100644
> --- a/hw/xen_common.h
> +++ b/hw/xen_common.h
> @@ -53,5 +53,6 @@ typedef xc_interface *qemu_xc_interface;
>  #endif
>
>  qemu_irq *i8259_xen_init(void);
> +void destroy_hvm_domain(void);
>
>  #endif /* QEMU_HW_XEN_COMMON_H */
> diff --git a/xen-all.c b/xen-all.c
> index 4e0b061..13672f0 100644
> --- a/xen-all.c
> +++ b/xen-all.c
> @@ -8,12 +8,38 @@
>
>  #include "config.h"
>
> +#include <sys/mman.h>
> +
>  #include "hw/pci.h"
>  #include "hw/xen_common.h"
>  #include "hw/xen_backend.h"
>
>  #include "xen_mapcache.h"
>
> +#include <xen/hvm/ioreq.h>
> +
> +//#define DEBUG_XEN
> +
> +#ifdef DEBUG_XEN
> +#define DPRINTF(fmt, ...) \
> +    do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0)
> +#else
> +#define DPRINTF(fmt, ...) \
> +    do { } while (0)
> +#endif
> +
> +shared_iopage_t *shared_page = NULL;
> +#define BUFFER_IO_MAX_DELAY  100
> +buffered_iopage_t *buffered_io_page = NULL;
> +QEMUTimer *buffered_io_timer;
> +/* the evtchn port for polling the notification, */
> +evtchn_port_t *ioreq_local_port;
> +/* the evtchn fd for polling */
> +int xce_handle = -1;
> +/* which vcpu we are serving */
> +int send_vcpu = 0;
> +long time_offset = 0;

Are all these globals needed? Can some of them actually be 'static'?
Could you wrap these into a struct and pass that around?

> +
>  /* Xen specific function for piix pci */
>
>  int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
> @@ -111,19 +137,374 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
>  }
>
>
> +/* VCPU Operations, MMIO, IO ring ... */
> +
> +/* get the ioreq packets from share mem */
> +static ioreq_t *cpu_get_ioreq_from_shared_memory(int vcpu)
> +{
> +    ioreq_t *req = &shared_page->vcpu_ioreq[vcpu];
> +
> +    if (req->state != STATE_IOREQ_READY) {
> +        DPRINTF("I/O request not ready: "
> +                "%x, ptr: %x, port: %"PRIx64", "
> +                "data: %"PRIx64", count: %u, size: %u\n",
> +                req->state, req->data_is_ptr, req->addr,
> +                req->data, req->count, req->size);
> +        return NULL;
> +    }
> +
> +    xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */
> +
> +    req->state = STATE_IOREQ_INPROCESS;
> +    return req;
> +}
> +
> +/* use poll to get the port notification */
> +/* ioreq_vec--out,the */
> +/* retval--the number of ioreq packet */
> +static ioreq_t *cpu_get_ioreq(void)
> +{
> +    int i;
> +    evtchn_port_t port;
> +
> +    port = xc_evtchn_pending(xce_handle);
> +    if (port != -1) {
> +        for ( i = 0; i < smp_cpus; i++ )

Please add braces and remove extra spaces after '(' and before ')',
also in other places.

> +            if ( ioreq_local_port[i] == port )
> +                break;
> +
> +        if ( i == smp_cpus ) {
> +            hw_error("Fatal error while trying to get io event!\n");
> +        }
> +
> +        /* unmask the wanted port again */
> +        xc_evtchn_unmask(xce_handle, port);
> +
> +        /* get the io packet from shared memory */
> +        send_vcpu = i;
> +        return cpu_get_ioreq_from_shared_memory(i);
> +    }
> +
> +    /* read error or read nothing */
> +    return NULL;
> +}
> +
> +static uint32_t do_inp(CPUState *env, pio_addr_t addr, unsigned long size)
> +{
> +    switch(size) {
> +        case 1:
> +            return cpu_inb(addr);
> +        case 2:
> +            return cpu_inw(addr);
> +        case 4:
> +            return cpu_inl(addr);
> +        default:
> +            hw_error("inp: bad size: %04"FMT_pioaddr" %lx", addr, size);
> +    }
> +}
> +
> +static void do_outp(CPUState *env, pio_addr_t addr,
> +        unsigned long size, uint32_t val)
> +{
> +    switch(size) {
> +        case 1:
> +            return cpu_outb(addr, val);
> +        case 2:
> +            return cpu_outw(addr, val);
> +        case 4:
> +            return cpu_outl(addr, val);
> +        default:
> +            hw_error("outp: bad size: %04"FMT_pioaddr" %lx", addr, size);
> +    }
> +}
> +
> +static void cpu_ioreq_pio(CPUState *env, ioreq_t *req)
> +{
> +    int i, sign;
> +
> +    sign = req->df ? -1 : 1;
> +
> +    if (req->dir == IOREQ_READ) {
> +        if (!req->data_is_ptr) {
> +            req->data = do_inp(env, req->addr, req->size);
> +        } else {
> +            uint32_t tmp;
> +
> +            for (i = 0; i < req->count; i++) {
> +                tmp = do_inp(env, req->addr, req->size);
> +                cpu_physical_memory_write(req->data + (sign * i * req->size),
> +                        (uint8_t*) &tmp, req->size);
> +            }
> +        }
> +    } else if (req->dir == IOREQ_WRITE) {
> +        if (!req->data_is_ptr) {
> +            do_outp(env, req->addr, req->size, req->data);
> +        } else {
> +            for (i = 0; i < req->count; i++) {
> +                uint32_t tmp = 0;
> +
> +                cpu_physical_memory_read(req->data + (sign * i * req->size),
> +                        (uint8_t*) &tmp, req->size);
> +                do_outp(env, req->addr, req->size, tmp);
> +            }
> +        }
> +    }
> +}
> +
> +static void cpu_ioreq_move(CPUState *env, ioreq_t *req)
> +{
> +    int i, sign;
> +
> +    sign = req->df ? -1 : 1;
> +
> +    if (!req->data_is_ptr) {
> +        if (req->dir == IOREQ_READ) {
> +            for (i = 0; i < req->count; i++) {
> +                cpu_physical_memory_read(req->addr + (sign * i * req->size),
> +                        (uint8_t*) &req->data, req->size);
> +            }
> +        } else if (req->dir == IOREQ_WRITE) {
> +            for (i = 0; i < req->count; i++) {
> +                cpu_physical_memory_write(req->addr + (sign * i * req->size),
> +                        (uint8_t*) &req->data, req->size);
> +            }
> +        }
> +    } else {
> +        target_ulong tmp;
> +
> +        if (req->dir == IOREQ_READ) {
> +            for (i = 0; i < req->count; i++) {
> +                cpu_physical_memory_read(req->addr + (sign * i * req->size),
> +                        (uint8_t*) &tmp, req->size);
> +                cpu_physical_memory_write(req->data + (sign * i * req->size),
> +                        (uint8_t*) &tmp, req->size);
> +            }
> +        } else if (req->dir == IOREQ_WRITE) {
> +            for (i = 0; i < req->count; i++) {
> +                cpu_physical_memory_read(req->data + (sign * i * req->size),
> +                        (uint8_t*) &tmp, req->size);
> +                cpu_physical_memory_write(req->addr + (sign * i * req->size),
> +                        (uint8_t*) &tmp, req->size);
> +            }
> +        }
> +    }
> +}
> +
> +static void cpu_ioreq_timeoffset(CPUState *env, ioreq_t *req)
> +{
> +    /* char b[64]; */
> +
> +    time_offset += (unsigned long)req->data;
> +
> +    //DPRINTF("Time offset set %ld, added offset %"PRId64"\n",
> +            //time_offset, req->data);
> +    /* snprintf(b, 64, "%ld", time_offset); */
> +    /* xenstore_vm_write(xen_domid, "rtc/timeoffset", b); */

The commented-out code should probably go.

> +}
> +
> +static void handle_ioreq(CPUState *env, ioreq_t *req)
> +{
> +    if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
> +            (req->size < sizeof(target_ulong)))
> +        req->data &= ((target_ulong)1 << (8 * req->size)) - 1;
> +
> +    switch (req->type) {
> +        case IOREQ_TYPE_PIO:
> +            cpu_ioreq_pio(env, req);
> +            break;
> +        case IOREQ_TYPE_COPY:
> +            cpu_ioreq_move(env, req);
> +            break;
> +        case IOREQ_TYPE_TIMEOFFSET:
> +            cpu_ioreq_timeoffset(env, req);
> +            break;
> +        case IOREQ_TYPE_INVALIDATE:
> +            qemu_invalidate_map_cache();
> +            break;
> +        default:
> +            hw_error("Invalid ioreq type 0x%x\n", req->type);
> +    }
> +}
> +
> +static void handle_buffered_iopage(CPUState *env)
> +{
> +    buf_ioreq_t *buf_req = NULL;
> +    ioreq_t req;
> +    int qw;
> +
> +    if (!buffered_io_page)
> +        return;
> +
> +    while (buffered_io_page->read_pointer !=
> +            buffered_io_page->write_pointer) {
> +        buf_req = &buffered_io_page->buf_ioreq[
> +            buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
> +        req.size = 1UL << buf_req->size;
> +        req.count = 1;
> +        req.addr = buf_req->addr;
> +        req.data = buf_req->data;
> +        req.state = STATE_IOREQ_READY;
> +        req.dir = buf_req->dir;
> +        req.df = 1;
> +        req.type = buf_req->type;
> +        req.data_is_ptr = 0;
> +        qw = (req.size == 8);
> +        if (qw) {
> +            buf_req = &buffered_io_page->buf_ioreq[
> +                (buffered_io_page->read_pointer+1) % IOREQ_BUFFER_SLOT_NUM];
> +            req.data |= ((uint64_t)buf_req->data) << 32;
> +        }
> +
> +        handle_ioreq(env, &req);
> +
> +        xen_mb();
> +        buffered_io_page->read_pointer += qw ? 2 : 1;
> +    }
> +}
> +
> +static void handle_buffered_io(void *opaque)
> +{
> +    CPUState *env = opaque;
> +
> +    handle_buffered_iopage(env);
> +    qemu_mod_timer(buffered_io_timer, BUFFER_IO_MAX_DELAY +
> +                   qemu_get_clock(rt_clock));
> +}
> +
> +static void cpu_handle_ioreq(void *opaque)
> +{
> +    CPUState *env = opaque;
> +    ioreq_t *req = cpu_get_ioreq();
> +
> +    handle_buffered_iopage(env);
> +    if (req) {
> +        handle_ioreq(env, req);
> +
> +        if (req->state != STATE_IOREQ_INPROCESS) {
> +            fprintf(stderr, "Badness in I/O request ... not in service?!: "
> +                    "%x, ptr: %x, port: %"PRIx64", "
> +                    "data: %"PRIx64", count: %u, size: %u\n",
> +                    req->state, req->data_is_ptr, req->addr,
> +                    req->data, req->count, req->size);
> +            destroy_hvm_domain();
> +            return;
> +        }
> +
> +        xen_wmb(); /* Update ioreq contents /then/ update state. */
> +
> +        /*
> +         * We do this before we send the response so that the tools
> +         * have the opportunity to pick up on the reset before the
> +         * guest resumes and does a hlt with interrupts disabled which
> +         * causes Xen to powerdown the domain.
> +         */
> +        if (vm_running) {
> +            if (qemu_shutdown_requested_get()) {
> +                destroy_hvm_domain();
> +            }
> +            if (qemu_reset_requested_get()) {
> +                qemu_system_reset();
> +            }
> +        }
> +
> +        req->state = STATE_IORESP_READY;
> +        xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]);
> +    }
> +}
> +
> +static void xen_main_loop_prepare(void)
> +{
> +    CPUState *env = cpu_single_env;
> +
> +    int evtchn_fd = xce_handle == -1 ? -1 : xc_evtchn_fd(xce_handle);
> +
> +    buffered_io_timer = qemu_new_timer(rt_clock, handle_buffered_io,
> +                                       cpu_single_env);
> +    qemu_mod_timer(buffered_io_timer, qemu_get_clock(rt_clock));
> +
> +    if (evtchn_fd != -1)
> +        qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, env);

braces

> +}
> +
> +
>  /* Initialise Xen */
>
> +static void xen_vm_change_state_handler(void *opaque, int running, int reason)
> +{
> +    if (running)

braces

> +        xen_main_loop_prepare();
> +}
> +
>  int xen_init(int smp_cpus)
>  {
> +    int i, rc;
> +    unsigned long ioreq_pfn;
> +
>     xen_xc = xc_interface_open(NULL, NULL, 0);
>     if (xen_xc == NULL) {
>         xen_be_printf(NULL, 0, "can't open xen interface\n");
>         return -1;
>     }
>
> +    xce_handle = xc_evtchn_open();
> +    if (xce_handle == -1) {
> +        perror("open");
> +        return -errno;
> +    }
> +
> +    xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn);
> +    DPRINTF("shared page at pfn %lx\n", ioreq_pfn);
> +    shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE,
> +                                       PROT_READ|PROT_WRITE, ioreq_pfn);
> +    if (shared_page == NULL) {
> +        hw_error("map shared IO page returned error %d handle=%p", errno, xen_xc);
> +    }
> +
> +    xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn);
> +    DPRINTF("buffered io page at pfn %lx\n", ioreq_pfn);
> +    buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE,
> +                                            PROT_READ|PROT_WRITE, ioreq_pfn);
> +    if (buffered_io_page == NULL) {
> +        hw_error("map buffered IO page returned error %d", errno);
> +    }
> +
> +    ioreq_local_port = qemu_mallocz(smp_cpus * sizeof(evtchn_port_t));
> +
> +    /* FIXME: how about if we overflow the page here? */
> +    for (i = 0; i < smp_cpus; i++) {
> +        rc = xc_evtchn_bind_interdomain(xce_handle, xen_domid,
> +                                        shared_page->vcpu_ioreq[i].vp_eport);
> +        if (rc == -1) {
> +            fprintf(stderr, "bind interdomain ioctl error %d\n", errno);
> +            return -1;
> +        }
> +        ioreq_local_port[i] = rc;
> +    }
> +
>     /* Init RAM management */
>     qemu_map_cache_init();
>     xen_ram_init(ram_size);
>
> +    qemu_add_vm_change_state_handler(xen_vm_change_state_handler, NULL);
> +
>     return 0;
>  }
> +
> +void destroy_hvm_domain(void)
> +{
> +    xc_interface *xc_handle;
> +    int sts;
> +
> +    xc_handle = xc_interface_open(NULL, NULL, 0);
> +    if (!xc_handle)
> +        fprintf(stderr, "Cannot acquire xenctrl handle\n");
> +    else {
> +        sts = xc_domain_shutdown(xc_handle, xen_domid, SHUTDOWN_poweroff);
> +        if (sts != 0)
> +            fprintf(stderr, "? xc_domain_shutdown failed to issue poweroff, "
> +                    "sts %d, errno %d\n", sts, errno);

Please add braces here, and consider using perror() to report the error.
Stefano Stabellini - Sept. 22, 2010, 10:28 a.m.
On Fri, 17 Sep 2010, Blue Swirl wrote:
> On Fri, Sep 17, 2010 at 11:15 AM,  <anthony.perard@citrix.com> wrote:
> > From: Anthony PERARD <anthony.perard@citrix.com>
> >
> > Open and bind event channels; map ioreq and buffered ioreq rings.
> 
> In general, because of CPUState accesses and cpu_in/out use, this
> looks like CPU code, specifically x86. Could this belong to
> target-i386/xen.c instead, much like target-i386/kvm.c vs ./kvm-all.c?
> Do other CPU types use this stuff?
> 

Even though it might look like CPU code, this code only deals with I/O
events from Xen on behalf of the guest.
In fact, it runs as is on ia64, AFAIK.

Patch

diff --git a/hw/xen_common.h b/hw/xen_common.h
index dd54063..96cfad7 100644
--- a/hw/xen_common.h
+++ b/hw/xen_common.h
@@ -53,5 +53,6 @@  typedef xc_interface *qemu_xc_interface;
 #endif
 
 qemu_irq *i8259_xen_init(void);
+void destroy_hvm_domain(void);
 
 #endif /* QEMU_HW_XEN_COMMON_H */
diff --git a/xen-all.c b/xen-all.c
index 4e0b061..13672f0 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -8,12 +8,38 @@ 
 
 #include "config.h"
 
+#include <sys/mman.h>
+
 #include "hw/pci.h"
 #include "hw/xen_common.h"
 #include "hw/xen_backend.h"
 
 #include "xen_mapcache.h"
 
+#include <xen/hvm/ioreq.h>
+
+//#define DEBUG_XEN
+
+#ifdef DEBUG_XEN
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+shared_iopage_t *shared_page = NULL;
+#define BUFFER_IO_MAX_DELAY  100
+buffered_iopage_t *buffered_io_page = NULL;
+QEMUTimer *buffered_io_timer;
+/* the evtchn port for polling the notification, */
+evtchn_port_t *ioreq_local_port;
+/* the evtchn fd for polling */
+int xce_handle = -1;
+/* which vcpu we are serving */
+int send_vcpu = 0;
+long time_offset = 0;
+
 /* Xen specific function for piix pci */
 
 int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
@@ -111,19 +137,374 @@  void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
 }
 
 
+/* VCPU Operations, MMIO, IO ring ... */
+
+/* get the ioreq packets from share mem */
+static ioreq_t *cpu_get_ioreq_from_shared_memory(int vcpu)
+{
+    ioreq_t *req = &shared_page->vcpu_ioreq[vcpu];
+
+    if (req->state != STATE_IOREQ_READY) {
+        DPRINTF("I/O request not ready: "
+                "%x, ptr: %x, port: %"PRIx64", "
+                "data: %"PRIx64", count: %u, size: %u\n",
+                req->state, req->data_is_ptr, req->addr,
+                req->data, req->count, req->size);
+        return NULL;
+    }
+
+    xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */
+
+    req->state = STATE_IOREQ_INPROCESS;
+    return req;
+}
+
+/* use poll to get the port notification */
+/* ioreq_vec--out,the */
+/* retval--the number of ioreq packet */
+static ioreq_t *cpu_get_ioreq(void)
+{
+    int i;
+    evtchn_port_t port;
+
+    port = xc_evtchn_pending(xce_handle);
+    if (port != -1) {
+        for ( i = 0; i < smp_cpus; i++ )
+            if ( ioreq_local_port[i] == port )
+                break;
+
+        if ( i == smp_cpus ) {
+            hw_error("Fatal error while trying to get io event!\n");
+        }
+
+        /* unmask the wanted port again */
+        xc_evtchn_unmask(xce_handle, port);
+
+        /* get the io packet from shared memory */
+        send_vcpu = i;
+        return cpu_get_ioreq_from_shared_memory(i);
+    }
+
+    /* read error or read nothing */
+    return NULL;
+}
+
+static uint32_t do_inp(CPUState *env, pio_addr_t addr, unsigned long size)
+{
+    switch(size) {
+        case 1:
+            return cpu_inb(addr);
+        case 2:
+            return cpu_inw(addr);
+        case 4:
+            return cpu_inl(addr);
+        default:
+            hw_error("inp: bad size: %04"FMT_pioaddr" %lx", addr, size);
+    }
+}
+
+static void do_outp(CPUState *env, pio_addr_t addr,
+        unsigned long size, uint32_t val)
+{
+    switch(size) {
+        case 1:
+            return cpu_outb(addr, val);
+        case 2:
+            return cpu_outw(addr, val);
+        case 4:
+            return cpu_outl(addr, val);
+        default:
+            hw_error("outp: bad size: %04"FMT_pioaddr" %lx", addr, size);
+    }
+}
+
+static void cpu_ioreq_pio(CPUState *env, ioreq_t *req)
+{
+    int i, sign;
+
+    sign = req->df ? -1 : 1;
+
+    if (req->dir == IOREQ_READ) {
+        if (!req->data_is_ptr) {
+            req->data = do_inp(env, req->addr, req->size);
+        } else {
+            uint32_t tmp;
+
+            for (i = 0; i < req->count; i++) {
+                tmp = do_inp(env, req->addr, req->size);
+                cpu_physical_memory_write(req->data + (sign * i * req->size),
+                        (uint8_t*) &tmp, req->size);
+            }
+        }
+    } else if (req->dir == IOREQ_WRITE) {
+        if (!req->data_is_ptr) {
+            do_outp(env, req->addr, req->size, req->data);
+        } else {
+            for (i = 0; i < req->count; i++) {
+                uint32_t tmp = 0;
+
+                cpu_physical_memory_read(req->data + (sign * i * req->size),
+                        (uint8_t*) &tmp, req->size);
+                do_outp(env, req->addr, req->size, tmp);
+            }
+        }
+    }
+}
+
+static void cpu_ioreq_move(CPUState *env, ioreq_t *req)
+{
+    int i, sign;
+
+    sign = req->df ? -1 : 1;
+
+    if (!req->data_is_ptr) {
+        if (req->dir == IOREQ_READ) {
+            for (i = 0; i < req->count; i++) {
+                cpu_physical_memory_read(req->addr + (sign * i * req->size),
+                        (uint8_t*) &req->data, req->size);
+            }
+        } else if (req->dir == IOREQ_WRITE) {
+            for (i = 0; i < req->count; i++) {
+                cpu_physical_memory_write(req->addr + (sign * i * req->size),
+                        (uint8_t*) &req->data, req->size);
+            }
+        }
+    } else {
+        target_ulong tmp;
+
+        if (req->dir == IOREQ_READ) {
+            for (i = 0; i < req->count; i++) {
+                cpu_physical_memory_read(req->addr + (sign * i * req->size),
+                        (uint8_t*) &tmp, req->size);
+                cpu_physical_memory_write(req->data + (sign * i * req->size),
+                        (uint8_t*) &tmp, req->size);
+            }
+        } else if (req->dir == IOREQ_WRITE) {
+            for (i = 0; i < req->count; i++) {
+                cpu_physical_memory_read(req->data + (sign * i * req->size),
+                        (uint8_t*) &tmp, req->size);
+                cpu_physical_memory_write(req->addr + (sign * i * req->size),
+                        (uint8_t*) &tmp, req->size);
+            }
+        }
+    }
+}
+
+static void cpu_ioreq_timeoffset(CPUState *env, ioreq_t *req)
+{
+    /* char b[64]; */
+
+    time_offset += (unsigned long)req->data;
+
+    //DPRINTF("Time offset set %ld, added offset %"PRId64"\n",
+            //time_offset, req->data);
+    /* snprintf(b, 64, "%ld", time_offset); */
+    /* xenstore_vm_write(xen_domid, "rtc/timeoffset", b); */
+}
+
+static void handle_ioreq(CPUState *env, ioreq_t *req)
+{
+    if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
+            (req->size < sizeof(target_ulong)))
+        req->data &= ((target_ulong)1 << (8 * req->size)) - 1;
+
+    switch (req->type) {
+        case IOREQ_TYPE_PIO:
+            cpu_ioreq_pio(env, req);
+            break;
+        case IOREQ_TYPE_COPY:
+            cpu_ioreq_move(env, req);
+            break;
+        case IOREQ_TYPE_TIMEOFFSET:
+            cpu_ioreq_timeoffset(env, req);
+            break;
+        case IOREQ_TYPE_INVALIDATE:
+            qemu_invalidate_map_cache();
+            break;
+        default:
+            hw_error("Invalid ioreq type 0x%x\n", req->type);
+    }
+}
+
+static void handle_buffered_iopage(CPUState *env)
+{
+    buf_ioreq_t *buf_req = NULL;
+    ioreq_t req;
+    int qw;
+
+    if (!buffered_io_page)
+        return;
+
+    while (buffered_io_page->read_pointer !=
+            buffered_io_page->write_pointer) {
+        buf_req = &buffered_io_page->buf_ioreq[
+            buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
+        req.size = 1UL << buf_req->size;
+        req.count = 1;
+        req.addr = buf_req->addr;
+        req.data = buf_req->data;
+        req.state = STATE_IOREQ_READY;
+        req.dir = buf_req->dir;
+        req.df = 1;
+        req.type = buf_req->type;
+        req.data_is_ptr = 0;
+        qw = (req.size == 8);
+        if (qw) {
+            buf_req = &buffered_io_page->buf_ioreq[
+                (buffered_io_page->read_pointer+1) % IOREQ_BUFFER_SLOT_NUM];
+            req.data |= ((uint64_t)buf_req->data) << 32;
+        }
+
+        handle_ioreq(env, &req);
+
+        xen_mb();
+        buffered_io_page->read_pointer += qw ? 2 : 1;
+    }
+}
+
+static void handle_buffered_io(void *opaque)
+{
+    CPUState *env = opaque;
+
+    handle_buffered_iopage(env);
+    qemu_mod_timer(buffered_io_timer, BUFFER_IO_MAX_DELAY +
+                   qemu_get_clock(rt_clock));
+}
+
+static void cpu_handle_ioreq(void *opaque)
+{
+    CPUState *env = opaque;
+    ioreq_t *req = cpu_get_ioreq();
+
+    handle_buffered_iopage(env);
+    if (req) {
+        handle_ioreq(env, req);
+
+        if (req->state != STATE_IOREQ_INPROCESS) {
+            fprintf(stderr, "Badness in I/O request ... not in service?!: "
+                    "%x, ptr: %x, port: %"PRIx64", "
+                    "data: %"PRIx64", count: %u, size: %u\n",
+                    req->state, req->data_is_ptr, req->addr,
+                    req->data, req->count, req->size);
+            destroy_hvm_domain();
+            return;
+        }
+
+        xen_wmb(); /* Update ioreq contents /then/ update state. */
+
+        /*
+         * We do this before we send the response so that the tools
+         * have the opportunity to pick up on the reset before the
+         * guest resumes and does a hlt with interrupts disabled which
+         * causes Xen to powerdown the domain.
+         */
+        if (vm_running) {
+            if (qemu_shutdown_requested_get()) {
+                destroy_hvm_domain();
+            }
+            if (qemu_reset_requested_get()) {
+                qemu_system_reset();
+            }
+        }
+
+        req->state = STATE_IORESP_READY;
+        xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]);
+    }
+}
+
+static void xen_main_loop_prepare(void)
+{
+    CPUState *env = cpu_single_env;
+
+    int evtchn_fd = xce_handle == -1 ? -1 : xc_evtchn_fd(xce_handle);
+
+    buffered_io_timer = qemu_new_timer(rt_clock, handle_buffered_io,
+                                       cpu_single_env);
+    qemu_mod_timer(buffered_io_timer, qemu_get_clock(rt_clock));
+
+    if (evtchn_fd != -1)
+        qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, env);
+}
+
+
 /* Initialise Xen */
 
+static void xen_vm_change_state_handler(void *opaque, int running, int reason)
+{
+    if (running)
+        xen_main_loop_prepare();
+}
+
 int xen_init(int smp_cpus)
 {
+    int i, rc;
+    unsigned long ioreq_pfn;
+
     xen_xc = xc_interface_open(NULL, NULL, 0);
     if (xen_xc == NULL) {
         xen_be_printf(NULL, 0, "can't open xen interface\n");
         return -1;
     }
 
+    xce_handle = xc_evtchn_open();
+    if (xce_handle == -1) {
+        perror("open");
+        return -errno;
+    }
+
+    xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn);
+    DPRINTF("shared page at pfn %lx\n", ioreq_pfn);
+    shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE, ioreq_pfn);
+    if (shared_page == NULL) {
+        hw_error("map shared IO page returned error %d handle=%p", errno, xen_xc);
+    }
+
+    xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn);
+    DPRINTF("buffered io page at pfn %lx\n", ioreq_pfn);
+    buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE,
+                                            PROT_READ|PROT_WRITE, ioreq_pfn);
+    if (buffered_io_page == NULL) {
+        hw_error("map buffered IO page returned error %d", errno);
+    }
+
+    ioreq_local_port = qemu_mallocz(smp_cpus * sizeof(evtchn_port_t));
+
+    /* FIXME: how about if we overflow the page here? */
+    for (i = 0; i < smp_cpus; i++) {
+        rc = xc_evtchn_bind_interdomain(xce_handle, xen_domid,
+                                        shared_page->vcpu_ioreq[i].vp_eport);
+        if (rc == -1) {
+            fprintf(stderr, "bind interdomain ioctl error %d\n", errno);
+            return -1;
+        }
+        ioreq_local_port[i] = rc;
+    }
+
     /* Init RAM management */
     qemu_map_cache_init();
     xen_ram_init(ram_size);
 
+    qemu_add_vm_change_state_handler(xen_vm_change_state_handler, NULL);
+
     return 0;
 }
+
+void destroy_hvm_domain(void)
+{
+    xc_interface *xc_handle;
+    int sts;
+
+    xc_handle = xc_interface_open(NULL, NULL, 0);
+    if (!xc_handle)
+        fprintf(stderr, "Cannot acquire xenctrl handle\n");
+    else {
+        sts = xc_domain_shutdown(xc_handle, xen_domid, SHUTDOWN_poweroff);
+        if (sts != 0)
+            fprintf(stderr, "? xc_domain_shutdown failed to issue poweroff, "
+                    "sts %d, errno %d\n", sts, errno);
+        else
+            fprintf(stderr, "Issued domain %d poweroff\n", xen_domid);
+        xc_interface_close(xc_handle);
+    }
+}