Patchwork [v2] qxl: async I/O

login
register
mail settings
Submitter Alon Levy
Date July 7, 2011, 4:50 p.m.
Message ID <1310057455-18570-10-git-send-email-alevy@redhat.com>
Download mbox | patch
Permalink /patch/103712/
State New
Headers show

Comments

Alon Levy - July 7, 2011, 4:50 p.m.
Some of the QXL port i/o commands are waiting for the spice server to
complete certain actions.  Add async versions for these commands, so we
don't block the vcpu while the spice server processes the command.
Instead the qxl device will raise an IRQ when done.

The async command processing relies on an added QXLInterface::async_complete
and added QXLWorker::*_async additions, in spice server qxl >= 3.1

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Alon Levy     <alevy@redhat.com>
---
 hw/qxl.c           |  244 ++++++++++++++++++++++++++++++++++++++++++---------
 hw/qxl.h           |   21 ++++-
 ui/spice-display.c |   33 +++++++
 ui/spice-display.h |    8 ++
 4 files changed, 261 insertions(+), 45 deletions(-)
Gerd Hoffmann - July 8, 2011, 7:17 a.m.
> +void qxl_spice_update_area_async(PCIQXLDevice *qxl, uint32_t surface_id,
> +                           struct QXLRect *area, struct QXLRect *dirty_rects,
> +                           uint32_t num_dirty_rects, uint32_t clear_dirty_region,
> +                           int async)
> +{
> +    if (async) {
> +        qxl->ssd.worker->update_area_async(qxl->ssd.worker, surface_id, area, dirty_rects,
> +                                 num_dirty_rects, clear_dirty_region, 0);

Fails to build with older libspice.

> +    } else {
> +        qxl->ssd.worker->update_area(qxl->ssd.worker, surface_id, area, dirty_rects,
> +                                 num_dirty_rects, clear_dirty_region);
> +    }
> +}
>
>   void qxl_spice_update_area(PCIQXLDevice *qxl, uint32_t surface_id,
>                              struct QXLRect *area, struct QXLRect *dirty_rects,
>                              uint32_t num_dirty_rects, uint32_t clear_dirty_region)
>   {
> -    qxl->ssd.worker->update_area(qxl->ssd.worker, surface_id, area, dirty_rects,
> -                             num_dirty_rects, clear_dirty_region);
> +    qxl_spice_update_area_async(qxl, surface_id, area, dirty_rects,
> +                                num_dirty_rects, clear_dirty_region, 0);
>   }

Pretty pointless wrapper IMHO.

> -void qxl_spice_destroy_surface_wait(PCIQXLDevice *qxl, uint32_t id)
> +static void qxl_spice_destroy_surface_wait_complete(PCIQXLDevice *qxl)
>   {
>       qemu_mutex_lock(&qxl->track_lock);
> -    PANIC_ON(id>= NUM_SURFACES);
> -    qxl->ssd.worker->destroy_surface_wait(qxl->ssd.worker, id);
> -    qxl->guest_surfaces.cmds[id] = 0;
> +    qxl->guest_surfaces.cmds[qxl->io_data.surface_id] = 0;

I'd suggest to pass in the surface id as argument instead.

>       qxl->guest_surfaces.count--;
>       qemu_mutex_unlock(&qxl->track_lock);
>   }
>
> +static void qxl_spice_destroy_surface_wait_async(PCIQXLDevice *qxl, uint32_t id, int async)
> +{
> +    qxl->io_data.surface_id = id;
> +    if (async) {
> +        qxl->ssd.worker->destroy_surface_wait_async(qxl->ssd.worker, id, 0);
> +    } else {
> +        qxl->ssd.worker->destroy_surface_wait(qxl->ssd.worker, id);
> +        qxl_spice_destroy_surface_wait_complete(qxl);

qxl_spice_destroy_surface_wait_complete(qxl, id);

> +    }
> +}
> +
>   void qxl_spice_loadvm_commands(PCIQXLDevice *qxl, struct QXLCommandExt *ext,
>                                  uint32_t count)
>   {
> @@ -171,15 +193,29 @@ void qxl_spice_reset_memslots(PCIQXLDevice *qxl)
>       qxl->ssd.worker->reset_memslots(qxl->ssd.worker);
>   }
>
> -void qxl_spice_destroy_surfaces(PCIQXLDevice *qxl)
> +static void qxl_spice_destroy_surfaces_complete(PCIQXLDevice *qxl)
>   {
>       qemu_mutex_lock(&qxl->track_lock);
> -    qxl->ssd.worker->destroy_surfaces(qxl->ssd.worker);
>       memset(&qxl->guest_surfaces.cmds, 0, sizeof(qxl->guest_surfaces.cmds));
>       qxl->guest_surfaces.count = 0;
>       qemu_mutex_unlock(&qxl->track_lock);
>   }
>
> +static void qxl_spice_destroy_surfaces(PCIQXLDevice *qxl)
> +{
> +    qxl->ssd.worker->destroy_surfaces(qxl->ssd.worker);
> +    qxl_spice_destroy_surfaces_complete(qxl);
> +}
> +
> +static void qxl_spice_destroy_surfaces_async(PCIQXLDevice *qxl, int async)
> +{
> +    if (async) {
> +        qxl->ssd.worker->destroy_surfaces_async(qxl->ssd.worker, 0);
> +    } else {
> +        qxl_spice_destroy_surfaces(qxl);
> +    }
> +}

I'd combine those into one function similar to 
qxl_spice_destroy_surface_wait_async (and we don't need the _async 
suffix if we have a single version only which gets passed in async as 
argument).

> +
>   void qxl_spice_reset_image_cache(PCIQXLDevice *qxl)
>   {
>       qxl->ssd.worker->reset_image_cache(qxl->ssd.worker);
> @@ -706,6 +742,38 @@ static int interface_flush_resources(QXLInstance *sin)
>       return ret;
>   }
>
> +static void qxl_add_memslot_complete(PCIQXLDevice *d);
> +static void qxl_create_guest_primary_complete(PCIQXLDevice *d);
> +
> +/* called from spice server thread context only */
> +static void interface_async_complete(QXLInstance *sin, uint64_t cookie)
> +{
> +    PCIQXLDevice *qxl = container_of(sin, PCIQXLDevice, ssd.qxl);
> +    uint32_t current_async;
> +
> +    qemu_mutex_lock(&qxl->async_lock);
> +    current_async = qxl->current_async;
> +    qxl->current_async = QXL_UNDEFINED_IO;
> +    qemu_mutex_unlock(&qxl->async_lock);

I'd tend to use the cookie to pass that information (also the stuff in 
io_data).

> -static void qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta)
> +static void qxl_add_memslot_complete(PCIQXLDevice *d)

I don't think that needs to move to the completion callback.  Memory 
slots can be created and destroyed with I/O commands only, so there is 
no need to care about the ordering like we have to with surfaces.

>       qemu_mutex_init(&qxl->track_lock);
> +    qemu_mutex_init(&qxl->async_lock);

Do we really need two locks?
When passing info via cookie, doesn't the need for the async lock go 
away completely?

> index af10ae8..b7bc0de 100644
> --- a/ui/spice-display.c
> +++ b/ui/spice-display.c
> @@ -62,6 +62,20 @@ void qemu_spice_rect_union(QXLRect *dest, const QXLRect *r)
>       dest->right = MAX(dest->right, r->right);
>   }
>
> +int qemu_spice_supports_async(SimpleSpiceDisplay *ssd)
> +{
> +    return (ssd->worker->major_version>  3 ||
> +            (ssd->worker->major_version == 3&&  ssd->worker->minor_version>= 1));
> +}

Doing a runtime check here is pointless, just use
#if SPICE_INTERFACE_QXL_MINOR >= 1
...
#endif

>   void qemu_spice_create_primary_surface(SimpleSpiceDisplay *ssd, uint32_t id,
>                                          QXLDevSurfaceCreate *surface)
>   {
>       ssd->worker->create_primary_surface(ssd->worker, id, surface);
>   }
>
> +void qemu_spice_destroy_primary_surface_async(SimpleSpiceDisplay *ssd, uint32_t id, int async)
> +{
> +    if (async) {
> +        ssd->worker->destroy_primary_surface_async(ssd->worker, id, 0);
> +    } else {
> +        qemu_spice_destroy_primary_surface(ssd, id);
> +    }
> +}

Like for all others: one only please.

cheers,
   Gerd
Alon Levy - July 8, 2011, 8 a.m.
On Fri, Jul 08, 2011 at 09:17:50AM +0200, Gerd Hoffmann wrote:
> >+void qxl_spice_update_area_async(PCIQXLDevice *qxl, uint32_t surface_id,
> >+                           struct QXLRect *area, struct QXLRect *dirty_rects,
> >+                           uint32_t num_dirty_rects, uint32_t clear_dirty_region,
> >+                           int async)
> >+{
> >+    if (async) {
> >+        qxl->ssd.worker->update_area_async(qxl->ssd.worker, surface_id, area, dirty_rects,
> >+                                 num_dirty_rects, clear_dirty_region, 0);
> 
> Fails to build with older libspice.

> 
> >+    } else {
> >+        qxl->ssd.worker->update_area(qxl->ssd.worker, surface_id, area, dirty_rects,
> >+                                 num_dirty_rects, clear_dirty_region);
> >+    }
> >+}
> >
> >  void qxl_spice_update_area(PCIQXLDevice *qxl, uint32_t surface_id,
> >                             struct QXLRect *area, struct QXLRect *dirty_rects,
> >                             uint32_t num_dirty_rects, uint32_t clear_dirty_region)
> >  {
> >-    qxl->ssd.worker->update_area(qxl->ssd.worker, surface_id, area, dirty_rects,
> >-                             num_dirty_rects, clear_dirty_region);
> >+    qxl_spice_update_area_async(qxl, surface_id, area, dirty_rects,
> >+                                num_dirty_rects, clear_dirty_region, 0);
> >  }
> 
> Pretty pointless wrapper IMHO.

The above two-line change was a mistake. What about:

qxl_spice_update_area_async(...)
{
#ifdef ..
 if (async) {
    qxl->ssd.worker->update_area_async(...)
 } else {
    qxl_spice_update_area(...)
 }
#else
 qxl_spice_update_area(...)
#endif
}

> 
> >-void qxl_spice_destroy_surface_wait(PCIQXLDevice *qxl, uint32_t id)
> >+static void qxl_spice_destroy_surface_wait_complete(PCIQXLDevice *qxl)
> >  {
> >      qemu_mutex_lock(&qxl->track_lock);
> >-    PANIC_ON(id>= NUM_SURFACES);
> >-    qxl->ssd.worker->destroy_surface_wait(qxl->ssd.worker, id);
> >-    qxl->guest_surfaces.cmds[id] = 0;
> >+    qxl->guest_surfaces.cmds[qxl->io_data.surface_id] = 0;
> 
> I'd suggest to pass in the surface id as argument instead.

I can use the cookie if that's what you mean (which I guess means it will make
more sense to define it as a void pointer).
> 
> >      qxl->guest_surfaces.count--;
> >      qemu_mutex_unlock(&qxl->track_lock);
> >  }
> >
> >+static void qxl_spice_destroy_surface_wait_async(PCIQXLDevice *qxl, uint32_t id, int async)
> >+{
> >+    qxl->io_data.surface_id = id;
> >+    if (async) {
> >+        qxl->ssd.worker->destroy_surface_wait_async(qxl->ssd.worker, id, 0);
> >+    } else {
> >+        qxl->ssd.worker->destroy_surface_wait(qxl->ssd.worker, id);
> >+        qxl_spice_destroy_surface_wait_complete(qxl);
> 
> qxl_spice_destroy_surface_wait_complete(qxl, id);
and use the cookie on the async_complete with appropriate casting and free, got it.

> 
> >+    }
> >+}
> >+
> >  void qxl_spice_loadvm_commands(PCIQXLDevice *qxl, struct QXLCommandExt *ext,
> >                                 uint32_t count)
> >  {
> >@@ -171,15 +193,29 @@ void qxl_spice_reset_memslots(PCIQXLDevice *qxl)
> >      qxl->ssd.worker->reset_memslots(qxl->ssd.worker);
> >  }
> >
> >-void qxl_spice_destroy_surfaces(PCIQXLDevice *qxl)
> >+static void qxl_spice_destroy_surfaces_complete(PCIQXLDevice *qxl)
> >  {
> >      qemu_mutex_lock(&qxl->track_lock);
> >-    qxl->ssd.worker->destroy_surfaces(qxl->ssd.worker);
> >      memset(&qxl->guest_surfaces.cmds, 0, sizeof(qxl->guest_surfaces.cmds));
> >      qxl->guest_surfaces.count = 0;
> >      qemu_mutex_unlock(&qxl->track_lock);
> >  }
> >
> >+static void qxl_spice_destroy_surfaces(PCIQXLDevice *qxl)
> >+{
> >+    qxl->ssd.worker->destroy_surfaces(qxl->ssd.worker);
> >+    qxl_spice_destroy_surfaces_complete(qxl);
> >+}
> >+
> >+static void qxl_spice_destroy_surfaces_async(PCIQXLDevice *qxl, int async)
> >+{
> >+    if (async) {
> >+        qxl->ssd.worker->destroy_surfaces_async(qxl->ssd.worker, 0);
> >+    } else {
> >+        qxl_spice_destroy_surfaces(qxl);
> >+    }
> >+}
> 
> I'd combine those into one function simliar to
> qxl_spice_destroy_surface_wait_async (and we don't need the _async
> suffix if we have a single version only which gets passed in async
> as argument).
ok, I'll ditch the suffix.

> 
> >+
> >  void qxl_spice_reset_image_cache(PCIQXLDevice *qxl)
> >  {
> >      qxl->ssd.worker->reset_image_cache(qxl->ssd.worker);
> >@@ -706,6 +742,38 @@ static int interface_flush_resources(QXLInstance *sin)
> >      return ret;
> >  }
> >
> >+static void qxl_add_memslot_complete(PCIQXLDevice *d);
> >+static void qxl_create_guest_primary_complete(PCIQXLDevice *d);
> >+
> >+/* called from spice server thread context only */
> >+static void interface_async_complete(QXLInstance *sin, uint64_t cookie)
> >+{
> >+    PCIQXLDevice *qxl = container_of(sin, PCIQXLDevice, ssd.qxl);
> >+    uint32_t current_async;
> >+
> >+    qemu_mutex_lock(&qxl->async_lock);
> >+    current_async = qxl->current_async;
> >+    qxl->current_async = QXL_UNDEFINED_IO;
> >+    qemu_mutex_unlock(&qxl->async_lock);
> 
> I'd tend to use the cookie to pass that information (also the stuff
> in io_data).

yeah, I'll throw that, malloc something, cast to cookie, pass it, cast back, free.

> 
> >-static void qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta)
> >+static void qxl_add_memslot_complete(PCIQXLDevice *d)
> 
> I think it isn't needed to move that to the completion callback.
> Memory slots can be created and destroyed with I/O commands only, so
> there is no need to care about the ordering like we have to with
> surfaces.
ok.

> 
> >      qemu_mutex_init(&qxl->track_lock);
> >+    qemu_mutex_init(&qxl->async_lock);
> 
> Do we really need two locks?
> When passing info via cookie, doesn't the need for the async lock go
> away completely?
the current_async still gets changed from two threads (on [vcpu]ioport_write
and [worker]async_complete)

> 
> >index af10ae8..b7bc0de 100644
> >--- a/ui/spice-display.c
> >+++ b/ui/spice-display.c
> >@@ -62,6 +62,20 @@ void qemu_spice_rect_union(QXLRect *dest, const QXLRect *r)
> >      dest->right = MAX(dest->right, r->right);
> >  }
> >
> >+int qemu_spice_supports_async(SimpleSpiceDisplay *ssd)
> >+{
> >+    return (ssd->worker->major_version>  3 ||
> >+            (ssd->worker->major_version == 3&&  ssd->worker->minor_version>= 1));
> >+}
> 
> Doing a runtime check here is pointless, just use
> #if SPICE_INTERFACE_QXL_MINOR >= 1
> ...
> #endif
This is a runtime check - what's preventing someone from compiling with 3.1 and running with 3.0?
Is it that we will require a newer library version? (I have yet to send a patch for that.)

> 
> >  void qemu_spice_create_primary_surface(SimpleSpiceDisplay *ssd, uint32_t id,
> >                                         QXLDevSurfaceCreate *surface)
> >  {
> >      ssd->worker->create_primary_surface(ssd->worker, id, surface);
> >  }
> >
> >+void qemu_spice_destroy_primary_surface_async(SimpleSpiceDisplay *ssd, uint32_t id, int async)
> >+{
> >+    if (async) {
> >+        ssd->worker->destroy_primary_surface_async(ssd->worker, id, 0);
> >+    } else {
> >+        qemu_spice_destroy_primary_surface(ssd, id);
> >+    }
> >+}
> 
> Like for all others: one only please.
ok

> 
> cheers,
>   Gerd
Gerd Hoffmann - July 8, 2011, 8:10 a.m.
> The above two lines change was a mistake. What about:
>
> qxl_spice_update_area_async(...)
> {
> #ifdef ..
>   if (async) {
>      qxl->ssd.worker->update_area_async(...)
>   } else {
>      qxl_spice_update_area(...)
>   }
> #else
>   qxl_spice_update_area(...)
> #endif
> }

I would do

if (async) {
#if ...
   worker->foo_async()
#else
   abort() /* should not happen */
#endif
} else {
   worker->foo
}

> yeah, I'll throw that, malloc something, cast to cookie, pass it, cast back, free.

cookie should be big enough to store the info directly.  malloc works 
too though.

>> Doing a runtime check here is pointless, just use
>> #if SPICE_INTERFACE_QXL_MINOR>= 1
>> ...
>> #endif
> this is a runtime check - what's preventing someone from compiling with 3.1 and running with 3.0?
> that we will require a newer library version? (which I am yet to send a patch for)

Yes, that's why the minor version of the shared library needs to be raised.

cheers,
   Gerd
Alon Levy - July 8, 2011, 8:12 a.m.
On Fri, Jul 08, 2011 at 09:17:50AM +0200, Gerd Hoffmann wrote:
> >+void qxl_spice_update_area_async(PCIQXLDevice *qxl, uint32_t surface_id,
> >+                           struct QXLRect *area, struct QXLRect *dirty_rects,
> >+                           uint32_t num_dirty_rects, uint32_t clear_dirty_region,
> >+                           int async)
> >+{
> >+    if (async) {
> >+        qxl->ssd.worker->update_area_async(qxl->ssd.worker, surface_id, area, dirty_rects,
> >+                                 num_dirty_rects, clear_dirty_region, 0);
> 
> Fails to build with older libspice.

btw, I'm looking at "#if.*MINOR" code like

 #if SPICE_INTERFACE_CORE_MINOR >= 3

(ui/spice-core.c)

Shouldn't that be checking the MAJOR as well?

[snip]
Gerd Hoffmann - July 8, 2011, 8:16 a.m.
> btw, I'm looking at "#if.*MINOR" code like
>
>   #if SPICE_INTERFACE_CORE_MINOR>= 3
>
> (ui/spice-core.c)
>
> Shouldn't that be checking the MAJOR as well?

A major version change means an incompatible change.  I doubt we will ever do 
that.  But if you feel better checking that, it should probably just be a

#if SPICE_INTERFACE_CORE_MAJOR != 1
#error incompatible spice core interface
#endif

at the top of the file.

cheers,
   Gerd

Patch

diff --git a/hw/qxl.c b/hw/qxl.c
index 935bac0..f72d5b8 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -136,25 +136,47 @@  void qxl_guest_bug(PCIQXLDevice *qxl, const char *msg, ...)
     }
 }
 
+void qxl_spice_update_area_async(PCIQXLDevice *qxl, uint32_t surface_id,
+                           struct QXLRect *area, struct QXLRect *dirty_rects,
+                           uint32_t num_dirty_rects, uint32_t clear_dirty_region,
+                           int async)
+{
+    if (async) {
+        qxl->ssd.worker->update_area_async(qxl->ssd.worker, surface_id, area, dirty_rects,
+                                 num_dirty_rects, clear_dirty_region, 0);
+    } else {
+        qxl->ssd.worker->update_area(qxl->ssd.worker, surface_id, area, dirty_rects,
+                                 num_dirty_rects, clear_dirty_region);
+    }
+}
 
 void qxl_spice_update_area(PCIQXLDevice *qxl, uint32_t surface_id,
                            struct QXLRect *area, struct QXLRect *dirty_rects,
                            uint32_t num_dirty_rects, uint32_t clear_dirty_region)
 {
-    qxl->ssd.worker->update_area(qxl->ssd.worker, surface_id, area, dirty_rects,
-                             num_dirty_rects, clear_dirty_region);
+    qxl_spice_update_area_async(qxl, surface_id, area, dirty_rects,
+                                num_dirty_rects, clear_dirty_region, 0);
 }
 
-void qxl_spice_destroy_surface_wait(PCIQXLDevice *qxl, uint32_t id)
+static void qxl_spice_destroy_surface_wait_complete(PCIQXLDevice *qxl)
 {
     qemu_mutex_lock(&qxl->track_lock);
-    PANIC_ON(id >= NUM_SURFACES);
-    qxl->ssd.worker->destroy_surface_wait(qxl->ssd.worker, id);
-    qxl->guest_surfaces.cmds[id] = 0;
+    qxl->guest_surfaces.cmds[qxl->io_data.surface_id] = 0;
     qxl->guest_surfaces.count--;
     qemu_mutex_unlock(&qxl->track_lock);
 }
 
+static void qxl_spice_destroy_surface_wait_async(PCIQXLDevice *qxl, uint32_t id, int async)
+{
+    qxl->io_data.surface_id = id;
+    if (async) {
+        qxl->ssd.worker->destroy_surface_wait_async(qxl->ssd.worker, id, 0);
+    } else {
+        qxl->ssd.worker->destroy_surface_wait(qxl->ssd.worker, id);
+        qxl_spice_destroy_surface_wait_complete(qxl);
+    }
+}
+
 void qxl_spice_loadvm_commands(PCIQXLDevice *qxl, struct QXLCommandExt *ext,
                                uint32_t count)
 {
@@ -171,15 +193,29 @@  void qxl_spice_reset_memslots(PCIQXLDevice *qxl)
     qxl->ssd.worker->reset_memslots(qxl->ssd.worker);
 }
 
-void qxl_spice_destroy_surfaces(PCIQXLDevice *qxl)
+static void qxl_spice_destroy_surfaces_complete(PCIQXLDevice *qxl)
 {
     qemu_mutex_lock(&qxl->track_lock);
-    qxl->ssd.worker->destroy_surfaces(qxl->ssd.worker);
     memset(&qxl->guest_surfaces.cmds, 0, sizeof(qxl->guest_surfaces.cmds));
     qxl->guest_surfaces.count = 0;
     qemu_mutex_unlock(&qxl->track_lock);
 }
 
+static void qxl_spice_destroy_surfaces(PCIQXLDevice *qxl)
+{
+    qxl->ssd.worker->destroy_surfaces(qxl->ssd.worker);
+    qxl_spice_destroy_surfaces_complete(qxl);
+}
+
+static void qxl_spice_destroy_surfaces_async(PCIQXLDevice *qxl, int async)
+{
+    if (async) {
+        qxl->ssd.worker->destroy_surfaces_async(qxl->ssd.worker, 0);
+    } else {
+        qxl_spice_destroy_surfaces(qxl);
+    }
+}
+
 void qxl_spice_reset_image_cache(PCIQXLDevice *qxl)
 {
     qxl->ssd.worker->reset_image_cache(qxl->ssd.worker);
@@ -706,6 +742,38 @@  static int interface_flush_resources(QXLInstance *sin)
     return ret;
 }
 
+static void qxl_add_memslot_complete(PCIQXLDevice *d);
+static void qxl_create_guest_primary_complete(PCIQXLDevice *d);
+
+/* called from spice server thread context only */
+static void interface_async_complete(QXLInstance *sin, uint64_t cookie)
+{
+    PCIQXLDevice *qxl = container_of(sin, PCIQXLDevice, ssd.qxl);
+    uint32_t current_async;
+
+    qemu_mutex_lock(&qxl->async_lock);
+    current_async = qxl->current_async;
+    qxl->current_async = QXL_UNDEFINED_IO;
+    qemu_mutex_unlock(&qxl->async_lock);
+
+    dprint(qxl, 1, "async_complete: %d (%ld) done\n", current_async, cookie);
+    switch (current_async) {
+    case QXL_IO_MEMSLOT_ADD_ASYNC:
+        qxl_add_memslot_complete(qxl);
+        break;
+    case QXL_IO_CREATE_PRIMARY_ASYNC:
+        qxl_create_guest_primary_complete(qxl);
+        break;
+    case QXL_IO_DESTROY_ALL_SURFACES_ASYNC:
+        qxl_spice_destroy_surfaces_complete(qxl);
+        break;
+    case QXL_IO_DESTROY_SURFACE_ASYNC:
+        qxl_spice_destroy_surface_wait_complete(qxl);
+        break;
+    }
+    qxl_send_events(qxl, QXL_INTERRUPT_IO_CMD);
+}
+
 static const QXLInterface qxl_interface = {
     .base.type               = SPICE_INTERFACE_QXL,
     .base.description        = "qxl gpu",
@@ -725,6 +793,7 @@  static const QXLInterface qxl_interface = {
     .req_cursor_notification = interface_req_cursor_notification,
     .notify_update           = interface_notify_update,
     .flush_resources         = interface_flush_resources,
+    .async_complete          = interface_async_complete,
 };
 
 static void qxl_enter_vga_mode(PCIQXLDevice *d)
@@ -853,7 +922,18 @@  static void qxl_vga_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     vga_ioport_write(opaque, addr, val);
 }
 
-static void qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta)
+static void qxl_add_memslot_complete(PCIQXLDevice *d)
+{
+    QXLDevMemSlot *memslot = &d->io_data.memslot;
+    uint32_t slot_id = d->io_data.memslot.slot_id;
+
+    d->guest_slots[slot_id].ptr = (void*)memslot->virt_start;
+    d->guest_slots[slot_id].size = memslot->virt_end - memslot->virt_start;
+    d->guest_slots[slot_id].delta = d->io_data.delta;
+    d->guest_slots[slot_id].active = 1;
+}
+
+static void qxl_add_memslot_async(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta, int async)
 {
     static const int regions[] = {
         QXL_RAM_RANGE_INDEX,
@@ -865,9 +945,11 @@  static void qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta)
     pcibus_t pci_start;
     pcibus_t pci_end;
     intptr_t virt_start;
-    QXLDevMemSlot memslot;
+    QXLDevMemSlot *memslot = &d->io_data.memslot;
     int i;
 
+    d->io_data.delta = delta;
+
     guest_start = le64_to_cpu(d->guest_slots[slot_id].slot.mem_start);
     guest_end   = le64_to_cpu(d->guest_slots[slot_id].slot.mem_end);
 
@@ -911,23 +993,27 @@  static void qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta)
         abort();
     }
 
-    memslot.slot_id = slot_id;
-    memslot.slot_group_id = MEMSLOT_GROUP_GUEST; /* guest group */
-    memslot.virt_start = virt_start + (guest_start - pci_start);
-    memslot.virt_end   = virt_start + (guest_end   - pci_start);
-    memslot.addr_delta = memslot.virt_start - delta;
-    memslot.generation = d->rom->slot_generation = 0;
+    memslot->slot_id = slot_id;
+    memslot->slot_group_id = MEMSLOT_GROUP_GUEST; /* guest group */
+    memslot->virt_start = virt_start + (guest_start - pci_start);
+    memslot->virt_end   = virt_start + (guest_end   - pci_start);
+    memslot->addr_delta = memslot->virt_start - delta;
+    memslot->generation = d->rom->slot_generation = 0;
     qxl_rom_set_dirty(d);
 
     dprint(d, 1, "%s: slot %d: host virt 0x%" PRIx64 " - 0x%" PRIx64 "\n",
-           __FUNCTION__, memslot.slot_id,
-           memslot.virt_start, memslot.virt_end);
+           __FUNCTION__, memslot->slot_id,
+           memslot->virt_start, memslot->virt_end);
 
-    qemu_spice_add_memslot(&d->ssd, &memslot);
-    d->guest_slots[slot_id].ptr = (void*)memslot.virt_start;
-    d->guest_slots[slot_id].size = memslot.virt_end - memslot.virt_start;
-    d->guest_slots[slot_id].delta = delta;
-    d->guest_slots[slot_id].active = 1;
+    qemu_spice_add_memslot_async(&d->ssd, memslot, async);
+    if (!async) {
+        qxl_add_memslot_complete(d);
+    }
+}
+
+static void qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta)
+{
+    qxl_add_memslot_async(d, slot_id, delta, 0);
 }
 
 static void qxl_del_memslot(PCIQXLDevice *d, uint32_t slot_id)
@@ -973,7 +1059,13 @@  void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id)
     }
 }
 
-static void qxl_create_guest_primary(PCIQXLDevice *qxl, int loadvm)
+static void qxl_create_guest_primary_complete(PCIQXLDevice *qxl)
+{
+    /* for local rendering */
+    qxl_render_resize(qxl);
+}
+
+static void qxl_create_guest_primary_async(PCIQXLDevice *qxl, int loadvm, int async)
 {
     QXLDevSurfaceCreate surface;
     QXLSurfaceCreate *sc = &qxl->guest_primary.surface;
@@ -1001,10 +1093,16 @@  static void qxl_create_guest_primary(PCIQXLDevice *qxl, int loadvm)
 
     qxl->mode = QXL_MODE_NATIVE;
     qxl->cmdflags = 0;
-    qemu_spice_create_primary_surface(&qxl->ssd, 0, &surface);
+    qemu_spice_create_primary_surface_async(&qxl->ssd, 0, &surface, async);
 
-    /* for local rendering */
-    qxl_render_resize(qxl);
+    if (!async) {
+        qxl_create_guest_primary_complete(qxl);
+    }
+}
+
+static void qxl_create_guest_primary(PCIQXLDevice *qxl, int loadvm)
+{
+    qxl_create_guest_primary_async(qxl, loadvm, 0);
 }
 
 static void qxl_set_mode(PCIQXLDevice *d, int modenr, int loadvm)
@@ -1055,13 +1153,16 @@  static void ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     PCIQXLDevice *d = opaque;
     uint32_t io_port = addr - d->io_base;
+    int async = 0;
 
     switch (io_port) {
     case QXL_IO_RESET:
     case QXL_IO_SET_MODE:
     case QXL_IO_MEMSLOT_ADD:
+    case QXL_IO_MEMSLOT_ADD_ASYNC:
     case QXL_IO_MEMSLOT_DEL:
     case QXL_IO_CREATE_PRIMARY:
+    case QXL_IO_CREATE_PRIMARY_ASYNC:
     case QXL_IO_UPDATE_IRQ:
     case QXL_IO_LOG:
         break;
@@ -1070,17 +1171,47 @@  static void ioport_write(void *opaque, uint32_t addr, uint32_t val)
             break;
         dprint(d, 1, "%s: unexpected port 0x%x (%s) in vga mode\n",
             __FUNCTION__, io_port, io_port_to_string(io_port));
+        /* be nice to buggy guest drivers */
+        if (io_port >= QXL_IO_UPDATE_AREA_ASYNC &&
+            io_port <= QXL_IO_DESTROY_ALL_SURFACES_ASYNC) {
+            qxl_send_events(d, QXL_INTERRUPT_IO_CMD);
+        }
         return;
     }
 
     switch (io_port) {
-    case QXL_IO_UPDATE_AREA:
-    {
-        QXLRect update = d->ram->update_area;
-        qxl_spice_update_area(d, d->ram->update_surface,
-                              &update, NULL, 0, 0);
+    case QXL_IO_UPDATE_AREA_ASYNC:
+    case QXL_IO_MEMSLOT_ADD_ASYNC:
+    case QXL_IO_CREATE_PRIMARY_ASYNC:
+    case QXL_IO_DESTROY_PRIMARY_ASYNC:
+    case QXL_IO_DESTROY_SURFACE_ASYNC:
+    case QXL_IO_DESTROY_ALL_SURFACES_ASYNC:
+        if (!qemu_spice_supports_async(&d->ssd)) {
+            fprintf(stderr, "qxl: error: async not supported by libspice but guest driver used it\n");
+            return;
+        }
+        async = 1;
+        qemu_mutex_lock(&d->async_lock);
+        if (d->current_async != QXL_UNDEFINED_IO) {
+            qxl_guest_bug(d, "%d async started before last (%d) complete\n",
+                io_port, d->current_async);
+            qemu_mutex_unlock(&d->async_lock);
+            return;
+        }
+        d->current_async = io_port;
+        qemu_mutex_unlock(&d->async_lock);
+        dprint(d, 1, "start async %d\n", d->current_async);
+        break;
+    default:
         break;
     }
+
+    switch (io_port) {
+    case QXL_IO_UPDATE_AREA_ASYNC:
+    case QXL_IO_UPDATE_AREA:
+        qxl_spice_update_area_async(d, d->ram->update_surface,
+                              &d->ram->update_area, NULL, 0, 0, async);
+        break;
     case QXL_IO_NOTIFY_CMD:
         qemu_spice_wakeup(&d->ssd);
         break;
@@ -1116,6 +1247,7 @@  static void ioport_write(void *opaque, uint32_t addr, uint32_t val)
         dprint(d, 1, "QXL_IO_RESET\n");
         qxl_hard_reset(d, 0);
         break;
+    case QXL_IO_MEMSLOT_ADD_ASYNC:
     case QXL_IO_MEMSLOT_ADD:
         if (val >= NUM_MEMSLOTS) {
             qxl_guest_bug(d, "QXL_IO_MEMSLOT_ADD: val out of range\n");
@@ -1126,7 +1258,7 @@  static void ioport_write(void *opaque, uint32_t addr, uint32_t val)
             break;
         }
         d->guest_slots[val].slot = d->ram->mem_slot;
-        qxl_add_memslot(d, val, 0);
+        qxl_add_memslot_async(d, val, 0, async);
         break;
     case QXL_IO_MEMSLOT_DEL:
         if (val >= NUM_MEMSLOTS) {
@@ -1135,36 +1267,60 @@  static void ioport_write(void *opaque, uint32_t addr, uint32_t val)
         }
         qxl_del_memslot(d, val);
         break;
+    case QXL_IO_CREATE_PRIMARY_ASYNC:
     case QXL_IO_CREATE_PRIMARY:
         if (val != 0) {
-            qxl_guest_bug(d, "QXL_IO_CREATE_PRIMARY: val != 0\n");
-            break;
+            qxl_guest_bug(d, "QXL_IO_CREATE_PRIMARY (async=%d): val != 0\n", async);
+            goto cancel_async;
         }
-        dprint(d, 1, "QXL_IO_CREATE_PRIMARY\n");
+        dprint(d, 1, "QXL_IO_CREATE_PRIMARY async=%d\n", async);
         d->guest_primary.surface = d->ram->create_surface;
-        qxl_create_guest_primary(d, 0);
+        qxl_create_guest_primary_async(d, 0, async);
         break;
+    case QXL_IO_DESTROY_PRIMARY_ASYNC:
     case QXL_IO_DESTROY_PRIMARY:
         if (val != 0) {
-            qxl_guest_bug(d, "QXL_IO_DESTROY_PRIMARY: val != 0\n");
-            break;
+            qxl_guest_bug(d, "QXL_IO_DESTROY_PRIMARY (async=%d): val != 0\n", async);
+            goto cancel_async;
         }
-        dprint(d, 1, "QXL_IO_DESTROY_PRIMARY (%s)\n", qxl_mode_to_string(d->mode));
+        dprint(d, 1, "QXL_IO_DESTROY_PRIMARY (async=%d) (%s)\n", async,
+               qxl_mode_to_string(d->mode));
         if (d->mode != QXL_MODE_UNDEFINED) {
             d->mode = QXL_MODE_UNDEFINED;
-            qemu_spice_destroy_primary_surface(&d->ssd, 0);
+            qemu_spice_destroy_primary_surface_async(&d->ssd, 0, async);
+        } else {
+            if (async) {
+                dprint(d, 1, "QXL_IO_DESTROY_PRIMARY in %s, ignored\n",
+                        qxl_mode_to_string(d->mode));
+                qxl_send_events(d, QXL_INTERRUPT_IO_CMD);
+                goto cancel_async;
+            }
         }
         break;
+    case QXL_IO_DESTROY_SURFACE_ASYNC:
     case QXL_IO_DESTROY_SURFACE_WAIT:
-        qxl_spice_destroy_surface_wait(d, val);
+        if (val >= NUM_SURFACES) {
+            qxl_guest_bug(d, "QXL_IO_DESTROY_SURFACE (async=%d): %d >= NUM_SURFACES", async, val);
+            goto cancel_async;
+        }
+        qxl_spice_destroy_surface_wait_async(d, val, async);
         break;
+    case QXL_IO_DESTROY_ALL_SURFACES_ASYNC:
     case QXL_IO_DESTROY_ALL_SURFACES:
-        qxl_spice_destroy_surfaces(d);
+        d->mode = QXL_MODE_UNDEFINED;
+        qxl_spice_destroy_surfaces_async(d, async);
         break;
     default:
         fprintf(stderr, "%s: ioport=0x%x, abort()\n", __FUNCTION__, io_port);
         abort();
     }
+    return;
+cancel_async:
+    if (async) {
+        qemu_mutex_lock(&d->async_lock);
+        d->current_async = QXL_UNDEFINED_IO;
+        qemu_mutex_unlock(&d->async_lock);
+    }
 }
 
 static uint32_t ioport_read(void *opaque, uint32_t addr)
@@ -1377,6 +1533,8 @@  static int qxl_init_common(PCIQXLDevice *qxl)
     qxl->num_memslots = NUM_MEMSLOTS;
     qxl->num_surfaces = NUM_SURFACES;
     qemu_mutex_init(&qxl->track_lock);
+    qemu_mutex_init(&qxl->async_lock);
+    qxl->current_async = QXL_UNDEFINED_IO;
 
     switch (qxl->revision) {
     case 1: /* spice 0.4 -- qxl-1 */
diff --git a/hw/qxl.h b/hw/qxl.h
index e361bc6..16639ce 100644
--- a/hw/qxl.h
+++ b/hw/qxl.h
@@ -15,6 +15,8 @@  enum qxl_mode {
     QXL_MODE_NATIVE,
 };
 
+#define QXL_UNDEFINED_IO UINT32_MAX
+
 typedef struct PCIQXLDevice {
     PCIDevice          pci;
     SimpleSpiceDisplay ssd;
@@ -30,6 +32,8 @@  typedef struct PCIQXLDevice {
     int32_t            num_memslots;
     int32_t            num_surfaces;
 
+    uint32_t           current_async;
+
     struct guest_slots {
         QXLMemSlot     slot;
         void           *ptr;
@@ -56,6 +60,7 @@  typedef struct PCIQXLDevice {
     QXLPHYSICAL        guest_cursor;
 
     QemuMutex          track_lock;
+    QemuMutex          async_lock;
 
     /* thread signaling */
     pthread_t          main;
@@ -82,6 +87,15 @@  typedef struct PCIQXLDevice {
 
     /* io bar */
     uint32_t           io_base;
+
+    /* io data for asyncable calls */
+    struct {
+        /* QXL_IO_MEMSLOT_ADD{,_ASYNC} */
+        QXLDevMemSlot memslot;
+        uint64_t delta;
+        /* QXL_IO_DESTROY_SURFACE_{WAIT,ASYNC} */
+        uint32_t surface_id;
+    } io_data;
 } PCIQXLDevice;
 
 #define PANIC_ON(x) if ((x)) {                         \
@@ -104,12 +118,15 @@  void qxl_guest_bug(PCIQXLDevice *qxl, const char *msg, ...);
 void qxl_spice_update_area(PCIQXLDevice *qxl, uint32_t surface_id,
                            struct QXLRect *area, struct QXLRect *dirty_rects,
                            uint32_t num_dirty_rects, uint32_t clear_dirty_region);
-void qxl_spice_destroy_surface_wait(PCIQXLDevice *qxl, uint32_t id);
+void qxl_spice_update_area_async(PCIQXLDevice *qxl, uint32_t surface_id,
+                           struct QXLRect *area, struct QXLRect *dirty_rects,
+                           uint32_t num_dirty_rects, uint32_t clear_dirty_region,
+                           int async);
 void qxl_spice_loadvm_commands(PCIQXLDevice *qxl, struct QXLCommandExt *ext,
                                uint32_t count);
 void qxl_spice_oom(PCIQXLDevice *qxl);
+void qxl_spice_oom_async(PCIQXLDevice *qxl, int async);
 void qxl_spice_reset_memslots(PCIQXLDevice *qxl);
-void qxl_spice_destroy_surfaces(PCIQXLDevice *qxl);
 void qxl_spice_reset_image_cache(PCIQXLDevice *qxl);
 void qxl_spice_reset_cursor(PCIQXLDevice *qxl);
 
diff --git a/ui/spice-display.c b/ui/spice-display.c
index af10ae8..b7bc0de 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -62,6 +62,20 @@  void qemu_spice_rect_union(QXLRect *dest, const QXLRect *r)
     dest->right = MAX(dest->right, r->right);
 }
 
+int qemu_spice_supports_async(SimpleSpiceDisplay *ssd)
+{
+    return (ssd->worker->major_version > 3 ||
+            (ssd->worker->major_version == 3 && ssd->worker->minor_version >= 1));
+}
+
+void qemu_spice_add_memslot_async(SimpleSpiceDisplay *ssd, QXLDevMemSlot *memslot, int async)
+{
+    if (async) {
+        ssd->worker->add_memslot_async(ssd->worker, memslot, 0);
+    } else {
+        qemu_spice_add_memslot(ssd, memslot);
+    }
+}
 
 void qemu_spice_add_memslot(SimpleSpiceDisplay *ssd, QXLDevMemSlot *memslot)
 {
@@ -73,12 +87,31 @@  void qemu_spice_del_memslot(SimpleSpiceDisplay *ssd, uint32_t gid, uint32_t sid)
     ssd->worker->del_memslot(ssd->worker, gid, sid);
 }
 
+void qemu_spice_create_primary_surface_async(SimpleSpiceDisplay *ssd, uint32_t id,
+                                       QXLDevSurfaceCreate *surface, int async)
+{
+    if (async) {
+        ssd->worker->create_primary_surface_async(ssd->worker, id, surface, 0);
+    } else {
+        qemu_spice_create_primary_surface(ssd, id, surface);
+    }
+}
+
 void qemu_spice_create_primary_surface(SimpleSpiceDisplay *ssd, uint32_t id,
                                        QXLDevSurfaceCreate *surface)
 {
     ssd->worker->create_primary_surface(ssd->worker, id, surface);
 }
 
+void qemu_spice_destroy_primary_surface_async(SimpleSpiceDisplay *ssd, uint32_t id, int async)
+{
+    if (async) {
+        ssd->worker->destroy_primary_surface_async(ssd->worker, id, 0);
+    } else {
+        qemu_spice_destroy_primary_surface(ssd, id);
+    }
+}
+
 void qemu_spice_destroy_primary_surface(SimpleSpiceDisplay *ssd, uint32_t id)
 {
     ssd->worker->destroy_primary_surface(ssd->worker, id);
diff --git a/ui/spice-display.h b/ui/spice-display.h
index d32dc9e..ebfa7ca 100644
--- a/ui/spice-display.h
+++ b/ui/spice-display.h
@@ -90,3 +90,11 @@  void qemu_spice_destroy_primary_surface(SimpleSpiceDisplay *ssd, uint32_t id);
 void qemu_spice_wakeup(SimpleSpiceDisplay *ssd);
 void qemu_spice_start(SimpleSpiceDisplay *ssd);
 void qemu_spice_stop(SimpleSpiceDisplay *ssd);
+
+int qemu_spice_supports_async(SimpleSpiceDisplay *ssd);
+
+/* calls the async version if async != 0, otherwise calls the sync version */
+void qemu_spice_add_memslot_async(SimpleSpiceDisplay *ssd, QXLDevMemSlot *memslot, int async);
+void qemu_spice_create_primary_surface_async(SimpleSpiceDisplay *ssd, uint32_t id,
+                                       QXLDevSurfaceCreate *surface, int async);
+void qemu_spice_destroy_primary_surface_async(SimpleSpiceDisplay *ssd, uint32_t id, int async);