diff mbox

[XEN,RFC,V2,15/17] xl: support spawn/destroy on multiple device model

Message ID 9522ee398a1fd3cdce48cfe883b307336ae6674f.1345552068.git.julien.grall@citrix.com
State New
Headers show

Commit Message

Julien Grall Aug. 22, 2012, 12:32 p.m. UTC
Old configuration file is still working with qemu disaggregation.
Before to spawn any QEMU, the toolstack will fill correctly, if needed,
configuration structure.

For the moment, the toolstack spawns device models one by one.

Signed-off-by: Julien Grall <julien.grall@citrix.com>
---
 tools/libxl/libxl.c          |   16 ++-
 tools/libxl/libxl_create.c   |  150 +++++++++++++-----
 tools/libxl/libxl_device.c   |    7 +-
 tools/libxl/libxl_dm.c       |  369 ++++++++++++++++++++++++++++++------------
 tools/libxl/libxl_dom.c      |    4 +-
 tools/libxl/libxl_internal.h |   36 +++--
 6 files changed, 421 insertions(+), 161 deletions(-)

Comments

Ian Campbell Aug. 23, 2012, 1:56 p.m. UTC | #1
On Wed, 2012-08-22 at 13:32 +0100, Julien Grall wrote:
> Old configuration file is still working with qemu disaggregation.
> Before to spawn any QEMU, the toolstack will fill correctly, if needed,
> configuration structure.
> 
> For the moment, the toolstack spawns device models one by one.
> 
> Signed-off-by: Julien Grall <julien.grall@citrix.com>
> ---
>  tools/libxl/libxl.c          |   16 ++-
>  tools/libxl/libxl_create.c   |  150 +++++++++++++-----
>  tools/libxl/libxl_device.c   |    7 +-
>  tools/libxl/libxl_dm.c       |  369 ++++++++++++++++++++++++++++++------------
>  tools/libxl/libxl_dom.c      |    4 +-
>  tools/libxl/libxl_internal.h |   36 +++--
>  6 files changed, 421 insertions(+), 161 deletions(-)
> 
> diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
> index 8ea3478..60718b6 100644
> --- a/tools/libxl/libxl.c
> +++ b/tools/libxl/libxl.c
> @@ -1330,7 +1330,8 @@ static void stubdom_destroy_callback(libxl__egc *egc,
>      }
> 
>      dds->stubdom_finished = 1;
> -    savefile = libxl__device_model_savefile(gc, dis->domid);
> +    /* FIXME: get dmid */
> +    savefile = libxl__device_model_savefile(gc, dis->domid, 0);
>      rc = libxl__remove_file(gc, savefile);
>      /*
>       * On suspend libxl__domain_save_device_model will have already
> @@ -1423,10 +1424,8 @@ void libxl__destroy_domid(libxl__egc *egc, libxl__destroy_domid_state *dis)
>          LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_pause failed for %d", domid);
>      }
>      if (dm_present) {
> -        if (libxl__destroy_device_model(gc, domid) < 0)
> -            LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl__destroy_device_model failed for %d", domid);
> -
> -        libxl__qmp_cleanup(gc, domid);
> +        if (libxl__destroy_device_models(gc, domid) < 0)
> +            LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl__destroy_device_models failed for %d", domid);
>      }
>      dis->drs.ao = ao;
>      dis->drs.domid = domid;
> @@ -1725,6 +1724,13 @@ out:
> 
>  /******************************************************************************/
> 
> +int libxl__dm_setdefault(libxl__gc *gc, libxl_dm *dm)
> +{
> +    return 0;
> +}
> +
> +/******************************************************************************/
> +
>  int libxl__device_disk_setdefault(libxl__gc *gc, libxl_device_disk *disk)
>  {
>      int rc;
> diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
> index 5f0d26f..7160c78 100644
> --- a/tools/libxl/libxl_create.c
> +++ b/tools/libxl/libxl_create.c
> @@ -35,6 +35,10 @@ void libxl_domain_config_dispose(libxl_domain_config *d_config)
>  {
>      int i;
> 
> +    for (i=0; i<d_config->num_dms; i++)
> +        libxl_dm_dispose(&d_config->dms[i]);
> +    free(d_config->dms);

We are adding libxl_FOO_list_free functions for new ones of these as we
introduce new ones), can you do that for the dm type please.

> +
>      for (i=0; i<d_config->num_disks; i++)
>          libxl_device_disk_dispose(&d_config->disks[i]);
>      free(d_config->disks);
> @@ -59,6 +63,50 @@ void libxl_domain_config_dispose(libxl_domain_config *d_config)
>      libxl_domain_build_info_dispose(&d_config->b_info);
>  }
> 
> +static int libxl__domain_config_setdefault(libxl__gc *gc,
> +                                           libxl_domain_config *d_config)
> +{
> +    libxl_domain_build_info *b_info = &d_config->b_info;
> +    uint64_t cap = 0;
> +    int i = 0;
> +    int ret = 0;
> +    libxl_dm *default_dm = NULL;
> +
> +    if (b_info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL
> +        && (d_config->num_dms > 1))
> +        return ERROR_INVAL;
> +
> +    if (!d_config->num_dms) {
> +        d_config->dms = malloc(sizeof (*d_config->dms));

You should use libxl__zalloc or libxl__calloc or something here with the
NO_GC argument to get the expected error handling.
> @@ -991,12 +1057,11 @@ static void domcreate_launch_dm(libxl__egc *egc, libxl__multidev *multidev,
>          libxl__device_console_dispose(&console);
> 
>          if (need_qemu) {
> -            dcs->dmss.dm.guest_domid = domid;
> -            libxl__spawn_local_dm(egc, &dcs->dmss.dm);
> +            assert(dcs->dmss);
> +            domcreate_spawn_devmodel(egc, dcs, dcs->current_dmid);
>              return;
>          } else {
> -            assert(!dcs->dmss.dm.guest_domid);
> -            domcreate_devmodel_started(egc, &dcs->dmss.dm, 0);
> +            assert(!dcs->dmss);

Doesn't this stop progress in this case meaning we'll never get to the
end of the async op?

>              return;
>          }
>      }
[..]
> @@ -1044,7 +1044,8 @@ int libxl__wait_for_device_model(libxl__gc *gc,
>                                   void *check_callback_userdata)
>  {
>      char *path;
> -    path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
> +    path = libxl__sprintf(gc, "/local/domain/0/dms/%u/%u/state",
> +                          domid, dmid);

Isn't this control path shared with qemu? I'm not sure we can just
change it like that? We need to at least retain compatibility with
pre-disag qemus.

>      return libxl__wait_for_offspring(gc, domid,
>                                       LIBXL_DEVICE_MODEL_START_TIMEOUT,
>                                       "Device Model", path, state, spawning,

>  const char *libxl__domain_device_model(libxl__gc *gc,
> -                                       const libxl_domain_build_info *info)
> +                                       uint32_t dmid,
> +                                       const libxl_domain_build_info *b_info)
>  {
>      libxl_ctx *ctx = libxl__gc_owner(gc);
>      const char *dm;
> +    libxl_domain_config *guest_config = CONTAINER_OF(b_info, *guest_config,
> +                                                     b_info);
> 
> -    if (libxl_defbool_val(info->device_model_stubdomain))
> +    if (libxl_defbool_val(guest_config->b_info.device_model_stubdomain))

You just extracted guest_config from b_info but you still have the
b_info point to hand. Why not use it? Likewise a few more times below.
>  {
> +    /**

The ** implies some sort of automagic comments->doc parsing process
which we don't have here.

> +     * PCI device number. Before 3, we have IDE, ISA, SouthBridge and
> +     * XEN PCI. Theses devices will be emulate in each QEMU, but only
> +     * one QEMU (the one which emulates default device) will register
> +     * these devices through Xen PCI hypercall.
> +     */
> +    static unsigned int bdf = 3;

Do you mean const rather than static?

Isn't this baking in some implementation detail from the current qemu
version? What happens if it changes?

> +
>      libxl_ctx *ctx = libxl__gc_owner(gc);
>      const libxl_domain_create_info *c_info = &guest_config->c_info;
>      const libxl_domain_build_info *b_info = &guest_config->b_info;
> +    const libxl_dm *dm_config = &guest_config->dms[dmid];
>      const libxl_device_disk *disks = guest_config->disks;
>      const libxl_device_nic *nics = guest_config->nics;
>      const int num_disks = guest_config->num_disks;
>      const int num_nics = guest_config->num_nics;
> -    const libxl_vnc_info *vnc = libxl__dm_vnc(guest_config);
> +    const libxl_vnc_info *vnc = libxl__dm_vnc(dmid, guest_config);
>      const libxl_sdl_info *sdl = dm_sdl(guest_config);
>      const char *keymap = dm_keymap(guest_config);
>      flexarray_t *dm_args;
>      int i;
>      uint64_t ram_size;
> +    uint32_t cap_ui = dm_config->capabilities & LIBXL_DM_CAP_UI;
> +    uint32_t cap_ide = dm_config->capabilities & LIBXL_DM_CAP_IDE;
> +    uint32_t cap_serial = dm_config->capabilities & LIBXL_DM_CAP_SERIAL;
> +    uint32_t cap_audio = dm_config->capabilities & LIBXL_DM_CAP_AUDIO;

->capabilities is defined as 64 bits, but you use 32 here, which happens
to work if you know what the actual values of the enum are but whoever
adds the 33rd capability will probably get it wrong.

     bool cap_foo = !! (dm_....capabiltieis & LIBXL_DM_CAP_FOO)

would probably work?

> 
>      dm_args = flexarray_make(16, 1);
>      if (!dm_args)
> @@ -348,11 +389,12 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc,
>                        "-xen-domid",
>                        libxl__sprintf(gc, "%d", guest_domid), NULL);
> 
> +    flexarray_append(dm_args, "-nodefaults");

Does this not cause a change in behaviour other than what you've
accounted for here?

> @@ -528,65 +583,69 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc,
>          abort();
>      }
> 
> -    ram_size = libxl__sizekb_to_mb(b_info->max_memkb - b_info->video_memkb);
> +    // Allocate ram space of 32Mo per previous device model to store rom

What is this about?

(also that Mo looks a bit odd in among all these mb's)

Ian.
Julien Grall Aug. 24, 2012, 1:51 p.m. UTC | #2
On 08/23/2012 02:56 PM, Ian Campbell wrote:
> On Wed, 2012-08-22 at 13:32 +0100, Julien Grall wrote:
>    
>> @@ -991,12 +1057,11 @@ static void domcreate_launch_dm(libxl__egc *egc, libxl__multidev *multidev,
>>           libxl__device_console_dispose(&console);
>>
>>           if (need_qemu) {
>> -            dcs->dmss.dm.guest_domid = domid;
>> -            libxl__spawn_local_dm(egc,&dcs->dmss.dm);
>> +            assert(dcs->dmss);
>> +            domcreate_spawn_devmodel(egc, dcs, dcs->current_dmid);
>>               return;
>>           } else {
>> -            assert(!dcs->dmss.dm.guest_domid);
>> -            domcreate_devmodel_started(egc,&dcs->dmss.dm, 0);
>> +            assert(!dcs->dmss);
>>      
> Doesn't this stop progress in this case meaning we'll never get to the
> end of the async op?
>
>    
Indeed, I will fix on the next patch version.

>>               return;
>>           }
>>       }
>>      
> [..]
>    
>> @@ -1044,7 +1044,8 @@ int libxl__wait_for_device_model(libxl__gc *gc,
>>                                    void *check_callback_userdata)
>>   {
>>       char *path;
>> -    path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
>> +    path = libxl__sprintf(gc, "/local/domain/0/dms/%u/%u/state",
>> +                          domid, dmid);
>>      
> Isn't this control path shared with qemu? I'm not sure we can just
> change it like that? We need to at least retain compatibility with
> pre-disag qemus.
>
>    
Indeed, as we have multiple QEMUs for a same domain, we need
to have one control path by QEMU.

Pre-disag QEMUs cannot work with my changes inside the Xen.
Xen will not forward by default ioreq if there is no ioreq server.
>>   const char *libxl__domain_device_model(libxl__gc *gc,
>> -                                       const libxl_domain_build_info *info)
>> +                                       uint32_t dmid,
>> +                                       const libxl_domain_build_info *b_info)
>>   {
>>       libxl_ctx *ctx = libxl__gc_owner(gc);
>>       const char *dm;
>> +    libxl_domain_config *guest_config = CONTAINER_OF(b_info, *guest_config,
>> +                                                     b_info);
>>
>> -    if (libxl_defbool_val(info->device_model_stubdomain))
>> +    if (libxl_defbool_val(guest_config->b_info.device_model_stubdomain))
>>      
> You just extracted guest_config from b_info but you still have the
> b_info point to hand. Why not use it? Likewise a few more times below.
>    
An error, will be fix on next patch version.
>> +     * PCI device number. Before 3, we have IDE, ISA, SouthBridge and
>> +     * XEN PCI. Theses devices will be emulate in each QEMU, but only
>> +     * one QEMU (the one which emulates default device) will register
>> +     * these devices through Xen PCI hypercall.
>> +     */
>> +    static unsigned int bdf = 3;
>>      
> Do you mean const rather than static?
>
>    
No static. With QEMU disaggregation, the toolstack allocate
BDF incrementaly. QEMU is unable to know if a BDF is already
allocated in another QEMU.
For the moment, bdf variable is used to give a devfn for
network card and VGA.

> Isn't this baking in some implementation detail from the current qemu
> version? What happens if it changes?
>    

I don't have another way for the moment. I would be happy,
if someone have a good solution.

>> +
>>       libxl_ctx *ctx = libxl__gc_owner(gc);
>>       const libxl_domain_create_info *c_info =&guest_config->c_info;
>>       const libxl_domain_build_info *b_info =&guest_config->b_info;
>> +    const libxl_dm *dm_config =&guest_config->dms[dmid];
>>       const libxl_device_disk *disks = guest_config->disks;
>>       const libxl_device_nic *nics = guest_config->nics;
>>       const int num_disks = guest_config->num_disks;
>>       const int num_nics = guest_config->num_nics;
>> -    const libxl_vnc_info *vnc = libxl__dm_vnc(guest_config);
>> +    const libxl_vnc_info *vnc = libxl__dm_vnc(dmid, guest_config);
>>       const libxl_sdl_info *sdl = dm_sdl(guest_config);
>>       const char *keymap = dm_keymap(guest_config);
>>       flexarray_t *dm_args;
>>       int i;
>>       uint64_t ram_size;
>> +    uint32_t cap_ui = dm_config->capabilities&  LIBXL_DM_CAP_UI;
>> +    uint32_t cap_ide = dm_config->capabilities&  LIBXL_DM_CAP_IDE;
>> +    uint32_t cap_serial = dm_config->capabilities&  LIBXL_DM_CAP_SERIAL;
>> +    uint32_t cap_audio = dm_config->capabilities&  LIBXL_DM_CAP_AUDIO;
>>      
> ->capabilities is defined as 64 bits, but you use 32 here, which happens
> to work if you know what the actual values of the enum are but whoever
> adds the 33rd capability will probably get it wrong.
>
>       bool cap_foo = !! (dm_....capabiltieis&  LIBXL_DM_CAP_FOO)
>
> would probably work?
>    
Indeed, will be fix in next patch version.

>>       dm_args = flexarray_make(16, 1);
>>       if (!dm_args)
>> @@ -348,11 +389,12 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc,
>>                         "-xen-domid",
>>                         libxl__sprintf(gc, "%d", guest_domid), NULL);
>>
>> +    flexarray_append(dm_args, "-nodefaults");
>>      
> Does this not cause a change in behaviour other than what you've
> accounted for here?
>    
  By default QEMU emulates VGA card, and a network card. This options,
disabled it  and avoid to add "-net none".
I added it after a discussion on my first patch series.
https://lists.gnu.org/archive/html/qemu-devel/2012-03/msg04767.html

>> @@ -528,65 +583,69 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc,
>>           abort();
>>       }
>>
>> -    ram_size = libxl__sizekb_to_mb(b_info->max_memkb - b_info->video_memkb);
>> +    // Allocate ram space of 32Mo per previous device model to store rom
>>      
> What is this about?
>
> (also that Mo looks a bit odd in among all these mb's)
>
>    
It's space for ROM allocation, like vga, rtl8139 roms ...
Each QEMU can load ROM and memory, but the memory
allocator consider that it's alone. It starts to allocate
ROM space from the end of memory RAM.

It's a solution suggest by Stefano, it's avoid modification
in QEMU. As we don't know the number of ROM and their
size per QEMU, we chose a space of 32 Mo to be sure, but in
fine most of time memory is not allocated.
Ian Campbell Aug. 24, 2012, 2:09 p.m. UTC | #3
On Fri, 2012-08-24 at 14:51 +0100, Julien Grall wrote:
> >> @@ -1044,7 +1044,8 @@ int libxl__wait_for_device_model(libxl__gc *gc,
> >>                                    void *check_callback_userdata)
> >>   {
> >>       char *path;
> >> -    path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
> >> +    path = libxl__sprintf(gc, "/local/domain/0/dms/%u/%u/state",
> >> +                          domid, dmid);
> >>      
> > Isn't this control path shared with qemu? I'm not sure we can just
> > change it like that? We need to at least retain compatibility with
> > pre-disag qemus.
> >
> >    
> Indeed, as we have multiple QEMUs for a same domain, we need
> to have one control path by QEMU.
> 
> Pre-disag QEMUs cannot work with my changes inside the Xen.
> Xen will not forward by default ioreq if there is no ioreq server.

We might need to consider making disagg an opt in feature, with the
default being to have as we do today.

> >> +     * PCI device number. Before 3, we have IDE, ISA, SouthBridge and
> >> +     * XEN PCI. Theses devices will be emulate in each QEMU, but only
> >> +     * one QEMU (the one which emulates default device) will register
> >> +     * these devices through Xen PCI hypercall.
> >> +     */
> >> +    static unsigned int bdf = 3;
> >>      
> > Do you mean const rather than static?
> >
> >    
> No static. With QEMU disaggregation, the toolstack allocate
> BDF incrementaly. QEMU is unable to know if a BDF is already
> allocated in another QEMU.

This is broken if the toolstack is building multiple domains, since the
bdf will be preserved across each of them.

You need to put this in some sort of data structure specific to this
particular iteration of the builder code. We must surely have something
suitable close to hand in this function. libxl__domain_build_state
perhaps?

A static variable in a library is almost always a mistake.

> > Isn't this baking in some implementation detail from the current qemu
> > version? What happens if it changes?
> >    
> 
> I don't have another way for the moment. I would be happy,
> if someone have a good solution.

Could we at least make the assignments of the 3 prior BDFs explicit on
the command line too?

> >>       dm_args = flexarray_make(16, 1);
> >>       if (!dm_args)
> >> @@ -348,11 +389,12 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc,
> >>                         "-xen-domid",
> >>                         libxl__sprintf(gc, "%d", guest_domid), NULL);
> >>
> >> +    flexarray_append(dm_args, "-nodefaults");
> >>      
> > Does this not cause a change in behaviour other than what you've
> > accounted for here?
> >    
>   By default QEMU emulates VGA card, and a network card. This options,
> disabled it  and avoid to add "-net none".
> I added it after a discussion on my first patch series.
> https://lists.gnu.org/archive/html/qemu-devel/2012-03/msg04767.html

OK. 

> >> @@ -528,65 +583,69 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc,
> >>           abort();
> >>       }
> >>
> >> -    ram_size = libxl__sizekb_to_mb(b_info->max_memkb - b_info->video_memkb);
> >> +    // Allocate ram space of 32Mo per previous device model to store rom
> >>      
> > What is this about?
> >
> > (also that Mo looks a bit odd in among all these mb's)
> >
> >    
> It's space for ROM allocation, like vga, rtl8139 roms ...
> Each QEMU can load ROM and memory, but the memory
> allocator consider that it's alone. It starts to allocate
> ROM space from the end of memory RAM.
> 
> It's a solution suggest by Stefano, it's avoid modification
> in QEMU. As we don't know the number of ROM and their
> size per QEMU, we chose a space of 32 Mo to be sure, but in
> fine most of time memory is not allocated.

"32Mo per previous device model" is the bit which struck me as odd. That
means the first device model uses 32Mo, the second 64Mo, the third 96Mo
etc?

Aren't we already modifying qemu quite substantially to implement this
functionality anyway? so why are we trying to avoid it in this one
corner? Especially at the cost of doing something which on the face of
it looks quite strange!

Isn't space for the ROMs allocated by SeaBIOS as part of enumerating the
PCI bus anyway? Or is this a different per-ROM allocation?

Ian.
Julien Grall Aug. 24, 2012, 2:37 p.m. UTC | #4
On 08/24/2012 03:09 PM, Ian Campbell wrote:
> On Fri, 2012-08-24 at 14:51 +0100, Julien Grall wrote:
>    
>>>> @@ -1044,7 +1044,8 @@ int libxl__wait_for_device_model(libxl__gc *gc,
>>>>                                     void *check_callback_userdata)
>>>>    {
>>>>        char *path;
>>>> -    path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
>>>> +    path = libxl__sprintf(gc, "/local/domain/0/dms/%u/%u/state",
>>>> +                          domid, dmid);
>>>>
>>>>          
>>> Isn't this control path shared with qemu? I'm not sure we can just
>>> change it like that? We need to at least retain compatibility with
>>> pre-disag qemus.
>>>
>>>
>>>        
>> Indeed, as we have multiple QEMUs for a same domain, we need
>> to have one control path by QEMU.
>>
>> Pre-disag QEMUs cannot work with my changes inside the Xen.
>> Xen will not forward by default ioreq if there is no ioreq server.
>>      
> We might need to consider making disagg an opt in feature, with the
> default being to have as we do today.
>    
When you told feature, it's only for libxl or even for Xen ?
In case of libxl, if 'device_models' options is not specified
we used only one QEMU. So there is compatibility with
previous configuration file.
In case of Xen, it's hard to have a compatibility. We can
still spawn only one QEMU, but ioreq handling will not
send an io request if no device models registered it.
There is no more default QEMU.

>>>> +     * PCI device number. Before 3, we have IDE, ISA, SouthBridge and
>>>> +     * XEN PCI. Theses devices will be emulate in each QEMU, but only
>>>> +     * one QEMU (the one which emulates default device) will register
>>>> +     * these devices through Xen PCI hypercall.
>>>> +     */
>>>> +    static unsigned int bdf = 3;
>>>>
>>>>          
>>> Do you mean const rather than static?
>>>
>>>
>>>        
>> No static. With QEMU disaggregation, the toolstack allocate
>> BDF incrementaly. QEMU is unable to know if a BDF is already
>> allocated in another QEMU.
>>      
> This is broken if the toolstack is building multiple domains, since the
> bdf will be preserved across each of them.
>
> You need to put this in some sort of data structure specific to this
> particular iteration of the builder code. We must surely have something
> suitable close to hand in this function. libxl__domain_build_state
> perhaps?
>
>    
Will be fix in the next patch version.
> A static variable in a library is almost always a mistake.
>
>    
>>> Isn't this baking in some implementation detail from the current qemu
>>> version? What happens if it changes?
>>>
>>>        
>> I don't have another way for the moment. I would be happy,
>> if someone have a good solution.
>>      
> Could we at least make the assignments of the 3 prior BDFs explicit on
> the command line too?
>    
I don't understand your question. Theses 3 priors BDFs can't
be modify via QEMU command line (or I don't know how).
>>>> @@ -528,65 +583,69 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc,
>>>>            abort();
>>>>        }
>>>>
>>>> -    ram_size = libxl__sizekb_to_mb(b_info->max_memkb - b_info->video_memkb);
>>>> +    // Allocate ram space of 32Mo per previous device model to store rom
>>>>
>>>>          
>>> What is this about?
>>>
>>> (also that Mo looks a bit odd in among all these mb's)
>>>
>>>
>>>        
>> It's space for ROM allocation, like vga, rtl8139 roms ...
>> Each QEMU can load ROM and memory, but the memory
>> allocator consider that it's alone. It starts to allocate
>> ROM space from the end of memory RAM.
>>
>> It's a solution suggest by Stefano, it's avoid modification
>> in QEMU. As we don't know the number of ROM and their
>> size per QEMU, we chose a space of 32 Mo to be sure, but in
>> fine most of time memory is not allocated.
>>      
> "32Mo per previous device model" is the bit which struck me as odd. That
> means the first device model uses 32Mo, the second 64Mo, the third 96Mo
> etc?
>    
That means:
     - first QEMU can allocate ROM after ram_size + 0
     - second after ram_size + 32 mo
     - ...

It's a hack to avoid modification in QEMU memory allocator
(find_ram_offset exec.c in QEMU).

> Aren't we already modifying qemu quite substantially to implement this
> functionality anyway? so why are we trying to avoid it in this one
> corner? Especially at the cost of doing something which on the face of
> it looks quite strange!
>
>    
It's not possible to made it in QEMU, otherwise QEMU need to
be spawn one by one. Indeed, the next QEMU need to know
what is the last 'address' used by the previous QEMU.

I made a modification in this way, but it was abandoned. Indeed,
it required XenStore.

> Isn't space for the ROMs allocated by SeaBIOS as part of enumerating the
> PCI bus anyway? Or is this a different per-ROM allocation?
>    
It's the rom allocated via pci_add_option_rom in QEMU.
QEMU seems to store ROM in memory and then SeaBIOS
will copy it, in the right place.
Ian Campbell Aug. 24, 2012, 2:45 p.m. UTC | #5
On Fri, 2012-08-24 at 15:37 +0100, Julien Grall wrote:
> In case of Xen, it's hard to have a compatibility. We can
> still spawn only one QEMU, but ioreq handling will not
> send an io request if no device models registered it.
> There is no more default QEMU.

This means we've broken existing qemu on a new hypervisor, which now
that we have Xen support in upstream qemu is something we need to think
about and decide if we are happy with that or not.

Perhaps it is sufficient for this to be a compile time thing, i.e.
detect if we are building against a disagg capable hypervisor or not.

Or maybe it has to be a runtime thing with Xen only turning off the
default QEMU when the first io req region is registered, or something
like that.

> >>> Isn't this baking in some implementation detail from the current qemu
> >>> version? What happens if it changes?
> >>>
> >>>        
> >> I don't have another way for the moment. I would be happy,
> >> if someone have a good solution.
> >>      
> > Could we at least make the assignments of the 3 prior BDFs explicit on
> > the command line too?
> >    
> I don't understand your question. Theses 3 priors BDFs can't
> be modify via QEMU command line (or I don't know how).

Could qemu be modified to allow this?

> >>>> @@ -528,65 +583,69 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc,
> >>>>            abort();
> >>>>        }
> >>>>
> >>>> -    ram_size = libxl__sizekb_to_mb(b_info->max_memkb - b_info->video_memkb);
> >>>> +    // Allocate ram space of 32Mo per previous device model to store rom
> >>>>
> >>>>          
> >>> What is this about?
> >>>
> >>> (also that Mo looks a bit odd in among all these mb's)
> >>>
> >>>
> >>>        
> >> It's space for ROM allocation, like vga, rtl8139 roms ...
> >> Each QEMU can load ROM and memory, but the memory
> >> allocator consider that it's alone. It starts to allocate
> >> ROM space from the end of memory RAM.
> >>
> >> It's a solution suggest by Stefano, it's avoid modification
> >> in QEMU. As we don't know the number of ROM and their
> >> size per QEMU, we chose a space of 32 Mo to be sure, but in
> >> fine most of time memory is not allocated.
> >>      
> > "32Mo per previous device model" is the bit which struck me as odd. That
> > means the first device model uses 32Mo, the second 64Mo, the third 96Mo
> > etc?
> >    
> That means:
>      - first QEMU can allocate ROM after ram_size + 0
>      - second after ram_size + 32 mo
>      - ...
> 
> It's a hack to avoid modification in QEMU memory allocator
> (find_ram_offset exec.c in QEMU).

Why don't we enhance the memory allocator instead of adding hacks?

> > Aren't we already modifying qemu quite substantially to implement this
> > functionality anyway? so why are we trying to avoid it in this one
> > corner? Especially at the cost of doing something which on the face of
> > it looks quite strange!
> >
> >    
> It's not possible to made it in QEMU, otherwise QEMU need to
> be spawn one by one. Indeed, the next QEMU need to know
> what is the last 'address' used by the previous QEMU.

Or each one needs to be told explicitly where to put its ROMs. Encoding
a magic 32Mo*N in the interface is just too hacky.

> I made a modification in this way, but it was abandoned. Indeed,
> it required XenStore.
> 
> > Isn't space for the ROMs allocated by SeaBIOS as part of enumerating the
> > PCI bus anyway? Or is this a different per-ROM allocation?
> >    
> It's the rom allocated via pci_add_option_rom in QEMU.
> QEMU seems to store ROM in memory and then SeaBIOS
> will copy it, in the right place.

So the ROM binary (the content of the ROM_BAR) is stored in "guest"
memory? That seems a bit odd to me, I'd have thought it would be stored
in the host and provided on demand when the ROM BAR was accessed.

Is there any scope for changing this behaviour?

Ian.
diff mbox

Patch

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 8ea3478..60718b6 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -1330,7 +1330,8 @@  static void stubdom_destroy_callback(libxl__egc *egc,
     }
 
     dds->stubdom_finished = 1;
-    savefile = libxl__device_model_savefile(gc, dis->domid);
+    /* FIXME: get dmid */
+    savefile = libxl__device_model_savefile(gc, dis->domid, 0);
     rc = libxl__remove_file(gc, savefile);
     /*
      * On suspend libxl__domain_save_device_model will have already
@@ -1423,10 +1424,8 @@  void libxl__destroy_domid(libxl__egc *egc, libxl__destroy_domid_state *dis)
         LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_pause failed for %d", domid);
     }
     if (dm_present) {
-        if (libxl__destroy_device_model(gc, domid) < 0)
-            LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl__destroy_device_model failed for %d", domid);
-
-        libxl__qmp_cleanup(gc, domid);
+        if (libxl__destroy_device_models(gc, domid) < 0)
+            LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl__destroy_device_models failed for %d", domid);
     }
     dis->drs.ao = ao;
     dis->drs.domid = domid;
@@ -1725,6 +1724,13 @@  out:
 
 /******************************************************************************/
 
+int libxl__dm_setdefault(libxl__gc *gc, libxl_dm *dm)
+{
+    return 0;
+}
+
+/******************************************************************************/
+
 int libxl__device_disk_setdefault(libxl__gc *gc, libxl_device_disk *disk)
 {
     int rc;
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 5f0d26f..7160c78 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -35,6 +35,10 @@  void libxl_domain_config_dispose(libxl_domain_config *d_config)
 {
     int i;
 
+    for (i=0; i<d_config->num_dms; i++)
+        libxl_dm_dispose(&d_config->dms[i]);
+    free(d_config->dms);
+
     for (i=0; i<d_config->num_disks; i++)
         libxl_device_disk_dispose(&d_config->disks[i]);
     free(d_config->disks);
@@ -59,6 +63,50 @@  void libxl_domain_config_dispose(libxl_domain_config *d_config)
     libxl_domain_build_info_dispose(&d_config->b_info);
 }
 
+static int libxl__domain_config_setdefault(libxl__gc *gc,
+                                           libxl_domain_config *d_config)
+{
+    libxl_domain_build_info *b_info = &d_config->b_info;
+    uint64_t cap = 0;
+    int i = 0;
+    int ret = 0;
+    libxl_dm *default_dm = NULL;
+
+    if (b_info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL
+        && (d_config->num_dms > 1))
+        return ERROR_INVAL;
+
+    if (!d_config->num_dms) {
+        d_config->dms = malloc(sizeof (*d_config->dms));
+        if (!d_config->dms)
+            return ERROR_NOMEM;
+        libxl_dm_init(d_config->dms);
+        d_config->num_dms = 1;
+    }
+
+    for (i = 0; i < d_config->num_dms; i++)
+    {
+        ret = libxl__dm_setdefault(gc, &d_config->dms[i]);
+        if (ret) return ret;
+
+        if (cap & d_config->dms[i].capabilities)
+            /* Some capabilities are already emulated */
+            return ERROR_INVAL;
+
+        cap |= d_config->dms[i].capabilities;
+        if (d_config->dms[i].capabilities & LIBXL_DM_CAP_UI)
+            default_dm = &d_config->dms[i];
+    }
+
+    if (!default_dm)
+        default_dm = &d_config->dms[0];
+
+    /* The default device model emulates all that the others don't emulate */
+    default_dm->capabilities |= ~cap;
+
+    return ret;
+}
+
 int libxl__domain_create_info_setdefault(libxl__gc *gc,
                                          libxl_domain_create_info *c_info)
 {
@@ -145,11 +193,11 @@  int libxl__domain_build_info_setdefault(libxl__gc *gc,
                 LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL;
         else {
             const char *dm;
-            int rc;
+            int rc = 0;
 
             b_info->device_model_version =
                 LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN;
-            dm = libxl__domain_device_model(gc, b_info);
+            dm = libxl__domain_device_model(gc, ~0, b_info);
             rc = access(dm, X_OK);
             if (rc < 0) {
                 /* qemu-xen unavailable, use qemu-xen-traditional */
@@ -651,11 +699,13 @@  static void initiate_domain_create(libxl__egc *egc,
     }
 
     dcs->guest_domid = domid;
-    dcs->dmss.dm.guest_domid = 0; /* means we haven't spawned */
 
     ret = libxl__domain_build_info_setdefault(gc, &d_config->b_info);
     if (ret) goto error_out;
 
+    ret = libxl__domain_config_setdefault(gc, d_config);
+    if (ret) goto error_out;
+
     if (!sched_params_valid(gc, domid, &d_config->b_info.sched_params)) {
         LOG(ERROR, "Invalid scheduling parameters\n");
         ret = ERROR_INVAL;
@@ -667,6 +717,15 @@  static void initiate_domain_create(libxl__egc *egc,
         if (ret) goto error_out;
     }
 
+    dcs->current_dmid = 0;
+    dcs->build_state.num_dms = d_config->num_dms;
+    GCNEW_ARRAY(dcs->dmss, d_config->num_dms);
+
+    for (i = 0; i < d_config->num_dms; i++) {
+        dcs->dmss[i].dm.guest_domid = 0; /* Means we haven't spawned */
+        dcs->dmss[i].dm.dcs = dcs;
+    }
+
     dcs->bl.ao = ao;
     libxl_device_disk *bootdisk =
         d_config->num_disks > 0 ? &d_config->disks[0] : NULL;
@@ -709,6 +768,26 @@  static void domcreate_console_available(libxl__egc *egc,
                                         dcs->guest_domid));
 }
 
+static void domcreate_spawn_devmodel(libxl__egc *egc,
+                                    libxl__domain_create_state *dcs,
+                                    libxl_dmid dmid)
+{
+    libxl__stub_dm_spawn_state *dmss = &dcs->dmss[dmid];
+    STATE_AO_GC(dcs->ao);
+
+    /* We might be going to call libxl__spawn_local_dm, or _spawn_stub_dm.
+     * Fill in any field required by either, including both relevant
+     * callbacks (_spawn_stub_dm will overwrite our trespass if needed). */
+    dmss->dm.spawn.ao = ao;
+    dmss->dm.guest_config = dcs->guest_config;
+    dmss->dm.build_state = &dcs->build_state;
+    dmss->dm.callback = domcreate_devmodel_started;
+    dmss->callback = domcreate_devmodel_started;
+    dmss->dm.dmid = dmid;
+
+    libxl__spawn_dm(egc, dmss);
+}
+
 static void domcreate_bootloader_done(libxl__egc *egc,
                                       libxl__bootloader_state *bl,
                                       int rc)
@@ -735,15 +814,6 @@  static void domcreate_bootloader_done(libxl__egc *egc,
      */
     state->pv_cmdline = bl->cmdline;
 
-    /* We might be going to call libxl__spawn_local_dm, or _spawn_stub_dm.
-     * Fill in any field required by either, including both relevant
-     * callbacks (_spawn_stub_dm will overwrite our trespass if needed). */
-    dcs->dmss.dm.spawn.ao = ao;
-    dcs->dmss.dm.guest_config = dcs->guest_config;
-    dcs->dmss.dm.build_state = &dcs->build_state;
-    dcs->dmss.dm.callback = domcreate_devmodel_started;
-    dcs->dmss.callback = domcreate_devmodel_started;
-
     if ( restore_fd < 0 ) {
         rc = libxl__domain_build(gc, &d_config->b_info, domid, state);
         domcreate_rebuild_done(egc, dcs, rc);
@@ -962,11 +1032,7 @@  static void domcreate_launch_dm(libxl__egc *egc, libxl__multidev *multidev,
         libxl__device_vkb_add(gc, domid, &vkb);
         libxl_device_vkb_dispose(&vkb);
 
-        dcs->dmss.dm.guest_domid = domid;
-        if (libxl_defbool_val(d_config->b_info.device_model_stubdomain))
-            libxl__spawn_stub_dm(egc, &dcs->dmss);
-        else
-            libxl__spawn_local_dm(egc, &dcs->dmss.dm);
+        domcreate_spawn_devmodel(egc, dcs, dcs->current_dmid);
         return;
     }
     case LIBXL_DOMAIN_TYPE_PV:
@@ -991,12 +1057,11 @@  static void domcreate_launch_dm(libxl__egc *egc, libxl__multidev *multidev,
         libxl__device_console_dispose(&console);
 
         if (need_qemu) {
-            dcs->dmss.dm.guest_domid = domid;
-            libxl__spawn_local_dm(egc, &dcs->dmss.dm);
+            assert(dcs->dmss);
+            domcreate_spawn_devmodel(egc, dcs, dcs->current_dmid);
             return;
         } else {
-            assert(!dcs->dmss.dm.guest_domid);
-            domcreate_devmodel_started(egc, &dcs->dmss.dm, 0);
+            assert(!dcs->dmss);
             return;
         }
     }
@@ -1015,7 +1080,7 @@  static void domcreate_devmodel_started(libxl__egc *egc,
                                        libxl__dm_spawn_state *dmss,
                                        int ret)
 {
-    libxl__domain_create_state *dcs = CONTAINER_OF(dmss, *dcs, dmss.dm);
+    libxl__domain_create_state *dcs = dmss->dcs;
     STATE_AO_GC(dmss->spawn.ao);
     libxl_ctx *ctx = CTX;
     int domid = dcs->guest_domid;
@@ -1029,15 +1094,15 @@  static void domcreate_devmodel_started(libxl__egc *egc,
         goto error_out;
     }
 
-    if (dcs->dmss.dm.guest_domid) {
+    if (dmss->guest_domid) {
         if (d_config->b_info.device_model_version
             == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
-            libxl__qmp_initializations(gc, domid, d_config);
+            libxl__qmp_initializations(gc, domid, dmss->dmid, d_config);
         }
     }
 
     /* Plug nic interfaces */
-    if (d_config->num_nics > 0) {
+    if (d_config->num_nics > 0 && dmss->dmid == 0) {
         /* Attach nics */
         libxl__multidev_begin(ao, &dcs->multidev);
         dcs->multidev.callback = domcreate_attach_pci;
@@ -1071,23 +1136,34 @@  static void domcreate_attach_pci(libxl__egc *egc, libxl__multidev *multidev,
         goto error_out;
     }
 
-    for (i = 0; i < d_config->num_pcidevs; i++)
-        libxl__device_pci_add(gc, domid, &d_config->pcidevs[i], 1);
+    /* TO FIX: for the moment only add to device model 0 */
 
-    if (d_config->num_pcidevs > 0) {
-        ret = libxl__create_pci_backend(gc, domid, d_config->pcidevs,
-            d_config->num_pcidevs);
-        if (ret < 0) {
-            LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
-                "libxl_create_pci_backend failed: %d", ret);
-            goto error_out;
+    if (dcs->current_dmid == 0) {
+        for (i = 0; i < d_config->num_pcidevs; i++)
+            libxl__device_pci_add(gc, domid,
+                                  &d_config->pcidevs[i], 1);
+
+        if (d_config->num_pcidevs > 0) {
+            ret = libxl__create_pci_backend(gc, domid, d_config->pcidevs,
+                d_config->num_pcidevs);
+            if (ret < 0) {
+                LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
+                    "libxl_create_pci_backend failed: %d", ret);
+                goto error_out;
+            }
         }
     }
 
-    libxl__arch_domain_create(gc, d_config, domid);
-    domcreate_console_available(egc, dcs);
+    dcs->current_dmid++;
+
+    if (dcs->current_dmid >= dcs->guest_config->num_dms) {
+        libxl__arch_domain_create(gc, d_config, domid);
+        domcreate_console_available(egc, dcs);
+        domcreate_complete(egc, dcs, 0);
+    } else {
+        domcreate_spawn_devmodel(egc, dcs, dcs->current_dmid);
+    }
 
-    domcreate_complete(egc, dcs, 0);
     return;
 
 error_out:
diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index 8e8410e..798665e 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -1034,8 +1034,8 @@  static void devices_remove_callback(libxl__egc *egc,
     return;
 }
 
-int libxl__wait_for_device_model(libxl__gc *gc,
-                                 uint32_t domid, char *state,
+int libxl__wait_for_device_model(libxl__gc *gc, libxl_domid domid,
+                                 libxl_dmid dmid, char *state,
                                  libxl__spawn_starting *spawning,
                                  int (*check_callback)(libxl__gc *gc,
                                                        uint32_t domid,
@@ -1044,7 +1044,8 @@  int libxl__wait_for_device_model(libxl__gc *gc,
                                  void *check_callback_userdata)
 {
     char *path;
-    path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
+    path = libxl__sprintf(gc, "/local/domain/0/dms/%u/%u/state",
+                          domid, dmid);
     return libxl__wait_for_offspring(gc, domid,
                                      LIBXL_DEVICE_MODEL_START_TIMEOUT,
                                      "Device Model", path, state, spawning,
diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index 0c0084f..de7138f 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -28,24 +28,30 @@  static const char *libxl_tapif_script(libxl__gc *gc)
 #endif
 }
 
-const char *libxl__device_model_savefile(libxl__gc *gc, uint32_t domid)
+const char *libxl__device_model_savefile(libxl__gc *gc, libxl_domid domid,
+                                         libxl_dmid dmid)
 {
-    return libxl__sprintf(gc, "/var/lib/xen/qemu-save.%d", domid);
+    return libxl__sprintf(gc, "/var/lib/xen/qemu-save.%u.%u", domid, dmid);
 }
 
 const char *libxl__domain_device_model(libxl__gc *gc,
-                                       const libxl_domain_build_info *info)
+                                       uint32_t dmid,
+                                       const libxl_domain_build_info *b_info)
 {
     libxl_ctx *ctx = libxl__gc_owner(gc);
     const char *dm;
+    libxl_domain_config *guest_config = CONTAINER_OF(b_info, *guest_config,
+                                                     b_info);
 
-    if (libxl_defbool_val(info->device_model_stubdomain))
+    if (libxl_defbool_val(guest_config->b_info.device_model_stubdomain))
         return NULL;
 
-    if (info->device_model) {
-        dm = libxl__strdup(gc, info->device_model);
+    if (dmid < guest_config->num_dms && guest_config->dms[dmid].path) {
+        dm = libxl__strdup(gc, guest_config->dms[dmid].path);
+    } else if (guest_config->b_info.device_model) {
+        dm = libxl__strdup(gc, guest_config->b_info.device_model);
     } else {
-        switch (info->device_model_version) {
+        switch (guest_config->b_info.device_model_version) {
         case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
             dm = libxl__abs_path(gc, "qemu-dm", libxl__libexec_path());
             break;
@@ -55,7 +61,7 @@  const char *libxl__domain_device_model(libxl__gc *gc,
         default:
             LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
                        "invalid device model version %d\n",
-                       info->device_model_version);
+                       guest_config->b_info.device_model_version);
             dm = NULL;
             break;
         }
@@ -63,7 +69,8 @@  const char *libxl__domain_device_model(libxl__gc *gc,
     return dm;
 }
 
-const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config)
+const libxl_vnc_info *libxl__dm_vnc(libxl_dmid dmid,
+                                    const libxl_domain_config *guest_config)
 {
     const libxl_vnc_info *vnc = NULL;
     if (guest_config->b_info.type == LIBXL_DOMAIN_TYPE_HVM) {
@@ -103,7 +110,7 @@  static char ** libxl__build_device_model_args_old(libxl__gc *gc,
     const libxl_domain_create_info *c_info = &guest_config->c_info;
     const libxl_domain_build_info *b_info = &guest_config->b_info;
     const libxl_device_nic *nics = guest_config->nics;
-    const libxl_vnc_info *vnc = libxl__dm_vnc(guest_config);
+    const libxl_vnc_info *vnc = libxl__dm_vnc(0, guest_config);
     const libxl_sdl_info *sdl = dm_sdl(guest_config);
     const int num_nics = guest_config->num_nics;
     const char *keymap = dm_keymap(guest_config);
@@ -321,24 +328,58 @@  static char *dm_spice_options(libxl__gc *gc,
     return opt;
 }
 
+static int libxl__dm_has_vif(const char *vifname, libxl_dmid dmid,
+                             const libxl_domain_config *guest_config)
+{
+    const libxl_dm *dm_config = &guest_config->dms[dmid];
+    int i = 0;
+
+    if (!vifname && (dm_config->capabilities & LIBXL_DM_CAP_UI))
+        return 1;
+
+    if (!dm_config->vifs)
+        return 0;
+
+    for (i = 0; dm_config->vifs[i]; i++) {
+        if (!strcmp(dm_config->vifs[i], vifname))
+            return 1;
+    }
+
+    return 0;
+}
+
 static char ** libxl__build_device_model_args_new(libxl__gc *gc,
-                                        const char *dm, int guest_domid,
+                                        const char *dm, libxl_dmid guest_domid,
+                                        libxl_dmid dmid,
                                         const libxl_domain_config *guest_config,
                                         const libxl__domain_build_state *state)
 {
+    /**
+     * PCI device number. Before 3, we have IDE, ISA, SouthBridge and
+     * XEN PCI. Theses devices will be emulate in each QEMU, but only
+     * one QEMU (the one which emulates default device) will register
+     * these devices through Xen PCI hypercall.
+     */
+    static unsigned int bdf = 3;
+
     libxl_ctx *ctx = libxl__gc_owner(gc);
     const libxl_domain_create_info *c_info = &guest_config->c_info;
     const libxl_domain_build_info *b_info = &guest_config->b_info;
+    const libxl_dm *dm_config = &guest_config->dms[dmid];
     const libxl_device_disk *disks = guest_config->disks;
     const libxl_device_nic *nics = guest_config->nics;
     const int num_disks = guest_config->num_disks;
     const int num_nics = guest_config->num_nics;
-    const libxl_vnc_info *vnc = libxl__dm_vnc(guest_config);
+    const libxl_vnc_info *vnc = libxl__dm_vnc(dmid, guest_config);
     const libxl_sdl_info *sdl = dm_sdl(guest_config);
     const char *keymap = dm_keymap(guest_config);
     flexarray_t *dm_args;
     int i;
     uint64_t ram_size;
+    uint32_t cap_ui = dm_config->capabilities & LIBXL_DM_CAP_UI;
+    uint32_t cap_ide = dm_config->capabilities & LIBXL_DM_CAP_IDE;
+    uint32_t cap_serial = dm_config->capabilities & LIBXL_DM_CAP_SERIAL;
+    uint32_t cap_audio = dm_config->capabilities & LIBXL_DM_CAP_AUDIO;
 
     dm_args = flexarray_make(16, 1);
     if (!dm_args)
@@ -348,11 +389,12 @@  static char ** libxl__build_device_model_args_new(libxl__gc *gc,
                       "-xen-domid",
                       libxl__sprintf(gc, "%d", guest_domid), NULL);
 
+    flexarray_append(dm_args, "-nodefaults");
     flexarray_append(dm_args, "-chardev");
     flexarray_append(dm_args,
                      libxl__sprintf(gc, "socket,id=libxl-cmd,"
-                                    "path=%s/qmp-libxl-%d,server,nowait",
-                                    libxl__run_dir_path(), guest_domid));
+                                    "path=%s/qmp-libxl-%u-%u,server,nowait",
+                                    libxl__run_dir_path(), guest_domid, dmid));
 
     flexarray_append(dm_args, "-mon");
     flexarray_append(dm_args, "chardev=libxl-cmd,mode=control");
@@ -364,7 +406,8 @@  static char ** libxl__build_device_model_args_new(libxl__gc *gc,
     if (c_info->name) {
         flexarray_vappend(dm_args, "-name", c_info->name, NULL);
     }
-    if (vnc) {
+
+    if (vnc && cap_ui) {
         int display = 0;
         const char *listen = "127.0.0.1";
         char *vncarg = NULL;
@@ -395,7 +438,7 @@  static char ** libxl__build_device_model_args_new(libxl__gc *gc,
         }
         flexarray_append(dm_args, vncarg);
     }
-    if (sdl) {
+    if (sdl && cap_ui) {
         flexarray_append(dm_args, "-sdl");
         /* XXX sdl->{display,xauthority} into $DISPLAY/$XAUTHORITY */
     }
@@ -411,13 +454,27 @@  static char ** libxl__build_device_model_args_new(libxl__gc *gc,
     if (b_info->type == LIBXL_DOMAIN_TYPE_HVM) {
         int ioemu_nics = 0;
 
-        if (b_info->u.hvm.serial) {
+        if (b_info->u.hvm.serial && cap_serial) {
             flexarray_vappend(dm_args, "-serial", b_info->u.hvm.serial, NULL);
         }
 
-        if (libxl_defbool_val(b_info->u.hvm.nographic) && (!sdl && !vnc)) {
+        if ((libxl_defbool_val(b_info->u.hvm.nographic) && (!sdl && !vnc))
+            || !cap_ui) {
             flexarray_append(dm_args, "-nographic");
         }
+        else {
+            switch (b_info->u.hvm.vga.kind) {
+            case LIBXL_VGA_INTERFACE_TYPE_STD:
+                flexarray_vappend(dm_args, "-device",
+                                  GCSPRINTF("VGA,addr=%u", bdf++), NULL);
+                break;
+            case LIBXL_VGA_INTERFACE_TYPE_CIRRUS:
+                flexarray_vappend(dm_args, "-device",
+                                  GCSPRINTF("cirrus-vga,addr=%u", bdf++),
+                                  NULL);
+                break;
+            }
+        }
 
         if (libxl_defbool_val(b_info->u.hvm.spice.enable)) {
             const libxl_spice_info *spice = &b_info->u.hvm.spice;
@@ -429,27 +486,19 @@  static char ** libxl__build_device_model_args_new(libxl__gc *gc,
             flexarray_append(dm_args, spiceoptions);
         }
 
-        switch (b_info->u.hvm.vga.kind) {
-        case LIBXL_VGA_INTERFACE_TYPE_STD:
-            flexarray_vappend(dm_args, "-vga", "std", NULL);
-            break;
-        case LIBXL_VGA_INTERFACE_TYPE_CIRRUS:
-            flexarray_vappend(dm_args, "-vga", "cirrus", NULL);
-            break;
-        }
-
         if (b_info->u.hvm.boot) {
             flexarray_vappend(dm_args, "-boot",
                     libxl__sprintf(gc, "order=%s", b_info->u.hvm.boot), NULL);
         }
-        if (libxl_defbool_val(b_info->u.hvm.usb) || b_info->u.hvm.usbdevice) {
+        if ((libxl_defbool_val(b_info->u.hvm.usb) || b_info->u.hvm.usbdevice)
+            && cap_ui) {
             flexarray_append(dm_args, "-usb");
             if (b_info->u.hvm.usbdevice) {
                 flexarray_vappend(dm_args,
                                   "-usbdevice", b_info->u.hvm.usbdevice, NULL);
             }
         }
-        if (b_info->u.hvm.soundhw) {
+        if (b_info->u.hvm.soundhw && cap_audio) {
             flexarray_vappend(dm_args, "-soundhw", b_info->u.hvm.soundhw, NULL);
         }
         if (!libxl_defbool_val(b_info->u.hvm.acpi)) {
@@ -469,7 +518,8 @@  static char ** libxl__build_device_model_args_new(libxl__gc *gc,
                                                          b_info->max_vcpus));
         }
         for (i = 0; i < num_nics; i++) {
-            if (nics[i].nictype == LIBXL_NIC_TYPE_VIF_IOEMU) {
+            if (nics[i].nictype == LIBXL_NIC_TYPE_VIF_IOEMU
+                && libxl__dm_has_vif(nics[i].id, dmid, guest_config)) {
                 char *smac = libxl__sprintf(gc,
                                 LIBXL_MAC_FMT, LIBXL_MAC_BYTES(nics[i].mac));
                 const char *ifname = libxl__device_nic_devname(gc,
@@ -477,9 +527,9 @@  static char ** libxl__build_device_model_args_new(libxl__gc *gc,
                                                 LIBXL_NIC_TYPE_VIF_IOEMU);
                 flexarray_append(dm_args, "-device");
                 flexarray_append(dm_args,
-                   libxl__sprintf(gc, "%s,id=nic%d,netdev=net%d,mac=%s",
-                                                nics[i].model, nics[i].devid,
-                                                nics[i].devid, smac));
+                            GCSPRINTF("%s,id=nic%d,netdev=net%d,mac=%s,addr=%u",
+                                      nics[i].model, nics[i].devid,
+                                      nics[i].devid, smac, bdf++));
                 flexarray_append(dm_args, "-netdev");
                 flexarray_append(dm_args, GCSPRINTF(
                                           "type=tap,id=net%d,ifname=%s,"
@@ -495,7 +545,7 @@  static char ** libxl__build_device_model_args_new(libxl__gc *gc,
             flexarray_append(dm_args, "-net");
             flexarray_append(dm_args, "none");
         }
-        if (libxl_defbool_val(b_info->u.hvm.gfx_passthru)) {
+        if (libxl_defbool_val(b_info->u.hvm.gfx_passthru) && cap_ui) {
             flexarray_append(dm_args, "-gfx_passthru");
         }
     } else {
@@ -506,13 +556,14 @@  static char ** libxl__build_device_model_args_new(libxl__gc *gc,
 
     if (state->saved_state) {
         /* This file descriptor is meant to be used by QEMU */
-        int migration_fd = open(state->saved_state, O_RDONLY);
+        int migration_fd = open(libxl__sprintf(gc, "%s.%u", state->saved_state,
+                                               dmid), O_RDONLY);
         flexarray_append(dm_args, "-incoming");
         flexarray_append(dm_args, libxl__sprintf(gc, "fd:%d", migration_fd));
     }
     for (i = 0; b_info->extra && b_info->extra[i] != NULL; i++)
         flexarray_append(dm_args, b_info->extra[i]);
-    flexarray_append(dm_args, "-M");
+    flexarray_append(dm_args, "-machine");
     switch (b_info->type) {
     case LIBXL_DOMAIN_TYPE_PV:
         flexarray_append(dm_args, "xenpv");
@@ -520,7 +571,11 @@  static char ** libxl__build_device_model_args_new(libxl__gc *gc,
             flexarray_append(dm_args, b_info->extra_pv[i]);
         break;
     case LIBXL_DOMAIN_TYPE_HVM:
-        flexarray_append(dm_args, "xenfv");
+        flexarray_append(dm_args,
+                         libxl__sprintf(gc,
+                                        "xenfv,xen_dmid=%u,xen_default_dev=%s,xen_emulate_ide=%s",
+                                        dmid, (cap_ui) ? "on" : "off",
+                                        (cap_ide) ? "on" : "off"));
         for (i = 0; b_info->extra_hvm && b_info->extra_hvm[i] != NULL; i++)
             flexarray_append(dm_args, b_info->extra_hvm[i]);
         break;
@@ -528,65 +583,69 @@  static char ** libxl__build_device_model_args_new(libxl__gc *gc,
         abort();
     }
 
-    ram_size = libxl__sizekb_to_mb(b_info->max_memkb - b_info->video_memkb);
+    // Allocate ram space of 32Mo per previous device model to store rom
+    ram_size = libxl__sizekb_to_mb(b_info->max_memkb - b_info->video_memkb)
+        + 32 * dmid;
     flexarray_append(dm_args, "-m");
     flexarray_append(dm_args, libxl__sprintf(gc, "%"PRId64, ram_size));
 
     if (b_info->type == LIBXL_DOMAIN_TYPE_HVM) {
-        for (i = 0; i < num_disks; i++) {
-            int disk, part;
-            int dev_number =
-                libxl__device_disk_dev_number(disks[i].vdev, &disk, &part);
-            const char *format = qemu_disk_format_string(disks[i].format);
-            char *drive;
-
-            if (dev_number == -1) {
-                LIBXL__LOG(ctx, LIBXL__LOG_WARNING, "unable to determine"
-                           " disk number for %s", disks[i].vdev);
-                continue;
-            }
-
-            if (disks[i].is_cdrom) {
-                if (disks[i].format == LIBXL_DISK_FORMAT_EMPTY)
-                    drive = libxl__sprintf
-                        (gc, "if=ide,index=%d,media=cdrom", disk);
-                else
-                    drive = libxl__sprintf
-                        (gc, "file=%s,if=ide,index=%d,media=cdrom,format=%s",
-                         disks[i].pdev_path, disk, format);
-            } else {
-                if (disks[i].format == LIBXL_DISK_FORMAT_EMPTY) {
-                    LIBXL__LOG(ctx, LIBXL__LOG_WARNING, "cannot support"
-                               " empty disk format for %s", disks[i].vdev);
+        if (cap_ide) {
+            for (i = 0; i < num_disks; i++) {
+                int disk, part;
+                int dev_number =
+                    libxl__device_disk_dev_number(disks[i].vdev, &disk, &part);
+                const char *format = qemu_disk_format_string(disks[i].format);
+                char *drive;
+
+                if (dev_number == -1) {
+                    LIBXL__LOG(ctx, LIBXL__LOG_WARNING, "unable to determine"
+                               " disk number for %s", disks[i].vdev);
                     continue;
                 }
 
-                if (format == NULL) {
-                    LIBXL__LOG(ctx, LIBXL__LOG_WARNING, "unable to determine"
-                               " disk image format %s", disks[i].vdev);
-                    continue;
+                if (disks[i].is_cdrom) {
+                    if (disks[i].format == LIBXL_DISK_FORMAT_EMPTY)
+                        drive = libxl__sprintf
+                            (gc, "if=ide,index=%d,media=cdrom", disk);
+                    else
+                        drive = libxl__sprintf
+                            (gc, "file=%s,if=ide,index=%d,media=cdrom,format=%s",
+                             disks[i].pdev_path, disk, format);
+                } else {
+                    if (disks[i].format == LIBXL_DISK_FORMAT_EMPTY) {
+                        LIBXL__LOG(ctx, LIBXL__LOG_WARNING, "cannot support"
+                                   " empty disk format for %s", disks[i].vdev);
+                        continue;
+                    }
+
+                    if (format == NULL) {
+                        LIBXL__LOG(ctx, LIBXL__LOG_WARNING, "unable to determine"
+                                   " disk image format %s", disks[i].vdev);
+                        continue;
+                    }
+
+                    /*
+                     * Explicit sd disks are passed through as is.
+                     *
+                     * For other disks we translate devices 0..3 into
+                     * hd[a-d] and ignore the rest.
+                     */
+                    if (strncmp(disks[i].vdev, "sd", 2) == 0)
+                        drive = libxl__sprintf
+                            (gc, "file=%s,if=scsi,bus=0,unit=%d,format=%s",
+                             disks[i].pdev_path, disk, format);
+                    else if (disk < 4)
+                        drive = libxl__sprintf
+                            (gc, "file=%s,if=ide,index=%d,media=disk,format=%s",
+                             disks[i].pdev_path, disk, format);
+                    else
+                        continue; /* Do not emulate this disk */
                 }
 
-                /*
-                 * Explicit sd disks are passed through as is.
-                 *
-                 * For other disks we translate devices 0..3 into
-                 * hd[a-d] and ignore the rest.
-                 */
-                if (strncmp(disks[i].vdev, "sd", 2) == 0)
-                    drive = libxl__sprintf
-                        (gc, "file=%s,if=scsi,bus=0,unit=%d,format=%s",
-                         disks[i].pdev_path, disk, format);
-                else if (disk < 4)
-                    drive = libxl__sprintf
-                        (gc, "file=%s,if=ide,index=%d,media=disk,format=%s",
-                         disks[i].pdev_path, disk, format);
-                else
-                    continue; /* Do not emulate this disk */
+                flexarray_append(dm_args, "-drive");
+                flexarray_append(dm_args, drive);
             }
-
-            flexarray_append(dm_args, "-drive");
-            flexarray_append(dm_args, drive);
         }
     }
     flexarray_append(dm_args, NULL);
@@ -594,7 +653,9 @@  static char ** libxl__build_device_model_args_new(libxl__gc *gc,
 }
 
 static char ** libxl__build_device_model_args(libxl__gc *gc,
-                                        const char *dm, int guest_domid,
+                                        const char *dm,
+                                        libxl_domid guest_domid,
+                                        libxl_dmid dmid,
                                         const libxl_domain_config *guest_config,
                                         const libxl__domain_build_state *state)
 {
@@ -607,8 +668,8 @@  static char ** libxl__build_device_model_args(libxl__gc *gc,
                                                   state);
     case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
         return libxl__build_device_model_args_new(gc, dm,
-                                                  guest_domid, guest_config,
-                                                  state);
+                                                  guest_domid, dmid,
+                                                  guest_config, state);
     default:
         LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "unknown device model version %d",
                          guest_config->b_info.device_model_version);
@@ -729,7 +790,8 @@  char *libxl__stub_dm_name(libxl__gc *gc, const char *guest_name)
     return libxl__sprintf(gc, "%s-dm", guest_name);
 }
 
-void libxl__spawn_stub_dm(libxl__egc *egc, libxl__stub_dm_spawn_state *sdss)
+static void libxl__spawn_stub_dm(libxl__egc *egc,
+                                 libxl__stub_dm_spawn_state *sdss)
 {
     STATE_AO_GC(sdss->dm.spawn.ao);
     libxl_ctx *ctx = libxl__gc_owner(gc);
@@ -815,7 +877,7 @@  void libxl__spawn_stub_dm(libxl__egc *egc, libxl__stub_dm_spawn_state *sdss)
     if (ret)
         goto out;
 
-    args = libxl__build_device_model_args(gc, "stubdom-dm", guest_domid,
+    args = libxl__build_device_model_args(gc, "stubdom-dm", guest_domid, 0,
                                           guest_config, d_state);
     if (!args) {
         ret = ERROR_FAIL;
@@ -871,12 +933,16 @@  out:
     spawn_stubdom_pvqemu_cb(egc, &sdss->pvqemu, ret);
 }
 
+static void libxl__spawn_local_dm(libxl__egc *egc,
+                                  libxl__dm_spawn_state *sdss);
+
 static void spawn_stub_launch_dm(libxl__egc *egc,
                                  libxl__multidev *multidev, int ret)
 {
     libxl__stub_dm_spawn_state *sdss = CONTAINER_OF(multidev, *sdss, multidev);
     STATE_AO_GC(sdss->dm.spawn.ao);
     libxl_ctx *ctx = libxl__gc_owner(gc);
+    libxl_dmid dmid = sdss->dm.dmid;
     int i, num_console = STUBDOM_SPECIAL_CONSOLES;
     libxl__device_console *console;
 
@@ -937,7 +1003,8 @@  static void spawn_stub_launch_dm(libxl__egc *egc,
                 break;
             case STUBDOM_CONSOLE_SAVE:
                 console[i].output = libxl__sprintf(gc, "file:%s",
-                                libxl__device_model_savefile(gc, guest_domid));
+                                libxl__device_model_savefile(gc, guest_domid,
+                                                             dmid));
                 break;
             case STUBDOM_CONSOLE_RESTORE:
                 if (d_state->saved_state)
@@ -1049,10 +1116,11 @@  static void device_model_spawn_outcome(libxl__egc *egc,
                                        libxl__dm_spawn_state *dmss,
                                        int rc);
 
-void libxl__spawn_local_dm(libxl__egc *egc, libxl__dm_spawn_state *dmss)
+static void libxl__spawn_local_dm(libxl__egc *egc, libxl__dm_spawn_state *dmss)
 {
     /* convenience aliases */
     const int domid = dmss->guest_domid;
+    const libxl_dmid dmid = dmss->dmid;
     libxl__domain_build_state *const state = dmss->build_state;
     libxl__spawn_state *const spawn = &dmss->spawn;
 
@@ -1062,7 +1130,8 @@  void libxl__spawn_local_dm(libxl__egc *egc, libxl__dm_spawn_state *dmss)
     libxl_domain_config *guest_config = dmss->guest_config;
     const libxl_domain_create_info *c_info = &guest_config->c_info;
     const libxl_domain_build_info *b_info = &guest_config->b_info;
-    const libxl_vnc_info *vnc = libxl__dm_vnc(guest_config);
+    const libxl_vnc_info *vnc = libxl__dm_vnc(dmid, guest_config);
+    const libxl_dm *dm_config = &guest_config->dms[dmid];
     char *path, *logfile;
     int logfile_w, null;
     int rc;
@@ -1071,12 +1140,13 @@  void libxl__spawn_local_dm(libxl__egc *egc, libxl__dm_spawn_state *dmss)
     char *vm_path;
     char **pass_stuff;
     const char *dm;
+    const char *name;
 
     if (libxl_defbool_val(b_info->device_model_stubdomain)) {
         abort();
     }
 
-    dm = libxl__domain_device_model(gc, b_info);
+    dm = libxl__domain_device_model(gc, dmid, b_info);
     if (!dm) {
         rc = ERROR_FAIL;
         goto out;
@@ -1087,7 +1157,7 @@  void libxl__spawn_local_dm(libxl__egc *egc, libxl__dm_spawn_state *dmss)
         rc = ERROR_FAIL;
         goto out;
     }
-    args = libxl__build_device_model_args(gc, dm, domid, guest_config, state);
+    args = libxl__build_device_model_args(gc, dm, domid, dmid, guest_config, state);
     if (!args) {
         rc = ERROR_FAIL;
         goto out;
@@ -1101,7 +1171,7 @@  void libxl__spawn_local_dm(libxl__egc *egc, libxl__dm_spawn_state *dmss)
         free(path);
     }
 
-    path = libxl__sprintf(gc, "/local/domain/0/device-model/%d", domid);
+    path = libxl__sprintf(gc, "/local/domain/0/dms/%u/%u", domid, dmid);
     xs_mkdir(ctx->xsh, XBT_NULL, path);
 
     if (b_info->type == LIBXL_DOMAIN_TYPE_HVM &&
@@ -1110,8 +1180,13 @@  void libxl__spawn_local_dm(libxl__egc *egc, libxl__dm_spawn_state *dmss)
         libxl__xs_write(gc, XBT_NULL, libxl__sprintf(gc, "%s/disable_pf", path),
                     "%d", !libxl_defbool_val(b_info->u.hvm.xen_platform_pci));
 
+
+    name = dm_config->name;
+    if (!name)
+        name = libxl__sprintf(gc, "%u", dmid);
+
     libxl_create_logfile(ctx,
-                         libxl__sprintf(gc, "qemu-dm-%s", c_info->name),
+                         libxl__sprintf(gc, "qemu-%s-%s", name, c_info->name),
                          &logfile);
     logfile_w = open(logfile, O_WRONLY|O_CREAT|O_APPEND, 0644);
     free(logfile);
@@ -1143,10 +1218,10 @@  retry_transaction:
     for (arg = args; *arg; arg++)
         LIBXL__LOG(CTX, XTL_DEBUG, "  %s", *arg);
 
-    spawn->what = GCSPRINTF("domain %d device model", domid);
-    spawn->xspath = GCSPRINTF("/local/domain/0/device-model/%d/state", domid);
+    spawn->what = GCSPRINTF("domain %d device model %s", domid, name);
+    spawn->xspath = GCSPRINTF("/local/domain/0/dms/%u/%u/state", domid, dmid);
     spawn->timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
-    spawn->pidpath = GCSPRINTF("%s/image/device-model-pid", dom_path);
+    spawn->pidpath = GCSPRINTF("%s/image/dms/%u-pid", dom_path, dmid);
     spawn->midproc_cb = libxl__spawn_record_pid;
     spawn->confirm_cb = device_model_confirm;
     spawn->failure_cb = device_model_startup_failed;
@@ -1171,6 +1246,32 @@  out:
         device_model_spawn_outcome(egc, dmss, rc);
 }
 
+void libxl__spawn_dm(libxl__egc *egc, libxl__stub_dm_spawn_state *dmss)
+{
+    libxl__domain_create_state *dcs = dmss->dm.dcs;
+    libxl_domain_config *const d_config = dcs->guest_config;
+    STATE_AO_GC(dmss->dm.spawn.ao);
+
+    switch (d_config->c_info.type) {
+    case LIBXL_DOMAIN_TYPE_HVM:
+    {
+        dmss->dm.guest_domid = dcs->guest_domid;
+        if (libxl_defbool_val(d_config->b_info.device_model_stubdomain))
+            libxl__spawn_stub_dm(egc, dmss);
+        else
+            libxl__spawn_local_dm(egc, &dmss->dm);
+        break;
+    }
+    case LIBXL_DOMAIN_TYPE_PV:
+    {
+        dmss->dm.guest_domid = dcs->guest_domid;
+        libxl__spawn_local_dm(egc, &dmss->dm);
+        break;
+    }
+    default:
+        LIBXL__LOG(CTX, XTL_ERROR, "Unknow type %u", d_config->c_info.type);
+    }
+}
 
 static void device_model_confirm(libxl__egc *egc, libxl__spawn_state *spawn,
                                  const char *xsdata)
@@ -1207,6 +1308,7 @@  static void device_model_spawn_outcome(libxl__egc *egc,
 {
     STATE_AO_GC(dmss->spawn.ao);
     int ret2;
+    char *filename;
 
     if (rc)
         LOG(ERROR, "%s: spawn failed (rc=%d)", dmss->spawn.what, rc);
@@ -1214,10 +1316,11 @@  static void device_model_spawn_outcome(libxl__egc *egc,
     libxl__domain_build_state *state = dmss->build_state;
 
     if (state->saved_state) {
-        ret2 = unlink(state->saved_state);
+        filename = GCSPRINTF("%s.%u", state->saved_state, dmss->dmid);
+        ret2 = unlink(filename);
         if (ret2) {
             LOGE(ERROR, "%s: failed to remove device-model state %s",
-                 dmss->spawn.what, state->saved_state);
+                 dmss->spawn.what, filename);
             rc = ERROR_FAIL;
             goto out;
         }
@@ -1229,12 +1332,14 @@  static void device_model_spawn_outcome(libxl__egc *egc,
     dmss->callback(egc, dmss, rc);
 }
 
-int libxl__destroy_device_model(libxl__gc *gc, uint32_t domid)
+static int libxl__destroy_device_model(libxl__gc *gc, libxl_domid domid,
+                                       libxl_dmid dmid)
 {
     char *pid;
     int ret;
 
-    pid = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "/local/domain/%d/image/device-model-pid", domid));
+    pid = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("/local/domain/%u/image/dms/%u-pid",
+                                                 domid, dmid));
     if (!pid || !atoi(pid)) {
         LOG(ERROR, "could not find device-model's pid for dom %u", domid);
         ret = ERROR_FAIL;
@@ -1300,6 +1405,60 @@  out:
     return ret;
 }
 
+libxl_dmid *libxl__list_device_models(libxl__gc *gc, libxl_domid domid,
+                                      unsigned *num_dms)
+{
+    unsigned int i = 0;
+    char **dir = NULL;
+    libxl_dmid *dms = NULL;
+    unsigned int num = 0;
+
+    dir = libxl__xs_directory(gc, XBT_NULL,
+                              GCSPRINTF("/local/domain/0/dms/%u", domid),
+                              &num);
+    if (dir) {
+        GCNEW_ARRAY(dms, num);
+
+        if (num_dms)
+            *num_dms = num;
+
+        for (i = 0; i < num; i++) {
+            dms[i] = atoi(dir[i]);
+        }
+
+        return dms;
+    }
+    else
+        return NULL;
+}
+
+int libxl__destroy_device_models(libxl__gc *gc,
+                                 libxl_domid domid)
+{
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+    int ret = 0;
+    libxl_dmid *dms = NULL;
+    unsigned int num_dms;
+    unsigned int i;
+
+    dms = libxl__list_device_models(gc, domid, &num_dms);
+
+    if (!dms)
+        return ERROR_FAIL;
+
+    for (i = 0; i < num_dms; i++)
+        ret |= libxl__destroy_device_model(gc, domid, dms[i]);
+
+    if (!ret) {
+        xs_rm(ctx->xsh, XBT_NULL, libxl__sprintf(gc, "/local/domain/0/dms/%u",
+                                                 domid));
+        xs_rm(ctx->xsh, XBT_NULL, libxl__sprintf(gc, "/local/domain/0/device-model/%u",
+                                                 domid));
+    }
+
+    return ret;
+ }
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 06d5e4f..475fea8 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -544,6 +544,7 @@  int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
     libxl_ctx *ctx = libxl__gc_owner(gc);
     int ret, rc = ERROR_FAIL;
     const char *firmware = libxl__domain_firmware(gc, info);
+    libxl_domain_config *d_config = CONTAINER_OF(info, *d_config, b_info);
 
     if (!firmware)
         goto out;
@@ -552,7 +553,8 @@  int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
         domid,
         (info->max_memkb - info->video_memkb) / 1024,
         (info->target_memkb - info->video_memkb) / 1024,
-        firmware);
+        firmware,
+        state->num_dms * 2 + 1);
     if (ret) {
         LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, ret, "hvm building failed");
         goto out;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 71e4970..2e6eedc 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -867,6 +867,7 @@  typedef struct {
     unsigned long console_mfn;
 
     unsigned long vm_generationid_addr;
+    unsigned long num_dms;
 
     char *saved_state;
 
@@ -887,7 +888,7 @@  _hidden int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
               libxl_domain_build_info *info,
               libxl__domain_build_state *state);
 
-_hidden int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
+_hidden int libxl__qemu_traditional_cmd(libxl__gc *gc, libxl_domid domid,
                                         const char *cmd);
 _hidden int libxl__domain_rename(libxl__gc *gc, uint32_t domid,
                                  const char *old_name, const char *new_name,
@@ -947,6 +948,8 @@  _hidden int libxl__domain_create_info_setdefault(libxl__gc *gc,
                                         libxl_domain_create_info *c_info);
 _hidden int libxl__domain_build_info_setdefault(libxl__gc *gc,
                                         libxl_domain_build_info *b_info);
+_hidden int libxl__dm_setdefault(libxl__gc *gc,
+                                 libxl_dm *dm);
 _hidden int libxl__device_disk_setdefault(libxl__gc *gc,
                                           libxl_device_disk *disk);
 _hidden int libxl__device_nic_setdefault(libxl__gc *gc, libxl_device_nic *nic,
@@ -1042,7 +1045,9 @@  _hidden char *libxl__devid_to_localdev(libxl__gc *gc, int devid);
 
 /* from libxl_pci */
 
-_hidden int libxl__device_pci_add(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev, int starting);
+_hidden int libxl__device_pci_add(libxl__gc *gc, libxl_domid domid,
+                                  libxl_device_pci *pcidev,
+                                  int starting);
 _hidden int libxl__create_pci_backend(libxl__gc *gc, uint32_t domid,
                                       libxl_device_pci *pcidev, int num);
 _hidden int libxl__device_pci_destroy_all(libxl__gc *gc, uint32_t domid);
@@ -1272,6 +1277,7 @@  _hidden int libxl__domain_build(libxl__gc *gc,
 
 /* for device model creation */
 _hidden const char *libxl__domain_device_model(libxl__gc *gc,
+                                        libxl_dmid dmid,
                                         const libxl_domain_build_info *info);
 _hidden int libxl__need_xenpv_qemu(libxl__gc *gc,
         int nr_consoles, libxl__device_console *consoles,
@@ -1281,7 +1287,9 @@  _hidden int libxl__need_xenpv_qemu(libxl__gc *gc,
    * return pass *starting_r (which will be non-0) to
    * libxl__confirm_device_model_startup or libxl__detach_device_model. */
 _hidden int libxl__wait_for_device_model(libxl__gc *gc,
-                                uint32_t domid, char *state,
+                                libxl_domid domid,
+                                libxl_dmid dmid,
+                                char *state,
                                 libxl__spawn_starting *spawning,
                                 int (*check_callback)(libxl__gc *gc,
                                                       uint32_t domid,
@@ -1289,9 +1297,14 @@  _hidden int libxl__wait_for_device_model(libxl__gc *gc,
                                                       void *userdata),
                                 void *check_callback_userdata);
 
-_hidden int libxl__destroy_device_model(libxl__gc *gc, uint32_t domid);
+_hidden libxl_dmid *libxl__list_device_models(libxl__gc *gc,
+                                              libxl_domid domid,
+                                              unsigned int *num_dms);
 
-_hidden const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *g_cfg);
+_hidden int libxl__destroy_device_models(libxl__gc *gc, libxl_domid domid);
+
+_hidden const libxl_vnc_info *libxl__dm_vnc(libxl_dmid dmid,
+                                            const libxl_domain_config *g_cfg);
 
 _hidden char *libxl__abs_path(libxl__gc *gc, const char *s, const char *path);
 
@@ -2427,10 +2440,10 @@  struct libxl__dm_spawn_state {
     libxl_domain_config *guest_config;
     libxl__domain_build_state *build_state; /* relates to guest_domid */
     libxl__dm_spawn_cb *callback;
+    libxl_dmid dmid;
+    struct libxl__domain_create_state *dcs;
 };
 
-_hidden void libxl__spawn_local_dm(libxl__egc *egc, libxl__dm_spawn_state*);
-
 /* Stubdom device models. */
 
 typedef struct {
@@ -2447,7 +2460,7 @@  typedef struct {
     libxl__multidev multidev;
 } libxl__stub_dm_spawn_state;
 
-_hidden void libxl__spawn_stub_dm(libxl__egc *egc, libxl__stub_dm_spawn_state*);
+_hidden void libxl__spawn_dm(libxl__egc *egc, libxl__stub_dm_spawn_state*);
 
 _hidden char *libxl__stub_dm_name(libxl__gc *gc, const char * guest_name);
 
@@ -2470,7 +2483,8 @@  struct libxl__domain_create_state {
     int guest_domid;
     libxl__domain_build_state build_state;
     libxl__bootloader_state bl;
-    libxl__stub_dm_spawn_state dmss;
+    libxl_dmid current_dmid;
+    libxl__stub_dm_spawn_state* dmss;
         /* If we're not doing stubdom, we use only dmss.dm,
          * for the non-stubdom device model. */
     libxl__save_helper_state shs;
@@ -2527,7 +2541,9 @@  _hidden void libxl__domain_save_device_model(libxl__egc *egc,
                                      libxl__domain_suspend_state *dss,
                                      libxl__save_device_model_cb *callback);
 
-_hidden const char *libxl__device_model_savefile(libxl__gc *gc, uint32_t domid);
+_hidden const char *libxl__device_model_savefile(libxl__gc *gc,
+                                                 libxl_domid domid,
+                                                 libxl_dmid dmid);
 
 
 /*