diff mbox

[PULL,03/36] hw/pci: delay bus_master_enable_region initialization

Message ID 20160704194615-mutt-send-email-mst@redhat.com
State New
Headers show

Commit Message

Michael S. Tsirkin July 4, 2016, 4:46 p.m. UTC
From: Marcel Apfelbaum <marcel@redhat.com>

Skip bus_master_enable region creation on PCI device init
in order to be sure the IOMMU device (if present) would
be created in advance. Add this memory region at machine_done time.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/pci/pci_bus.h |  2 ++
 include/sysemu/sysemu.h  |  1 +
 hw/pci/pci.c             | 41 ++++++++++++++++++++++++++++++++---------
 vl.c                     |  5 +++++
 4 files changed, 40 insertions(+), 9 deletions(-)

Comments

Mark Cave-Ayland July 9, 2016, 1:34 a.m. UTC | #1
On 04/07/16 17:46, Michael S. Tsirkin wrote:

> From: Marcel Apfelbaum <marcel@redhat.com>
> 
> Skip bus_master_enable region creation on PCI device init
> in order to be sure the IOMMU device (if present) would
> be created in advance. Add this memory region at machine_done time.
> 
> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  include/hw/pci/pci_bus.h |  2 ++
>  include/sysemu/sysemu.h  |  1 +
>  hw/pci/pci.c             | 41 ++++++++++++++++++++++++++++++++---------
>  vl.c                     |  5 +++++
>  4 files changed, 40 insertions(+), 9 deletions(-)
> 
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 403fec6..5484a9b 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -39,6 +39,8 @@ struct PCIBus {
>         Keep a count of the number of devices with raised IRQs.  */
>      int nirq;
>      int *irq_count;
> +
> +    Notifier machine_done;
>  };
>  
>  typedef struct PCIBridgeWindows PCIBridgeWindows;
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index 7313673..ee7c760 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -75,6 +75,7 @@ void qemu_add_exit_notifier(Notifier *notify);
>  void qemu_remove_exit_notifier(Notifier *notify);
>  
>  void qemu_add_machine_init_done_notifier(Notifier *notify);
> +void qemu_remove_machine_init_done_notifier(Notifier *notify);
>  
>  void hmp_savevm(Monitor *mon, const QDict *qdict);
>  int load_vmstate(const char *name);
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 4b585f4..ee385eb 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -78,10 +78,37 @@ static const VMStateDescription vmstate_pcibus = {
>      }
>  };
>  
> +static void pci_init_bus_master(PCIDevice *pci_dev)
> +{
> +    AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
> +
> +    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> +                             OBJECT(pci_dev), "bus master",
> +                             dma_as->root, 0, memory_region_size(dma_as->root));
> +    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> +    address_space_init(&pci_dev->bus_master_as,
> +                       &pci_dev->bus_master_enable_region, pci_dev->name);
> +}
> +
> +static void pcibus_machine_done(Notifier *notifier, void *data)
> +{
> +    PCIBus *bus = container_of(notifier, PCIBus, machine_done);
> +    int i;
> +
> +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> +        if (bus->devices[i]) {
> +            pci_init_bus_master(bus->devices[i]);
> +        }
> +    }
> +}
> +
>  static void pci_bus_realize(BusState *qbus, Error **errp)
>  {
>      PCIBus *bus = PCI_BUS(qbus);
>  
> +    bus->machine_done.notify = pcibus_machine_done;
> +    qemu_add_machine_init_done_notifier(&bus->machine_done);
> +
>      vmstate_register(NULL, -1, &vmstate_pcibus, bus);
>  }
>  
> @@ -89,6 +116,8 @@ static void pci_bus_unrealize(BusState *qbus, Error **errp)
>  {
>      PCIBus *bus = PCI_BUS(qbus);
>  
> +    qemu_remove_machine_init_done_notifier(&bus->machine_done);
> +
>      vmstate_unregister(NULL, &vmstate_pcibus, bus);
>  }
>  
> @@ -920,7 +949,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>      PCIConfigReadFunc *config_read = pc->config_read;
>      PCIConfigWriteFunc *config_write = pc->config_write;
>      Error *local_err = NULL;
> -    AddressSpace *dma_as;
>      DeviceState *dev = DEVICE(pci_dev);
>  
>      pci_dev->bus = bus;
> @@ -961,15 +989,10 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>  
>      pci_dev->devfn = devfn;
>      pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
> -    dma_as = pci_device_iommu_address_space(pci_dev);
> -
> -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> -                             OBJECT(pci_dev), "bus master",
> -                             dma_as->root, 0, memory_region_size(dma_as->root));
> -    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> -    address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
> -                       name);
>  
> +    if (qdev_hotplug) {
> +        pci_init_bus_master(pci_dev);
> +    }
>      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>      pci_dev->irq_state = 0;
>      pci_config_alloc(pci_dev);
> diff --git a/vl.c b/vl.c
> index 9bb7f4c..5cd0f2a 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -2675,6 +2675,11 @@ void qemu_add_machine_init_done_notifier(Notifier *notify)
>      }
>  }
>  
> +void qemu_remove_machine_init_done_notifier(Notifier *notify)
> +{
> +    notifier_remove(notify);
> +}
> +
>  static void qemu_run_machine_init_done_notifiers(void)
>  {
>      notifier_list_notify(&machine_init_done_notifiers, NULL);
> 

Unfortunately this patch causes a regression booting my Debian 7.8.0 and
NetBSD 7.0 test images under qemu-system-sparc64 when accessing the
CDROM device, and rather unusually causes the qemu-system-sparc64
executable itself to segfault:


Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffe8453700 (LWP 21454)]
0x000000000041494a in address_space_lookup_region (d=0x0,
addr=3221225472, resolve_subpage=true) at
/home/build/src/qemu/git/qemu/exec.c:359
359         MemoryRegionSection *section = atomic_read(&d->mru_section);
(gdb) bt
#0  0x000000000041494a in address_space_lookup_region (d=0x0,
addr=3221225472, resolve_subpage=true) at
/home/build/src/qemu/git/qemu/exec.c:359
#1  0x0000000000414aa7 in address_space_translate_internal (d=0x0,
addr=3221225472, xlat=0x7fffe8451c40, plen=0x7fffe8451d10,
resolve_subpage=true) at /home/build/src/qemu/git/qemu/exec.c:390
#2  0x0000000000414c8f in address_space_translate (as=0x1961600,
addr=3221225472, xlat=0x7fffe8451d18, plen=0x7fffe8451d10,
is_write=false) at /home/build/src/qemu/git/qemu/exec.c:428
#3  0x000000000041a4be in address_space_read_full (as=0x1961600,
addr=3221225472, attrs=..., buf=0x7fffe8451f10 "\030(\226\001", len=8)
at /home/build/src/qemu/git/qemu/exec.c:2724
#4  0x000000000041a5b3 in address_space_read (len=8, buf=0x7fffe8451f10
"\030(\226\001", attrs=..., addr=3221225472, as=0x1961600) at
/home/build/src/qemu/git/qemu/include/exec/memory.h:1460
#5  address_space_rw (as=0x1961600, addr=3221225472, attrs=...,
buf=0x7fffe8451f10 "\030(\226\001", len=8, is_write=false) at
/home/build/src/qemu/git/qemu/exec.c:2739
#6  0x00000000005c71e0 in dma_memory_rw_relaxed (as=0x1961600,
addr=3221225472, buf=0x7fffe8451f10, len=8, dir=DMA_DIRECTION_TO_DEVICE)
at /home/build/src/qemu/git/qemu/include/sysemu/dma.h:87
#7  0x00000000005c7258 in dma_memory_rw (as=0x1961600, addr=3221225472,
buf=0x7fffe8451f10, len=8, dir=DMA_DIRECTION_TO_DEVICE) at
/home/build/src/qemu/git/qemu/include/sysemu/dma.h:110
#8  0x00000000005c72fa in pci_dma_rw (dev=0x19613f0, addr=3221225472,
buf=0x7fffe8451f10, len=8, dir=DMA_DIRECTION_TO_DEVICE) at
/home/build/src/qemu/git/qemu/include/hw/pci/pci.h:731
#9  0x00000000005c735a in pci_dma_read (dev=0x19613f0, addr=3221225472,
buf=0x7fffe8451f10, len=8) at
/home/build/src/qemu/git/qemu/include/hw/pci/pci.h:738
#10 0x00000000005c7980 in bmdma_rw_buf (dma=0x1962fa8, is_write=1) at
hw/ide/pci.c:139
#11 0x00000000005c4372 in ide_atapi_cmd_read_dma_cb (opaque=0x1962550,
ret=0) at hw/ide/atapi.c:413
#12 0x00000000005c7dfe in bmdma_cmd_writeb (bm=0x1962fa8, val=9) at
hw/ide/pci.c:245
#13 0x00000000005c91a9 in bmdma_write (opaque=0x1962fa8, addr=0, val=9,
size=1) at hw/ide/cmd646.c:219
#14 0x0000000000469fb8 in memory_region_write_accessor (mr=0x1963100,
addr=0, value=0x7fffe8452148, size=1, shift=0, mask=255, attrs=...) at
/home/build/src/qemu/git/qemu/memory.c:525
#15 0x000000000046a1e4 in access_with_adjusted_size (addr=0,
value=0x7fffe8452148, size=1, access_size_min=1, access_size_max=4,
access=0x469ec1 <memory_region_write_accessor>, mr=0x1963100, attrs=...)
    at /home/build/src/qemu/git/qemu/memory.c:586
#16 0x000000000046d42b in memory_region_dispatch_write (mr=0x1963100,
addr=0, data=9, size=1, attrs=...) at
/home/build/src/qemu/git/qemu/memory.c:1262
#17 0x000000000041a051 in address_space_write_continue (as=0x12bb4b0,
addr=2190466908936, attrs=..., buf=0x7fffe8452323 "\t\376\001", len=1,
addr1=0, l=1, mr=0x1963100) at /home/build/src/qemu/git/qemu/exec.c:2590
#18 0x000000000041a1c3 in address_space_write (as=0x12bb4b0,
addr=2190466908936, attrs=..., buf=0x7fffe8452323 "\t\376\001", len=1)
at /home/build/src/qemu/git/qemu/exec.c:2635
#19 0x000000000041a569 in address_space_rw (as=0x12bb4b0,
addr=2190466908936, attrs=..., buf=0x7fffe8452323 "\t\376\001", len=1,
is_write=true) at /home/build/src/qemu/git/qemu/exec.c:2737
#20 0x000000000041c27d in address_space_stb (as=0x12bb4b0,
addr=2190466908936, val=9, attrs=..., result=0x0) at
/home/build/src/qemu/git/qemu/exec.c:3477
#21 0x000000000041c2f5 in stb_phys (as=0x12bb4b0, addr=2190466908936,
val=9) at /home/build/src/qemu/git/qemu/exec.c:3485
#22 0x0000000000504759 in helper_st_asi (env=0x12d57e8,
addr=2190466908936, val=9, asi=29, size=1) at
/home/build/src/qemu/git/qemu/target-sparc/ldst_helper.c:1801
#23 0x00000000405bf4b0 in code_gen_buffer ()
#24 0x0000000000420c97 in cpu_tb_exec (cpu=0x12cd570,
itb=0x7fffe8fc10c0) at /home/build/src/qemu/git/qemu/cpu-exec.c:166
#25 0x0000000000421882 in cpu_loop_exec_tb (cpu=0x12cd570,
tb=0x7fffe8fc10c0, last_tb=0x7fffe8452988, tb_exit=0x7fffe8452984,
sc=0x7fffe84529a0) at /home/build/src/qemu/git/qemu/cpu-exec.c:530
#26 0x0000000000421b40 in cpu_exec (cpu=0x12cd570) at
/home/build/src/qemu/git/qemu/cpu-exec.c:626
#27 0x0000000000451e6b in tcg_cpu_exec (cpu=0x12cd570) at
/home/build/src/qemu/git/qemu/cpus.c:1541
#28 0x0000000000451f6d in tcg_exec_all () at
/home/build/src/qemu/git/qemu/cpus.c:1574
#29 0x00000000004510b4 in qemu_tcg_cpu_thread_fn (arg=0x12cd570) at
/home/build/src/qemu/git/qemu/cpus.c:1171
#30 0x00007ffff264bb50 in start_thread (arg=<optimized out>) at
pthread_create.c:304
#31 0x00007ffff2395fbd in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#32 0x0000000000000000 in ?? ()
(gdb)



ATB,

Mark.
Marcel Apfelbaum July 9, 2016, 7:07 a.m. UTC | #2
On 07/09/2016 04:34 AM, Mark Cave-Ayland wrote:
> On 04/07/16 17:46, Michael S. Tsirkin wrote:
>
>> From: Marcel Apfelbaum <marcel@redhat.com>
>>
>> Skip bus_master_enable region creation on PCI device init
>> in order to be sure the IOMMU device (if present) would
>> be created in advance. Add this memory region at machine_done time.
>>
>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> ---
>>   include/hw/pci/pci_bus.h |  2 ++
>>   include/sysemu/sysemu.h  |  1 +
>>   hw/pci/pci.c             | 41 ++++++++++++++++++++++++++++++++---------
>>   vl.c                     |  5 +++++
>>   4 files changed, 40 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>> index 403fec6..5484a9b 100644
>> --- a/include/hw/pci/pci_bus.h
>> +++ b/include/hw/pci/pci_bus.h
>> @@ -39,6 +39,8 @@ struct PCIBus {
>>          Keep a count of the number of devices with raised IRQs.  */
>>       int nirq;
>>       int *irq_count;
>> +
>> +    Notifier machine_done;
>>   };
>>
>>   typedef struct PCIBridgeWindows PCIBridgeWindows;
>> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
>> index 7313673..ee7c760 100644
>> --- a/include/sysemu/sysemu.h
>> +++ b/include/sysemu/sysemu.h
>> @@ -75,6 +75,7 @@ void qemu_add_exit_notifier(Notifier *notify);
>>   void qemu_remove_exit_notifier(Notifier *notify);
>>
>>   void qemu_add_machine_init_done_notifier(Notifier *notify);
>> +void qemu_remove_machine_init_done_notifier(Notifier *notify);
>>
>>   void hmp_savevm(Monitor *mon, const QDict *qdict);
>>   int load_vmstate(const char *name);
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 4b585f4..ee385eb 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -78,10 +78,37 @@ static const VMStateDescription vmstate_pcibus = {
>>       }
>>   };
>>
>> +static void pci_init_bus_master(PCIDevice *pci_dev)
>> +{
>> +    AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>> +
>> +    memory_region_init_alias(&pci_dev->bus_master_enable_region,
>> +                             OBJECT(pci_dev), "bus master",
>> +                             dma_as->root, 0, memory_region_size(dma_as->root));
>> +    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>> +    address_space_init(&pci_dev->bus_master_as,
>> +                       &pci_dev->bus_master_enable_region, pci_dev->name);
>> +}
>> +
>> +static void pcibus_machine_done(Notifier *notifier, void *data)
>> +{
>> +    PCIBus *bus = container_of(notifier, PCIBus, machine_done);
>> +    int i;
>> +
>> +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
>> +        if (bus->devices[i]) {
>> +            pci_init_bus_master(bus->devices[i]);
>> +        }
>> +    }
>> +}
>> +
>>   static void pci_bus_realize(BusState *qbus, Error **errp)
>>   {
>>       PCIBus *bus = PCI_BUS(qbus);
>>
>> +    bus->machine_done.notify = pcibus_machine_done;
>> +    qemu_add_machine_init_done_notifier(&bus->machine_done);
>> +
>>       vmstate_register(NULL, -1, &vmstate_pcibus, bus);
>>   }
>>
>> @@ -89,6 +116,8 @@ static void pci_bus_unrealize(BusState *qbus, Error **errp)
>>   {
>>       PCIBus *bus = PCI_BUS(qbus);
>>
>> +    qemu_remove_machine_init_done_notifier(&bus->machine_done);
>> +
>>       vmstate_unregister(NULL, &vmstate_pcibus, bus);
>>   }
>>
>> @@ -920,7 +949,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>       PCIConfigReadFunc *config_read = pc->config_read;
>>       PCIConfigWriteFunc *config_write = pc->config_write;
>>       Error *local_err = NULL;
>> -    AddressSpace *dma_as;
>>       DeviceState *dev = DEVICE(pci_dev);
>>
>>       pci_dev->bus = bus;
>> @@ -961,15 +989,10 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>
>>       pci_dev->devfn = devfn;
>>       pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>> -    dma_as = pci_device_iommu_address_space(pci_dev);
>> -
>> -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
>> -                             OBJECT(pci_dev), "bus master",
>> -                             dma_as->root, 0, memory_region_size(dma_as->root));
>> -    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>> -    address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
>> -                       name);
>>
>> +    if (qdev_hotplug) {
>> +        pci_init_bus_master(pci_dev);
>> +    }
>>       pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>       pci_dev->irq_state = 0;
>>       pci_config_alloc(pci_dev);
>> diff --git a/vl.c b/vl.c
>> index 9bb7f4c..5cd0f2a 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -2675,6 +2675,11 @@ void qemu_add_machine_init_done_notifier(Notifier *notify)
>>       }
>>   }
>>
>> +void qemu_remove_machine_init_done_notifier(Notifier *notify)
>> +{
>> +    notifier_remove(notify);
>> +}
>> +
>>   static void qemu_run_machine_init_done_notifiers(void)
>>   {
>>       notifier_list_notify(&machine_init_done_notifiers, NULL);
>>
>
> Unfortunately this patch causes a regression booting my Debian 7.8.0 and
> NetBSD 7.0 test images under qemu-system-sparc64 when accessing the
> CDROM device, and rather unusually causes the qemu-system-sparc64
> executable itself to segfault:
>

Hi Mark,
Thank you for finding it, can you please provide a command line and maybe one of your test images?
I'll be sure to address it.

Thanks,
Marcel

>
> Program received signal SIGSEGV, Segmentation fault.
> [Switching to Thread 0x7fffe8453700 (LWP 21454)]
> 0x000000000041494a in address_space_lookup_region (d=0x0,
> addr=3221225472, resolve_subpage=true) at
> /home/build/src/qemu/git/qemu/exec.c:359
> 359         MemoryRegionSection *section = atomic_read(&d->mru_section);
> (gdb) bt
> #0  0x000000000041494a in address_space_lookup_region (d=0x0,
> addr=3221225472, resolve_subpage=true) at
> /home/build/src/qemu/git/qemu/exec.c:359
> #1  0x0000000000414aa7 in address_space_translate_internal (d=0x0,
> addr=3221225472, xlat=0x7fffe8451c40, plen=0x7fffe8451d10,
> resolve_subpage=true) at /home/build/src/qemu/git/qemu/exec.c:390
> #2  0x0000000000414c8f in address_space_translate (as=0x1961600,
> addr=3221225472, xlat=0x7fffe8451d18, plen=0x7fffe8451d10,
> is_write=false) at /home/build/src/qemu/git/qemu/exec.c:428
> #3  0x000000000041a4be in address_space_read_full (as=0x1961600,
> addr=3221225472, attrs=..., buf=0x7fffe8451f10 "\030(\226\001", len=8)
> at /home/build/src/qemu/git/qemu/exec.c:2724
> #4  0x000000000041a5b3 in address_space_read (len=8, buf=0x7fffe8451f10
> "\030(\226\001", attrs=..., addr=3221225472, as=0x1961600) at
> /home/build/src/qemu/git/qemu/include/exec/memory.h:1460
> #5  address_space_rw (as=0x1961600, addr=3221225472, attrs=...,
> buf=0x7fffe8451f10 "\030(\226\001", len=8, is_write=false) at
> /home/build/src/qemu/git/qemu/exec.c:2739
> #6  0x00000000005c71e0 in dma_memory_rw_relaxed (as=0x1961600,
> addr=3221225472, buf=0x7fffe8451f10, len=8, dir=DMA_DIRECTION_TO_DEVICE)
> at /home/build/src/qemu/git/qemu/include/sysemu/dma.h:87
> #7  0x00000000005c7258 in dma_memory_rw (as=0x1961600, addr=3221225472,
> buf=0x7fffe8451f10, len=8, dir=DMA_DIRECTION_TO_DEVICE) at
> /home/build/src/qemu/git/qemu/include/sysemu/dma.h:110
> #8  0x00000000005c72fa in pci_dma_rw (dev=0x19613f0, addr=3221225472,
> buf=0x7fffe8451f10, len=8, dir=DMA_DIRECTION_TO_DEVICE) at
> /home/build/src/qemu/git/qemu/include/hw/pci/pci.h:731
> #9  0x00000000005c735a in pci_dma_read (dev=0x19613f0, addr=3221225472,
> buf=0x7fffe8451f10, len=8) at
> /home/build/src/qemu/git/qemu/include/hw/pci/pci.h:738
> #10 0x00000000005c7980 in bmdma_rw_buf (dma=0x1962fa8, is_write=1) at
> hw/ide/pci.c:139
> #11 0x00000000005c4372 in ide_atapi_cmd_read_dma_cb (opaque=0x1962550,
> ret=0) at hw/ide/atapi.c:413
> #12 0x00000000005c7dfe in bmdma_cmd_writeb (bm=0x1962fa8, val=9) at
> hw/ide/pci.c:245
> #13 0x00000000005c91a9 in bmdma_write (opaque=0x1962fa8, addr=0, val=9,
> size=1) at hw/ide/cmd646.c:219
> #14 0x0000000000469fb8 in memory_region_write_accessor (mr=0x1963100,
> addr=0, value=0x7fffe8452148, size=1, shift=0, mask=255, attrs=...) at
> /home/build/src/qemu/git/qemu/memory.c:525
> #15 0x000000000046a1e4 in access_with_adjusted_size (addr=0,
> value=0x7fffe8452148, size=1, access_size_min=1, access_size_max=4,
> access=0x469ec1 <memory_region_write_accessor>, mr=0x1963100, attrs=...)
>      at /home/build/src/qemu/git/qemu/memory.c:586
> #16 0x000000000046d42b in memory_region_dispatch_write (mr=0x1963100,
> addr=0, data=9, size=1, attrs=...) at
> /home/build/src/qemu/git/qemu/memory.c:1262
> #17 0x000000000041a051 in address_space_write_continue (as=0x12bb4b0,
> addr=2190466908936, attrs=..., buf=0x7fffe8452323 "\t\376\001", len=1,
> addr1=0, l=1, mr=0x1963100) at /home/build/src/qemu/git/qemu/exec.c:2590
> #18 0x000000000041a1c3 in address_space_write (as=0x12bb4b0,
> addr=2190466908936, attrs=..., buf=0x7fffe8452323 "\t\376\001", len=1)
> at /home/build/src/qemu/git/qemu/exec.c:2635
> #19 0x000000000041a569 in address_space_rw (as=0x12bb4b0,
> addr=2190466908936, attrs=..., buf=0x7fffe8452323 "\t\376\001", len=1,
> is_write=true) at /home/build/src/qemu/git/qemu/exec.c:2737
> #20 0x000000000041c27d in address_space_stb (as=0x12bb4b0,
> addr=2190466908936, val=9, attrs=..., result=0x0) at
> /home/build/src/qemu/git/qemu/exec.c:3477
> #21 0x000000000041c2f5 in stb_phys (as=0x12bb4b0, addr=2190466908936,
> val=9) at /home/build/src/qemu/git/qemu/exec.c:3485
> #22 0x0000000000504759 in helper_st_asi (env=0x12d57e8,
> addr=2190466908936, val=9, asi=29, size=1) at
> /home/build/src/qemu/git/qemu/target-sparc/ldst_helper.c:1801
> #23 0x00000000405bf4b0 in code_gen_buffer ()
> #24 0x0000000000420c97 in cpu_tb_exec (cpu=0x12cd570,
> itb=0x7fffe8fc10c0) at /home/build/src/qemu/git/qemu/cpu-exec.c:166
> #25 0x0000000000421882 in cpu_loop_exec_tb (cpu=0x12cd570,
> tb=0x7fffe8fc10c0, last_tb=0x7fffe8452988, tb_exit=0x7fffe8452984,
> sc=0x7fffe84529a0) at /home/build/src/qemu/git/qemu/cpu-exec.c:530
> #26 0x0000000000421b40 in cpu_exec (cpu=0x12cd570) at
> /home/build/src/qemu/git/qemu/cpu-exec.c:626
> #27 0x0000000000451e6b in tcg_cpu_exec (cpu=0x12cd570) at
> /home/build/src/qemu/git/qemu/cpus.c:1541
> #28 0x0000000000451f6d in tcg_exec_all () at
> /home/build/src/qemu/git/qemu/cpus.c:1574
> #29 0x00000000004510b4 in qemu_tcg_cpu_thread_fn (arg=0x12cd570) at
> /home/build/src/qemu/git/qemu/cpus.c:1171
> #30 0x00007ffff264bb50 in start_thread (arg=<optimized out>) at
> pthread_create.c:304
> #31 0x00007ffff2395fbd in clone () at
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
> #32 0x0000000000000000 in ?? ()
> (gdb)
>
>
>
> ATB,
>
> Mark.
>
Mark Cave-Ayland July 9, 2016, 9:09 a.m. UTC | #3
On 09/07/16 08:07, Marcel Apfelbaum wrote:

> On 07/09/2016 04:34 AM, Mark Cave-Ayland wrote:
>> On 04/07/16 17:46, Michael S. Tsirkin wrote:
>>
>>> From: Marcel Apfelbaum <marcel@redhat.com>
>>>
>>> Skip bus_master_enable region creation on PCI device init
>>> in order to be sure the IOMMU device (if present) would
>>> be created in advance. Add this memory region at machine_done time.
>>>
>>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>> ---
>>>   include/hw/pci/pci_bus.h |  2 ++
>>>   include/sysemu/sysemu.h  |  1 +
>>>   hw/pci/pci.c             | 41
>>> ++++++++++++++++++++++++++++++++---------
>>>   vl.c                     |  5 +++++
>>>   4 files changed, 40 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>>> index 403fec6..5484a9b 100644
>>> --- a/include/hw/pci/pci_bus.h
>>> +++ b/include/hw/pci/pci_bus.h
>>> @@ -39,6 +39,8 @@ struct PCIBus {
>>>          Keep a count of the number of devices with raised IRQs.  */
>>>       int nirq;
>>>       int *irq_count;
>>> +
>>> +    Notifier machine_done;
>>>   };
>>>
>>>   typedef struct PCIBridgeWindows PCIBridgeWindows;
>>> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
>>> index 7313673..ee7c760 100644
>>> --- a/include/sysemu/sysemu.h
>>> +++ b/include/sysemu/sysemu.h
>>> @@ -75,6 +75,7 @@ void qemu_add_exit_notifier(Notifier *notify);
>>>   void qemu_remove_exit_notifier(Notifier *notify);
>>>
>>>   void qemu_add_machine_init_done_notifier(Notifier *notify);
>>> +void qemu_remove_machine_init_done_notifier(Notifier *notify);
>>>
>>>   void hmp_savevm(Monitor *mon, const QDict *qdict);
>>>   int load_vmstate(const char *name);
>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>> index 4b585f4..ee385eb 100644
>>> --- a/hw/pci/pci.c
>>> +++ b/hw/pci/pci.c
>>> @@ -78,10 +78,37 @@ static const VMStateDescription vmstate_pcibus = {
>>>       }
>>>   };
>>>
>>> +static void pci_init_bus_master(PCIDevice *pci_dev)
>>> +{
>>> +    AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>>> +
>>> +    memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>> +                             OBJECT(pci_dev), "bus master",
>>> +                             dma_as->root, 0,
>>> memory_region_size(dma_as->root));
>>> +    memory_region_set_enabled(&pci_dev->bus_master_enable_region,
>>> false);
>>> +    address_space_init(&pci_dev->bus_master_as,
>>> +                       &pci_dev->bus_master_enable_region,
>>> pci_dev->name);
>>> +}
>>> +
>>> +static void pcibus_machine_done(Notifier *notifier, void *data)
>>> +{
>>> +    PCIBus *bus = container_of(notifier, PCIBus, machine_done);
>>> +    int i;
>>> +
>>> +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
>>> +        if (bus->devices[i]) {
>>> +            pci_init_bus_master(bus->devices[i]);
>>> +        }
>>> +    }
>>> +}
>>> +
>>>   static void pci_bus_realize(BusState *qbus, Error **errp)
>>>   {
>>>       PCIBus *bus = PCI_BUS(qbus);
>>>
>>> +    bus->machine_done.notify = pcibus_machine_done;
>>> +    qemu_add_machine_init_done_notifier(&bus->machine_done);
>>> +
>>>       vmstate_register(NULL, -1, &vmstate_pcibus, bus);
>>>   }
>>>
>>> @@ -89,6 +116,8 @@ static void pci_bus_unrealize(BusState *qbus,
>>> Error **errp)
>>>   {
>>>       PCIBus *bus = PCI_BUS(qbus);
>>>
>>> +    qemu_remove_machine_init_done_notifier(&bus->machine_done);
>>> +
>>>       vmstate_unregister(NULL, &vmstate_pcibus, bus);
>>>   }
>>>
>>> @@ -920,7 +949,6 @@ static PCIDevice
>>> *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>       PCIConfigReadFunc *config_read = pc->config_read;
>>>       PCIConfigWriteFunc *config_write = pc->config_write;
>>>       Error *local_err = NULL;
>>> -    AddressSpace *dma_as;
>>>       DeviceState *dev = DEVICE(pci_dev);
>>>
>>>       pci_dev->bus = bus;
>>> @@ -961,15 +989,10 @@ static PCIDevice
>>> *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>>
>>>       pci_dev->devfn = devfn;
>>>       pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>>> -    dma_as = pci_device_iommu_address_space(pci_dev);
>>> -
>>> -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
>>> -                             OBJECT(pci_dev), "bus master",
>>> -                             dma_as->root, 0,
>>> memory_region_size(dma_as->root));
>>> -    memory_region_set_enabled(&pci_dev->bus_master_enable_region,
>>> false);
>>> -    address_space_init(&pci_dev->bus_master_as,
>>> &pci_dev->bus_master_enable_region,
>>> -                       name);
>>>
>>> +    if (qdev_hotplug) {
>>> +        pci_init_bus_master(pci_dev);
>>> +    }
>>>       pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>>       pci_dev->irq_state = 0;
>>>       pci_config_alloc(pci_dev);
>>> diff --git a/vl.c b/vl.c
>>> index 9bb7f4c..5cd0f2a 100644
>>> --- a/vl.c
>>> +++ b/vl.c
>>> @@ -2675,6 +2675,11 @@ void
>>> qemu_add_machine_init_done_notifier(Notifier *notify)
>>>       }
>>>   }
>>>
>>> +void qemu_remove_machine_init_done_notifier(Notifier *notify)
>>> +{
>>> +    notifier_remove(notify);
>>> +}
>>> +
>>>   static void qemu_run_machine_init_done_notifiers(void)
>>>   {
>>>       notifier_list_notify(&machine_init_done_notifiers, NULL);
>>>
>>
>> Unfortunately this patch causes a regression booting my Debian 7.8.0 and
>> NetBSD 7.0 test images under qemu-system-sparc64 when accessing the
>> CDROM device, and rather unusually causes the qemu-system-sparc64
>> executable itself to segfault:
>>
> 
> Hi Mark,
> Thank you for finding it, can you please provide a command line and
> maybe one of your test images?
> I'll be sure to address it.
> 
> Thanks,
> Marcel

Hi Marcel,

The failure was easy to reproduce with my NetBSD 7.0 image using the
following command line:

./qemu-system-sparc64 -cdrom NetBSD-7.0-sparc64.iso -boot d -nographic

The segfault occurs just after the CDROM is accessed.


Many thanks,

Mark.
Leon Alrae July 11, 2016, 2:42 p.m. UTC | #4
Hi,

This commit causes regressions in my MIPS tests. QEMU segfaults when
booting Linux on Malta board; this can be easily reproduced with
Aurelien's Debian images:

wget https://people.debian.org/~aurel32/qemu/mipsel/vmlinux-3.2.0-4-5kc-malta
wget https://people.debian.org/~aurel32/qemu/mipsel/debian_wheezy_mipsel_standard.qcow2
qemu-system-mips64el -M malta -kernel vmlinux-3.2.0-4-5kc-malta -hda debian_wheezy_mipsel_standard.qcow2 -append "root=/dev/sda1 console=ttyS0" -nographic

...
[    1.468000] ata2.00: ATAPI: QEMU DVD-ROM, 2.5+, max UDMA/100
[    1.476000] ata1.00: ATA-7: QEMU HARDDISK, 2.5+, max UDMA/100
[    1.476000] ata1.00: 52428800 sectors, multi 16: LBA48
[    1.480000] ata2.00: configured for UDMA/33
[    1.484000] ata1.00: configured for UDMA/33
[    1.536000] scsi 0:0:0:0: Direct-Access     ATA      QEMU HARDDISK    2.5+ PQ: 0 ANSI: 5
[    1.552000] sd 0:0:0:0: [sda] 52428800 512-byte logical blocks: (26.8 GB/25.0 GiB)
[    1.568000] scsi 1:0:0:0: CD-ROM            QEMU     QEMU DVD-ROM     2.5+ PQ: 0 ANSI: 5
[    1.576000] sd 0:0:0:0: [sda] Write Protect is off
[    1.580000] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffec79e700 (LWP 29679)]
0x00007ffff792f806 in address_space_lookup_region (d=0x0, addr=12582912, resolve_subpage=true) at /user/lea/dev/qemu/exec.c:359
359         MemoryRegionSection *section = atomic_read(&d->mru_section);
(gdb) bt
#0  0x00007ffff792f806 in address_space_lookup_region (d=0x0, addr=12582912, resolve_subpage=true) at /user/lea/dev/qemu/exec.c:359
#1  0x00007ffff792f95e in address_space_translate_internal (d=0x0, addr=12582912, xlat=0x7fffec79ca30, plen=0x7fffec79cb18, resolve_subpage=true) at /user/lea/dev/qemu/exec.c:390
#2  0x00007ffff792fb5a in address_space_translate (as=0x7ffff8bc1ef0, addr=12582912, xlat=0x7fffec79cb10, plen=0x7fffec79cb18, is_write=false) at /user/lea/dev/qemu/exec.c:428
#3  0x00007ffff7935720 in address_space_read_full (as=0x7ffff8bc1ef0, addr=12582912, attrs=..., buf=0x7fffec79cd60 "\260^\324\370", <incomplete sequence \330>, len=8) at /user/lea/dev/qemu/exec.c:2724
#4  0x00007ffff7935821 in address_space_read (as=0x7ffff8bc1ef0, addr=12582912, attrs=..., buf=0x7fffec79cd60 "\260^\324\370", <incomplete sequence \330>, len=8, is_write=false) at /user/lea/dev/qemu/include/exec/memory.h:1476
#5  address_space_rw (as=0x7ffff8bc1ef0, addr=12582912, attrs=..., buf=0x7fffec79cd60 "\260^\324\370", <incomplete sequence \330>, len=8, is_write=false) at /user/lea/dev/qemu/exec.c:2739
#6  0x00007ffff7baf9a7 in dma_memory_rw_relaxed (as=0x7ffff8bc1ef0, addr=12582912, buf=0x7fffec79cd60, len=8, dir=DMA_DIRECTION_TO_DEVICE) at /user/lea/dev/qemu/include/sysemu/dma.h:87
#7  0x00007ffff7bafa28 in dma_memory_rw (as=0x7ffff8bc1ef0, addr=12582912, buf=0x7fffec79cd60, len=8, dir=DMA_DIRECTION_TO_DEVICE) at /user/lea/dev/qemu/include/sysemu/dma.h:110
#8  0x00007ffff7bafad3 in pci_dma_rw (dev=0x7ffff8bc1ce0, addr=12582912, buf=0x7fffec79cd60, len=8, dir=DMA_DIRECTION_TO_DEVICE) at /user/lea/dev/qemu/include/hw/pci/pci.h:731
#9  0x00007ffff7bafb3c in pci_dma_read (dev=0x7ffff8bc1ce0, addr=12582912, buf=0x7fffec79cd60, len=8) at /user/lea/dev/qemu/include/hw/pci/pci.h:738
#10 0x00007ffff7baff6d in bmdma_prepare_buf (dma=0x7ffff8bc3620, limit=4096) at /user/lea/dev/qemu/hw/ide/pci.c:85
#11 0x00007ffff7ba66bc in ide_dma_cb (opaque=0x7ffff8bc25e8, ret=0) at /user/lea/dev/qemu/hw/ide/core.c:844
#12 0x00007ffff7bb0615 in bmdma_cmd_writeb (bm=0x7ffff8bc3620, val=9) at /user/lea/dev/qemu/hw/ide/pci.c:245
#13 0x00007ffff7bb1408 in bmdma_write (opaque=0x7ffff8bc3620, addr=0, val=9, size=1) at /user/lea/dev/qemu/hw/ide/piix.c:77
#14 0x00007ffff7988548 in memory_region_write_accessor (mr=0x7ffff8bc3778, addr=0, value=0x7fffec79cfd8, size=1, shift=0, mask=255, attrs=...) at /user/lea/dev/qemu/memory.c:525
#15 0x00007ffff79887dd in access_with_adjusted_size (addr=0, value=0x7fffec79cfd8, size=1, access_size_min=1, access_size_max=4, access=0x7ffff798843e <memory_region_write_accessor>, mr=0x7ffff8bc3778, attrs=...) at /user/lea/dev/qemu/memory.c:591
#16 0x00007ffff798bc6e in memory_region_dispatch_write (mr=0x7ffff8bc3778, addr=0, data=9, size=1, attrs=...) at /user/lea/dev/qemu/memory.c:1262
#17 0x00007ffff7935294 in address_space_write_continue (as=0x7ffff8921a90, addr=402657344, attrs=..., buf=0x7fffec79d170 "\t\345y\354\377\177", len=1, addr1=0, l=1, mr=0x7ffff8bc3778) at /user/lea/dev/qemu/exec.c:2590
#18 0x00007ffff793540d in address_space_write (as=0x7ffff8921a90, addr=402657344, attrs=..., buf=0x7fffec79d170 "\t\345y\354\377\177", len=1) at /user/lea/dev/qemu/exec.c:2635
#19 0x00007ffff79344c0 in subpage_write (opaque=0x7fffd4569730, addr=64, value=9, len=1, attrs=...) at /user/lea/dev/qemu/exec.c:2221
#20 0x00007ffff7988678 in memory_region_write_with_attrs_accessor (mr=0x7fffd4569730, addr=64, value=0x7fffec79d2b8, size=1, shift=0, mask=255, attrs=...) at /user/lea/dev/qemu/memory.c:551
#21 0x00007ffff79887dd in access_with_adjusted_size (addr=64, value=0x7fffec79d2b8, size=1, access_size_min=1, access_size_max=8, access=0x7ffff7988568 <memory_region_write_with_attrs_accessor>, mr=0x7fffd4569730, attrs=...) at /user/lea/dev/qemu/memory.c:591
#22 0x00007ffff798bcc9 in memory_region_dispatch_write (mr=0x7fffd4569730, addr=64, data=9, size=1, attrs=...) at /user/lea/dev/qemu/memory.c:1269
#23 0x00007ffff7993676 in io_writeb (env=0x7ffff8941978, iotlbentry=0x7ffff894a0e8, val=9 '\t', addr=10376293541864280128, retaddr=140737194805689) at /user/lea/dev/qemu/softmmu_template.h:349
#24 0x00007ffff7993ae0 in helper_ret_stb_mmu (env=0x7ffff8941978, addr=10376293541864280128, val=9 '\t', oi=0, retaddr=140737194805689) at /user/lea/dev/qemu/softmmu_template.h:390
#25 0x00007fffee80c9bb in code_gen_buffer ()
#26 0x00007ffff793c093 in cpu_tb_exec (cpu=0x7ffff8939700, itb=0x7fffedaf4ee0) at /user/lea/dev/qemu/cpu-exec.c:166
#27 0x00007ffff793ccd1 in cpu_loop_exec_tb (cpu=0x7ffff8939700, tb=0x7fffedaf4ee0, last_tb=0x7fffec79d9e0, tb_exit=0x7fffec79d9fc, sc=0x7fffec79d9c0) at /user/lea/dev/qemu/cpu-exec.c:530
#28 0x00007ffff793cf98 in cpu_exec (cpu=0x7ffff8939700) at /user/lea/dev/qemu/cpu-exec.c:626
#29 0x00007ffff796f949 in tcg_cpu_exec (cpu=0x7ffff8939700) at /user/lea/dev/qemu/cpus.c:1541
#30 0x00007ffff796fa52 in tcg_exec_all () at /user/lea/dev/qemu/cpus.c:1574
#31 0x00007ffff796eb15 in qemu_tcg_cpu_thread_fn (arg=0x7ffff8939700) at /user/lea/dev/qemu/cpus.c:1171
#32 0x00007ffff30ce9d1 in start_thread () from /lib64/libpthread.so.0
#33 0x00007ffff2e1b86d in clone () from /lib64/libc.so.6

Regards,
Leon

On Mon, Jul 04, 2016 at 07:46:15PM +0300, Michael S. Tsirkin wrote:
> From: Marcel Apfelbaum <marcel@redhat.com>
> 
> Skip bus_master_enable region creation on PCI device init
> in order to be sure the IOMMU device (if present) would
> be created in advance. Add this memory region at machine_done time.
> 
> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  include/hw/pci/pci_bus.h |  2 ++
>  include/sysemu/sysemu.h  |  1 +
>  hw/pci/pci.c             | 41 ++++++++++++++++++++++++++++++++---------
>  vl.c                     |  5 +++++
>  4 files changed, 40 insertions(+), 9 deletions(-)
> 
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 403fec6..5484a9b 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -39,6 +39,8 @@ struct PCIBus {
>         Keep a count of the number of devices with raised IRQs.  */
>      int nirq;
>      int *irq_count;
> +
> +    Notifier machine_done;
>  };
>  
>  typedef struct PCIBridgeWindows PCIBridgeWindows;
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index 7313673..ee7c760 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -75,6 +75,7 @@ void qemu_add_exit_notifier(Notifier *notify);
>  void qemu_remove_exit_notifier(Notifier *notify);
>  
>  void qemu_add_machine_init_done_notifier(Notifier *notify);
> +void qemu_remove_machine_init_done_notifier(Notifier *notify);
>  
>  void hmp_savevm(Monitor *mon, const QDict *qdict);
>  int load_vmstate(const char *name);
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 4b585f4..ee385eb 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -78,10 +78,37 @@ static const VMStateDescription vmstate_pcibus = {
>      }
>  };
>  
> +static void pci_init_bus_master(PCIDevice *pci_dev)
> +{
> +    AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
> +
> +    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> +                             OBJECT(pci_dev), "bus master",
> +                             dma_as->root, 0, memory_region_size(dma_as->root));
> +    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> +    address_space_init(&pci_dev->bus_master_as,
> +                       &pci_dev->bus_master_enable_region, pci_dev->name);
> +}
> +
> +static void pcibus_machine_done(Notifier *notifier, void *data)
> +{
> +    PCIBus *bus = container_of(notifier, PCIBus, machine_done);
> +    int i;
> +
> +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> +        if (bus->devices[i]) {
> +            pci_init_bus_master(bus->devices[i]);
> +        }
> +    }
> +}
> +
>  static void pci_bus_realize(BusState *qbus, Error **errp)
>  {
>      PCIBus *bus = PCI_BUS(qbus);
>  
> +    bus->machine_done.notify = pcibus_machine_done;
> +    qemu_add_machine_init_done_notifier(&bus->machine_done);
> +
>      vmstate_register(NULL, -1, &vmstate_pcibus, bus);
>  }
>  
> @@ -89,6 +116,8 @@ static void pci_bus_unrealize(BusState *qbus, Error **errp)
>  {
>      PCIBus *bus = PCI_BUS(qbus);
>  
> +    qemu_remove_machine_init_done_notifier(&bus->machine_done);
> +
>      vmstate_unregister(NULL, &vmstate_pcibus, bus);
>  }
>  
> @@ -920,7 +949,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>      PCIConfigReadFunc *config_read = pc->config_read;
>      PCIConfigWriteFunc *config_write = pc->config_write;
>      Error *local_err = NULL;
> -    AddressSpace *dma_as;
>      DeviceState *dev = DEVICE(pci_dev);
>  
>      pci_dev->bus = bus;
> @@ -961,15 +989,10 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>  
>      pci_dev->devfn = devfn;
>      pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
> -    dma_as = pci_device_iommu_address_space(pci_dev);
> -
> -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> -                             OBJECT(pci_dev), "bus master",
> -                             dma_as->root, 0, memory_region_size(dma_as->root));
> -    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> -    address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
> -                       name);
>  
> +    if (qdev_hotplug) {
> +        pci_init_bus_master(pci_dev);
> +    }
>      pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>      pci_dev->irq_state = 0;
>      pci_config_alloc(pci_dev);
> diff --git a/vl.c b/vl.c
> index 9bb7f4c..5cd0f2a 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -2675,6 +2675,11 @@ void qemu_add_machine_init_done_notifier(Notifier *notify)
>      }
>  }
>  
> +void qemu_remove_machine_init_done_notifier(Notifier *notify)
> +{
> +    notifier_remove(notify);
> +}
> +
>  static void qemu_run_machine_init_done_notifiers(void)
>  {
>      notifier_list_notify(&machine_init_done_notifiers, NULL);
> -- 
> MST
> 
>
Marcel Apfelbaum July 11, 2016, 3:18 p.m. UTC | #5
On 07/11/2016 05:42 PM, Leon Alrae wrote:
> Hi,
>
> This commit causes regressions in my MIPS tests. QEMU segfaults when
> booting Linux on Malta board;

Hi Leon,

Is a good thing you caught it in the pull request, thanks!
Is a pity we don't have a way to run sanity tests on all archs (beyond make check)

  this can be easily reproduced with
> Aurelien's Debian images:
>
> wget https://people.debian.org/~aurel32/qemu/mipsel/vmlinux-3.2.0-4-5kc-malta
> wget https://people.debian.org/~aurel32/qemu/mipsel/debian_wheezy_mipsel_standard.qcow2
> qemu-system-mips64el -M malta -kernel vmlinux-3.2.0-4-5kc-malta -hda debian_wheezy_mipsel_standard.qcow2 -append "root=/dev/sda1 console=ttyS0" -nographic
>

I'll reproduce and handle it, thanks!
Marcel

> ...
> [    1.468000] ata2.00: ATAPI: QEMU DVD-ROM, 2.5+, max UDMA/100
> [    1.476000] ata1.00: ATA-7: QEMU HARDDISK, 2.5+, max UDMA/100
> [    1.476000] ata1.00: 52428800 sectors, multi 16: LBA48
> [    1.480000] ata2.00: configured for UDMA/33
> [    1.484000] ata1.00: configured for UDMA/33
> [    1.536000] scsi 0:0:0:0: Direct-Access     ATA      QEMU HARDDISK    2.5+ PQ: 0 ANSI: 5
> [    1.552000] sd 0:0:0:0: [sda] 52428800 512-byte logical blocks: (26.8 GB/25.0 GiB)
> [    1.568000] scsi 1:0:0:0: CD-ROM            QEMU     QEMU DVD-ROM     2.5+ PQ: 0 ANSI: 5
> [    1.576000] sd 0:0:0:0: [sda] Write Protect is off
> [    1.580000] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
>
> Program received signal SIGSEGV, Segmentation fault.
> [Switching to Thread 0x7fffec79e700 (LWP 29679)]
> 0x00007ffff792f806 in address_space_lookup_region (d=0x0, addr=12582912, resolve_subpage=true) at /user/lea/dev/qemu/exec.c:359
> 359         MemoryRegionSection *section = atomic_read(&d->mru_section);
> (gdb) bt
> #0  0x00007ffff792f806 in address_space_lookup_region (d=0x0, addr=12582912, resolve_subpage=true) at /user/lea/dev/qemu/exec.c:359
> #1  0x00007ffff792f95e in address_space_translate_internal (d=0x0, addr=12582912, xlat=0x7fffec79ca30, plen=0x7fffec79cb18, resolve_subpage=true) at /user/lea/dev/qemu/exec.c:390
> #2  0x00007ffff792fb5a in address_space_translate (as=0x7ffff8bc1ef0, addr=12582912, xlat=0x7fffec79cb10, plen=0x7fffec79cb18, is_write=false) at /user/lea/dev/qemu/exec.c:428
> #3  0x00007ffff7935720 in address_space_read_full (as=0x7ffff8bc1ef0, addr=12582912, attrs=..., buf=0x7fffec79cd60 "\260^\324\370", <incomplete sequence \330>, len=8) at /user/lea/dev/qemu/exec.c:2724
> #4  0x00007ffff7935821 in address_space_read (as=0x7ffff8bc1ef0, addr=12582912, attrs=..., buf=0x7fffec79cd60 "\260^\324\370", <incomplete sequence \330>, len=8, is_write=false) at /user/lea/dev/qemu/include/exec/memory.h:1476
> #5  address_space_rw (as=0x7ffff8bc1ef0, addr=12582912, attrs=..., buf=0x7fffec79cd60 "\260^\324\370", <incomplete sequence \330>, len=8, is_write=false) at /user/lea/dev/qemu/exec.c:2739
> #6  0x00007ffff7baf9a7 in dma_memory_rw_relaxed (as=0x7ffff8bc1ef0, addr=12582912, buf=0x7fffec79cd60, len=8, dir=DMA_DIRECTION_TO_DEVICE) at /user/lea/dev/qemu/include/sysemu/dma.h:87
> #7  0x00007ffff7bafa28 in dma_memory_rw (as=0x7ffff8bc1ef0, addr=12582912, buf=0x7fffec79cd60, len=8, dir=DMA_DIRECTION_TO_DEVICE) at /user/lea/dev/qemu/include/sysemu/dma.h:110
> #8  0x00007ffff7bafad3 in pci_dma_rw (dev=0x7ffff8bc1ce0, addr=12582912, buf=0x7fffec79cd60, len=8, dir=DMA_DIRECTION_TO_DEVICE) at /user/lea/dev/qemu/include/hw/pci/pci.h:731
> #9  0x00007ffff7bafb3c in pci_dma_read (dev=0x7ffff8bc1ce0, addr=12582912, buf=0x7fffec79cd60, len=8) at /user/lea/dev/qemu/include/hw/pci/pci.h:738
> #10 0x00007ffff7baff6d in bmdma_prepare_buf (dma=0x7ffff8bc3620, limit=4096) at /user/lea/dev/qemu/hw/ide/pci.c:85
> #11 0x00007ffff7ba66bc in ide_dma_cb (opaque=0x7ffff8bc25e8, ret=0) at /user/lea/dev/qemu/hw/ide/core.c:844
> #12 0x00007ffff7bb0615 in bmdma_cmd_writeb (bm=0x7ffff8bc3620, val=9) at /user/lea/dev/qemu/hw/ide/pci.c:245
> #13 0x00007ffff7bb1408 in bmdma_write (opaque=0x7ffff8bc3620, addr=0, val=9, size=1) at /user/lea/dev/qemu/hw/ide/piix.c:77
> #14 0x00007ffff7988548 in memory_region_write_accessor (mr=0x7ffff8bc3778, addr=0, value=0x7fffec79cfd8, size=1, shift=0, mask=255, attrs=...) at /user/lea/dev/qemu/memory.c:525
> #15 0x00007ffff79887dd in access_with_adjusted_size (addr=0, value=0x7fffec79cfd8, size=1, access_size_min=1, access_size_max=4, access=0x7ffff798843e <memory_region_write_accessor>, mr=0x7ffff8bc3778, attrs=...) at /user/lea/dev/qemu/memory.c:591
> #16 0x00007ffff798bc6e in memory_region_dispatch_write (mr=0x7ffff8bc3778, addr=0, data=9, size=1, attrs=...) at /user/lea/dev/qemu/memory.c:1262
> #17 0x00007ffff7935294 in address_space_write_continue (as=0x7ffff8921a90, addr=402657344, attrs=..., buf=0x7fffec79d170 "\t\345y\354\377\177", len=1, addr1=0, l=1, mr=0x7ffff8bc3778) at /user/lea/dev/qemu/exec.c:2590
> #18 0x00007ffff793540d in address_space_write (as=0x7ffff8921a90, addr=402657344, attrs=..., buf=0x7fffec79d170 "\t\345y\354\377\177", len=1) at /user/lea/dev/qemu/exec.c:2635
> #19 0x00007ffff79344c0 in subpage_write (opaque=0x7fffd4569730, addr=64, value=9, len=1, attrs=...) at /user/lea/dev/qemu/exec.c:2221
> #20 0x00007ffff7988678 in memory_region_write_with_attrs_accessor (mr=0x7fffd4569730, addr=64, value=0x7fffec79d2b8, size=1, shift=0, mask=255, attrs=...) at /user/lea/dev/qemu/memory.c:551
> #21 0x00007ffff79887dd in access_with_adjusted_size (addr=64, value=0x7fffec79d2b8, size=1, access_size_min=1, access_size_max=8, access=0x7ffff7988568 <memory_region_write_with_attrs_accessor>, mr=0x7fffd4569730, attrs=...) at /user/lea/dev/qemu/memory.c:591
> #22 0x00007ffff798bcc9 in memory_region_dispatch_write (mr=0x7fffd4569730, addr=64, data=9, size=1, attrs=...) at /user/lea/dev/qemu/memory.c:1269
> #23 0x00007ffff7993676 in io_writeb (env=0x7ffff8941978, iotlbentry=0x7ffff894a0e8, val=9 '\t', addr=10376293541864280128, retaddr=140737194805689) at /user/lea/dev/qemu/softmmu_template.h:349
> #24 0x00007ffff7993ae0 in helper_ret_stb_mmu (env=0x7ffff8941978, addr=10376293541864280128, val=9 '\t', oi=0, retaddr=140737194805689) at /user/lea/dev/qemu/softmmu_template.h:390
> #25 0x00007fffee80c9bb in code_gen_buffer ()
> #26 0x00007ffff793c093 in cpu_tb_exec (cpu=0x7ffff8939700, itb=0x7fffedaf4ee0) at /user/lea/dev/qemu/cpu-exec.c:166
> #27 0x00007ffff793ccd1 in cpu_loop_exec_tb (cpu=0x7ffff8939700, tb=0x7fffedaf4ee0, last_tb=0x7fffec79d9e0, tb_exit=0x7fffec79d9fc, sc=0x7fffec79d9c0) at /user/lea/dev/qemu/cpu-exec.c:530
> #28 0x00007ffff793cf98 in cpu_exec (cpu=0x7ffff8939700) at /user/lea/dev/qemu/cpu-exec.c:626
> #29 0x00007ffff796f949 in tcg_cpu_exec (cpu=0x7ffff8939700) at /user/lea/dev/qemu/cpus.c:1541
> #30 0x00007ffff796fa52 in tcg_exec_all () at /user/lea/dev/qemu/cpus.c:1574
> #31 0x00007ffff796eb15 in qemu_tcg_cpu_thread_fn (arg=0x7ffff8939700) at /user/lea/dev/qemu/cpus.c:1171
> #32 0x00007ffff30ce9d1 in start_thread () from /lib64/libpthread.so.0
> #33 0x00007ffff2e1b86d in clone () from /lib64/libc.so.6
>
> Regards,
> Leon
>
> On Mon, Jul 04, 2016 at 07:46:15PM +0300, Michael S. Tsirkin wrote:
>> From: Marcel Apfelbaum <marcel@redhat.com>
>>
>> Skip bus_master_enable region creation on PCI device init
>> in order to be sure the IOMMU device (if present) would
>> be created in advance. Add this memory region at machine_done time.
>>
>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> ---
>>   include/hw/pci/pci_bus.h |  2 ++
>>   include/sysemu/sysemu.h  |  1 +
>>   hw/pci/pci.c             | 41 ++++++++++++++++++++++++++++++++---------
>>   vl.c                     |  5 +++++
>>   4 files changed, 40 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>> index 403fec6..5484a9b 100644
>> --- a/include/hw/pci/pci_bus.h
>> +++ b/include/hw/pci/pci_bus.h
>> @@ -39,6 +39,8 @@ struct PCIBus {
>>          Keep a count of the number of devices with raised IRQs.  */
>>       int nirq;
>>       int *irq_count;
>> +
>> +    Notifier machine_done;
>>   };
>>
>>   typedef struct PCIBridgeWindows PCIBridgeWindows;
>> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
>> index 7313673..ee7c760 100644
>> --- a/include/sysemu/sysemu.h
>> +++ b/include/sysemu/sysemu.h
>> @@ -75,6 +75,7 @@ void qemu_add_exit_notifier(Notifier *notify);
>>   void qemu_remove_exit_notifier(Notifier *notify);
>>
>>   void qemu_add_machine_init_done_notifier(Notifier *notify);
>> +void qemu_remove_machine_init_done_notifier(Notifier *notify);
>>
>>   void hmp_savevm(Monitor *mon, const QDict *qdict);
>>   int load_vmstate(const char *name);
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 4b585f4..ee385eb 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -78,10 +78,37 @@ static const VMStateDescription vmstate_pcibus = {
>>       }
>>   };
>>
>> +static void pci_init_bus_master(PCIDevice *pci_dev)
>> +{
>> +    AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
>> +
>> +    memory_region_init_alias(&pci_dev->bus_master_enable_region,
>> +                             OBJECT(pci_dev), "bus master",
>> +                             dma_as->root, 0, memory_region_size(dma_as->root));
>> +    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>> +    address_space_init(&pci_dev->bus_master_as,
>> +                       &pci_dev->bus_master_enable_region, pci_dev->name);
>> +}
>> +
>> +static void pcibus_machine_done(Notifier *notifier, void *data)
>> +{
>> +    PCIBus *bus = container_of(notifier, PCIBus, machine_done);
>> +    int i;
>> +
>> +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
>> +        if (bus->devices[i]) {
>> +            pci_init_bus_master(bus->devices[i]);
>> +        }
>> +    }
>> +}
>> +
>>   static void pci_bus_realize(BusState *qbus, Error **errp)
>>   {
>>       PCIBus *bus = PCI_BUS(qbus);
>>
>> +    bus->machine_done.notify = pcibus_machine_done;
>> +    qemu_add_machine_init_done_notifier(&bus->machine_done);
>> +
>>       vmstate_register(NULL, -1, &vmstate_pcibus, bus);
>>   }
>>
>> @@ -89,6 +116,8 @@ static void pci_bus_unrealize(BusState *qbus, Error **errp)
>>   {
>>       PCIBus *bus = PCI_BUS(qbus);
>>
>> +    qemu_remove_machine_init_done_notifier(&bus->machine_done);
>> +
>>       vmstate_unregister(NULL, &vmstate_pcibus, bus);
>>   }
>>
>> @@ -920,7 +949,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>       PCIConfigReadFunc *config_read = pc->config_read;
>>       PCIConfigWriteFunc *config_write = pc->config_write;
>>       Error *local_err = NULL;
>> -    AddressSpace *dma_as;
>>       DeviceState *dev = DEVICE(pci_dev);
>>
>>       pci_dev->bus = bus;
>> @@ -961,15 +989,10 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>>
>>       pci_dev->devfn = devfn;
>>       pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
>> -    dma_as = pci_device_iommu_address_space(pci_dev);
>> -
>> -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
>> -                             OBJECT(pci_dev), "bus master",
>> -                             dma_as->root, 0, memory_region_size(dma_as->root));
>> -    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>> -    address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
>> -                       name);
>>
>> +    if (qdev_hotplug) {
>> +        pci_init_bus_master(pci_dev);
>> +    }
>>       pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
>>       pci_dev->irq_state = 0;
>>       pci_config_alloc(pci_dev);
>> diff --git a/vl.c b/vl.c
>> index 9bb7f4c..5cd0f2a 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -2675,6 +2675,11 @@ void qemu_add_machine_init_done_notifier(Notifier *notify)
>>       }
>>   }
>>
>> +void qemu_remove_machine_init_done_notifier(Notifier *notify)
>> +{
>> +    notifier_remove(notify);
>> +}
>> +
>>   static void qemu_run_machine_init_done_notifiers(void)
>>   {
>>       notifier_list_notify(&machine_init_done_notifiers, NULL);
>> --
>> MST
>>
>>
Mark Cave-Ayland July 11, 2016, 6:41 p.m. UTC | #6
On 11/07/16 16:18, Marcel Apfelbaum wrote:

> On 07/11/2016 05:42 PM, Leon Alrae wrote:
>> Hi,
>>
>> This commit causes regressions in my MIPS tests. QEMU segfaults when
>> booting Linux on Malta board;
> 
> Hi Leon,
> 
> Is a good thing you caught it in the pull request, thanks!
> Is a pity we don't have a way to run sanity tests on all archs (beyond
> make check)
> 
>  this can be easily reproduced with
>> Aurelien's Debian images:
>>
>> wget
>> https://people.debian.org/~aurel32/qemu/mipsel/vmlinux-3.2.0-4-5kc-malta
>> wget
>> https://people.debian.org/~aurel32/qemu/mipsel/debian_wheezy_mipsel_standard.qcow2
>>
>> qemu-system-mips64el -M malta -kernel vmlinux-3.2.0-4-5kc-malta -hda
>> debian_wheezy_mipsel_standard.qcow2 -append "root=/dev/sda1
>> console=ttyS0" -nographic
>>
> 
> I'll reproduce and handle it, thanks!
> Marcel

FWIW this looks like the exact same backtrace I reported with
qemu-system-sparc64 so hopefully the solution should fix both problems :)


ATB,

Mark.
Marcel Apfelbaum July 14, 2016, 12:40 p.m. UTC | #7
On 07/11/2016 09:41 PM, Mark Cave-Ayland wrote:
> On 11/07/16 16:18, Marcel Apfelbaum wrote:
>
>> On 07/11/2016 05:42 PM, Leon Alrae wrote:
>>> Hi,
>>>
>>> This commit causes regressions in my MIPS tests. QEMU segfaults when
>>> booting Linux on Malta board;
>>
>> Hi Leon,
>>
>> Is a good thing you caught it in the pull request, thanks!
>> Is a pity we don't have a way to run sanity tests on all archs (beyond
>> make check)
>>
>>   this can be easily reproduced with
>>> Aurelien's Debian images:
>>>
>>> wget
>>> https://people.debian.org/~aurel32/qemu/mipsel/vmlinux-3.2.0-4-5kc-malta
>>> wget
>>> https://people.debian.org/~aurel32/qemu/mipsel/debian_wheezy_mipsel_standard.qcow2
>>>
>>> qemu-system-mips64el -M malta -kernel vmlinux-3.2.0-4-5kc-malta -hda
>>> debian_wheezy_mipsel_standard.qcow2 -append "root=/dev/sda1
>>> console=ttyS0" -nographic
>>>
>>
>> I'll reproduce and handle it, thanks!
>> Marcel
>
> FWIW this looks like the exact same backtrace I reported with
> qemu-system-sparc64 so hopefully the solution should fix both problems :)
>

Hi Mark and Leon,

I've reproduced both failures and I've found the root cause.
The PCI root bus is not realized as part of the machine initialization.

Until now the only consequence was the machine couldn't be migrated
(because the vmstate is initialized as part of 'realize').
However, from now on the devices cannot do DMA requests if the bus is not 'realized'.

It took some time because I had to go over all the archs and check this issue.
I have the patches ready, I am running some more tests and post them soon.

Thanks for the patience,
Marcel

>
> ATB,
>
> Mark.
>
diff mbox

Patch

diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 403fec6..5484a9b 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -39,6 +39,8 @@  struct PCIBus {
        Keep a count of the number of devices with raised IRQs.  */
     int nirq;
     int *irq_count;
+
+    Notifier machine_done;
 };
 
 typedef struct PCIBridgeWindows PCIBridgeWindows;
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 7313673..ee7c760 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -75,6 +75,7 @@  void qemu_add_exit_notifier(Notifier *notify);
 void qemu_remove_exit_notifier(Notifier *notify);
 
 void qemu_add_machine_init_done_notifier(Notifier *notify);
+void qemu_remove_machine_init_done_notifier(Notifier *notify);
 
 void hmp_savevm(Monitor *mon, const QDict *qdict);
 int load_vmstate(const char *name);
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 4b585f4..ee385eb 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -78,10 +78,37 @@  static const VMStateDescription vmstate_pcibus = {
     }
 };
 
+static void pci_init_bus_master(PCIDevice *pci_dev)
+{
+    AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
+
+    memory_region_init_alias(&pci_dev->bus_master_enable_region,
+                             OBJECT(pci_dev), "bus master",
+                             dma_as->root, 0, memory_region_size(dma_as->root));
+    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+    address_space_init(&pci_dev->bus_master_as,
+                       &pci_dev->bus_master_enable_region, pci_dev->name);
+}
+
+static void pcibus_machine_done(Notifier *notifier, void *data)
+{
+    PCIBus *bus = container_of(notifier, PCIBus, machine_done);
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
+        if (bus->devices[i]) {
+            pci_init_bus_master(bus->devices[i]);
+        }
+    }
+}
+
 static void pci_bus_realize(BusState *qbus, Error **errp)
 {
     PCIBus *bus = PCI_BUS(qbus);
 
+    bus->machine_done.notify = pcibus_machine_done;
+    qemu_add_machine_init_done_notifier(&bus->machine_done);
+
     vmstate_register(NULL, -1, &vmstate_pcibus, bus);
 }
 
@@ -89,6 +116,8 @@  static void pci_bus_unrealize(BusState *qbus, Error **errp)
 {
     PCIBus *bus = PCI_BUS(qbus);
 
+    qemu_remove_machine_init_done_notifier(&bus->machine_done);
+
     vmstate_unregister(NULL, &vmstate_pcibus, bus);
 }
 
@@ -920,7 +949,6 @@  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     PCIConfigReadFunc *config_read = pc->config_read;
     PCIConfigWriteFunc *config_write = pc->config_write;
     Error *local_err = NULL;
-    AddressSpace *dma_as;
     DeviceState *dev = DEVICE(pci_dev);
 
     pci_dev->bus = bus;
@@ -961,15 +989,10 @@  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
 
     pci_dev->devfn = devfn;
     pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
-    dma_as = pci_device_iommu_address_space(pci_dev);
-
-    memory_region_init_alias(&pci_dev->bus_master_enable_region,
-                             OBJECT(pci_dev), "bus master",
-                             dma_as->root, 0, memory_region_size(dma_as->root));
-    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
-    address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
-                       name);
 
+    if (qdev_hotplug) {
+        pci_init_bus_master(pci_dev);
+    }
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     pci_dev->irq_state = 0;
     pci_config_alloc(pci_dev);
diff --git a/vl.c b/vl.c
index 9bb7f4c..5cd0f2a 100644
--- a/vl.c
+++ b/vl.c
@@ -2675,6 +2675,11 @@  void qemu_add_machine_init_done_notifier(Notifier *notify)
     }
 }
 
+void qemu_remove_machine_init_done_notifier(Notifier *notify)
+{
+    notifier_remove(notify);
+}
+
 static void qemu_run_machine_init_done_notifiers(void)
 {
     notifier_list_notify(&machine_init_done_notifiers, NULL);