diff mbox

hw/pci: completed master-abort emulation

Message ID 1379934077-3188-1-git-send-email-marcel.a@redhat.com
State New
Headers show

Commit Message

Marcel Apfelbaum Sept. 23, 2013, 11:01 a.m. UTC
This patch is implemented on top of series:
[PATCH v5 0/3] pci: implement upstream master abort protocol

Added "master abort io" background region for PCIBus.

Added "master abort mem" region to catch transactions initiated
by pci devices targeted to unassigned addresses.

Enabled "master abort" regions for PCI-2-PCI bridge's secondary bus.

Set "Received Master Abort" Bit on Status/Secondary Status
register as defined in the PCI Spec.

Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
---
 hw/pci/pci.c             | 115 ++++++++++++++++++++++++++++++++++++++++++-----
 hw/pci/pci_bridge.c      |  10 +++++
 include/hw/pci/pci.h     |   3 ++
 include/hw/pci/pci_bus.h |   1 +
 4 files changed, 118 insertions(+), 11 deletions(-)

Comments

Michael S. Tsirkin Sept. 23, 2013, 11:27 a.m. UTC | #1
On Mon, Sep 23, 2013 at 02:01:17PM +0300, Marcel Apfelbaum wrote:
> This patch is implemented on top of series:
> [PATCH v5 0/3] pci: implement upstream master abort protocol
> 
> Added "master abort io" background region for PCIBus.
> 
> Added "master abort mem" region to catch transactions initiated
> by pci devices targeted to unassigned addresses.
> 
> Enabled "master abort" regions for PCI-2-PCI bridge's secondary bus.
> 
> Set "Received Master Abort" Bit on Status/Secondary Status
> register as defined in the PCI Spec.
> 
> Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>

I think it will be easier to review the complete
series, not an incremental patch.

We probably need to think what's the right thing
to do for master abort mode since we do not
emulate SERR. Do we make it writeable at all?

It's a pci only bit:
	When the Master-Abort Mode bit is cleared and a posted write transaction
	forwarded by the
	bridge terminates with a Master-Abort, no error is reported (note that
	the Master-Abort bit
	is still set). When the Master-Abort Mode bit is set and a posted write
	transaction forwarded
	by the bridge terminates with a Master-Abort on the destination bus, the
	bridge must:
	Assert SERR# on the primary interfaceCommand register)
	(if enabled by the SERR# Enable bitin the
	Set the Signaled System Errorbit in the Command register)
	bitin the Status register (if enabled by the SERR# Enable)

one way to do this would be not to set Master-Abort bit even
though spec says we should, and pretend there was no error.

> ---
>  hw/pci/pci.c             | 115 ++++++++++++++++++++++++++++++++++++++++++-----
>  hw/pci/pci_bridge.c      |  10 +++++
>  include/hw/pci/pci.h     |   3 ++
>  include/hw/pci/pci_bus.h |   1 +
>  4 files changed, 118 insertions(+), 11 deletions(-)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index d8a1b11..1f4e707 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -283,17 +283,76 @@ const char *pci_root_bus_path(PCIDevice *dev)
>      return rootbus->qbus.name;
>  }
>  
> +static PCIDevice *pci_bus_find_host(PCIBus *bus)
> +{
> +    PCIDevice *dev;
> +    int i;
> +
> +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> +        dev = bus->devices[i];
> +        if (dev) {
> +            PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> +            if (pc->class_id == PCI_CLASS_BRIDGE_HOST) {

In fact you can do this easier:
	 pci_get_word(dev->config + PCI_CLASS_DEVICE) == PCI_CLASS_BRIDGE_HOST

> +                return dev;
> +            }
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +static void master_abort(void *opaque)
> +{
> +    PCIDevice *master_dev = NULL;
> +    PCIDeviceClass *pc;
> +    PCIBus *bus;
> +    int downstream;

bool?

> +
> +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_DEVICE)) {

Please don't do dynamic cast just because you can.
You know very well what kind of object you pass
when you create the MR.
So just call the appropriate function.

> +        master_dev = PCI_DEVICE(opaque);
> +        bus = master_dev->bus;
> +        while (!pci_bus_is_root(bus)) {
> +            master_dev = bus->parent_dev;
> +            bus = master_dev->bus;
> +        }
> +        downstream = 0;
> +    }
> +
> +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_BUS)) {
> +        bus = PCI_BUS(opaque);
> +        if (pci_bus_is_root(bus)) {
> +            master_dev = pci_bus_find_host(bus);
> +        } else { /* bus behind a PCI-2-PCI bridge */
> +            master_dev = bus->parent_dev;
> +        }

So the reason for this is that device to device MA
is still not implemented here, correct?
If it was it would be just one instance of it.
I'm not saying this must block this patch, however
there should be a code comment to this effect,
and commit log should mention this explicitly.

> +        downstream = 1;
> +    }
> +
> +    assert(master_dev);
> +    pc = PCI_DEVICE_GET_CLASS(master_dev);
> +

There's very little common code between devices and
buses. So just use 2 functions.

> +    if (downstream && pc->is_bridge) {
> +        pci_word_test_and_set_mask(master_dev->config + PCI_SEC_STATUS,
> +                                   PCI_STATUS_REC_MASTER_ABORT);
> +    } else {
> +        pci_word_test_and_set_mask(master_dev->config + PCI_STATUS,
> +                                   PCI_STATUS_REC_MASTER_ABORT);
> +    }
> +}
> +
>  static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
>  {
> -   return -1ULL;

I didn't notice but this means there were 3 spaces here in previous
patch?

> +    master_abort(opaque);
> +    return -1ULL;
>  }
>  
>  static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
>                                     unsigned size)
>  {
> +    master_abort(opaque);
>  }
>  
> -static const MemoryRegionOps master_abort_mem_ops = {
> +static const MemoryRegionOps master_abort_ops = {
>      .read = master_abort_mem_read,
>      .write = master_abort_mem_write,
>      .endianness = DEVICE_LITTLE_ENDIAN,
> @@ -301,6 +360,23 @@ static const MemoryRegionOps master_abort_mem_ops = {
>  
>  #define MASTER_ABORT_MEM_PRIORITY INT_MIN
>  
> +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io)
> +{
> +    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> +                          &master_abort_ops, bus, mem,
> +                          memory_region_size(bus->address_space_mem));
> +    memory_region_add_subregion_overlap(bus->address_space_mem,
> +                                        0, &bus->master_abort_mem,
> +                                        MASTER_ABORT_MEM_PRIORITY);
> +
> +    memory_region_init_io(&bus->master_abort_io, OBJECT(bus),
> +                          &master_abort_ops, bus, io,
> +                          memory_region_size(bus->address_space_io));
> +    memory_region_add_subregion_overlap(bus->address_space_io,
> +                                        0, &bus->master_abort_io,
> +                                        MASTER_ABORT_MEM_PRIORITY);
> +}
> +
>  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
>                           const char *name,
>                           MemoryRegion *address_space_mem,
> @@ -312,13 +388,8 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent,
>      bus->address_space_mem = address_space_mem;
>      bus->address_space_io = address_space_io;
>  
> -
> -    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> -                          &master_abort_mem_ops, bus, "pci-master-abort",
> -                          memory_region_size(bus->address_space_mem));
> -    memory_region_add_subregion_overlap(bus->address_space_mem,
> -                                        0, &bus->master_abort_mem,
> -                                        MASTER_ABORT_MEM_PRIORITY);
> +    pci_bus_master_abort_init(bus, "pci-master-abort-mem",
> +                              "pci-master-abort-io");
>  
>      /* host bridge */
>      QLIST_INIT(&bus->child);
> @@ -840,9 +911,24 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>      pci_dev->bus = bus;
>      dma_as = pci_device_iommu_address_space(pci_dev);
>  
> -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> -                             OBJECT(pci_dev), "bus master",
> +    memory_region_init(&pci_dev->bus_master_enable_region, NULL,
> +                       "bus master", UINT64_MAX);
> +
> +    memory_region_init_io(&pci_dev->bus_master_abort_mem, OBJECT(bus),
> +                          &master_abort_ops, OBJECT(pci_dev),
> +                          "bus master master-abort",
> +                          memory_region_size(&pci_dev->bus_master_enable_region));
> +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> +                                        0, &pci_dev->bus_master_abort_mem,
> +                                        MASTER_ABORT_MEM_PRIORITY);
> +
> +    memory_region_init_alias(&pci_dev->bus_master_dma_mem,
> +                             OBJECT(pci_dev), "bus master dma",
>                               dma_as->root, 0, memory_region_size(dma_as->root));
> +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> +                                        0, &pci_dev->bus_master_dma_mem, 0);
> +
> +
>      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
>      address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
>                         name);
> @@ -901,6 +987,13 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
>      pci_config_free(pci_dev);
>  
>      address_space_destroy(&pci_dev->bus_master_as);
> +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> +                                &pci_dev->bus_master_dma_mem);
> +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> +                                &pci_dev->bus_master_abort_mem);
> +    memory_region_destroy(&pci_dev->bus_master_dma_mem);
> +    memory_region_destroy(&pci_dev->bus_master_abort_mem);
> +
>      memory_region_destroy(&pci_dev->bus_master_enable_region);
>  }
>  
> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> index e6b22b8..56b682f 100644
> --- a/hw/pci/pci_bridge.c
> +++ b/hw/pci/pci_bridge.c
> @@ -376,6 +376,10 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
>      sec_bus->address_space_io = &br->address_space_io;
>      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
>      br->windows = pci_bridge_region_init(br);
> +
> +    pci_bus_master_abort_init(sec_bus, "pci_bridge_master_abort_mem",
> +                              "pci_bridge_master_abort_io");
> +
>      QLIST_INIT(&sec_bus->child);
>      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
>      return 0;
> @@ -389,6 +393,12 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
>      QLIST_REMOVE(&s->sec_bus, sibling);
>      pci_bridge_region_del(s, s->windows);
>      pci_bridge_region_cleanup(s, s->windows);
> +    memory_region_del_subregion(s->sec_bus.address_space_mem,
> +                                &s->sec_bus.master_abort_mem);
> +    memory_region_del_subregion(s->sec_bus.address_space_io,
> +                                &s->sec_bus.master_abort_io);
> +    memory_region_destroy(&s->sec_bus.master_abort_mem);
> +    memory_region_destroy(&s->sec_bus.master_abort_io);
>      memory_region_destroy(&s->address_space_mem);
>      memory_region_destroy(&s->address_space_io);
>      /* qbus_free() is called automatically by qdev_free() */
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 37979aa..d69e06d 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -242,6 +242,8 @@ struct PCIDevice {
>      PCIIORegion io_regions[PCI_NUM_REGIONS];
>      AddressSpace bus_master_as;
>      MemoryRegion bus_master_enable_region;
> +    MemoryRegion bus_master_dma_mem;
> +    MemoryRegion bus_master_abort_mem;
>  
>      /* do not access the following fields */
>      PCIConfigReadFunc *config_read;
> @@ -357,6 +359,7 @@ PCIBus *pci_bus_new(DeviceState *parent, const char *name,
>                      MemoryRegion *address_space_mem,
>                      MemoryRegion *address_space_io,
>                      uint8_t devfn_min, const char *typename);
> +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io);
>  void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
>                    void *irq_opaque, int nirq);
>  int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 2ad5edb..57b1541 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -24,6 +24,7 @@ struct PCIBus {
>      MemoryRegion *address_space_mem;
>      MemoryRegion *address_space_io;
>      MemoryRegion master_abort_mem;
> +    MemoryRegion master_abort_io;
>  
>      QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
>      QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
> -- 
> 1.8.3.1
Marcel Apfelbaum Sept. 23, 2013, 12:37 p.m. UTC | #2
On Mon, 2013-09-23 at 14:27 +0300, Michael S. Tsirkin wrote:
> On Mon, Sep 23, 2013 at 02:01:17PM +0300, Marcel Apfelbaum wrote:
> > This patch is implemented on top of series:
> > [PATCH v5 0/3] pci: implement upstream master abort protocol
> > 
> > Added "master abort io" background region for PCIBus.
> > 
> > Added "master abort mem" region to catch transactions initiated
> > by pci devices targeted to unassigned addresses.
> > 
> > Enabled "master abort" regions for PCI-2-PCI bridge's secondary bus.
> > 
> > Set "Received Master Abort" Bit on Status/Secondary Status
> > register as defined in the PCI Spec.
> > 
> > Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> 
> I think it will be easier to review the complete
> series, not an incremental patch.
Should I combine this with the prev series without the
memory patches?

> 
> We probably need to think what's the right thing
> to do for master abort mode since we do not
> emulate SERR. Do we make it writeable at all?
Master abort mode can be enabled. (Tested)

> 
> It's a pci only bit:
> 	When the Master-Abort Mode bit is cleared and a posted write transaction
> 	forwarded by the
> 	bridge terminates with a Master-Abort, no error is reported (note that
> 	the Master-Abort bit
> 	is still set). When the Master-Abort Mode bit is set and a posted write
> 	transaction forwarded
> 	by the bridge terminates with a Master-Abort on the destination bus, the
> 	bridge must:
> 	Assert SERR# on the primary interfaceCommand register)
> 	(if enabled by the SERR# Enable bitin the
> 	Set the Signaled System Errorbit in the Command register)
> 	bitin the Status register (if enabled by the SERR# Enable)
> 
> one way to do this would be not to set Master-Abort bit even
> though spec says we should, and pretend there was no error.
Maybe we can just not allow to set "Master abort mode"
in BRIDGE_CONTROL register. It is a cleaner way (I think)
considering we don't emulate SERR.

> 
> > ---
> >  hw/pci/pci.c             | 115 ++++++++++++++++++++++++++++++++++++++++++-----
> >  hw/pci/pci_bridge.c      |  10 +++++
> >  include/hw/pci/pci.h     |   3 ++
> >  include/hw/pci/pci_bus.h |   1 +
> >  4 files changed, 118 insertions(+), 11 deletions(-)
> > 
> > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > index d8a1b11..1f4e707 100644
> > --- a/hw/pci/pci.c
> > +++ b/hw/pci/pci.c
> > @@ -283,17 +283,76 @@ const char *pci_root_bus_path(PCIDevice *dev)
> >      return rootbus->qbus.name;
> >  }
> >  
> > +static PCIDevice *pci_bus_find_host(PCIBus *bus)
> > +{
> > +    PCIDevice *dev;
> > +    int i;
> > +
> > +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> > +        dev = bus->devices[i];
> > +        if (dev) {
> > +            PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> > +            if (pc->class_id == PCI_CLASS_BRIDGE_HOST) {
> 
> In fact you can do this easier:
> 	 pci_get_word(dev->config + PCI_CLASS_DEVICE) == PCI_CLASS_BRIDGE_HOST
Thanks, I'll change.
> 
> > +                return dev;
> > +            }
> > +        }
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> > +static void master_abort(void *opaque)
> > +{
> > +    PCIDevice *master_dev = NULL;
> > +    PCIDeviceClass *pc;
> > +    PCIBus *bus;
> > +    int downstream;
> 
> bool?
Yes, I'll change

> > +
> > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_DEVICE)) {
> 
> Please don't do dynamic cast just because you can.
> You know very well what kind of object you pass
> when you create the MR.
> So just call the appropriate function.
I wanted to merge the code for all 3 entities involved:
root PCIBus, PCIBus behind a bridge, and PCIDevice.
The region for merge was to use the same master_abort_mem_ops
because there are the same operations that must be done:
return -1 (0xFFF...) and set master abort received bit. 

> 
> > +        master_dev = PCI_DEVICE(opaque);
> > +        bus = master_dev->bus;
> > +        while (!pci_bus_is_root(bus)) {
> > +            master_dev = bus->parent_dev;
> > +            bus = master_dev->bus;
> > +        }
> > +        downstream = 0;
> > +    }
> > +
> > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_BUS)) {
> > +        bus = PCI_BUS(opaque);
> > +        if (pci_bus_is_root(bus)) {
> > +            master_dev = pci_bus_find_host(bus);
> > +        } else { /* bus behind a PCI-2-PCI bridge */
> > +            master_dev = bus->parent_dev;
> > +        }
> 
> So the reason for this is that device to device MA
> is still not implemented here, correct?
Nope, the above code differentiate 2 scenarios:
1. the bus is the root bus => look for host bridge
   to update STATUS register
2. the bus is bridge's secondary bus =>
   update bridge STATUS register
"Device 2 Device" is handled implicitly because we have
a background region covering all device's target memory

> If it was it would be just one instance of it.
> I'm not saying this must block this patch, however
> there should be a code comment to this effect,
> and commit log should mention this explicitly.
As above, "Device 2 Device" is handled implicit
> 
> > +        downstream = 1;
> > +    }
> > +
> > +    assert(master_dev);
> > +    pc = PCI_DEVICE_GET_CLASS(master_dev);
> > +
> 
> There's very little common code between devices and
> buses. So just use 2 functions.
As stated above:
The region for merge was to use the same master_abort_mem_ops
because there are the same operations that must be done:
return -1 (0xFFF...) and set master abort received bit.
Separating them will result in some code duplication.
The read and write methods are the same, but would call different
"master_abort" methods.
If it does bother you the down casting anyway, I'll change.

> 
> > +    if (downstream && pc->is_bridge) {
> > +        pci_word_test_and_set_mask(master_dev->config + PCI_SEC_STATUS,
> > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > +    } else {
> > +        pci_word_test_and_set_mask(master_dev->config + PCI_STATUS,
> > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > +    }
> > +}
> > +
> >  static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
> >  {
> > -   return -1ULL;
> 
> I didn't notice but this means there were 3 spaces here in previous
> patch?
yes :(, checkpatch didn't catch it

Thanks for the review,
Marcel

> 
> > +    master_abort(opaque);
> > +    return -1ULL;
> >  }
> >  
> >  static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
> >                                     unsigned size)
> >  {
> > +    master_abort(opaque);
> >  }
> >  
> > -static const MemoryRegionOps master_abort_mem_ops = {
> > +static const MemoryRegionOps master_abort_ops = {
> >      .read = master_abort_mem_read,
> >      .write = master_abort_mem_write,
> >      .endianness = DEVICE_LITTLE_ENDIAN,
> > @@ -301,6 +360,23 @@ static const MemoryRegionOps master_abort_mem_ops = {
> >  
> >  #define MASTER_ABORT_MEM_PRIORITY INT_MIN
> >  
> > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io)
> > +{
> > +    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > +                          &master_abort_ops, bus, mem,
> > +                          memory_region_size(bus->address_space_mem));
> > +    memory_region_add_subregion_overlap(bus->address_space_mem,
> > +                                        0, &bus->master_abort_mem,
> > +                                        MASTER_ABORT_MEM_PRIORITY);
> > +
> > +    memory_region_init_io(&bus->master_abort_io, OBJECT(bus),
> > +                          &master_abort_ops, bus, io,
> > +                          memory_region_size(bus->address_space_io));
> > +    memory_region_add_subregion_overlap(bus->address_space_io,
> > +                                        0, &bus->master_abort_io,
> > +                                        MASTER_ABORT_MEM_PRIORITY);
> > +}
> > +
> >  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> >                           const char *name,
> >                           MemoryRegion *address_space_mem,
> > @@ -312,13 +388,8 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> >      bus->address_space_mem = address_space_mem;
> >      bus->address_space_io = address_space_io;
> >  
> > -
> > -    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > -                          &master_abort_mem_ops, bus, "pci-master-abort",
> > -                          memory_region_size(bus->address_space_mem));
> > -    memory_region_add_subregion_overlap(bus->address_space_mem,
> > -                                        0, &bus->master_abort_mem,
> > -                                        MASTER_ABORT_MEM_PRIORITY);
> > +    pci_bus_master_abort_init(bus, "pci-master-abort-mem",
> > +                              "pci-master-abort-io");
> >  
> >      /* host bridge */
> >      QLIST_INIT(&bus->child);
> > @@ -840,9 +911,24 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> >      pci_dev->bus = bus;
> >      dma_as = pci_device_iommu_address_space(pci_dev);
> >  
> > -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> > -                             OBJECT(pci_dev), "bus master",
> > +    memory_region_init(&pci_dev->bus_master_enable_region, NULL,
> > +                       "bus master", UINT64_MAX);
> > +
> > +    memory_region_init_io(&pci_dev->bus_master_abort_mem, OBJECT(bus),
> > +                          &master_abort_ops, OBJECT(pci_dev),
> > +                          "bus master master-abort",
> > +                          memory_region_size(&pci_dev->bus_master_enable_region));
> > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > +                                        0, &pci_dev->bus_master_abort_mem,
> > +                                        MASTER_ABORT_MEM_PRIORITY);
> > +
> > +    memory_region_init_alias(&pci_dev->bus_master_dma_mem,
> > +                             OBJECT(pci_dev), "bus master dma",
> >                               dma_as->root, 0, memory_region_size(dma_as->root));
> > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > +                                        0, &pci_dev->bus_master_dma_mem, 0);
> > +
> > +
> >      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> >      address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
> >                         name);
> > @@ -901,6 +987,13 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
> >      pci_config_free(pci_dev);
> >  
> >      address_space_destroy(&pci_dev->bus_master_as);
> > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > +                                &pci_dev->bus_master_dma_mem);
> > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > +                                &pci_dev->bus_master_abort_mem);
> > +    memory_region_destroy(&pci_dev->bus_master_dma_mem);
> > +    memory_region_destroy(&pci_dev->bus_master_abort_mem);
> > +
> >      memory_region_destroy(&pci_dev->bus_master_enable_region);
> >  }
> >  
> > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > index e6b22b8..56b682f 100644
> > --- a/hw/pci/pci_bridge.c
> > +++ b/hw/pci/pci_bridge.c
> > @@ -376,6 +376,10 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
> >      sec_bus->address_space_io = &br->address_space_io;
> >      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
> >      br->windows = pci_bridge_region_init(br);
> > +
> > +    pci_bus_master_abort_init(sec_bus, "pci_bridge_master_abort_mem",
> > +                              "pci_bridge_master_abort_io");
> > +
> >      QLIST_INIT(&sec_bus->child);
> >      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
> >      return 0;
> > @@ -389,6 +393,12 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
> >      QLIST_REMOVE(&s->sec_bus, sibling);
> >      pci_bridge_region_del(s, s->windows);
> >      pci_bridge_region_cleanup(s, s->windows);
> > +    memory_region_del_subregion(s->sec_bus.address_space_mem,
> > +                                &s->sec_bus.master_abort_mem);
> > +    memory_region_del_subregion(s->sec_bus.address_space_io,
> > +                                &s->sec_bus.master_abort_io);
> > +    memory_region_destroy(&s->sec_bus.master_abort_mem);
> > +    memory_region_destroy(&s->sec_bus.master_abort_io);
> >      memory_region_destroy(&s->address_space_mem);
> >      memory_region_destroy(&s->address_space_io);
> >      /* qbus_free() is called automatically by qdev_free() */
> > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > index 37979aa..d69e06d 100644
> > --- a/include/hw/pci/pci.h
> > +++ b/include/hw/pci/pci.h
> > @@ -242,6 +242,8 @@ struct PCIDevice {
> >      PCIIORegion io_regions[PCI_NUM_REGIONS];
> >      AddressSpace bus_master_as;
> >      MemoryRegion bus_master_enable_region;
> > +    MemoryRegion bus_master_dma_mem;
> > +    MemoryRegion bus_master_abort_mem;
> >  
> >      /* do not access the following fields */
> >      PCIConfigReadFunc *config_read;
> > @@ -357,6 +359,7 @@ PCIBus *pci_bus_new(DeviceState *parent, const char *name,
> >                      MemoryRegion *address_space_mem,
> >                      MemoryRegion *address_space_io,
> >                      uint8_t devfn_min, const char *typename);
> > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io);
> >  void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
> >                    void *irq_opaque, int nirq);
> >  int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
> > diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> > index 2ad5edb..57b1541 100644
> > --- a/include/hw/pci/pci_bus.h
> > +++ b/include/hw/pci/pci_bus.h
> > @@ -24,6 +24,7 @@ struct PCIBus {
> >      MemoryRegion *address_space_mem;
> >      MemoryRegion *address_space_io;
> >      MemoryRegion master_abort_mem;
> > +    MemoryRegion master_abort_io;
> >  
> >      QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
> >      QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
> > -- 
> > 1.8.3.1
Michael S. Tsirkin Sept. 23, 2013, 1:45 p.m. UTC | #3
On Mon, Sep 23, 2013 at 03:37:43PM +0300, Marcel Apfelbaum wrote:
> On Mon, 2013-09-23 at 14:27 +0300, Michael S. Tsirkin wrote:
> > On Mon, Sep 23, 2013 at 02:01:17PM +0300, Marcel Apfelbaum wrote:
> > > This patch is implemented on top of series:
> > > [PATCH v5 0/3] pci: implement upstream master abort protocol
> > > 
> > > Added "master abort io" background region for PCIBus.
> > > 
> > > Added "master abort mem" region to catch transactions initiated
> > > by pci devices targeted to unassigned addresses.
> > > 
> > > Enabled "master abort" regions for PCI-2-PCI bridge's secondary bus.
> > > 
> > > Set "Received Master Abort" Bit on Status/Secondary Status
> > > register as defined in the PCI Spec.
> > > 
> > > Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> > 
> > I think it will be easier to review the complete
> > series, not an incremental patch.
> Should I combine this with the prev series without the
> memory patches?
> 
> > 
> > We probably need to think what's the right thing
> > to do for master abort mode since we do not
> > emulate SERR. Do we make it writeable at all?
> Master abort mode can be enabled. (Tested)
> > 
> > It's a pci only bit:
> > 	When the Master-Abort Mode bit is cleared and a posted write transaction
> > 	forwarded by the
> > 	bridge terminates with a Master-Abort, no error is reported (note that
> > 	the Master-Abort bit
> > 	is still set). When the Master-Abort Mode bit is set and a posted write
> > 	transaction forwarded
> > 	by the bridge terminates with a Master-Abort on the destination bus, the
> > 	bridge must:
> > 	Assert SERR# on the primary interfaceCommand register)
> > 	(if enabled by the SERR# Enable bitin the
> > 	Set the Signaled System Errorbit in the Command register)
> > 	bitin the Status register (if enabled by the SERR# Enable)
> > 
> > one way to do this would be not to set Master-Abort bit even
> > though spec says we should, and pretend there was no error.
> Maybe we can just not allow to set "Master abort mode"
> in BRIDGE_CONTROL register. It is a cleaner way (I think)
> considering we don't emulate SERR.

I'm not sure P2P spec allows this - want to check.
It's the right thing to do for express.

Also pls note that cross-version migration
requires that we keep it writable for old
machine types.

> > 
> > > ---
> > >  hw/pci/pci.c             | 115 ++++++++++++++++++++++++++++++++++++++++++-----
> > >  hw/pci/pci_bridge.c      |  10 +++++
> > >  include/hw/pci/pci.h     |   3 ++
> > >  include/hw/pci/pci_bus.h |   1 +
> > >  4 files changed, 118 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > index d8a1b11..1f4e707 100644
> > > --- a/hw/pci/pci.c
> > > +++ b/hw/pci/pci.c
> > > @@ -283,17 +283,76 @@ const char *pci_root_bus_path(PCIDevice *dev)
> > >      return rootbus->qbus.name;
> > >  }
> > >  
> > > +static PCIDevice *pci_bus_find_host(PCIBus *bus)
> > > +{
> > > +    PCIDevice *dev;
> > > +    int i;
> > > +
> > > +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> > > +        dev = bus->devices[i];
> > > +        if (dev) {
> > > +            PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> > > +            if (pc->class_id == PCI_CLASS_BRIDGE_HOST) {
> > 
> > In fact you can do this easier:
> > 	 pci_get_word(dev->config + PCI_CLASS_DEVICE) == PCI_CLASS_BRIDGE_HOST
> Thanks, I'll change.
> > 
> > > +                return dev;
> > > +            }
> > > +        }
> > > +    }
> > > +
> > > +    return NULL;
> > > +}
> > > +
> > > +static void master_abort(void *opaque)
> > > +{
> > > +    PCIDevice *master_dev = NULL;
> > > +    PCIDeviceClass *pc;
> > > +    PCIBus *bus;
> > > +    int downstream;
> > 
> > bool?
> Yes, I'll change
> 
> > > +
> > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_DEVICE)) {
> > 
> > Please don't do dynamic cast just because you can.
> > You know very well what kind of object you pass
> > when you create the MR.
> > So just call the appropriate function.
> I wanted to merge the code for all 3 entities involved:
> root PCIBus, PCIBus behind a bridge, and PCIDevice.
> The region for merge was to use the same master_abort_mem_ops
> because there are the same operations that must be done:
> return -1 (0xFFF...) and set master abort received bit. 

return -1 is not a lot of common code, and MA bit
is in a different place each time.

> > 
> > > +        master_dev = PCI_DEVICE(opaque);
> > > +        bus = master_dev->bus;
> > > +        while (!pci_bus_is_root(bus)) {
> > > +            master_dev = bus->parent_dev;
> > > +            bus = master_dev->bus;
> > > +        }
> > > +        downstream = 0;
> > > +    }
> > > +
> > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_BUS)) {
> > > +        bus = PCI_BUS(opaque);
> > > +        if (pci_bus_is_root(bus)) {
> > > +            master_dev = pci_bus_find_host(bus);
> > > +        } else { /* bus behind a PCI-2-PCI bridge */
> > > +            master_dev = bus->parent_dev;
> > > +        }
> > 
> > So the reason for this is that device to device MA
> > is still not implemented here, correct?
> Nope, the above code differentiate 2 scenarios:
> 1. the bus is the root bus => look for host bridge
>    to update STATUS register
> 2. the bus is bridge's secondary bus =>
>    update bridge STATUS register
> "Device 2 Device" is handled implicitly because we have
> a background region covering all device's target memory
> 
> > If it was it would be just one instance of it.
> > I'm not saying this must block this patch, however
> > there should be a code comment to this effect,
> > and commit log should mention this explicitly.
> As above, "Device 2 Device" is handled implicit

Implicit => confused.

I'm confused. Where's the code that sets MA bit on
the initiator device?
Maybe if you split each case properly it will become
clear.

> > 
> > > +        downstream = 1;
> > > +    }
> > > +
> > > +    assert(master_dev);
> > > +    pc = PCI_DEVICE_GET_CLASS(master_dev);
> > > +
> > 
> > There's very little common code between devices and
> > buses. So just use 2 functions.
> As stated above:
> The region for merge was to use the same master_abort_mem_ops
> because there are the same operations that must be done:
> return -1 (0xFFF...) and set master abort received bit.
> Separating them will result in some code duplication.

About 2 lines. But you'll lose the nasty dynamic casts.
opaque pointers are bad enough.

> The read and write methods are the same, but would call different
> "master_abort" methods.
> If it does bother you the down casting anyway, I'll change.

Pls do.

> > 
> > > +    if (downstream && pc->is_bridge) {
> > > +        pci_word_test_and_set_mask(master_dev->config + PCI_SEC_STATUS,
> > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > +    } else {
> > > +        pci_word_test_and_set_mask(master_dev->config + PCI_STATUS,
> > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > +    }
> > > +}
> > > +
> > >  static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
> > >  {
> > > -   return -1ULL;
> > 
> > I didn't notice but this means there were 3 spaces here in previous
> > patch?
> yes :(, checkpatch didn't catch it
> 
> Thanks for the review,
> Marcel
> 
> > 
> > > +    master_abort(opaque);
> > > +    return -1ULL;
> > >  }
> > >  
> > >  static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
> > >                                     unsigned size)
> > >  {
> > > +    master_abort(opaque);
> > >  }
> > >  
> > > -static const MemoryRegionOps master_abort_mem_ops = {
> > > +static const MemoryRegionOps master_abort_ops = {
> > >      .read = master_abort_mem_read,
> > >      .write = master_abort_mem_write,
> > >      .endianness = DEVICE_LITTLE_ENDIAN,
> > > @@ -301,6 +360,23 @@ static const MemoryRegionOps master_abort_mem_ops = {
> > >  
> > >  #define MASTER_ABORT_MEM_PRIORITY INT_MIN
> > >  
> > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io)
> > > +{
> > > +    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > +                          &master_abort_ops, bus, mem,
> > > +                          memory_region_size(bus->address_space_mem));
> > > +    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > +                                        0, &bus->master_abort_mem,
> > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > +
> > > +    memory_region_init_io(&bus->master_abort_io, OBJECT(bus),
> > > +                          &master_abort_ops, bus, io,
> > > +                          memory_region_size(bus->address_space_io));
> > > +    memory_region_add_subregion_overlap(bus->address_space_io,
> > > +                                        0, &bus->master_abort_io,
> > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > +}
> > > +
> > >  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > >                           const char *name,
> > >                           MemoryRegion *address_space_mem,
> > > @@ -312,13 +388,8 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > >      bus->address_space_mem = address_space_mem;
> > >      bus->address_space_io = address_space_io;
> > >  
> > > -
> > > -    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > -                          &master_abort_mem_ops, bus, "pci-master-abort",
> > > -                          memory_region_size(bus->address_space_mem));
> > > -    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > -                                        0, &bus->master_abort_mem,
> > > -                                        MASTER_ABORT_MEM_PRIORITY);
> > > +    pci_bus_master_abort_init(bus, "pci-master-abort-mem",
> > > +                              "pci-master-abort-io");
> > >  
> > >      /* host bridge */
> > >      QLIST_INIT(&bus->child);
> > > @@ -840,9 +911,24 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> > >      pci_dev->bus = bus;
> > >      dma_as = pci_device_iommu_address_space(pci_dev);
> > >  
> > > -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> > > -                             OBJECT(pci_dev), "bus master",
> > > +    memory_region_init(&pci_dev->bus_master_enable_region, NULL,
> > > +                       "bus master", UINT64_MAX);
> > > +
> > > +    memory_region_init_io(&pci_dev->bus_master_abort_mem, OBJECT(bus),
> > > +                          &master_abort_ops, OBJECT(pci_dev),
> > > +                          "bus master master-abort",
> > > +                          memory_region_size(&pci_dev->bus_master_enable_region));
> > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > +                                        0, &pci_dev->bus_master_abort_mem,
> > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > +
> > > +    memory_region_init_alias(&pci_dev->bus_master_dma_mem,
> > > +                             OBJECT(pci_dev), "bus master dma",
> > >                               dma_as->root, 0, memory_region_size(dma_as->root));
> > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > +                                        0, &pci_dev->bus_master_dma_mem, 0);
> > > +
> > > +
> > >      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> > >      address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
> > >                         name);
> > > @@ -901,6 +987,13 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
> > >      pci_config_free(pci_dev);
> > >  
> > >      address_space_destroy(&pci_dev->bus_master_as);
> > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > +                                &pci_dev->bus_master_dma_mem);
> > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > +                                &pci_dev->bus_master_abort_mem);
> > > +    memory_region_destroy(&pci_dev->bus_master_dma_mem);
> > > +    memory_region_destroy(&pci_dev->bus_master_abort_mem);
> > > +
> > >      memory_region_destroy(&pci_dev->bus_master_enable_region);
> > >  }
> > >  
> > > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > > index e6b22b8..56b682f 100644
> > > --- a/hw/pci/pci_bridge.c
> > > +++ b/hw/pci/pci_bridge.c
> > > @@ -376,6 +376,10 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
> > >      sec_bus->address_space_io = &br->address_space_io;
> > >      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
> > >      br->windows = pci_bridge_region_init(br);
> > > +
> > > +    pci_bus_master_abort_init(sec_bus, "pci_bridge_master_abort_mem",
> > > +                              "pci_bridge_master_abort_io");
> > > +
> > >      QLIST_INIT(&sec_bus->child);
> > >      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
> > >      return 0;
> > > @@ -389,6 +393,12 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
> > >      QLIST_REMOVE(&s->sec_bus, sibling);
> > >      pci_bridge_region_del(s, s->windows);
> > >      pci_bridge_region_cleanup(s, s->windows);
> > > +    memory_region_del_subregion(s->sec_bus.address_space_mem,
> > > +                                &s->sec_bus.master_abort_mem);
> > > +    memory_region_del_subregion(s->sec_bus.address_space_io,
> > > +                                &s->sec_bus.master_abort_io);
> > > +    memory_region_destroy(&s->sec_bus.master_abort_mem);
> > > +    memory_region_destroy(&s->sec_bus.master_abort_io);
> > >      memory_region_destroy(&s->address_space_mem);
> > >      memory_region_destroy(&s->address_space_io);
> > >      /* qbus_free() is called automatically by qdev_free() */
> > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > index 37979aa..d69e06d 100644
> > > --- a/include/hw/pci/pci.h
> > > +++ b/include/hw/pci/pci.h
> > > @@ -242,6 +242,8 @@ struct PCIDevice {
> > >      PCIIORegion io_regions[PCI_NUM_REGIONS];
> > >      AddressSpace bus_master_as;
> > >      MemoryRegion bus_master_enable_region;
> > > +    MemoryRegion bus_master_dma_mem;
> > > +    MemoryRegion bus_master_abort_mem;
> > >  
> > >      /* do not access the following fields */
> > >      PCIConfigReadFunc *config_read;
> > > @@ -357,6 +359,7 @@ PCIBus *pci_bus_new(DeviceState *parent, const char *name,
> > >                      MemoryRegion *address_space_mem,
> > >                      MemoryRegion *address_space_io,
> > >                      uint8_t devfn_min, const char *typename);
> > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io);
> > >  void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
> > >                    void *irq_opaque, int nirq);
> > >  int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
> > > diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> > > index 2ad5edb..57b1541 100644
> > > --- a/include/hw/pci/pci_bus.h
> > > +++ b/include/hw/pci/pci_bus.h
> > > @@ -24,6 +24,7 @@ struct PCIBus {
> > >      MemoryRegion *address_space_mem;
> > >      MemoryRegion *address_space_io;
> > >      MemoryRegion master_abort_mem;
> > > +    MemoryRegion master_abort_io;
> > >  
> > >      QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
> > >      QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
> > > -- 
> > > 1.8.3.1
> 
>
Marcel Apfelbaum Sept. 23, 2013, 2:43 p.m. UTC | #4
On Mon, 2013-09-23 at 16:45 +0300, Michael S. Tsirkin wrote:
> On Mon, Sep 23, 2013 at 03:37:43PM +0300, Marcel Apfelbaum wrote:
> > On Mon, 2013-09-23 at 14:27 +0300, Michael S. Tsirkin wrote:
> > > On Mon, Sep 23, 2013 at 02:01:17PM +0300, Marcel Apfelbaum wrote:
> > > > This patch is implemented on top of series:
> > > > [PATCH v5 0/3] pci: implement upstream master abort protocol
> > > > 
> > > > Added "master abort io" background region for PCIBus.
> > > > 
> > > > Added "master abort mem" region to catch transactions initiated
> > > > by pci devices targeted to unassigned addresses.
> > > > 
> > > > Enabled "master abort" regions for PCI-2-PCI bridge's secondary bus.
> > > > 
> > > > Set "Received Master Abort" Bit on Status/Secondary Status
> > > > register as defined in the PCI Spec.
> > > > 
> > > > Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> > > 
> > > I think it will be easier to review the complete
> > > series, not an incremental patch.
> > Should I combine this with the prev series without the
> > memory patches?
> > 
> > > 
> > > We probably need to think what's the right thing
> > > to do for master abort mode since we do not
> > > emulate SERR. Do we make it writeable at all?
> > Master abort mode can be enabled. (Tested)
> > > 
> > > It's a pci only bit:
> > > 	When the Master-Abort Mode bit is cleared and a posted write transaction
> > > 	forwarded by the
> > > 	bridge terminates with a Master-Abort, no error is reported (note that
> > > 	the Master-Abort bit
> > > 	is still set). When the Master-Abort Mode bit is set and a posted write
> > > 	transaction forwarded
> > > 	by the bridge terminates with a Master-Abort on the destination bus, the
> > > 	bridge must:
> > > 	Assert SERR# on the primary interfaceCommand register)
> > > 	(if enabled by the SERR# Enable bitin the
> > > 	Set the Signaled System Errorbit in the Command register)
> > > 	bitin the Status register (if enabled by the SERR# Enable)
> > > 
> > > one way to do this would be not to set Master-Abort bit even
> > > though spec says we should, and pretend there was no error.
> > Maybe we can just not allow to set "Master abort mode"
> > in BRIDGE_CONTROL register. It is a cleaner way (I think)
> > considering we don't emulate SERR.
> 
> I'm not sure P2P spec allows this - want to check.
I will check this too, thanks

> It's the right thing to do for express.
> 
> Also pls note that cross-version migration
> requires that we keep it writable for old
> machine types.
Yes :(. It seems we need another solution.

> 
> > > 
> > > > ---
> > > >  hw/pci/pci.c             | 115 ++++++++++++++++++++++++++++++++++++++++++-----
> > > >  hw/pci/pci_bridge.c      |  10 +++++
> > > >  include/hw/pci/pci.h     |   3 ++
> > > >  include/hw/pci/pci_bus.h |   1 +
> > > >  4 files changed, 118 insertions(+), 11 deletions(-)
> > > > 
> > > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > > index d8a1b11..1f4e707 100644
> > > > --- a/hw/pci/pci.c
> > > > +++ b/hw/pci/pci.c
> > > > @@ -283,17 +283,76 @@ const char *pci_root_bus_path(PCIDevice *dev)
> > > >      return rootbus->qbus.name;
> > > >  }
> > > >  
> > > > +static PCIDevice *pci_bus_find_host(PCIBus *bus)
> > > > +{
> > > > +    PCIDevice *dev;
> > > > +    int i;
> > > > +
> > > > +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> > > > +        dev = bus->devices[i];
> > > > +        if (dev) {
> > > > +            PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> > > > +            if (pc->class_id == PCI_CLASS_BRIDGE_HOST) {
> > > 
> > > In fact you can do this easier:
> > > 	 pci_get_word(dev->config + PCI_CLASS_DEVICE) == PCI_CLASS_BRIDGE_HOST
> > Thanks, I'll change.
> > > 
> > > > +                return dev;
> > > > +            }
> > > > +        }
> > > > +    }
> > > > +
> > > > +    return NULL;
> > > > +}
> > > > +
> > > > +static void master_abort(void *opaque)
> > > > +{
> > > > +    PCIDevice *master_dev = NULL;
> > > > +    PCIDeviceClass *pc;
> > > > +    PCIBus *bus;
> > > > +    int downstream;
> > > 
> > > bool?
> > Yes, I'll change
> > 
> > > > +
> > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_DEVICE)) {
> > > 
> > > Please don't do dynamic cast just because you can.
> > > You know very well what kind of object you pass
> > > when you create the MR.
> > > So just call the appropriate function.
> > I wanted to merge the code for all 3 entities involved:
> > root PCIBus, PCIBus behind a bridge, and PCIDevice.
> > The region for merge was to use the same master_abort_mem_ops
> > because there are the same operations that must be done:
> > return -1 (0xFFF...) and set master abort received bit. 
> 
> return -1 is not a lot of common code, and MA bit
> is in a different place each time.
Ack

> 
> > > 
> > > > +        master_dev = PCI_DEVICE(opaque);
> > > > +        bus = master_dev->bus;
> > > > +        while (!pci_bus_is_root(bus)) {
> > > > +            master_dev = bus->parent_dev;
> > > > +            bus = master_dev->bus;
> > > > +        }
> > > > +        downstream = 0;
> > > > +    }
> > > > +
> > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_BUS)) {
> > > > +        bus = PCI_BUS(opaque);
> > > > +        if (pci_bus_is_root(bus)) {
> > > > +            master_dev = pci_bus_find_host(bus);
> > > > +        } else { /* bus behind a PCI-2-PCI bridge */
> > > > +            master_dev = bus->parent_dev;
> > > > +        }
> > > 
> > > So the reason for this is that device to device MA
> > > is still not implemented here, correct?
> > Nope, the above code differentiate 2 scenarios:
> > 1. the bus is the root bus => look for host bridge
> >    to update STATUS register
> > 2. the bus is bridge's secondary bus =>
> >    update bridge STATUS register
> > "Device 2 Device" is handled implicitly because we have
> > a background region covering all device's target memory
> > 
> > > If it was it would be just one instance of it.
> > > I'm not saying this must block this patch, however
> > > there should be a code comment to this effect,
> > > and commit log should mention this explicitly.
> > As above, "Device 2 Device" is handled implicit
> 
> Implicit => confused.
> 
> I'm confused. Where's the code that sets MA bit on
> the initiator device?
Is the code that adds the background bus_master_abort_mem region to
bus_master_enable_region.
bus_master_enable_region is the target for all PCIDevice
reads and writes(if I understand correctly). So it doesn't matter if
a device initiate a transaction to another device or to memory,
it will do it using the bus_master_enable_region.
The master_abort() method will recognize this as an
upstream transaction. 

> Maybe if you split each case properly it will become
> clear.
I hope it will.

> 
> > > 
> > > > +        downstream = 1;
> > > > +    }
> > > > +
> > > > +    assert(master_dev);
> > > > +    pc = PCI_DEVICE_GET_CLASS(master_dev);
> > > > +
> > > 
> > > There's very little common code between devices and
> > > buses. So just use 2 functions.
> > As stated above:
> > The region for merge was to use the same master_abort_mem_ops
> > because there are the same operations that must be done:
> > return -1 (0xFFF...) and set master abort received bit.
> > Separating them will result in some code duplication.
> 
> About 2 lines. But you'll lose the nasty dynamic casts.
> opaque pointers are bad enough.
Ack

> 
> > The read and write methods are the same, but would call different
> > "master_abort" methods.
> > If it does bother you the down casting anyway, I'll change.
> 
> Pls do.
Ack

> 
> > > 
> > > > +    if (downstream && pc->is_bridge) {
> > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_SEC_STATUS,
> > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > +    } else {
> > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_STATUS,
> > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > +    }
> > > > +}
> > > > +
> > > >  static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
> > > >  {
> > > > -   return -1ULL;
> > > 
> > > I didn't notice but this means there were 3 spaces here in previous
> > > patch?
> > yes :(, checkpatch didn't catch it
> > 
> > Thanks for the review,
> > Marcel
> > 
> > > 
> > > > +    master_abort(opaque);
> > > > +    return -1ULL;
> > > >  }
> > > >  
> > > >  static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
> > > >                                     unsigned size)
> > > >  {
> > > > +    master_abort(opaque);
> > > >  }
> > > >  
> > > > -static const MemoryRegionOps master_abort_mem_ops = {
> > > > +static const MemoryRegionOps master_abort_ops = {
> > > >      .read = master_abort_mem_read,
> > > >      .write = master_abort_mem_write,
> > > >      .endianness = DEVICE_LITTLE_ENDIAN,
> > > > @@ -301,6 +360,23 @@ static const MemoryRegionOps master_abort_mem_ops = {
> > > >  
> > > >  #define MASTER_ABORT_MEM_PRIORITY INT_MIN
> > > >  
> > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io)
> > > > +{
> > > > +    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > +                          &master_abort_ops, bus, mem,
> > > > +                          memory_region_size(bus->address_space_mem));
> > > > +    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > +                                        0, &bus->master_abort_mem,
> > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > +
> > > > +    memory_region_init_io(&bus->master_abort_io, OBJECT(bus),
> > > > +                          &master_abort_ops, bus, io,
> > > > +                          memory_region_size(bus->address_space_io));
> > > > +    memory_region_add_subregion_overlap(bus->address_space_io,
> > > > +                                        0, &bus->master_abort_io,
> > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > +}
> > > > +
> > > >  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > >                           const char *name,
> > > >                           MemoryRegion *address_space_mem,
> > > > @@ -312,13 +388,8 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > >      bus->address_space_mem = address_space_mem;
> > > >      bus->address_space_io = address_space_io;
> > > >  
> > > > -
> > > > -    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > -                          &master_abort_mem_ops, bus, "pci-master-abort",
> > > > -                          memory_region_size(bus->address_space_mem));
> > > > -    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > -                                        0, &bus->master_abort_mem,
> > > > -                                        MASTER_ABORT_MEM_PRIORITY);
> > > > +    pci_bus_master_abort_init(bus, "pci-master-abort-mem",
> > > > +                              "pci-master-abort-io");
> > > >  
> > > >      /* host bridge */
> > > >      QLIST_INIT(&bus->child);
> > > > @@ -840,9 +911,24 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> > > >      pci_dev->bus = bus;
> > > >      dma_as = pci_device_iommu_address_space(pci_dev);
> > > >  
> > > > -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> > > > -                             OBJECT(pci_dev), "bus master",
> > > > +    memory_region_init(&pci_dev->bus_master_enable_region, NULL,
> > > > +                       "bus master", UINT64_MAX);
> > > > +
> > > > +    memory_region_init_io(&pci_dev->bus_master_abort_mem, OBJECT(bus),
> > > > +                          &master_abort_ops, OBJECT(pci_dev),
> > > > +                          "bus master master-abort",
> > > > +                          memory_region_size(&pci_dev->bus_master_enable_region));
> > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > +                                        0, &pci_dev->bus_master_abort_mem,
> > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > +
> > > > +    memory_region_init_alias(&pci_dev->bus_master_dma_mem,
> > > > +                             OBJECT(pci_dev), "bus master dma",
> > > >                               dma_as->root, 0, memory_region_size(dma_as->root));
> > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > +                                        0, &pci_dev->bus_master_dma_mem, 0);
> > > > +
> > > > +
> > > >      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> > > >      address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
> > > >                         name);
> > > > @@ -901,6 +987,13 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
> > > >      pci_config_free(pci_dev);
> > > >  
> > > >      address_space_destroy(&pci_dev->bus_master_as);
> > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > +                                &pci_dev->bus_master_dma_mem);
> > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > +                                &pci_dev->bus_master_abort_mem);
> > > > +    memory_region_destroy(&pci_dev->bus_master_dma_mem);
> > > > +    memory_region_destroy(&pci_dev->bus_master_abort_mem);
> > > > +
> > > >      memory_region_destroy(&pci_dev->bus_master_enable_region);
> > > >  }
> > > >  
> > > > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > > > index e6b22b8..56b682f 100644
> > > > --- a/hw/pci/pci_bridge.c
> > > > +++ b/hw/pci/pci_bridge.c
> > > > @@ -376,6 +376,10 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
> > > >      sec_bus->address_space_io = &br->address_space_io;
> > > >      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
> > > >      br->windows = pci_bridge_region_init(br);
> > > > +
> > > > +    pci_bus_master_abort_init(sec_bus, "pci_bridge_master_abort_mem",
> > > > +                              "pci_bridge_master_abort_io");
> > > > +
> > > >      QLIST_INIT(&sec_bus->child);
> > > >      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
> > > >      return 0;
> > > > @@ -389,6 +393,12 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
> > > >      QLIST_REMOVE(&s->sec_bus, sibling);
> > > >      pci_bridge_region_del(s, s->windows);
> > > >      pci_bridge_region_cleanup(s, s->windows);
> > > > +    memory_region_del_subregion(s->sec_bus.address_space_mem,
> > > > +                                &s->sec_bus.master_abort_mem);
> > > > +    memory_region_del_subregion(s->sec_bus.address_space_io,
> > > > +                                &s->sec_bus.master_abort_io);
> > > > +    memory_region_destroy(&s->sec_bus.master_abort_mem);
> > > > +    memory_region_destroy(&s->sec_bus.master_abort_io);
> > > >      memory_region_destroy(&s->address_space_mem);
> > > >      memory_region_destroy(&s->address_space_io);
> > > >      /* qbus_free() is called automatically by qdev_free() */
> > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > index 37979aa..d69e06d 100644
> > > > --- a/include/hw/pci/pci.h
> > > > +++ b/include/hw/pci/pci.h
> > > > @@ -242,6 +242,8 @@ struct PCIDevice {
> > > >      PCIIORegion io_regions[PCI_NUM_REGIONS];
> > > >      AddressSpace bus_master_as;
> > > >      MemoryRegion bus_master_enable_region;
> > > > +    MemoryRegion bus_master_dma_mem;
> > > > +    MemoryRegion bus_master_abort_mem;
> > > >  
> > > >      /* do not access the following fields */
> > > >      PCIConfigReadFunc *config_read;
> > > > @@ -357,6 +359,7 @@ PCIBus *pci_bus_new(DeviceState *parent, const char *name,
> > > >                      MemoryRegion *address_space_mem,
> > > >                      MemoryRegion *address_space_io,
> > > >                      uint8_t devfn_min, const char *typename);
> > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io);
> > > >  void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
> > > >                    void *irq_opaque, int nirq);
> > > >  int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
> > > > diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> > > > index 2ad5edb..57b1541 100644
> > > > --- a/include/hw/pci/pci_bus.h
> > > > +++ b/include/hw/pci/pci_bus.h
> > > > @@ -24,6 +24,7 @@ struct PCIBus {
> > > >      MemoryRegion *address_space_mem;
> > > >      MemoryRegion *address_space_io;
> > > >      MemoryRegion master_abort_mem;
> > > > +    MemoryRegion master_abort_io;
> > > >  
> > > >      QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
> > > >      QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
> > > > -- 
> > > > 1.8.3.1
> > 
> >
Michael S. Tsirkin Sept. 23, 2013, 3:10 p.m. UTC | #5
On Mon, Sep 23, 2013 at 05:43:38PM +0300, Marcel Apfelbaum wrote:
> On Mon, 2013-09-23 at 16:45 +0300, Michael S. Tsirkin wrote:
> > On Mon, Sep 23, 2013 at 03:37:43PM +0300, Marcel Apfelbaum wrote:
> > > On Mon, 2013-09-23 at 14:27 +0300, Michael S. Tsirkin wrote:
> > > > On Mon, Sep 23, 2013 at 02:01:17PM +0300, Marcel Apfelbaum wrote:
> > > > > This patch is implemented on top of series:
> > > > > [PATCH v5 0/3] pci: implement upstream master abort protocol
> > > > > 
> > > > > Added "master abort io" background region for PCIBus.
> > > > > 
> > > > > Added "master abort mem" region to catch transactions initiated
> > > > > by pci devices targeted to unassigned addresses.
> > > > > 
> > > > > Enabled "master abort" regions for PCI-2-PCI bridge's secondary bus.
> > > > > 
> > > > > Set "Received Master Abort" Bit on Status/Secondary Status
> > > > > register as defined in the PCI Spec.
> > > > > 
> > > > > Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> > > > 
> > > > I think it will be easier to review the complete
> > > > series, not an incremental patch.
> > > Should I combine this with the prev series without the
> > > memory patches?
> > > 
> > > > 
> > > > We probably need to think what's the right thing
> > > > to do for master abort mode since we do not
> > > > emulate SERR. Do we make it writeable at all?
> > > Master abort mode can be enabled. (Tested)
> > > > 
> > > > It's a pci only bit:
> > > > 	When the Master-Abort Mode bit is cleared and a posted write transaction
> > > > 	forwarded by the
> > > > 	bridge terminates with a Master-Abort, no error is reported (note that
> > > > 	the Master-Abort bit
> > > > 	is still set). When the Master-Abort Mode bit is set and a posted write
> > > > 	transaction forwarded
> > > > 	by the bridge terminates with a Master-Abort on the destination bus, the
> > > > 	bridge must:
> > > > 	Assert SERR# on the primary interfaceCommand register)
> > > > 	(if enabled by the SERR# Enable bitin the
> > > > 	Set the Signaled System Errorbit in the Command register)
> > > > 	bitin the Status register (if enabled by the SERR# Enable)
> > > > 
> > > > one way to do this would be not to set Master-Abort bit even
> > > > though spec says we should, and pretend there was no error.
> > > Maybe we can just not allow to set "Master abort mode"
> > > in BRIDGE_CONTROL register. It is a cleaner way (I think)
> > > considering we don't emulate SERR.
> > 
> > I'm not sure P2P spec allows this - want to check.
> I will check this too, thanks
> 
> > It's the right thing to do for express.
> > 
> > Also pls note that cross-version migration
> > requires that we keep it writable for old
> > machine types.
> Yes :(. It seems we need another solution.

Not really. Just set a flag "don't assert master abort"
for old machine types.

> > 
> > > > 
> > > > > ---
> > > > >  hw/pci/pci.c             | 115 ++++++++++++++++++++++++++++++++++++++++++-----
> > > > >  hw/pci/pci_bridge.c      |  10 +++++
> > > > >  include/hw/pci/pci.h     |   3 ++
> > > > >  include/hw/pci/pci_bus.h |   1 +
> > > > >  4 files changed, 118 insertions(+), 11 deletions(-)
> > > > > 
> > > > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > > > index d8a1b11..1f4e707 100644
> > > > > --- a/hw/pci/pci.c
> > > > > +++ b/hw/pci/pci.c
> > > > > @@ -283,17 +283,76 @@ const char *pci_root_bus_path(PCIDevice *dev)
> > > > >      return rootbus->qbus.name;
> > > > >  }
> > > > >  
> > > > > +static PCIDevice *pci_bus_find_host(PCIBus *bus)
> > > > > +{
> > > > > +    PCIDevice *dev;
> > > > > +    int i;
> > > > > +
> > > > > +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> > > > > +        dev = bus->devices[i];
> > > > > +        if (dev) {
> > > > > +            PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> > > > > +            if (pc->class_id == PCI_CLASS_BRIDGE_HOST) {
> > > > 
> > > > In fact you can do this easier:
> > > > 	 pci_get_word(dev->config + PCI_CLASS_DEVICE) == PCI_CLASS_BRIDGE_HOST
> > > Thanks, I'll change.
> > > > 
> > > > > +                return dev;
> > > > > +            }
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    return NULL;
> > > > > +}
> > > > > +
> > > > > +static void master_abort(void *opaque)
> > > > > +{
> > > > > +    PCIDevice *master_dev = NULL;
> > > > > +    PCIDeviceClass *pc;
> > > > > +    PCIBus *bus;
> > > > > +    int downstream;
> > > > 
> > > > bool?
> > > Yes, I'll change
> > > 
> > > > > +
> > > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_DEVICE)) {
> > > > 
> > > > Please don't do dynamic cast just because you can.
> > > > You know very well what kind of object you pass
> > > > when you create the MR.
> > > > So just call the appropriate function.
> > > I wanted to merge the code for all 3 entities involved:
> > > root PCIBus, PCIBus behind a bridge, and PCIDevice.
> > > The region for merge was to use the same master_abort_mem_ops
> > > because there are the same operations that must be done:
> > > return -1 (0xFFF...) and set master abort received bit. 
> > 
> > return -1 is not a lot of common code, and MA bit
> > is in a different place each time.
> Ack
> 
> > 
> > > > 
> > > > > +        master_dev = PCI_DEVICE(opaque);
> > > > > +        bus = master_dev->bus;
> > > > > +        while (!pci_bus_is_root(bus)) {
> > > > > +            master_dev = bus->parent_dev;
> > > > > +            bus = master_dev->bus;
> > > > > +        }
> > > > > +        downstream = 0;
> > > > > +    }
> > > > > +
> > > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_BUS)) {
> > > > > +        bus = PCI_BUS(opaque);
> > > > > +        if (pci_bus_is_root(bus)) {
> > > > > +            master_dev = pci_bus_find_host(bus);
> > > > > +        } else { /* bus behind a PCI-2-PCI bridge */
> > > > > +            master_dev = bus->parent_dev;
> > > > > +        }
> > > > 
> > > > So the reason for this is that device to device MA
> > > > is still not implemented here, correct?
> > > Nope, the above code differentiate 2 scenarios:
> > > 1. the bus is the root bus => look for host bridge
> > >    to update STATUS register
> > > 2. the bus is bridge's secondary bus =>
> > >    update bridge STATUS register
> > > "Device 2 Device" is handled implicitly because we have
> > > a background region covering all device's target memory
> > > 
> > > > If it was it would be just one instance of it.
> > > > I'm not saying this must block this patch, however
> > > > there should be a code comment to this effect,
> > > > and commit log should mention this explicitly.
> > > As above, "Device 2 Device" is handled implicit
> > 
> > Implicit => confused.
> > 
> > I'm confused. Where's the code that sets MA bit on
> > the initiator device?
> Is the code that adds the background bus_master_abort_mem region to
> bus_master_enable_region.
> bus_master_enable_region is the target for all PCIDevice
> reads and writes(if I understand correctly). So it doesn't matter if
> a device initiate a transaction to another device or to memory,
> it will do it using the bus_master_enable_region.
> The master_abort() method will recognize this as an
> upstream transaction. 

But if target is behind a bridge then master does
not detect the MA, it's the bridge that does it
(in PCI, express seems to be different).


> > Maybe if you split each case properly it will become
> > clear.
> I hope it will.
> 
> > 
> > > > 
> > > > > +        downstream = 1;
> > > > > +    }
> > > > > +
> > > > > +    assert(master_dev);
> > > > > +    pc = PCI_DEVICE_GET_CLASS(master_dev);
> > > > > +
> > > > 
> > > > There's very little common code between devices and
> > > > buses. So just use 2 functions.
> > > As stated above:
> > > The region for merge was to use the same master_abort_mem_ops
> > > because there are the same operations that must be done:
> > > return -1 (0xFFF...) and set master abort received bit.
> > > Separating them will result in some code duplication.
> > 
> > About 2 lines. But you'll lose the nasty dynamic casts.
> > opaque pointers are bad enough.
> Ack
> 
> > 
> > > The read and write methods are the same, but would call different
> > > "master_abort" methods.
> > > If it does bother you the down casting anyway, I'll change.
> > 
> > Pls do.
> Ack
> 
> > 
> > > > 
> > > > > +    if (downstream && pc->is_bridge) {
> > > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_SEC_STATUS,
> > > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > > +    } else {
> > > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_STATUS,
> > > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > > +    }
> > > > > +}
> > > > > +
> > > > >  static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
> > > > >  {
> > > > > -   return -1ULL;
> > > > 
> > > > I didn't notice but this means there were 3 spaces here in previous
> > > > patch?
> > > yes :(, checkpatch didn't catch it
> > > 
> > > Thanks for the review,
> > > Marcel
> > > 
> > > > 
> > > > > +    master_abort(opaque);
> > > > > +    return -1ULL;
> > > > >  }
> > > > >  
> > > > >  static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
> > > > >                                     unsigned size)
> > > > >  {
> > > > > +    master_abort(opaque);
> > > > >  }
> > > > >  
> > > > > -static const MemoryRegionOps master_abort_mem_ops = {
> > > > > +static const MemoryRegionOps master_abort_ops = {
> > > > >      .read = master_abort_mem_read,
> > > > >      .write = master_abort_mem_write,
> > > > >      .endianness = DEVICE_LITTLE_ENDIAN,
> > > > > @@ -301,6 +360,23 @@ static const MemoryRegionOps master_abort_mem_ops = {
> > > > >  
> > > > >  #define MASTER_ABORT_MEM_PRIORITY INT_MIN
> > > > >  
> > > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io)
> > > > > +{
> > > > > +    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > > +                          &master_abort_ops, bus, mem,
> > > > > +                          memory_region_size(bus->address_space_mem));
> > > > > +    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > > +                                        0, &bus->master_abort_mem,
> > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > +
> > > > > +    memory_region_init_io(&bus->master_abort_io, OBJECT(bus),
> > > > > +                          &master_abort_ops, bus, io,
> > > > > +                          memory_region_size(bus->address_space_io));
> > > > > +    memory_region_add_subregion_overlap(bus->address_space_io,
> > > > > +                                        0, &bus->master_abort_io,
> > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > +}
> > > > > +
> > > > >  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > > >                           const char *name,
> > > > >                           MemoryRegion *address_space_mem,
> > > > > @@ -312,13 +388,8 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > > >      bus->address_space_mem = address_space_mem;
> > > > >      bus->address_space_io = address_space_io;
> > > > >  
> > > > > -
> > > > > -    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > > -                          &master_abort_mem_ops, bus, "pci-master-abort",
> > > > > -                          memory_region_size(bus->address_space_mem));
> > > > > -    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > > -                                        0, &bus->master_abort_mem,
> > > > > -                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > +    pci_bus_master_abort_init(bus, "pci-master-abort-mem",
> > > > > +                              "pci-master-abort-io");
> > > > >  
> > > > >      /* host bridge */
> > > > >      QLIST_INIT(&bus->child);
> > > > > @@ -840,9 +911,24 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> > > > >      pci_dev->bus = bus;
> > > > >      dma_as = pci_device_iommu_address_space(pci_dev);
> > > > >  
> > > > > -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> > > > > -                             OBJECT(pci_dev), "bus master",
> > > > > +    memory_region_init(&pci_dev->bus_master_enable_region, NULL,
> > > > > +                       "bus master", UINT64_MAX);
> > > > > +
> > > > > +    memory_region_init_io(&pci_dev->bus_master_abort_mem, OBJECT(bus),
> > > > > +                          &master_abort_ops, OBJECT(pci_dev),
> > > > > +                          "bus master master-abort",
> > > > > +                          memory_region_size(&pci_dev->bus_master_enable_region));
> > > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > > +                                        0, &pci_dev->bus_master_abort_mem,
> > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > +
> > > > > +    memory_region_init_alias(&pci_dev->bus_master_dma_mem,
> > > > > +                             OBJECT(pci_dev), "bus master dma",
> > > > >                               dma_as->root, 0, memory_region_size(dma_as->root));
> > > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > > +                                        0, &pci_dev->bus_master_dma_mem, 0);
> > > > > +
> > > > > +
> > > > >      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> > > > >      address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
> > > > >                         name);
> > > > > @@ -901,6 +987,13 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
> > > > >      pci_config_free(pci_dev);
> > > > >  
> > > > >      address_space_destroy(&pci_dev->bus_master_as);
> > > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > > +                                &pci_dev->bus_master_dma_mem);
> > > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > > +                                &pci_dev->bus_master_abort_mem);
> > > > > +    memory_region_destroy(&pci_dev->bus_master_dma_mem);
> > > > > +    memory_region_destroy(&pci_dev->bus_master_abort_mem);
> > > > > +
> > > > >      memory_region_destroy(&pci_dev->bus_master_enable_region);
> > > > >  }
> > > > >  
> > > > > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > > > > index e6b22b8..56b682f 100644
> > > > > --- a/hw/pci/pci_bridge.c
> > > > > +++ b/hw/pci/pci_bridge.c
> > > > > @@ -376,6 +376,10 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
> > > > >      sec_bus->address_space_io = &br->address_space_io;
> > > > >      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
> > > > >      br->windows = pci_bridge_region_init(br);
> > > > > +
> > > > > +    pci_bus_master_abort_init(sec_bus, "pci_bridge_master_abort_mem",
> > > > > +                              "pci_bridge_master_abort_io");
> > > > > +
> > > > >      QLIST_INIT(&sec_bus->child);
> > > > >      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
> > > > >      return 0;
> > > > > @@ -389,6 +393,12 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
> > > > >      QLIST_REMOVE(&s->sec_bus, sibling);
> > > > >      pci_bridge_region_del(s, s->windows);
> > > > >      pci_bridge_region_cleanup(s, s->windows);
> > > > > +    memory_region_del_subregion(s->sec_bus.address_space_mem,
> > > > > +                                &s->sec_bus.master_abort_mem);
> > > > > +    memory_region_del_subregion(s->sec_bus.address_space_io,
> > > > > +                                &s->sec_bus.master_abort_io);
> > > > > +    memory_region_destroy(&s->sec_bus.master_abort_mem);
> > > > > +    memory_region_destroy(&s->sec_bus.master_abort_io);
> > > > >      memory_region_destroy(&s->address_space_mem);
> > > > >      memory_region_destroy(&s->address_space_io);
> > > > >      /* qbus_free() is called automatically by qdev_free() */
> > > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > > index 37979aa..d69e06d 100644
> > > > > --- a/include/hw/pci/pci.h
> > > > > +++ b/include/hw/pci/pci.h
> > > > > @@ -242,6 +242,8 @@ struct PCIDevice {
> > > > >      PCIIORegion io_regions[PCI_NUM_REGIONS];
> > > > >      AddressSpace bus_master_as;
> > > > >      MemoryRegion bus_master_enable_region;
> > > > > +    MemoryRegion bus_master_dma_mem;
> > > > > +    MemoryRegion bus_master_abort_mem;
> > > > >  
> > > > >      /* do not access the following fields */
> > > > >      PCIConfigReadFunc *config_read;
> > > > > @@ -357,6 +359,7 @@ PCIBus *pci_bus_new(DeviceState *parent, const char *name,
> > > > >                      MemoryRegion *address_space_mem,
> > > > >                      MemoryRegion *address_space_io,
> > > > >                      uint8_t devfn_min, const char *typename);
> > > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io);
> > > > >  void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
> > > > >                    void *irq_opaque, int nirq);
> > > > >  int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
> > > > > diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> > > > > index 2ad5edb..57b1541 100644
> > > > > --- a/include/hw/pci/pci_bus.h
> > > > > +++ b/include/hw/pci/pci_bus.h
> > > > > @@ -24,6 +24,7 @@ struct PCIBus {
> > > > >      MemoryRegion *address_space_mem;
> > > > >      MemoryRegion *address_space_io;
> > > > >      MemoryRegion master_abort_mem;
> > > > > +    MemoryRegion master_abort_io;
> > > > >  
> > > > >      QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
> > > > >      QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
> > > > > -- 
> > > > > 1.8.3.1
> > > 
> > > 
> 
>
Marcel Apfelbaum Sept. 23, 2013, 5:49 p.m. UTC | #6
On Mon, 2013-09-23 at 18:10 +0300, Michael S. Tsirkin wrote:
> On Mon, Sep 23, 2013 at 05:43:38PM +0300, Marcel Apfelbaum wrote:
> > On Mon, 2013-09-23 at 16:45 +0300, Michael S. Tsirkin wrote:
> > > On Mon, Sep 23, 2013 at 03:37:43PM +0300, Marcel Apfelbaum wrote:
> > > > On Mon, 2013-09-23 at 14:27 +0300, Michael S. Tsirkin wrote:
> > > > > On Mon, Sep 23, 2013 at 02:01:17PM +0300, Marcel Apfelbaum wrote:
> > > > > > This patch is implemented on top of series:
> > > > > > [PATCH v5 0/3] pci: implement upstream master abort protocol
> > > > > > 
> > > > > > Added "master abort io" background region for PCIBus.
> > > > > > 
> > > > > > Added "master abort mem" region to catch transactions initiated
> > > > > > by pci devices targeted to unassigned addresses.
> > > > > > 
> > > > > > Enabled "master abort" regions for PCI-2-PCI bridge's secondary bus.
> > > > > > 
> > > > > > Set "Received Master Abort" Bit on Status/Secondary Status
> > > > > > register as defined in the PCI Spec.
> > > > > > 
> > > > > > Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> > > > > 
> > > > > I think it will be easier to review the complete
> > > > > series, not an incremental patch.
> > > > Should I combine this with the prev series without the
> > > > memory patches?
> > > > 
> > > > > 
> > > > > We probably need to think what's the right thing
> > > > > to do for master abort mode since we do not
> > > > > emulate SERR. Do we make it writeable at all?
> > > > Master abort mode can be enabled. (Tested)
> > > > > 
> > > > > It's a pci only bit:
> > > > > 	When the Master-Abort Mode bit is cleared and a posted write transaction
> > > > > 	forwarded by the
> > > > > 	bridge terminates with a Master-Abort, no error is reported (note that
> > > > > 	the Master-Abort bit
> > > > > 	is still set). When the Master-Abort Mode bit is set and a posted write
> > > > > 	transaction forwarded
> > > > > 	by the bridge terminates with a Master-Abort on the destination bus, the
> > > > > 	bridge must:
> > > > > 	Assert SERR# on the primary interfaceCommand register)
> > > > > 	(if enabled by the SERR# Enable bitin the
> > > > > 	Set the Signaled System Errorbit in the Command register)
> > > > > 	bitin the Status register (if enabled by the SERR# Enable)
> > > > > 
> > > > > one way to do this would be not to set Master-Abort bit even
> > > > > though spec says we should, and pretend there was no error.
> > > > Maybe we can just not allow to set "Master abort mode"
> > > > in BRIDGE_CONTROL register. It is a cleaner way (I think)
> > > > considering we don't emulate SERR.
> > > 
> > > I'm not sure P2P spec allows this - want to check.
> > I will check this too, thanks
> > 
> > > It's the right thing to do for express.
> > > 
> > > Also pls note that cross-version migration
> > > requires that we keep it writable for old
> > > machine types.
> > Yes :(. It seems we need another solution.
> 
> Not really. Just set a flag "don't assert master abort"
> for old machine types.
OK, thanks

> 
> > > 
> > > > > 
> > > > > > ---
> > > > > >  hw/pci/pci.c             | 115 ++++++++++++++++++++++++++++++++++++++++++-----
> > > > > >  hw/pci/pci_bridge.c      |  10 +++++
> > > > > >  include/hw/pci/pci.h     |   3 ++
> > > > > >  include/hw/pci/pci_bus.h |   1 +
> > > > > >  4 files changed, 118 insertions(+), 11 deletions(-)
> > > > > > 
> > > > > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > > > > index d8a1b11..1f4e707 100644
> > > > > > --- a/hw/pci/pci.c
> > > > > > +++ b/hw/pci/pci.c
> > > > > > @@ -283,17 +283,76 @@ const char *pci_root_bus_path(PCIDevice *dev)
> > > > > >      return rootbus->qbus.name;
> > > > > >  }
> > > > > >  
> > > > > > +static PCIDevice *pci_bus_find_host(PCIBus *bus)
> > > > > > +{
> > > > > > +    PCIDevice *dev;
> > > > > > +    int i;
> > > > > > +
> > > > > > +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> > > > > > +        dev = bus->devices[i];
> > > > > > +        if (dev) {
> > > > > > +            PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> > > > > > +            if (pc->class_id == PCI_CLASS_BRIDGE_HOST) {
> > > > > 
> > > > > In fact you can do this easier:
> > > > > 	 pci_get_word(dev->config + PCI_CLASS_DEVICE) == PCI_CLASS_BRIDGE_HOST
> > > > Thanks, I'll change.
> > > > > 
> > > > > > +                return dev;
> > > > > > +            }
> > > > > > +        }
> > > > > > +    }
> > > > > > +
> > > > > > +    return NULL;
> > > > > > +}
> > > > > > +
> > > > > > +static void master_abort(void *opaque)
> > > > > > +{
> > > > > > +    PCIDevice *master_dev = NULL;
> > > > > > +    PCIDeviceClass *pc;
> > > > > > +    PCIBus *bus;
> > > > > > +    int downstream;
> > > > > 
> > > > > bool?
> > > > Yes, I'll change
> > > > 
> > > > > > +
> > > > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_DEVICE)) {
> > > > > 
> > > > > Please don't do dynamic cast just because you can.
> > > > > You know very well what kind of object you pass
> > > > > when you create the MR.
> > > > > So just call the appropriate function.
> > > > I wanted to merge the code for all 3 entities involved:
> > > > root PCIBus, PCIBus behind a bridge, and PCIDevice.
> > > > The region for merge was to use the same master_abort_mem_ops
> > > > because there are the same operations that must be done:
> > > > return -1 (0xFFF...) and set master abort received bit. 
> > > 
> > > return -1 is not a lot of common code, and MA bit
> > > is in a different place each time.
> > Ack
> > 
> > > 
> > > > > 
> > > > > > +        master_dev = PCI_DEVICE(opaque);
> > > > > > +        bus = master_dev->bus;
> > > > > > +        while (!pci_bus_is_root(bus)) {
> > > > > > +            master_dev = bus->parent_dev;
> > > > > > +            bus = master_dev->bus;
> > > > > > +        }
> > > > > > +        downstream = 0;
> > > > > > +    }
> > > > > > +
> > > > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_BUS)) {
> > > > > > +        bus = PCI_BUS(opaque);
> > > > > > +        if (pci_bus_is_root(bus)) {
> > > > > > +            master_dev = pci_bus_find_host(bus);
> > > > > > +        } else { /* bus behind a PCI-2-PCI bridge */
> > > > > > +            master_dev = bus->parent_dev;
> > > > > > +        }
> > > > > 
> > > > > So the reason for this is that device to device MA
> > > > > is still not implemented here, correct?
> > > > Nope, the above code differentiate 2 scenarios:
> > > > 1. the bus is the root bus => look for host bridge
> > > >    to update STATUS register
> > > > 2. the bus is bridge's secondary bus =>
> > > >    update bridge STATUS register
> > > > "Device 2 Device" is handled implicitly because we have
> > > > a background region covering all device's target memory
> > > > 
> > > > > If it was it would be just one instance of it.
> > > > > I'm not saying this must block this patch, however
> > > > > there should be a code comment to this effect,
> > > > > and commit log should mention this explicitly.
> > > > As above, "Device 2 Device" is handled implicit
> > > 
> > > Implicit => confused.
> > > 
> > > I'm confused. Where's the code that sets MA bit on
> > > the initiator device?
> > Is the code that adds the background bus_master_abort_mem region to
> > bus_master_enable_region.
> > bus_master_enable_region is the target for all PCIDevice
> > reads and writes(if I understand correctly). So it doesn't matter if
> > a device initiate a transaction to another device or to memory,
> > it will do it using the bus_master_enable_region.
> > The master_abort() method will recognize this as an
> > upstream transaction. 
> 
> But if target is behind a bridge then master does
> not detect the MA, it's the bridge that does it
> (in PCI, express seems to be different).
The current code handles upstream transactions as follows:
1. If the PCI device is behind a bridge, finds
   recursively the corresponding bridge
   on the root bus and modifies its status register.
2. If the PCI device is not behind the bridge, directly
   modifies the device status register
Regarding PCI express, I'll check this issue.
Thanks,
Marcel

> 
> 
> > > Maybe if you split each case properly it will become
> > > clear.
> > I hope it will.
> > 
> > > 
> > > > > 
> > > > > > +        downstream = 1;
> > > > > > +    }
> > > > > > +
> > > > > > +    assert(master_dev);
> > > > > > +    pc = PCI_DEVICE_GET_CLASS(master_dev);
> > > > > > +
> > > > > 
> > > > > There's very little common code between devices and
> > > > > buses. So just use 2 functions.
> > > > As stated above:
> > > > The region for merge was to use the same master_abort_mem_ops
> > > > because there are the same operations that must be done:
> > > > return -1 (0xFFF...) and set master abort received bit.
> > > > Separating them will result in some code duplication.
> > > 
> > > About 2 lines. But you'll lose the nasty dynamic casts.
> > > opaque pointers are bad enough.
> > Ack
> > 
> > > 
> > > > The read and write methods are the same, but would call different
> > > > "master_abort" methods.
> > > > If it does bother you the down casting anyway, I'll change.
> > > 
> > > Pls do.
> > Ack
> > 
> > > 
> > > > > 
> > > > > > +    if (downstream && pc->is_bridge) {
> > > > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_SEC_STATUS,
> > > > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > > > +    } else {
> > > > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_STATUS,
> > > > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > > > +    }
> > > > > > +}
> > > > > > +
> > > > > >  static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
> > > > > >  {
> > > > > > -   return -1ULL;
> > > > > 
> > > > > I didn't notice but this means there were 3 spaces here in previous
> > > > > patch?
> > > > yes :(, checkpatch didn't catch it
> > > > 
> > > > Thanks for the review,
> > > > Marcel
> > > > 
> > > > > 
> > > > > > +    master_abort(opaque);
> > > > > > +    return -1ULL;
> > > > > >  }
> > > > > >  
> > > > > >  static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
> > > > > >                                     unsigned size)
> > > > > >  {
> > > > > > +    master_abort(opaque);
> > > > > >  }
> > > > > >  
> > > > > > -static const MemoryRegionOps master_abort_mem_ops = {
> > > > > > +static const MemoryRegionOps master_abort_ops = {
> > > > > >      .read = master_abort_mem_read,
> > > > > >      .write = master_abort_mem_write,
> > > > > >      .endianness = DEVICE_LITTLE_ENDIAN,
> > > > > > @@ -301,6 +360,23 @@ static const MemoryRegionOps master_abort_mem_ops = {
> > > > > >  
> > > > > >  #define MASTER_ABORT_MEM_PRIORITY INT_MIN
> > > > > >  
> > > > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io)
> > > > > > +{
> > > > > > +    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > > > +                          &master_abort_ops, bus, mem,
> > > > > > +                          memory_region_size(bus->address_space_mem));
> > > > > > +    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > > > +                                        0, &bus->master_abort_mem,
> > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > +
> > > > > > +    memory_region_init_io(&bus->master_abort_io, OBJECT(bus),
> > > > > > +                          &master_abort_ops, bus, io,
> > > > > > +                          memory_region_size(bus->address_space_io));
> > > > > > +    memory_region_add_subregion_overlap(bus->address_space_io,
> > > > > > +                                        0, &bus->master_abort_io,
> > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > +}
> > > > > > +
> > > > > >  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > > > >                           const char *name,
> > > > > >                           MemoryRegion *address_space_mem,
> > > > > > @@ -312,13 +388,8 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > > > >      bus->address_space_mem = address_space_mem;
> > > > > >      bus->address_space_io = address_space_io;
> > > > > >  
> > > > > > -
> > > > > > -    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > > > -                          &master_abort_mem_ops, bus, "pci-master-abort",
> > > > > > -                          memory_region_size(bus->address_space_mem));
> > > > > > -    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > > > -                                        0, &bus->master_abort_mem,
> > > > > > -                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > +    pci_bus_master_abort_init(bus, "pci-master-abort-mem",
> > > > > > +                              "pci-master-abort-io");
> > > > > >  
> > > > > >      /* host bridge */
> > > > > >      QLIST_INIT(&bus->child);
> > > > > > @@ -840,9 +911,24 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> > > > > >      pci_dev->bus = bus;
> > > > > >      dma_as = pci_device_iommu_address_space(pci_dev);
> > > > > >  
> > > > > > -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> > > > > > -                             OBJECT(pci_dev), "bus master",
> > > > > > +    memory_region_init(&pci_dev->bus_master_enable_region, NULL,
> > > > > > +                       "bus master", UINT64_MAX);
> > > > > > +
> > > > > > +    memory_region_init_io(&pci_dev->bus_master_abort_mem, OBJECT(bus),
> > > > > > +                          &master_abort_ops, OBJECT(pci_dev),
> > > > > > +                          "bus master master-abort",
> > > > > > +                          memory_region_size(&pci_dev->bus_master_enable_region));
> > > > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > > > +                                        0, &pci_dev->bus_master_abort_mem,
> > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > +
> > > > > > +    memory_region_init_alias(&pci_dev->bus_master_dma_mem,
> > > > > > +                             OBJECT(pci_dev), "bus master dma",
> > > > > >                               dma_as->root, 0, memory_region_size(dma_as->root));
> > > > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > > > +                                        0, &pci_dev->bus_master_dma_mem, 0);
> > > > > > +
> > > > > > +
> > > > > >      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> > > > > >      address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
> > > > > >                         name);
> > > > > > @@ -901,6 +987,13 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
> > > > > >      pci_config_free(pci_dev);
> > > > > >  
> > > > > >      address_space_destroy(&pci_dev->bus_master_as);
> > > > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > > > +                                &pci_dev->bus_master_dma_mem);
> > > > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > > > +                                &pci_dev->bus_master_abort_mem);
> > > > > > +    memory_region_destroy(&pci_dev->bus_master_dma_mem);
> > > > > > +    memory_region_destroy(&pci_dev->bus_master_abort_mem);
> > > > > > +
> > > > > >      memory_region_destroy(&pci_dev->bus_master_enable_region);
> > > > > >  }
> > > > > >  
> > > > > > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > > > > > index e6b22b8..56b682f 100644
> > > > > > --- a/hw/pci/pci_bridge.c
> > > > > > +++ b/hw/pci/pci_bridge.c
> > > > > > @@ -376,6 +376,10 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
> > > > > >      sec_bus->address_space_io = &br->address_space_io;
> > > > > >      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
> > > > > >      br->windows = pci_bridge_region_init(br);
> > > > > > +
> > > > > > +    pci_bus_master_abort_init(sec_bus, "pci_bridge_master_abort_mem",
> > > > > > +                              "pci_bridge_master_abort_io");
> > > > > > +
> > > > > >      QLIST_INIT(&sec_bus->child);
> > > > > >      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
> > > > > >      return 0;
> > > > > > @@ -389,6 +393,12 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
> > > > > >      QLIST_REMOVE(&s->sec_bus, sibling);
> > > > > >      pci_bridge_region_del(s, s->windows);
> > > > > >      pci_bridge_region_cleanup(s, s->windows);
> > > > > > +    memory_region_del_subregion(s->sec_bus.address_space_mem,
> > > > > > +                                &s->sec_bus.master_abort_mem);
> > > > > > +    memory_region_del_subregion(s->sec_bus.address_space_io,
> > > > > > +                                &s->sec_bus.master_abort_io);
> > > > > > +    memory_region_destroy(&s->sec_bus.master_abort_mem);
> > > > > > +    memory_region_destroy(&s->sec_bus.master_abort_io);
> > > > > >      memory_region_destroy(&s->address_space_mem);
> > > > > >      memory_region_destroy(&s->address_space_io);
> > > > > >      /* qbus_free() is called automatically by qdev_free() */
> > > > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > > > index 37979aa..d69e06d 100644
> > > > > > --- a/include/hw/pci/pci.h
> > > > > > +++ b/include/hw/pci/pci.h
> > > > > > @@ -242,6 +242,8 @@ struct PCIDevice {
> > > > > >      PCIIORegion io_regions[PCI_NUM_REGIONS];
> > > > > >      AddressSpace bus_master_as;
> > > > > >      MemoryRegion bus_master_enable_region;
> > > > > > +    MemoryRegion bus_master_dma_mem;
> > > > > > +    MemoryRegion bus_master_abort_mem;
> > > > > >  
> > > > > >      /* do not access the following fields */
> > > > > >      PCIConfigReadFunc *config_read;
> > > > > > @@ -357,6 +359,7 @@ PCIBus *pci_bus_new(DeviceState *parent, const char *name,
> > > > > >                      MemoryRegion *address_space_mem,
> > > > > >                      MemoryRegion *address_space_io,
> > > > > >                      uint8_t devfn_min, const char *typename);
> > > > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io);
> > > > > >  void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
> > > > > >                    void *irq_opaque, int nirq);
> > > > > >  int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
> > > > > > diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> > > > > > index 2ad5edb..57b1541 100644
> > > > > > --- a/include/hw/pci/pci_bus.h
> > > > > > +++ b/include/hw/pci/pci_bus.h
> > > > > > @@ -24,6 +24,7 @@ struct PCIBus {
> > > > > >      MemoryRegion *address_space_mem;
> > > > > >      MemoryRegion *address_space_io;
> > > > > >      MemoryRegion master_abort_mem;
> > > > > > +    MemoryRegion master_abort_io;
> > > > > >  
> > > > > >      QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
> > > > > >      QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
> > > > > > -- 
> > > > > > 1.8.3.1
> > > > 
> > > > 
> > 
> >
Michael S. Tsirkin Sept. 23, 2013, 6:45 p.m. UTC | #7
On Mon, Sep 23, 2013 at 08:49:53PM +0300, Marcel Apfelbaum wrote:
> On Mon, 2013-09-23 at 18:10 +0300, Michael S. Tsirkin wrote:
> > On Mon, Sep 23, 2013 at 05:43:38PM +0300, Marcel Apfelbaum wrote:
> > > On Mon, 2013-09-23 at 16:45 +0300, Michael S. Tsirkin wrote:
> > > > On Mon, Sep 23, 2013 at 03:37:43PM +0300, Marcel Apfelbaum wrote:
> > > > > On Mon, 2013-09-23 at 14:27 +0300, Michael S. Tsirkin wrote:
> > > > > > On Mon, Sep 23, 2013 at 02:01:17PM +0300, Marcel Apfelbaum wrote:
> > > > > > > This patch is implemented on top of series:
> > > > > > > [PATCH v5 0/3] pci: implement upstream master abort protocol
> > > > > > > 
> > > > > > > Added "master abort io" background region for PCIBus.
> > > > > > > 
> > > > > > > Added "master abort mem" region to catch transactions initiated
> > > > > > > by pci devices targeted to unassigned addresses.
> > > > > > > 
> > > > > > > Enabled "master abort" regions for PCI-2-PCI bridge's secondary bus.
> > > > > > > 
> > > > > > > Set "Received Master Abort" Bit on Status/Secondary Status
> > > > > > > register as defined in the PCI Spec.
> > > > > > > 
> > > > > > > Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> > > > > > 
> > > > > > I think it will be easier to review the complete
> > > > > > series, not an incremental patch.
> > > > > Should I combine this with the prev series without the
> > > > > memory patches?
> > > > > 
> > > > > > 
> > > > > > We probably need to think what's the right thing
> > > > > > to do for master abort mode since we do not
> > > > > > emulate SERR. Do we make it writeable at all?
> > > > > Master abort mode can be enabled. (Tested)
> > > > > > 
> > > > > > It's a pci only bit:
> > > > > > 	When the Master-Abort Mode bit is cleared and a posted write transaction
> > > > > > 	forwarded by the
> > > > > > 	bridge terminates with a Master-Abort, no error is reported (note that
> > > > > > 	the Master-Abort bit
> > > > > > 	is still set). When the Master-Abort Mode bit is set and a posted write
> > > > > > 	transaction forwarded
> > > > > > 	by the bridge terminates with a Master-Abort on the destination bus, the
> > > > > > 	bridge must:
> > > > > > 	Assert SERR# on the primary interfaceCommand register)
> > > > > > 	(if enabled by the SERR# Enable bitin the
> > > > > > 	Set the Signaled System Errorbit in the Command register)
> > > > > > 	bitin the Status register (if enabled by the SERR# Enable)
> > > > > > 
> > > > > > one way to do this would be not to set Master-Abort bit even
> > > > > > though spec says we should, and pretend there was no error.
> > > > > Maybe we can just not allow to set "Master abort mode"
> > > > > in BRIDGE_CONTROL register. It is a cleaner way (I think)
> > > > > considering we don't emulate SERR.
> > > > 
> > > > I'm not sure P2P spec allows this - want to check.
> > > I will check this too, thanks
> > > 
> > > > It's the right thing to do for express.
> > > > 
> > > > Also pls note that cross-version migration
> > > > requires that we keep it writable for old
> > > > machine types.
> > > Yes :(. It seems we need another solution.
> > 
> > Not really. Just set a flag "don't assert master abort"
> > for old machine types.
> OK, thanks
> 
> > 
> > > > 
> > > > > > 
> > > > > > > ---
> > > > > > >  hw/pci/pci.c             | 115 ++++++++++++++++++++++++++++++++++++++++++-----
> > > > > > >  hw/pci/pci_bridge.c      |  10 +++++
> > > > > > >  include/hw/pci/pci.h     |   3 ++
> > > > > > >  include/hw/pci/pci_bus.h |   1 +
> > > > > > >  4 files changed, 118 insertions(+), 11 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > > > > > index d8a1b11..1f4e707 100644
> > > > > > > --- a/hw/pci/pci.c
> > > > > > > +++ b/hw/pci/pci.c
> > > > > > > @@ -283,17 +283,76 @@ const char *pci_root_bus_path(PCIDevice *dev)
> > > > > > >      return rootbus->qbus.name;
> > > > > > >  }
> > > > > > >  
> > > > > > > +static PCIDevice *pci_bus_find_host(PCIBus *bus)
> > > > > > > +{
> > > > > > > +    PCIDevice *dev;
> > > > > > > +    int i;
> > > > > > > +
> > > > > > > +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> > > > > > > +        dev = bus->devices[i];
> > > > > > > +        if (dev) {
> > > > > > > +            PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> > > > > > > +            if (pc->class_id == PCI_CLASS_BRIDGE_HOST) {
> > > > > > 
> > > > > > In fact you can do this easier:
> > > > > > 	 pci_get_word(dev->config + PCI_CLASS_DEVICE) == PCI_CLASS_BRIDGE_HOST
> > > > > Thanks, I'll change.
> > > > > > 
> > > > > > > +                return dev;
> > > > > > > +            }
> > > > > > > +        }
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    return NULL;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void master_abort(void *opaque)
> > > > > > > +{
> > > > > > > +    PCIDevice *master_dev = NULL;
> > > > > > > +    PCIDeviceClass *pc;
> > > > > > > +    PCIBus *bus;
> > > > > > > +    int downstream;
> > > > > > 
> > > > > > bool?
> > > > > Yes, I'll change
> > > > > 
> > > > > > > +
> > > > > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_DEVICE)) {
> > > > > > 
> > > > > > Please don't do dynamic cast just because you can.
> > > > > > You know very well what kind of object you pass
> > > > > > when you create the MR.
> > > > > > So just call the appropriate function.
> > > > > I wanted to merge the code for all 3 entities involved:
> > > > > root PCIBus, PCIBus behind a bridge, and PCIDevice.
> > > > > The region for merge was to use the same master_abort_mem_ops
> > > > > because there are the same operations that must be done:
> > > > > return -1 (0xFFF...) and set master abort received bit. 
> > > > 
> > > > return -1 is not a lot of common code, and MA bit
> > > > is in a different place each time.
> > > Ack
> > > 
> > > > 
> > > > > > 
> > > > > > > +        master_dev = PCI_DEVICE(opaque);
> > > > > > > +        bus = master_dev->bus;
> > > > > > > +        while (!pci_bus_is_root(bus)) {
> > > > > > > +            master_dev = bus->parent_dev;
> > > > > > > +            bus = master_dev->bus;
> > > > > > > +        }
> > > > > > > +        downstream = 0;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_BUS)) {
> > > > > > > +        bus = PCI_BUS(opaque);
> > > > > > > +        if (pci_bus_is_root(bus)) {
> > > > > > > +            master_dev = pci_bus_find_host(bus);
> > > > > > > +        } else { /* bus behind a PCI-2-PCI bridge */
> > > > > > > +            master_dev = bus->parent_dev;
> > > > > > > +        }
> > > > > > 
> > > > > > So the reason for this is that device to device MA
> > > > > > is still not implemented here, correct?
> > > > > Nope, the above code differentiate 2 scenarios:
> > > > > 1. the bus is the root bus => look for host bridge
> > > > >    to update STATUS register
> > > > > 2. the bus is bridge's secondary bus =>
> > > > >    update bridge STATUS register
> > > > > "Device 2 Device" is handled implicitly because we have
> > > > > a background region covering all device's target memory
> > > > > 
> > > > > > If it was it would be just one instance of it.
> > > > > > I'm not saying this must block this patch, however
> > > > > > there should be a code comment to this effect,
> > > > > > and commit log should mention this explicitly.
> > > > > As above, "Device 2 Device" is handled implicit
> > > > 
> > > > Implicit => confused.
> > > > 
> > > > I'm confused. Where's the code that sets MA bit on
> > > > the initiator device?
> > > Is the code that adds the background bus_master_abort_mem region to
> > > bus_master_enable_region.
> > > bus_master_enable_region is the target for all PCIDevice
> > > reads and writes(if I understand correctly). So it doesn't matter if
> > > a device initiate a transaction to another device or to memory,
> > > it will do it using the bus_master_enable_region.
> > > The master_abort() method will recognize this as an
> > > upstream transaction. 
> > 
> > But if target is behind a bridge then master does
> > not detect the MA, it's the bridge that does it
> > (in PCI, express seems to be different).
> The current code handles upstream transactions as follows:
> 1. If the PCI device is behind a bridge, finds
>    recursively the corresponding bridge
>    on the root bus and modifies its status register.

Why recursively?
I think you need to set MA bit on the bit in bridge right
above the device.

> 2. If the PCI device is not behind the bridge, directly
>    modifies the device status register


The issue really isn't whether the PCI device
(which PCI device?) is behind the bridge.
Rather it's whether the address is claimed by some bridge
on the same bus as the master, or by a parent bridge.

If it is not claimed, MA bit in the original master
is set.

If it is claimed, master becomes some bridge:
either bridge on the secondary bus (forwarding downstream) or a parent bridge
(forwarding upstream) of the bus, then repeat the logic, until we find a
bus where it is not claimed.

At that point to know where in bridge config to set the bit, look at the last step we
took.  If it was a parent bus (transaction was forwarded upstream) set
primary status, otherwise (it was a bridge on the secondary bus)
set secondary status.

The last piece of puzzle is that for transactions coming from
outside the PCI domain, pci host bridge is the original master.


This applies to memory,io and configuration cycles.

This is for PCI.

> Regarding PCI express, I'll check this issue.
> Thanks,
> Marcel

As far as I can tell, for express instead of the last bridge
detecting UR (and setting MA), all bridges on the path detect UR and the
original master detects UR, in all cases.

Also, UR apparently can only be detected by non-posted transactions:
mem/io reads and configuration. Even on the same "bus".

If you cross a pci to express bridge (direct bridge)
then the bridge is the original master and
UR is detected on path from it on.

If you cross an express to pci bridge (reversed bridge)
you lose this info and only the last master sets MA.

> > 
> > 
> > > > Maybe if you split each case properly it will become
> > > > clear.
> > > I hope it will.
> > > 
> > > > 
> > > > > > 
> > > > > > > +        downstream = 1;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    assert(master_dev);
> > > > > > > +    pc = PCI_DEVICE_GET_CLASS(master_dev);
> > > > > > > +
> > > > > > 
> > > > > > There's very little common code between devices and
> > > > > > buses. So just use 2 functions.
> > > > > As stated above:
> > > > > The region for merge was to use the same master_abort_mem_ops
> > > > > because there are the same operations that must be done:
> > > > > return -1 (0xFFF...) and set master abort received bit.
> > > > > Separating them will result in some code duplication.
> > > > 
> > > > About 2 lines. But you'll lose the nasty dynamic casts.
> > > > opaque pointers are bad enough.
> > > Ack
> > > 
> > > > 
> > > > > The read and write methods are the same, but would call different
> > > > > "master_abort" methods.
> > > > > If it does bother you the down casting anyway, I'll change.
> > > > 
> > > > Pls do.
> > > Ack
> > > 
> > > > 
> > > > > > 
> > > > > > > +    if (downstream && pc->is_bridge) {
> > > > > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_SEC_STATUS,
> > > > > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > > > > +    } else {
> > > > > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_STATUS,
> > > > > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > > > > +    }
> > > > > > > +}
> > > > > > > +
> > > > > > >  static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
> > > > > > >  {
> > > > > > > -   return -1ULL;
> > > > > > 
> > > > > > I didn't notice but this means there were 3 spaces here in previous
> > > > > > patch?
> > > > > yes :(, checkpatch didn't catch it
> > > > > 
> > > > > Thanks for the review,
> > > > > Marcel
> > > > > 
> > > > > > 
> > > > > > > +    master_abort(opaque);
> > > > > > > +    return -1ULL;
> > > > > > >  }
> > > > > > >  
> > > > > > >  static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
> > > > > > >                                     unsigned size)
> > > > > > >  {
> > > > > > > +    master_abort(opaque);
> > > > > > >  }
> > > > > > >  
> > > > > > > -static const MemoryRegionOps master_abort_mem_ops = {
> > > > > > > +static const MemoryRegionOps master_abort_ops = {
> > > > > > >      .read = master_abort_mem_read,
> > > > > > >      .write = master_abort_mem_write,
> > > > > > >      .endianness = DEVICE_LITTLE_ENDIAN,
> > > > > > > @@ -301,6 +360,23 @@ static const MemoryRegionOps master_abort_mem_ops = {
> > > > > > >  
> > > > > > >  #define MASTER_ABORT_MEM_PRIORITY INT_MIN
> > > > > > >  
> > > > > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io)
> > > > > > > +{
> > > > > > > +    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > > > > +                          &master_abort_ops, bus, mem,
> > > > > > > +                          memory_region_size(bus->address_space_mem));
> > > > > > > +    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > > > > +                                        0, &bus->master_abort_mem,
> > > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > +
> > > > > > > +    memory_region_init_io(&bus->master_abort_io, OBJECT(bus),
> > > > > > > +                          &master_abort_ops, bus, io,
> > > > > > > +                          memory_region_size(bus->address_space_io));
> > > > > > > +    memory_region_add_subregion_overlap(bus->address_space_io,
> > > > > > > +                                        0, &bus->master_abort_io,
> > > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > +}
> > > > > > > +
> > > > > > >  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > > > > >                           const char *name,
> > > > > > >                           MemoryRegion *address_space_mem,
> > > > > > > @@ -312,13 +388,8 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > > > > >      bus->address_space_mem = address_space_mem;
> > > > > > >      bus->address_space_io = address_space_io;
> > > > > > >  
> > > > > > > -
> > > > > > > -    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > > > > -                          &master_abort_mem_ops, bus, "pci-master-abort",
> > > > > > > -                          memory_region_size(bus->address_space_mem));
> > > > > > > -    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > > > > -                                        0, &bus->master_abort_mem,
> > > > > > > -                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > +    pci_bus_master_abort_init(bus, "pci-master-abort-mem",
> > > > > > > +                              "pci-master-abort-io");
> > > > > > >  
> > > > > > >      /* host bridge */
> > > > > > >      QLIST_INIT(&bus->child);
> > > > > > > @@ -840,9 +911,24 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> > > > > > >      pci_dev->bus = bus;
> > > > > > >      dma_as = pci_device_iommu_address_space(pci_dev);
> > > > > > >  
> > > > > > > -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> > > > > > > -                             OBJECT(pci_dev), "bus master",
> > > > > > > +    memory_region_init(&pci_dev->bus_master_enable_region, NULL,
> > > > > > > +                       "bus master", UINT64_MAX);
> > > > > > > +
> > > > > > > +    memory_region_init_io(&pci_dev->bus_master_abort_mem, OBJECT(bus),
> > > > > > > +                          &master_abort_ops, OBJECT(pci_dev),
> > > > > > > +                          "bus master master-abort",
> > > > > > > +                          memory_region_size(&pci_dev->bus_master_enable_region));
> > > > > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > > > > +                                        0, &pci_dev->bus_master_abort_mem,
> > > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > +
> > > > > > > +    memory_region_init_alias(&pci_dev->bus_master_dma_mem,
> > > > > > > +                             OBJECT(pci_dev), "bus master dma",
> > > > > > >                               dma_as->root, 0, memory_region_size(dma_as->root));
> > > > > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > > > > +                                        0, &pci_dev->bus_master_dma_mem, 0);
> > > > > > > +
> > > > > > > +
> > > > > > >      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> > > > > > >      address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
> > > > > > >                         name);
> > > > > > > @@ -901,6 +987,13 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
> > > > > > >      pci_config_free(pci_dev);
> > > > > > >  
> > > > > > >      address_space_destroy(&pci_dev->bus_master_as);
> > > > > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > > > > +                                &pci_dev->bus_master_dma_mem);
> > > > > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > > > > +                                &pci_dev->bus_master_abort_mem);
> > > > > > > +    memory_region_destroy(&pci_dev->bus_master_dma_mem);
> > > > > > > +    memory_region_destroy(&pci_dev->bus_master_abort_mem);
> > > > > > > +
> > > > > > >      memory_region_destroy(&pci_dev->bus_master_enable_region);
> > > > > > >  }
> > > > > > >  
> > > > > > > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > > > > > > index e6b22b8..56b682f 100644
> > > > > > > --- a/hw/pci/pci_bridge.c
> > > > > > > +++ b/hw/pci/pci_bridge.c
> > > > > > > @@ -376,6 +376,10 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
> > > > > > >      sec_bus->address_space_io = &br->address_space_io;
> > > > > > >      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
> > > > > > >      br->windows = pci_bridge_region_init(br);
> > > > > > > +
> > > > > > > +    pci_bus_master_abort_init(sec_bus, "pci_bridge_master_abort_mem",
> > > > > > > +                              "pci_bridge_master_abort_io");
> > > > > > > +
> > > > > > >      QLIST_INIT(&sec_bus->child);
> > > > > > >      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
> > > > > > >      return 0;
> > > > > > > @@ -389,6 +393,12 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
> > > > > > >      QLIST_REMOVE(&s->sec_bus, sibling);
> > > > > > >      pci_bridge_region_del(s, s->windows);
> > > > > > >      pci_bridge_region_cleanup(s, s->windows);
> > > > > > > +    memory_region_del_subregion(s->sec_bus.address_space_mem,
> > > > > > > +                                &s->sec_bus.master_abort_mem);
> > > > > > > +    memory_region_del_subregion(s->sec_bus.address_space_io,
> > > > > > > +                                &s->sec_bus.master_abort_io);
> > > > > > > +    memory_region_destroy(&s->sec_bus.master_abort_mem);
> > > > > > > +    memory_region_destroy(&s->sec_bus.master_abort_io);
> > > > > > >      memory_region_destroy(&s->address_space_mem);
> > > > > > >      memory_region_destroy(&s->address_space_io);
> > > > > > >      /* qbus_free() is called automatically by qdev_free() */
> > > > > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > > > > index 37979aa..d69e06d 100644
> > > > > > > --- a/include/hw/pci/pci.h
> > > > > > > +++ b/include/hw/pci/pci.h
> > > > > > > @@ -242,6 +242,8 @@ struct PCIDevice {
> > > > > > >      PCIIORegion io_regions[PCI_NUM_REGIONS];
> > > > > > >      AddressSpace bus_master_as;
> > > > > > >      MemoryRegion bus_master_enable_region;
> > > > > > > +    MemoryRegion bus_master_dma_mem;
> > > > > > > +    MemoryRegion bus_master_abort_mem;
> > > > > > >  
> > > > > > >      /* do not access the following fields */
> > > > > > >      PCIConfigReadFunc *config_read;
> > > > > > > @@ -357,6 +359,7 @@ PCIBus *pci_bus_new(DeviceState *parent, const char *name,
> > > > > > >                      MemoryRegion *address_space_mem,
> > > > > > >                      MemoryRegion *address_space_io,
> > > > > > >                      uint8_t devfn_min, const char *typename);
> > > > > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io);
> > > > > > >  void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
> > > > > > >                    void *irq_opaque, int nirq);
> > > > > > >  int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
> > > > > > > diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> > > > > > > index 2ad5edb..57b1541 100644
> > > > > > > --- a/include/hw/pci/pci_bus.h
> > > > > > > +++ b/include/hw/pci/pci_bus.h
> > > > > > > @@ -24,6 +24,7 @@ struct PCIBus {
> > > > > > >      MemoryRegion *address_space_mem;
> > > > > > >      MemoryRegion *address_space_io;
> > > > > > >      MemoryRegion master_abort_mem;
> > > > > > > +    MemoryRegion master_abort_io;
> > > > > > >  
> > > > > > >      QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
> > > > > > >      QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
> > > > > > > -- 
> > > > > > > 1.8.3.1
> > > > > 
> > > > > 
> > > 
> > > 
> 
>
Marcel Apfelbaum Sept. 24, 2013, 8:07 a.m. UTC | #8
On Mon, 2013-09-23 at 21:45 +0300, Michael S. Tsirkin wrote:
> On Mon, Sep 23, 2013 at 08:49:53PM +0300, Marcel Apfelbaum wrote:
> > On Mon, 2013-09-23 at 18:10 +0300, Michael S. Tsirkin wrote:
> > > On Mon, Sep 23, 2013 at 05:43:38PM +0300, Marcel Apfelbaum wrote:
> > > > On Mon, 2013-09-23 at 16:45 +0300, Michael S. Tsirkin wrote:
> > > > > On Mon, Sep 23, 2013 at 03:37:43PM +0300, Marcel Apfelbaum wrote:
> > > > > > On Mon, 2013-09-23 at 14:27 +0300, Michael S. Tsirkin wrote:
> > > > > > > On Mon, Sep 23, 2013 at 02:01:17PM +0300, Marcel Apfelbaum wrote:
> > > > > > > > This patch is implemented on top of series:
> > > > > > > > [PATCH v5 0/3] pci: implement upstream master abort protocol
> > > > > > > > 
> > > > > > > > Added "master abort io" background region for PCIBus.
> > > > > > > > 
> > > > > > > > Added "master abort mem" region to catch transactions initiated
> > > > > > > > by pci devices targeted to unassigned addresses.
> > > > > > > > 
> > > > > > > > Enabled "master abort" regions for PCI-2-PCI bridge's secondary bus.
> > > > > > > > 
> > > > > > > > Set "Received Master Abort" Bit on Status/Secondary Status
> > > > > > > > register as defined in the PCI Spec.
> > > > > > > > 
> > > > > > > > Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> > > > > > > 
> > > > > > > I think it will be easier to review the complete
> > > > > > > series, not an incremental patch.
> > > > > > Should I combine this with the prev series without the
> > > > > > memory patches?
> > > > > > 
> > > > > > > 
> > > > > > > We probably need to think what's the right thing
> > > > > > > to do for master abort mode since we do not
> > > > > > > emulate SERR. Do we make it writeable at all?
> > > > > > Master abort mode can be enabled. (Tested)
> > > > > > > 
> > > > > > > It's a pci only bit:
> > > > > > > 	When the Master-Abort Mode bit is cleared and a posted write transaction
> > > > > > > 	forwarded by the
> > > > > > > 	bridge terminates with a Master-Abort, no error is reported (note that
> > > > > > > 	the Master-Abort bit
> > > > > > > 	is still set). When the Master-Abort Mode bit is set and a posted write
> > > > > > > 	transaction forwarded
> > > > > > > 	by the bridge terminates with a Master-Abort on the destination bus, the
> > > > > > > 	bridge must:
> > > > > > > 	Assert SERR# on the primary interfaceCommand register)
> > > > > > > 	(if enabled by the SERR# Enable bitin the
> > > > > > > 	Set the Signaled System Errorbit in the Command register)
> > > > > > > 	bitin the Status register (if enabled by the SERR# Enable)
> > > > > > > 
> > > > > > > one way to do this would be not to set Master-Abort bit even
> > > > > > > though spec says we should, and pretend there was no error.
> > > > > > Maybe we can just not allow to set "Master abort mode"
> > > > > > in BRIDGE_CONTROL register. It is a cleaner way (I think)
> > > > > > considering we don't emulate SERR.
> > > > > 
> > > > > I'm not sure P2P spec allows this - want to check.
> > > > I will check this too, thanks
> > > > 
> > > > > It's the right thing to do for express.
> > > > > 
> > > > > Also pls note that cross-version migration
> > > > > requires that we keep it writable for old
> > > > > machine types.
> > > > Yes :(. It seems we need another solution.
> > > 
> > > Not really. Just set a flag "don't assert master abort"
> > > for old machine types.
> > OK, thanks
> > 
> > > 
> > > > > 
> > > > > > > 
> > > > > > > > ---
> > > > > > > >  hw/pci/pci.c             | 115 ++++++++++++++++++++++++++++++++++++++++++-----
> > > > > > > >  hw/pci/pci_bridge.c      |  10 +++++
> > > > > > > >  include/hw/pci/pci.h     |   3 ++
> > > > > > > >  include/hw/pci/pci_bus.h |   1 +
> > > > > > > >  4 files changed, 118 insertions(+), 11 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > > > > > > index d8a1b11..1f4e707 100644
> > > > > > > > --- a/hw/pci/pci.c
> > > > > > > > +++ b/hw/pci/pci.c
> > > > > > > > @@ -283,17 +283,76 @@ const char *pci_root_bus_path(PCIDevice *dev)
> > > > > > > >      return rootbus->qbus.name;
> > > > > > > >  }
> > > > > > > >  
> > > > > > > > +static PCIDevice *pci_bus_find_host(PCIBus *bus)
> > > > > > > > +{
> > > > > > > > +    PCIDevice *dev;
> > > > > > > > +    int i;
> > > > > > > > +
> > > > > > > > +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> > > > > > > > +        dev = bus->devices[i];
> > > > > > > > +        if (dev) {
> > > > > > > > +            PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> > > > > > > > +            if (pc->class_id == PCI_CLASS_BRIDGE_HOST) {
> > > > > > > 
> > > > > > > In fact you can do this easier:
> > > > > > > 	 pci_get_word(dev->config + PCI_CLASS_DEVICE) == PCI_CLASS_BRIDGE_HOST
> > > > > > Thanks, I'll change.
> > > > > > > 
> > > > > > > > +                return dev;
> > > > > > > > +            }
> > > > > > > > +        }
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    return NULL;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void master_abort(void *opaque)
> > > > > > > > +{
> > > > > > > > +    PCIDevice *master_dev = NULL;
> > > > > > > > +    PCIDeviceClass *pc;
> > > > > > > > +    PCIBus *bus;
> > > > > > > > +    int downstream;
> > > > > > > 
> > > > > > > bool?
> > > > > > Yes, I'll change
> > > > > > 
> > > > > > > > +
> > > > > > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_DEVICE)) {
> > > > > > > 
> > > > > > > Please don't do dynamic cast just because you can.
> > > > > > > You know very well what kind of object you pass
> > > > > > > when you create the MR.
> > > > > > > So just call the appropriate function.
> > > > > > I wanted to merge the code for all 3 entities involved:
> > > > > > root PCIBus, PCIBus behind a bridge, and PCIDevice.
> > > > > > The region for merge was to use the same master_abort_mem_ops
> > > > > > because there are the same operations that must be done:
> > > > > > return -1 (0xFFF...) and set master abort received bit. 
> > > > > 
> > > > > return -1 is not a lot of common code, and MA bit
> > > > > is in a different place each time.
> > > > Ack
> > > > 
> > > > > 
> > > > > > > 
> > > > > > > > +        master_dev = PCI_DEVICE(opaque);
> > > > > > > > +        bus = master_dev->bus;
> > > > > > > > +        while (!pci_bus_is_root(bus)) {
> > > > > > > > +            master_dev = bus->parent_dev;
> > > > > > > > +            bus = master_dev->bus;
> > > > > > > > +        }
> > > > > > > > +        downstream = 0;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_BUS)) {
> > > > > > > > +        bus = PCI_BUS(opaque);
> > > > > > > > +        if (pci_bus_is_root(bus)) {
> > > > > > > > +            master_dev = pci_bus_find_host(bus);
> > > > > > > > +        } else { /* bus behind a PCI-2-PCI bridge */
> > > > > > > > +            master_dev = bus->parent_dev;
> > > > > > > > +        }
> > > > > > > 
> > > > > > > So the reason for this is that device to device MA
> > > > > > > is still not implemented here, correct?
> > > > > > Nope, the above code differentiate 2 scenarios:
> > > > > > 1. the bus is the root bus => look for host bridge
> > > > > >    to update STATUS register
> > > > > > 2. the bus is bridge's secondary bus =>
> > > > > >    update bridge STATUS register
> > > > > > "Device 2 Device" is handled implicitly because we have
> > > > > > a background region covering all device's target memory
> > > > > > 
> > > > > > > If it was it would be just one instance of it.
> > > > > > > I'm not saying this must block this patch, however
> > > > > > > there should be a code comment to this effect,
> > > > > > > and commit log should mention this explicitly.
> > > > > > As above, "Device 2 Device" is handled implicit
> > > > > 
> > > > > Implicit => confused.
> > > > > 
> > > > > I'm confused. Where's the code that sets MA bit on
> > > > > the initiator device?
> > > > Is the code that adds the background bus_master_abort_mem region to
> > > > bus_master_enable_region.
> > > > bus_master_enable_region is the target for all PCIDevice
> > > > reads and writes(if I understand correctly). So it doesn't matter if
> > > > a device initiate a transaction to another device or to memory,
> > > > it will do it using the bus_master_enable_region.
> > > > The master_abort() method will recognize this as an
> > > > upstream transaction. 
> > > 
> > > But if target is behind a bridge then master does
> > > not detect the MA, it's the bridge that does it
> > > (in PCI, express seems to be different).
> > The current code handles upstream transactions as follows:
> > 1. If the PCI device is behind a bridge, finds
> >    recursively the corresponding bridge
> >    on the root bus and modifies its status register.
> 
> Why recursively?
> I think you need to set MA bit on the bit in bridge right
> above the device.
If a PCI Device behind a bridge issues a transaction:
 - From my understanding, any address outside the bridge address range
   will be claimed by the bridge.
 - This will happen recursively until we reach the root bus (assuming
   that the target is not another device).
 - The bridge on the bus will set MA bit.

> 
> > 2. If the PCI device is not behind the bridge, directly
> >    modifies the device status register
> 
> 
> The issue really isn't whether the PCI device
> (which PCI device?) is behind the bridge.
> Rather it's whether the address is claimed by some bridge
> on the same bus as the master, or by a parent bridge.
This can be recursive, no?

> 
> If it is not claimed, MA bit in the original master
> is set.
> 
> If it is claimed, master becomes some bridge:
> either bridge on the secondary bus (forwarding downstream) or a parent bridge
> (forwarding upstream) of the bus, then repeat the logic, until we find a
> bus where it is not claimed.
I agree, the "repeat logic" is the response to your question "Why recursively?"

> 
> At that point to know where in bridge config to set the bit, look at the last step we
> took.  If it was a parent bus (transaction was forwarded upstream) set
> primary status, otherwise (it was a bridge on the secondary bus)
> set secondary status.
> 
> The last piece of puzzle is that for transactions coming from
> outside the PCI domain, pci host bridge is the original master.
> 
> 
> This applies to memory,io and configuration cycles.
> 
> This is for PCI.
I agree.

> 
> > Regarding PCI express, I'll check this issue.
> > Thanks,
> > Marcel
> 
> As far as I can tell, for express instead of the last bridge
> detecting UR (and setting MA), all bridges on the path detect UR and the
> original master detects UR, in all cases.
Thanks

> 
> Also, UR apparently can only be detected by non-posted transactions:
> mem/io reads and configuration. Even on the same "bus".
Also "Posted Write Transactions", PCI-2-PCI bridge Spec (6.3.2)
How can we differentiate in Qemu posted from non-posted transcations
anyway?

> 
> If you cross a pci to express bridge (direct bridge)
> then the bridge is the original master and
> UR is detected on path from it on.
> 
> If you cross an express to pci bridge (reversed bridge)
> you lose this info and only the last master sets MA.
Thanks, I'll look into it,
Marcel

> 
> > > 
> > > 
> > > > > Maybe if you split each case properly it will become
> > > > > clear.
> > > > I hope it will.
> > > > 
> > > > > 
> > > > > > > 
> > > > > > > > +        downstream = 1;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    assert(master_dev);
> > > > > > > > +    pc = PCI_DEVICE_GET_CLASS(master_dev);
> > > > > > > > +
> > > > > > > 
> > > > > > > There's very little common code between devices and
> > > > > > > buses. So just use 2 functions.
> > > > > > As stated above:
> > > > > > The region for merge was to use the same master_abort_mem_ops
> > > > > > because there are the same operations that must be done:
> > > > > > return -1 (0xFFF...) and set master abort received bit.
> > > > > > Separating them will result in some code duplication.
> > > > > 
> > > > > About 2 lines. But you'll lose the nasty dynamic casts.
> > > > > opaque pointers are bad enough.
> > > > Ack
> > > > 
> > > > > 
> > > > > > The read and write methods are the same, but would call different
> > > > > > "master_abort" methods.
> > > > > > If it does bother you the down casting anyway, I'll change.
> > > > > 
> > > > > Pls do.
> > > > Ack
> > > > 
> > > > > 
> > > > > > > 
> > > > > > > > +    if (downstream && pc->is_bridge) {
> > > > > > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_SEC_STATUS,
> > > > > > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > > > > > +    } else {
> > > > > > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_STATUS,
> > > > > > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > > > > > +    }
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
> > > > > > > >  {
> > > > > > > > -   return -1ULL;
> > > > > > > 
> > > > > > > I didn't notice but this means there were 3 spaces here in previous
> > > > > > > patch?
> > > > > > yes :(, checkpatch didn't catch it
> > > > > > 
> > > > > > Thanks for the review,
> > > > > > Marcel
> > > > > > 
> > > > > > > 
> > > > > > > > +    master_abort(opaque);
> > > > > > > > +    return -1ULL;
> > > > > > > >  }
> > > > > > > >  
> > > > > > > >  static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
> > > > > > > >                                     unsigned size)
> > > > > > > >  {
> > > > > > > > +    master_abort(opaque);
> > > > > > > >  }
> > > > > > > >  
> > > > > > > > -static const MemoryRegionOps master_abort_mem_ops = {
> > > > > > > > +static const MemoryRegionOps master_abort_ops = {
> > > > > > > >      .read = master_abort_mem_read,
> > > > > > > >      .write = master_abort_mem_write,
> > > > > > > >      .endianness = DEVICE_LITTLE_ENDIAN,
> > > > > > > > @@ -301,6 +360,23 @@ static const MemoryRegionOps master_abort_mem_ops = {
> > > > > > > >  
> > > > > > > >  #define MASTER_ABORT_MEM_PRIORITY INT_MIN
> > > > > > > >  
> > > > > > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io)
> > > > > > > > +{
> > > > > > > > +    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > > > > > +                          &master_abort_ops, bus, mem,
> > > > > > > > +                          memory_region_size(bus->address_space_mem));
> > > > > > > > +    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > > > > > +                                        0, &bus->master_abort_mem,
> > > > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > > +
> > > > > > > > +    memory_region_init_io(&bus->master_abort_io, OBJECT(bus),
> > > > > > > > +                          &master_abort_ops, bus, io,
> > > > > > > > +                          memory_region_size(bus->address_space_io));
> > > > > > > > +    memory_region_add_subregion_overlap(bus->address_space_io,
> > > > > > > > +                                        0, &bus->master_abort_io,
> > > > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > > > > > >                           const char *name,
> > > > > > > >                           MemoryRegion *address_space_mem,
> > > > > > > > @@ -312,13 +388,8 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > > > > > >      bus->address_space_mem = address_space_mem;
> > > > > > > >      bus->address_space_io = address_space_io;
> > > > > > > >  
> > > > > > > > -
> > > > > > > > -    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > > > > > -                          &master_abort_mem_ops, bus, "pci-master-abort",
> > > > > > > > -                          memory_region_size(bus->address_space_mem));
> > > > > > > > -    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > > > > > -                                        0, &bus->master_abort_mem,
> > > > > > > > -                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > > +    pci_bus_master_abort_init(bus, "pci-master-abort-mem",
> > > > > > > > +                              "pci-master-abort-io");
> > > > > > > >  
> > > > > > > >      /* host bridge */
> > > > > > > >      QLIST_INIT(&bus->child);
> > > > > > > > @@ -840,9 +911,24 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> > > > > > > >      pci_dev->bus = bus;
> > > > > > > >      dma_as = pci_device_iommu_address_space(pci_dev);
> > > > > > > >  
> > > > > > > > -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> > > > > > > > -                             OBJECT(pci_dev), "bus master",
> > > > > > > > +    memory_region_init(&pci_dev->bus_master_enable_region, NULL,
> > > > > > > > +                       "bus master", UINT64_MAX);
> > > > > > > > +
> > > > > > > > +    memory_region_init_io(&pci_dev->bus_master_abort_mem, OBJECT(bus),
> > > > > > > > +                          &master_abort_ops, OBJECT(pci_dev),
> > > > > > > > +                          "bus master master-abort",
> > > > > > > > +                          memory_region_size(&pci_dev->bus_master_enable_region));
> > > > > > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > > > > > +                                        0, &pci_dev->bus_master_abort_mem,
> > > > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > > +
> > > > > > > > +    memory_region_init_alias(&pci_dev->bus_master_dma_mem,
> > > > > > > > +                             OBJECT(pci_dev), "bus master dma",
> > > > > > > >                               dma_as->root, 0, memory_region_size(dma_as->root));
> > > > > > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > > > > > +                                        0, &pci_dev->bus_master_dma_mem, 0);
> > > > > > > > +
> > > > > > > > +
> > > > > > > >      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> > > > > > > >      address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
> > > > > > > >                         name);
> > > > > > > > @@ -901,6 +987,13 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
> > > > > > > >      pci_config_free(pci_dev);
> > > > > > > >  
> > > > > > > >      address_space_destroy(&pci_dev->bus_master_as);
> > > > > > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > > > > > +                                &pci_dev->bus_master_dma_mem);
> > > > > > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > > > > > +                                &pci_dev->bus_master_abort_mem);
> > > > > > > > +    memory_region_destroy(&pci_dev->bus_master_dma_mem);
> > > > > > > > +    memory_region_destroy(&pci_dev->bus_master_abort_mem);
> > > > > > > > +
> > > > > > > >      memory_region_destroy(&pci_dev->bus_master_enable_region);
> > > > > > > >  }
> > > > > > > >  
> > > > > > > > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > > > > > > > index e6b22b8..56b682f 100644
> > > > > > > > --- a/hw/pci/pci_bridge.c
> > > > > > > > +++ b/hw/pci/pci_bridge.c
> > > > > > > > @@ -376,6 +376,10 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
> > > > > > > >      sec_bus->address_space_io = &br->address_space_io;
> > > > > > > >      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
> > > > > > > >      br->windows = pci_bridge_region_init(br);
> > > > > > > > +
> > > > > > > > +    pci_bus_master_abort_init(sec_bus, "pci_bridge_master_abort_mem",
> > > > > > > > +                              "pci_bridge_master_abort_io");
> > > > > > > > +
> > > > > > > >      QLIST_INIT(&sec_bus->child);
> > > > > > > >      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
> > > > > > > >      return 0;
> > > > > > > > @@ -389,6 +393,12 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
> > > > > > > >      QLIST_REMOVE(&s->sec_bus, sibling);
> > > > > > > >      pci_bridge_region_del(s, s->windows);
> > > > > > > >      pci_bridge_region_cleanup(s, s->windows);
> > > > > > > > +    memory_region_del_subregion(s->sec_bus.address_space_mem,
> > > > > > > > +                                &s->sec_bus.master_abort_mem);
> > > > > > > > +    memory_region_del_subregion(s->sec_bus.address_space_io,
> > > > > > > > +                                &s->sec_bus.master_abort_io);
> > > > > > > > +    memory_region_destroy(&s->sec_bus.master_abort_mem);
> > > > > > > > +    memory_region_destroy(&s->sec_bus.master_abort_io);
> > > > > > > >      memory_region_destroy(&s->address_space_mem);
> > > > > > > >      memory_region_destroy(&s->address_space_io);
> > > > > > > >      /* qbus_free() is called automatically by qdev_free() */
> > > > > > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > > > > > index 37979aa..d69e06d 100644
> > > > > > > > --- a/include/hw/pci/pci.h
> > > > > > > > +++ b/include/hw/pci/pci.h
> > > > > > > > @@ -242,6 +242,8 @@ struct PCIDevice {
> > > > > > > >      PCIIORegion io_regions[PCI_NUM_REGIONS];
> > > > > > > >      AddressSpace bus_master_as;
> > > > > > > >      MemoryRegion bus_master_enable_region;
> > > > > > > > +    MemoryRegion bus_master_dma_mem;
> > > > > > > > +    MemoryRegion bus_master_abort_mem;
> > > > > > > >  
> > > > > > > >      /* do not access the following fields */
> > > > > > > >      PCIConfigReadFunc *config_read;
> > > > > > > > @@ -357,6 +359,7 @@ PCIBus *pci_bus_new(DeviceState *parent, const char *name,
> > > > > > > >                      MemoryRegion *address_space_mem,
> > > > > > > >                      MemoryRegion *address_space_io,
> > > > > > > >                      uint8_t devfn_min, const char *typename);
> > > > > > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io);
> > > > > > > >  void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
> > > > > > > >                    void *irq_opaque, int nirq);
> > > > > > > >  int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
> > > > > > > > diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> > > > > > > > index 2ad5edb..57b1541 100644
> > > > > > > > --- a/include/hw/pci/pci_bus.h
> > > > > > > > +++ b/include/hw/pci/pci_bus.h
> > > > > > > > @@ -24,6 +24,7 @@ struct PCIBus {
> > > > > > > >      MemoryRegion *address_space_mem;
> > > > > > > >      MemoryRegion *address_space_io;
> > > > > > > >      MemoryRegion master_abort_mem;
> > > > > > > > +    MemoryRegion master_abort_io;
> > > > > > > >  
> > > > > > > >      QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
> > > > > > > >      QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
> > > > > > > > -- 
> > > > > > > > 1.8.3.1
> > > > > > 
> > > > > > 
> > > > 
> > > > 
> > 
> >
Michael S. Tsirkin Sept. 24, 2013, 8:29 a.m. UTC | #9
On Tue, Sep 24, 2013 at 11:07:19AM +0300, Marcel Apfelbaum wrote:
> On Mon, 2013-09-23 at 21:45 +0300, Michael S. Tsirkin wrote:
> > On Mon, Sep 23, 2013 at 08:49:53PM +0300, Marcel Apfelbaum wrote:
> > > On Mon, 2013-09-23 at 18:10 +0300, Michael S. Tsirkin wrote:
> > > > On Mon, Sep 23, 2013 at 05:43:38PM +0300, Marcel Apfelbaum wrote:
> > > > > On Mon, 2013-09-23 at 16:45 +0300, Michael S. Tsirkin wrote:
> > > > > > On Mon, Sep 23, 2013 at 03:37:43PM +0300, Marcel Apfelbaum wrote:
> > > > > > > On Mon, 2013-09-23 at 14:27 +0300, Michael S. Tsirkin wrote:
> > > > > > > > On Mon, Sep 23, 2013 at 02:01:17PM +0300, Marcel Apfelbaum wrote:
> > > > > > > > > This patch is implemented on top of series:
> > > > > > > > > [PATCH v5 0/3] pci: implement upstream master abort protocol
> > > > > > > > > 
> > > > > > > > > Added "master abort io" background region for PCIBus.
> > > > > > > > > 
> > > > > > > > > Added "master abort mem" region to catch transactions initiated
> > > > > > > > > by pci devices targeted to unassigned addresses.
> > > > > > > > > 
> > > > > > > > > Enabled "master abort" regions for PCI-2-PCI bridge's secondary bus.
> > > > > > > > > 
> > > > > > > > > Set "Received Master Abort" Bit on Status/Secondary Status
> > > > > > > > > register as defined in the PCI Spec.
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> > > > > > > > 
> > > > > > > > I think it will be easier to review the complete
> > > > > > > > series, not an incremental patch.
> > > > > > > Should I combine this with the prev series without the
> > > > > > > memory patches?
> > > > > > > 
> > > > > > > > 
> > > > > > > > We probably need to think what's the right thing
> > > > > > > > to do for master abort mode since we do not
> > > > > > > > emulate SERR. Do we make it writeable at all?
> > > > > > > Master abort mode can be enabled. (Tested)
> > > > > > > > 
> > > > > > > > It's a pci only bit:
> > > > > > > > 	When the Master-Abort Mode bit is cleared and a posted write transaction
> > > > > > > > 	forwarded by the
> > > > > > > > 	bridge terminates with a Master-Abort, no error is reported (note that
> > > > > > > > 	the Master-Abort bit
> > > > > > > > 	is still set). When the Master-Abort Mode bit is set and a posted write
> > > > > > > > 	transaction forwarded
> > > > > > > > 	by the bridge terminates with a Master-Abort on the destination bus, the
> > > > > > > > 	bridge must:
> > > > > > > > 	Assert SERR# on the primary interfaceCommand register)
> > > > > > > > 	(if enabled by the SERR# Enable bitin the
> > > > > > > > 	Set the Signaled System Errorbit in the Command register)
> > > > > > > > 	bitin the Status register (if enabled by the SERR# Enable)
> > > > > > > > 
> > > > > > > > one way to do this would be not to set Master-Abort bit even
> > > > > > > > though spec says we should, and pretend there was no error.
> > > > > > > Maybe we can just not allow to set "Master abort mode"
> > > > > > > in BRIDGE_CONTROL register. It is a cleaner way (I think)
> > > > > > > considering we don't emulate SERR.
> > > > > > 
> > > > > > I'm not sure P2P spec allows this - want to check.
> > > > > I will check this too, thanks
> > > > > 
> > > > > > It's the right thing to do for express.
> > > > > > 
> > > > > > Also pls note that cross-version migration
> > > > > > requires that we keep it writable for old
> > > > > > machine types.
> > > > > Yes :(. It seems we need another solution.
> > > > 
> > > > Not really. Just set a flag "don't assert master abort"
> > > > for old machine types.
> > > OK, thanks
> > > 
> > > > 
> > > > > > 
> > > > > > > > 
> > > > > > > > > ---
> > > > > > > > >  hw/pci/pci.c             | 115 ++++++++++++++++++++++++++++++++++++++++++-----
> > > > > > > > >  hw/pci/pci_bridge.c      |  10 +++++
> > > > > > > > >  include/hw/pci/pci.h     |   3 ++
> > > > > > > > >  include/hw/pci/pci_bus.h |   1 +
> > > > > > > > >  4 files changed, 118 insertions(+), 11 deletions(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > > > > > > > index d8a1b11..1f4e707 100644
> > > > > > > > > --- a/hw/pci/pci.c
> > > > > > > > > +++ b/hw/pci/pci.c
> > > > > > > > > @@ -283,17 +283,76 @@ const char *pci_root_bus_path(PCIDevice *dev)
> > > > > > > > >      return rootbus->qbus.name;
> > > > > > > > >  }
> > > > > > > > >  
> > > > > > > > > +static PCIDevice *pci_bus_find_host(PCIBus *bus)
> > > > > > > > > +{
> > > > > > > > > +    PCIDevice *dev;
> > > > > > > > > +    int i;
> > > > > > > > > +
> > > > > > > > > +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> > > > > > > > > +        dev = bus->devices[i];
> > > > > > > > > +        if (dev) {
> > > > > > > > > +            PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> > > > > > > > > +            if (pc->class_id == PCI_CLASS_BRIDGE_HOST) {
> > > > > > > > 
> > > > > > > > In fact you can do this easier:
> > > > > > > > 	 pci_get_word(dev->config + PCI_CLASS_DEVICE) == PCI_CLASS_BRIDGE_HOST
> > > > > > > Thanks, I'll change.
> > > > > > > > 
> > > > > > > > > +                return dev;
> > > > > > > > > +            }
> > > > > > > > > +        }
> > > > > > > > > +    }
> > > > > > > > > +
> > > > > > > > > +    return NULL;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +static void master_abort(void *opaque)
> > > > > > > > > +{
> > > > > > > > > +    PCIDevice *master_dev = NULL;
> > > > > > > > > +    PCIDeviceClass *pc;
> > > > > > > > > +    PCIBus *bus;
> > > > > > > > > +    int downstream;
> > > > > > > > 
> > > > > > > > bool?
> > > > > > > Yes, I'll change
> > > > > > > 
> > > > > > > > > +
> > > > > > > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_DEVICE)) {
> > > > > > > > 
> > > > > > > > Please don't do dynamic cast just because you can.
> > > > > > > > You know very well what kind of object you pass
> > > > > > > > when you create the MR.
> > > > > > > > So just call the appropriate function.
> > > > > > > I wanted to merge the code for all 3 entities involved:
> > > > > > > root PCIBus, PCIBus behind a bridge, and PCIDevice.
> > > > > > > The region for merge was to use the same master_abort_mem_ops
> > > > > > > because there are the same operations that must be done:
> > > > > > > return -1 (0xFFF...) and set master abort received bit. 
> > > > > > 
> > > > > > return -1 is not a lot of common code, and MA bit
> > > > > > is in a different place each time.
> > > > > Ack
> > > > > 
> > > > > > 
> > > > > > > > 
> > > > > > > > > +        master_dev = PCI_DEVICE(opaque);
> > > > > > > > > +        bus = master_dev->bus;
> > > > > > > > > +        while (!pci_bus_is_root(bus)) {
> > > > > > > > > +            master_dev = bus->parent_dev;
> > > > > > > > > +            bus = master_dev->bus;
> > > > > > > > > +        }
> > > > > > > > > +        downstream = 0;
> > > > > > > > > +    }
> > > > > > > > > +
> > > > > > > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_BUS)) {
> > > > > > > > > +        bus = PCI_BUS(opaque);
> > > > > > > > > +        if (pci_bus_is_root(bus)) {
> > > > > > > > > +            master_dev = pci_bus_find_host(bus);
> > > > > > > > > +        } else { /* bus behind a PCI-2-PCI bridge */
> > > > > > > > > +            master_dev = bus->parent_dev;
> > > > > > > > > +        }
> > > > > > > > 
> > > > > > > > So the reason for this is that device to device MA
> > > > > > > > is still not implemented here, correct?
> > > > > > > Nope, the above code differentiate 2 scenarios:
> > > > > > > 1. the bus is the root bus => look for host bridge
> > > > > > >    to update STATUS register
> > > > > > > 2. the bus is bridge's secondary bus =>
> > > > > > >    update bridge STATUS register
> > > > > > > "Device 2 Device" is handled implicitly because we have
> > > > > > > a background region covering all device's target memory
> > > > > > > 
> > > > > > > > If it was it would be just one instance of it.
> > > > > > > > I'm not saying this must block this patch, however
> > > > > > > > there should be a code comment to this effect,
> > > > > > > > and commit log should mention this explicitly.
> > > > > > > As above, "Device 2 Device" is handled implicit
> > > > > > 
> > > > > > Implicit => confused.
> > > > > > 
> > > > > > I'm confused. Where's the code that sets MA bit on
> > > > > > the initiator device?
> > > > > Is the code that adds the background bus_master_abort_mem region to
> > > > > bus_master_enable_region.
> > > > > bus_master_enable_region is the target for all PCIDevice
> > > > > reads and writes(if I understand correctly). So it doesn't matter if
> > > > > a device initiate a transaction to another device or to memory,
> > > > > it will do it using the bus_master_enable_region.
> > > > > The master_abort() method will recognize this as an
> > > > > upstream transaction. 
> > > > 
> > > > But if target is behind a bridge then master does
> > > > not detect the MA, it's the bridge that does it
> > > > (in PCI, express seems to be different).
> > > The current code handles upstream transactions as follows:
> > > 1. If the PCI device is behind a bridge, finds
> > >    recursively the corresponding bridge
> > >    on the root bus and modifies its status register.
> > 
> > Why recursively?
> > I think you need to set MA bit on the bit in bridge right
> > above the device.
> If a PCI Device behind a bridge issues a transaction:
>  - From my understanding, any address outside the bridge address range
>    will be claimed by the bridge.

By parent bridge, yes.

>  - This will happen recursively until we reach the root bus (assuming
>    that the target is not another device).

Not necessarily. Another bridge can claim it then
terminate with MA.

Example:

-[0000:00]-+-00.0
           +-02.0
           +-16.0
           +-16.3
           +-19.0
           +-1a.0
           +-1b.0
           +-1c.0-[02]--
           +-1c.1-[03]----00.0
           +-1c.3-[05-0c]--
           +-1c.4-[0d]--+-00.0
           |            \-00.3



Device 03:00.0 writes to within memory window of 00:1c.4,
but outside BARs of both 0d:00.0 and 0d:00.3.

On PCI, I think MA is set in sec status register of 00:1c.4.


>  - The bridge on the bus will set MA bit.


See above.

> > 
> > > 2. If the PCI device is not behind the bridge, directly
> > >    modifies the device status register
> > 
> > 
> > The issue really isn't whether the PCI device
> > (which PCI device?) is behind the bridge.
> > Rather it's whether the address is claimed by some bridge
> > on the same bus as the master, or by a parent bridge.
> This can be recursive, no?
> 
> > 
> > If it is not claimed, MA bit in the original master
> > is set.
> > 
> > If it is claimed, master becomes some bridge:
> > either bridge on the secondary bus (forwarding downstream) or a parent bridge
> > (forwarding upstream) of the bus, then repeat the logic, until we find a
> > bus where it is not claimed.
> I agree, the "repeat logic" is the response to your question "Why recursively?"
> 
> > 
> > At that point to know where in bridge config to set the bit, look at the last step we
> > took.  If it was a parent bus (transaction was forwarded upstream) set
> > primary status, otherwise (it was a bridge on the secondary bus)
> > set secondary status.
> > 
> > The last piece of puzzle is that for transactions coming from
> > outside the PCI domain, pci host bridge is the original master.
> > 
> > 
> > This applies to memory,io and configuration cycles.
> > 
> > This is for PCI.
> I agree.
> 
> > 
> > > Regarding PCI express, I'll check this issue.
> > > Thanks,
> > > Marcel
> > 
> > As far as I can tell, for express instead of the last bridge
> > detecting UR (and setting MA), all bridges on the path detect UR and the
> > original master detects UR, in all cases.
> Thanks
> 
> > 
> > Also, UR apparently can only be detected by non-posted transactions:
> > mem/io reads and configuration. Even on the same "bus".
> Also "Posted Write Transactions", PCI-2-PCI bridge Spec (6.3.2)
> How can we differentiate in Qemu posted from non-posted transcations
> anyway?
> 
> > 
> > If you cross a pci to express bridge (direct bridge)
> > then the bridge is the original master and
> > UR is detected on path from it on.
> > 
> > If you cross an express to pci bridge (reversed bridge)
> > you lose this info and only the last master sets MA.
> Thanks, I'll look into it,
> Marcel
> 
> > 
> > > > 
> > > > 
> > > > > > Maybe if you split each case properly it will become
> > > > > > clear.
> > > > > I hope it will.
> > > > > 
> > > > > > 
> > > > > > > > 
> > > > > > > > > +        downstream = 1;
> > > > > > > > > +    }
> > > > > > > > > +
> > > > > > > > > +    assert(master_dev);
> > > > > > > > > +    pc = PCI_DEVICE_GET_CLASS(master_dev);
> > > > > > > > > +
> > > > > > > > 
> > > > > > > > There's very little common code between devices and
> > > > > > > > buses. So just use 2 functions.
> > > > > > > As stated above:
> > > > > > > The region for merge was to use the same master_abort_mem_ops
> > > > > > > because there are the same operations that must be done:
> > > > > > > return -1 (0xFFF...) and set master abort received bit.
> > > > > > > Separating them will result in some code duplication.
> > > > > > 
> > > > > > About 2 lines. But you'll lose the nasty dynamic casts.
> > > > > > opaque pointers are bad enough.
> > > > > Ack
> > > > > 
> > > > > > 
> > > > > > > The read and write methods are the same, but would call different
> > > > > > > "master_abort" methods.
> > > > > > > If it does bother you the down casting anyway, I'll change.
> > > > > > 
> > > > > > Pls do.
> > > > > Ack
> > > > > 
> > > > > > 
> > > > > > > > 
> > > > > > > > > +    if (downstream && pc->is_bridge) {
> > > > > > > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_SEC_STATUS,
> > > > > > > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > > > > > > +    } else {
> > > > > > > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_STATUS,
> > > > > > > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > > > > > > +    }
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >  static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
> > > > > > > > >  {
> > > > > > > > > -   return -1ULL;
> > > > > > > > 
> > > > > > > > I didn't notice but this means there were 3 spaces here in previous
> > > > > > > > patch?
> > > > > > > yes :(, checkpatch didn't catch it
> > > > > > > 
> > > > > > > Thanks for the review,
> > > > > > > Marcel
> > > > > > > 
> > > > > > > > 
> > > > > > > > > +    master_abort(opaque);
> > > > > > > > > +    return -1ULL;
> > > > > > > > >  }
> > > > > > > > >  
> > > > > > > > >  static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
> > > > > > > > >                                     unsigned size)
> > > > > > > > >  {
> > > > > > > > > +    master_abort(opaque);
> > > > > > > > >  }
> > > > > > > > >  
> > > > > > > > > -static const MemoryRegionOps master_abort_mem_ops = {
> > > > > > > > > +static const MemoryRegionOps master_abort_ops = {
> > > > > > > > >      .read = master_abort_mem_read,
> > > > > > > > >      .write = master_abort_mem_write,
> > > > > > > > >      .endianness = DEVICE_LITTLE_ENDIAN,
> > > > > > > > > @@ -301,6 +360,23 @@ static const MemoryRegionOps master_abort_mem_ops = {
> > > > > > > > >  
> > > > > > > > >  #define MASTER_ABORT_MEM_PRIORITY INT_MIN
> > > > > > > > >  
> > > > > > > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io)
> > > > > > > > > +{
> > > > > > > > > +    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > > > > > > +                          &master_abort_ops, bus, mem,
> > > > > > > > > +                          memory_region_size(bus->address_space_mem));
> > > > > > > > > +    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > > > > > > +                                        0, &bus->master_abort_mem,
> > > > > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > > > +
> > > > > > > > > +    memory_region_init_io(&bus->master_abort_io, OBJECT(bus),
> > > > > > > > > +                          &master_abort_ops, bus, io,
> > > > > > > > > +                          memory_region_size(bus->address_space_io));
> > > > > > > > > +    memory_region_add_subregion_overlap(bus->address_space_io,
> > > > > > > > > +                                        0, &bus->master_abort_io,
> > > > > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > > > > > > >                           const char *name,
> > > > > > > > >                           MemoryRegion *address_space_mem,
> > > > > > > > > @@ -312,13 +388,8 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > > > > > > >      bus->address_space_mem = address_space_mem;
> > > > > > > > >      bus->address_space_io = address_space_io;
> > > > > > > > >  
> > > > > > > > > -
> > > > > > > > > -    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > > > > > > -                          &master_abort_mem_ops, bus, "pci-master-abort",
> > > > > > > > > -                          memory_region_size(bus->address_space_mem));
> > > > > > > > > -    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > > > > > > -                                        0, &bus->master_abort_mem,
> > > > > > > > > -                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > > > +    pci_bus_master_abort_init(bus, "pci-master-abort-mem",
> > > > > > > > > +                              "pci-master-abort-io");
> > > > > > > > >  
> > > > > > > > >      /* host bridge */
> > > > > > > > >      QLIST_INIT(&bus->child);
> > > > > > > > > @@ -840,9 +911,24 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> > > > > > > > >      pci_dev->bus = bus;
> > > > > > > > >      dma_as = pci_device_iommu_address_space(pci_dev);
> > > > > > > > >  
> > > > > > > > > -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> > > > > > > > > -                             OBJECT(pci_dev), "bus master",
> > > > > > > > > +    memory_region_init(&pci_dev->bus_master_enable_region, NULL,
> > > > > > > > > +                       "bus master", UINT64_MAX);
> > > > > > > > > +
> > > > > > > > > +    memory_region_init_io(&pci_dev->bus_master_abort_mem, OBJECT(bus),
> > > > > > > > > +                          &master_abort_ops, OBJECT(pci_dev),
> > > > > > > > > +                          "bus master master-abort",
> > > > > > > > > +                          memory_region_size(&pci_dev->bus_master_enable_region));
> > > > > > > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > > > > > > +                                        0, &pci_dev->bus_master_abort_mem,
> > > > > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > > > +
> > > > > > > > > +    memory_region_init_alias(&pci_dev->bus_master_dma_mem,
> > > > > > > > > +                             OBJECT(pci_dev), "bus master dma",
> > > > > > > > >                               dma_as->root, 0, memory_region_size(dma_as->root));
> > > > > > > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > > > > > > +                                        0, &pci_dev->bus_master_dma_mem, 0);
> > > > > > > > > +
> > > > > > > > > +
> > > > > > > > >      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> > > > > > > > >      address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
> > > > > > > > >                         name);
> > > > > > > > > @@ -901,6 +987,13 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
> > > > > > > > >      pci_config_free(pci_dev);
> > > > > > > > >  
> > > > > > > > >      address_space_destroy(&pci_dev->bus_master_as);
> > > > > > > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > > > > > > +                                &pci_dev->bus_master_dma_mem);
> > > > > > > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > > > > > > +                                &pci_dev->bus_master_abort_mem);
> > > > > > > > > +    memory_region_destroy(&pci_dev->bus_master_dma_mem);
> > > > > > > > > +    memory_region_destroy(&pci_dev->bus_master_abort_mem);
> > > > > > > > > +
> > > > > > > > >      memory_region_destroy(&pci_dev->bus_master_enable_region);
> > > > > > > > >  }
> > > > > > > > >  
> > > > > > > > > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > > > > > > > > index e6b22b8..56b682f 100644
> > > > > > > > > --- a/hw/pci/pci_bridge.c
> > > > > > > > > +++ b/hw/pci/pci_bridge.c
> > > > > > > > > @@ -376,6 +376,10 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
> > > > > > > > >      sec_bus->address_space_io = &br->address_space_io;
> > > > > > > > >      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
> > > > > > > > >      br->windows = pci_bridge_region_init(br);
> > > > > > > > > +
> > > > > > > > > +    pci_bus_master_abort_init(sec_bus, "pci_bridge_master_abort_mem",
> > > > > > > > > +                              "pci_bridge_master_abort_io");
> > > > > > > > > +
> > > > > > > > >      QLIST_INIT(&sec_bus->child);
> > > > > > > > >      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
> > > > > > > > >      return 0;
> > > > > > > > > @@ -389,6 +393,12 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
> > > > > > > > >      QLIST_REMOVE(&s->sec_bus, sibling);
> > > > > > > > >      pci_bridge_region_del(s, s->windows);
> > > > > > > > >      pci_bridge_region_cleanup(s, s->windows);
> > > > > > > > > +    memory_region_del_subregion(s->sec_bus.address_space_mem,
> > > > > > > > > +                                &s->sec_bus.master_abort_mem);
> > > > > > > > > +    memory_region_del_subregion(s->sec_bus.address_space_io,
> > > > > > > > > +                                &s->sec_bus.master_abort_io);
> > > > > > > > > +    memory_region_destroy(&s->sec_bus.master_abort_mem);
> > > > > > > > > +    memory_region_destroy(&s->sec_bus.master_abort_io);
> > > > > > > > >      memory_region_destroy(&s->address_space_mem);
> > > > > > > > >      memory_region_destroy(&s->address_space_io);
> > > > > > > > >      /* qbus_free() is called automatically by qdev_free() */
> > > > > > > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > > > > > > index 37979aa..d69e06d 100644
> > > > > > > > > --- a/include/hw/pci/pci.h
> > > > > > > > > +++ b/include/hw/pci/pci.h
> > > > > > > > > @@ -242,6 +242,8 @@ struct PCIDevice {
> > > > > > > > >      PCIIORegion io_regions[PCI_NUM_REGIONS];
> > > > > > > > >      AddressSpace bus_master_as;
> > > > > > > > >      MemoryRegion bus_master_enable_region;
> > > > > > > > > +    MemoryRegion bus_master_dma_mem;
> > > > > > > > > +    MemoryRegion bus_master_abort_mem;
> > > > > > > > >  
> > > > > > > > >      /* do not access the following fields */
> > > > > > > > >      PCIConfigReadFunc *config_read;
> > > > > > > > > @@ -357,6 +359,7 @@ PCIBus *pci_bus_new(DeviceState *parent, const char *name,
> > > > > > > > >                      MemoryRegion *address_space_mem,
> > > > > > > > >                      MemoryRegion *address_space_io,
> > > > > > > > >                      uint8_t devfn_min, const char *typename);
> > > > > > > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io);
> > > > > > > > >  void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
> > > > > > > > >                    void *irq_opaque, int nirq);
> > > > > > > > >  int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
> > > > > > > > > diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> > > > > > > > > index 2ad5edb..57b1541 100644
> > > > > > > > > --- a/include/hw/pci/pci_bus.h
> > > > > > > > > +++ b/include/hw/pci/pci_bus.h
> > > > > > > > > @@ -24,6 +24,7 @@ struct PCIBus {
> > > > > > > > >      MemoryRegion *address_space_mem;
> > > > > > > > >      MemoryRegion *address_space_io;
> > > > > > > > >      MemoryRegion master_abort_mem;
> > > > > > > > > +    MemoryRegion master_abort_io;
> > > > > > > > >  
> > > > > > > > >      QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
> > > > > > > > >      QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
> > > > > > > > > -- 
> > > > > > > > > 1.8.3.1
> > > > > > > 
> > > > > > > 
> > > > > 
> > > > > 
> > > 
> > > 
> 
>
Marcel Apfelbaum Sept. 24, 2013, 8:44 a.m. UTC | #10
On Tue, 2013-09-24 at 11:29 +0300, Michael S. Tsirkin wrote:
> On Tue, Sep 24, 2013 at 11:07:19AM +0300, Marcel Apfelbaum wrote:
> > On Mon, 2013-09-23 at 21:45 +0300, Michael S. Tsirkin wrote:
> > > On Mon, Sep 23, 2013 at 08:49:53PM +0300, Marcel Apfelbaum wrote:
> > > > On Mon, 2013-09-23 at 18:10 +0300, Michael S. Tsirkin wrote:
> > > > > On Mon, Sep 23, 2013 at 05:43:38PM +0300, Marcel Apfelbaum wrote:
> > > > > > On Mon, 2013-09-23 at 16:45 +0300, Michael S. Tsirkin wrote:
> > > > > > > On Mon, Sep 23, 2013 at 03:37:43PM +0300, Marcel Apfelbaum wrote:
> > > > > > > > On Mon, 2013-09-23 at 14:27 +0300, Michael S. Tsirkin wrote:
> > > > > > > > > On Mon, Sep 23, 2013 at 02:01:17PM +0300, Marcel Apfelbaum wrote:
> > > > > > > > > > This patch is implemented on top of series:
> > > > > > > > > > [PATCH v5 0/3] pci: implement upstream master abort protocol
> > > > > > > > > > 
> > > > > > > > > > Added "master abort io" background region for PCIBus.
> > > > > > > > > > 
> > > > > > > > > > Added "master abort mem" region to catch transactions initiated
> > > > > > > > > > by pci devices targeted to unassigned addresses.
> > > > > > > > > > 
> > > > > > > > > > Enabled "master abort" regions for PCI-2-PCI bridge's secondary bus.
> > > > > > > > > > 
> > > > > > > > > > Set "Received Master Abort" Bit on Status/Secondary Status
> > > > > > > > > > register as defined in the PCI Spec.
> > > > > > > > > > 
> > > > > > > > > > Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> > > > > > > > > 
> > > > > > > > > I think it will be easier to review the complete
> > > > > > > > > series, not an incremental patch.
> > > > > > > > Should I combine this with the prev series without the
> > > > > > > > memory patches?
> > > > > > > > 
> > > > > > > > > 
> > > > > > > > > We probably need to think what's the right thing
> > > > > > > > > to do for master abort mode since we do not
> > > > > > > > > emulate SERR. Do we make it writeable at all?
> > > > > > > > Master abort mode can be enabled. (Tested)
> > > > > > > > > 
> > > > > > > > > It's a pci only bit:
> > > > > > > > > 	When the Master-Abort Mode bit is cleared and a posted write transaction
> > > > > > > > > 	forwarded by the
> > > > > > > > > 	bridge terminates with a Master-Abort, no error is reported (note that
> > > > > > > > > 	the Master-Abort bit
> > > > > > > > > 	is still set). When the Master-Abort Mode bit is set and a posted write
> > > > > > > > > 	transaction forwarded
> > > > > > > > > 	by the bridge terminates with a Master-Abort on the destination bus, the
> > > > > > > > > 	bridge must:
> > > > > > > > > 	Assert SERR# on the primary interfaceCommand register)
> > > > > > > > > 	(if enabled by the SERR# Enable bitin the
> > > > > > > > > 	Set the Signaled System Errorbit in the Command register)
> > > > > > > > > 	bitin the Status register (if enabled by the SERR# Enable)
> > > > > > > > > 
> > > > > > > > > one way to do this would be not to set Master-Abort bit even
> > > > > > > > > though spec says we should, and pretend there was no error.
> > > > > > > > Maybe we can just not allow to set "Master abort mode"
> > > > > > > > in BRIDGE_CONTROL register. It is a cleaner way (I think)
> > > > > > > > considering we don't emulate SERR.
> > > > > > > 
> > > > > > > I'm not sure P2P spec allows this - want to check.
> > > > > > I will check this too, thanks
> > > > > > 
> > > > > > > It's the right thing to do for express.
> > > > > > > 
> > > > > > > Also pls note that cross-version migration
> > > > > > > requires that we keep it writable for old
> > > > > > > machine types.
> > > > > > Yes :(. It seems we need another solution.
> > > > > 
> > > > > Not really. Just set a flag "don't assert master abort"
> > > > > for old machine types.
> > > > OK, thanks
> > > > 
> > > > > 
> > > > > > > 
> > > > > > > > > 
> > > > > > > > > > ---
> > > > > > > > > >  hw/pci/pci.c             | 115 ++++++++++++++++++++++++++++++++++++++++++-----
> > > > > > > > > >  hw/pci/pci_bridge.c      |  10 +++++
> > > > > > > > > >  include/hw/pci/pci.h     |   3 ++
> > > > > > > > > >  include/hw/pci/pci_bus.h |   1 +
> > > > > > > > > >  4 files changed, 118 insertions(+), 11 deletions(-)
> > > > > > > > > > 
> > > > > > > > > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > > > > > > > > index d8a1b11..1f4e707 100644
> > > > > > > > > > --- a/hw/pci/pci.c
> > > > > > > > > > +++ b/hw/pci/pci.c
> > > > > > > > > > @@ -283,17 +283,76 @@ const char *pci_root_bus_path(PCIDevice *dev)
> > > > > > > > > >      return rootbus->qbus.name;
> > > > > > > > > >  }
> > > > > > > > > >  
> > > > > > > > > > +static PCIDevice *pci_bus_find_host(PCIBus *bus)
> > > > > > > > > > +{
> > > > > > > > > > +    PCIDevice *dev;
> > > > > > > > > > +    int i;
> > > > > > > > > > +
> > > > > > > > > > +    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
> > > > > > > > > > +        dev = bus->devices[i];
> > > > > > > > > > +        if (dev) {
> > > > > > > > > > +            PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> > > > > > > > > > +            if (pc->class_id == PCI_CLASS_BRIDGE_HOST) {
> > > > > > > > > 
> > > > > > > > > In fact you can do this easier:
> > > > > > > > > 	 pci_get_word(dev->config + PCI_CLASS_DEVICE) == PCI_CLASS_BRIDGE_HOST
> > > > > > > > Thanks, I'll change.
> > > > > > > > > 
> > > > > > > > > > +                return dev;
> > > > > > > > > > +            }
> > > > > > > > > > +        }
> > > > > > > > > > +    }
> > > > > > > > > > +
> > > > > > > > > > +    return NULL;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > > +static void master_abort(void *opaque)
> > > > > > > > > > +{
> > > > > > > > > > +    PCIDevice *master_dev = NULL;
> > > > > > > > > > +    PCIDeviceClass *pc;
> > > > > > > > > > +    PCIBus *bus;
> > > > > > > > > > +    int downstream;
> > > > > > > > > 
> > > > > > > > > bool?
> > > > > > > > Yes, I'll change
> > > > > > > > 
> > > > > > > > > > +
> > > > > > > > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_DEVICE)) {
> > > > > > > > > 
> > > > > > > > > Please don't do dynamic cast just because you can.
> > > > > > > > > You know very well what kind of object you pass
> > > > > > > > > when you create the MR.
> > > > > > > > > So just call the appropriate function.
> > > > > > > > I wanted to merge the code for all 3 entities involved:
> > > > > > > > root PCIBus, PCIBus behind a bridge, and PCIDevice.
> > > > > > > > The region for merge was to use the same master_abort_mem_ops
> > > > > > > > because there are the same operations that must be done:
> > > > > > > > return -1 (0xFFF...) and set master abort received bit. 
> > > > > > > 
> > > > > > > return -1 is not a lot of common code, and MA bit
> > > > > > > is in a different place each time.
> > > > > > Ack
> > > > > > 
> > > > > > > 
> > > > > > > > > 
> > > > > > > > > > +        master_dev = PCI_DEVICE(opaque);
> > > > > > > > > > +        bus = master_dev->bus;
> > > > > > > > > > +        while (!pci_bus_is_root(bus)) {
> > > > > > > > > > +            master_dev = bus->parent_dev;
> > > > > > > > > > +            bus = master_dev->bus;
> > > > > > > > > > +        }
> > > > > > > > > > +        downstream = 0;
> > > > > > > > > > +    }
> > > > > > > > > > +
> > > > > > > > > > +    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_BUS)) {
> > > > > > > > > > +        bus = PCI_BUS(opaque);
> > > > > > > > > > +        if (pci_bus_is_root(bus)) {
> > > > > > > > > > +            master_dev = pci_bus_find_host(bus);
> > > > > > > > > > +        } else { /* bus behind a PCI-2-PCI bridge */
> > > > > > > > > > +            master_dev = bus->parent_dev;
> > > > > > > > > > +        }
> > > > > > > > > 
> > > > > > > > > So the reason for this is that device to device MA
> > > > > > > > > is still not implemented here, correct?
> > > > > > > > Nope, the above code differentiate 2 scenarios:
> > > > > > > > 1. the bus is the root bus => look for host bridge
> > > > > > > >    to update STATUS register
> > > > > > > > 2. the bus is bridge's secondary bus =>
> > > > > > > >    update bridge STATUS register
> > > > > > > > "Device 2 Device" is handled implicitly because we have
> > > > > > > > a background region covering all device's target memory
> > > > > > > > 
> > > > > > > > > If it was it would be just one instance of it.
> > > > > > > > > I'm not saying this must block this patch, however
> > > > > > > > > there should be a code comment to this effect,
> > > > > > > > > and commit log should mention this explicitly.
> > > > > > > > As above, "Device 2 Device" is handled implicit
> > > > > > > 
> > > > > > > Implicit => confused.
> > > > > > > 
> > > > > > > I'm confused. Where's the code that sets MA bit on
> > > > > > > the initiator device?
> > > > > > Is the code that adds the background bus_master_abort_mem region to
> > > > > > bus_master_enable_region.
> > > > > > bus_master_enable_region is the target for all PCIDevice
> > > > > > reads and writes(if I understand correctly). So it doesn't matter if
> > > > > > a device initiate a transaction to another device or to memory,
> > > > > > it will do it using the bus_master_enable_region.
> > > > > > The master_abort() method will recognize this as an
> > > > > > upstream transaction. 
> > > > > 
> > > > > But if target is behind a bridge then master does
> > > > > not detect the MA, it's the bridge that does it
> > > > > (in PCI, express seems to be different).
> > > > The current code handles upstream transactions as follows:
> > > > 1. If the PCI device is behind a bridge, finds
> > > >    recursively the corresponding bridge
> > > >    on the root bus and modifies its status register.
> > > 
> > > Why recursively?
> > > I think you need to set MA bit on the bit in bridge right
> > > above the device.
> > If a PCI Device behind a bridge issues a transaction:
> >  - From my understanding, any address outside the bridge address range
> >    will be claimed by the bridge.
> 
> By parent bridge, yes.
> 
> >  - This will happen recursively until we reach the root bus (assuming
> >    that the target is not another device).
> 
> Not necessarily. Another bridge can claim it then
> terminate with MA.
> 
> Example:
> 
> -[0000:00]-+-00.0
>            +-02.0
>            +-16.0
>            +-16.3
>            +-19.0
>            +-1a.0
>            +-1b.0
>            +-1c.0-[02]--
>            +-1c.1-[03]----00.0
>            +-1c.3-[05-0c]--
>            +-1c.4-[0d]--+-00.0
>            |            \-00.3
> 
> 
> 
> Device 03:00.0 writes to within memory window of 00:1c.4,
> but outside BARs of both 0d:00.0 and 0d:00.3.
> 
> On PCI, I think MA is set in sec status register of 00:1c.4.
You are right, my code will work only under the assumption
that the devices do not communicate between them.
I will state the above in the next version.

Thanks,
Marcel

> 
> 
> >  - The bridge on the bus will set MA bit.
> 
> 
> See above.
> 
> > > 
> > > > 2. If the PCI device is not behind the bridge, directly
> > > >    modifies the device status register
> > > 
> > > 
> > > The issue really isn't whether the PCI device
> > > (which PCI device?) is behind the bridge.
> > > Rather it's whether the address is claimed by some bridge
> > > on the same bus as the master, or by a parent bridge.
> > This can be recursive, no?
> > 
> > > 
> > > If it is not claimed, MA bit in the original master
> > > is set.
> > > 
> > > If it is claimed, master becomes some bridge:
> > > either bridge on the secondary bus (forwarding downstream) or a parent bridge
> > > (forwarding upstream) of the bus, then repeat the logic, until we find a
> > > bus where it is not claimed.
> > I agree, the "repeat logic" is the response to your question "Why recursively?"
> > 
> > > 
> > > At that point to know where in bridge config to set the bit, look at the last step we
> > > took.  If it was a parent bus (transaction was forwarded upstream) set
> > > primary status, otherwise (it was a bridge on the secondary bus)
> > > set secondary status.
> > > 
> > > The last piece of puzzle is that for transactions coming from
> > > outside the PCI domain, pci host bridge is the original master.
> > > 
> > > 
> > > This applies to memory,io and configuration cycles.
> > > 
> > > This is for PCI.
> > I agree.
> > 
> > > 
> > > > Regarding PCI express, I'll check this issue.
> > > > Thanks,
> > > > Marcel
> > > 
> > > As far as I can tell, for express instead of the last bridge
> > > detecting UR (and setting MA), all bridges on the path detect UR and the
> > > original master detects UR, in all cases.
> > Thanks
> > 
> > > 
> > > Also, UR apparently can only be detected by non-posted transactions:
> > > mem/io reads and configuration. Even on the same "bus".
> > Also "Posted Write Transactions", PCI-2-PCI bridge Spec (6.3.2)
> > How can we differentiate in Qemu posted from non-posted transcations
> > anyway?
> > 
> > > 
> > > If you cross a pci to express bridge (direct bridge)
> > > then the bridge is the original master and
> > > UR is detected on path from it on.
> > > 
> > > If you cross an express to pci bridge (reversed bridge)
> > > you lose this info and only the last master sets MA.
> > Thanks, I'll look into it,
> > Marcel
> > 
> > > 
> > > > > 
> > > > > 
> > > > > > > Maybe if you split each case properly it will become
> > > > > > > clear.
> > > > > > I hope it will.
> > > > > > 
> > > > > > > 
> > > > > > > > > 
> > > > > > > > > > +        downstream = 1;
> > > > > > > > > > +    }
> > > > > > > > > > +
> > > > > > > > > > +    assert(master_dev);
> > > > > > > > > > +    pc = PCI_DEVICE_GET_CLASS(master_dev);
> > > > > > > > > > +
> > > > > > > > > 
> > > > > > > > > There's very little common code between devices and
> > > > > > > > > buses. So just use 2 functions.
> > > > > > > > As stated above:
> > > > > > > > The region for merge was to use the same master_abort_mem_ops
> > > > > > > > because there are the same operations that must be done:
> > > > > > > > return -1 (0xFFF...) and set master abort received bit.
> > > > > > > > Separating them will result in some code duplication.
> > > > > > > 
> > > > > > > About 2 lines. But you'll lose the nasty dynamic casts.
> > > > > > > opaque pointers are bad enough.
> > > > > > Ack
> > > > > > 
> > > > > > > 
> > > > > > > > The read and write methods are the same, but would call different
> > > > > > > > "master_abort" methods.
> > > > > > > > If it does bother you the down casting anyway, I'll change.
> > > > > > > 
> > > > > > > Pls do.
> > > > > > Ack
> > > > > > 
> > > > > > > 
> > > > > > > > > 
> > > > > > > > > > +    if (downstream && pc->is_bridge) {
> > > > > > > > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_SEC_STATUS,
> > > > > > > > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > > > > > > > +    } else {
> > > > > > > > > > +        pci_word_test_and_set_mask(master_dev->config + PCI_STATUS,
> > > > > > > > > > +                                   PCI_STATUS_REC_MASTER_ABORT);
> > > > > > > > > > +    }
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > >  static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
> > > > > > > > > >  {
> > > > > > > > > > -   return -1ULL;
> > > > > > > > > 
> > > > > > > > > I didn't notice but this means there were 3 spaces here in previous
> > > > > > > > > patch?
> > > > > > > > yes :(, checkpatch didn't catch it
> > > > > > > > 
> > > > > > > > Thanks for the review,
> > > > > > > > Marcel
> > > > > > > > 
> > > > > > > > > 
> > > > > > > > > > +    master_abort(opaque);
> > > > > > > > > > +    return -1ULL;
> > > > > > > > > >  }
> > > > > > > > > >  
> > > > > > > > > >  static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
> > > > > > > > > >                                     unsigned size)
> > > > > > > > > >  {
> > > > > > > > > > +    master_abort(opaque);
> > > > > > > > > >  }
> > > > > > > > > >  
> > > > > > > > > > -static const MemoryRegionOps master_abort_mem_ops = {
> > > > > > > > > > +static const MemoryRegionOps master_abort_ops = {
> > > > > > > > > >      .read = master_abort_mem_read,
> > > > > > > > > >      .write = master_abort_mem_write,
> > > > > > > > > >      .endianness = DEVICE_LITTLE_ENDIAN,
> > > > > > > > > > @@ -301,6 +360,23 @@ static const MemoryRegionOps master_abort_mem_ops = {
> > > > > > > > > >  
> > > > > > > > > >  #define MASTER_ABORT_MEM_PRIORITY INT_MIN
> > > > > > > > > >  
> > > > > > > > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io)
> > > > > > > > > > +{
> > > > > > > > > > +    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > > > > > > > +                          &master_abort_ops, bus, mem,
> > > > > > > > > > +                          memory_region_size(bus->address_space_mem));
> > > > > > > > > > +    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > > > > > > > +                                        0, &bus->master_abort_mem,
> > > > > > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > > > > +
> > > > > > > > > > +    memory_region_init_io(&bus->master_abort_io, OBJECT(bus),
> > > > > > > > > > +                          &master_abort_ops, bus, io,
> > > > > > > > > > +                          memory_region_size(bus->address_space_io));
> > > > > > > > > > +    memory_region_add_subregion_overlap(bus->address_space_io,
> > > > > > > > > > +                                        0, &bus->master_abort_io,
> > > > > > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > >  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > > > > > > > >                           const char *name,
> > > > > > > > > >                           MemoryRegion *address_space_mem,
> > > > > > > > > > @@ -312,13 +388,8 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> > > > > > > > > >      bus->address_space_mem = address_space_mem;
> > > > > > > > > >      bus->address_space_io = address_space_io;
> > > > > > > > > >  
> > > > > > > > > > -
> > > > > > > > > > -    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > > > > > > > > > -                          &master_abort_mem_ops, bus, "pci-master-abort",
> > > > > > > > > > -                          memory_region_size(bus->address_space_mem));
> > > > > > > > > > -    memory_region_add_subregion_overlap(bus->address_space_mem,
> > > > > > > > > > -                                        0, &bus->master_abort_mem,
> > > > > > > > > > -                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > > > > +    pci_bus_master_abort_init(bus, "pci-master-abort-mem",
> > > > > > > > > > +                              "pci-master-abort-io");
> > > > > > > > > >  
> > > > > > > > > >      /* host bridge */
> > > > > > > > > >      QLIST_INIT(&bus->child);
> > > > > > > > > > @@ -840,9 +911,24 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> > > > > > > > > >      pci_dev->bus = bus;
> > > > > > > > > >      dma_as = pci_device_iommu_address_space(pci_dev);
> > > > > > > > > >  
> > > > > > > > > > -    memory_region_init_alias(&pci_dev->bus_master_enable_region,
> > > > > > > > > > -                             OBJECT(pci_dev), "bus master",
> > > > > > > > > > +    memory_region_init(&pci_dev->bus_master_enable_region, NULL,
> > > > > > > > > > +                       "bus master", UINT64_MAX);
> > > > > > > > > > +
> > > > > > > > > > +    memory_region_init_io(&pci_dev->bus_master_abort_mem, OBJECT(bus),
> > > > > > > > > > +                          &master_abort_ops, OBJECT(pci_dev),
> > > > > > > > > > +                          "bus master master-abort",
> > > > > > > > > > +                          memory_region_size(&pci_dev->bus_master_enable_region));
> > > > > > > > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > > > > > > > +                                        0, &pci_dev->bus_master_abort_mem,
> > > > > > > > > > +                                        MASTER_ABORT_MEM_PRIORITY);
> > > > > > > > > > +
> > > > > > > > > > +    memory_region_init_alias(&pci_dev->bus_master_dma_mem,
> > > > > > > > > > +                             OBJECT(pci_dev), "bus master dma",
> > > > > > > > > >                               dma_as->root, 0, memory_region_size(dma_as->root));
> > > > > > > > > > +    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
> > > > > > > > > > +                                        0, &pci_dev->bus_master_dma_mem, 0);
> > > > > > > > > > +
> > > > > > > > > > +
> > > > > > > > > >      memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
> > > > > > > > > >      address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
> > > > > > > > > >                         name);
> > > > > > > > > > @@ -901,6 +987,13 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
> > > > > > > > > >      pci_config_free(pci_dev);
> > > > > > > > > >  
> > > > > > > > > >      address_space_destroy(&pci_dev->bus_master_as);
> > > > > > > > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > > > > > > > +                                &pci_dev->bus_master_dma_mem);
> > > > > > > > > > +    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
> > > > > > > > > > +                                &pci_dev->bus_master_abort_mem);
> > > > > > > > > > +    memory_region_destroy(&pci_dev->bus_master_dma_mem);
> > > > > > > > > > +    memory_region_destroy(&pci_dev->bus_master_abort_mem);
> > > > > > > > > > +
> > > > > > > > > >      memory_region_destroy(&pci_dev->bus_master_enable_region);
> > > > > > > > > >  }
> > > > > > > > > >  
> > > > > > > > > > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > > > > > > > > > index e6b22b8..56b682f 100644
> > > > > > > > > > --- a/hw/pci/pci_bridge.c
> > > > > > > > > > +++ b/hw/pci/pci_bridge.c
> > > > > > > > > > @@ -376,6 +376,10 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
> > > > > > > > > >      sec_bus->address_space_io = &br->address_space_io;
> > > > > > > > > >      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
> > > > > > > > > >      br->windows = pci_bridge_region_init(br);
> > > > > > > > > > +
> > > > > > > > > > +    pci_bus_master_abort_init(sec_bus, "pci_bridge_master_abort_mem",
> > > > > > > > > > +                              "pci_bridge_master_abort_io");
> > > > > > > > > > +
> > > > > > > > > >      QLIST_INIT(&sec_bus->child);
> > > > > > > > > >      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
> > > > > > > > > >      return 0;
> > > > > > > > > > @@ -389,6 +393,12 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
> > > > > > > > > >      QLIST_REMOVE(&s->sec_bus, sibling);
> > > > > > > > > >      pci_bridge_region_del(s, s->windows);
> > > > > > > > > >      pci_bridge_region_cleanup(s, s->windows);
> > > > > > > > > > +    memory_region_del_subregion(s->sec_bus.address_space_mem,
> > > > > > > > > > +                                &s->sec_bus.master_abort_mem);
> > > > > > > > > > +    memory_region_del_subregion(s->sec_bus.address_space_io,
> > > > > > > > > > +                                &s->sec_bus.master_abort_io);
> > > > > > > > > > +    memory_region_destroy(&s->sec_bus.master_abort_mem);
> > > > > > > > > > +    memory_region_destroy(&s->sec_bus.master_abort_io);
> > > > > > > > > >      memory_region_destroy(&s->address_space_mem);
> > > > > > > > > >      memory_region_destroy(&s->address_space_io);
> > > > > > > > > >      /* qbus_free() is called automatically by qdev_free() */
> > > > > > > > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > > > > > > > index 37979aa..d69e06d 100644
> > > > > > > > > > --- a/include/hw/pci/pci.h
> > > > > > > > > > +++ b/include/hw/pci/pci.h
> > > > > > > > > > @@ -242,6 +242,8 @@ struct PCIDevice {
> > > > > > > > > >      PCIIORegion io_regions[PCI_NUM_REGIONS];
> > > > > > > > > >      AddressSpace bus_master_as;
> > > > > > > > > >      MemoryRegion bus_master_enable_region;
> > > > > > > > > > +    MemoryRegion bus_master_dma_mem;
> > > > > > > > > > +    MemoryRegion bus_master_abort_mem;
> > > > > > > > > >  
> > > > > > > > > >      /* do not access the following fields */
> > > > > > > > > >      PCIConfigReadFunc *config_read;
> > > > > > > > > > @@ -357,6 +359,7 @@ PCIBus *pci_bus_new(DeviceState *parent, const char *name,
> > > > > > > > > >                      MemoryRegion *address_space_mem,
> > > > > > > > > >                      MemoryRegion *address_space_io,
> > > > > > > > > >                      uint8_t devfn_min, const char *typename);
> > > > > > > > > > +void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io);
> > > > > > > > > >  void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
> > > > > > > > > >                    void *irq_opaque, int nirq);
> > > > > > > > > >  int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
> > > > > > > > > > diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> > > > > > > > > > index 2ad5edb..57b1541 100644
> > > > > > > > > > --- a/include/hw/pci/pci_bus.h
> > > > > > > > > > +++ b/include/hw/pci/pci_bus.h
> > > > > > > > > > @@ -24,6 +24,7 @@ struct PCIBus {
> > > > > > > > > >      MemoryRegion *address_space_mem;
> > > > > > > > > >      MemoryRegion *address_space_io;
> > > > > > > > > >      MemoryRegion master_abort_mem;
> > > > > > > > > > +    MemoryRegion master_abort_io;
> > > > > > > > > >  
> > > > > > > > > >      QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
> > > > > > > > > >      QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
> > > > > > > > > > -- 
> > > > > > > > > > 1.8.3.1
> > > > > > > > 
> > > > > > > > 
> > > > > > 
> > > > > > 
> > > > 
> > > > 
> > 
> >
Michael S. Tsirkin Sept. 24, 2013, 8:58 a.m. UTC | #11
corrected Anthony's mail.

On Tue, Sep 24, 2013 at 11:44:57AM +0300, Marcel Apfelbaum wrote:
> > Not necessarily. Another bridge can claim it then
> > terminate with MA.
> > 
> > Example:
> > 
> > -[0000:00]-+-00.0
> >            +-02.0
> >            +-16.0
> >            +-16.3
> >            +-19.0
> >            +-1a.0
> >            +-1b.0
> >            +-1c.0-[02]--
> >            +-1c.1-[03]----00.0
> >            +-1c.3-[05-0c]--
> >            +-1c.4-[0d]--+-00.0
> >            |            \-00.3
> > 
> > 
> > 
> > Device 03:00.0 writes to within memory window of 00:1c.4,
> > but outside BARs of both 0d:00.0 and 0d:00.3.
> > 
> > On PCI, I think MA is set in sec status register of 00:1c.4.
> You are right, my code will work only under the assumption
> that the devices do not communicate between them.
> I will state the above in the next version.
> 
> Thanks,
> Marcel

How hard is it to fix properly?
If that's hard, would it be easier to implement express
semantics unconditionally?
Marcel Apfelbaum Sept. 24, 2013, 10:44 a.m. UTC | #12
On Tue, 2013-09-24 at 11:58 +0300, Michael S. Tsirkin wrote:
> corrected Anthony's mail.
> 
> On Tue, Sep 24, 2013 at 11:44:57AM +0300, Marcel Apfelbaum wrote:
> > > Not necessarily. Another bridge can claim it then
> > > terminate with MA.
> > > 
> > > Example:
> > > 
> > > -[0000:00]-+-00.0
> > >            +-02.0
> > >            +-16.0
> > >            +-16.3
> > >            +-19.0
> > >            +-1a.0
> > >            +-1b.0
> > >            +-1c.0-[02]--
> > >            +-1c.1-[03]----00.0
> > >            +-1c.3-[05-0c]--
> > >            +-1c.4-[0d]--+-00.0
> > >            |            \-00.3
> > > 
> > > 
> > > 
> > > Device 03:00.0 writes to within memory window of 00:1c.4,
> > > but outside BARs of both 0d:00.0 and 0d:00.3.
> > > 
> > > On PCI, I think MA is set in sec status register of 00:1c.4.
> > You are right, my code will work only under the assumption
> > that the devices do not communicate between them.
> > I will state the above in the next version.
> > 
> > Thanks,
> > Marcel
> 
> How hard is it to fix properly?
We need to check all the bridges on each bus encountered 
for their address range; if it corresponds to the transaction address,
we pass the bridge to the other bus(depending on transaction's direction).

> If that's hard, would it be easier to implement express
> semantics unconditionally?
I think PCI Express would follow the same algorithm anyway,
Thanks,
Marcel

> 
>
Peter Maydell Sept. 24, 2013, 10:55 a.m. UTC | #13
On 24 September 2013 19:44, Marcel Apfelbaum <marcel.a@redhat.com> wrote:
> We need to check all the bridges on each bus encountered
> for their address range; if it corresponds to the transaction address,
> we pass the bridge to the other bus(depending on transaction's direction).

I haven't looked at all at the details, but you can probably rephrase
this kind of "check address against range and pass recursively
to other bridge" algorithm in terms of appropriate statically
constructed memory regions. Since "transaction aborted" is
definitely not a fast path, the only argument for doing it that way
would be if the code worked out more neatly -- maybe worth
thinking about whether it would do so?

-- PMM
Marcel Apfelbaum Sept. 24, 2013, 11:17 a.m. UTC | #14
On Tue, 2013-09-24 at 19:55 +0900, Peter Maydell wrote:
> On 24 September 2013 19:44, Marcel Apfelbaum <marcel.a@redhat.com> wrote:
> > We need to check all the bridges on each bus encountered
> > for their address range; if it corresponds to the transaction address,
> > we pass the bridge to the other bus(depending on transaction's direction).
> 
> I haven't looked at all at the details, but you can probably rephrase
> this kind of "check address against range and pass recursively
> to other bridge" algorithm in terms of appropriate statically
> constructed memory regions. Since "transaction aborted" is
> definitely not a fast path, the only argument for doing it that way
> would be if the code worked out more neatly -- maybe worth
> thinking about whether it would do so?
I didn't fully understand your comment, please let me explain:

A PCI Device issues a transaction to an unassigned address,
which is in a range corresponding to a bridge on some "upper"
(close to CPU) bus.

The region that will "catch" this access is a background region
"behind" the "target" memory region (bus_master_enable_region).

At this point we know:
1. The PCI device that initiated the transaction
2. The transaction's address 

I was suggesting an algorithm to find the MA device in order
to set MA Received Bit in its Status(Sec_Status) register.

The algorithm was to traverse the PCI buses for finding the
MA device using the transaction address.

Marcel

> 
> -- PMM
Peter Maydell Sept. 24, 2013, 11:21 a.m. UTC | #15
On 24 September 2013 20:17, Marcel Apfelbaum <marcel.a@redhat.com> wrote:
> I was suggesting an algorithm to find the MA device in order
> to set MA Received Bit in its Status(Sec_Status) register.
>
> The algorithm was to traverse the PCI buses for finding the
> MA device using the transaction address.

Yes. My point is that if you have an algorithm phrased as
"dynamically traverse some hierarchy using an address
to find something" then you can rephrase it as "statically
construct a hierarchy of memory regions and then just
query it with the address".

-- PMM
Marcel Apfelbaum Sept. 24, 2013, 11:41 a.m. UTC | #16
On Tue, 2013-09-24 at 20:21 +0900, Peter Maydell wrote:
> On 24 September 2013 20:17, Marcel Apfelbaum <marcel.a@redhat.com> wrote:
> > I was suggesting an algorithm to find the MA device in order
> > to set MA Received Bit in its Status(Sec_Status) register.
> >
> > The algorithm was to traverse the PCI buses for finding the
> > MA device using the transaction address.
> 
> Yes. My point is that if you have an algorithm phrased as
> "dynamically traverse some hierarchy using an address
> to find something" then you can rephrase it as "statically
> construct a hierarchy of memory regions and then just
> query it with the address".
I got it, thanks.
You mean that instead of traveling on the PCI buses, I could
query the PCI address space and find the MemoryRegion corresponding
with the address, then look for the owner.

Thank you, I'll try it.
Marcel
  
> 
> -- PMM
>
Michael S. Tsirkin Sept. 24, 2013, 3:41 p.m. UTC | #17
On Tue, Sep 24, 2013 at 08:21:50PM +0900, Peter Maydell wrote:
> On 24 September 2013 20:17, Marcel Apfelbaum <marcel.a@redhat.com> wrote:
> > I was suggesting an algorithm to find the MA device in order
> > to set MA Received Bit in its Status(Sec_Status) register.
> >
> > The algorithm was to traverse the PCI buses for finding the
> > MA device using the transaction address.
> 
> Yes. My point is that if you have an algorithm phrased as
> "dynamically traverse some hierarchy using an address
> to find something" then you can rephrase it as "statically
> construct a hierarchy of memory regions and then just
> query it with the address".
> 
> -- PMM

Right. You might be able to use MR hierarchy to find the last bridge to
claim transaction.  That should to be enough for PCI, for express you
then need to find all devices on the path between bus master and the
last bridge.  For this task, memory subsystem can't be used I think.

Whether the result will be cleaner than open-coding it all, I don't
really know.
Michael S. Tsirkin Sept. 24, 2013, 4:24 p.m. UTC | #18
On Tue, Sep 24, 2013 at 06:41:11PM +0300, Michael S. Tsirkin wrote:
> On Tue, Sep 24, 2013 at 08:21:50PM +0900, Peter Maydell wrote:
> > On 24 September 2013 20:17, Marcel Apfelbaum <marcel.a@redhat.com> wrote:
> > > I was suggesting an algorithm to find the MA device in order
> > > to set MA Received Bit in its Status(Sec_Status) register.
> > >
> > > The algorithm was to traverse the PCI buses for finding the
> > > MA device using the transaction address.
> > 
> > Yes. My point is that if you have an algorithm phrased as
> > "dynamically traverse some hierarchy using an address
> > to find something" then you can rephrase it as "statically
> > construct a hierarchy of memory regions and then just
> > query it with the address".
> > 
> > -- PMM
> 
> Right. You might be able to use MR hierarchy to find the last bridge to
> claim transaction.  That should to be enough for PCI, for express you
> then need to find all devices on the path between bus master and the
> last bridge.  For this task, memory subsystem can't be used I think.

Specifically, I think we need at least two things:
- initiator of the transaction
- last bridge to claim transaction

any idea how to find both at the same time?

> Whether the result will be cleaner than open-coding it all, I don't
> really know.
> 
> -- 
> MST
Peter Maydell Sept. 24, 2013, 11:36 p.m. UTC | #19
On 25 September 2013 00:41, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Tue, Sep 24, 2013 at 08:21:50PM +0900, Peter Maydell wrote:
> Right. You might be able to use MR hierarchy to find the last bridge to
> claim transaction.  That should to be enough for PCI, for express you
> then need to find all devices on the path between bus master and the
> last bridge.  For this task, memory subsystem can't be used I think.

Mmm, if you need to do something for every device on the
path then that's more awkward with MRs.

> Whether the result will be cleaner than open-coding it all, I don't
> really know.

Yes. I was just suggesting it was worth considering at least to
the point of being able to make a decision about which design
is likely to be cleaner. If PCI express needs the dynamic walk
of the path then it's probably better to handle plain PCI that
way too for consistency.

-- PMM
Michael S. Tsirkin Sept. 24, 2013, 11:43 p.m. UTC | #20
On Wed, Sep 25, 2013 at 08:36:09AM +0900, Peter Maydell wrote:
> On 25 September 2013 00:41, Michael S. Tsirkin <mst@redhat.com> wrote:
> > On Tue, Sep 24, 2013 at 08:21:50PM +0900, Peter Maydell wrote:
> > Right. You might be able to use MR hierarchy to find the last bridge to
> > claim transaction.  That should to be enough for PCI, for express you
> > then need to find all devices on the path between bus master and the
> > last bridge.  For this task, memory subsystem can't be used I think.
> 
> Mmm, if you need to do something for every device on the
> path then that's more awkward with MRs.
>
> > Whether the result will be cleaner than open-coding it all, I don't
> > really know.
> 
> Yes. I was just suggesting it was worth considering at least to
> the point of being able to make a decision about which design
> is likely to be cleaner. If PCI express needs the dynamic walk
> of the path then it's probably better to handle plain PCI that
> way too for consistency.
> 
> -- PMM

It would seem so. The express spec is kind of murky about this
but experiments with real hardware seem to confirm this.
diff mbox

Patch

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index d8a1b11..1f4e707 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -283,17 +283,76 @@  const char *pci_root_bus_path(PCIDevice *dev)
     return rootbus->qbus.name;
 }
 
+static PCIDevice *pci_bus_find_host(PCIBus *bus)
+{
+    PCIDevice *dev;
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
+        dev = bus->devices[i];
+        if (dev) {
+            PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
+            if (pc->class_id == PCI_CLASS_BRIDGE_HOST) {
+                return dev;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+static void master_abort(void *opaque)
+{
+    PCIDevice *master_dev = NULL;
+    PCIDeviceClass *pc;
+    PCIBus *bus;
+    int downstream;
+
+    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_DEVICE)) {
+        master_dev = PCI_DEVICE(opaque);
+        bus = master_dev->bus;
+        while (!pci_bus_is_root(bus)) {
+            master_dev = bus->parent_dev;
+            bus = master_dev->bus;
+        }
+        downstream = 0;
+    }
+
+    if (object_dynamic_cast(OBJECT(opaque), TYPE_PCI_BUS)) {
+        bus = PCI_BUS(opaque);
+        if (pci_bus_is_root(bus)) {
+            master_dev = pci_bus_find_host(bus);
+        } else { /* bus behind a PCI-2-PCI bridge */
+            master_dev = bus->parent_dev;
+        }
+        downstream = 1;
+    }
+
+    assert(master_dev);
+    pc = PCI_DEVICE_GET_CLASS(master_dev);
+
+    if (downstream && pc->is_bridge) {
+        pci_word_test_and_set_mask(master_dev->config + PCI_SEC_STATUS,
+                                   PCI_STATUS_REC_MASTER_ABORT);
+    } else {
+        pci_word_test_and_set_mask(master_dev->config + PCI_STATUS,
+                                   PCI_STATUS_REC_MASTER_ABORT);
+    }
+}
+
 static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
 {
-   return -1ULL;
+    master_abort(opaque);
+    return -1ULL;
 }
 
 static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
                                    unsigned size)
 {
+    master_abort(opaque);
 }
 
-static const MemoryRegionOps master_abort_mem_ops = {
+static const MemoryRegionOps master_abort_ops = {
     .read = master_abort_mem_read,
     .write = master_abort_mem_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
@@ -301,6 +360,23 @@  static const MemoryRegionOps master_abort_mem_ops = {
 
 #define MASTER_ABORT_MEM_PRIORITY INT_MIN
 
+void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io)
+{
+    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
+                          &master_abort_ops, bus, mem,
+                          memory_region_size(bus->address_space_mem));
+    memory_region_add_subregion_overlap(bus->address_space_mem,
+                                        0, &bus->master_abort_mem,
+                                        MASTER_ABORT_MEM_PRIORITY);
+
+    memory_region_init_io(&bus->master_abort_io, OBJECT(bus),
+                          &master_abort_ops, bus, io,
+                          memory_region_size(bus->address_space_io));
+    memory_region_add_subregion_overlap(bus->address_space_io,
+                                        0, &bus->master_abort_io,
+                                        MASTER_ABORT_MEM_PRIORITY);
+}
+
 static void pci_bus_init(PCIBus *bus, DeviceState *parent,
                          const char *name,
                          MemoryRegion *address_space_mem,
@@ -312,13 +388,8 @@  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
     bus->address_space_mem = address_space_mem;
     bus->address_space_io = address_space_io;
 
-
-    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
-                          &master_abort_mem_ops, bus, "pci-master-abort",
-                          memory_region_size(bus->address_space_mem));
-    memory_region_add_subregion_overlap(bus->address_space_mem,
-                                        0, &bus->master_abort_mem,
-                                        MASTER_ABORT_MEM_PRIORITY);
+    pci_bus_master_abort_init(bus, "pci-master-abort-mem",
+                              "pci-master-abort-io");
 
     /* host bridge */
     QLIST_INIT(&bus->child);
@@ -840,9 +911,24 @@  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pci_dev->bus = bus;
     dma_as = pci_device_iommu_address_space(pci_dev);
 
-    memory_region_init_alias(&pci_dev->bus_master_enable_region,
-                             OBJECT(pci_dev), "bus master",
+    memory_region_init(&pci_dev->bus_master_enable_region, NULL,
+                       "bus master", UINT64_MAX);
+
+    memory_region_init_io(&pci_dev->bus_master_abort_mem, OBJECT(bus),
+                          &master_abort_ops, OBJECT(pci_dev),
+                          "bus master master-abort",
+                          memory_region_size(&pci_dev->bus_master_enable_region));
+    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
+                                        0, &pci_dev->bus_master_abort_mem,
+                                        MASTER_ABORT_MEM_PRIORITY);
+
+    memory_region_init_alias(&pci_dev->bus_master_dma_mem,
+                             OBJECT(pci_dev), "bus master dma",
                              dma_as->root, 0, memory_region_size(dma_as->root));
+    memory_region_add_subregion_overlap(&pci_dev->bus_master_enable_region,
+                                        0, &pci_dev->bus_master_dma_mem, 0);
+
+
     memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
     address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
                        name);
@@ -901,6 +987,13 @@  static void do_pci_unregister_device(PCIDevice *pci_dev)
     pci_config_free(pci_dev);
 
     address_space_destroy(&pci_dev->bus_master_as);
+    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
+                                &pci_dev->bus_master_dma_mem);
+    memory_region_del_subregion(&pci_dev->bus_master_enable_region,
+                                &pci_dev->bus_master_abort_mem);
+    memory_region_destroy(&pci_dev->bus_master_dma_mem);
+    memory_region_destroy(&pci_dev->bus_master_abort_mem);
+
     memory_region_destroy(&pci_dev->bus_master_enable_region);
 }
 
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index e6b22b8..56b682f 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -376,6 +376,10 @@  int pci_bridge_initfn(PCIDevice *dev, const char *typename)
     sec_bus->address_space_io = &br->address_space_io;
     memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
     br->windows = pci_bridge_region_init(br);
+
+    pci_bus_master_abort_init(sec_bus, "pci_bridge_master_abort_mem",
+                              "pci_bridge_master_abort_io");
+
     QLIST_INIT(&sec_bus->child);
     QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
     return 0;
@@ -389,6 +393,12 @@  void pci_bridge_exitfn(PCIDevice *pci_dev)
     QLIST_REMOVE(&s->sec_bus, sibling);
     pci_bridge_region_del(s, s->windows);
     pci_bridge_region_cleanup(s, s->windows);
+    memory_region_del_subregion(s->sec_bus.address_space_mem,
+                                &s->sec_bus.master_abort_mem);
+    memory_region_del_subregion(s->sec_bus.address_space_io,
+                                &s->sec_bus.master_abort_io);
+    memory_region_destroy(&s->sec_bus.master_abort_mem);
+    memory_region_destroy(&s->sec_bus.master_abort_io);
     memory_region_destroy(&s->address_space_mem);
     memory_region_destroy(&s->address_space_io);
     /* qbus_free() is called automatically by qdev_free() */
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 37979aa..d69e06d 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -242,6 +242,8 @@  struct PCIDevice {
     PCIIORegion io_regions[PCI_NUM_REGIONS];
     AddressSpace bus_master_as;
     MemoryRegion bus_master_enable_region;
+    MemoryRegion bus_master_dma_mem;
+    MemoryRegion bus_master_abort_mem;
 
     /* do not access the following fields */
     PCIConfigReadFunc *config_read;
@@ -357,6 +359,7 @@  PCIBus *pci_bus_new(DeviceState *parent, const char *name,
                     MemoryRegion *address_space_mem,
                     MemoryRegion *address_space_io,
                     uint8_t devfn_min, const char *typename);
+void pci_bus_master_abort_init(PCIBus *bus, const char *mem, const char *io);
 void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
                   void *irq_opaque, int nirq);
 int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 2ad5edb..57b1541 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -24,6 +24,7 @@  struct PCIBus {
     MemoryRegion *address_space_mem;
     MemoryRegion *address_space_io;
     MemoryRegion master_abort_mem;
+    MemoryRegion master_abort_io;
 
     QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
     QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */