Patchwork [v4,3/3] hw/pci: handle downstream pci master abort

login
register
mail settings
Submitter Marcel Apfelbaum
Date Sept. 15, 2013, 4:16 p.m.
Message ID <1379261801-16969-4-git-send-email-marcel.a@redhat.com>
Download mbox | patch
Permalink /patch/275033/
State New
Headers show

Comments

Marcel Apfelbaum - Sept. 15, 2013, 4:16 p.m.
A MemoryRegion with negative priority is created that
spans the entire pci address space.
It "intercepts" accesses to unassigned pci
address space and follows the pci spec:
 1. returns -1 on read
 2. does nothing on write

Note: setting the RECEIVED MASTER ABORT bit in the STATUS register
      of the device that initiated the transaction will be
      implemented in another series
Note: This implementation handles only the reads/writes to
      the pci address space that are done by the cpu (downstream).

Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
---
Changes from v3:
 - Addresses Michael S. Tsirkin comments
   - Changed the name of the new Memory region to master_abort_mem
   - Made master abort priority INT_MIN instead of -1
 - Removed handling of RECEIVED MASTER ABORT BIT; it will be taken
   care of in a different series

Changes from v1:
 - "pci-unassigned-mem" MemoryRegion resides now in PCIBus and not on
    various Host Bridges
 - "pci-unassigned-mem" does not have a ".valid.accept" field and
    implements read/write methods

 hw/pci/pci.c             | 27 +++++++++++++++++++++++++++
 include/hw/pci/pci_bus.h |  1 +
 2 files changed, 28 insertions(+)
Michael S. Tsirkin - Sept. 15, 2013, 5:30 p.m.
On Sun, Sep 15, 2013 at 07:16:41PM +0300, Marcel Apfelbaum wrote:
> A MemoryRegion with negative priority was created and
> it spans over all the pci address space.
> It "intercepts" the accesses to unassigned pci
> address space and will follow the pci spec:
>  1. returns -1 on read
>  2. does nothing on write
> 
> Note: setting the RECEIVED MASTER ABORT bit in the STATUS register
>       of the device that initiated the transaction will be
>       implemented in another series

Fine though I'd like to see how it all works
together before applying.

> Note: This implementation handles only the reads/writes to
>       the pci address space that are done by the cpu.(downstream)

Strange, I don't see where the limitation comes from.
Looks like any read returns -1 - what did I miss?

> 
> Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> ---
> Changes from v3:
>  - Addresses Michael S. Tsirkin comments
>    - Changed the name of the new Memory region to master_abort_mem
>    - Made master abort priority INT_MIN instead of -1
>  - Removed handling of RECEIVED MASTER ABORT BIT; it will be taken
>    care in a different series
> 
> Changes from v1:
>  - "pci-unassigned-mem" MemoryRegion resides now in PCIBus and not on
>     various Host Bridges
>  - "pci-unassgined-mem" does not have a ".valid.accept" field and
>     implements read write methods
> 
>  hw/pci/pci.c             | 27 +++++++++++++++++++++++++++
>  include/hw/pci/pci_bus.h |  1 +
>  2 files changed, 28 insertions(+)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index d00682e..9b12375 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -283,6 +283,24 @@ const char *pci_root_bus_path(PCIDevice *dev)
>      return rootbus->qbus.name;
>  }
>  
> +static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
> +{
> +   return -1ULL;
> +}
> +
> +static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
> +                                   unsigned size)
> +{
> +}
> +
> +static const MemoryRegionOps master_abort_mem_ops = {
> +    .read = master_abort_mem_read,
> +    .write = master_abort_mem_write,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +};
> +

Please make it little endian.
DEVICE_NATIVE_ENDIAN is almost always a bug.

> +#define MASTER_ABORT_MEM_PRIORITY INT_MIN
> +
>  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
>                           const char *name,
>                           MemoryRegion *address_space_mem,
> @@ -294,6 +312,15 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent,
>      bus->address_space_mem = address_space_mem;
>      bus->address_space_io = address_space_io;
>  
> +
> +    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> +                          &master_abort_mem_ops, bus, "pci-master-abort",
> +                          memory_region_size(bus->address_space_mem));
> +    memory_region_add_subregion_overlap(bus->address_space_mem,
> +                                        bus->address_space_mem->addr,
> +                                        &bus->master_abort_mem,
> +                                        MASTER_ABORT_MEM_PRIORITY);
> +
>      /* host bridge */
>      QLIST_INIT(&bus->child);
>  
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 9df1788..2ad5edb 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -23,6 +23,7 @@ struct PCIBus {
>      PCIDevice *parent_dev;
>      MemoryRegion *address_space_mem;
>      MemoryRegion *address_space_io;
> +    MemoryRegion master_abort_mem;
>  
>      QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
>      QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
> -- 
> 1.8.3.1
Peter Maydell - Sept. 15, 2013, 5:32 p.m.
On 15 September 2013 18:30, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Sun, Sep 15, 2013 at 07:16:41PM +0300, Marcel Apfelbaum wrote:
>> +static const MemoryRegionOps master_abort_mem_ops = {
>> +    .read = master_abort_mem_read,
>> +    .write = master_abort_mem_write,
>> +    .endianness = DEVICE_NATIVE_ENDIAN,
>> +};
>> +
>
> Please make it little endian.
> DEVICE_NATIVE_ENDIAN is almost always a bug.

...when dealing with PCI devices. For a random device on the system bus
it's often correct.

-- PMM
Marcel Apfelbaum - Sept. 15, 2013, 6:26 p.m.
On Sun, 2013-09-15 at 20:30 +0300, Michael S. Tsirkin wrote:
> On Sun, Sep 15, 2013 at 07:16:41PM +0300, Marcel Apfelbaum wrote:
> > A MemoryRegion with negative priority was created and
> > it spans over all the pci address space.
> > It "intercepts" the accesses to unassigned pci
> > address space and will follow the pci spec:
> >  1. returns -1 on read
> >  2. does nothing on write
> > 
> > Note: setting the RECEIVED MASTER ABORT bit in the STATUS register
> >       of the device that initiated the transaction will be
> >       implemented in another series
> 
> Fine though I'd like to see how it all works
> together before applying.
> 
> > Note: This implementation handles only the reads/writes to
> >       the pci address space that are done by the cpu.(downstream)
> 
> Strange, I don't see where does the limitation come from.
> Looks like any read returns -1 - what did I miss.
Devices using IOMMU have a different way to get their
memory regions. I am not sure that the master abort memory
region covers that scenario.
Anyway, the above statement was for the prev implementation,
I will remove it in the next version.

Thanks,
Marcel

> 
> > 
> > Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> > ---
> > Changes from v3:
> >  - Addresses Michael S. Tsirkin comments
> >    - Changed the name of the new Memory region to master_abort_mem
> >    - Made master abort priority INT_MIN instead of -1
> >  - Removed handling of RECEIVED MASTER ABORT BIT; it will be taken
> >    care in a different series
> > 
> > Changes from v1:
> >  - "pci-unassigned-mem" MemoryRegion resides now in PCIBus and not on
> >     various Host Bridges
> >  - "pci-unassgined-mem" does not have a ".valid.accept" field and
> >     implements read write methods
> > 
> >  hw/pci/pci.c             | 27 +++++++++++++++++++++++++++
> >  include/hw/pci/pci_bus.h |  1 +
> >  2 files changed, 28 insertions(+)
> > 
> > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > index d00682e..9b12375 100644
> > --- a/hw/pci/pci.c
> > +++ b/hw/pci/pci.c
> > @@ -283,6 +283,24 @@ const char *pci_root_bus_path(PCIDevice *dev)
> >      return rootbus->qbus.name;
> >  }
> >  
> > +static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
> > +{
> > +   return -1ULL;
> > +}
> > +
> > +static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
> > +                                   unsigned size)
> > +{
> > +}
> > +
> > +static const MemoryRegionOps master_abort_mem_ops = {
> > +    .read = master_abort_mem_read,
> > +    .write = master_abort_mem_write,
> > +    .endianness = DEVICE_NATIVE_ENDIAN,
> > +};
> > +
> 
> Please make it little endian.
> DEVICE_NATIVE_ENDIAN is almost always a bug.
> 
> > +#define MASTER_ABORT_MEM_PRIORITY INT_MIN
> > +
> >  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> >                           const char *name,
> >                           MemoryRegion *address_space_mem,
> > @@ -294,6 +312,15 @@ static void pci_bus_init(PCIBus *bus, DeviceState *parent,
> >      bus->address_space_mem = address_space_mem;
> >      bus->address_space_io = address_space_io;
> >  
> > +
> > +    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
> > +                          &master_abort_mem_ops, bus, "pci-master-abort",
> > +                          memory_region_size(bus->address_space_mem));
> > +    memory_region_add_subregion_overlap(bus->address_space_mem,
> > +                                        bus->address_space_mem->addr,
> > +                                        &bus->master_abort_mem,
> > +                                        MASTER_ABORT_MEM_PRIORITY);
> > +
> >      /* host bridge */
> >      QLIST_INIT(&bus->child);
> >  
> > diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> > index 9df1788..2ad5edb 100644
> > --- a/include/hw/pci/pci_bus.h
> > +++ b/include/hw/pci/pci_bus.h
> > @@ -23,6 +23,7 @@ struct PCIBus {
> >      PCIDevice *parent_dev;
> >      MemoryRegion *address_space_mem;
> >      MemoryRegion *address_space_io;
> > +    MemoryRegion master_abort_mem;
> >  
> >      QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
> >      QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */
> > -- 
> > 1.8.3.1
Marcel Apfelbaum - Sept. 15, 2013, 6:32 p.m.
On Sun, 2013-09-15 at 18:32 +0100, Peter Maydell wrote:
> On 15 September 2013 18:30, Michael S. Tsirkin <mst@redhat.com> wrote:
> > On Sun, Sep 15, 2013 at 07:16:41PM +0300, Marcel Apfelbaum wrote:
> >> +static const MemoryRegionOps master_abort_mem_ops = {
> >> +    .read = master_abort_mem_read,
> >> +    .write = master_abort_mem_write,
> >> +    .endianness = DEVICE_NATIVE_ENDIAN,
> >> +};
> >> +
> >
> > Please make it little endian.
> > DEVICE_NATIVE_ENDIAN is almost always a bug.
> 
> ...when dealing with PCI devices. For a random device on the system bus
> it's often correct.
But this is a PCI bus, I'll change it to little endian.
Thanks!
Marcel

> 
> -- PMM
Michael S. Tsirkin - Sept. 15, 2013, 8:25 p.m.
On Sun, Sep 15, 2013 at 06:32:13PM +0100, Peter Maydell wrote:
> On 15 September 2013 18:30, Michael S. Tsirkin <mst@redhat.com> wrote:
> > On Sun, Sep 15, 2013 at 07:16:41PM +0300, Marcel Apfelbaum wrote:
> >> +static const MemoryRegionOps master_abort_mem_ops = {
> >> +    .read = master_abort_mem_read,
> >> +    .write = master_abort_mem_write,
> >> +    .endianness = DEVICE_NATIVE_ENDIAN,
> >> +};
> >> +
> >
> > Please make it little endian.
> > DEVICE_NATIVE_ENDIAN is almost always a bug.
> 
> ...when dealing with PCI devices. For a random device on the system bus
> it's often correct.
> 
> -- PMM

native is really qemu host endian-ness ... what are some
examples when it's actually correct?
Peter Maydell - Sept. 15, 2013, 8:40 p.m.
On 15 September 2013 21:25, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Sun, Sep 15, 2013 at 06:32:13PM +0100, Peter Maydell wrote:
>> On 15 September 2013 18:30, Michael S. Tsirkin <mst@redhat.com> wrote:
>> > On Sun, Sep 15, 2013 at 07:16:41PM +0300, Marcel Apfelbaum wrote:
>> >> +static const MemoryRegionOps master_abort_mem_ops = {
>> >> +    .read = master_abort_mem_read,
>> >> +    .write = master_abort_mem_write,
>> >> +    .endianness = DEVICE_NATIVE_ENDIAN,
>> >> +};
>> >> +
>> >
>> > Please make it little endian.
>> > DEVICE_NATIVE_ENDIAN is almost always a bug.
>>
>> ...when dealing with PCI devices. For a random device on the system bus
>> it's often correct.

> native is really qemu host endian-ness ... what are some
> examples when it's actually correct?

"native" means "if the device's MMIO callback does 'return 0x12345678;'
for a 32 bit read then the guest CPU should see 0x12345678". That's
almost always what you want for simple devices (which may in fact
only support 32 bit accesses to registers), because it means you don't
have to fill your device with explicit endianness swaps. It's also useful
if that kind of simple device might be built into either a little endian
or a bigendian system: as far as the device is concerned its 32 bit
registers still have (say) the TXRDY bit at the low end of the status
register even if the CPU is bigendian.

That said, our current setup of marking the mmio ops with an endianness
is really conflating what in real hardware is a number of distinct properties
of the CPU, the bus, any bus controllers (like PCI!) in the path between
CPU and device and finally the device itself. So it's inherently both
confusing and confused.

-- PMM
Michael S. Tsirkin - Sept. 15, 2013, 9:07 p.m.
On Sun, Sep 15, 2013 at 09:40:37PM +0100, Peter Maydell wrote:
> On 15 September 2013 21:25, Michael S. Tsirkin <mst@redhat.com> wrote:
> > On Sun, Sep 15, 2013 at 06:32:13PM +0100, Peter Maydell wrote:
> >> On 15 September 2013 18:30, Michael S. Tsirkin <mst@redhat.com> wrote:
> >> > On Sun, Sep 15, 2013 at 07:16:41PM +0300, Marcel Apfelbaum wrote:
> >> >> +static const MemoryRegionOps master_abort_mem_ops = {
> >> >> +    .read = master_abort_mem_read,
> >> >> +    .write = master_abort_mem_write,
> >> >> +    .endianness = DEVICE_NATIVE_ENDIAN,
> >> >> +};
> >> >> +
> >> >
> >> > Please make it little endian.
> >> > DEVICE_NATIVE_ENDIAN is almost always a bug.
> >>
> >> ...when dealing with PCI devices. For a random device on the system bus
> >> it's often correct.
> 
> > native is really qemu host endian-ness ... what are some
> > examples when it's actually correct?
> 
> "native" means "if the device's MMIO callback does 'return 0x12345678;'
> for a 32 bit read then the guest CPU should see 0x12345678". That's
> almost always what you want for simple devices (which may in fact
> only support 32 bit accesses to registers), because it means you don't
> have to fill your device with explicit endianness swaps.

But this means that your device behaves differently
depending on the endian-ness of the guest system.
So it only makes sense if the device is very
system specific: anything outside hw/<specific architecture>
is at least in theory not a system specific device so
it should not do this.

> It's also useful
> if that kind of simple device might be built into either a little endian
> or a bigendian system: as far as the device is concerned its 32 bit
> registers still have (say) the TXRDY bit at the low end of the status
> register even if the CPU is bigendian.
> 
> That said, our current setup of marking the mmio ops with an endianness
> is really conflating what in real hardware is a number of distinct properties
> of the CPU, the bus, any bus controllers (like PCI!) in the path between
> CPU and device and finally the device itself. So it's inherently both
> confusing and confused.
> 
> -- PMM
Peter Maydell - Sept. 15, 2013, 9:41 p.m.
On 15 September 2013 22:07, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Sun, Sep 15, 2013 at 09:40:37PM +0100, Peter Maydell wrote:
>> "native" means "if the device's MMIO callback does 'return 0x12345678;'
>> for a 32 bit read then the guest CPU should see 0x12345678". That's
>> almost always what you want for simple devices (which may in fact
>> only support 32 bit accesses to registers), because it means you don't
>> have to fill your device with explicit endianness swaps.
>
> But this means that you device behaves differently
> depending on the endian-ness of the guest system.
> So it only makes sense if the device is very
> system specific

If you mark a device as specifically DEVICE_LITTLE_ENDIAN
or DEVICE_BIG_ENDIAN this is *also* very system specific.
So you're a bit stuck either way. As I say, for basic "this just
provides a bunch of registers" devices _NATIVE_ is the
pragmatic answer, since it effectively models the way that the
same bit of hardware is wired up to the bus differently if it's
expected to be in a big or little endian system.
(Any device where you can make byte accesses into the "middle"
of a 32 bit register probably needs to think more carefully, but those
are pretty rare.)

>anything outside hw/<specific architecture>
> is at least in theory not a system specific device

This is wrong, by the way. hw/$arch contains:
 * board models
 * things we haven't properly separated out into self contained devices
 * random "not actually a device" things like boot code

Anything that's really a device goes in its appropriate subdirectory
(char, video, etc etc), whether it happens to be used only on one
system or one architecture or not. (For instance all the interrupt
controllers live in hw/intc though obviously they're hopelessly
system specific.)

-- PMM
Michael S. Tsirkin - Sept. 16, 2013, 6:14 a.m.
On Sun, Sep 15, 2013 at 10:41:26PM +0100, Peter Maydell wrote:
> On 15 September 2013 22:07, Michael S. Tsirkin <mst@redhat.com> wrote:
> > On Sun, Sep 15, 2013 at 09:40:37PM +0100, Peter Maydell wrote:
> >> "native" means "if the device's MMIO callback does 'return 0x12345678;'
> >> for a 32 bit read then the guest CPU should see 0x12345678". That's
> >> almost always what you want for simple devices (which may in fact
> >> only support 32 bit accesses to registers), because it means you don't
> >> have to fill your device with explicit endianness swaps.
> >
> > But this means that you device behaves differently
> > depending on the endian-ness of the guest system.
> > So it only makes sense if the device is very
> > system specific
> 
> If you mark a device as specifically DEVICE_LITTLE_ENDIAN
> or DEVICE_BIG_ENDIAN this is *also* very system specific.

No, this just means the device is always wired in
the same way on all systems. It's the pragmatic
choice for any bus that supports device plug-in.

> So you're a bit stuck either way. As I say, for basic "this just
> provides a bunch of registers" devices _NATIVE_ is the
> pragmatic answer, since it effectively models the way that the
> same bit of hardware is wired up to the bus differently if it's
> expected to be in a big or little endian system.
> (Any device where you can make byte accesses into the "middle"
> of a 32 bit register probably needs to think more carefully, but those
> are pretty rare.)
> 
> >anything outside hw/<specific architecture>
> > is at least in theory not a system specific device
> 
> This is wrong, by the way. hw/$arch contains:
>  * board models
>  * things we haven't properly separated out into self contained devices
>  * random "not actually a device" things like boot code
> 
> Anything that's really a device goes in its appropriate subdirectory
> (char, video, etc etc), whether it happens to be used only on one
> system or one architecture or not. (For instance all the interrupt
> controllers live in hw/intc though obviously they're hopelessly
> system specific.)
> 
> -- PMM

Thanks for the clarification.
Peter Maydell - Sept. 16, 2013, 6:57 a.m.
On 16 September 2013 07:14, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Sun, Sep 15, 2013 at 10:41:26PM +0100, Peter Maydell wrote:
>> On 15 September 2013 22:07, Michael S. Tsirkin <mst@redhat.com> wrote:
>> > On Sun, Sep 15, 2013 at 09:40:37PM +0100, Peter Maydell wrote:
>> >> "native" means "if the device's MMIO callback does 'return 0x12345678;'
>> >> for a 32 bit read then the guest CPU should see 0x12345678". That's
>> >> almost always what you want for simple devices (which may in fact
>> >> only support 32 bit accesses to registers), because it means you don't
>> >> have to fill your device with explicit endianness swaps.
>> >
>> > But this means that you device behaves differently
>> > depending on the endian-ness of the guest system.
>> > So it only makes sense if the device is very
>> > system specific
>>
>> If you mark a device as specifically DEVICE_LITTLE_ENDIAN
>> or DEVICE_BIG_ENDIAN this is *also* very system specific.
>
> No, this just means the device is always wired in
> the same way on all systems. It's the pragmatic
> choice for any bus that supports device plug-in.

No, it means the device is little endian even on a big
endian system. On BE systems that is weird, and the only
reason for it is if it's an external bus with a standard that
specifies which endianness it is. That's true for ISA and PCI
(and this is where we're using the device's endianness
specification to compensate for the fact we don't have a way
to specify that PCI host bridges will do endianness swapping
on a bigendian system).
But "pluggable bus" is not equivalent to "system specific"
(in either direction).

-- PMM

Patch

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index d00682e..9b12375 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -283,6 +283,24 @@  const char *pci_root_bus_path(PCIDevice *dev)
     return rootbus->qbus.name;
 }
 
+static uint64_t master_abort_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+   return -1ULL;
+}
+
+static void master_abort_mem_write(void *opaque, hwaddr addr, uint64_t val,
+                                   unsigned size)
+{
+}
+
+static const MemoryRegionOps master_abort_mem_ops = {
+    .read = master_abort_mem_read,
+    .write = master_abort_mem_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+#define MASTER_ABORT_MEM_PRIORITY INT_MIN
+
 static void pci_bus_init(PCIBus *bus, DeviceState *parent,
                          const char *name,
                          MemoryRegion *address_space_mem,
@@ -294,6 +312,15 @@  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
     bus->address_space_mem = address_space_mem;
     bus->address_space_io = address_space_io;
 
+
+    memory_region_init_io(&bus->master_abort_mem, OBJECT(bus),
+                          &master_abort_mem_ops, bus, "pci-master-abort",
+                          memory_region_size(bus->address_space_mem));
+    memory_region_add_subregion_overlap(bus->address_space_mem,
+                                        bus->address_space_mem->addr,
+                                        &bus->master_abort_mem,
+                                        MASTER_ABORT_MEM_PRIORITY);
+
     /* host bridge */
     QLIST_INIT(&bus->child);
 
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 9df1788..2ad5edb 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -23,6 +23,7 @@  struct PCIBus {
     PCIDevice *parent_dev;
     MemoryRegion *address_space_mem;
     MemoryRegion *address_space_io;
+    MemoryRegion master_abort_mem;
 
     QLIST_HEAD(, PCIBus) child; /* this will be replaced by qdev later */
     QLIST_ENTRY(PCIBus) sibling;/* this will be replaced by qdev later */