Patchwork [01/10] Add stub functions for PCI device models to do PCI DMA

login
register
mail settings
Submitter David Gibson
Date Sept. 1, 2011, 5 a.m.
Message ID <1314853263-2086-2-git-send-email-david@gibson.dropbear.id.au>
Download mbox | patch
Permalink /patch/112795/
State New
Headers show

Comments

David Gibson - Sept. 1, 2011, 5 a.m.
This patch adds functions to pci.[ch] to perform PCI DMA operations.  At
present, these are just stubs which perform directly cpu physical memory
accesses.

Using these stubs, however, distinguishes PCI device DMA transactions from
other accesses to physical memory, which will allow PCI IOMMU support to
be added in one place, rather than updating every PCI driver at that time.

That is, it allows us to update individual PCI drivers to support an IOMMU
without having yet determined the details of how the IOMMU emulation will
operate.  This will let us remove the most bitrot-sensitive part of an
IOMMU patch in advance.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 dma.h    |    2 ++
 hw/pci.c |   31 +++++++++++++++++++++++++++++++
 hw/pci.h |   33 +++++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 0 deletions(-)
Michael S. Tsirkin - Sept. 1, 2011, 3:35 p.m.
On Thu, Sep 01, 2011 at 03:00:54PM +1000, David Gibson wrote:
> This patch adds functions to pci.[ch] to perform PCI DMA operations.  At
> present, these are just stubs which perform directly cpu physical memory
> accesses.
> 
> Using these stubs, however, distinguishes PCI device DMA transactions from
> other accesses to physical memory, which will allow PCI IOMMU support to
> be added in one place, rather than updating every PCI driver at that time.
> 
> That is, it allows us to update individual PCI drivers to support an IOMMU
> without having yet determined the details of how the IOMMU emulation will
> operate.  This will let us remove the most bitrot-sensitive part of an
> IOMMU patch in advance.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  dma.h    |    2 ++
>  hw/pci.c |   31 +++++++++++++++++++++++++++++++
>  hw/pci.h |   33 +++++++++++++++++++++++++++++++++
>  3 files changed, 66 insertions(+), 0 deletions(-)
> 
> diff --git a/dma.h b/dma.h
> index a6db5ba..06e91cb 100644
> --- a/dma.h
> +++ b/dma.h
> @@ -15,6 +15,8 @@
>  #include "hw/hw.h"
>  #include "block.h"
>  
> +typedef target_phys_addr_t dma_addr_t;
> +
>  typedef struct {
>      target_phys_addr_t base;
>      target_phys_addr_t len;
> diff --git a/hw/pci.c b/hw/pci.c
> index 1cdcbb7..842b066 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -2211,3 +2211,34 @@ MemoryRegion *pci_address_space(PCIDevice *dev)
>  {
>      return dev->bus->address_space_mem;
>  }
> +
> +#define DEFINE_LDST_DMA(_lname, _sname, _bits) \
> +    uint##_bits##_t ld##_lname##_pci_dma(PCIDevice *dev, dma_addr_t addr) \
> +    { \
> +        uint##_bits##_t val; \
> +        pci_dma_read(dev, addr, &val, sizeof(val)); \
> +        return le##_bits##_to_cpu(val); \
> +    } \
> +    void st##_sname##_pci_dma(PCIDevice *dev, \
> +                              dma_addr_t addr, uint##_bits##_t val) \
> +    { \
> +        val = cpu_to_le##_bits(val); \
> +        pci_dma_write(dev, addr, &val, sizeof(val)); \
> +    }
> +
> +uint8_t ldub_pci_dma(PCIDevice *dev, dma_addr_t addr)
> +{
> +    uint8_t val;
> +
> +    pci_dma_read(dev, addr, &val, sizeof(val));
> +    return val;
> +}
> +
> +void stb_pci_dma(PCIDevice *dev, dma_addr_t addr, uint8_t val)
> +{
> +    pci_dma_write(dev, addr, &val, sizeof(val));
> +}
> +
> +DEFINE_LDST_DMA(uw, w, 16);
> +DEFINE_LDST_DMA(l, l, 32);
> +DEFINE_LDST_DMA(q, q, 64);
> diff --git a/hw/pci.h b/hw/pci.h
> index 391217e..401d14a 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -6,6 +6,7 @@
>  
>  #include "qdev.h"
>  #include "memory.h"
> +#include "dma.h"
>  
>  /* PCI includes legacy ISA access.  */
>  #include "isa.h"
> @@ -492,4 +493,36 @@ static inline uint32_t pci_config_size(const PCIDevice *d)
>      return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
>  }
>  
> +/* DMA access functions */
> +static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
> +                             void *buf, dma_addr_t len, int is_write)
> +{
> +    cpu_physical_memory_rw(addr, buf, len, is_write);
> +    return 0;
> +}
> +
> +static inline int pci_dma_read(PCIDevice *dev, dma_addr_t addr,
> +                               void *buf, dma_addr_t len)
> +{
> +    return pci_dma_rw(dev, addr, buf, len, 0);
> +}
> +
> +static inline int pci_dma_write(PCIDevice *dev, dma_addr_t addr,
> +                                const void *buf, dma_addr_t len)
> +{
> +    return pci_dma_rw(dev, addr, (void *) buf, len, 1);
> +}
> +
> +#define DECLARE_LDST_DMA(_lname, _sname, _bits) \
> +    uint##_bits##_t ld##_lname##_pci_dma(PCIDevice *dev, dma_addr_t addr); \
> +    void st##_sname##_pci_dma(PCIDevice *dev, dma_addr_t addr, \
> +                              uint##_bits##_t val);            \

prefix macros with PCI_ please.

> +DECLARE_LDST_DMA(ub, b, 8);
> +DECLARE_LDST_DMA(uw, w, 16);
> +DECLARE_LDST_DMA(l, l, 32);
> +DECLARE_LDST_DMA(q, q, 64);
> +
> +#undef DECLARE_LDST_DMA
> +
>  #endif

I'd prefer the stubs to be inline. Not just as an optimization:
it also makes it easier to grok what goes on in the common
no-iommu case.
Anthony Liguori - Sept. 1, 2011, 3:55 p.m.
On 09/01/2011 12:00 AM, David Gibson wrote:
> This patch adds functions to pci.[ch] to perform PCI DMA operations.  At
> present, these are just stubs which perform directly cpu physical memory
> accesses.
>
> Using these stubs, however, distinguishes PCI device DMA transactions from
> other accesses to physical memory, which will allow PCI IOMMU support to
> be added in one place, rather than updating every PCI driver at that time.
>
> That is, it allows us to update individual PCI drivers to support an IOMMU
> without having yet determined the details of how the IOMMU emulation will
> operate.  This will let us remove the most bitrot-sensitive part of an
> IOMMU patch in advance.
>
> Signed-off-by: David Gibson<david@gibson.dropbear.id.au>

I think this is the wrong approach given the introduction of the memory API.

I think we should have a generic memory access function that takes a 
MemoryRegion as its first argument.

The PCI bus should then expose one memory region for each device (that's 
how it can figure out where the access is coming from).

Richard/Avi, what do you think?

Regards,

Anthony Liguori

> ---
>   dma.h    |    2 ++
>   hw/pci.c |   31 +++++++++++++++++++++++++++++++
>   hw/pci.h |   33 +++++++++++++++++++++++++++++++++
>   3 files changed, 66 insertions(+), 0 deletions(-)
>
> diff --git a/dma.h b/dma.h
> index a6db5ba..06e91cb 100644
> --- a/dma.h
> +++ b/dma.h
> @@ -15,6 +15,8 @@
>   #include "hw/hw.h"
>   #include "block.h"
>
> +typedef target_phys_addr_t dma_addr_t;
> +
>   typedef struct {
>       target_phys_addr_t base;
>       target_phys_addr_t len;
> diff --git a/hw/pci.c b/hw/pci.c
> index 1cdcbb7..842b066 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -2211,3 +2211,34 @@ MemoryRegion *pci_address_space(PCIDevice *dev)
>   {
>       return dev->bus->address_space_mem;
>   }
> +
> +#define DEFINE_LDST_DMA(_lname, _sname, _bits) \
> +    uint##_bits##_t ld##_lname##_pci_dma(PCIDevice *dev, dma_addr_t addr) \
> +    { \
> +        uint##_bits##_t val; \
> +        pci_dma_read(dev, addr,&val, sizeof(val)); \
> +        return le##_bits##_to_cpu(val); \
> +    } \
> +    void st##_sname##_pci_dma(PCIDevice *dev, \
> +                              dma_addr_t addr, uint##_bits##_t val) \
> +    { \
> +        val = cpu_to_le##_bits(val); \
> +        pci_dma_write(dev, addr,&val, sizeof(val)); \
> +    }
> +
> +uint8_t ldub_pci_dma(PCIDevice *dev, dma_addr_t addr)
> +{
> +    uint8_t val;
> +
> +    pci_dma_read(dev, addr,&val, sizeof(val));
> +    return val;
> +}
> +
> +void stb_pci_dma(PCIDevice *dev, dma_addr_t addr, uint8_t val)
> +{
> +    pci_dma_write(dev, addr,&val, sizeof(val));
> +}
> +
> +DEFINE_LDST_DMA(uw, w, 16);
> +DEFINE_LDST_DMA(l, l, 32);
> +DEFINE_LDST_DMA(q, q, 64);
> diff --git a/hw/pci.h b/hw/pci.h
> index 391217e..401d14a 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -6,6 +6,7 @@
>
>   #include "qdev.h"
>   #include "memory.h"
> +#include "dma.h"
>
>   /* PCI includes legacy ISA access.  */
>   #include "isa.h"
> @@ -492,4 +493,36 @@ static inline uint32_t pci_config_size(const PCIDevice *d)
>       return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
>   }
>
> +/* DMA access functions */
> +static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
> +                             void *buf, dma_addr_t len, int is_write)
> +{
> +    cpu_physical_memory_rw(addr, buf, len, is_write);
> +    return 0;
> +}
> +
> +static inline int pci_dma_read(PCIDevice *dev, dma_addr_t addr,
> +                               void *buf, dma_addr_t len)
> +{
> +    return pci_dma_rw(dev, addr, buf, len, 0);
> +}
> +
> +static inline int pci_dma_write(PCIDevice *dev, dma_addr_t addr,
> +                                const void *buf, dma_addr_t len)
> +{
> +    return pci_dma_rw(dev, addr, (void *) buf, len, 1);
> +}
> +
> +#define DECLARE_LDST_DMA(_lname, _sname, _bits) \
> +    uint##_bits##_t ld##_lname##_pci_dma(PCIDevice *dev, dma_addr_t addr); \
> +    void st##_sname##_pci_dma(PCIDevice *dev, dma_addr_t addr, \
> +                              uint##_bits##_t val);            \
> +
> +DECLARE_LDST_DMA(ub, b, 8);
> +DECLARE_LDST_DMA(uw, w, 16);
> +DECLARE_LDST_DMA(l, l, 32);
> +DECLARE_LDST_DMA(q, q, 64);
> +
> +#undef DECLARE_LDST_DMA
> +
>   #endif
Avi Kivity - Sept. 1, 2011, 4:03 p.m.
On 09/01/2011 06:55 PM, Anthony Liguori wrote:
> On 09/01/2011 12:00 AM, David Gibson wrote:
>> This patch adds functions to pci.[ch] to perform PCI DMA operations.  At
>> present, these are just stubs which perform directly cpu physical memory
>> accesses.
>>
>> Using these stubs, however, distinguishes PCI device DMA transactions 
>> from
>> other accesses to physical memory, which will allow PCI IOMMU support to
>> be added in one place, rather than updating every PCI driver at that 
>> time.
>>
>> That is, it allows us to update individual PCI drivers to support an 
>> IOMMU
>> without having yet determined the details of how the IOMMU emulation 
>> will
>> operate.  This will let us remove the most bitrot-sensitive part of an
>> IOMMU patch in advance.
>>
>> Signed-off-by: David Gibson<david@gibson.dropbear.id.au>
>
> I think this is the wrong approach given the introduction of the 
> memory API.
>
> I think we should have a generic memory access function that takes a 
> MemoryRegion as it's first argument.
>
> The PCI bus should then expose one memory region for each device 
> (that's how it can figure out where the access is coming from).
>
> Richard/Avi, what do you think?
>

I think the patchset is fine.  It routes all access through 
pci_dma_rw(), which accepts a PCIDevice.  We can later define 
pci_dma_rw() in terms of the memory API and get the benefit of the 
memory hierarchy.
Anthony Liguori - Sept. 1, 2011, 4:05 p.m.
On 09/01/2011 11:03 AM, Avi Kivity wrote:
> On 09/01/2011 06:55 PM, Anthony Liguori wrote:
>> On 09/01/2011 12:00 AM, David Gibson wrote:
>>> This patch adds functions to pci.[ch] to perform PCI DMA operations. At
>>> present, these are just stubs which perform directly cpu physical memory
>>> accesses.
>>>
>>> Using these stubs, however, distinguishes PCI device DMA transactions
>>> from
>>> other accesses to physical memory, which will allow PCI IOMMU support to
>>> be added in one place, rather than updating every PCI driver at that
>>> time.
>>>
>>> That is, it allows us to update individual PCI drivers to support an
>>> IOMMU
>>> without having yet determined the details of how the IOMMU emulation
>>> will
>>> operate. This will let us remove the most bitrot-sensitive part of an
>>> IOMMU patch in advance.
>>>
>>> Signed-off-by: David Gibson<david@gibson.dropbear.id.au>
>>
>> I think this is the wrong approach given the introduction of the
>> memory API.
>>
>> I think we should have a generic memory access function that takes a
>> MemoryRegion as it's first argument.
>>
>> The PCI bus should then expose one memory region for each device
>> (that's how it can figure out where the access is coming from).
>>
>> Richard/Avi, what do you think?
>>
>
> I think the patchset is fine. It routes all access through pci_dma_rw(),
> which accepts a PCIDevice. We can later define pci_dma_rw() in terms of
> the memory API and get the benefit of the memory hierarchy.

The challenge is what you do about something like ne2k where the core 
chipset can either be a PCI device or an ISA device.  You would have to 
implement a wrapper around pci_dma_rw() in order to turn it into 
cpu_physical_memory_rw when doing ISA.

Regards,

Anthony Liguori

>
Avi Kivity - Sept. 1, 2011, 4:11 p.m.
On 09/01/2011 07:05 PM, Anthony Liguori wrote:
>> I think the patchset is fine. It routes all access through pci_dma_rw(),
>> which accepts a PCIDevice. We can later define pci_dma_rw() in terms of
>> the memory API and get the benefit of the memory hierarchy.
>
>
> The challenge is what you do about something like ne2k where the core 
> chipset can either be a PCI device or an ISA device.  You would have 
> to implement a wrapper around pci_dma_rw() in order to turn it into 
> cpu_physical_memory_rw when doing ISA.
>

True.  But I still think it's the right thing.

We can't really pass a MemoryRegion as the source address, since there 
is no per-device MemoryRegion.  We can use pci_address_space() for basic 
offsetting, and for bypassing bridge windows, but iommu source detection 
has to use the PCIDevice directly.
Anthony Liguori - Sept. 1, 2011, 4:32 p.m.
On 09/01/2011 11:11 AM, Avi Kivity wrote:
> On 09/01/2011 07:05 PM, Anthony Liguori wrote:
>>> I think the patchset is fine. It routes all access through pci_dma_rw(),
>>> which accepts a PCIDevice. We can later define pci_dma_rw() in terms of
>>> the memory API and get the benefit of the memory hierarchy.
>>
>>
>> The challenge is what you do about something like ne2k where the core
>> chipset can either be a PCI device or an ISA device. You would have to
>> implement a wrapper around pci_dma_rw() in order to turn it into
>> cpu_physical_memory_rw when doing ISA.
>>
>
> True. But I still think it's the right thing.
>
> We can't really pass a MemoryRegion as the source address, since there
> is no per-device MemoryRegion.

Couldn't the PCI bus expose 255 MemoryRegions though?  It could still 
use the pci_address_space I think since that should include RAM too, right?

In fact, initially, you could have a 
pci_bus_get_device_memory_region(bus, dev) that just returns 
pci_address_space().

You just need the memory_st[bwl] functions I think.

Regards,

Anthony Liguori

  We can use pci_address_space() for basic
> offsetting, and for bypassing bridge windows, but iommu source detection
> has to use the PCIDevice directly.
>
David Gibson - Sept. 2, 2011, 12:38 a.m.
On Thu, Sep 01, 2011 at 11:05:48AM -0500, Anthony Liguori wrote:
> On 09/01/2011 11:03 AM, Avi Kivity wrote:
> >On 09/01/2011 06:55 PM, Anthony Liguori wrote:
> >>On 09/01/2011 12:00 AM, David Gibson wrote:
[snip]
> The challenge is what you do about something like ne2k where the
> core chipset can either be a PCI device or an ISA device.  You would
> have to implement a wrapper around pci_dma_rw() in order to turn it
> into cpu_physical_memory_rw when doing ISA.

That would be one of the less-easy cases I'm still thinking about...

I don't think it invalidates this patchset, though.
David Gibson - Sept. 2, 2011, 12:39 a.m.
On Thu, Sep 01, 2011 at 06:35:51PM +0300, Michael S. Tsirkin wrote:
> On Thu, Sep 01, 2011 at 03:00:54PM +1000, David Gibson wrote:
[snip]
> > +DECLARE_LDST_DMA(ub, b, 8);
> > +DECLARE_LDST_DMA(uw, w, 16);
> > +DECLARE_LDST_DMA(l, l, 32);
> > +DECLARE_LDST_DMA(q, q, 64);
> > +
> > +#undef DECLARE_LDST_DMA
> > +
> >  #endif
> 
> I'd prefer the stubs to be inline. Not just as an optimization:
> it also makes it easier to grok what goes on in the common
> no-iommu case.

So would I, but that doesn't work because of the poisoning of the phys
access functions.
David Gibson - Sept. 2, 2011, 4:38 a.m.
On Thu, Sep 01, 2011 at 06:35:51PM +0300, Michael S. Tsirkin wrote:
> On Thu, Sep 01, 2011 at 03:00:54PM +1000, David Gibson wrote:
[snip]
> > +#define DECLARE_LDST_DMA(_lname, _sname, _bits) \
> > +    uint##_bits##_t ld##_lname##_pci_dma(PCIDevice *dev, dma_addr_t addr); \
> > +    void st##_sname##_pci_dma(PCIDevice *dev, dma_addr_t addr, \
> > +                              uint##_bits##_t val);            \
> 
> prefix macros with PCI_ please.

Corrected for the next spin.

> > +DECLARE_LDST_DMA(ub, b, 8);
> > +DECLARE_LDST_DMA(uw, w, 16);
> > +DECLARE_LDST_DMA(l, l, 32);
> > +DECLARE_LDST_DMA(q, q, 64);
> > +
> > +#undef DECLARE_LDST_DMA
> > +
> >  #endif
> 
> I'd prefer the stubs to be inline. Not just as an optimization:
> it also makes it easier to grok what goes on in the common
> no-iommu case.

To elaborate on my earlier mail.  The problem with making them inlines
is that the cpu_physical_*() functions then appear in pci.h, which is
used in pci.c amongst other places that are included in
libhw32/libhw64, where those functions are poisoned.
David Gibson - Sept. 2, 2011, 4:40 a.m.
On Thu, Sep 01, 2011 at 07:03:34PM +0300, Avi Kivity wrote:
> On 09/01/2011 06:55 PM, Anthony Liguori wrote:
> >On 09/01/2011 12:00 AM, David Gibson wrote:
> >>This patch adds functions to pci.[ch] to perform PCI DMA operations.  At
> >>present, these are just stubs which perform directly cpu physical memory
> >>accesses.
> >>
> >>Using these stubs, however, distinguishes PCI device DMA
> >>transactions from
> >>other accesses to physical memory, which will allow PCI IOMMU support to
> >>be added in one place, rather than updating every PCI driver at
> >>that time.
> >>
> >>That is, it allows us to update individual PCI drivers to
> >>support an IOMMU
> >>without having yet determined the details of how the IOMMU
> >>emulation will
> >>operate.  This will let us remove the most bitrot-sensitive part of an
> >>IOMMU patch in advance.
> >>
> >>Signed-off-by: David Gibson<david@gibson.dropbear.id.au>
> >
> >I think this is the wrong approach given the introduction of the
> >memory API.
> >
> >I think we should have a generic memory access function that takes
> >a MemoryRegion as it's first argument.
> >
> >The PCI bus should then expose one memory region for each device
> >(that's how it can figure out where the access is coming from).
> >
> >Richard/Avi, what do you think?
> 
> I think the patchset is fine.  It routes all access through
> pci_dma_rw(), which accepts a PCIDevice.  We can later define
> pci_dma_rw() in terms of the memory API and get the benefit of the
> memory hierarchy.

Right.  Exactly what to use as a handle on the right bus / DMA
addressing context is something still to be thrashed out - a
MemoryRegion may well be the right answer.

But whatever we do there, a convenience wrapper would be useful for
the common case of PCI devices - certainly the correct addressing
context should be reachable from the PCI device.

This approach means we can get most of the PCI drivers IOMMU ready
now, and we just need to fix the final IOMMU support in one place,
instead of in every PCI driver.
Avi Kivity - Sept. 2, 2011, 8:35 a.m.
On 09/01/2011 07:32 PM, Anthony Liguori wrote:
>> True. But I still think it's the right thing.
>>
>> We can't really pass a MemoryRegion as the source address, since there
>> is no per-device MemoryRegion.
>
>
> Couldn't the PCI bus expose 255 MemoryRegions though? 

What would those mean?  A MemoryRegion is something that can respond to 
reads and writes.

> It could still use the pci_address_space I think since that should 
> include RAM too, right?
>

No.

> In fact, initially, you could have a 
> pci_bus_get_device_memory_region(bus, dev) that just returns 
> pci_address_space().
>
> You just need the memory_st[bwl] functions I think.
>

Maybe we need a different type of object here - MemoryClient or something.
Avi Kivity - Sept. 2, 2011, 8:37 a.m.
On 09/01/2011 07:05 PM, Anthony Liguori wrote:
>
> The challenge is what you do about something like ne2k where the core 
> chipset can either be a PCI device or an ISA device.  You would have 
> to implement a wrapper around pci_dma_rw() in order to turn it into 
> cpu_physical_memory_rw when doing ISA.

btw, ISA DMA is very different, no?  You program a dma controller to 
copy memory from the device to RAM (or vice versa); the device never 
initiates a transaction IIRC.
Richard Henderson - Sept. 3, 2011, 1:16 a.m.
On 09/01/2011 09:25 PM, Anthony Liguori wrote:
> I think this is the wrong approach given the introduction of the memory API.
> 
> I think we should have a generic memory access function that takes a MemoryRegion as it's first argument.
> 
> The PCI bus should then expose one memory region for each device (that's how it can figure out where the access is coming from).

A MemoryRegion is the wrong abstraction for the device's view back into system memory.
The new memory API really has nothing to do with DMA or an IOMMU.  Think about it:
99.999% of the time we're writing to the one MemoryRegion that is RAM (with the other
remaining fraction being pci-pci dma tricks), except in a non-contiguous manner.
Obviously we need something else to represent the non-contiguous-ness.

I suppose you could abuse Avi's AddressSpace, with hundreds of MemoryRegion aliases,
but that would be a horrible interface to actually use in this situation.

I think David's patch is fine, as far as it goes for the majority of the PCI-only 
devices that we emulate.  As for the devices attached to more than one bus, they'll
just have to wait until we actually hash out the underlying dma api.


r~
Eduard - Gabriel Munteanu - Sept. 3, 2011, 3:04 a.m.
On Fri, Sep 02, 2011 at 11:37:25AM +0300, Avi Kivity wrote:
> On 09/01/2011 07:05 PM, Anthony Liguori wrote:
> >
> > The challenge is what you do about something like ne2k where the core 
> > chipset can either be a PCI device or an ISA device.  You would have 
> > to implement a wrapper around pci_dma_rw() in order to turn it into 
> > cpu_physical_memory_rw when doing ISA.
> 
> btw, ISA DMA is very different, no?  You program a dma controller to 
> copy memory from the device to RAM (or vice versa); the device never 
> initiates a transaction IIRC.

It seems to me ISA devices should use memory I/O primitives exposed by
the ISA bus code. Or maybe the DMA controllers involved should be doing
that. As for either-PCI/ISA devices, they can handle dispatching DMA
through the right interface themselves.

Anyway, I'm not sure why we're worrying about this now. Do we even have
IOMMUs for such use cases?


	Eduard
Blue Swirl - Sept. 3, 2011, 12:25 p.m.
On Fri, Sep 2, 2011 at 8:35 AM, Avi Kivity <avi@redhat.com> wrote:
> On 09/01/2011 07:32 PM, Anthony Liguori wrote:
>>>
>>> True. But I still think it's the right thing.
>>>
>>> We can't really pass a MemoryRegion as the source address, since there
>>> is no per-device MemoryRegion.
>>
>>
>> Couldn't the PCI bus expose 255 MemoryRegions though?
>
> What would those mean?  A MemoryRegion is something that can respond to
> reads and writes.

If I understand Anthony's idea, the MemoryRegion (one of 255 in this
case) would identify the device in bus agnostic (even PIO vs. MMIO)
way. In the numerous previously failed attempts to manage generic DMA,
the upstream handle was either an opaque pointer, bus specific
structure or IOMMU handle, but this is much better. From the device
MemoryRegion, the bus should be discoverable and then the bus can
resolve the chain back to host RAM with similar logic to perform the
DMA via IOMMUs or whatever strange buses or devices are involved in
between. Awesome!

>> It could still use the pci_address_space I think since that should include
>> RAM too, right?
>>
>
> No.
>
>> In fact, initially, you could have a pci_bus_get_device_memory_region(bus,
>> dev) that just returns pci_address_space().
>>
>> You just need the memory_st[bwl] functions I think.
>>
>
> Maybe we need a different type of object here - MemoryClient or something.
>
> --
> I have a truly marvellous patch that fixes the bug which this
> signature is too narrow to contain.
>
>
>
Michael S. Tsirkin - Sept. 5, 2011, 7:55 a.m.
On Fri, Sep 02, 2011 at 02:38:25PM +1000, David Gibson wrote:
> > I'd prefer the stubs to be inline. Not just as an optimization:
> > it also makes it easier to grok what goes on in the common
> > no-iommu case.
> 
> To elaborate on my earlier mail.  The problem with making them inlines
> is that the cpu_physical_*() functions then appear in pci.h, which is
> used in pci.c amongst other places that are included in
> libhw32/libhw64, where those functions are poisoned.

Hmm, how are they poisoned?
I thought almost all devices currently use cpu_physical_*()?
For example, e1000 uses cpu_physical_memory_write and
it seems to get included in libhw32/libhw64 without issues.


> -- 
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson
Anthony Liguori - Sept. 23, 2011, 4:37 p.m.
On 09/01/2011 12:00 AM, David Gibson wrote:
> This patch adds functions to pci.[ch] to perform PCI DMA operations.  At
> present, these are just stubs which perform directly cpu physical memory
> accesses.
>
> Using these stubs, however, distinguishes PCI device DMA transactions from
> other accesses to physical memory, which will allow PCI IOMMU support to
> be added in one place, rather than updating every PCI driver at that time.
>
> That is, it allows us to update individual PCI drivers to support an IOMMU
> without having yet determined the details of how the IOMMU emulation will
> operate.  This will let us remove the most bitrot-sensitive part of an
> IOMMU patch in advance.
>
> Signed-off-by: David Gibson<david@gibson.dropbear.id.au>

Please include wrappers for cpu_physical_memory_map/unmap and I'll apply.

Regards,

Anthony Liguori

> ---
>   dma.h    |    2 ++
>   hw/pci.c |   31 +++++++++++++++++++++++++++++++
>   hw/pci.h |   33 +++++++++++++++++++++++++++++++++
>   3 files changed, 66 insertions(+), 0 deletions(-)
>
> diff --git a/dma.h b/dma.h
> index a6db5ba..06e91cb 100644
> --- a/dma.h
> +++ b/dma.h
> @@ -15,6 +15,8 @@
>   #include "hw/hw.h"
>   #include "block.h"
>
> +typedef target_phys_addr_t dma_addr_t;
> +
>   typedef struct {
>       target_phys_addr_t base;
>       target_phys_addr_t len;
> diff --git a/hw/pci.c b/hw/pci.c
> index 1cdcbb7..842b066 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -2211,3 +2211,34 @@ MemoryRegion *pci_address_space(PCIDevice *dev)
>   {
>       return dev->bus->address_space_mem;
>   }
> +
> +#define DEFINE_LDST_DMA(_lname, _sname, _bits) \
> +    uint##_bits##_t ld##_lname##_pci_dma(PCIDevice *dev, dma_addr_t addr) \
> +    { \
> +        uint##_bits##_t val; \
> +        pci_dma_read(dev, addr,&val, sizeof(val)); \
> +        return le##_bits##_to_cpu(val); \
> +    } \
> +    void st##_sname##_pci_dma(PCIDevice *dev, \
> +                              dma_addr_t addr, uint##_bits##_t val) \
> +    { \
> +        val = cpu_to_le##_bits(val); \
> +        pci_dma_write(dev, addr,&val, sizeof(val)); \
> +    }
> +
> +uint8_t ldub_pci_dma(PCIDevice *dev, dma_addr_t addr)
> +{
> +    uint8_t val;
> +
> +    pci_dma_read(dev, addr,&val, sizeof(val));
> +    return val;
> +}
> +
> +void stb_pci_dma(PCIDevice *dev, dma_addr_t addr, uint8_t val)
> +{
> +    pci_dma_write(dev, addr,&val, sizeof(val));
> +}
> +
> +DEFINE_LDST_DMA(uw, w, 16);
> +DEFINE_LDST_DMA(l, l, 32);
> +DEFINE_LDST_DMA(q, q, 64);
> diff --git a/hw/pci.h b/hw/pci.h
> index 391217e..401d14a 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -6,6 +6,7 @@
>
>   #include "qdev.h"
>   #include "memory.h"
> +#include "dma.h"
>
>   /* PCI includes legacy ISA access.  */
>   #include "isa.h"
> @@ -492,4 +493,36 @@ static inline uint32_t pci_config_size(const PCIDevice *d)
>       return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
>   }
>
> +/* DMA access functions */
> +static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
> +                             void *buf, dma_addr_t len, int is_write)
> +{
> +    cpu_physical_memory_rw(addr, buf, len, is_write);
> +    return 0;
> +}
> +
> +static inline int pci_dma_read(PCIDevice *dev, dma_addr_t addr,
> +                               void *buf, dma_addr_t len)
> +{
> +    return pci_dma_rw(dev, addr, buf, len, 0);
> +}
> +
> +static inline int pci_dma_write(PCIDevice *dev, dma_addr_t addr,
> +                                const void *buf, dma_addr_t len)
> +{
> +    return pci_dma_rw(dev, addr, (void *) buf, len, 1);
> +}
> +
> +#define DECLARE_LDST_DMA(_lname, _sname, _bits) \
> +    uint##_bits##_t ld##_lname##_pci_dma(PCIDevice *dev, dma_addr_t addr); \
> +    void st##_sname##_pci_dma(PCIDevice *dev, dma_addr_t addr, \
> +                              uint##_bits##_t val);            \
> +
> +DECLARE_LDST_DMA(ub, b, 8);
> +DECLARE_LDST_DMA(uw, w, 16);
> +DECLARE_LDST_DMA(l, l, 32);
> +DECLARE_LDST_DMA(q, q, 64);
> +
> +#undef DECLARE_LDST_DMA
> +
>   #endif

Patch

diff --git a/dma.h b/dma.h
index a6db5ba..06e91cb 100644
--- a/dma.h
+++ b/dma.h
@@ -15,6 +15,8 @@ 
 #include "hw/hw.h"
 #include "block.h"
 
+typedef target_phys_addr_t dma_addr_t;
+
 typedef struct {
     target_phys_addr_t base;
     target_phys_addr_t len;
diff --git a/hw/pci.c b/hw/pci.c
index 1cdcbb7..842b066 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -2211,3 +2211,34 @@  MemoryRegion *pci_address_space(PCIDevice *dev)
 {
     return dev->bus->address_space_mem;
 }
+
+#define DEFINE_LDST_DMA(_lname, _sname, _bits) \
+    uint##_bits##_t ld##_lname##_pci_dma(PCIDevice *dev, dma_addr_t addr) \
+    { \
+        uint##_bits##_t val; \
+        pci_dma_read(dev, addr, &val, sizeof(val)); \
+        return le##_bits##_to_cpu(val); \
+    } \
+    void st##_sname##_pci_dma(PCIDevice *dev, \
+                              dma_addr_t addr, uint##_bits##_t val) \
+    { \
+        val = cpu_to_le##_bits(val); \
+        pci_dma_write(dev, addr, &val, sizeof(val)); \
+    }
+
+uint8_t ldub_pci_dma(PCIDevice *dev, dma_addr_t addr)
+{
+    uint8_t val;
+
+    pci_dma_read(dev, addr, &val, sizeof(val));
+    return val;
+}
+
+void stb_pci_dma(PCIDevice *dev, dma_addr_t addr, uint8_t val)
+{
+    pci_dma_write(dev, addr, &val, sizeof(val));
+}
+
+DEFINE_LDST_DMA(uw, w, 16);
+DEFINE_LDST_DMA(l, l, 32);
+DEFINE_LDST_DMA(q, q, 64);
diff --git a/hw/pci.h b/hw/pci.h
index 391217e..401d14a 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -6,6 +6,7 @@ 
 
 #include "qdev.h"
 #include "memory.h"
+#include "dma.h"
 
 /* PCI includes legacy ISA access.  */
 #include "isa.h"
@@ -492,4 +493,36 @@  static inline uint32_t pci_config_size(const PCIDevice *d)
     return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
 }
 
+/* DMA access functions */
+static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
+                             void *buf, dma_addr_t len, int is_write)
+{
+    cpu_physical_memory_rw(addr, buf, len, is_write);
+    return 0;
+}
+
+static inline int pci_dma_read(PCIDevice *dev, dma_addr_t addr,
+                               void *buf, dma_addr_t len)
+{
+    return pci_dma_rw(dev, addr, buf, len, 0);
+}
+
+static inline int pci_dma_write(PCIDevice *dev, dma_addr_t addr,
+                                const void *buf, dma_addr_t len)
+{
+    return pci_dma_rw(dev, addr, (void *) buf, len, 1);
+}
+
+#define DECLARE_LDST_DMA(_lname, _sname, _bits) \
+    uint##_bits##_t ld##_lname##_pci_dma(PCIDevice *dev, dma_addr_t addr); \
+    void st##_sname##_pci_dma(PCIDevice *dev, dma_addr_t addr, \
+                              uint##_bits##_t val);            \
+
+DECLARE_LDST_DMA(ub, b, 8);
+DECLARE_LDST_DMA(uw, w, 16);
+DECLARE_LDST_DMA(l, l, 32);
+DECLARE_LDST_DMA(q, q, 64);
+
+#undef DECLARE_LDST_DMA
+
 #endif