Patchwork pci: Factor out bounds checking on config space accesses

login
register
mail settings
Submitter David Gibson
Date March 19, 2012, 4:58 a.m.
Message ID <1332133091-7782-1-git-send-email-david@gibson.dropbear.id.au>
Download mbox | patch
Permalink /patch/147464/
State New
Headers show

Comments

David Gibson - March 19, 2012, 4:58 a.m.
There are several paths into the code to emulate PCI config space accesses:
one for MMIO to a plain old PCI bridge one for MMIO to a PCIe bridge and
one for the pseries machine which provides para-virtualized access to PCI
config space.  Each of these functions does their own bounds checking
against the size of config space to check for addresses outside the
size of config space.  The pci_host_config_{read,write}_common() (sort
of) checks for partial overruns, that is where the address is within
the size of config space, but address + length is not, it takes a
limit parameter for this purpose.

As well as being a small code duplication, and it being weird to
separate the checks for partial and total overruns, this checking
currently has a few buglets:

    * For non PCI-Express we assume that the size of config space is
      PCI_CONFIG_SPACE_SIZE.  That's true for everything we emulate
      now, but is not necessarily true (e.g. PCI-X devices can have
      extended config space)

    * The limit parameter is not necessary, since the size of config
      space can be obtained using pci_config_size()

    * Partial overruns could only occur with a misaligned access,
      which should have already been dealt with by this point

    * Partial overruns are handled as a partial read or write, which
      is very unlikely behaviour for real hardware

    * Furthermore, partial reads are 0x0 padded, whereas returning
      0xff for unimplemented addresses us much more common.

    * The partial reads/writes only work correctly by assuming
      little-endian byte layout.  While that is always true for PCI
      config space, it's an awfully subtle thing to rely on without
      comment.

This patch, therefore, moves the bounds checking wholly into
pci_host_config_{read,write}_common().  No partial reads or writes are
performed, instead any out-of-bounds write is simply ignored and an
out-of-bounds read returns 0xff.

This simplifies all the callers, and makes the overall semantics saner
for edge cases.

Cc: Michael S. Tsirkin <mst@redhat.com>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/pci_host.c  |   26 ++++++++++++++------------
 hw/pci_host.h  |    4 ++--
 hw/pcie_host.c |   18 ++----------------
 hw/spapr_pci.c |   27 ++++-----------------------
 4 files changed, 22 insertions(+), 53 deletions(-)
David Gibson - March 28, 2012, 1:11 a.m.
Michael,

Any chance of an ack or nack on this one?

On Mon, Mar 19, 2012 at 03:58:11PM +1100, David Gibson wrote:
> There are several paths into the code to emulate PCI config space accesses:
> one for MMIO to a plain old PCI bridge one for MMIO to a PCIe bridge and
> one for the pseries machine which provides para-virtualized access to PCI
> config space.  Each of these functions does their own bounds checking
> against the size of config space to check for addresses outside the
> size of config space.  The pci_host_config_{read,write}_common() (sort
> of) checks for partial overruns, that is where the address is within
> the size of config space, but address + length is not, it takes a
> limit parameter for this purpose.
> 
> As well as being a small code duplication, and it being weird to
> separate the checks for partial and total overruns, this checking
> currently has a few buglets:
> 
>     * For non PCI-Express we assume that the size of config space is
>       PCI_CONFIG_SPACE_SIZE.  That's true for everything we emulate
>       now, but is not necessarily true (e.g. PCI-X devices can have
>       extended config space)
> 
>     * The limit parameter is not necessary, since the size of config
>       space can be obtained using pci_config_size()
> 
>     * Partial overruns could only occur with a misaligned access,
>       which should have already been dealt with by this point
> 
>     * Partial overruns are handled as a partial read or write, which
>       is very unlikely behaviour for real hardware
> 
>     * Furthermore, partial reads are 0x0 padded, whereas returning
>       0xff for unimplemented addresses us much more common.
> 
>     * The partial reads/writes only work correctly by assuming
>       little-endian byte layout.  While that is always true for PCI
>       config space, it's an awfully subtle thing to rely on without
>       comment.
> 
> This patch, therefore, moves the bounds checking wholly into
> pci_host_config_{read,write}_common().  No partial reads or writes are
> performed, instead any out-of-bounds write is simply ignored and an
> out-of-bounds read returns 0xff.
> 
> This simplifies all the callers, and makes the overall semantics saner
> for edge cases.
> 
> Cc: Michael S. Tsirkin <mst@redhat.com>
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/pci_host.c  |   26 ++++++++++++++------------
>  hw/pci_host.h  |    4 ++--
>  hw/pcie_host.c |   18 ++----------------
>  hw/spapr_pci.c |   27 ++++-----------------------
>  4 files changed, 22 insertions(+), 53 deletions(-)
> 
> diff --git a/hw/pci_host.c b/hw/pci_host.c
> index 44c6c20..829d797 100644
> --- a/hw/pci_host.c
> +++ b/hw/pci_host.c
> @@ -48,48 +48,50 @@ static inline PCIDevice *pci_dev_find_by_addr(PCIBus *bus, uint32_t addr)
>  }
>  
>  void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
> -                                  uint32_t limit, uint32_t val, uint32_t len)
> +                                  uint32_t val, uint32_t len)
>  {
>      assert(len <= 4);
> -    pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr));
> +    if ((addr + len) <= pci_config_size(pci_dev)) {
> +        pci_dev->config_write(pci_dev, addr, val, len);
> +    }
>  }
>  
>  uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
> -                                     uint32_t limit, uint32_t len)
> +                                     uint32_t len)
>  {
>      assert(len <= 4);
> -    return pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr));
> +    if ((addr + len) <= pci_config_size(pci_dev)) {
> +        return pci_dev->config_read(pci_dev, addr, len);
> +    } else {
> +        return ~0x0;
> +    }
>  }
>  
>  void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len)
>  {
>      PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
> -    uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
>  
>      if (!pci_dev) {
>          return;
>      }
>  
>      PCI_DPRINTF("%s: %s: addr=%02" PRIx32 " val=%08" PRIx32 " len=%d\n",
> -                __func__, pci_dev->name, config_addr, val, len);
> -    pci_host_config_write_common(pci_dev, config_addr, PCI_CONFIG_SPACE_SIZE,
> -                                 val, len);
> +                __func__, pci_dev->name, addr, val, len);
> +    pci_host_config_write_common(pci_dev, addr, val, len);
>  }
>  
>  uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len)
>  {
>      PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
> -    uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
>      uint32_t val;
>  
>      if (!pci_dev) {
>          return ~0x0;
>      }
>  
> -    val = pci_host_config_read_common(pci_dev, config_addr,
> -                                      PCI_CONFIG_SPACE_SIZE, len);
> +    val = pci_host_config_read_common(pci_dev, addr, len);
>      PCI_DPRINTF("%s: %s: addr=%02"PRIx32" val=%08"PRIx32" len=%d\n",
> -                __func__, pci_dev->name, config_addr, val, len);
> +                __func__, pci_dev->name, addr, val, len);
>  
>      return val;
>  }
> diff --git a/hw/pci_host.h b/hw/pci_host.h
> index 359e38f..4bb0838 100644
> --- a/hw/pci_host.h
> +++ b/hw/pci_host.h
> @@ -42,9 +42,9 @@ struct PCIHostState {
>  
>  /* common internal helpers for PCI/PCIe hosts, cut off overflows */
>  void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
> -                                  uint32_t limit, uint32_t val, uint32_t len);
> +                                  uint32_t val, uint32_t len);
>  uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
> -                                     uint32_t limit, uint32_t len);
> +                                     uint32_t len);
>  
>  void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
>  uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
> diff --git a/hw/pcie_host.c b/hw/pcie_host.c
> index 28bbe72..3af8610 100644
> --- a/hw/pcie_host.c
> +++ b/hw/pcie_host.c
> @@ -60,19 +60,12 @@ static void pcie_mmcfg_data_write(void *opaque, target_phys_addr_t mmcfg_addr,
>      PCIBus *s = e->pci.bus;
>      PCIDevice *pci_dev = pcie_dev_find_by_mmcfg_addr(s, mmcfg_addr);
>      uint32_t addr;
> -    uint32_t limit;
>  
>      if (!pci_dev) {
>          return;
>      }
>      addr = PCIE_MMCFG_CONFOFFSET(mmcfg_addr);
> -    limit = pci_config_size(pci_dev);
> -    if (limit <= addr) {
> -        /* conventional pci device can be behind pcie-to-pci bridge.
> -           256 <= addr < 4K has no effects. */
> -        return;
> -    }
> -    pci_host_config_write_common(pci_dev, addr, limit, val, len);
> +    pci_host_config_write_common(pci_dev, addr, val, len);
>  }
>  
>  static uint64_t pcie_mmcfg_data_read(void *opaque,
> @@ -83,19 +76,12 @@ static uint64_t pcie_mmcfg_data_read(void *opaque,
>      PCIBus *s = e->pci.bus;
>      PCIDevice *pci_dev = pcie_dev_find_by_mmcfg_addr(s, mmcfg_addr);
>      uint32_t addr;
> -    uint32_t limit;
>  
>      if (!pci_dev) {
>          return ~0x0;
>      }
>      addr = PCIE_MMCFG_CONFOFFSET(mmcfg_addr);
> -    limit = pci_config_size(pci_dev);
> -    if (limit <= addr) {
> -        /* conventional pci device can be behind pcie-to-pci bridge.
> -           256 <= addr < 4K has no effects. */
> -        return ~0x0;
> -    }
> -    return pci_host_config_read_common(pci_dev, addr, limit, len);
> +    return pci_host_config_read_common(pci_dev, addr, len);
>  }
>  
>  static const MemoryRegionOps pcie_mmcfg_ops = {
> diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
> index e7ef551..77f3e52 100644
> --- a/hw/spapr_pci.c
> +++ b/hw/spapr_pci.c
> @@ -60,25 +60,6 @@ static uint32_t rtas_pci_cfgaddr(uint32_t arg)
>      return ((arg >> 20) & 0xf00) | (arg & 0xff);
>  }
>  
> -static uint32_t rtas_read_pci_config_do(PCIDevice *pci_dev, uint32_t addr,
> -                                        uint32_t limit, uint32_t len)
> -{
> -    if ((addr + len) <= limit) {
> -        return pci_host_config_read_common(pci_dev, addr, limit, len);
> -    } else {
> -        return ~0x0;
> -    }
> -}
> -
> -static void rtas_write_pci_config_do(PCIDevice *pci_dev, uint32_t addr,
> -                                     uint32_t limit, uint32_t val,
> -                                     uint32_t len)
> -{
> -    if ((addr + len) <= limit) {
> -        pci_host_config_write_common(pci_dev, addr, limit, val, len);
> -    }
> -}
> -
>  static void rtas_ibm_read_pci_config(sPAPREnvironment *spapr,
>                                       uint32_t token, uint32_t nargs,
>                                       target_ulong args,
> @@ -94,7 +75,7 @@ static void rtas_ibm_read_pci_config(sPAPREnvironment *spapr,
>      }
>      size = rtas_ld(args, 3);
>      addr = rtas_pci_cfgaddr(rtas_ld(args, 0));
> -    val = rtas_read_pci_config_do(dev, addr, pci_config_size(dev), size);
> +    val = pci_host_config_read_common(dev, addr, size);
>      rtas_st(rets, 0, 0);
>      rtas_st(rets, 1, val);
>  }
> @@ -113,7 +94,7 @@ static void rtas_read_pci_config(sPAPREnvironment *spapr,
>      }
>      size = rtas_ld(args, 1);
>      addr = rtas_pci_cfgaddr(rtas_ld(args, 0));
> -    val = rtas_read_pci_config_do(dev, addr, pci_config_size(dev), size);
> +    val = pci_host_config_read_common(dev, addr, size);
>      rtas_st(rets, 0, 0);
>      rtas_st(rets, 1, val);
>  }
> @@ -134,7 +115,7 @@ static void rtas_ibm_write_pci_config(sPAPREnvironment *spapr,
>      val = rtas_ld(args, 4);
>      size = rtas_ld(args, 3);
>      addr = rtas_pci_cfgaddr(rtas_ld(args, 0));
> -    rtas_write_pci_config_do(dev, addr, pci_config_size(dev), val, size);
> +    pci_host_config_write_common(dev, addr, val, size);
>      rtas_st(rets, 0, 0);
>  }
>  
> @@ -153,7 +134,7 @@ static void rtas_write_pci_config(sPAPREnvironment *spapr,
>      val = rtas_ld(args, 2);
>      size = rtas_ld(args, 1);
>      addr = rtas_pci_cfgaddr(rtas_ld(args, 0));
> -    rtas_write_pci_config_do(dev, addr, pci_config_size(dev), val, size);
> +    pci_host_config_write_common(dev, addr, val, size);
>      rtas_st(rets, 0, 0);
>  }
>
Michael S. Tsirkin - March 28, 2012, 9:30 a.m.
On Wed, Mar 28, 2012 at 12:11:52PM +1100, David Gibson wrote:
> Michael,
> 
> Any chance of an ack or nack on this one?
> 
> On Mon, Mar 19, 2012 at 03:58:11PM +1100, David Gibson wrote:
> > There are several paths into the code to emulate PCI config space accesses:
> > one for MMIO to a plain old PCI bridge one for MMIO to a PCIe bridge and
> > one for the pseries machine which provides para-virtualized access to PCI
> > config space.  Each of these functions does their own bounds checking
> > against the size of config space to check for addresses outside the
> > size of config space.  The pci_host_config_{read,write}_common() (sort
> > of) checks for partial overruns, that is where the address is within
> > the size of config space, but address + length is not, it takes a
> > limit parameter for this purpose.
> > 
> > As well as being a small code duplication, and it being weird to
> > separate the checks for partial and total overruns, this checking
> > currently has a few buglets:
> > 
> >     * For non PCI-Express we assume that the size of config space is
> >       PCI_CONFIG_SPACE_SIZE.  That's true for everything we emulate
> >       now, but is not necessarily true (e.g. PCI-X devices can have
> >       extended config space)
> > 
> >     * The limit parameter is not necessary, since the size of config
> >       space can be obtained using pci_config_size()
> > 
> >     * Partial overruns could only occur with a misaligned access,
> >       which should have already been dealt with by this point
> > 
> >     * Partial overruns are handled as a partial read or write, which
> >       is very unlikely behaviour for real hardware
> > 
> >     * Furthermore, partial reads are 0x0 padded, whereas returning
> >       0xff for unimplemented addresses us much more common.
> > 
> >     * The partial reads/writes only work correctly by assuming
> >       little-endian byte layout.  While that is always true for PCI
> >       config space, it's an awfully subtle thing to rely on without
> >       comment.

This last point can be addressed by adding a comment?
Patch welcome.

> > This patch, therefore, moves the bounds checking wholly into
> > pci_host_config_{read,write}_common().  No partial reads or writes are
> > performed, instead any out-of-bounds write is simply ignored and an
> > out-of-bounds read returns 0xff.
> > 
> > This simplifies all the callers, and makes the overall semantics saner
> > for edge cases.
> > 
> > Cc: Michael S. Tsirkin <mst@redhat.com>
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

Sorry, I didn't reply because I have no idea whether this patch is correct.
Couldn't figure out from the description
whether there's a test case where we differ
from real hardware in our behaviour.
The change affects lots of platforms and
there's no mention of which ones were tested.


> > ---
> >  hw/pci_host.c  |   26 ++++++++++++++------------
> >  hw/pci_host.h  |    4 ++--
> >  hw/pcie_host.c |   18 ++----------------
> >  hw/spapr_pci.c |   27 ++++-----------------------
> >  4 files changed, 22 insertions(+), 53 deletions(-)
> > 
> > diff --git a/hw/pci_host.c b/hw/pci_host.c
> > index 44c6c20..829d797 100644
> > --- a/hw/pci_host.c
> > +++ b/hw/pci_host.c
> > @@ -48,48 +48,50 @@ static inline PCIDevice *pci_dev_find_by_addr(PCIBus *bus, uint32_t addr)
> >  }
> >  
> >  void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
> > -                                  uint32_t limit, uint32_t val, uint32_t len)
> > +                                  uint32_t val, uint32_t len)
> >  {
> >      assert(len <= 4);
> > -    pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr));
> > +    if ((addr + len) <= pci_config_size(pci_dev)) {
> > +        pci_dev->config_write(pci_dev, addr, val, len);
> > +    }
> >  }
> >  
> >  uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
> > -                                     uint32_t limit, uint32_t len)
> > +                                     uint32_t len)
> >  {
> >      assert(len <= 4);
> > -    return pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr));
> > +    if ((addr + len) <= pci_config_size(pci_dev)) {
> > +        return pci_dev->config_read(pci_dev, addr, len);
> > +    } else {
> > +        return ~0x0;
> > +    }
> >  }
> >  
> >  void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len)
> >  {
> >      PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
> > -    uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
> >  
> >      if (!pci_dev) {
> >          return;
> >      }
> >  
> >      PCI_DPRINTF("%s: %s: addr=%02" PRIx32 " val=%08" PRIx32 " len=%d\n",
> > -                __func__, pci_dev->name, config_addr, val, len);
> > -    pci_host_config_write_common(pci_dev, config_addr, PCI_CONFIG_SPACE_SIZE,
> > -                                 val, len);
> > +                __func__, pci_dev->name, addr, val, len);
> > +    pci_host_config_write_common(pci_dev, addr, val, len);
> >  }
> >  
> >  uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len)
> >  {
> >      PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
> > -    uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
> >      uint32_t val;
> >  
> >      if (!pci_dev) {
> >          return ~0x0;
> >      }
> >  
> > -    val = pci_host_config_read_common(pci_dev, config_addr,
> > -                                      PCI_CONFIG_SPACE_SIZE, len);
> > +    val = pci_host_config_read_common(pci_dev, addr, len);
> >      PCI_DPRINTF("%s: %s: addr=%02"PRIx32" val=%08"PRIx32" len=%d\n",
> > -                __func__, pci_dev->name, config_addr, val, len);
> > +                __func__, pci_dev->name, addr, val, len);
> >  
> >      return val;
> >  }
> > diff --git a/hw/pci_host.h b/hw/pci_host.h
> > index 359e38f..4bb0838 100644
> > --- a/hw/pci_host.h
> > +++ b/hw/pci_host.h
> > @@ -42,9 +42,9 @@ struct PCIHostState {
> >  
> >  /* common internal helpers for PCI/PCIe hosts, cut off overflows */
> >  void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
> > -                                  uint32_t limit, uint32_t val, uint32_t len);
> > +                                  uint32_t val, uint32_t len);
> >  uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
> > -                                     uint32_t limit, uint32_t len);
> > +                                     uint32_t len);
> >  
> >  void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
> >  uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
> > diff --git a/hw/pcie_host.c b/hw/pcie_host.c
> > index 28bbe72..3af8610 100644
> > --- a/hw/pcie_host.c
> > +++ b/hw/pcie_host.c
> > @@ -60,19 +60,12 @@ static void pcie_mmcfg_data_write(void *opaque, target_phys_addr_t mmcfg_addr,
> >      PCIBus *s = e->pci.bus;
> >      PCIDevice *pci_dev = pcie_dev_find_by_mmcfg_addr(s, mmcfg_addr);
> >      uint32_t addr;
> > -    uint32_t limit;
> >  
> >      if (!pci_dev) {
> >          return;
> >      }
> >      addr = PCIE_MMCFG_CONFOFFSET(mmcfg_addr);
> > -    limit = pci_config_size(pci_dev);
> > -    if (limit <= addr) {
> > -        /* conventional pci device can be behind pcie-to-pci bridge.
> > -           256 <= addr < 4K has no effects. */
> > -        return;
> > -    }
> > -    pci_host_config_write_common(pci_dev, addr, limit, val, len);
> > +    pci_host_config_write_common(pci_dev, addr, val, len);
> >  }
> >  
> >  static uint64_t pcie_mmcfg_data_read(void *opaque,
> > @@ -83,19 +76,12 @@ static uint64_t pcie_mmcfg_data_read(void *opaque,
> >      PCIBus *s = e->pci.bus;
> >      PCIDevice *pci_dev = pcie_dev_find_by_mmcfg_addr(s, mmcfg_addr);
> >      uint32_t addr;
> > -    uint32_t limit;
> >  
> >      if (!pci_dev) {
> >          return ~0x0;
> >      }
> >      addr = PCIE_MMCFG_CONFOFFSET(mmcfg_addr);
> > -    limit = pci_config_size(pci_dev);
> > -    if (limit <= addr) {
> > -        /* conventional pci device can be behind pcie-to-pci bridge.
> > -           256 <= addr < 4K has no effects. */
> > -        return ~0x0;
> > -    }
> > -    return pci_host_config_read_common(pci_dev, addr, limit, len);
> > +    return pci_host_config_read_common(pci_dev, addr, len);
> >  }
> >  
> >  static const MemoryRegionOps pcie_mmcfg_ops = {
> > diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
> > index e7ef551..77f3e52 100644
> > --- a/hw/spapr_pci.c
> > +++ b/hw/spapr_pci.c
> > @@ -60,25 +60,6 @@ static uint32_t rtas_pci_cfgaddr(uint32_t arg)
> >      return ((arg >> 20) & 0xf00) | (arg & 0xff);
> >  }
> >  
> > -static uint32_t rtas_read_pci_config_do(PCIDevice *pci_dev, uint32_t addr,
> > -                                        uint32_t limit, uint32_t len)
> > -{
> > -    if ((addr + len) <= limit) {
> > -        return pci_host_config_read_common(pci_dev, addr, limit, len);
> > -    } else {
> > -        return ~0x0;
> > -    }
> > -}
> > -
> > -static void rtas_write_pci_config_do(PCIDevice *pci_dev, uint32_t addr,
> > -                                     uint32_t limit, uint32_t val,
> > -                                     uint32_t len)
> > -{
> > -    if ((addr + len) <= limit) {
> > -        pci_host_config_write_common(pci_dev, addr, limit, val, len);
> > -    }
> > -}
> > -
> >  static void rtas_ibm_read_pci_config(sPAPREnvironment *spapr,
> >                                       uint32_t token, uint32_t nargs,
> >                                       target_ulong args,
> > @@ -94,7 +75,7 @@ static void rtas_ibm_read_pci_config(sPAPREnvironment *spapr,
> >      }
> >      size = rtas_ld(args, 3);
> >      addr = rtas_pci_cfgaddr(rtas_ld(args, 0));
> > -    val = rtas_read_pci_config_do(dev, addr, pci_config_size(dev), size);
> > +    val = pci_host_config_read_common(dev, addr, size);
> >      rtas_st(rets, 0, 0);
> >      rtas_st(rets, 1, val);
> >  }
> > @@ -113,7 +94,7 @@ static void rtas_read_pci_config(sPAPREnvironment *spapr,
> >      }
> >      size = rtas_ld(args, 1);
> >      addr = rtas_pci_cfgaddr(rtas_ld(args, 0));
> > -    val = rtas_read_pci_config_do(dev, addr, pci_config_size(dev), size);
> > +    val = pci_host_config_read_common(dev, addr, size);
> >      rtas_st(rets, 0, 0);
> >      rtas_st(rets, 1, val);
> >  }
> > @@ -134,7 +115,7 @@ static void rtas_ibm_write_pci_config(sPAPREnvironment *spapr,
> >      val = rtas_ld(args, 4);
> >      size = rtas_ld(args, 3);
> >      addr = rtas_pci_cfgaddr(rtas_ld(args, 0));
> > -    rtas_write_pci_config_do(dev, addr, pci_config_size(dev), val, size);
> > +    pci_host_config_write_common(dev, addr, val, size);
> >      rtas_st(rets, 0, 0);
> >  }
> >  
> > @@ -153,7 +134,7 @@ static void rtas_write_pci_config(sPAPREnvironment *spapr,
> >      val = rtas_ld(args, 2);
> >      size = rtas_ld(args, 1);
> >      addr = rtas_pci_cfgaddr(rtas_ld(args, 0));
> > -    rtas_write_pci_config_do(dev, addr, pci_config_size(dev), val, size);
> > +    pci_host_config_write_common(dev, addr, val, size);
> >      rtas_st(rets, 0, 0);
> >  }
> >  
> 
> -- 
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson
David Gibson - March 29, 2012, 3:53 a.m.
On Wed, Mar 28, 2012 at 11:30:56AM +0200, Michael S. Tsirkin wrote:
> On Wed, Mar 28, 2012 at 12:11:52PM +1100, David Gibson wrote:
> > Michael,
> > 
> > Any chance of an ack or nack on this one?
> > 
> > On Mon, Mar 19, 2012 at 03:58:11PM +1100, David Gibson wrote:
> > > There are several paths into the code to emulate PCI config space accesses:
> > > one for MMIO to a plain old PCI bridge one for MMIO to a PCIe bridge and
> > > one for the pseries machine which provides para-virtualized access to PCI
> > > config space.  Each of these functions does their own bounds checking
> > > against the size of config space to check for addresses outside the
> > > size of config space.  The pci_host_config_{read,write}_common() (sort
> > > of) checks for partial overruns, that is where the address is within
> > > the size of config space, but address + length is not, it takes a
> > > limit parameter for this purpose.
> > > 
> > > As well as being a small code duplication, and it being weird to
> > > separate the checks for partial and total overruns, this checking
> > > currently has a few buglets:
> > > 
> > >     * For non PCI-Express we assume that the size of config space is
> > >       PCI_CONFIG_SPACE_SIZE.  That's true for everything we emulate
> > >       now, but is not necessarily true (e.g. PCI-X devices can have
> > >       extended config space)
> > > 
> > >     * The limit parameter is not necessary, since the size of config
> > >       space can be obtained using pci_config_size()
> > > 
> > >     * Partial overruns could only occur with a misaligned access,
> > >       which should have already been dealt with by this point
> > > 
> > >     * Partial overruns are handled as a partial read or write, which
> > >       is very unlikely behaviour for real hardware
> > > 
> > >     * Furthermore, partial reads are 0x0 padded, whereas returning
> > >       0xff for unimplemented addresses us much more common.
> > > 
> > >     * The partial reads/writes only work correctly by assuming
> > >       little-endian byte layout.  While that is always true for PCI
> > >       config space, it's an awfully subtle thing to rely on without
> > >       comment.
> 
> This last point can be addressed by adding a comment?
> Patch welcome.

Well, it could.  But why comment on the subtle assumptions of code
which implements a totally bizarre behaviour, rather than just
removing the bizarre behaviour.

> 
> > > This patch, therefore, moves the bounds checking wholly into
> > > pci_host_config_{read,write}_common().  No partial reads or writes are
> > > performed, instead any out-of-bounds write is simply ignored and an
> > > out-of-bounds read returns 0xff.
> > > 
> > > This simplifies all the callers, and makes the overall semantics saner
> > > for edge cases.
> > > 
> > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > 
> > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> 
> Sorry, I didn't reply because I have no idea whether this patch is
> correct.

Well, what do you need to decide one way or the other?

Would it help to split this up into micro-patches which address single
aspects of the points covered in the patch description?

> Couldn't figure out from the description whether there's a test case
> where we differ from real hardware in our behaviour.

Not sure what you mean by a testcase here.  Do you mean a formal
automated testcase somewhere, or just are there cases in which the
existing behaviour differs from hardware.  If the latter, then yes,
absolutely.  In fact I'd be astonished if there is *any* hardware
which implements partial writes (or reads) the way the existing code
does.

> The change affects lots of platforms and there's no mention of which
> ones were tested.

Only pseries, I'm afraid, because that's all I've really got guest
setups available to try.
Michael S. Tsirkin - March 29, 2012, 9:28 a.m.
On Thu, Mar 29, 2012 at 02:53:52PM +1100, David Gibson wrote:
> On Wed, Mar 28, 2012 at 11:30:56AM +0200, Michael S. Tsirkin wrote:
> > On Wed, Mar 28, 2012 at 12:11:52PM +1100, David Gibson wrote:
> > > Michael,
> > > 
> > > Any chance of an ack or nack on this one?
> > > 
> > > On Mon, Mar 19, 2012 at 03:58:11PM +1100, David Gibson wrote:
> > > > There are several paths into the code to emulate PCI config space accesses:
> > > > one for MMIO to a plain old PCI bridge one for MMIO to a PCIe bridge and
> > > > one for the pseries machine which provides para-virtualized access to PCI
> > > > config space.  Each of these functions does their own bounds checking
> > > > against the size of config space to check for addresses outside the
> > > > size of config space.  The pci_host_config_{read,write}_common() (sort
> > > > of) checks for partial overruns, that is where the address is within
> > > > the size of config space, but address + length is not, it takes a
> > > > limit parameter for this purpose.
> > > > 
> > > > As well as being a small code duplication, and it being weird to
> > > > separate the checks for partial and total overruns, this checking
> > > > currently has a few buglets:
> > > > 
> > > >     * For non PCI-Express we assume that the size of config space is
> > > >       PCI_CONFIG_SPACE_SIZE.  That's true for everything we emulate
> > > >       now, but is not necessarily true (e.g. PCI-X devices can have
> > > >       extended config space)
> > > > 
> > > >     * The limit parameter is not necessary, since the size of config
> > > >       space can be obtained using pci_config_size()
> > > > 
> > > >     * Partial overruns could only occur with a misaligned access,
> > > >       which should have already been dealt with by this point
> > > > 
> > > >     * Partial overruns are handled as a partial read or write, which
> > > >       is very unlikely behaviour for real hardware
> > > > 
> > > >     * Furthermore, partial reads are 0x0 padded, whereas returning
> > > >       0xff for unimplemented addresses us much more common.
> > > > 
> > > >     * The partial reads/writes only work correctly by assuming
> > > >       little-endian byte layout.  While that is always true for PCI
> > > >       config space, it's an awfully subtle thing to rely on without
> > > >       comment.
> > 
> > This last point can be addressed by adding a comment?
> > Patch welcome.
> 
> Well, it could.  But why comment on the subtle assumptions of code
> which implements a totally bizarre behaviour, rather than just
> removing the bizarre behaviour.
> 
> > 
> > > > This patch, therefore, moves the bounds checking wholly into
> > > > pci_host_config_{read,write}_common().  No partial reads or writes are
> > > > performed, instead any out-of-bounds write is simply ignored and an
> > > > out-of-bounds read returns 0xff.
> > > > 
> > > > This simplifies all the callers, and makes the overall semantics saner
> > > > for edge cases.
> > > > 
> > > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > > 
> > > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > 
> > Sorry, I didn't reply because I have no idea whether this patch is
> > correct.
> 
> Well, what do you need to decide one way or the other?
> 
> Would it help to split this up into micro-patches which address single
> aspects of the points covered in the patch description?

That's always good.
But most importantly, I'd like to get the motivation straight.

> > Couldn't figure out from the description whether there's a test case
> > where we differ from real hardware in our behaviour.
> 
> Not sure what you mean by a testcase here.  Do you mean a formal
> automated testcase somewhere, or just are there cases in which the
> existing behaviour differs from hardware.

No, not formal. Simply
- what these cases are
- what is the actual versus expected behaviour
- how to observe the difference from the guest

>  If the latter, then yes,
> absolutely.  In fact I'd be astonished if there is *any* hardware
> which implements partial writes (or reads) the way the existing code
> does.

We could classify such a difference as a minor bug.
The fix might turn out to be different for different platforms.

> > The change affects lots of platforms and there's no mention of which
> > ones were tested.
> 
> Only pseries, I'm afraid, because that's all I've really got guest
> setups available to try.

Then it would be better to find a way to limit the change to that
platform.

Alternatively need to get others interested enough to spend cycles
on testing your patches.

> -- 
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson
David Gibson - March 30, 2012, 3:17 a.m.
On Thu, Mar 29, 2012 at 11:28:19AM +0200, Michael S. Tsirkin wrote:
> On Thu, Mar 29, 2012 at 02:53:52PM +1100, David Gibson wrote:
> > On Wed, Mar 28, 2012 at 11:30:56AM +0200, Michael S. Tsirkin wrote:
> > > On Wed, Mar 28, 2012 at 12:11:52PM +1100, David Gibson wrote:
> > > > Michael,
> > > > 
> > > > Any chance of an ack or nack on this one?
> > > > 
> > > > On Mon, Mar 19, 2012 at 03:58:11PM +1100, David Gibson wrote:
> > > > > There are several paths into the code to emulate PCI config space accesses:
> > > > > one for MMIO to a plain old PCI bridge one for MMIO to a PCIe bridge and
> > > > > one for the pseries machine which provides para-virtualized access to PCI
> > > > > config space.  Each of these functions does their own bounds checking
> > > > > against the size of config space to check for addresses outside the
> > > > > size of config space.  The pci_host_config_{read,write}_common() (sort
> > > > > of) checks for partial overruns, that is where the address is within
> > > > > the size of config space, but address + length is not, it takes a
> > > > > limit parameter for this purpose.
> > > > > 
> > > > > As well as being a small code duplication, and it being weird to
> > > > > separate the checks for partial and total overruns, this checking
> > > > > currently has a few buglets:
> > > > > 
> > > > >     * For non PCI-Express we assume that the size of config space is
> > > > >       PCI_CONFIG_SPACE_SIZE.  That's true for everything we emulate
> > > > >       now, but is not necessarily true (e.g. PCI-X devices can have
> > > > >       extended config space)
> > > > > 
> > > > >     * The limit parameter is not necessary, since the size of config
> > > > >       space can be obtained using pci_config_size()
> > > > > 
> > > > >     * Partial overruns could only occur with a misaligned access,
> > > > >       which should have already been dealt with by this point
> > > > > 
> > > > >     * Partial overruns are handled as a partial read or write, which
> > > > >       is very unlikely behaviour for real hardware
> > > > > 
> > > > >     * Furthermore, partial reads are 0x0 padded, whereas returning
> > > > >       0xff for unimplemented addresses us much more common.
> > > > > 
> > > > >     * The partial reads/writes only work correctly by assuming
> > > > >       little-endian byte layout.  While that is always true for PCI
> > > > >       config space, it's an awfully subtle thing to rely on without
> > > > >       comment.
> > > 
> > > This last point can be addressed by adding a comment?
> > > Patch welcome.
> > 
> > Well, it could.  But why comment on the subtle assumptions of code
> > which implements a totally bizarre behaviour, rather than just
> > removing the bizarre behaviour.
> > 
> > > 
> > > > > This patch, therefore, moves the bounds checking wholly into
> > > > > pci_host_config_{read,write}_common().  No partial reads or writes are
> > > > > performed, instead any out-of-bounds write is simply ignored and an
> > > > > out-of-bounds read returns 0xff.
> > > > > 
> > > > > This simplifies all the callers, and makes the overall semantics saner
> > > > > for edge cases.
> > > > > 
> > > > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > > > 
> > > > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > 
> > > Sorry, I didn't reply because I have no idea whether this patch is
> > > correct.
> > 
> > Well, what do you need to decide one way or the other?
> > 
> > Would it help to split this up into micro-patches which address single
> > aspects of the points covered in the patch description?
> 
> That's always good.
> But most importantly, I'd like to get the motivation straight.

Ok, I'll split the patch up.

> > > Couldn't figure out from the description whether there's a test case
> > > where we differ from real hardware in our behaviour.
> > 
> > Not sure what you mean by a testcase here.  Do you mean a formal
> > automated testcase somewhere, or just are there cases in which the
> > existing behaviour differs from hardware.
> 
> No, not formal. Simply
> - what these cases are
> - what is the actual versus expected behaviour
> - how to observe the difference from the guest

Ok, I'll try to be clearer about that in the split patch comments.

However, in this case doing exactly the same thing as real hardware
isn't particularly important - the guest has already done something
probably fatally wrong.  But the point is that it's silly to have
extra code, with subtle and non-obvious assumptions just to implement
a behaviour, in an error case, that will appear on no hardware.

> >  If the latter, then yes,
> > absolutely.  In fact I'd be astonished if there is *any* hardware
> > which implements partial writes (or reads) the way the existing code
> > does.
> 
> We could classify such a difference as a minor bug.
> The fix might turn out to be different for different platforms.

The point isn't really that what we do is different from hardware, but
that it's more complex than necessary, useless and without even the

> > > The change affects lots of platforms and there's no mention of which
> > > ones were tested.
> > 
> > Only pseries, I'm afraid, because that's all I've really got guest
> > setups available to try.
> 
> Then it would be better to find a way to limit the change to that
> platform.

That would completely defeat the purpose, since the result would be
more complex rather than less.

> Alternatively need to get others interested enough to spend cycles
> on testing your patches.

Well, I was trying to get the PCI maintainer interested, but he seems
to be in unbelievably pointless nitpicking mode.

Patch

diff --git a/hw/pci_host.c b/hw/pci_host.c
index 44c6c20..829d797 100644
--- a/hw/pci_host.c
+++ b/hw/pci_host.c
@@ -48,48 +48,50 @@  static inline PCIDevice *pci_dev_find_by_addr(PCIBus *bus, uint32_t addr)
 }
 
 void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
-                                  uint32_t limit, uint32_t val, uint32_t len)
+                                  uint32_t val, uint32_t len)
 {
     assert(len <= 4);
-    pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr));
+    if ((addr + len) <= pci_config_size(pci_dev)) {
+        pci_dev->config_write(pci_dev, addr, val, len);
+    }
 }
 
 uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
-                                     uint32_t limit, uint32_t len)
+                                     uint32_t len)
 {
     assert(len <= 4);
-    return pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr));
+    if ((addr + len) <= pci_config_size(pci_dev)) {
+        return pci_dev->config_read(pci_dev, addr, len);
+    } else {
+        return ~0x0;
+    }
 }
 
 void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len)
 {
     PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
-    uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
 
     if (!pci_dev) {
         return;
     }
 
     PCI_DPRINTF("%s: %s: addr=%02" PRIx32 " val=%08" PRIx32 " len=%d\n",
-                __func__, pci_dev->name, config_addr, val, len);
-    pci_host_config_write_common(pci_dev, config_addr, PCI_CONFIG_SPACE_SIZE,
-                                 val, len);
+                __func__, pci_dev->name, addr, val, len);
+    pci_host_config_write_common(pci_dev, addr, val, len);
 }
 
 uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len)
 {
     PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
-    uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
     uint32_t val;
 
     if (!pci_dev) {
         return ~0x0;
     }
 
-    val = pci_host_config_read_common(pci_dev, config_addr,
-                                      PCI_CONFIG_SPACE_SIZE, len);
+    val = pci_host_config_read_common(pci_dev, addr, len);
     PCI_DPRINTF("%s: %s: addr=%02"PRIx32" val=%08"PRIx32" len=%d\n",
-                __func__, pci_dev->name, config_addr, val, len);
+                __func__, pci_dev->name, addr, val, len);
 
     return val;
 }
diff --git a/hw/pci_host.h b/hw/pci_host.h
index 359e38f..4bb0838 100644
--- a/hw/pci_host.h
+++ b/hw/pci_host.h
@@ -42,9 +42,9 @@  struct PCIHostState {
 
 /* common internal helpers for PCI/PCIe hosts, cut off overflows */
 void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
-                                  uint32_t limit, uint32_t val, uint32_t len);
+                                  uint32_t val, uint32_t len);
 uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
-                                     uint32_t limit, uint32_t len);
+                                     uint32_t len);
 
 void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
 uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
diff --git a/hw/pcie_host.c b/hw/pcie_host.c
index 28bbe72..3af8610 100644
--- a/hw/pcie_host.c
+++ b/hw/pcie_host.c
@@ -60,19 +60,12 @@  static void pcie_mmcfg_data_write(void *opaque, target_phys_addr_t mmcfg_addr,
     PCIBus *s = e->pci.bus;
     PCIDevice *pci_dev = pcie_dev_find_by_mmcfg_addr(s, mmcfg_addr);
     uint32_t addr;
-    uint32_t limit;
 
     if (!pci_dev) {
         return;
     }
     addr = PCIE_MMCFG_CONFOFFSET(mmcfg_addr);
-    limit = pci_config_size(pci_dev);
-    if (limit <= addr) {
-        /* conventional pci device can be behind pcie-to-pci bridge.
-           256 <= addr < 4K has no effects. */
-        return;
-    }
-    pci_host_config_write_common(pci_dev, addr, limit, val, len);
+    pci_host_config_write_common(pci_dev, addr, val, len);
 }
 
 static uint64_t pcie_mmcfg_data_read(void *opaque,
@@ -83,19 +76,12 @@  static uint64_t pcie_mmcfg_data_read(void *opaque,
     PCIBus *s = e->pci.bus;
     PCIDevice *pci_dev = pcie_dev_find_by_mmcfg_addr(s, mmcfg_addr);
     uint32_t addr;
-    uint32_t limit;
 
     if (!pci_dev) {
         return ~0x0;
     }
     addr = PCIE_MMCFG_CONFOFFSET(mmcfg_addr);
-    limit = pci_config_size(pci_dev);
-    if (limit <= addr) {
-        /* conventional pci device can be behind pcie-to-pci bridge.
-           256 <= addr < 4K has no effects. */
-        return ~0x0;
-    }
-    return pci_host_config_read_common(pci_dev, addr, limit, len);
+    return pci_host_config_read_common(pci_dev, addr, len);
 }
 
 static const MemoryRegionOps pcie_mmcfg_ops = {
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index e7ef551..77f3e52 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -60,25 +60,6 @@  static uint32_t rtas_pci_cfgaddr(uint32_t arg)
     return ((arg >> 20) & 0xf00) | (arg & 0xff);
 }
 
-static uint32_t rtas_read_pci_config_do(PCIDevice *pci_dev, uint32_t addr,
-                                        uint32_t limit, uint32_t len)
-{
-    if ((addr + len) <= limit) {
-        return pci_host_config_read_common(pci_dev, addr, limit, len);
-    } else {
-        return ~0x0;
-    }
-}
-
-static void rtas_write_pci_config_do(PCIDevice *pci_dev, uint32_t addr,
-                                     uint32_t limit, uint32_t val,
-                                     uint32_t len)
-{
-    if ((addr + len) <= limit) {
-        pci_host_config_write_common(pci_dev, addr, limit, val, len);
-    }
-}
-
 static void rtas_ibm_read_pci_config(sPAPREnvironment *spapr,
                                      uint32_t token, uint32_t nargs,
                                      target_ulong args,
@@ -94,7 +75,7 @@  static void rtas_ibm_read_pci_config(sPAPREnvironment *spapr,
     }
     size = rtas_ld(args, 3);
     addr = rtas_pci_cfgaddr(rtas_ld(args, 0));
-    val = rtas_read_pci_config_do(dev, addr, pci_config_size(dev), size);
+    val = pci_host_config_read_common(dev, addr, size);
     rtas_st(rets, 0, 0);
     rtas_st(rets, 1, val);
 }
@@ -113,7 +94,7 @@  static void rtas_read_pci_config(sPAPREnvironment *spapr,
     }
     size = rtas_ld(args, 1);
     addr = rtas_pci_cfgaddr(rtas_ld(args, 0));
-    val = rtas_read_pci_config_do(dev, addr, pci_config_size(dev), size);
+    val = pci_host_config_read_common(dev, addr, size);
     rtas_st(rets, 0, 0);
     rtas_st(rets, 1, val);
 }
@@ -134,7 +115,7 @@  static void rtas_ibm_write_pci_config(sPAPREnvironment *spapr,
     val = rtas_ld(args, 4);
     size = rtas_ld(args, 3);
     addr = rtas_pci_cfgaddr(rtas_ld(args, 0));
-    rtas_write_pci_config_do(dev, addr, pci_config_size(dev), val, size);
+    pci_host_config_write_common(dev, addr, val, size);
     rtas_st(rets, 0, 0);
 }
 
@@ -153,7 +134,7 @@  static void rtas_write_pci_config(sPAPREnvironment *spapr,
     val = rtas_ld(args, 2);
     size = rtas_ld(args, 1);
     addr = rtas_pci_cfgaddr(rtas_ld(args, 0));
-    rtas_write_pci_config_do(dev, addr, pci_config_size(dev), val, size);
+    pci_host_config_write_common(dev, addr, val, size);
     rtas_st(rets, 0, 0);
 }