Patchwork [1/8] pci: pci_default_cap_write_config ignores wmask

login
register
mail settings
Submitter Alex Williamson
Date Nov. 12, 2010, 2:55 a.m.
Message ID <20101112025456.31423.497.stgit@s20.home>
Download mbox | patch
Permalink /patch/70914/
State New
Headers show

Comments

Alex Williamson - Nov. 12, 2010, 2:55 a.m.
Make use of wmask, just like the rest of config space.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 hw/pci.c |   19 ++++++++-----------
 1 files changed, 8 insertions(+), 11 deletions(-)
Michael S. Tsirkin - Nov. 12, 2010, 5:22 a.m.
On Thu, Nov 11, 2010 at 07:55:01PM -0700, Alex Williamson wrote:
> Make use of wmask, just like the rest of config space.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
> 
>  hw/pci.c |   19 ++++++++-----------
>  1 files changed, 8 insertions(+), 11 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index 92aaa85..12c47ac 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -1175,13 +1175,14 @@ uint32_t pci_default_read_config(PCIDevice *d,
>      return pci_read_config(d, address, len);
>  }
>  
> -static void pci_write_config(PCIDevice *pci_dev,
> -                             uint32_t address, uint32_t val, int len)
> +static void pci_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
>  {
>      int i;
> -    for (i = 0; i < len; i++) {
> -        pci_dev->config[address + i] = val & 0xff;
> -        val >>= 8;
> +    uint32_t config_size = pci_config_size(d);
> +
> +    for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) {
> +        uint8_t wmask = d->wmask[addr + i];
> +        d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
>      }
>  }
>


Let's not name an internal static helper pci_write_config.
This is really update_config_by_mask or something like that.
But see below: maybe we don't need it at all?

> @@ -1207,18 +1208,14 @@ void pci_default_cap_write_config(PCIDevice *pci_dev,
>  
>  void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
>  {
> -    int i, was_irq_disabled = pci_irq_disabled(d);
> -    uint32_t config_size = pci_config_size(d);
> +    int was_irq_disabled = pci_irq_disabled(d);
>  
>      if (pci_access_cap_config(d, addr, l)) {
>          d->cap.config_write(d, addr, val, l);
>          return;
>      }
>  

I would like to also examine the need for _cap_
functions. Why can assigned devices just do

	pci_default_write_config 
	if (range_overlap(...msi)) {
	}
	if (range_overlap(...msix)) {
	}
and then we could remove all the _cap_ extensions
altogether?

> -    for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) {
> -        uint8_t wmask = d->wmask[addr + i];
> -        d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
> -    }
> +    pci_write_config(d, addr, val, l);
>  
>  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
>      if (kvm_enabled() && kvm_irqchip_in_kernel() &&
Alex Williamson - Nov. 12, 2010, 6:03 a.m.
On Fri, 2010-11-12 at 07:22 +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 11, 2010 at 07:55:01PM -0700, Alex Williamson wrote:
> > Make use of wmask, just like the rest of config space.
> > 
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > ---
> > 
> >  hw/pci.c |   19 ++++++++-----------
> >  1 files changed, 8 insertions(+), 11 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index 92aaa85..12c47ac 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -1175,13 +1175,14 @@ uint32_t pci_default_read_config(PCIDevice *d,
> >      return pci_read_config(d, address, len);
> >  }
> >  
> > -static void pci_write_config(PCIDevice *pci_dev,
> > -                             uint32_t address, uint32_t val, int len)
> > +static void pci_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
> >  {
> >      int i;
> > -    for (i = 0; i < len; i++) {
> > -        pci_dev->config[address + i] = val & 0xff;
> > -        val >>= 8;
> > +    uint32_t config_size = pci_config_size(d);
> > +
> > +    for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) {
> > +        uint8_t wmask = d->wmask[addr + i];
> > +        d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
> >      }
> >  }
> >
> 
> 
> Let's not name an internal static helper pci_write_config.
> This is really update_config_by_mask or something like that.
> But see below: maybe we don't need it at all?

The function already exists, I just made it do what it seems like it
should have been doing already.

> > @@ -1207,18 +1208,14 @@ void pci_default_cap_write_config(PCIDevice *pci_dev,
> >  
> >  void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
> >  {
> > -    int i, was_irq_disabled = pci_irq_disabled(d);
> > -    uint32_t config_size = pci_config_size(d);
> > +    int was_irq_disabled = pci_irq_disabled(d);
> >  
> >      if (pci_access_cap_config(d, addr, l)) {
> >          d->cap.config_write(d, addr, val, l);
> >          return;
> >      }
> >  
> 
> I would like to also examine the need for _cap_
> functions. Why can assigned devices just do
> 
> 	pci_default_write_config 
> 	if (range_overlap(...msi)) {
> 	}
> 	if (range_overlap(...msix)) {
> 	}
> and then we could remove all the _cap_ extensions
> altogether?

I think that somewhere we need to track what capabilities are at what
offset, config space isn't a performance path, but that look horribly
inefficient and gets worse with more capabilities.

Why don't we define capability id 0xff as normal config space (first 64
bytes), then add the capability id to read/write_config (this is what
vfio does).  Then the driver can split capability handling off from
their main functions if they want.  Anyway, I think such an improvement
could be added incrementally later.  Thanks,

Alex

> > -    for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) {
> > -        uint8_t wmask = d->wmask[addr + i];
> > -        d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
> > -    }
> > +    pci_write_config(d, addr, val, l);
> >  
> >  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
> >      if (kvm_enabled() && kvm_irqchip_in_kernel() &&
Michael S. Tsirkin - Nov. 12, 2010, 8:48 a.m.
On Thu, Nov 11, 2010 at 11:03:19PM -0700, Alex Williamson wrote:
> On Fri, 2010-11-12 at 07:22 +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 11, 2010 at 07:55:01PM -0700, Alex Williamson wrote:
> > > Make use of wmask, just like the rest of config space.
> > > 
> > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > ---
> > > 
> > >  hw/pci.c |   19 ++++++++-----------
> > >  1 files changed, 8 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/hw/pci.c b/hw/pci.c
> > > index 92aaa85..12c47ac 100644
> > > --- a/hw/pci.c
> > > +++ b/hw/pci.c
> > > @@ -1175,13 +1175,14 @@ uint32_t pci_default_read_config(PCIDevice *d,
> > >      return pci_read_config(d, address, len);
> > >  }
> > >  
> > > -static void pci_write_config(PCIDevice *pci_dev,
> > > -                             uint32_t address, uint32_t val, int len)
> > > +static void pci_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
> > >  {
> > >      int i;
> > > -    for (i = 0; i < len; i++) {
> > > -        pci_dev->config[address + i] = val & 0xff;
> > > -        val >>= 8;
> > > +    uint32_t config_size = pci_config_size(d);
> > > +
> > > +    for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) {
> > > +        uint8_t wmask = d->wmask[addr + i];
> > > +        d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
> > >      }
> > >  }
> > >
> > 
> > 
> > Let's not name an internal static helper pci_write_config.
> > This is really update_config_by_mask or something like that.
> > But see below: maybe we don't need it at all?
> 
> The function already exists, I just made it do what it seems like it
> should have been doing already.

Yep. But since you are rewriting this function - could you rename it as
well?

> > > @@ -1207,18 +1208,14 @@ void pci_default_cap_write_config(PCIDevice *pci_dev,
> > >  
> > >  void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
> > >  {
> > > -    int i, was_irq_disabled = pci_irq_disabled(d);
> > > -    uint32_t config_size = pci_config_size(d);
> > > +    int was_irq_disabled = pci_irq_disabled(d);
> > >  
> > >      if (pci_access_cap_config(d, addr, l)) {
> > >          d->cap.config_write(d, addr, val, l);
> > >          return;
> > >      }
> > >  
> > 
> > I would like to also examine the need for _cap_
> > functions. Why can assigned devices just do
> > 
> > 	pci_default_write_config 
> > 	if (range_overlap(...msi)) {
> > 	}
> > 	if (range_overlap(...msix)) {
> > 	}
> > and then we could remove all the _cap_ extensions
> > altogether?
> 
> I think that somewhere we need to track what capabilities are at what
> offset, config space isn't a performance path, but that look horribly
> inefficient and gets worse with more capabilities.

Looks like premature optimization to me.  I guess when we get more than
say 8 capabilities to support, I'll start to worry.
Even then, these optimizations are better internal in pci core.

> Why don't we define capability id 0xff as normal config space (first 64
> bytes), then add the capability id to read/write_config (this is what
> vfio does).  Then the driver can split capability handling off from
> their main functions if they want.

My feeling is we need higher level APIs than 'capability write'.
Otherwise we get the PCI config handling all over the place.
E.g. a callback when msi is enabled/disabled would make sense,
so that pci core can keep track of current state and only notify
the device when there are things to do.

>  Anyway, I think such an improvement
> could be added incrementally later.  Thanks,
> 
> Alex

Sure.

> > > -    for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) {
> > > -        uint8_t wmask = d->wmask[addr + i];
> > > -        d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
> > > -    }
> > > +    pci_write_config(d, addr, val, l);
> > >  
> > >  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
> > >      if (kvm_enabled() && kvm_irqchip_in_kernel() &&
> 
>
Alex Williamson - Nov. 12, 2010, 3:49 p.m.
On Fri, 2010-11-12 at 10:48 +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 11, 2010 at 11:03:19PM -0700, Alex Williamson wrote:
> > On Fri, 2010-11-12 at 07:22 +0200, Michael S. Tsirkin wrote:
> > > On Thu, Nov 11, 2010 at 07:55:01PM -0700, Alex Williamson wrote:
> > > > Make use of wmask, just like the rest of config space.
> > > > 
> > > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > > ---
> > > > 
> > > >  hw/pci.c |   19 ++++++++-----------
> > > >  1 files changed, 8 insertions(+), 11 deletions(-)
> > > > 
> > > > diff --git a/hw/pci.c b/hw/pci.c
> > > > index 92aaa85..12c47ac 100644
> > > > --- a/hw/pci.c
> > > > +++ b/hw/pci.c
> > > > @@ -1175,13 +1175,14 @@ uint32_t pci_default_read_config(PCIDevice *d,
> > > >      return pci_read_config(d, address, len);
> > > >  }
> > > >  
> > > > -static void pci_write_config(PCIDevice *pci_dev,
> > > > -                             uint32_t address, uint32_t val, int len)
> > > > +static void pci_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
> > > >  {
> > > >      int i;
> > > > -    for (i = 0; i < len; i++) {
> > > > -        pci_dev->config[address + i] = val & 0xff;
> > > > -        val >>= 8;
> > > > +    uint32_t config_size = pci_config_size(d);
> > > > +
> > > > +    for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) {
> > > > +        uint8_t wmask = d->wmask[addr + i];
> > > > +        d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
> > > >      }
> > > >  }
> > > >
> > > 
> > > 
> > > Let's not name an internal static helper pci_write_config.
> > > This is really update_config_by_mask or something like that.
> > > But see below: maybe we don't need it at all?
> > 
> > The function already exists, I just made it do what it seems like it
> > should have been doing already.
> 
> Yep. But since you are rewriting this function - could you rename it as
> well?

Ok

> > > > @@ -1207,18 +1208,14 @@ void pci_default_cap_write_config(PCIDevice *pci_dev,
> > > >  
> > > >  void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
> > > >  {
> > > > -    int i, was_irq_disabled = pci_irq_disabled(d);
> > > > -    uint32_t config_size = pci_config_size(d);
> > > > +    int was_irq_disabled = pci_irq_disabled(d);
> > > >  
> > > >      if (pci_access_cap_config(d, addr, l)) {
> > > >          d->cap.config_write(d, addr, val, l);
> > > >          return;
> > > >      }
> > > >  
> > > 
> > > I would like to also examine the need for _cap_
> > > functions. Why can assigned devices just do
> > > 
> > > 	pci_default_write_config 
> > > 	if (range_overlap(...msi)) {
> > > 	}
> > > 	if (range_overlap(...msix)) {
> > > 	}
> > > and then we could remove all the _cap_ extensions
> > > altogether?
> > 
> > I think that somewhere we need to track what capabilities are at what
> > offset, config space isn't a performance path, but that look horribly
> > inefficient and gets worse with more capabilities.
> 
> Looks like premature optimization to me.  I guess when we get more than
> say 8 capabilities to support, I'll start to worry.
> Even then, these optimizations are better internal in pci core.

It's not just an optimization, as noted in another reply, we should be
using it to make sure we don't have collisions, and it simply makes the
callback code much cleaner to be able to do a switch statement instead
of a pile of 'if (ranges_overlap)', IMO.

> > Why don't we define capability id 0xff as normal config space (first 64
> > bytes), then add the capability id to read/write_config (this is what
> > vfio does).  Then the driver can split capability handling off from
> > their main functions if they want.
> 
> My feeling is we need higher level APIs than 'capability write'.
> Otherwise we get the PCI config handling all over the place.
> E.g. a callback when msi is enabled/disabled would make sense,
> so that pci core can keep track of current state and only notify
> the device when there are things to do.

I agree, but it's difficult to provide the flexibility to meet all the
needs.  Device assignment might want to be called for more or less bit
flips than an emulated device, PM is probably a good example of this.
We could actually change state on a PMCSR write, but I'm not sure what
an emulated device would do.  Does that mean we add a callback
specifically for that, or do we provide some generic interface that
drivers can register which bits they want to know about changing?

> >  Anyway, I think such an improvement
> > could be added incrementally later.  Thanks,
> > 
> > Alex
> 
> Sure.
> 
> > > > -    for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) {
> > > > -        uint8_t wmask = d->wmask[addr + i];
> > > > -        d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
> > > > -    }
> > > > +    pci_write_config(d, addr, val, l);
> > > >  
> > > >  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
> > > >      if (kvm_enabled() && kvm_irqchip_in_kernel() &&
> > 
> >
Michael S. Tsirkin - Nov. 16, 2010, 4:12 p.m.
On Fri, Nov 12, 2010 at 08:49:20AM -0700, Alex Williamson wrote:
> > > > > @@ -1207,18 +1208,14 @@ void pci_default_cap_write_config(PCIDevice *pci_dev,
> > > > >  
> > > > >  void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
> > > > >  {
> > > > > -    int i, was_irq_disabled = pci_irq_disabled(d);
> > > > > -    uint32_t config_size = pci_config_size(d);
> > > > > +    int was_irq_disabled = pci_irq_disabled(d);
> > > > >  
> > > > >      if (pci_access_cap_config(d, addr, l)) {
> > > > >          d->cap.config_write(d, addr, val, l);
> > > > >          return;
> > > > >      }
> > > > >  
> > > > 
> > > > I would like to also examine the need for _cap_
> > > > functions. Why can assigned devices just do
> > > > 
> > > > 	pci_default_write_config 
> > > > 	if (range_overlap(...msi)) {
> > > > 	}
> > > > 	if (range_overlap(...msix)) {
> > > > 	}
> > > > and then we could remove all the _cap_ extensions
> > > > altogether?
> > > 
> > > I think that somewhere we need to track what capabilities are at what
> > > offset, config space isn't a performance path, but that look horribly
> > > inefficient and gets worse with more capabilities.
> > 
> > Looks like premature optimization to me.  I guess when we get more than
> > say 8 capabilities to support, I'll start to worry.
> > Even then, these optimizations are better internal in pci core.
> 
> It's not just an optimization, as noted in another reply, we should be
> using it to make sure we don't have collisions, and it simply makes the
> callback code much cleaner to be able to do a switch statement instead
> of a pile of 'if (ranges_overlap)', IMO.

Two if statements is not a pile :)
I think in the end we will have
a general pci handler dealing with all capabilities,
and device assignment would *maybe* deal with msi and msix.


> > > Why don't we define capability id 0xff as normal config space (first 64
> > > bytes), then add the capability id to read/write_config (this is what
> > > vfio does).  Then the driver can split capability handling off from
> > > their main functions if they want.
> > 
> > My feeling is we need higher level APIs than 'capability write'.
> > Otherwise we get the PCI config handling all over the place.
> > E.g. a callback when msi is enabled/disabled would make sense,
> > so that pci core can keep track of current state and only notify
> > the device when there are things to do.
> 
> I agree, but it's difficult to provide the flexibility to meet all the
> needs.  Device assignment might want to be called for more or less bit
> flips than an emulated device, PM is probably a good example of this.
> We could actually change state on a PMCSR write, but I'm not sure what
> an emulated device would do.  Does that mean we add a callback
> specifically for that, or do we provide some generic interface that
> drivers can register which bits they want to know about changing?

I'm arguing for the PM callback. This way pci config decoding
is local in pci.c, others use high level APIs.

> > >  Anyway, I think such an improvement
> > > could be added incrementally later.  Thanks,
> > > 
> > > Alex
> > 
> > Sure.
> > 
> > > > > -    for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) {
> > > > > -        uint8_t wmask = d->wmask[addr + i];
> > > > > -        d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
> > > > > -    }
> > > > > +    pci_write_config(d, addr, val, l);
> > > > >  
> > > > >  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
> > > > >      if (kvm_enabled() && kvm_irqchip_in_kernel() &&
> > > 
> > > 
> 
>

Patch

diff --git a/hw/pci.c b/hw/pci.c
index 92aaa85..12c47ac 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1175,13 +1175,14 @@  uint32_t pci_default_read_config(PCIDevice *d,
     return pci_read_config(d, address, len);
 }
 
-static void pci_write_config(PCIDevice *pci_dev,
-                             uint32_t address, uint32_t val, int len)
+static void pci_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
 {
     int i;
-    for (i = 0; i < len; i++) {
-        pci_dev->config[address + i] = val & 0xff;
-        val >>= 8;
+    uint32_t config_size = pci_config_size(d);
+
+    for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) {
+        uint8_t wmask = d->wmask[addr + i];
+        d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
     }
 }
 
@@ -1207,18 +1208,14 @@  void pci_default_cap_write_config(PCIDevice *pci_dev,
 
 void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
 {
-    int i, was_irq_disabled = pci_irq_disabled(d);
-    uint32_t config_size = pci_config_size(d);
+    int was_irq_disabled = pci_irq_disabled(d);
 
     if (pci_access_cap_config(d, addr, l)) {
         d->cap.config_write(d, addr, val, l);
         return;
     }
 
-    for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) {
-        uint8_t wmask = d->wmask[addr + i];
-        d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
-    }
+    pci_write_config(d, addr, val, l);
 
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
     if (kvm_enabled() && kvm_irqchip_in_kernel() &&