Patchwork [V6,17/32] pci: 64bit bar support.

login
register
mail settings
Submitter Isaku Yamahata
Date Oct. 30, 2009, 12:21 p.m.
Message ID <1256905286-25435-18-git-send-email-yamahata@valinux.co.jp>
Download mbox | patch
Permalink /patch/37276/
State New
Headers show

Comments

Isaku Yamahata - Oct. 30, 2009, 12:21 p.m.
implemented pci 64bit bar support.
The tricky bit is pci_update_mapping().
An OS is allowed to set the BAR such that OS can't address the area
pointed by BAR. It doesn't make sense, though.
In that case, don't map the BAR.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 hw/pci.c |   35 ++++++++++++++++++++++++++++++-----
 hw/pci.h |    1 +
 2 files changed, 31 insertions(+), 5 deletions(-)
Michael S. Tsirkin - Nov. 1, 2009, 4:07 p.m.
On Fri, Oct 30, 2009 at 09:21:11PM +0900, Isaku Yamahata wrote:
> implemented pci 64bit bar support.
> The tricky bit is pci_update_mapping().
> An OS is allowed to set the BAR such that OS can't address the area
> pointed by BAR. It doesn't make sense, though.

It might make sense. 32 bit guest can address more than 4G of
physical RAM, e.g. using PAE.
Since I think qemu can not support this if target phys address is 32
bit, we should declare lack of support for 64 bit addressing on these
platforms, by forcing BAR into 32 bit mode, rather than silently failing
to map it.

> In that case, don't map the BAR.
> 
> Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
> ---
>  hw/pci.c |   35 ++++++++++++++++++++++++++++++-----
>  hw/pci.h |    1 +
>  2 files changed, 31 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index b462bd6..7da3db9 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -484,8 +484,14 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
>          wmask |= PCI_ROM_ADDRESS_ENABLE;
>      }
>      pci_set_long(pci_dev->config + addr, type);
> -    pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
> -    pci_set_long(pci_dev->cmask + addr, 0xffffffff);
> +    if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
> +        r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
> +        pci_set_quad(pci_dev->wmask + addr, wmask);
> +        pci_set_quad(pci_dev->cmask + addr, ~0ULL);
> +    } else {
> +        pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
> +        pci_set_long(pci_dev->cmask + addr, 0xffffffff);
> +    }
>  }
>  
>  static void pci_update_mappings(PCIDevice *d)
> @@ -513,7 +519,11 @@ static void pci_update_mappings(PCIDevice *d)
>                  }
>              } else {
>                  if (cmd & PCI_COMMAND_MEMORY) {
> -                    new_addr = pci_get_long(d->config + pci_bar(d, i));
> +                    if (r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
> +                        new_addr = pci_get_quad(d->config + pci_bar(d, i));
> +                    } else {
> +                        new_addr = pci_get_long(d->config + pci_bar(d, i));
> +                    }
>                      /* the ROM slot has a specific enable bit */
>                      if (i == PCI_ROM_SLOT && !(new_addr & PCI_ROM_ADDRESS_ENABLE))
>                          goto no_mem_map;
> @@ -531,7 +541,15 @@ static void pci_update_mappings(PCIDevice *d)
>                           * Without this, PC ide doesn't work well.
>                           * TODO: remove this work around.
>                           */
> -                        last_addr >= UINT32_MAX) {
> +                        (!(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) &&
> +                         last_addr >= UINT32_MAX) ||
> +
> +                        /*
> +                         * OS is allowed to set BAR beyond its addressable
> +                         * bits. For example, 32 bit OS can set 64bit bar
> +                         * to >4G. Check it.
> +                         */
> +                        last_addr >= TARGET_PHYS_ADDR_MAX) {
>                          new_addr = PCI_BAR_UNMAPPED;
>                      }
>                  } else {
> @@ -773,8 +791,15 @@ static void pci_info_device(PCIDevice *d)
>                                 " [0x%04"FMT_PCIBUS"].\n",
>                                 r->addr, r->addr + r->size - 1);
>              } else {
> -                monitor_printf(mon, "32 bit memory at 0x%08"FMT_PCIBUS
> +                const char *type = r->type & PCI_BASE_ADDRESS_MEM_TYPE_64 ?
> +                    "64 bit" : "32 bit";
> +                const char *prefetch =
> +                    r->type & PCI_BASE_ADDRESS_MEM_PREFETCH ?
> +                    " prefetchable" : "";
> +
> +                monitor_printf(mon, "%s%s memory at 0x%08"FMT_PCIBUS
>                                 " [0x%08"FMT_PCIBUS"].\n",
> +                               type, prefetch,
>                                 r->addr, r->addr + r->size - 1);
>              }
>          }
> diff --git a/hw/pci.h b/hw/pci.h
> index 305c030..e83faf5 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -114,6 +114,7 @@ typedef struct PCIIORegion {
>  #define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
>  #define  PCI_BASE_ADDRESS_SPACE_IO	0x01
>  #define  PCI_BASE_ADDRESS_SPACE_MEMORY	0x00
> +#define  PCI_BASE_ADDRESS_MEM_TYPE_64	0x04	/* 64 bit address */
>  #define  PCI_BASE_ADDRESS_MEM_PREFETCH	0x08	/* prefetchable? */
>  #define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
>  #define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
> -- 
> 1.6.0.2
> 
>
Isaku Yamahata - Nov. 3, 2009, 3:52 a.m.
On Sun, Nov 01, 2009 at 06:07:30PM +0200, Michael S. Tsirkin wrote:
> On Fri, Oct 30, 2009 at 09:21:11PM +0900, Isaku Yamahata wrote:
> > implemented pci 64bit bar support.
> > The tricky bit is pci_update_mapping().
> > An OS is allowed to set the BAR such that OS can't address the area
> > pointed by BAR. It doesn't make sense, though.
> 
> It might make sense. 32 bit guest can address more than 4G of
> physical RAM, e.g. using PAE.

Yes, in that case, guest OS will set bar to be under 36 bit.
If PAE were supported, target phys address would be 64 bit.


> Since I think qemu can not support this if target phys address is 32
> bit, we should declare lack of support for 64 bit addressing on these
> platforms, by forcing BAR into 32 bit mode, rather than silently failing
> to map it.

I don't get your point. And I don't understand the benefit of focing
BAR into 32 bit mode.
Real hardware silently maps BAR to address beyond CPU addressable
area.
Let's stick to PCI spec as you said before.

32 bit guest OS will set 64 bit BAR to be smaller 32bit
(or 36bit if PAE). That's it.
Even if 64 bit CPU case, architectally addressable address bit is smaller
than 64 bit. It's CPU implementation dependent.
And guest OS sets BAR according to it.
Michael S. Tsirkin - Nov. 3, 2009, 11:47 a.m.
On Tue, Nov 03, 2009 at 12:52:10PM +0900, Isaku Yamahata wrote:
> On Sun, Nov 01, 2009 at 06:07:30PM +0200, Michael S. Tsirkin wrote:
> > On Fri, Oct 30, 2009 at 09:21:11PM +0900, Isaku Yamahata wrote:
> > > implemented pci 64bit bar support.
> > > The tricky bit is pci_update_mapping().
> > > An OS is allowed to set the BAR such that OS can't address the area
> > > pointed by BAR. It doesn't make sense, though.
> > 
> > It might make sense. 32 bit guest can address more than 4G of
> > physical RAM, e.g. using PAE.
> 
> Yes, in that case, guest OS will set bar to be under 36 bit.
> If PAE were supported, target phys address would be 64 bit.
> 
> 
> > Since I think qemu can not support this if target phys address is 32
> > bit, we should declare lack of support for 64 bit addressing on these
> > platforms, by forcing BAR into 32 bit mode, rather than silently failing
> > to map it.
> 
> I don't get your point. And I don't understand the benefit of focing
> BAR into 32 bit mode.
> Real hardware silently maps BAR to address beyond CPU addressable
> area.
> Let's stick to PCI spec as you said before.
> 
> 32 bit guest OS will set 64 bit BAR to be smaller 32bit
> (or 36bit if PAE). That's it.
> Even if 64 bit CPU case, architectally addressable address bit is smaller
> than 64 bit. It's CPU implementation dependent.
> And guest OS sets BAR according to it.
> -- 
> yamahata

IMO, this is not a question of what guest does or does not do.

If qemu is compiled with target phys address size 32 bit, emulated
devices can not support a 64 bit BAR.  Therefore, according to PCI spec,
such devices should declare all BARs as 32 bit.

I think you are right that guests on such systems really do not have a
way to address PCI devices if BAR is set beyond 4G. But pci emulation is
better off not relying on this, IMO. Makes sense?
Avi Kivity - Nov. 3, 2009, 12:22 p.m.
On 11/03/2009 01:47 PM, Michael S. Tsirkin wrote:
>
> If qemu is compiled with target phys address size 32 bit, emulated
> devices can not support a 64 bit BAR.  Therefore, according to PCI spec,
> such devices should declare all BARs as 32 bit.
>
>    

What happens if you take a PCI card that supports 64-bit BARs and stick 
it into a machine that has a 32-bit physical address space?

The firmware/OS will configure the BARs to below 4G.

> I think you are right that guests on such systems really do not have a
> way to address PCI devices if BAR is set beyond 4G. But pci emulation is
> better off not relying on this, IMO. Makes sense?
>    

No.  Device emulation shouldn't change with the machine type.
Michael S. Tsirkin - Nov. 3, 2009, 12:39 p.m.
On Tue, Nov 03, 2009 at 02:22:07PM +0200, Avi Kivity wrote:
> On 11/03/2009 01:47 PM, Michael S. Tsirkin wrote:
>>
>> If qemu is compiled with target phys address size 32 bit, emulated
>> devices can not support a 64 bit BAR.  Therefore, according to PCI spec,
>> such devices should declare all BARs as 32 bit.
>>
>>    
>
> What happens if you take a PCI card that supports 64-bit BARs and stick  
> it into a machine that has a 32-bit physical address space?
> The firmware/OS will configure the BARs to below 4G.
> 
>> I think you are right that guests on such systems really do not have a
>> way to address PCI devices if BAR is set beyond 4G. But pci emulation is
>> better off not relying on this, IMO. Makes sense?
>>    
>
> No.  Device emulation shouldn't change with the machine type.

I agree. Issue is, we recompile the *devices* as well.
It's the device emulation that is broken when compiled
with target phys addr set to 32 bit, because all devices
take pcibus_t and cast it to target_phys_addr_t
and then do stuff with it.

So such emulation should not claim to support 64 bit.

Long term, we should fix all devices and *then* they can claim 64 bit
support always.  As a nice side effect, we'll be able to avoid
rebuilding devices.

> -- 
> error compiling committee.c: too many arguments to function
Michael S. Tsirkin - Nov. 3, 2009, 1:21 p.m.
On Tue, Nov 03, 2009 at 02:39:06PM +0200, Michael S. Tsirkin wrote:
> On Tue, Nov 03, 2009 at 02:22:07PM +0200, Avi Kivity wrote:
> > On 11/03/2009 01:47 PM, Michael S. Tsirkin wrote:
> >>
> >> If qemu is compiled with target phys address size 32 bit, emulated
> >> devices can not support a 64 bit BAR.  Therefore, according to PCI spec,
> >> such devices should declare all BARs as 32 bit.
> >>
> >>    
> >
> > What happens if you take a PCI card that supports 64-bit BARs and stick  
> > it into a machine that has a 32-bit physical address space?
> > The firmware/OS will configure the BARs to below 4G.
> > 
> >> I think you are right that guests on such systems really do not have a
> >> way to address PCI devices if BAR is set beyond 4G. But pci emulation is
> >> better off not relying on this, IMO. Makes sense?
> >>    
> >
> > No.  Device emulation shouldn't change with the machine type.
> 
> I agree. Issue is, we recompile the *devices* as well.
> It's the device emulation that is broken when compiled
> with target phys addr set to 32 bit, because all devices
> take pcibus_t and cast it to target_phys_addr_t
> and then do stuff with it.
> 
> So such emulation should not claim to support 64 bit.
> 
> Long term, we should fix all devices and *then* they can claim 64 bit
> support always.  As a nice side effect, we'll be able to avoid
> rebuilding devices.

BTW, the right way to do this is probably to move mapping device memory
out of devices proper and into pci.


> > -- 
> > error compiling committee.c: too many arguments to function
Isaku Yamahata - Nov. 3, 2009, 2:01 p.m.
On Tue, Nov 03, 2009 at 02:39:06PM +0200, Michael S. Tsirkin wrote:
> On Tue, Nov 03, 2009 at 02:22:07PM +0200, Avi Kivity wrote:
> > On 11/03/2009 01:47 PM, Michael S. Tsirkin wrote:
> >>
> >> If qemu is compiled with target phys address size 32 bit, emulated
> >> devices can not support a 64 bit BAR.  Therefore, according to PCI spec,
> >> such devices should declare all BARs as 32 bit.

Here is the point.
  > "emulated devices can not support a 64 bit BAR"
If target_phy_addr_t = uint32_t, the emulation of 64 bit BAR which
is set to >4GB is cpu can't access it. So just not-mapping it
is correct behavior.


> > What happens if you take a PCI card that supports 64-bit BARs and stick  
> > it into a machine that has a 32-bit physical address space?
> > The firmware/OS will configure the BARs to below 4G.
> > 
> >> I think you are right that guests on such systems really do not have a
> >> way to address PCI devices if BAR is set beyond 4G. But pci emulation is
> >> better off not relying on this, IMO. Makes sense?
> >>    
> >
> > No.  Device emulation shouldn't change with the machine type.
> 
> I agree. Issue is, we recompile the *devices* as well.
> It's the device emulation that is broken when compiled
> with target phys addr set to 32 bit, because all devices
> take pcibus_t and cast it to target_phys_addr_t
> and then do stuff with it.
> So such emulation should not claim to support 64 bit.

Such case is checked by "last_addr >= TARGET_PHYS_ADDR_MAX",
so the device emulation works well.

Generally device drivers know their devices. For example they know
that BAR0 is 64bit memory and so on. So if BAR type were changed
by forcing 64 bit BAR into 32 bit BAR, the device driver wouldn't be
confused.


> Long term, we should fix all devices and *then* they can claim 64 bit
> support always.  As a nice side effect, we'll be able to avoid
> rebuilding devices.

Are you claiming that (PCI) devices emulation shouldn't depend on
target_phys_addr_t? That sounds a good idea.

However I don't agree on "*then*".
The conversions would take place step by step as it's long term object.
I don't see any reason to penalize correct device emulations just
because there are incomplete device emulations left.

thanks,
Michael S. Tsirkin - Nov. 3, 2009, 2:09 p.m.
On Tue, Nov 03, 2009 at 11:01:00PM +0900, Isaku Yamahata wrote:
> On Tue, Nov 03, 2009 at 02:39:06PM +0200, Michael S. Tsirkin wrote:
> > On Tue, Nov 03, 2009 at 02:22:07PM +0200, Avi Kivity wrote:
> > > On 11/03/2009 01:47 PM, Michael S. Tsirkin wrote:
> > >>
> > >> If qemu is compiled with target phys address size 32 bit, emulated
> > >> devices can not support a 64 bit BAR.  Therefore, according to PCI spec,
> > >> such devices should declare all BARs as 32 bit.
> 
> Here is the point.
>   > "emulated devices can not support a 64 bit BAR"
> If target_phy_addr_t = uint32_t, the emulation of 64 bit BAR which
> is set to >4GB is cpu can't access it. So just not-mapping it
> is correct behavior.
> 
> 
> > > What happens if you take a PCI card that supports 64-bit BARs and stick  
> > > it into a machine that has a 32-bit physical address space?
> > > The firmware/OS will configure the BARs to below 4G.
> > > 
> > >> I think you are right that guests on such systems really do not have a
> > >> way to address PCI devices if BAR is set beyond 4G. But pci emulation is
> > >> better off not relying on this, IMO. Makes sense?
> > >>    
> > >
> > > No.  Device emulation shouldn't change with the machine type.
> > 
> > I agree. Issue is, we recompile the *devices* as well.
> > It's the device emulation that is broken when compiled
> > with target phys addr set to 32 bit, because all devices
> > take pcibus_t and cast it to target_phys_addr_t
> > and then do stuff with it.
> > So such emulation should not claim to support 64 bit.
> 
> Such case is checked by "last_addr >= TARGET_PHYS_ADDR_MAX",
> so the device emulation works well.
> 
> Generally device drivers know their devices. For example they know
> that BAR0 is 64bit memory and so on. So if BAR type were changed
> by forcing 64 bit BAR into 32 bit BAR, the device driver wouldn't be
> confused.
> 
> > Long term, we should fix all devices and *then* they can claim 64 bit
> > support always.  As a nice side effect, we'll be able to avoid
> > rebuilding devices.
> 
> Are you claiming that (PCI) devices emulation shouldn't depend on
> target_phys_addr_t? That sounds a good idea.

Yes. Maybe we can stop devices from mapping memory, have pci
core do it for them.

> However I don't agree on "*then*".
> The conversions would take place step by step as it's long term object.
> I don't see any reason to penalize correct device emulations just
> because there are incomplete device emulations left.

Hmm. Okay ... but which device emulations are correct?
It seems that none of them are.
I don't see the rush .

> thanks,
> -- 
> yamahata
Isaku Yamahata - Nov. 4, 2009, 6:20 a.m.
On Tue, Nov 03, 2009 at 04:09:16PM +0200, Michael S. Tsirkin wrote:
> > > Long term, we should fix all devices and *then* they can claim 64 bit
> > > support always.  As a nice side effect, we'll be able to avoid
> > > rebuilding devices.
> > 
> > Are you claiming that (PCI) devices emulation shouldn't depend on
> > target_phys_addr_t? That sounds a good idea.
> 
> Yes. Maybe we can stop devices from mapping memory, have pci
> core do it for them.

IIRC Avi proposed such a patch, but it didn't get merged.
Michael S. Tsirkin - Nov. 4, 2009, 12:19 p.m.
On Wed, Nov 04, 2009 at 03:20:02PM +0900, Isaku Yamahata wrote:
> On Tue, Nov 03, 2009 at 04:09:16PM +0200, Michael S. Tsirkin wrote:
> > > > Long term, we should fix all devices and *then* they can claim 64 bit
> > > > support always.  As a nice side effect, we'll be able to avoid
> > > > rebuilding devices.
> > > 
> > > Are you claiming that (PCI) devices emulation shouldn't depend on
> > > target_phys_addr_t? That sounds a good idea.
> > 
> > Yes. Maybe we can stop devices from mapping memory, have pci
> > core do it for them.
> 
> IIRC Avi proposed such a patch,

link?

> but it didn't get merged.

I wonder why - currently mapping is done by devices but unmapping is
done by pci core. That's been always bothering me.


> -- 
> yamahata

Patch

diff --git a/hw/pci.c b/hw/pci.c
index b462bd6..7da3db9 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -484,8 +484,14 @@  void pci_register_bar(PCIDevice *pci_dev, int region_num,
         wmask |= PCI_ROM_ADDRESS_ENABLE;
     }
     pci_set_long(pci_dev->config + addr, type);
-    pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
-    pci_set_long(pci_dev->cmask + addr, 0xffffffff);
+    if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
+        r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+        pci_set_quad(pci_dev->wmask + addr, wmask);
+        pci_set_quad(pci_dev->cmask + addr, ~0ULL);
+    } else {
+        pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
+        pci_set_long(pci_dev->cmask + addr, 0xffffffff);
+    }
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -513,7 +519,11 @@  static void pci_update_mappings(PCIDevice *d)
                 }
             } else {
                 if (cmd & PCI_COMMAND_MEMORY) {
-                    new_addr = pci_get_long(d->config + pci_bar(d, i));
+                    if (r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+                        new_addr = pci_get_quad(d->config + pci_bar(d, i));
+                    } else {
+                        new_addr = pci_get_long(d->config + pci_bar(d, i));
+                    }
                     /* the ROM slot has a specific enable bit */
                     if (i == PCI_ROM_SLOT && !(new_addr & PCI_ROM_ADDRESS_ENABLE))
                         goto no_mem_map;
@@ -531,7 +541,15 @@  static void pci_update_mappings(PCIDevice *d)
                          * Without this, PC ide doesn't work well.
                          * TODO: remove this work around.
                          */
-                        last_addr >= UINT32_MAX) {
+                        (!(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) &&
+                         last_addr >= UINT32_MAX) ||
+
+                        /*
+                         * OS is allowed to set BAR beyond its addressable
+                         * bits. For example, 32 bit OS can set 64bit bar
+                         * to >4G. Check it.
+                         */
+                        last_addr >= TARGET_PHYS_ADDR_MAX) {
                         new_addr = PCI_BAR_UNMAPPED;
                     }
                 } else {
@@ -773,8 +791,15 @@  static void pci_info_device(PCIDevice *d)
                                " [0x%04"FMT_PCIBUS"].\n",
                                r->addr, r->addr + r->size - 1);
             } else {
-                monitor_printf(mon, "32 bit memory at 0x%08"FMT_PCIBUS
+                const char *type = r->type & PCI_BASE_ADDRESS_MEM_TYPE_64 ?
+                    "64 bit" : "32 bit";
+                const char *prefetch =
+                    r->type & PCI_BASE_ADDRESS_MEM_PREFETCH ?
+                    " prefetchable" : "";
+
+                monitor_printf(mon, "%s%s memory at 0x%08"FMT_PCIBUS
                                " [0x%08"FMT_PCIBUS"].\n",
+                               type, prefetch,
                                r->addr, r->addr + r->size - 1);
             }
         }
diff --git a/hw/pci.h b/hw/pci.h
index 305c030..e83faf5 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -114,6 +114,7 @@  typedef struct PCIIORegion {
 #define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
 #define  PCI_BASE_ADDRESS_SPACE_IO	0x01
 #define  PCI_BASE_ADDRESS_SPACE_MEMORY	0x00
+#define  PCI_BASE_ADDRESS_MEM_TYPE_64	0x04	/* 64 bit address */
 #define  PCI_BASE_ADDRESS_MEM_PREFETCH	0x08	/* prefetchable? */
 #define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
 #define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */