Patchwork FWD:[PATCH] [Seabios] Over 4GB address ranges for 64bit PCI BARs]

login
register
mail settings
Submitter Isaku Yamahata
Date Nov. 6, 2010, 1:11 a.m.
Message ID <20101106011128.GH26077@valinux.co.jp>
Download mbox | patch
Permalink /patch/70317/
State New
Headers show

Comments

Isaku Yamahata - Nov. 6, 2010, 1:11 a.m.
FWD

----- Forwarded message from Alexey Korolev <alexey.korolev@endace.com> -----

Subject: [PATCH] [Seabios] Over 4GB address ranges for 64bit PCI BARs
X-Mailer: Evolution 2.24.3 

Hi,

We have seen some issues with 64bit PCI address and large BAR region support in 
seabios. 

On attempting to register a 64bit BAR of 1GB size we had some strange failures
in qemu. After some debugging we find out that source of the issue was in 
seabios PCI enumeration code. 

The issue takes place because of u32 overflow in pciinit.c. 
….........
             if (ALIGN(*paddr, size) + size >= BUILD_PCIPREFMEM_END) {
                 dprintf(1,
                         "prefmem region of (bdf 0x%x bar %d) can't be mapped. "
                         "decrease BUILD_PCIMEM_SIZE and recompile. size %x\n",
                         bdf, region_num, BUILD_PCIPREFMEM_SIZE);
                 size = 0;
             }
…...........
       if (size > 0) {
            *paddr = ALIGN(*paddr, size);
            pci_set_io_region_addr(bdf, region_num, *paddr);
            *paddr += size;
       }

If size is greater than 0xFFFFFFFF ??? BUILD_PCIPREFMEM_START (256MB), 
this call ALIGN(*paddr, size) will always return 0. The protection test fails, size remains > 0, and guest memory mapping will be corrupted.

We have found that a very similar problem was discussed previously:

http://www.mail-archive.com/qemu-devel@nongnu.org/msg38090.html

The discussion also touches on the question of 64bit addressing for PCI BARs.
In the current implementation regardless of whether the PCI BAR type is 64bit or not
the addressable range will be limited to the first 4GB. We also want to have "true" 64bit
addressable range for PCI BARs.

So to solve these issues some changes were made. 
Tracking for overflows has been added as well as support for 64bit range. 
To support the full 64bit address range a pci_bios_64bit_addr variable has been added. 
The 64bit range can be used only if BAR has PCI_BASE_ADDRESS_MEM_TYPE_64
attribute and the given region doesn't fit in first 4GB.  The patch has been
tested on Linux and BSD guest OS's and it appears to work fine. 



Signed-off-by Alexey Korolev <Alexey.Korolev@endace.com>
Signed-off-by Stephen Donelly <Stephen.Donelly@endace.com>
---
 pciinit.c |   83 ++++++++++++++++++++++++++++++++------------------------------
 1 file changed, 43 insertions(+), 40 deletions(-)




----- End forwarded message -----

Patch

diff --git a/src/pciinit.c b/src/pciinit.c
index 0346423..86c8137 100644
--- a/src/pciinit.c
+++ b/src/pciinit.c
@@ -20,6 +20,7 @@  static void pci_bios_init_device_in_bus(int bus);
 static u32 pci_bios_io_addr;
 static u32 pci_bios_mem_addr;
 static u32 pci_bios_prefmem_addr;
+static u64 pci_bios_64bit_addr;
 /* host irqs corresponding to PCI irqs A-D */
 const u8 pci_irqs[4] = {
     10, 10, 11, 11
@@ -56,9 +57,11 @@  static int pci_bios_allocate_region(u16 bdf, int region_num)
 {
     u32 *paddr;
     u32 ofs = pci_bar(bdf, region_num);
-
     u32 old = pci_config_readl(bdf, ofs);
     u32 mask;
+    u32 limit_addr;
+    int is_64bit;
+
     if (region_num == PCI_ROM_SLOT) {
         mask = PCI_ROM_ADDRESS_MASK;
         pci_config_writel(bdf, ofs, mask);
@@ -73,50 +76,49 @@  static int pci_bios_allocate_region(u16 bdf, int region_num)
     pci_config_writel(bdf, ofs, old);
 
     u32 size = (~(val & mask)) + 1;
-    if (val != 0) {
-        if (val & PCI_BASE_ADDRESS_SPACE_IO) {
-            paddr = &pci_bios_io_addr;
-            if (ALIGN(*paddr, size) + size >= 64 * 1024) {
-                dprintf(1,
-                        "io region of (bdf 0x%x bar %d) can't be mapped.\n",
-                        bdf, region_num);
-                size = 0;
-            }
-        } else if ((val & PCI_BASE_ADDRESS_MEM_PREFETCH) &&
-                 /* keep behaviour on bus = 0 */
-                 pci_bdf_to_bus(bdf) != 0 &&
-                 /* If pci_bios_prefmem_addr == 0, keep old behaviour */
-                 pci_bios_prefmem_addr != 0) {
-            paddr = &pci_bios_prefmem_addr;
-            if (ALIGN(*paddr, size) + size >= BUILD_PCIPREFMEM_END) {
-                dprintf(1,
-                        "prefmem region of (bdf 0x%x bar %d) can't be mapped. "
-                        "decrease BUILD_PCIMEM_SIZE and recompile. size %x\n",
-                        bdf, region_num, BUILD_PCIPREFMEM_SIZE);
-                size = 0;
-            }
-        } else {
-            paddr = &pci_bios_mem_addr;
-            if (ALIGN(*paddr, size) + size >= BUILD_PCIMEM_END) {
+    if (!val || !size)
+        return 0;
+
+    is_64bit = !(val & PCI_BASE_ADDRESS_SPACE_IO) &&
+        (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64;
+
+    if (val & PCI_BASE_ADDRESS_SPACE_IO) {
+        paddr = &pci_bios_io_addr;
+        limit_addr = 64 * 1024;
+    } else if ((val & PCI_BASE_ADDRESS_MEM_PREFETCH) &&
+             /* keep behaviour on bus = 0 */
+             pci_bdf_to_bus(bdf) != 0 &&
+             /* If pci_bios_prefmem_addr == 0, keep old behaviour */
+             pci_bios_prefmem_addr != 0) {
+        paddr = &pci_bios_prefmem_addr;
+        limit_addr = BUILD_PCIPREFMEM_END;
+    } else {
+        paddr = &pci_bios_mem_addr;
+        limit_addr = BUILD_PCIMEM_END;
+    }
+
+    /* The region is out of allowed 32bit mapping range */
+    if ((ALIGN(*paddr, size) + size >= limit_addr) ||
+        (ALIGN(*paddr, size) + size <= *paddr)) {
+            if (!is_64bit) {
                 dprintf(1,
                         "mem region of (bdf 0x%x bar %d) can't be mapped. "
-                        "increase BUILD_PCIMEM_SIZE and recompile. size %x\n",
-                        bdf, region_num, BUILD_PCIMEM_SIZE);
-                size = 0;
+                        "Region size %x\n", bdf, region_num, size);
+                return 0;
             }
-        }
-        if (size > 0) {
-            *paddr = ALIGN(*paddr, size);
-            pci_set_io_region_addr(bdf, region_num, *paddr);
-            *paddr += size;
-        }
+            pci_bios_64bit_addr = ALIGN(pci_bios_64bit_addr, size);
+            pci_set_io_region_addr(bdf, region_num, (u32)pci_bios_64bit_addr);
+            pci_set_io_region_addr(bdf, region_num + 1,
+                                   (u32)(pci_bios_64bit_addr >> 32));
+            pci_bios_64bit_addr += size;
+            return is_64bit;
     }
 
-    int is_64bit = !(val & PCI_BASE_ADDRESS_SPACE_IO) &&
-        (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64;
-    if (is_64bit && size > 0) {
-        pci_config_writel(bdf, ofs + 4, 0);
-    }
+    *paddr = ALIGN(*paddr, size);
+    pci_set_io_region_addr(bdf, region_num, *paddr);
+    if (is_64bit)
+        pci_set_io_region_addr(bdf, region_num + 1, 0);
+    *paddr += size;
     return is_64bit;
 }
 
@@ -409,6 +411,7 @@  pci_setup(void)
     pci_bios_io_addr = 0xc000;
     pci_bios_mem_addr = BUILD_PCIMEM_START;
     pci_bios_prefmem_addr = BUILD_PCIPREFMEM_START;
+    pci_bios_64bit_addr = ALIGN(RamSizeOver4G + 0x100000000ull, 0x100000000ull);
 
     pci_bios_init_bus();