diff mbox

[11/12] Migrate 64bit entries to 64bit pci regions

Message ID 1335248739.13579.17.camel@nzhmlwks0057.ad.endace.com
State New
Headers show

Commit Message

Alexey Korolev April 24, 2012, 6:25 a.m. UTC
Migrate 64bit entries to 64bit pci regions if they do
not fit in 32bit range.

Signed-off-by: Alexey Korolev <alexey.korolev@endace.com>
---
 src/config.h  |    2 ++
 src/pciinit.c |   50 ++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 48 insertions(+), 4 deletions(-)

Comments

Kevin O'Connor April 25, 2012, 1:48 a.m. UTC | #1
On Tue, Apr 24, 2012 at 06:25:39PM +1200, Alexey Korolev wrote:
> Migrate 64bit entries to 64bit pci regions if they do
> not fit in 32bit range.
[...]
> +static void pci_region_migrate_64bit_entries(struct pci_region *from,
> +                                             struct pci_region *to)
> +{
> +    struct pci_region_entry **pprev = &from->list;
> +    struct pci_region_entry **last = &to->list;
> +    while(*pprev) {
> +        if ((*pprev)->is64) {
> +            struct pci_region_entry *entry;
> +            entry = *pprev;
> +            /* Delete the entry and move next */
> +            *pprev = (*pprev)->next;
> +            /* Add entry at tail to keep a sorted order */
> +            entry->next = NULL;
> +            if (*last) {
> +               (*last)->next = entry;
> +                last  = &(*last)->next;
> +            }
> +            else
> +               (*last) = entry;
> +        }
> +        else
> +            pprev = &(*pprev)->next;
> +    }
> +}

It should be possible to simplify this - something like (untested):

static void pci_region_migrate_64bit_entries(struct pci_region *from,
                                             struct pci_region *to)
{
    struct pci_region_entry **pprev = &from->list, **last = &to->list;
    for (; *pprev; pprev = &(*pprev)->next) {
        struct pci_region_entry *entry = *pprev;
        if (!entry->is64)
            continue;
        // Move from source list to dest list.
        *pprev = entry->next;
        entry->next = NULL;
        *last = entry;
        last = &entry->next;
    }
}

[...]
>  static void pci_bios_map_devices(struct pci_bus *busses)
>  {
> +    if (pci_bios_init_root_regions(busses)) {
> +        struct pci_region r64_mem, r64_pref;
> +        r64_mem.list = NULL;
> +        r64_pref.list = NULL;
> +        pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
> +                                         &r64_mem);
> +        pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_PREFMEM],
> +                                         &r64_pref);
> +
> +        if (pci_bios_init_root_regions(busses))
> +            panic("PCI: out of address space\n");
> +
> +        r64_mem.base = BUILD_PCIMEM64_START;
> +        r64_pref.base = ALIGN(r64_mem.base + pci_region_sum(&r64_mem),
> +                              pci_region_align(&r64_pref));

There should be a check to see if the regions fit.  Maybe pass
start/end into pci_bios_init_root_regions() and call it again for the
>4g region?

> +        pci_region_map_entries(busses, &r64_mem);
> +        pci_region_map_entries(busses, &r64_pref);
> +    }
>      // Map regions on each device.

This doesn't look right to me.  This will map the devices on bus 0 to
the proper >4g address, but devices on any subsequent bus will use
busses[0].r[].base which will be reset to the <4gig address.  Perhaps
pull base out of pci_region and make pci_region_map_entries()
recursive?

-Kevin
Alexey Korolev April 25, 2012, 6:25 a.m. UTC | #2
On 25/04/12 13:48, Kevin O'Connor wrote:
> On Tue, Apr 24, 2012 at 06:25:39PM +1200, Alexey Korolev wrote:
>> Migrate 64bit entries to 64bit pci regions if they do
>> not fit in 32bit range.
> [...]
>> +static void pci_region_migrate_64bit_entries(struct pci_region *from,
>> +                                             struct pci_region *to)
>> +{
>> +    struct pci_region_entry **pprev = &from->list;
>> +    struct pci_region_entry **last = &to->list;
>> +    while(*pprev) {
>> +        if ((*pprev)->is64) {
>> +            struct pci_region_entry *entry;
>> +            entry = *pprev;
>> +            /* Delete the entry and move next */
>> +            *pprev = (*pprev)->next;
>> +            /* Add entry at tail to keep a sorted order */
>> +            entry->next = NULL;
>> +            if (*last) {
>> +               (*last)->next = entry;
>> +                last  = &(*last)->next;
>> +            }
>> +            else
>> +               (*last) = entry;
>> +        }
>> +        else
>> +            pprev = &(*pprev)->next;
>> +    }
>> +}
> It should be possible to simplify this - something like (untested):
>
> static void pci_region_migrate_64bit_entries(struct pci_region *from,
>                                              struct pci_region *to)
> {
>     struct pci_region_entry **pprev = &from->list, **last = &to->list;
>     for (; *pprev; pprev = &(*pprev)->next) {
>         struct pci_region_entry *entry = *pprev;
>         if (!entry->is64)
>             continue;
>         // Move from source list to dest list.
>         *pprev = entry->next;
>         entry->next = NULL;
>         *last = entry;
>         last = &entry->next;
>     }
> }
Sorry it's not working.
I agree it's possible to simplify code a bit.


static void pci_region_migrate_64bit_entries(struct pci_region *from,
                                             struct pci_region *to)
{
    struct pci_region_entry **pprev = &from->list;
    struct pci_region_entry **last = &to->list;
    while(*pprev) {
        if ((*pprev)->is64) {
            struct pci_region_entry *entry;
            entry = *pprev;
            /* Delete the entry and move next */
            *pprev = (*pprev)->next;
            /* Add entry at tail to keep the order */
            entry->next = NULL;
            *last = entry;
            last = &entry->next;
        }
        else
            pprev = &(*pprev)->next;
    }
}

That should work.
> [...]
>>  static void pci_bios_map_devices(struct pci_bus *busses)
>>  {
>> +    if (pci_bios_init_root_regions(busses)) {
>> +        struct pci_region r64_mem, r64_pref;
>> +        r64_mem.list = NULL;
>> +        r64_pref.list = NULL;
>> +        pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
>> +                                         &r64_mem);
>> +        pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_PREFMEM],
>> +                                         &r64_pref);
>> +
>> +        if (pci_bios_init_root_regions(busses))
>> +            panic("PCI: out of address space\n");
>> +
>> +        r64_mem.base = BUILD_PCIMEM64_START;
>> +        r64_pref.base = ALIGN(r64_mem.base + pci_region_sum(&r64_mem),
>> +                              pci_region_align(&r64_pref));
> There should be a check to see if the regions fit.  Maybe pass
> start/end into pci_bios_init_root_regions() and call it again for the
>> 4g region?
Agree, I just ignored the check as 64bit range size is 2^39.
I will think how to make it better.
>> +        pci_region_map_entries(busses, &r64_mem);
>> +        pci_region_map_entries(busses, &r64_pref);
>> +    }
>>      // Map regions on each device.
> This doesn't look right to me.  This will map the devices on bus 0 to
> the proper >4g address, but devices on any subsequent bus will use
> busses[0].r[].base which will be reset to the <4gig address.  Perhaps
> pull base out of pci_region and make pci_region_map_entries()
> recursive?
No recursion is need here!
We map all entries which are 64bit on root bus.
If entry is a bridge region - a corresponding bus address will be updated.
Region won't be reseted to <4gig address as address is derived from parent region only.

Thanks,
Alexey
Kevin O'Connor April 25, 2012, 12:51 p.m. UTC | #3
On Wed, Apr 25, 2012 at 06:25:07PM +1200, Alexey Korolev wrote:
> On 25/04/12 13:48, Kevin O'Connor wrote:
> > On Tue, Apr 24, 2012 at 06:25:39PM +1200, Alexey Korolev wrote:
> >> +        pci_region_map_entries(busses, &r64_mem);
> >> +        pci_region_map_entries(busses, &r64_pref);
> >> +    }
> >>      // Map regions on each device.
> > This doesn't look right to me.  This will map the devices on bus 0 to
> > the proper >4g address, but devices on any subsequent bus will use
> > busses[0].r[].base which will be reset to the <4gig address.  Perhaps
> > pull base out of pci_region and make pci_region_map_entries()
> > recursive?
> No recursion is need here!
> We map all entries which are 64bit on root bus.
> If entry is a bridge region - a corresponding bus address will be updated.
> Region won't be reseted to <4gig address as address is derived from parent region only.

Okay - I missed that.  I think the patches look okay to be committed -
any additional changes can be made on top.  Gerd - do you have any
comments?

-Kevin
Gerd Hoffmann April 25, 2012, 3:29 p.m. UTC | #4
On 04/25/12 14:51, Kevin O'Connor wrote:
> On Wed, Apr 25, 2012 at 06:25:07PM +1200, Alexey Korolev wrote:
>> On 25/04/12 13:48, Kevin O'Connor wrote:
>>> On Tue, Apr 24, 2012 at 06:25:39PM +1200, Alexey Korolev wrote:
>>>> +        pci_region_map_entries(busses, &r64_mem);
>>>> +        pci_region_map_entries(busses, &r64_pref);
>>>> +    }
>>>>      // Map regions on each device.
>>> This doesn't look right to me.  This will map the devices on bus 0 to
>>> the proper >4g address, but devices on any subsequent bus will use
>>> busses[0].r[].base which will be reset to the <4gig address.  Perhaps
>>> pull base out of pci_region and make pci_region_map_entries()
>>> recursive?
>> No recursion is need here!
>> We map all entries which are 64bit on root bus.
>> If entry is a bridge region - a corresponding bus address will be updated.
>> Region won't be reseted to <4gig address as address is derived from parent region only.
> 
> Okay - I missed that.  I think the patches look okay to be committed -
> any additional changes can be made on top.  Gerd - do you have any
> comments?

Great job overall.  Can't spot issues in the code.  And the patches do
fine in testing too.  Some minor issues poped up:

Issue #1:  seabios can't boot from a virtio-scsi disk if the controller
is behind a pci bridge.  I think the reason simply is that (according to
the seabios log) only root bus pci devices are initialized.  Probably
even isn't related to this patch set, just trapped into this while
testing the bridge mapping code of the patch series.

Issue #2: root bus (non-pref) memory regions are mapped above 4G if they
are 64bit capable.  That happened to include the xhci usb controller.  I
don't think we want that.  Some day seabios will get xhci support, and
having the bar above 4G makes it unreachable in 32bit mode.  So this
needs some refinement.  Options I can think of:

  (1) Don't bother mapping non-prefmem bars above 4G.
  (2) Only map them above 4G if they are larger than a certain limit.
  (3) Allow devices to be excluded on certain conditions, for example
      when seabios has a driver, when they have an option rom, when
      they have a specific pci id, ...

These are certainly no blockers and can be discussed and solved after
committing this series.

cheers,
  Gerd
Kevin O'Connor April 26, 2012, 12:45 p.m. UTC | #5
On Wed, Apr 25, 2012 at 05:29:04PM +0200, Gerd Hoffmann wrote:
> Issue #1:  seabios can't boot from a virtio-scsi disk if the controller
> is behind a pci bridge.  I think the reason simply is that (according to
> the seabios log) only root bus pci devices are initialized.  Probably
> even isn't related to this patch set, just trapped into this while
> testing the bridge mapping code of the patch series.

Is this just a matter of removing the "if (pci_bdf_to_bus(pci->bdf) !=
0) break" from pci_bios_init_devices()?

The code should probably handle the irq swizzling that pci bridges do
though.

> Issue #2: root bus (non-pref) memory regions are mapped above 4G if they
> are 64bit capable.  That happened to include the xhci usb controller.  I
> don't think we want that.  Some day seabios will get xhci support, and
> having the bar above 4G makes it unreachable in 32bit mode.  So this
> needs some refinement.  Options I can think of:
> 
>   (1) Don't bother mapping non-prefmem bars above 4G.
>   (2) Only map them above 4G if they are larger than a certain limit.
>   (3) Allow devices to be excluded on certain conditions, for example
>       when seabios has a driver, when they have an option rom, when
>       they have a specific pci id, ...

I'd vote for 2.  There's nearly 500MB of space for PCI devices under
4G - small regions (say under 1MB) are unlikely to be the cause of an
allocation failure (1MB would be .2% of total space).

-Kevin
Alexey Korolev April 29, 2012, 10:41 p.m. UTC | #6
On 27/04/12 00:45, Kevin O'Connor wrote:
> On Wed, Apr 25, 2012 at 05:29:04PM +0200, Gerd Hoffmann wrote:
>> Issue #1:  seabios can't boot from a virtio-scsi disk if the controller
>> is behind a pci bridge.  I think the reason simply is that (according to
>> the seabios log) only root bus pci devices are initialized.  Probably
>> even isn't related to this patch set, just trapped into this while
>> testing the bridge mapping code of the patch series.
> Is this just a matter of removing the "if (pci_bdf_to_bus(pci->bdf) !=
> 0) break" from pci_bios_init_devices()?
This helps in my tests. The devices are no longer disabled. Not sure
about virtio-scsi test though.
> The code should probably handle the irq swizzling that pci bridges do
> though.
>
Gerd Hoffmann May 2, 2012, 1:42 p.m. UTC | #7
Hi,

> Is this just a matter of removing the "if (pci_bdf_to_bus(pci->bdf) !=
> 0) break" from pci_bios_init_devices()?

Seems to do the trick, at least the disks connected appear in the boot
menu now and the seabios log file looks sane.

The guest kernel has no virtio-scsi drivers though, need to update it
for more testing.

> The code should probably handle the irq swizzling that pci bridges do
> though.

i.e. add bridge handling to pci_slot_get_irq() ?

cheers,
  Gerd
Kevin O'Connor May 4, 2012, 1:16 p.m. UTC | #8
On Wed, May 02, 2012 at 03:42:51PM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> > Is this just a matter of removing the "if (pci_bdf_to_bus(pci->bdf) !=
> > 0) break" from pci_bios_init_devices()?
> 
> Seems to do the trick, at least the disks connected appear in the boot
> menu now and the seabios log file looks sane.
> 
> The guest kernel has no virtio-scsi drivers though, need to update it
> for more testing.
> 
> > The code should probably handle the irq swizzling that pci bridges do
> > though.
> 
> i.e. add bridge handling to pci_slot_get_irq() ?

Yes.

-Kevin
diff mbox

Patch

diff --git a/src/config.h b/src/config.h
index b0187a4..bbacae7 100644
--- a/src/config.h
+++ b/src/config.h
@@ -47,6 +47,8 @@ 
 
 #define BUILD_PCIMEM_START        0xe0000000
 #define BUILD_PCIMEM_END          0xfec00000    /* IOAPIC is mapped at */
+#define BUILD_PCIMEM64_START      0x8000000000ULL
+#define BUILD_PCIMEM64_END        0x10000000000ULL
 
 #define BUILD_IOAPIC_ADDR         0xfec00000
 #define BUILD_HPET_ADDRESS        0xfed00000
diff --git a/src/pciinit.c b/src/pciinit.c
index a6cf98b..3d7640b 100644
--- a/src/pciinit.c
+++ b/src/pciinit.c
@@ -386,6 +386,31 @@  static u64 pci_region_sum(struct pci_region *r)
    return sum;
 }
 
+static void pci_region_migrate_64bit_entries(struct pci_region *from,
+                                             struct pci_region *to)
+{
+    struct pci_region_entry **pprev = &from->list;
+    struct pci_region_entry **last = &to->list;
+    while(*pprev) {
+        if ((*pprev)->is64) {
+            struct pci_region_entry *entry;
+            entry = *pprev;
+            /* Delete the entry and move next */
+            *pprev = (*pprev)->next;
+            /* Add entry at tail to keep a sorted order */
+            entry->next = NULL;
+            if (*last) {
+               (*last)->next = entry;
+                last  = &(*last)->next;
+            }
+            else
+               (*last) = entry;
+        }
+        else
+            pprev = &(*pprev)->next;
+    }
+}
+
 static struct pci_region_entry *
 pci_region_create_entry(struct pci_bus *bus, struct pci_device *dev,
                         int bar, u64 size, u64 align, int type, int is64)
@@ -478,7 +503,7 @@  static int pci_bios_check_devices(struct pci_bus *busses)
 }
 
 // Setup region bases (given the regions' size and alignment)
-static void pci_bios_init_root_regions(struct pci_bus *bus)
+static int pci_bios_init_root_regions(struct pci_bus *bus)
 {
     bus->r[PCI_REGION_TYPE_IO].base = 0xc000;
 
@@ -498,7 +523,8 @@  static void pci_bios_init_root_regions(struct pci_bus *bus)
     if ((r_start->base < BUILD_PCIMEM_START) ||
          (r_start->base > BUILD_PCIMEM_END))
         // Memory range requested is larger than available.
-        panic("PCI: out of address space\n");
+        return -1;
+    return 0;
 }
 
 
@@ -561,6 +587,24 @@  static void pci_region_map_entries(struct pci_bus *busses, struct pci_region *r)
 
 static void pci_bios_map_devices(struct pci_bus *busses)
 {
+    if (pci_bios_init_root_regions(busses)) {
+        struct pci_region r64_mem, r64_pref;
+        r64_mem.list = NULL;
+        r64_pref.list = NULL;
+        pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
+                                         &r64_mem);
+        pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_PREFMEM],
+                                         &r64_pref);
+
+        if (pci_bios_init_root_regions(busses))
+            panic("PCI: out of address space\n");
+
+        r64_mem.base = BUILD_PCIMEM64_START;
+        r64_pref.base = ALIGN(r64_mem.base + pci_region_sum(&r64_mem),
+                              pci_region_align(&r64_pref));
+        pci_region_map_entries(busses, &r64_mem);
+        pci_region_map_entries(busses, &r64_pref);
+    }
     // Map regions on each device.
     int bus;
     for (bus = 0; bus<=MaxPCIBus; bus++) {
@@ -605,8 +649,6 @@  pci_setup(void)
     if (pci_bios_check_devices(busses))
         return;
 
-    pci_bios_init_root_regions(&busses[0]);
-
     dprintf(1, "=== PCI new allocation pass #2 ===\n");
     pci_bios_map_devices(busses);