Message ID | 1335248739.13579.17.camel@nzhmlwks0057.ad.endace.com |
---|---|
State | New |
Headers | show |
On Tue, Apr 24, 2012 at 06:25:39PM +1200, Alexey Korolev wrote: > Migrate 64bit entries to 64bit pci regions if they do > not fit in 32bit range. [...] > +static void pci_region_migrate_64bit_entries(struct pci_region *from, > + struct pci_region *to) > +{ > + struct pci_region_entry **pprev = &from->list; > + struct pci_region_entry **last = &to->list; > + while(*pprev) { > + if ((*pprev)->is64) { > + struct pci_region_entry *entry; > + entry = *pprev; > + /* Delete the entry and move next */ > + *pprev = (*pprev)->next; > + /* Add entry at tail to keep a sorted order */ > + entry->next = NULL; > + if (*last) { > + (*last)->next = entry; > + last = &(*last)->next; > + } > + else > + (*last) = entry; > + } > + else > + pprev = &(*pprev)->next; > + } > +} It should be possible to simplify this - something like (untested): static void pci_region_migrate_64bit_entries(struct pci_region *from, struct pci_region *to) { struct pci_region_entry **pprev = &from->list, **last = &to->list; for (; *pprev; pprev = &(*pprev)->next) { struct pci_region_entry *entry = *pprev; if (!entry->is64) continue; // Move from source list to dest list. *pprev = entry->next; entry->next = NULL; *last = entry; last = &entry->next; } } [...] > static void pci_bios_map_devices(struct pci_bus *busses) > { > + if (pci_bios_init_root_regions(busses)) { > + struct pci_region r64_mem, r64_pref; > + r64_mem.list = NULL; > + r64_pref.list = NULL; > + pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM], > + &r64_mem); > + pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_PREFMEM], > + &r64_pref); > + > + if (pci_bios_init_root_regions(busses)) > + panic("PCI: out of address space\n"); > + > + r64_mem.base = BUILD_PCIMEM64_START; > + r64_pref.base = ALIGN(r64_mem.base + pci_region_sum(&r64_mem), > + pci_region_align(&r64_pref)); There should be a check to see if the regions fit. Maybe pass start/end into pci_bios_init_root_regions() and call it again for the >4g region? > + pci_region_map_entries(busses, &r64_mem); > + pci_region_map_entries(busses, &r64_pref); > + } > // Map regions on each device. This doesn't look right to me. This will map the devices on bus 0 to the proper >4g address, but devices on any subsequent bus will use busses[0].r[].base which will be reset to the <4gig address. Perhaps pull base out of pci_region and make pci_region_map_entries() recursive? -Kevin
On 25/04/12 13:48, Kevin O'Connor wrote: > On Tue, Apr 24, 2012 at 06:25:39PM +1200, Alexey Korolev wrote: >> Migrate 64bit entries to 64bit pci regions if they do >> not fit in 32bit range. > [...] >> +static void pci_region_migrate_64bit_entries(struct pci_region *from, >> + struct pci_region *to) >> +{ >> + struct pci_region_entry **pprev = &from->list; >> + struct pci_region_entry **last = &to->list; >> + while(*pprev) { >> + if ((*pprev)->is64) { >> + struct pci_region_entry *entry; >> + entry = *pprev; >> + /* Delete the entry and move next */ >> + *pprev = (*pprev)->next; >> + /* Add entry at tail to keep a sorted order */ >> + entry->next = NULL; >> + if (*last) { >> + (*last)->next = entry; >> + last = &(*last)->next; >> + } >> + else >> + (*last) = entry; >> + } >> + else >> + pprev = &(*pprev)->next; >> + } >> +} > It should be possible to simplify this - something like (untested): > > static void pci_region_migrate_64bit_entries(struct pci_region *from, > struct pci_region *to) > { > struct pci_region_entry **pprev = &from->list, **last = &to->list; > for (; *pprev; pprev = &(*pprev)->next) { > struct pci_region_entry *entry = *pprev; > if (!entry->is64) > continue; > // Move from source list to dest list. > *pprev = entry->next; > entry->next = NULL; > *last = entry; > last = &entry->next; > } > } Sorry it's not working. I agree it's possible to simplify code a bit. static void pci_region_migrate_64bit_entries(struct pci_region *from, struct pci_region *to) { struct pci_region_entry **pprev = &from->list; struct pci_region_entry **last = &to->list; while(*pprev) { if ((*pprev)->is64) { struct pci_region_entry *entry; entry = *pprev; /* Delete the entry and move next */ *pprev = (*pprev)->next; /* Add entry at tail to keep the order */ entry->next = NULL; *last = entry; last = &entry->next; } else pprev = &(*pprev)->next; } } That should work. > [...] >> static void pci_bios_map_devices(struct pci_bus *busses) >> { >> + if (pci_bios_init_root_regions(busses)) { >> + struct pci_region r64_mem, r64_pref; >> + r64_mem.list = NULL; >> + r64_pref.list = NULL; >> + pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM], >> + &r64_mem); >> + pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_PREFMEM], >> + &r64_pref); >> + >> + if (pci_bios_init_root_regions(busses)) >> + panic("PCI: out of address space\n"); >> + >> + r64_mem.base = BUILD_PCIMEM64_START; >> + r64_pref.base = ALIGN(r64_mem.base + pci_region_sum(&r64_mem), >> + pci_region_align(&r64_pref)); > There should be a check to see if the regions fit. Maybe pass > start/end into pci_bios_init_root_regions() and call it again for the >> 4g region? Agree, I just ignored the check as 64bit range size is 2^39. I will think how to make it better. >> + pci_region_map_entries(busses, &r64_mem); >> + pci_region_map_entries(busses, &r64_pref); >> + } >> // Map regions on each device. > This doesn't look right to me. This will map the devices on bus 0 to > the proper >4g address, but devices on any subsequent bus will use > busses[0].r[].base which will be reset to the <4gig address. Perhaps > pull base out of pci_region and make pci_region_map_entries() > recursive? No recursion is need here! We map all entries which are 64bit on root bus. If entry is a bridge region - a corresponding bus address will be updated. Region won't be reseted to <4gig address as address is derived from parent region only. Thanks, Alexey
On Wed, Apr 25, 2012 at 06:25:07PM +1200, Alexey Korolev wrote: > On 25/04/12 13:48, Kevin O'Connor wrote: > > On Tue, Apr 24, 2012 at 06:25:39PM +1200, Alexey Korolev wrote: > >> + pci_region_map_entries(busses, &r64_mem); > >> + pci_region_map_entries(busses, &r64_pref); > >> + } > >> // Map regions on each device. > > This doesn't look right to me. This will map the devices on bus 0 to > > the proper >4g address, but devices on any subsequent bus will use > > busses[0].r[].base which will be reset to the <4gig address. Perhaps > > pull base out of pci_region and make pci_region_map_entries() > > recursive? > No recursion is need here! > We map all entries which are 64bit on root bus. > If entry is a bridge region - a corresponding bus address will be updated. > Region won't be reseted to <4gig address as address is derived from parent region only. Okay - I missed that. I think the patches look okay to be committed - any additional changes can be made on top. Gerd - do you have any comments? -Kevin
On 04/25/12 14:51, Kevin O'Connor wrote: > On Wed, Apr 25, 2012 at 06:25:07PM +1200, Alexey Korolev wrote: >> On 25/04/12 13:48, Kevin O'Connor wrote: >>> On Tue, Apr 24, 2012 at 06:25:39PM +1200, Alexey Korolev wrote: >>>> + pci_region_map_entries(busses, &r64_mem); >>>> + pci_region_map_entries(busses, &r64_pref); >>>> + } >>>> // Map regions on each device. >>> This doesn't look right to me. This will map the devices on bus 0 to >>> the proper >4g address, but devices on any subsequent bus will use >>> busses[0].r[].base which will be reset to the <4gig address. Perhaps >>> pull base out of pci_region and make pci_region_map_entries() >>> recursive? >> No recursion is need here! >> We map all entries which are 64bit on root bus. >> If entry is a bridge region - a corresponding bus address will be updated. >> Region won't be reseted to <4gig address as address is derived from parent region only. > > Okay - I missed that. I think the patches look okay to be committed - > any additional changes can be made on top. Gerd - do you have any > comments? Great job overall. Can't spot issues in the code. And the patches do fine in testing too. Some minor issues poped up: Issue #1: seabios can't boot from a virtio-scsi disk if the controller is behind a pci bridge. I think the reason simply is that (according to the seabios log) only root bus pci devices are initialized. Probably even isn't related to this patch set, just trapped into this while testing the bridge mapping code of the patch series. Issue #2: root bus (non-pref) memory regions are mapped above 4G if they are 64bit capable. That happened to include the xhci usb controller. I don't think we want that. Some day seabios will get xhci support, and having the bar above 4G makes it unreachable in 32bit mode. So this needs some refinement. Options I can think of: (1) Don't bother mapping non-prefmem bars above 4G. (2) Only map them above 4G if they are larger than a certain limit. (3) Allow devices to be excluded on certain conditions, for example when seabios has a driver, when they have an option rom, when they have a specific pci id, ... These are certainly no blockers and can be discussed and solved after committing this series. cheers, Gerd
On Wed, Apr 25, 2012 at 05:29:04PM +0200, Gerd Hoffmann wrote: > Issue #1: seabios can't boot from a virtio-scsi disk if the controller > is behind a pci bridge. I think the reason simply is that (according to > the seabios log) only root bus pci devices are initialized. Probably > even isn't related to this patch set, just trapped into this while > testing the bridge mapping code of the patch series. Is this just a matter of removing the "if (pci_bdf_to_bus(pci->bdf) != 0) break" from pci_bios_init_devices()? The code should probably handle the irq swizzling that pci bridges do though. > Issue #2: root bus (non-pref) memory regions are mapped above 4G if they > are 64bit capable. That happened to include the xhci usb controller. I > don't think we want that. Some day seabios will get xhci support, and > having the bar above 4G makes it unreachable in 32bit mode. So this > needs some refinement. Options I can think of: > > (1) Don't bother mapping non-prefmem bars above 4G. > (2) Only map them above 4G if they are larger than a certain limit. > (3) Allow devices to be excluded on certain conditions, for example > when seabios has a driver, when they have an option rom, when > they have a specific pci id, ... I'd vote for 2. There's nearly 500MB of space for PCI devices under 4G - small regions (say under 1MB) are unlikely to be the cause of an allocation failure (1MB would be .2% of total space). -Kevin
On 27/04/12 00:45, Kevin O'Connor wrote: > On Wed, Apr 25, 2012 at 05:29:04PM +0200, Gerd Hoffmann wrote: >> Issue #1: seabios can't boot from a virtio-scsi disk if the controller >> is behind a pci bridge. I think the reason simply is that (according to >> the seabios log) only root bus pci devices are initialized. Probably >> even isn't related to this patch set, just trapped into this while >> testing the bridge mapping code of the patch series. > Is this just a matter of removing the "if (pci_bdf_to_bus(pci->bdf) != > 0) break" from pci_bios_init_devices()? This helps in my tests. The devices are no longer disabled. Not sure about virtio-scsi test though. > The code should probably handle the irq swizzling that pci bridges do > though. >
Hi, > Is this just a matter of removing the "if (pci_bdf_to_bus(pci->bdf) != > 0) break" from pci_bios_init_devices()? Seems to do the trick, at least the disks connected appear in the boot menu now and the seabios log file looks sane. The guest kernel has no virtio-scsi drivers though, need to update it for more testing. > The code should probably handle the irq swizzling that pci bridges do > though. i.e. add bridge handling to pci_slot_get_irq() ? cheers, Gerd
On Wed, May 02, 2012 at 03:42:51PM +0200, Gerd Hoffmann wrote: > Hi, > > > Is this just a matter of removing the "if (pci_bdf_to_bus(pci->bdf) != > > 0) break" from pci_bios_init_devices()? > > Seems to do the trick, at least the disks connected appear in the boot > menu now and the seabios log file looks sane. > > The guest kernel has no virtio-scsi drivers though, need to update it > for more testing. > > > The code should probably handle the irq swizzling that pci bridges do > > though. > > i.e. add bridge handling to pci_slot_get_irq() ? Yes. -Kevin
diff --git a/src/config.h b/src/config.h index b0187a4..bbacae7 100644 --- a/src/config.h +++ b/src/config.h @@ -47,6 +47,8 @@ #define BUILD_PCIMEM_START 0xe0000000 #define BUILD_PCIMEM_END 0xfec00000 /* IOAPIC is mapped at */ +#define BUILD_PCIMEM64_START 0x8000000000ULL +#define BUILD_PCIMEM64_END 0x10000000000ULL #define BUILD_IOAPIC_ADDR 0xfec00000 #define BUILD_HPET_ADDRESS 0xfed00000 diff --git a/src/pciinit.c b/src/pciinit.c index a6cf98b..3d7640b 100644 --- a/src/pciinit.c +++ b/src/pciinit.c @@ -386,6 +386,31 @@ static u64 pci_region_sum(struct pci_region *r) return sum; } +static void pci_region_migrate_64bit_entries(struct pci_region *from, + struct pci_region *to) +{ + struct pci_region_entry **pprev = &from->list; + struct pci_region_entry **last = &to->list; + while(*pprev) { + if ((*pprev)->is64) { + struct pci_region_entry *entry; + entry = *pprev; + /* Delete the entry and move next */ + *pprev = (*pprev)->next; + /* Add entry at tail to keep a sorted order */ + entry->next = NULL; + if (*last) { + (*last)->next = entry; + last = &(*last)->next; + } + else + (*last) = entry; + } + else + pprev = &(*pprev)->next; + } +} + static struct pci_region_entry * pci_region_create_entry(struct pci_bus *bus, struct pci_device *dev, int bar, u64 size, u64 align, int type, int is64) @@ -478,7 +503,7 @@ static int pci_bios_check_devices(struct pci_bus *busses) } // Setup region bases (given the regions' size and alignment) -static void pci_bios_init_root_regions(struct pci_bus *bus) +static int pci_bios_init_root_regions(struct pci_bus *bus) { bus->r[PCI_REGION_TYPE_IO].base = 0xc000; @@ -498,7 +523,8 @@ static void pci_bios_init_root_regions(struct pci_bus *bus) if ((r_start->base < BUILD_PCIMEM_START) || (r_start->base > BUILD_PCIMEM_END)) // Memory range requested is larger than available. - panic("PCI: out of address space\n"); + return -1; + return 0; } @@ -561,6 +587,24 @@ static void pci_region_map_entries(struct pci_bus *busses, struct pci_region *r) static void pci_bios_map_devices(struct pci_bus *busses) { + if (pci_bios_init_root_regions(busses)) { + struct pci_region r64_mem, r64_pref; + r64_mem.list = NULL; + r64_pref.list = NULL; + pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM], + &r64_mem); + pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_PREFMEM], + &r64_pref); + + if (pci_bios_init_root_regions(busses)) + panic("PCI: out of address space\n"); + + r64_mem.base = BUILD_PCIMEM64_START; + r64_pref.base = ALIGN(r64_mem.base + pci_region_sum(&r64_mem), + pci_region_align(&r64_pref)); + pci_region_map_entries(busses, &r64_mem); + pci_region_map_entries(busses, &r64_pref); + } // Map regions on each device. int bus; for (bus = 0; bus<=MaxPCIBus; bus++) { @@ -605,8 +649,6 @@ pci_setup(void) if (pci_bios_check_devices(busses)) return; - pci_bios_init_root_regions(&busses[0]); - dprintf(1, "=== PCI new allocation pass #2 ===\n"); pci_bios_map_devices(busses);
Migrate 64bit entries to 64bit pci regions if they do not fit in 32bit range. Signed-off-by: Alexey Korolev <alexey.korolev@endace.com> --- src/config.h | 2 ++ src/pciinit.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 48 insertions(+), 4 deletions(-)