diff mbox

[v7,08/10] PCI, x86: add MMCFG information on demand

Message ID 4FDB2192.5070709@huawei.com
State Changes Requested
Headers show

Commit Message

Jiang Liu June 15, 2012, 11:50 a.m. UTC
Hi Yinghai,
	A formal patch to fix "here cache cfg too early.  should do 
that after pci_mmcfg_reject_broken()". This patch also improves
readability and fixes two condition compilation issues reported by
Fengguang. If you are ok with it, I will fold it into 
"[PATCH v7 08/10] PCI, x86: add MMCFG information on demand".
	Thanks!
	Gerry

---
---

On 2012-6-15 15:15, Yinghai Lu wrote:
> On Sat, May 26, 2012 at 2:54 AM, Jiang Liu <jiang.liu@huawei.com> wrote:
>> This patch changes mmconfig logic on x86 platforms to add MMCFG
>> information on demand instead of adding all MMCFG entries from
>> the ACPI MCFG table at boot time. So only MMCFG address ranges
>> used by active PCI host bridges will be actually mapped.
>>
>> Signed-off-by: Jiang Liu <liuj97@gmail.com>
>> ---
>>  arch/x86/include/asm/pci_x86.h |    5 +++
>>  arch/x86/pci/legacy.c          |    1 +
>>  arch/x86/pci/mmconfig-shared.c |   54 +++++++++++++++++++++++++++++++++++++---
>>  3 files changed, 56 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
>> index 3e5f43c..4a1a9aa 100644
>> --- a/arch/x86/include/asm/pci_x86.h
>> +++ b/arch/x86/include/asm/pci_x86.h
>> @@ -142,6 +142,11 @@ extern int __devinit pci_mmconfig_insert(struct device *dev,
>>                                         uint16_t seg, uint8_t start,
>>                                         uint8_t end, phys_addr_t addr);
>>  extern int pci_mmconfig_delete(uint16_t seg, uint8_t start, uint8_t end);
>> +#ifdef CONFIG_ACPI
>> +extern void pci_mmconfig_probe(uint8_t start);
>> +#else
>> +static inline void pci_mmconfig_probe(uint8_t start) { }
>> +#endif
>>  extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
>>
>>  extern struct list_head pci_mmcfg_list;
>> diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
>> index a1df191..e9a2384 100644
>> --- a/arch/x86/pci/legacy.c
>> +++ b/arch/x86/pci/legacy.c
>> @@ -49,6 +49,7 @@ void __devinit pcibios_scan_specific_bus(int busn)
>>                    l != 0x0000 && l != 0xffff) {
>>                        DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l);
>>                        printk(KERN_INFO "PCI: Discovered peer bus %02x\n", busn);
>> +                       pci_mmconfig_probe(busn);
>>                        pci_scan_bus_on_node(busn, &pci_root_ops, node);
>>                        return;
>>                }
>> diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
>> index fa0aa90..e80b5c2 100644
>> --- a/arch/x86/pci/mmconfig-shared.c
>> +++ b/arch/x86/pci/mmconfig-shared.c
>> @@ -19,6 +19,7 @@
>>  #include <linux/slab.h>
>>  #include <linux/mutex.h>
>>  #include <linux/rculist.h>
>> +#include <linux/pci-acpi.h>
>>  #include <asm/e820.h>
>>  #include <asm/pci_x86.h>
>>  #include <asm/acpi.h>
>> @@ -616,6 +617,16 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
>>                        return -ENODEV;
>>                }
>>
>> +               /*
>> +                * MMCFG information for host brideges will be added on demand
>> +                * by pci_root driver if ACPI is enabled. But there are special
>> +                * requirements for devices on segment 0, MMCFG information may
>> +                * be needed for fixing hardware quirks and probing for hidden
>> +                * buses.
>> +                */
>> +               if (!acpi_disabled && cfg->pci_segment)
>> +                       continue;
>> +
>>                if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number,
>>                                   cfg->end_bus_number, cfg->address) == NULL) {
>>                        printk(KERN_WARNING PREFIX
>> @@ -625,6 +636,13 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
>>                }
>>        }
>>
>> +       i = entries * sizeof(*cfg_table);
>> +       pci_acpi_mcfg_array = kmalloc(i, GFP_KERNEL);
>> +       if (pci_acpi_mcfg_array) {
>> +               memcpy(pci_acpi_mcfg_array, cfg_table, i);
>> +               pci_acpi_mcfg_entries = entries;
>> +       }
>> +
> 
> here cache cfg too early.  should do that after
> 
> pci_mmcfg_reject_broken().
> 
> otherwise will use mcfg even try to reject that before.
> 
>>        return 0;
>>  }
>>
>> @@ -634,14 +652,14 @@ static void __init __pci_mmcfg_init(int early)
>>        if ((pci_probe & PCI_PROBE_MMCONF) == 0)
>>                return;
>>
>> -       /* MMCONFIG already enabled */
>> -       if (!early && !(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF))
>> -               return;
>> -
>>        /* for late to exit */
>>        if (known_bridge)
>>                return;
>>
>> +       /* MMCONFIG already enabled */
>> +       if (!early && !(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF))
>> +               goto out;
>> +
>>        if (early) {
>>                if (pci_mmcfg_check_hostbridge())
>>                        known_bridge = 1;
>> @@ -675,6 +693,14 @@ static void __init __pci_mmcfg_init(int early)
>>                pci_mmcfg_resources_inserted = 1;
>>                pci_mmcfg_arch_init_failed = true;
>>        }
>> +
>> +out:
>> +       /*
>> +        * Free all MCFG entries if ACPI is enabled. MCFG information will
>> +        * be added back on demand by the pci_root driver later.
>> +        */
>> +       if (!early && !acpi_disabled && !known_bridge && pci_acpi_mcfg_array)
>> +               free_all_mmcfg();
> 
> that really change the logic.
> 
> looks like it will break mrst/sfi path.
> 
> the scan from pci_legacy_init() for mrst/sfi will not have ext_pci_ops
> set for bus 0.
> 
> | int __init pci_subsys_init(void)
> | {
> |        /*
> |         * The init function returns an non zero value when
> |         * pci_legacy_init should be invoked.
> |         */
> |        if (x86_init.pci.init())
> |                pci_legacy_init();
> |
> |        pcibios_fixup_peer_bridges();
> 
> 
> Yinghai
> 
>>  }
>>
>>  void __init pci_mmcfg_early_init(void)
>> @@ -809,3 +835,23 @@ int pci_mmconfig_delete(uint16_t seg, uint8_t start, uint8_t end)
>>
>>        return -ENOENT;
>>  }
>> +
>> +/* Probe MMCFG information for PCI bus blind probe */
>> +void __devinit pci_mmconfig_probe(uint8_t start)
>> +{
>> +       int end_bus, temp;
>> +       phys_addr_t addr;
>> +
>> +       if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed)
>> +               return;
>> +
>> +       addr = acpi_pci_root_get_mcfg_addr(NULL, 0, start, &end_bus);
>> +       if (addr && end_bus >= 0 && end_bus <= 255) {
>> +               for (temp = start + 1; temp <= end_bus; temp++)
>> +                       if (pci_find_bus(0, temp))
>> +                               break;
>> +
>> +               temp--;
>> +               pci_mmconfig_insert(NULL, 0, start, (uint8_t)temp, addr);
>> +       }
>> +}
>> --
>> 1.7.1
>>
>>
> 
> .
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Yinghai Lu June 15, 2012, 4:51 p.m. UTC | #1
On Fri, Jun 15, 2012 at 4:50 AM, Jiang Liu <jiang.liu@huawei.com> wrote:
> Hi Yinghai,
>        A formal patch to fix "here cache cfg too early.  should do
> that after pci_mmcfg_reject_broken()". This patch also improves
> readability and fixes two condition compilation issues reported by
> Fengguang. If you are ok with it, I will fold it into
> "[PATCH v7 08/10] PCI, x86: add MMCFG information on demand".
>        Thanks!
>        Gerry
>
> ---
> diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
> index 7eae174..5eb2ac9 100644
> --- a/arch/x86/include/asm/pci_x86.h
> +++ b/arch/x86/include/asm/pci_x86.h
> @@ -142,7 +142,7 @@ extern int __devinit pci_mmconfig_insert(struct device *dev,
>                                         u16 seg, u8 start, u8 end,
>                                         phys_addr_t addr);
>  extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
> -#ifdef CONFIG_ACPI
> +#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_MMCONFIG)
>  extern void pci_mmconfig_probe(u8 start);
>  #else
>  static inline void pci_mmconfig_probe(u8 start) { }
> diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
> index 20ab4f5..636de35 100644
> --- a/arch/x86/pci/mmconfig-shared.c
> +++ b/arch/x86/pci/mmconfig-shared.c
> @@ -636,13 +636,6 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
>                }
>        }
>
> -       i = entries * sizeof(*cfg_table);
> -       pci_acpi_mcfg_array = kmalloc(i, GFP_KERNEL);
> -       if (pci_acpi_mcfg_array) {
> -               memcpy(pci_acpi_mcfg_array, cfg_table, i);
> -               pci_acpi_mcfg_entries = entries;
> -       }
> -
>        return 0;
>  }
>
> @@ -699,8 +692,11 @@ out:
>         * Free all MCFG entries if ACPI is enabled. MCFG information will
>         * be added back on demand by the pci_root driver later.
>         */
> -       if (!early && !acpi_disabled && !known_bridge && pci_acpi_mcfg_array)
> -               free_all_mmcfg();
> +       if (!early && !acpi_disabled && !known_bridge &&
> +           !pci_mmcfg_arch_init_failed) {
> +               if (!acpi_pci_cache_mcfg())
> +                       free_all_mmcfg();
> +       }
>  }
>
>  void __init pci_mmcfg_early_init(void)
> diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
> index f5d2157..93e0f91 100644
> --- a/drivers/pci/pci-acpi.c
> +++ b/drivers/pci/pci-acpi.c
> @@ -163,8 +163,34 @@ acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev)
>  }
>
>  /* acpi_table_parse() is marked as __init, so cache MCFG info at boot time */
> -int pci_acpi_mcfg_entries;
> -struct acpi_mcfg_allocation *pci_acpi_mcfg_array;
> +static int pci_acpi_mcfg_entries;
> +static struct acpi_mcfg_allocation *pci_acpi_mcfg_array;
> +
> +static int __init pci_cache_mcfg(struct acpi_table_header *header)
> +{
> +       u32 sz;
> +       void *ptr;
> +
> +       if (!header || (header->length <= sizeof(struct acpi_table_mcfg)))
> +               return -EINVAL;
> +
> +       sz = (header->length - sizeof(struct acpi_table_mcfg));
> +       pci_acpi_mcfg_array = kmalloc(sz, GFP_KERNEL);
> +       if (!pci_acpi_mcfg_array)
> +               return -ENOMEM;
> +
> +       ptr = (void *)header + sizeof(struct acpi_table_mcfg);
> +       memcpy(pci_acpi_mcfg_array, ptr, sz);
> +       pci_acpi_mcfg_entries = sz / sizeof (struct acpi_mcfg_allocation);
> +
> +       return 0;
> +}
> +
> +int __init acpi_pci_cache_mcfg(void)
> +{
> +       acpi_table_parse(ACPI_SIG_MCFG, pci_cache_mcfg);
> +       return pci_acpi_mcfg_array ? 0 : -EINVAL;
> +}

still have some problem:
pci_mmcfg_check_reserved==>is_mmconf_reserved

will update cfg->end_bus.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiang Liu June 16, 2012, 9:23 a.m. UTC | #2
On 2012-6-16 0:51, Yinghai Lu wrote:
> 
> still have some problem:
> pci_mmcfg_check_reserved==>is_mmconf_reserved
> 
> will update cfg->end_bus.
Hi Yinghai,
	How about following patch for this issue? I guess we need to
keep current behavior at boot time for backward compatibility, right?

---
@@ -472,6 +472,14 @@ static int __devinit is_mmconf_reserved(check_reserved_t is_reserved,
                        break;
        }

+       /*
+        * For backward compatibility, we will adjust the MMCONFIG region
+        * at boot time if it's only partially reserved by firmware.
+        * For PCI host bridge hotplug at runtime, just reject it.
+        */
+       if (pci_mmcfg_running_state && old_size != size)
+               return 0;
+
        if (size < (16UL<<20) && size != old_size)
                return 0;

---

> 
> Thanks
> 
> Yinghai
> 
> .
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu June 16, 2012, 8:08 p.m. UTC | #3
On Sat, Jun 16, 2012 at 2:23 AM, Jiang Liu <jiang.liu@huawei.com> wrote:
> On 2012-6-16 0:51, Yinghai Lu wrote:
>>
>> still have some problem:
>> pci_mmcfg_check_reserved==>is_mmconf_reserved
>>
>> will update cfg->end_bus.
> Hi Yinghai,
>        How about following patch for this issue? I guess we need to
> keep current behavior at boot time for backward compatibility, right?

We'd better to make all path share as most code as possible.
1. hostbridge scanning during boot -- early, it will check chipset and e820
2. MCFG checking during boot -- early, it will check e820
3. MCFG checking during boot -- late, it will check acpi pnp
4. _CBA checking for hotplug-able pci root bus but it is installed during boot.
5. _CBA checking for hotplug-able pci root bus during run time.

please keep mapping for all entries in MCFG table. aka 1, 2, 3.
I have some local patches that will read ext pci conf space before scan pci bus.
please check attached one for nehalem-ioh.

for case 4, 5: only need to add one bool in acpi_pci_root to record if
mmconf list and mapping is touched.

Thanks

Yinghai
Bjorn Helgaas June 16, 2012, 9:48 p.m. UTC | #4
On Sat, Jun 16, 2012 at 2:08 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Sat, Jun 16, 2012 at 2:23 AM, Jiang Liu <jiang.liu@huawei.com> wrote:
>> On 2012-6-16 0:51, Yinghai Lu wrote:
>>>
>>> still have some problem:
>>> pci_mmcfg_check_reserved==>is_mmconf_reserved
>>>
>>> will update cfg->end_bus.
>> Hi Yinghai,
>>        How about following patch for this issue? I guess we need to
>> keep current behavior at boot time for backward compatibility, right?
>
> We'd better to make all path share as most code as possible.
> 1. hostbridge scanning during boot -- early, it will check chipset and e820
> 2. MCFG checking during boot -- early, it will check e820
> 3. MCFG checking during boot -- late, it will check acpi pnp
> 4. _CBA checking for hotplug-able pci root bus but it is installed during boot.
> 5. _CBA checking for hotplug-able pci root bus during run time.
>
> please keep mapping for all entries in MCFG table. aka 1, 2, 3.
> I have some local patches that will read ext pci conf space before scan pci bus.
> please check attached one for nehalem-ioh.

I don't think it's a requirement that Gerry keep your Nehalem patch
working.  Your intel_bus.c is not in the tree and you haven't provided
an explanation for why it should be.

The only requirement I'm aware of for PCI config access before we
discover the host bridges via ACPI is for segment group 0, bus 0, as
mentioned in ACPI spec 5.0, sec 5.2.3.1, PDF page 143, and I think
that applies only to the first 0x100 bytes of config space.  I don't
think there's a requirement for access to the extended configuration
space (bytes 0x100-0xFFF).  I do not see a requirement that this
pre-host bridge access happen via MMCONFIG; as far as I can tell, we
can use the legacy 0xCF8/0xCFC mechanism.

> for case 4, 5: only need to add one bool in acpi_pci_root to record if
> mmconf list and mapping is touched.
>
> Thanks
>
> Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu June 16, 2012, 10:44 p.m. UTC | #5
On Sat, Jun 16, 2012 at 2:48 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>
>> We'd better to make all path share as most code as possible.
>> 1. hostbridge scanning during boot -- early, it will check chipset and e820
>> 2. MCFG checking during boot -- early, it will check e820
>> 3. MCFG checking during boot -- late, it will check acpi pnp
>> 4. _CBA checking for hotplug-able pci root bus but it is installed during boot.
>> 5. _CBA checking for hotplug-able pci root bus during run time.
>>
>> please keep mapping for all entries in MCFG table. aka 1, 2, 3.
>> I have some local patches that will read ext pci conf space before scan pci bus.
>> please check attached one for nehalem-ioh.
>
> I don't think it's a requirement that Gerry keep your Nehalem patch
> working.  Your intel_bus.c is not in the tree and you haven't provided
> an explanation for why it should be.
>
> The only requirement I'm aware of for PCI config access before we
> discover the host bridges via ACPI is for segment group 0, bus 0, as
> mentioned in ACPI spec 5.0, sec 5.2.3.1, PDF page 143, and I think
> that applies only to the first 0x100 bytes of config space.  I don't
> think there's a requirement for access to the extended configuration
> space (bytes 0x100-0xFFF).  I do not see a requirement that this
> pre-host bridge access happen via MMCONFIG; as far as I can tell, we
> can use the legacy 0xCF8/0xCFC mechanism.

that one shot for intel host bridge resource discovery before root bus
scanning,
will need to access registers above 0x100.

Current MCFG handling have some sanitary checking during probing.

Now Jiang is trying to free result and cache it for two MCFG path 2/3.
and later use cached and map again for entries from cached entries.
but when use those cached entries sanitary of those entries are lost.

so choice would be
1. cache all checked MMCFG result and use that later
2. or just leave current MCFG handling alone, just add _CBA support.
like Jiang -v4 version.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu June 16, 2012, 10:48 p.m. UTC | #6
On Sat, Jun 16, 2012 at 3:44 PM, Yinghai Lu <yinghai@kernel.org> wrote:

> Current MCFG handling have some sanitary checking during probing.
>
> Now Jiang is trying to free result and cache it for two MCFG path 2/3.
> and later use cached and map again for entries from cached entries.
> but when use those cached entries sanitary of those entries are lost.
>
> so choice would be
> 1. cache all checked MMCFG result and use that later
> 2. or just leave current MCFG handling alone, just add _CBA support.
> like Jiang -v4 version.
>

I have used updated jiang's -v4 version for a while.

Yinghai
Bjorn Helgaas June 17, 2012, 1:55 a.m. UTC | #7
On Sat, Jun 16, 2012 at 4:44 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Sat, Jun 16, 2012 at 2:48 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>>
>>> We'd better to make all path share as most code as possible.
>>> 1. hostbridge scanning during boot -- early, it will check chipset and e820
>>> 2. MCFG checking during boot -- early, it will check e820
>>> 3. MCFG checking during boot -- late, it will check acpi pnp
>>> 4. _CBA checking for hotplug-able pci root bus but it is installed during boot.
>>> 5. _CBA checking for hotplug-able pci root bus during run time.
>>>
>>> please keep mapping for all entries in MCFG table. aka 1, 2, 3.
>>> I have some local patches that will read ext pci conf space before scan pci bus.
>>> please check attached one for nehalem-ioh.
>>
>> I don't think it's a requirement that Gerry keep your Nehalem patch
>> working.  Your intel_bus.c is not in the tree and you haven't provided
>> an explanation for why it should be.
>>
>> The only requirement I'm aware of for PCI config access before we
>> discover the host bridges via ACPI is for segment group 0, bus 0, as
>> mentioned in ACPI spec 5.0, sec 5.2.3.1, PDF page 143, and I think
>> that applies only to the first 0x100 bytes of config space.  I don't
>> think there's a requirement for access to the extended configuration
>> space (bytes 0x100-0xFFF).  I do not see a requirement that this
>> pre-host bridge access happen via MMCONFIG; as far as I can tell, we
>> can use the legacy 0xCF8/0xCFC mechanism.
>
> that one shot for intel host bridge resource discovery before root bus
> scanning,
> will need to access registers above 0x100.

I don't understand what you're saying.  Are you disagreeing with
something I said above?

As far as I know, we can rely on ACPI _CRS completely for host bridge
resources.  Are there exceptions?  What does "one shot for intel host
bridge resource discovery" mean?  Are there machines that are broken
because we don't have intel_bus.c?

> Current MCFG handling have some sanitary checking during probing.
>
> Now Jiang is trying to free result and cache it for two MCFG path 2/3.
> and later use cached and map again for entries from cached entries.
> but when use those cached entries sanitary of those entries are lost.
>
> so choice would be
> 1. cache all checked MMCFG result and use that later
> 2. or just leave current MCFG handling alone, just add _CBA support.
> like Jiang -v4 version.

Choice 2 sounds like a possibility.  I probably encouraged mucking
around in the current MCFG handling, but if we're not going to
actually clean anything up, there's not much point in touching it.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiang Liu June 18, 2012, 1:21 a.m. UTC | #8
On 2012-6-17 9:55, Bjorn Helgaas wrote:
> On Sat, Jun 16, 2012 at 4:44 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>> On Sat, Jun 16, 2012 at 2:48 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>>>
>>>> We'd better to make all path share as most code as possible.
>>>> 1. hostbridge scanning during boot -- early, it will check chipset and e820
>>>> 2. MCFG checking during boot -- early, it will check e820
>>>> 3. MCFG checking during boot -- late, it will check acpi pnp
>>>> 4. _CBA checking for hotplug-able pci root bus but it is installed during boot.
>>>> 5. _CBA checking for hotplug-able pci root bus during run time.
>>>>
>>>> please keep mapping for all entries in MCFG table. aka 1, 2, 3.
>>>> I have some local patches that will read ext pci conf space before scan pci bus.
>>>> please check attached one for nehalem-ioh.
>>>
>>> I don't think it's a requirement that Gerry keep your Nehalem patch
>>> working.  Your intel_bus.c is not in the tree and you haven't provided
>>> an explanation for why it should be.
>>>
>>> The only requirement I'm aware of for PCI config access before we
>>> discover the host bridges via ACPI is for segment group 0, bus 0, as
>>> mentioned in ACPI spec 5.0, sec 5.2.3.1, PDF page 143, and I think
>>> that applies only to the first 0x100 bytes of config space.  I don't
>>> think there's a requirement for access to the extended configuration
>>> space (bytes 0x100-0xFFF).  I do not see a requirement that this
>>> pre-host bridge access happen via MMCONFIG; as far as I can tell, we
>>> can use the legacy 0xCF8/0xCFC mechanism.
>>
>> that one shot for intel host bridge resource discovery before root bus
>> scanning,
>> will need to access registers above 0x100.
> 
> I don't understand what you're saying.  Are you disagreeing with
> something I said above?
> 
> As far as I know, we can rely on ACPI _CRS completely for host bridge
> resources.  Are there exceptions?  What does "one shot for intel host
> bridge resource discovery" mean?  Are there machines that are broken
> because we don't have intel_bus.c?

I have consulted Bob Moore about this topic before and Bob said that
they hasn't encountered a system on which the ACPICA has dependency 
on extended PCI configuration space yet.

>> Current MCFG handling have some sanitary checking during probing.
>>
>> Now Jiang is trying to free result and cache it for two MCFG path 2/3.
>> and later use cached and map again for entries from cached entries.
>> but when use those cached entries sanitary of those entries are lost.
>>
>> so choice would be
>> 1. cache all checked MMCFG result and use that later
>> 2. or just leave current MCFG handling alone, just add _CBA support.
>> like Jiang -v4 version.
> 
> Choice 2 sounds like a possibility.  I probably encouraged mucking
> around in the current MCFG handling, but if we're not going to
> actually clean anything up, there's not much point in touching it.
I prefer the second choice too. Backward compatibility is really
important on x86, so don't want to break anything here. I could
help to split the patch set into two parts: one for root bridge
hotplug and the other for cleanup. This time we could focus on 
the first patch to prepare for host bridge hotplug, and have 
more time to discuss about the cleanup work.

On the other hand, BIOS should report MMCONFIG information by _CBA
if a host bridge supports physical hotplug. So the cleanup only 
benefits two cases:
1) BIOS reports MMCONFIG information for hot-pluggable host bridges
   in MCFG table. This is really a BIOS bug. Currently we have no
   really x86 platforms in the field which support PCI host bridge
   hotplug yet. So it may be acceptable for OS to report such bugs
   and let BIOS people to fix them.
2) Account MMCONFIG information to specific host bridges. It does
   give better representation about the MMCONFIG resource usages,
   but it's on risk of breaking backward compatibilities.

So should we adopt the second solution here?

Thanks!
Gerry


--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas June 18, 2012, 6:24 p.m. UTC | #9
On Sun, Jun 17, 2012 at 7:21 PM, Jiang Liu <jiang.liu@huawei.com> wrote:
> On 2012-6-17 9:55, Bjorn Helgaas wrote:
>> On Sat, Jun 16, 2012 at 4:44 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>>> On Sat, Jun 16, 2012 at 2:48 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>>>>
>>>>> We'd better to make all path share as most code as possible.
>>>>> 1. hostbridge scanning during boot -- early, it will check chipset and e820
>>>>> 2. MCFG checking during boot -- early, it will check e820
>>>>> 3. MCFG checking during boot -- late, it will check acpi pnp
>>>>> 4. _CBA checking for hotplug-able pci root bus but it is installed during boot.
>>>>> 5. _CBA checking for hotplug-able pci root bus during run time.
>>>>>
>>>>> please keep mapping for all entries in MCFG table. aka 1, 2, 3.
>>>>> I have some local patches that will read ext pci conf space before scan pci bus.
>>>>> please check attached one for nehalem-ioh.
>>>>
>>>> I don't think it's a requirement that Gerry keep your Nehalem patch
>>>> working.  Your intel_bus.c is not in the tree and you haven't provided
>>>> an explanation for why it should be.
>>>>
>>>> The only requirement I'm aware of for PCI config access before we
>>>> discover the host bridges via ACPI is for segment group 0, bus 0, as
>>>> mentioned in ACPI spec 5.0, sec 5.2.3.1, PDF page 143, and I think
>>>> that applies only to the first 0x100 bytes of config space.  I don't
>>>> think there's a requirement for access to the extended configuration
>>>> space (bytes 0x100-0xFFF).  I do not see a requirement that this
>>>> pre-host bridge access happen via MMCONFIG; as far as I can tell, we
>>>> can use the legacy 0xCF8/0xCFC mechanism.
>>>
>>> that one shot for intel host bridge resource discovery before root bus
>>> scanning,
>>> will need to access registers above 0x100.
>>
>> I don't understand what you're saying.  Are you disagreeing with
>> something I said above?
>>
>> As far as I know, we can rely on ACPI _CRS completely for host bridge
>> resources.  Are there exceptions?  What does "one shot for intel host
>> bridge resource discovery" mean?  Are there machines that are broken
>> because we don't have intel_bus.c?
>
> I have consulted Bob Moore about this topic before and Bob said that
> they hasn't encountered a system on which the ACPICA has dependency
> on extended PCI configuration space yet.
>
>>> Current MCFG handling have some sanitary checking during probing.
>>>
>>> Now Jiang is trying to free result and cache it for two MCFG path 2/3.
>>> and later use cached and map again for entries from cached entries.
>>> but when use those cached entries sanitary of those entries are lost.
>>>
>>> so choice would be
>>> 1. cache all checked MMCFG result and use that later
>>> 2. or just leave current MCFG handling alone, just add _CBA support.
>>> like Jiang -v4 version.
>>
>> Choice 2 sounds like a possibility.  I probably encouraged mucking
>> around in the current MCFG handling, but if we're not going to
>> actually clean anything up, there's not much point in touching it.
> I prefer the second choice too. Backward compatibility is really
> important on x86, so don't want to break anything here. I could
> help to split the patch set into two parts: one for root bridge
> hotplug and the other for cleanup. This time we could focus on
> the first patch to prepare for host bridge hotplug, and have
> more time to discuss about the cleanup work.
>
> On the other hand, BIOS should report MMCONFIG information by _CBA
> if a host bridge supports physical hotplug. So the cleanup only
> benefits two cases:
> 1) BIOS reports MMCONFIG information for hot-pluggable host bridges
>   in MCFG table. This is really a BIOS bug. Currently we have no
>   really x86 platforms in the field which support PCI host bridge
>   hotplug yet. So it may be acceptable for OS to report such bugs
>   and let BIOS people to fix them.
> 2) Account MMCONFIG information to specific host bridges. It does
>   give better representation about the MMCONFIG resource usages,
>   but it's on risk of breaking backward compatibilities.
>
> So should we adopt the second solution here?

Yes, let's try that.  I'm sorry I encouraged you to go on a wild goose chase.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 7eae174..5eb2ac9 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -142,7 +142,7 @@  extern int __devinit pci_mmconfig_insert(struct device *dev,
                                         u16 seg, u8 start, u8 end,
                                         phys_addr_t addr);
 extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
-#ifdef CONFIG_ACPI
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_MMCONFIG)
 extern void pci_mmconfig_probe(u8 start);
 #else
 static inline void pci_mmconfig_probe(u8 start) { }
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 20ab4f5..636de35 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -636,13 +636,6 @@  static int __init pci_parse_mcfg(struct acpi_table_header *header)
                }
        }

-       i = entries * sizeof(*cfg_table);
-       pci_acpi_mcfg_array = kmalloc(i, GFP_KERNEL);
-       if (pci_acpi_mcfg_array) {
-               memcpy(pci_acpi_mcfg_array, cfg_table, i);
-               pci_acpi_mcfg_entries = entries;
-       }
-
        return 0;
 }

@@ -699,8 +692,11 @@  out:
         * Free all MCFG entries if ACPI is enabled. MCFG information will
         * be added back on demand by the pci_root driver later.
         */
-       if (!early && !acpi_disabled && !known_bridge && pci_acpi_mcfg_array)
-               free_all_mmcfg();
+       if (!early && !acpi_disabled && !known_bridge &&
+           !pci_mmcfg_arch_init_failed) {
+               if (!acpi_pci_cache_mcfg())
+                       free_all_mmcfg();
+       }
 }

 void __init pci_mmcfg_early_init(void)
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index f5d2157..93e0f91 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -163,8 +163,34 @@  acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev)
 }

 /* acpi_table_parse() is marked as __init, so cache MCFG info at boot time */
-int pci_acpi_mcfg_entries;
-struct acpi_mcfg_allocation *pci_acpi_mcfg_array;
+static int pci_acpi_mcfg_entries;
+static struct acpi_mcfg_allocation *pci_acpi_mcfg_array;
+
+static int __init pci_cache_mcfg(struct acpi_table_header *header)
+{
+       u32 sz;
+       void *ptr;
+
+       if (!header || (header->length <= sizeof(struct acpi_table_mcfg)))
+               return -EINVAL;
+
+       sz = (header->length - sizeof(struct acpi_table_mcfg));
+       pci_acpi_mcfg_array = kmalloc(sz, GFP_KERNEL);
+       if (!pci_acpi_mcfg_array)
+               return -ENOMEM;
+
+       ptr = (void *)header + sizeof(struct acpi_table_mcfg);
+       memcpy(pci_acpi_mcfg_array, ptr, sz);
+       pci_acpi_mcfg_entries = sz / sizeof (struct acpi_mcfg_allocation);
+
+       return 0;
+}
+
+int __init acpi_pci_cache_mcfg(void)
+{
+       acpi_table_parse(ACPI_SIG_MCFG, pci_cache_mcfg);
+       return pci_acpi_mcfg_array ? 0 : -EINVAL;
+}

 phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle, u16 seg,
                                        u8 start, int *endp)
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 946789f..e03207c 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -11,15 +11,13 @@ 
 #include <linux/acpi.h>

 #ifdef CONFIG_ACPI
-extern int pci_acpi_mcfg_entries;
-extern struct acpi_mcfg_allocation *pci_acpi_mcfg_array;
-
 extern acpi_status pci_acpi_add_bus_pm_notifier(struct acpi_device *dev,
                                                 struct pci_bus *pci_bus);
 extern acpi_status pci_acpi_remove_bus_pm_notifier(struct acpi_device *dev);
 extern acpi_status pci_acpi_add_pm_notifier(struct acpi_device *dev,
                                             struct pci_dev *pci_dev);
 extern acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev);
+extern int acpi_pci_cache_mcfg(void);
 extern phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle,
                        u16 seg, u8 start, int *endp);

@@ -40,6 +38,8 @@  static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
        return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus),
                                              pbus->number);
 }
+#else
+static inline int acpi_pci_cache_mcfg(void) { return -EINVAL; }
 #endif

 #ifdef CONFIG_ACPI_APEI