diff mbox

[2/2] PCI: Only enable realloc auto when root bus has 64bit mmio

Message ID 1386546869-31900-3-git-send-email-yinghai@kernel.org
State Rejected
Headers show

Commit Message

Yinghai Lu Dec. 8, 2013, 11:54 p.m. UTC
Joseph found
| commit b07f2ebc109b607789f648dedcff4b125f9afec6
| Date:   Thu Feb 23 19:23:32 2012 -0800
|
|    PCI: add a PCI resource reallocation config option

cause one system can not load driver for Intel x520 NIC's.

The root resource:
[    1.212470] PCI host bridge to bus 0000:20
[    1.212475] pci_bus 0000:20: root bus resource [bus 20-3e]
[    1.212479] pci_bus 0000:20: root bus resource [io  0xc000-0xdfff]
[    1.212483] pci_bus 0000:20: root bus resource [mem 0xfecc0000-0xfecfffff]
[    1.212487] pci_bus 0000:20: root bus resource [mem 0xe9400000-0xe97fffff]

and bios does not assign sriov, also have two function ROM bar point to same
position.
[    1.213197] pci 0000:22:00.0: [8086:10fb] type 00 class 0x020000
...
[    1.213240] pci 0000:22:00.0: reg 0x30: [mem 0xe9500000-0xe957ffff pref]
[    1.213303] pci 0000:22:00.0: reg 0x184: [mem 0x00000000-0x00003fff 64bit]
[    1.213317] pci 0000:22:00.0: reg 0x190: [mem 0x00000000-0x00003fff 64bit]
[    1.213366] pci 0000:22:00.1: [8086:10fb] type 00 class 0x020000
...
[    1.213408] pci 0000:22:00.1: reg 0x30: [mem 0xe9500000-0xe957ffff pref]
[    1.213468] pci 0000:22:00.1: reg 0x184: [mem 0x00000000-0x00003fff 64bit]
[    1.213481] pci 0000:22:00.1: reg 0x190: [mem 0x00000000-0x00003fff 64bit]
[    1.218527] pci 0000:20:03.0: PCI bridge to [bus 22]
[    1.218534] pci 0000:20:03.0:   bridge window [io  0xd000-0xdfff]
[    1.218537] pci 0000:20:03.0:   bridge window [mem 0xe9400000-0xe95fffff]
...
[    1.254103] pci 0000:22:00.1: address space collision: [mem 0xe9500000-0xe957ffff pref] conflicts with 0000:22:00.0 [mem 0xe9500000-0xe957ffff pref]
[    1.254111] pci 0000:23:00.1: address space collision: [mem 0xe9700000-0xe977ffff pref] conflicts with 0000:23:00.0 [mem 0xe9700000-0xe977ffff pref]

We don't need to enable realloc for this case, as we can not alter root bus mmio
range to get big one to hold two rom bar, and sriov under 4G.

Add checking if pci root bus have 4G above mmio res, and don't enable
realloc auto accordingly.

bug report at: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1245938

Reported-by: Joseph Salisbury <joseph.salisbury@canonical.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/pci/setup-bus.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

Comments

Bjorn Helgaas Dec. 9, 2013, 5:54 p.m. UTC | #1
On Sun, Dec 8, 2013 at 4:54 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> Joseph found
> | commit b07f2ebc109b607789f648dedcff4b125f9afec6
> | Date:   Thu Feb 23 19:23:32 2012 -0800
> |
> |    PCI: add a PCI resource reallocation config option
>
> cause one system can not load driver for Intel x520 NIC's.
>
> The root resource:
> [    1.212470] PCI host bridge to bus 0000:20
> [    1.212475] pci_bus 0000:20: root bus resource [bus 20-3e]
> [    1.212479] pci_bus 0000:20: root bus resource [io  0xc000-0xdfff]
> [    1.212483] pci_bus 0000:20: root bus resource [mem 0xfecc0000-0xfecfffff]
> [    1.212487] pci_bus 0000:20: root bus resource [mem 0xe9400000-0xe97fffff]
>
> and bios does not assign sriov, also have two function ROM bar point to same
> position.
> [    1.213197] pci 0000:22:00.0: [8086:10fb] type 00 class 0x020000
> ...
> [    1.213240] pci 0000:22:00.0: reg 0x30: [mem 0xe9500000-0xe957ffff pref]
> [    1.213303] pci 0000:22:00.0: reg 0x184: [mem 0x00000000-0x00003fff 64bit]
> [    1.213317] pci 0000:22:00.0: reg 0x190: [mem 0x00000000-0x00003fff 64bit]
> [    1.213366] pci 0000:22:00.1: [8086:10fb] type 00 class 0x020000
> ...
> [    1.213408] pci 0000:22:00.1: reg 0x30: [mem 0xe9500000-0xe957ffff pref]
> [    1.213468] pci 0000:22:00.1: reg 0x184: [mem 0x00000000-0x00003fff 64bit]
> [    1.213481] pci 0000:22:00.1: reg 0x190: [mem 0x00000000-0x00003fff 64bit]
> [    1.218527] pci 0000:20:03.0: PCI bridge to [bus 22]
> [    1.218534] pci 0000:20:03.0:   bridge window [io  0xd000-0xdfff]
> [    1.218537] pci 0000:20:03.0:   bridge window [mem 0xe9400000-0xe95fffff]
> ...
> [    1.254103] pci 0000:22:00.1: address space collision: [mem 0xe9500000-0xe957ffff pref] conflicts with 0000:22:00.0 [mem 0xe9500000-0xe957ffff pref]
> [    1.254111] pci 0000:23:00.1: address space collision: [mem 0xe9700000-0xe977ffff pref] conflicts with 0000:23:00.0 [mem 0xe9700000-0xe977ffff pref]
>
> We don't need to enable realloc for this case, as we can not alter root bus mmio
> range to get big one to hold two rom bar, and sriov under 4G.
>
> Add checking if pci root bus have 4G above mmio res, and don't enable
> realloc auto accordingly.
>
> bug report at: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1245938
>
> Reported-by: Joseph Salisbury <joseph.salisbury@canonical.com>
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> Cc: stable@vger.kernel.org
> ---
>  drivers/pci/setup-bus.c | 32 +++++++++++++++++++++++++++-----
>  1 file changed, 27 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> index 219a410..f9e6efb 100644
> --- a/drivers/pci/setup-bus.c
> +++ b/drivers/pci/setup-bus.c
> @@ -1432,17 +1432,39 @@ static int iov_resources_unassigned(struct pci_dev *dev, void *data)
>         return 0;
>  }
>
> +static bool pci_bus_mem_above_4g(struct pci_bus *bus)
> +{
> +       int i;
> +       struct resource *res;
> +
> +       pci_bus_for_each_resource(bus, res, i) {
> +               struct pci_bus_region region;
> +
> +               if (!res || !(res->flags & IORESOURCE_MEM))
> +                       continue;
> +
> +               __pcibios_resource_to_bus(bus, &region, res);
> +               if (region.end > 0xffffffff)
> +                       return true;
> +       }
> +
> +       return false;
> +}
> +
>  static enum enable_type pci_realloc_detect(struct pci_bus *bus,
>                          enum enable_type enable_local)
>  {
> -       bool unassigned = false;
> -
>         if (enable_local != undefined)
>                 return enable_local;
>
> -       pci_walk_bus(bus, iov_resources_unassigned, &unassigned);
> -       if (unassigned)
> -               return auto_enabled;
> +       /* only enable auto when root bus does support 64bit mmio */
> +       if (pci_bus_mem_above_4g(bus)) {
> +               bool unassigned = false;
> +
> +               pci_walk_bus(bus, iov_resources_unassigned, &unassigned);
> +               if (unassigned)
> +                       return auto_enabled;

I don't see how the question of whether the host bridge has an
aperture above 4G is related to whether we should automatically
reassign resources.  They seem orthogonal to me.  No doubt it "fixes"
the current problem, but it doesn't make sense conceptually.

Bjorn

> +       }
>
>         return enable_local;
>  }
> --
> 1.8.4
>
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu Dec. 9, 2013, 7:20 p.m. UTC | #2
On Mon, Dec 9, 2013 at 9:54 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> I don't see how the question of whether the host bridge has an
> aperture above 4G is related to whether we should automatically
> reassign resources.  They seem orthogonal to me.  No doubt it "fixes"
> the current problem, but it doesn't make sense conceptually.

the BIOS has problem to have two functions point to same position.

During realloc first try: standard+two rom_bar+sriov will be fail at first as
pci root bus does not have enough mmio range,
later try will standard+two rom_bar it will fail too as root bus mmio range will
still only fit standard+one rom_bar.

My thoughts is limit use realloc auto in those systems that does not have mmio64
above 4g...
so old system will never have to specify "pci=realloc=off" to disable it.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Dec. 9, 2013, 7:42 p.m. UTC | #3
On Mon, Dec 9, 2013 at 12:20 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Mon, Dec 9, 2013 at 9:54 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>> I don't see how the question of whether the host bridge has an
>> aperture above 4G is related to whether we should automatically
>> reassign resources.  They seem orthogonal to me.  No doubt it "fixes"
>> the current problem, but it doesn't make sense conceptually.
>
> the BIOS has problem to have two functions point to same position.
>
> During realloc first try: standard+two rom_bar+sriov will be fail at first as
> pci root bus does not have enough mmio range,
> later try will standard+two rom_bar it will fail too as root bus mmio range will
> still only fit standard+one rom_bar.
>
> My thoughts is limit use realloc auto in those systems that does not have mmio64
> above 4g...
> so old system will never have to specify "pci=realloc=off" to disable it.

That doesn't answer my question at all.

I understand that this change makes it so Joseph doesn't have to use
"pci=realloc=off".  But why should auto-reallocation be limited to
buses that have resources above 4GB?  That doesn't make any sense.

We should fix the reallocation code so it can deal with this case.  If
there's not enough space for everything, obviously we have to leave
something unassigned.  A ROM BAR is a good candidate for leaving
unassigned, because most of the time we can get along without it.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu Dec. 9, 2013, 8:10 p.m. UTC | #4
On Mon, Dec 9, 2013 at 11:42 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> That doesn't answer my question at all.
>
> I understand that this change makes it so Joseph doesn't have to use
> "pci=realloc=off".  But why should auto-reallocation be limited to
> buses that have resources above 4GB?  That doesn't make any sense.
>
> We should fix the reallocation code so it can deal with this case.  If
> there's not enough space for everything, obviously we have to leave
> something unassigned.  A ROM BAR is a good candidate for leaving
> unassigned, because most of the time we can get along without it.

Yes, that is ideal and not that simple.
but that would be hard to backport to old kernels.

BTW, Joseph, can you try
git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
for-pci-3.14
with pci=realloc=on

on that system?

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Dec. 9, 2013, 8:20 p.m. UTC | #5
On Mon, Dec 9, 2013 at 1:10 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Mon, Dec 9, 2013 at 11:42 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>> That doesn't answer my question at all.
>>
>> I understand that this change makes it so Joseph doesn't have to use
>> "pci=realloc=off".  But why should auto-reallocation be limited to
>> buses that have resources above 4GB?  That doesn't make any sense.
>>
>> We should fix the reallocation code so it can deal with this case.  If
>> there's not enough space for everything, obviously we have to leave
>> something unassigned.  A ROM BAR is a good candidate for leaving
>> unassigned, because most of the time we can get along without it.
>
> Yes, that is ideal and not that simple.
> but that would be hard to backport to old kernels.

Yes, I agree.  I didn't say it would be simple.  The quick fix would
be easy for now, but adding nonsensical code makes our lives harder
long into the future.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu Dec. 9, 2013, 8:44 p.m. UTC | #6
On Mon, Dec 9, 2013 at 12:20 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:

> Yes, I agree.  I didn't say it would be simple.  The quick fix would
> be easy for now, but adding nonsensical code makes our lives harder
> long into the future.

If the old kernel is working, and user update kernel then we should
not request them to append "pci=realloc=off" for booting.

other user that would have "pci=realloc=on" works, they could always
keep that.

For now, we just reduce auto enable scope, as very beginning we are doing
that esp for SRIOV..., and now SRIOV would require huge range esp when
num_VF could be 64. we are easy to hit the case that realloc code mess up
the bios allocation when there is not above 4G 64bit mmio.

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Dec. 9, 2013, 9:05 p.m. UTC | #7
On Mon, Dec 9, 2013 at 1:44 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Mon, Dec 9, 2013 at 12:20 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>
>> Yes, I agree.  I didn't say it would be simple.  The quick fix would
>> be easy for now, but adding nonsensical code makes our lives harder
>> long into the future.
>
> If the old kernel is working, and user update kernel then we should
> not request them to append "pci=realloc=off" for booting.

I agree.  I would just prefer a better fix.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Salisbury Dec. 11, 2013, 7:19 p.m. UTC | #8
On 12/09/2013 03:10 PM, Yinghai Lu wrote:
> On Mon, Dec 9, 2013 at 11:42 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>> That doesn't answer my question at all.
>>
>> I understand that this change makes it so Joseph doesn't have to use
>> "pci=realloc=off".  But why should auto-reallocation be limited to
>> buses that have resources above 4GB?  That doesn't make any sense.
>>
>> We should fix the reallocation code so it can deal with this case.  If
>> there's not enough space for everything, obviously we have to leave
>> something unassigned.  A ROM BAR is a good candidate for leaving
>> unassigned, because most of the time we can get along without it.
> Yes, that is ideal and not that simple.
> but that would be hard to backport to old kernels.
>
> BTW, Joseph, can you try
> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
> for-pci-3.14
> with pci=realloc=on
>
> on that system?
>
> Thanks
>
> Yinghai
I noticed there was some back and forth on this thread.  Do you still
want me to test this version, Yinghai?
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu Dec. 11, 2013, 7:55 p.m. UTC | #9
On Wed, Dec 11, 2013 at 11:19 AM, Joseph Salisbury
<joseph.salisbury@canonical.com> wrote:
> On 12/09/2013 03:10 PM, Yinghai Lu wrote:
>> On Mon, Dec 9, 2013 at 11:42 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>> That doesn't answer my question at all.
>>>
>>> I understand that this change makes it so Joseph doesn't have to use
>>> "pci=realloc=off".  But why should auto-reallocation be limited to
>>> buses that have resources above 4GB?  That doesn't make any sense.
>>>
>>> We should fix the reallocation code so it can deal with this case.  If
>>> there's not enough space for everything, obviously we have to leave
>>> something unassigned.  A ROM BAR is a good candidate for leaving
>>> unassigned, because most of the time we can get along without it.
>> Yes, that is ideal and not that simple.
>> but that would be hard to backport to old kernels.
>>
>> BTW, Joseph, can you try
>> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
>> for-pci-3.14
>> with pci=realloc=on
>>
>> on that system?
>>
>> Thanks
>>
>> Yinghai
> I noticed there was some back and forth on this thread.  Do you still
> want me to test this version, Yinghai?

Yes, if that works, we would not need to put the patch in upstream for limiting
realloc auto scope.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Salisbury Jan. 8, 2014, 4:38 p.m. UTC | #10
On 12/11/2013 02:55 PM, Yinghai Lu wrote:
> On Wed, Dec 11, 2013 at 11:19 AM, Joseph Salisbury
> <joseph.salisbury@canonical.com> wrote:
>> On 12/09/2013 03:10 PM, Yinghai Lu wrote:
>>> On Mon, Dec 9, 2013 at 11:42 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>>> That doesn't answer my question at all.
>>>>
>>>> I understand that this change makes it so Joseph doesn't have to use
>>>> "pci=realloc=off".  But why should auto-reallocation be limited to
>>>> buses that have resources above 4GB?  That doesn't make any sense.
>>>>
>>>> We should fix the reallocation code so it can deal with this case.  If
>>>> there's not enough space for everything, obviously we have to leave
>>>> something unassigned.  A ROM BAR is a good candidate for leaving
>>>> unassigned, because most of the time we can get along without it.
>>> Yes, that is ideal and not that simple.
>>> but that would be hard to backport to old kernels.
>>>
>>> BTW, Joseph, can you try
>>> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
>>> for-pci-3.14
>>> with pci=realloc=on
>>>
>>> on that system?
>>>
>>> Thanks
>>>
>>> Yinghai
>> I noticed there was some back and forth on this thread.  Do you still
>> want me to test this version, Yinghai?
> Yes, if that works, we would not need to put the patch in upstream for limiting
> realloc auto scope.
>
> Thanks
>
> Yinghai
Hi Yinghai,

Sorry for the delay.  The bug reporter was finally able to test your
patch.  He reports that this version of the patch does in fact fix the
bug.  See comment #72 here:

http://pad.lv/1245938

Thanks again for all your help!

Joe

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Salisbury Jan. 10, 2014, 4:19 p.m. UTC | #11
On 12/11/2013 02:55 PM, Yinghai Lu wrote:
> On Wed, Dec 11, 2013 at 11:19 AM, Joseph Salisbury
> <joseph.salisbury@canonical.com> wrote:
>> On 12/09/2013 03:10 PM, Yinghai Lu wrote:
>>> On Mon, Dec 9, 2013 at 11:42 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>>> That doesn't answer my question at all.
>>>>
>>>> I understand that this change makes it so Joseph doesn't have to use
>>>> "pci=realloc=off".  But why should auto-reallocation be limited to
>>>> buses that have resources above 4GB?  That doesn't make any sense.
>>>>
>>>> We should fix the reallocation code so it can deal with this case.  If
>>>> there's not enough space for everything, obviously we have to leave
>>>> something unassigned.  A ROM BAR is a good candidate for leaving
>>>> unassigned, because most of the time we can get along without it.
>>> Yes, that is ideal and not that simple.
>>> but that would be hard to backport to old kernels.
>>>
>>> BTW, Joseph, can you try
>>> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
>>> for-pci-3.14
>>> with pci=realloc=on
>>>
>>> on that system?
>>>
>>> Thanks
>>>
>>> Yinghai
>> I noticed there was some back and forth on this thread.  Do you still
>> want me to test this version, Yinghai?
> Yes, if that works, we would not need to put the patch in upstream for limiting
> realloc auto scope.
>
> Thanks
>
> Yinghai

Another user has confirmed that at test kernel from your branch[0] does
resolve the bug.

Thanks,

Joe

[0]

git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu Jan. 10, 2014, 5:13 p.m. UTC | #12
On Fri, Jan 10, 2014 at 8:19 AM, Joseph Salisbury
<joseph.salisbury@canonical.com> wrote:
> On 12/11/2013 02:55 PM, Yinghai Lu wrote:
>> On Wed, Dec 11, 2013 at 11:19 AM, Joseph Salisbury
>> <joseph.salisbury@canonical.com> wrote:
>>> On 12/09/2013 03:10 PM, Yinghai Lu wrote:
>>>> On Mon, Dec 9, 2013 at 11:42 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>>>> That doesn't answer my question at all.
>>>>>
>>>>> I understand that this change makes it so Joseph doesn't have to use
>>>>> "pci=realloc=off".  But why should auto-reallocation be limited to
>>>>> buses that have resources above 4GB?  That doesn't make any sense.
>>>>>
>>>>> We should fix the reallocation code so it can deal with this case.  If
>>>>> there's not enough space for everything, obviously we have to leave
>>>>> something unassigned.  A ROM BAR is a good candidate for leaving
>>>>> unassigned, because most of the time we can get along without it.
>>>> Yes, that is ideal and not that simple.
>>>> but that would be hard to backport to old kernels.
>>>>
>>>> BTW, Joseph, can you try
>>>> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
>>>> for-pci-3.14
>>>> with pci=realloc=on
>>>>
>>>> on that system?
>>>>
>>>> Thanks
>>>>
>>>> Yinghai
>>> I noticed there was some back and forth on this thread.  Do you still
>>> want me to test this version, Yinghai?
>> Yes, if that works, we would not need to put the patch in upstream for limiting
>> realloc auto scope.
>>
>
> Another user has confirmed that at test kernel from your branch[0] does
> resolve the bug.

Hi, Joe,

Bjorn does not want to limit auto realloc, so this patch can not make
to upstream at this point.

so we may have to ask user to append "pci=realloc=off" before we find
more smart way to realloc
resource.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Salisbury Jan. 10, 2014, 9:54 p.m. UTC | #13
On 01/10/2014 12:13 PM, Yinghai Lu wrote:
> On Fri, Jan 10, 2014 at 8:19 AM, Joseph Salisbury
> <joseph.salisbury@canonical.com> wrote:
>> On 12/11/2013 02:55 PM, Yinghai Lu wrote:
>>> On Wed, Dec 11, 2013 at 11:19 AM, Joseph Salisbury
>>> <joseph.salisbury@canonical.com> wrote:
>>>> On 12/09/2013 03:10 PM, Yinghai Lu wrote:
>>>>> On Mon, Dec 9, 2013 at 11:42 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>>>>> That doesn't answer my question at all.
>>>>>>
>>>>>> I understand that this change makes it so Joseph doesn't have to use
>>>>>> "pci=realloc=off".  But why should auto-reallocation be limited to
>>>>>> buses that have resources above 4GB?  That doesn't make any sense.
>>>>>>
>>>>>> We should fix the reallocation code so it can deal with this case.  If
>>>>>> there's not enough space for everything, obviously we have to leave
>>>>>> something unassigned.  A ROM BAR is a good candidate for leaving
>>>>>> unassigned, because most of the time we can get along without it.
>>>>> Yes, that is ideal and not that simple.
>>>>> but that would be hard to backport to old kernels.
>>>>>
>>>>> BTW, Joseph, can you try
>>>>> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
>>>>> for-pci-3.14
>>>>> with pci=realloc=on
>>>>>
>>>>> on that system?
>>>>>
>>>>> Thanks
>>>>>
>>>>> Yinghai
>>>> I noticed there was some back and forth on this thread.  Do you still
>>>> want me to test this version, Yinghai?
>>> Yes, if that works, we would not need to put the patch in upstream for limiting
>>> realloc auto scope.
>>>
>> Another user has confirmed that at test kernel from your branch[0] does
>> resolve the bug.
> Hi, Joe,
>
> Bjorn does not want to limit auto realloc, so this patch can not make
> to upstream at this point.
>
> so we may have to ask user to append "pci=realloc=off" before we find
> more smart way to realloc
> resource.
>
> Thanks
>
> Yinghai
Hi Yinghai,

In a prior email you mentioned: "Yes, if that works, we would not need
to put the patch in upstream for limiting realloc auto scope."  Is the
git tree at
git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
somehow different now?

Thanks,

Joe

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu Jan. 10, 2014, 11:12 p.m. UTC | #14
On Fri, Jan 10, 2014 at 1:54 PM, Joseph Salisbury
<joseph.salisbury@canonical.com> wrote:
> On 01/10/2014 12:13 PM, Yinghai Lu wrote:
>
> In a prior email you mentioned: "Yes, if that works, we would not need
> to put the patch in upstream for limiting realloc auto scope."  Is the
> git tree at
> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
> somehow different now?

but you boot with "pci=realloc=off", right?

Anyway please try pci/next to see if there is any help.

git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git pci/next
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Salisbury Sept. 16, 2014, 8:21 p.m. UTC | #15
On 01/10/2014 12:13 PM, Yinghai Lu wrote:
> On Fri, Jan 10, 2014 at 8:19 AM, Joseph Salisbury
> <joseph.salisbury@canonical.com> wrote:
>> On 12/11/2013 02:55 PM, Yinghai Lu wrote:
>>> On Wed, Dec 11, 2013 at 11:19 AM, Joseph Salisbury
>>> <joseph.salisbury@canonical.com> wrote:
>>>> On 12/09/2013 03:10 PM, Yinghai Lu wrote:
>>>>> On Mon, Dec 9, 2013 at 11:42 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>>>>> That doesn't answer my question at all.
>>>>>>
>>>>>> I understand that this change makes it so Joseph doesn't have to use
>>>>>> "pci=realloc=off".  But why should auto-reallocation be limited to
>>>>>> buses that have resources above 4GB?  That doesn't make any sense.
>>>>>>
>>>>>> We should fix the reallocation code so it can deal with this case.  If
>>>>>> there's not enough space for everything, obviously we have to leave
>>>>>> something unassigned.  A ROM BAR is a good candidate for leaving
>>>>>> unassigned, because most of the time we can get along without it.
>>>>> Yes, that is ideal and not that simple.
>>>>> but that would be hard to backport to old kernels.
>>>>>
>>>>> BTW, Joseph, can you try
>>>>> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
>>>>> for-pci-3.14
>>>>> with pci=realloc=on
>>>>>
>>>>> on that system?
>>>>>
>>>>> Thanks
>>>>>
>>>>> Yinghai
>>>> I noticed there was some back and forth on this thread.  Do you still
>>>> want me to test this version, Yinghai?
>>> Yes, if that works, we would not need to put the patch in upstream for limiting
>>> realloc auto scope.
>>>
>> Another user has confirmed that at test kernel from your branch[0] does
>> resolve the bug.
> Hi, Joe,
>
> Bjorn does not want to limit auto realloc, so this patch can not make
> to upstream at this point.
>
> so we may have to ask user to append "pci=realloc=off" before we find
> more smart way to realloc
> resource.
>
> Thanks
>
> Yinghai

Hi Yinghai,

A new bug[0] was opened due to enabling PCI_REALLOC_ENABLE_AUTO, which
is similar to the original bug[1] we discussed.

Just wondering if there have been any additional ideas on realloc since
this was last discussed?

Thanks,

Joe


[0] http://pad.lv/1363313
[1] http://pad.lv/1245938
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu Sept. 16, 2014, 10:27 p.m. UTC | #16
On Tue, Sep 16, 2014 at 1:21 PM, Joseph Salisbury
<joseph.salisbury@canonical.com> wrote:
>
> A new bug[0] was opened due to enabling PCI_REALLOC_ENABLE_AUTO, which
> is similar to the original bug[1] we discussed.
>
> Just wondering if there have been any additional ideas on realloc since
> this was last discussed?
>
> [0] http://pad.lv/1363313
> [1] http://pad.lv/1245938

This one looks different, that LSI card support SRIOV, but BIOS does not
allocate resource to SRIOV bar.
We release old resource and ...reallocate resource to them with two retries.

[    0.321983] pci_bus 0000:00: max bus depth: 1 pci_try_num: 2
[    0.322010] pci 0000:00:03.0: BAR 15: assigned [mem
0xef800000-0xef8fffff pref]
[    0.322025] pci 0000:01:00.0: reg 0x174: [mem 0x00000000-0x00003fff 64bit]
[    0.322033] pci 0000:01:00.0: reg 0x17c: [mem 0x00000000-0x0003ffff 64bit]
[    0.322040] pci 0000:01:00.0: reg 0x174: [mem 0x00000000-0x00003fff 64bit]
[    0.322042] pci 0000:01:00.0: BAR 6: assigned [mem
0xef800000-0xef87ffff pref]
[    0.322051] pci 0000:01:00.0: reg 0x17c: [mem 0x00000000-0x0003ffff 64bit]
[    0.322053] pci 0000:01:00.0: BAR 9: can't assign mem (size 0x400000)
[    0.322061] pci 0000:01:00.0: reg 0x174: [mem 0x00000000-0x00003fff 64bit]
[    0.322063] pci 0000:01:00.0: BAR 7: assigned [mem
0xfbd00000-0xfbd3ffff 64bit]
[    0.322070] pci 0000:00:03.0: PCI bridge to [bus 01]
[    0.322073] pci 0000:00:03.0:   bridge window [io  0xc000-0xcfff]
[    0.322077] pci 0000:00:03.0:   bridge window [mem 0xfbd00000-0xfbdfffff]
[    0.322080] pci 0000:00:03.0:   bridge window [mem
0xef800000-0xef8fffff pref]
[    0.322142] pci_bus 0000:00: No. 2 try to assign unassigned res
[    0.322143] release child resource [mem 0xfbd00000-0xfbd3ffff 64bit]
[    0.322144] release child resource [mem 0xfbd80000-0xfbdbffff 64bit]
[    0.322145] release child resource [mem 0xfbdfc000-0xfbdfffff 64bit]
[    0.322147] pci 0000:00:03.0: resource 14 [mem
0xfbd00000-0xfbdfffff] released
[    0.322148] pci 0000:00:03.0: PCI bridge to [bus 01]
[    0.322156] pci 0000:00:03.0: bridge window [mem
0x00100000-0x001fffff] to [bus 01] add_size 500000
[    0.322174] pci 0000:00:03.0: res[14]=[mem 0x00100000-0x001fffff]
get_res_add_size add_size 500000
[    0.322176] pci 0000:00:03.0: BAR 14: assigned [mem 0xefb00000-0xf00fffff]
[    0.322185] pci 0000:01:00.0: reg 0x174: [mem 0xfbd00000-0xfbd03fff 64bit]
[    0.322192] pci 0000:01:00.0: reg 0x17c: [mem 0x00000000-0x0003ffff 64bit]
[    0.322194] pci 0000:01:00.0: res[9]=[mem
0x00000000-0xffffffffffffffff 64bit] get_res_add_size add_size 400000
[    0.322196] pci 0000:01:00.0: res[7]=[mem
0x00000000-0xffffffffffffffff 64bit] get_res_add_size add_size 40000
[    0.322198] pci 0000:01:00.0: BAR 3: assigned [mem
0xefb00000-0xefb3ffff 64bit]
[    0.322211] pci 0000:01:00.0: reg 0x17c: [mem 0x00000000-0x0003ffff 64bit]
[    0.322212] pci 0000:01:00.0: BAR 9: assigned [mem
0xefb40000-0xeff3ffff 64bit]
[    0.322219] pci 0000:01:00.0: BAR 1: assigned [mem
0xeff40000-0xeff43fff 64bit]
[    0.322232] pci 0000:01:00.0: reg 0x174: [mem 0xfbd00000-0xfbd03fff 64bit]
[    0.322234] pci 0000:01:00.0: BAR 7: assigned [mem
0xeff44000-0xeff83fff 64bit]
[    0.322241] pci 0000:00:03.0: PCI bridge to [bus 01]
[    0.322244] pci 0000:00:03.0:   bridge window [io  0xc000-0xcfff]
[    0.322248] pci 0000:00:03.0:   bridge window [mem 0xefb00000-0xf00fffff]
[    0.322251] pci 0000:00:03.0:   bridge window [mem
0xef800000-0xef8fffff 64bit pref]

So the resource realloc do work as expected. but LSI firmware has some
problem again?

We may need to add command after the reset the bridge resource.

Can you get bootlog with "pci=earlydump"?

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Salisbury Sept. 25, 2014, 3:49 p.m. UTC | #17
On 09/16/2014 06:27 PM, Yinghai Lu wrote:
> On Tue, Sep 16, 2014 at 1:21 PM, Joseph Salisbury
> <joseph.salisbury@canonical.com> wrote:
>> A new bug[0] was opened due to enabling PCI_REALLOC_ENABLE_AUTO, which
>> is similar to the original bug[1] we discussed.
>>
>> Just wondering if there have been any additional ideas on realloc since
>> this was last discussed?
>>
>> [0] http://pad.lv/1363313
>> [1] http://pad.lv/1245938
> This one looks different, that LSI card support SRIOV, but BIOS does not
> allocate resource to SRIOV bar.
> We release old resource and ...reallocate resource to them with two retries.
>
> [    0.321983] pci_bus 0000:00: max bus depth: 1 pci_try_num: 2
> [    0.322010] pci 0000:00:03.0: BAR 15: assigned [mem
> 0xef800000-0xef8fffff pref]
> [    0.322025] pci 0000:01:00.0: reg 0x174: [mem 0x00000000-0x00003fff 64bit]
> [    0.322033] pci 0000:01:00.0: reg 0x17c: [mem 0x00000000-0x0003ffff 64bit]
> [    0.322040] pci 0000:01:00.0: reg 0x174: [mem 0x00000000-0x00003fff 64bit]
> [    0.322042] pci 0000:01:00.0: BAR 6: assigned [mem
> 0xef800000-0xef87ffff pref]
> [    0.322051] pci 0000:01:00.0: reg 0x17c: [mem 0x00000000-0x0003ffff 64bit]
> [    0.322053] pci 0000:01:00.0: BAR 9: can't assign mem (size 0x400000)
> [    0.322061] pci 0000:01:00.0: reg 0x174: [mem 0x00000000-0x00003fff 64bit]
> [    0.322063] pci 0000:01:00.0: BAR 7: assigned [mem
> 0xfbd00000-0xfbd3ffff 64bit]
> [    0.322070] pci 0000:00:03.0: PCI bridge to [bus 01]
> [    0.322073] pci 0000:00:03.0:   bridge window [io  0xc000-0xcfff]
> [    0.322077] pci 0000:00:03.0:   bridge window [mem 0xfbd00000-0xfbdfffff]
> [    0.322080] pci 0000:00:03.0:   bridge window [mem
> 0xef800000-0xef8fffff pref]
> [    0.322142] pci_bus 0000:00: No. 2 try to assign unassigned res
> [    0.322143] release child resource [mem 0xfbd00000-0xfbd3ffff 64bit]
> [    0.322144] release child resource [mem 0xfbd80000-0xfbdbffff 64bit]
> [    0.322145] release child resource [mem 0xfbdfc000-0xfbdfffff 64bit]
> [    0.322147] pci 0000:00:03.0: resource 14 [mem
> 0xfbd00000-0xfbdfffff] released
> [    0.322148] pci 0000:00:03.0: PCI bridge to [bus 01]
> [    0.322156] pci 0000:00:03.0: bridge window [mem
> 0x00100000-0x001fffff] to [bus 01] add_size 500000
> [    0.322174] pci 0000:00:03.0: res[14]=[mem 0x00100000-0x001fffff]
> get_res_add_size add_size 500000
> [    0.322176] pci 0000:00:03.0: BAR 14: assigned [mem 0xefb00000-0xf00fffff]
> [    0.322185] pci 0000:01:00.0: reg 0x174: [mem 0xfbd00000-0xfbd03fff 64bit]
> [    0.322192] pci 0000:01:00.0: reg 0x17c: [mem 0x00000000-0x0003ffff 64bit]
> [    0.322194] pci 0000:01:00.0: res[9]=[mem
> 0x00000000-0xffffffffffffffff 64bit] get_res_add_size add_size 400000
> [    0.322196] pci 0000:01:00.0: res[7]=[mem
> 0x00000000-0xffffffffffffffff 64bit] get_res_add_size add_size 40000
> [    0.322198] pci 0000:01:00.0: BAR 3: assigned [mem
> 0xefb00000-0xefb3ffff 64bit]
> [    0.322211] pci 0000:01:00.0: reg 0x17c: [mem 0x00000000-0x0003ffff 64bit]
> [    0.322212] pci 0000:01:00.0: BAR 9: assigned [mem
> 0xefb40000-0xeff3ffff 64bit]
> [    0.322219] pci 0000:01:00.0: BAR 1: assigned [mem
> 0xeff40000-0xeff43fff 64bit]
> [    0.322232] pci 0000:01:00.0: reg 0x174: [mem 0xfbd00000-0xfbd03fff 64bit]
> [    0.322234] pci 0000:01:00.0: BAR 7: assigned [mem
> 0xeff44000-0xeff83fff 64bit]
> [    0.322241] pci 0000:00:03.0: PCI bridge to [bus 01]
> [    0.322244] pci 0000:00:03.0:   bridge window [io  0xc000-0xcfff]
> [    0.322248] pci 0000:00:03.0:   bridge window [mem 0xefb00000-0xf00fffff]
> [    0.322251] pci 0000:00:03.0:   bridge window [mem
> 0xef800000-0xef8fffff 64bit pref]
>
> So the resource realloc do work as expected. but LSI firmware has some
> problem again?
>
> We may need to add command after the reset the bridge resource.
>
> Can you get bootlog with "pci=earlydump"?
>
> Thanks
>
> Yinghai
Hi Yinghai,

The user collected earlydump data, which can be found here:
https://launchpadlibrarian.net/185141163/dmesg-pci%3Dearlydump
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Sept. 25, 2014, 4:04 p.m. UTC | #18
On Thu, Sep 25, 2014 at 9:49 AM, Joseph Salisbury
<joseph.salisbury@canonical.com> wrote:
> On 09/16/2014 06:27 PM, Yinghai Lu wrote:
>> On Tue, Sep 16, 2014 at 1:21 PM, Joseph Salisbury
>> <joseph.salisbury@canonical.com> wrote:
>>> A new bug[0] was opened due to enabling PCI_REALLOC_ENABLE_AUTO, which
>>> is similar to the original bug[1] we discussed.
>>>
>>> Just wondering if there have been any additional ideas on realloc since
>>> this was last discussed?
>>>
>>> [0] http://pad.lv/1363313
>>> [1] http://pad.lv/1245938

As a side-note, this looks like a regression and I should have given
it more attention, but I don't track all the vendor bug databases.  If
you open a kernel.bug bugzilla and mark it as a regression, I *do*
track that regularly.  For regressions, it's helpful if you can attach
dmesg logs from working and non-working kernels that are as close
together as possible.  It'd be ideal if those were upstream kernels,
but I doubt you apply any patches that would be relevant, so I think
logs from Ubuntu kernels would be enough.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joseph Salisbury Sept. 25, 2014, 6:04 p.m. UTC | #19
On 09/25/2014 12:04 PM, Bjorn Helgaas wrote:
> On Thu, Sep 25, 2014 at 9:49 AM, Joseph Salisbury
> <joseph.salisbury@canonical.com> wrote:
>> On 09/16/2014 06:27 PM, Yinghai Lu wrote:
>>> On Tue, Sep 16, 2014 at 1:21 PM, Joseph Salisbury
>>> <joseph.salisbury@canonical.com> wrote:
>>>> A new bug[0] was opened due to enabling PCI_REALLOC_ENABLE_AUTO, which
>>>> is similar to the original bug[1] we discussed.
>>>>
>>>> Just wondering if there have been any additional ideas on realloc since
>>>> this was last discussed?
>>>>
>>>> [0] http://pad.lv/1363313
>>>> [1] http://pad.lv/1245938
> As a side-note, this looks like a regression and I should have given
> it more attention, but I don't track all the vendor bug databases.  If
> you open a kernel.bug bugzilla and mark it as a regression, I *do*
> track that regularly.  For regressions, it's helpful if you can attach
> dmesg logs from working and non-working kernels that are as close
> together as possible.  It'd be ideal if those were upstream kernels,
> but I doubt you apply any patches that would be relevant, so I think
> logs from Ubuntu kernels would be enough.
>
> Bjorn

Thanks, Bjorn.  I'll open a bugzilla bug and ask the bug reporters for
the requested dmesg data.  Thanks again for the feedback.

Joe
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu Sept. 25, 2014, 6:23 p.m. UTC | #20
On Thu, Sep 25, 2014 at 8:49 AM, Joseph Salisbury
<joseph.salisbury@canonical.com> wrote:
> On 09/16/2014 06:27 PM, Yinghai Lu wrote:
>> On Tue, Sep 16, 2014 at 1:21 PM, Joseph Salisbury
>> <joseph.salisbury@canonical.com> wrote:
>>> A new bug[0] was opened due to enabling PCI_REALLOC_ENABLE_AUTO, which
>>> is similar to the original bug[1] we discussed.
>>>
>>> Just wondering if there have been any additional ideas on realloc since
>>> this was last discussed?
>>>
>>> [0] http://pad.lv/1363313
>>> [1] http://pad.lv/1245938
>> This one looks different, that LSI card support SRIOV, but BIOS does not
>> allocate resource to SRIOV bar.
>> We release old resource and ...reallocate resource to them with two retries.
>
> The user collected earlydump data, which can be found here:
> https://launchpadlibrarian.net/185141163/dmesg-pci%3Dearlydump

Did you ask the user to test reset_lsi_pci_devices.patch?

> ---
>  drivers/pci/setup-bus.c |   16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
>
> Index: linux-2.6/drivers/pci/setup-
bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -1615,6 +1615,22 @@ again:
>                                                fail_res->flags & type_mask,
>                                                rel_type);
>
> +     /* reset LSI device */
> +     list_for_each_entry(fail_res, &fail_head, list) {
> +             struct pci_dev *pdev;
> +             struct pci_bus *pbus = fail_res->dev->bus;
> +
> +             if (pci_is_root_bus(pbus) || !pbus->self)
> +                     continue;
> +
> +             /* LSI device firmware is not happy with changing BAR value */
> +             list_for_each_entry(pdev, &pbus->devices, bus_list)
> +                     if (pdev->vendor == PCI_VENDOR_ID_LSI_LOGIC) {
> +                             pci_reset_secondary_bus(pbus->self);
> +                             break;
> +                     }
> +     }
> +
>       /* restore size and flags */
>       list_for_each_entry(fail_res, &fail_head, list) {
>               struct resource *res = fail_res->res;

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 219a410..f9e6efb 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1432,17 +1432,39 @@  static int iov_resources_unassigned(struct pci_dev *dev, void *data)
 	return 0;
 }
 
+static bool pci_bus_mem_above_4g(struct pci_bus *bus)
+{
+	int i;
+	struct resource *res;
+
+	pci_bus_for_each_resource(bus, res, i) {
+		struct pci_bus_region region;
+
+		if (!res || !(res->flags & IORESOURCE_MEM))
+			continue;
+
+		__pcibios_resource_to_bus(bus, &region, res);
+		if (region.end > 0xffffffff)
+			return true;
+	}
+
+	return false;
+}
+
 static enum enable_type pci_realloc_detect(struct pci_bus *bus,
 			 enum enable_type enable_local)
 {
-	bool unassigned = false;
-
 	if (enable_local != undefined)
 		return enable_local;
 
-	pci_walk_bus(bus, iov_resources_unassigned, &unassigned);
-	if (unassigned)
-		return auto_enabled;
+	/* only enable auto when root bus does support 64bit mmio */
+	if (pci_bus_mem_above_4g(bus)) {
+		bool unassigned = false;
+
+		pci_walk_bus(bus, iov_resources_unassigned, &unassigned);
+		if (unassigned)
+			return auto_enabled;
+	}
 
 	return enable_local;
 }