Patchwork ELDK 4.2/kilauea/3.5+ kernel broken

login
register
mail settings
Submitter Robert Berger
Date Oct. 18, 2012, 5:45 p.m.
Message ID <50804020.30505@gmail.com>
Download mbox | patch
Permalink /patch/192396/
State Not Applicable
Headers show

Comments

Robert Berger - Oct. 18, 2012, 5:45 p.m.
FYI:

When I replace arch/powerpc/sysdev/ppc4xx_msi.c from a 3.6 kernel with a
ppc4xx_msi.c from 47da421981571c69ef29740cc55fa7248682e167 it boots from
nfs with a defconfig, so this seems to be the guilty one.

Please find attached the difference between the good and bad
ppc4xx_msi.c files.

I'll be happy to test your patches;)

Regards,

Robert
Benjamin Herrenschmidt - Oct. 18, 2012, 7:03 p.m.
On Thu, 2012-10-18 at 20:45 +0300, Robert Berger wrote:
> -       mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start));      /*HIGH addr */
> -       mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start));  /* Low addr */
> +       SDR0_WRITE(sdr_addr, (u64)res.start >> 32);      /*HIGH addr */
> +       SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */
> +
>  
>         msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi");
> -       if (!msi->msi_dev)
> +       if (msi->msi_dev)
>                 return -ENODEV; 

The above changes look bad. The first one is stupid, the second one is clearly broken.

The diff us from good to bad right ? Looks like somebody added a very busted patch.

If I look at the code in current upstream, I see:

	mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start));	/*HIGH addr */
	mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start));	/* Low addr */

	msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi");
	if (!msi->msi_dev)
		return -ENODEV;


Which looks correct. So this might be something specific to ELDK ?

Cheers,
Ben.
Robert Berger - Oct. 18, 2012, 8:05 p.m.
Hi,

On 10/18/2012 10:03 PM, Benjamin Herrenschmidt wrote:
> On Thu, 2012-10-18 at 20:45 +0300, Robert Berger wrote:
>> -       mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start));      /*HIGH addr */
>> -       mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start));  /* Low addr */
>> +       SDR0_WRITE(sdr_addr, (u64)res.start >> 32);      /*HIGH addr */
>> +       SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */
>> +
>>  
>>         msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi");
>> -       if (!msi->msi_dev)
>> +       if (msi->msi_dev)
>>                 return -ENODEV; 
> 
> The above changes look bad. The first one is stupid, the second one is clearly broken.
> 
> The diff us from good to bad right ? Looks like somebody added a very busted patch.

this (from 3.6) does not work:

-       mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start));      /*HIGH
addr */
-       mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start));  /* Low
addr */

The good old file (which works) is this:

+       SDR0_WRITE(sdr_addr, (u64)res.start >> 32);      /*HIGH addr */
+       SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */
+

> 
> If I look at the code in current upstream, I see:
> 
> 	mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start));	/*HIGH addr */
> 	mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start));	/* Low addr */
> 
> 	msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi");
> 	if (!msi->msi_dev)
> 		return -ENODEV;
> 
> 
> Which looks correct. So this might be something specific to ELDK ?

I will be on the road from tomorrow for a week or so, but maybe I can
isolate the exact lines which break it. I can also try a newer compiler
to see if this changes anything.

Is there someone out there with a kilauea board who can boot a 3.6.
mainline kernel with a default config with a rootfs over nfs?

> 
> Cheers,
> Ben.
> 
> 

Regards,

Robert
Mai La - Oct. 19, 2012, 3:16 a.m.
Hi,

My patch was:

@@ -150,12 +157,11 @@ static int ppc4xx_setup_pcieh_hw(struct
platform_device *dev,
        if (!sdr_addr)
                return -1;

-       SDR0_WRITE(sdr_addr, (u64)res.start >> 32);      /*HIGH addr */
-       SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */
-
+       mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start));      /*HIGH addr
*/
+       mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start));  /* Low addr
*/

        msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi");
-       if (msi->msi_dev)
+       if (!msi->msi_dev)
                return -ENODEV;

        msi->msi_regs = of_iomap(msi->msi_dev, 0);


1. The first few lines: change from SDR0_WRITE to mtdcri since the old one
cause crash. I use ELDK 4.2.

2. The second one should mean that: if not find any node then return error.
So it should be "!msi->msi_dev"

Regards,
Mai La.


On Fri, Oct 19, 2012 at 3:05 AM, Robert Berger <robert.karl.berger@gmail.com
> wrote:

> Hi,
>
> On 10/18/2012 10:03 PM, Benjamin Herrenschmidt wrote:
> > On Thu, 2012-10-18 at 20:45 +0300, Robert Berger wrote:
> >> -       mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start));      /*HIGH
> addr */
> >> -       mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start));  /* Low
> addr */
> >> +       SDR0_WRITE(sdr_addr, (u64)res.start >> 32);      /*HIGH addr */
> >> +       SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */
> >> +
> >>
> >>         msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi");
> >> -       if (!msi->msi_dev)
> >> +       if (msi->msi_dev)
> >>                 return -ENODEV;
> >
> > The above changes look bad. The first one is stupid, the second one is
> clearly broken.
> >
> > The diff us from good to bad right ? Looks like somebody added a very
> busted patch.
>
> this (from 3.6) does not work:
>
> -       mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start));      /*HIGH
> addr */
> -       mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start));  /* Low
> addr */
>
> The good old file (which works) is this:
>
> +       SDR0_WRITE(sdr_addr, (u64)res.start >> 32);      /*HIGH addr */
> +       SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */
> +
>
> >
> > If I look at the code in current upstream, I see:
> >
> >       mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start));      /*HIGH
> addr */
> >       mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start));  /* Low
> addr */
> >
> >       msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi");
> >       if (!msi->msi_dev)
> >               return -ENODEV;
> >
> >
> > Which looks correct. So this might be something specific to ELDK ?
>
> I will be on the road from tomorrow for a week or so, but maybe I can
> isolate the exact lines which break it. I can also try a newer compiler
> to see if this changes anything.
>
> Is there someone out there with a kilauea board who can boot a 3.6.
> mainline kernel with a default config with a rootfs over nfs?
>
> >
> > Cheers,
> > Ben.
> >
> >
>
> Regards,
>
> Robert
>
Robert Berger - Oct. 19, 2012, 6:35 a.m.
Hi,

On 10/19/2012 06:16 AM, Mai La wrote:
> Hi,
> 
> My patch was:
> 
> @@ -150,12 +157,11 @@ static int ppc4xx_setup_pcieh_hw(struct
> platform_device *dev,
>         if (!sdr_addr)
>                 return -1;
> 
> -       SDR0_WRITE(sdr_addr, (u64)res.start >> 32);      /*HIGH addr */
> -       SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */
> -
> +       mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start));      /*HIGH
> addr */
> +       mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start));  /* Low
> addr */
> 
>         msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi");
> -       if (msi->msi_dev)
> +       if (!msi->msi_dev)
>                 return -ENODEV;
> 
>         msi->msi_regs = of_iomap(msi->msi_dev, 0);
> 
> 
> 1. The first few lines: change from SDR0_WRITE to mtdcri since the old
> one cause crash. I use ELDK 4.2.

The old one does not cause a crash for me.  As I said on a kilauea board
with ELDK 4.2 a 3.6 kernel, default config and everything reverted to
the good old file I can boot happily with a rootfs from nfs.

If I use the file as it is in 3.6 I don't see the kernel booting. but it
crashes.

> 
> 2. The second one should mean that: if not find any node then return
> error. So it should be "!msi->msi_dev"

In the 3.6 kernel it's with ! the old file (which works for me) is
without the !

... very strange ...

> 
> Regards,
> Mai La.
> 
> 

Regards,

Robert
Robert Berger - Oct. 21, 2012, 3:35 p.m.
Hi,

On 10/18/2012 10:03 PM, Benjamin Herrenschmidt wrote:
> 
> Which looks correct. So this might be something specific to ELDK ?

I just tried with the ELDK 5.2.1 and have exactly the same behavior as
with ELDK 4.2, so I guess there is something not correct in
arch/powerpc/sysdev/ppc4xx_msi.c.

Just to exclude the case that I'm doing something stupid which only
affects kernel versions 3.5+ and not 3.4 can someone else give it a try
with a kilauea board or something similar?

Regards,

Robert

> 
> Cheers,
> Ben.
> 

..."Serious programming is systems programming or anything beyond
writing factorial(n) as a student exercise." -- Brian W. Kernighan

My public pgp key is available,at:
http://pgp.mit.edu:11371/pks/lookup?op=get&search=0x90320BF1
Robert Berger - Oct. 21, 2012, 3:39 p.m.
Hi,

On 10/18/2012 10:03 PM, Benjamin Herrenschmidt wrote:
>
> Which looks correct. So this might be something specific to ELDK ?

I just tried with the ELDK 5.2.1 and have exactly the same behavior as
with ELDK 4.2, so I guess there is something not correct in
arch/powerpc/sysdev/ppc4xx_msi.c.

Just to exclude the case that I'm doing something stupid which only
affects kernel versions 3.5+ and not 3.4 can someone else give it a try
with a kilauea board or something similar?

Regards,

Robert

>
> Cheers,
> Ben.
>
Benjamin Herrenschmidt - Oct. 21, 2012, 8:01 p.m.
On Sun, 2012-10-21 at 18:35 +0300, Robert Berger wrote:
> Hi,
> 
> On 10/18/2012 10:03 PM, Benjamin Herrenschmidt wrote:
> > 
> > Which looks correct. So this might be something specific to ELDK ?
> 
> I just tried with the ELDK 5.2.1 and have exactly the same behavior as
> with ELDK 4.2, so I guess there is something not correct in
> arch/powerpc/sysdev/ppc4xx_msi.c.
> 
> Just to exclude the case that I'm doing something stupid which only
> affects kernel versions 3.5+ and not 3.4 can someone else give it a try
> with a kilauea board or something similar?

Remind me what is the symptom ? A specific device isn't working ? Or the
whole kernel goes toast ? My feeling is that those patches make MSIs
work (well that's what they are supposed to do) and for some reason that
doesn't agree with whatever you have connected to the PCIe slot...

Cheers,
Ben.
 
> Regards,
> 
> Robert
> 
> > 
> > Cheers,
> > Ben.
> > 
> 
> ..."Serious programming is systems programming or anything beyond
> writing factorial(n) as a student exercise." -- Brian W. Kernighan
> 
> My public pgp key is available,at:
> http://pgp.mit.edu:11371/pks/lookup?op=get&search=0x90320BF1
Robert Berger - Oct. 24, 2012, 5:45 a.m.
Hi Ben,
>
> Remind me what is the symptom ? A specific device isn't working ? Or the
> whole kernel goes toast ?

The whole kernel goes toast! Just reboots without saying much before;)

> My feeling is that those patches make MSIs
> work (well that's what they are supposed to do) and for some reason that
> doesn't agree with whatever you have connected to the PCIe slot...

I have nothing connected to the PCIe slot. Just a standard kilauea eval
board with a defconfig and a 3.6 kernel, so if someone has a kilauea
board it should be very easy to reproduce.

Mai, Rupjyoti do you have a kilauea bard lying around to test?

Maybe it's the kilauea fdt?

If I hard code NR_MSI_IRQS (as it used to be) at least the kernel boots
and I can work with the board. (I don't think MSI interrupts work).

23c23
< #define DEBUG
---
>
47d46
< #define NR_MSI_IRQS	4
55c54
< 	int msi_virqs[NR_MSI_IRQS];
---
> 	int *msi_virqs;
67c66
< 	err = msi_bitmap_alloc(&msi_data->bitmap, NR_MSI_IRQS,
---
> 	err = msi_bitmap_alloc(&msi_data->bitmap, msi_irqs,
88a88,92
> 	msi_data->msi_virqs = kmalloc((msi_irqs) * sizeof(int),
> 					    GFP_KERNEL);
> 	if (!msi_data->msi_virqs)
> 		return -ENOMEM;
>
192a197,198
> 	dma_free_coherent(&dev->dev, 64, msi_virt, msi_phys);
>
202c208
< 	for (i = 0; i < NR_MSI_IRQS; i++) {
---
> 	for (i = 0; i < msi_irqs; i++) {
223,224d228
< 	/*msi = &ppc4xx_msi;*//*keep the msi data for further use*/
<

Regards,

Robert

>
> Cheers,
> Ben.
>

...If it's there, and you can see it, it's real. - If it's not there,
and you can see it, it's virtual.- If it's there, and you can't see it,
it's transparent.- If it's not there, and you can't see it, you erased
it. (from some mailing list)

My public pgp key is available,at:
http://pgp.mit.edu:11371/pks/lookup?op=get&search=0x90320BF1
Benjamin Herrenschmidt - Jan. 4, 2013, 4:08 a.m.
On Wed, 2012-10-24 at 08:45 +0300, Robert Berger wrote:
> Hi Ben,
> >
> > Remind me what is the symptom ? A specific device isn't working ? Or the
> > whole kernel goes toast ?
> 
> The whole kernel goes toast! Just reboots without saying much before;)
> 
> > My feeling is that those patches make MSIs
> > work (well that's what they are supposed to do) and for some reason that
> > doesn't agree with whatever you have connected to the PCIe slot...
> 
> I have nothing connected to the PCIe slot. Just a standard kilauea eval
> board with a defconfig and a 3.6 kernel, so if someone has a kilauea
> board it should be very easy to reproduce.
> 
> Mai, Rupjyoti do you have a kilauea bard lying around to test?
> 
> Maybe it's the kilauea fdt?
> 
> If I hard code NR_MSI_IRQS (as it used to be) at least the kernel boots
> and I can work with the board. (I don't think MSI interrupts work).

Mai ? Somebody else from APM ? Can you look at this ?

Ben.

Patch

diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c
index 82c6702..1c2d7af 100644
--- a/arch/powerpc/sysdev/ppc4xx_msi.c
+++ b/arch/powerpc/sysdev/ppc4xx_msi.c
@@ -28,11 +28,10 @@ 
 #include <linux/of_platform.h>
 #include <linux/interrupt.h>
 #include <linux/export.h>
-#include <linux/kernel.h>
 #include <asm/prom.h>
 #include <asm/hw_irq.h>
 #include <asm/ppc-pci.h>
-#include <asm/dcr.h>
+#include <boot/dcr.h>
 #include <asm/dcr-regs.h>
 #include <asm/msi_bitmap.h>
 
@@ -44,14 +43,13 @@ 
 #define PEIH_FLUSH0	0x30
 #define PEIH_FLUSH1	0x38
 #define PEIH_CNTRST	0x48
-
-static int msi_irqs;
+#define NR_MSI_IRQS	4
 
 struct ppc4xx_msi {
 	u32 msi_addr_lo;
 	u32 msi_addr_hi;
 	void __iomem *msi_regs;
-	int *msi_virqs;
+	int msi_virqs[NR_MSI_IRQS];
 	struct msi_bitmap bitmap;
 	struct device_node *msi_dev;
 };
@@ -63,7 +61,7 @@  static int ppc4xx_msi_init_allocator(struct platform_device *dev,
 {
 	int err;
 
-	err = msi_bitmap_alloc(&msi_data->bitmap, msi_irqs,
+	err = msi_bitmap_alloc(&msi_data->bitmap, NR_MSI_IRQS,
 			      dev->dev.of_node);
 	if (err)
 		return err;
@@ -85,11 +83,6 @@  static int ppc4xx_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	struct msi_desc *entry;
 	struct ppc4xx_msi *msi_data = &ppc4xx_msi;
 
-	msi_data->msi_virqs = kmalloc((msi_irqs) * sizeof(int),
-					    GFP_KERNEL);
-	if (!msi_data->msi_virqs)
-		return -ENOMEM;
-
 	list_for_each_entry(entry, &dev->msi_list, list) {
 		int_no = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1);
 		if (int_no >= 0)
@@ -157,11 +150,12 @@  static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
 	if (!sdr_addr)
 		return -1;
 
-	mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start));	/*HIGH addr */
-	mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start));	/* Low addr */
+	SDR0_WRITE(sdr_addr, (u64)res.start >> 32);	 /*HIGH addr */
+	SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */
+
 
 	msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi");
-	if (!msi->msi_dev)
+	if (msi->msi_dev)
 		return -ENODEV;
 
 	msi->msi_regs = of_iomap(msi->msi_dev, 0);
@@ -173,12 +167,9 @@  static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
 		(u32) (msi->msi_regs + PEIH_TERMADH), (u32) (msi->msi_regs));
 
 	msi_virt = dma_alloc_coherent(&dev->dev, 64, &msi_phys, GFP_KERNEL);
-	if (!msi_virt)
-		return -ENOMEM;
-	msi->msi_addr_hi = upper_32_bits(msi_phys);
-	msi->msi_addr_lo = lower_32_bits(msi_phys & 0xffffffff);
-	dev_dbg(&dev->dev, "PCIE-MSI: msi address high 0x%x, low 0x%x\n",
-		msi->msi_addr_hi, msi->msi_addr_lo);
+	msi->msi_addr_hi = 0x0;
+	msi->msi_addr_lo = (u32) msi_phys;
+	dev_dbg(&dev->dev, "PCIE-MSI: msi address 0x%x\n", msi->msi_addr_lo);
 
 	/* Progam the Interrupt handler Termination addr registers */
 	out_be32(msi->msi_regs + PEIH_TERMADH, msi->msi_addr_hi);
@@ -194,8 +185,6 @@  static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
 	out_be32(msi->msi_regs + PEIH_MSIED, *msi_data);
 	out_be32(msi->msi_regs + PEIH_MSIMK, *msi_mask);
 
-	dma_free_coherent(&dev->dev, 64, msi_virt, msi_phys);
-
 	return 0;
 }
 
@@ -205,7 +194,7 @@  static int ppc4xx_of_msi_remove(struct platform_device *dev)
 	int i;
 	int virq;
 
-	for (i = 0; i < msi_irqs; i++) {
+	for (i = 0; i < NR_MSI_IRQS; i++) {
 		virq = msi->msi_virqs[i];
 		if (virq != NO_IRQ)
 			irq_dispose_mapping(virq);
@@ -226,6 +215,8 @@  static int __devinit ppc4xx_msi_probe(struct platform_device *dev)
 	struct resource res;
 	int err = 0;
 
+	msi = &ppc4xx_msi;/*keep the msi data for further use*/
+
 	dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n");
 
 	msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL);
@@ -243,10 +234,6 @@  static int __devinit ppc4xx_msi_probe(struct platform_device *dev)
 		goto error_out;
 	}
 
-	msi_irqs = of_irq_count(dev->dev.of_node);
-	if (!msi_irqs)
-		return -ENODEV;
-
 	if (ppc4xx_setup_pcieh_hw(dev, res, msi))
 		goto error_out;
 
@@ -255,7 +242,6 @@  static int __devinit ppc4xx_msi_probe(struct platform_device *dev)
 		dev_err(&dev->dev, "Error allocating MSI bitmap\n");
 		goto error_out;
 	}
-	ppc4xx_msi = *msi;
 
 	ppc_md.setup_msi_irqs = ppc4xx_setup_msi_irqs;
 	ppc_md.teardown_msi_irqs = ppc4xx_teardown_msi_irqs;