Message ID | 20181120132705.6917-3-stefan@agner.ch |
---|---|
State | Superseded |
Headers | show |
Series | [v2,1/3] PCI: dwc: allow to limit registers set length | expand |
Am Dienstag, den 20.11.2018, 14:27 +0100 schrieb Stefan Agner: > Define the length of the DBI registers. This makes sure that > the kernel does not access registers beyond that point, avoiding > the following abort on a i.MX 6Quad: > # cat /sys/devices/soc0/soc/1ffc000.pcie/pci0000\:00/0000\:00\:00.0/config > [ 100.021433] Unhandled fault: imprecise external abort (0x1406) at 0xb6ea7000 > ... > [ 100.056423] PC is at dw_pcie_read+0x50/0x84 > [ 100.060790] LR is at dw_pcie_rd_own_conf+0x44/0x48 > ... > > Signed-off-by: Stefan Agner <stefan@agner.ch> Reviewed-by: Lucas Stach <l.stach@pengutronix.de> > --- > drivers/pci/controller/dwc/pci-imx6.c | 5 ++++- > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c > index 7ac1a639fe91..41d6127b40ad 100644 > --- a/drivers/pci/controller/dwc/pci-imx6.c > +++ b/drivers/pci/controller/dwc/pci-imx6.c > @@ -41,6 +41,7 @@ enum imx6_pcie_variants { > > struct imx6_pcie_drvdata { > > enum imx6_pcie_variants variant; > > > + int dbi_length; > }; > > struct imx6_pcie { > @@ -779,6 +780,8 @@ static int imx6_pcie_probe(struct platform_device *pdev) > > break; > > } > > > + pci->dbi_length = imx6_pcie->drvdata->dbi_length; > + > > /* Grab GPR config register range */ > > imx6_pcie->iomuxc_gpr = > > syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); > @@ -839,7 +842,7 @@ static void imx6_pcie_shutdown(struct platform_device *pdev) > } > > static const struct imx6_pcie_drvdata drvdata[] = { > > - [IMX6Q] = { .variant = IMX6Q }, > > + [IMX6Q] = { .variant = IMX6Q, .dbi_length = 0x15c }, > > [IMX6SX] = { .variant = IMX6SX }, > > [IMX6QP] = { .variant = IMX6QP }, > > [IMX7D] = { .variant = IMX7D },
On Tue, Nov 20, 2018 at 9:43 AM Stefan Agner <stefan@agner.ch> wrote: > > Define the length of the DBI registers. This makes sure that > the kernel does not access registers beyond that point, avoiding > the following abort on a i.MX 6Quad: > # cat /sys/devices/soc0/soc/1ffc000.pcie/pci0000\:00/0000\:00\:00.0/config > [ 100.021433] Unhandled fault: imprecise external abort (0x1406) at 0xb6ea7000 > ... > [ 100.056423] PC is at dw_pcie_read+0x50/0x84 > [ 100.060790] LR is at dw_pcie_rd_own_conf+0x44/0x48 > ... Could this be a regression? Prior to 415b6185c541 ("PCI: imx6: Fix config read timeout handling") all of the imprecise aborts were caught and handled via no-op handler. I did an experiment on i.MX6Q board that I have (ZII RDU2) and adding a simple no-op for imprecise aborts via hook_fault_code(16 + 6, imx6q_pcie_no_op_handler, SIGBUS, 0, "imprecise external abort"); seems to "resolve" this problem: hexdump /sys/devices/soc0/soc/1ffc000.pcie/pci0000\:00/0000\:00\:00.0/config 0000000 16c3 abcd 0547 0010 0001 0604 0010 0001 0000010 0000 01c0 0000 0000 0100 00ff 1010 0000 0000020 0100 01b0 fff0 0000 0000 0000 0000 0000 0000030 0000 0000 0040 0000 0000 0000 012a 0001 0000040 5001 dbc3 0000 0000 0000 0000 0000 0000 0000050 7005 0181 c000 7e27 0000 0000 0000 0000 0000060 0000 0000 0000 0000 0000 0000 0000 0000 0000070 0010 0042 8000 0000 201f 0010 cc11 0013 0000080 0040 3011 0000 0000 03c0 0040 0008 0000 0000090 0000 0000 001f 0000 0000 0000 0002 0000 00000a0 0002 0001 0000 0000 0000 0000 0000 0000 00000b0 0000 0000 0000 0000 0000 0000 0000 0000 * 0000100 0001 1401 0000 0000 0000 0000 2030 0006 0000110 0000 0000 2000 0000 00a0 0000 0000 0000 0000120 0000 0000 0000 0000 0000 0000 0007 0000 0000130 0000 0000 0000 0000 0000 0000 0000 0000 0000140 0002 0001 0000 0000 0000 0000 0000 0000 0000150 0000 0000 00ff 8000 0000 0000 0000 0000 0000160 0000 0000 0000 0000 0000 0000 0000 0000 * 0000700 0076 0163 ffff ffff 0004 0700 2c00 1b2c 0000710 0120 0001 0000 0000 8000 0000 0280 0000 0000720 0000 0000 0001 0000 b611 03d5 0410 0800 0000730 4020 0000 4004 0000 ffff 000f 0000 0000 0000740 000f 0000 0000 0000 c019 0020 c003 0020 0000750 0000 0080 0000 0000 0000 0000 0000 0000 0000760 0000 0000 0000 0000 0000 0000 0000 0000 * 0000800 0000 0000 0000 0000 0000 0000 012c 0000 0000810 0000 0000 0000 0000 0302 0000 0000 0000 0000820 c000 7e27 0000 0000 0001 0000 0000 0000 0000830 0000 0000 0000 0000 0000 0000 0000 0000 * 0000900 0001 0000 0002 0000 0000 8000 0000 01f8 0000910 0000 0000 ffff 01f8 0000 0000 0000 0000 0000920 0000 0000 0000 0000 0000 0000 0000 0000 * 0001000 Maybe a simple fix would be to install such a handler when running on i.MX6Q? Thanks, Andrey Smirnov > > Signed-off-by: Stefan Agner <stefan@agner.ch> > --- > drivers/pci/controller/dwc/pci-imx6.c | 5 ++++- > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c > index 7ac1a639fe91..41d6127b40ad 100644 > --- a/drivers/pci/controller/dwc/pci-imx6.c > +++ b/drivers/pci/controller/dwc/pci-imx6.c > @@ -41,6 +41,7 @@ enum imx6_pcie_variants { > > struct imx6_pcie_drvdata { > enum imx6_pcie_variants variant; > + int dbi_length; > }; > > struct imx6_pcie { > @@ -779,6 +780,8 @@ static int imx6_pcie_probe(struct platform_device *pdev) > break; > } > > + pci->dbi_length = imx6_pcie->drvdata->dbi_length; > + > /* Grab GPR config register range */ > imx6_pcie->iomuxc_gpr = > syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); > @@ -839,7 +842,7 @@ static void imx6_pcie_shutdown(struct platform_device *pdev) > } > > static const struct imx6_pcie_drvdata drvdata[] = { > - [IMX6Q] = { .variant = IMX6Q }, > + [IMX6Q] = { .variant = IMX6Q, .dbi_length = 0x15c }, > [IMX6SX] = { .variant = IMX6SX }, > [IMX6QP] = { .variant = IMX6QP }, > [IMX7D] = { .variant = IMX7D }, > -- > 2.19.1 >
Hi Andrey, On Tue, Nov 27, 2018 at 10:57 PM Andrey Smirnov <andrew.smirnov@gmail.com> wrote: > Could this be a regression? Prior to 415b6185c541 ("PCI: imx6: Fix > config read timeout handling") all of the imprecise aborts were caught > and handled via no-op handler. I did an experiment on i.MX6Q board > that I have (ZII RDU2) and adding a simple no-op for imprecise aborts > via > > hook_fault_code(16 + 6, imx6q_pcie_no_op_handler, SIGBUS, 0, > "imprecise external abort"); > > seems to "resolve" this problem: Please check https://patchwork.kernel.org/patch/9720313/ This commit fixed a kernel crash on mx6q boards with a PCI switch. So we can't go back to the simple no-op.
On Tue, Nov 27, 2018 at 5:12 PM Fabio Estevam <festevam@gmail.com> wrote: > > Hi Andrey, > > On Tue, Nov 27, 2018 at 10:57 PM Andrey Smirnov > <andrew.smirnov@gmail.com> wrote: > > > Could this be a regression? Prior to 415b6185c541 ("PCI: imx6: Fix > > config read timeout handling") all of the imprecise aborts were caught > > and handled via no-op handler. I did an experiment on i.MX6Q board > > that I have (ZII RDU2) and adding a simple no-op for imprecise aborts > > via > > > > hook_fault_code(16 + 6, imx6q_pcie_no_op_handler, SIGBUS, 0, > > "imprecise external abort"); > > > > seems to "resolve" this problem: > > Please check https://patchwork.kernel.org/patch/9720313/ > > This commit fixed a kernel crash on mx6q boards with a PCI switch. > > So we can't go back to the simple no-op. It's probably not exactly clear form my message, but I wasn't proposing to go back to a no-op. What I had in mind is having a no-op handler for imprecise aborts _alongside_ the non-linefetch handlers that is already there when running against i.MX6Q type of the IP block. Thanks, Andrey Smirnov
On 28.11.2018 02:28, Andrey Smirnov wrote: > On Tue, Nov 27, 2018 at 5:12 PM Fabio Estevam <festevam@gmail.com> wrote: >> >> Hi Andrey, >> >> On Tue, Nov 27, 2018 at 10:57 PM Andrey Smirnov >> <andrew.smirnov@gmail.com> wrote: >> >> > Could this be a regression? Prior to 415b6185c541 ("PCI: imx6: Fix >> > config read timeout handling") all of the imprecise aborts were caught >> > and handled via no-op handler. I did an experiment on i.MX6Q board >> > that I have (ZII RDU2) and adding a simple no-op for imprecise aborts >> > via >> > >> > hook_fault_code(16 + 6, imx6q_pcie_no_op_handler, SIGBUS, 0, >> > "imprecise external abort"); Unsurprisingly, introducing this handler also "fixes" the issue in my setup. FWIW, during my investigation with the Thumb2 issue, I was looking at the abort handler and its history a bit more closely too. I was about to suggest readding this handler too, you just beat me by some hours :-) The current 4.9 downstream BSP still has the old fault handler, and hence this issue does not happen in the downstream BSP. >> > >> > seems to "resolve" this problem: >> >> Please check https://patchwork.kernel.org/patch/9720313/ >> >> This commit fixed a kernel crash on mx6q boards with a PCI switch. >> >> So we can't go back to the simple no-op. > > It's probably not exactly clear form my message, but I wasn't > proposing to go back to a no-op. What I had in mind is having a no-op > handler for imprecise aborts _alongside_ the non-linefetch handlers > that is already there when running against i.MX6Q type of the IP > block. > Agreed, it should be alongside the "external abort on non-linefetch" handler. I actually encountered another issue when I had a Intel e1000e running yesterday. Unfortunately I wasn't able to reproduce the issue, so maybe it was just a fluke. It probably would be solved by the additional "imprecise external abort" too: [ 37.644300] fec 2188000.ethernet eth0: Link is Down [ 38.077383] Unhandled fault: imprecise external abort (0x1406) at 0xb64e8000 [ 38.084638] pgd = ac4709d6 [ 38.087434] [b64e8000] *pgd=00000000 [ 38.091129] Internal error: : 1406 [#1] PREEMPT SMP ARM [ 38.096508] CPU: 0 PID: 468 Comm: kworker/0:2 Not tainted 4.19.4-00044-ged7a0cc2ef01-dirty #479 [ 38.105428] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 38.112143] Workqueue: events e1000_watchdog_task [ 38.116993] PC is at e1000e_update_stats+0x68/0xa7c [ 38.122008] LR is at e1000_watchdog_task+0xe8/0x71c [ 38.127021] pc : [<c0621238>] lr : [<c0628d0c>] psr: 60010013 [ 38.133449] sp : ed185ea0 ip : 00007374 fp : ec83ece4 [ 38.138814] r10: ec71f700 r9 : ec83c500 r8 : ec83c000 [ 38.144180] r7 : ec83c924 r6 : ec83c000 r5 : c1104cc8 r4 : ec83c500 [ 38.150875] r3 : f14c4000 r2 : 000003e8 r1 : 00000000 r0 : ec83c500 [ 38.157573] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 38.164890] Control: 10c5387d Table: 3d1c004a DAC: 00000051 [ 38.170789] Process kworker/0:2 (pid: 468, stack limit = 0xbc71b316) [ 38.177306] Stack: (0xed185ea0 to 0xed186000) [ 38.181794] 5ea0: ef7a9100 c0619ba3 ec0870e0 ec087068 ec0870e0 00000000 60010013 c0619ba3 [ 38.190184] 5ec0: ef7a8d00 ec83c54c c1104cc8 ec83e54c ec83c924 ec83c000 ec83c500 ec71f700 [ 38.198573] 5ee0: ec83ece4 c0628d0c ecbd16c0 c0c0237c ed185f3c c0b26de0 ec131c04 c0619ba3 [ 38.206966] 5f00: c1153fe4 ec83c54c ecdcc100 ef7a8d00 ef7a9e00 00000000 ec83c550 00000000 [ 38.221770] 5f20: ef7a8d00 c0136aec c1103d00 ef7a8d18 ecdcc100 ef7a8d00 ecdcc114 c1103d00 [ 38.236801] 5f40: ef7a8d18 ffffe000 00000008 c0136d40 ec521c70 c1176068 c0e4213c ed184000 [ 38.251874] 5f60: ecfecfdc ecfecfc0 ed0db1c0 00000000 ed184000 ecdcc100 c0136cfc ec0a3ea4 [ 38.267199] 5f80: ecfecfdc c013c810 00000000 ed0db1c0 c013c6c8 00000000 00000000 00000000 [ 38.282612] 5fa0: 00000000 00000000 00000000 c01010e8 00000000 00000000 00000000 00000000 [ 38.298080] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 38.313792] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [ 38.329716] [<c0621238>] (e1000e_update_stats) from [<c0628d0c>] (e1000_watchdog_task+0xe8/0x71c) [ 38.346597] [<c0628d0c>] (e1000_watchdog_task) from [<c0136aec>] (process_one_work+0x1f0/0x400) [ 38.363493] [<c0136aec>] (process_one_work) from [<c0136d40>] (worker_thread+0x44/0x584) [ 38.379844] [<c0136d40>] (worker_thread) from [<c013c810>] (kthread+0x148/0x150) [ 38.395575] [<c013c810>] (kthread) from [<c01010e8>] (ret_from_fork+0x14/0x2c) [ 38.411119] Exception stack(0xed185fb0 to 0xed185ff8) [ 38.420331] 5fa0: 00000000 00000000 00000000 00000000 [ 38.436662] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 38.452933] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 38.463661] Code: e590641c e2833901 e5931000 f57ff04f (e280ad9f) -- Stefan
diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 7ac1a639fe91..41d6127b40ad 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -41,6 +41,7 @@ enum imx6_pcie_variants { struct imx6_pcie_drvdata { enum imx6_pcie_variants variant; + int dbi_length; }; struct imx6_pcie { @@ -779,6 +780,8 @@ static int imx6_pcie_probe(struct platform_device *pdev) break; } + pci->dbi_length = imx6_pcie->drvdata->dbi_length; + /* Grab GPR config register range */ imx6_pcie->iomuxc_gpr = syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); @@ -839,7 +842,7 @@ static void imx6_pcie_shutdown(struct platform_device *pdev) } static const struct imx6_pcie_drvdata drvdata[] = { - [IMX6Q] = { .variant = IMX6Q }, + [IMX6Q] = { .variant = IMX6Q, .dbi_length = 0x15c }, [IMX6SX] = { .variant = IMX6SX }, [IMX6QP] = { .variant = IMX6QP }, [IMX7D] = { .variant = IMX7D },
Define the length of the DBI registers. This makes sure that the kernel does not access registers beyond that point, avoiding the following abort on a i.MX 6Quad: # cat /sys/devices/soc0/soc/1ffc000.pcie/pci0000\:00/0000\:00\:00.0/config [ 100.021433] Unhandled fault: imprecise external abort (0x1406) at 0xb6ea7000 ... [ 100.056423] PC is at dw_pcie_read+0x50/0x84 [ 100.060790] LR is at dw_pcie_rd_own_conf+0x44/0x48 ... Signed-off-by: Stefan Agner <stefan@agner.ch> --- drivers/pci/controller/dwc/pci-imx6.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)