diff mbox series

[v2,3/3] PCI: imx6: limit DBI register length

Message ID 20181120132705.6917-3-stefan@agner.ch
State Superseded
Headers show
Series [v2,1/3] PCI: dwc: allow to limit registers set length | expand

Commit Message

Stefan Agner Nov. 20, 2018, 1:27 p.m. UTC
Define the length of the DBI registers. This makes sure that
the kernel does not access registers beyond that point, avoiding
the following abort on a i.MX 6Quad:
  # cat /sys/devices/soc0/soc/1ffc000.pcie/pci0000\:00/0000\:00\:00.0/config
  [  100.021433] Unhandled fault: imprecise external abort (0x1406) at 0xb6ea7000
  ...
  [  100.056423] PC is at dw_pcie_read+0x50/0x84
  [  100.060790] LR is at dw_pcie_rd_own_conf+0x44/0x48
  ...

Signed-off-by: Stefan Agner <stefan@agner.ch>
---
 drivers/pci/controller/dwc/pci-imx6.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

Comments

Lucas Stach Nov. 20, 2018, 2:37 p.m. UTC | #1
Am Dienstag, den 20.11.2018, 14:27 +0100 schrieb Stefan Agner:
> Define the length of the DBI registers. This makes sure that
> the kernel does not access registers beyond that point, avoiding
> the following abort on a i.MX 6Quad:
>   # cat /sys/devices/soc0/soc/1ffc000.pcie/pci0000\:00/0000\:00\:00.0/config
>   [  100.021433] Unhandled fault: imprecise external abort (0x1406) at 0xb6ea7000
>   ...
>   [  100.056423] PC is at dw_pcie_read+0x50/0x84
>   [  100.060790] LR is at dw_pcie_rd_own_conf+0x44/0x48
>   ...
> 
> Signed-off-by: Stefan Agner <stefan@agner.ch>

Reviewed-by: Lucas Stach <l.stach@pengutronix.de>

> ---
>  drivers/pci/controller/dwc/pci-imx6.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
> index 7ac1a639fe91..41d6127b40ad 100644
> --- a/drivers/pci/controller/dwc/pci-imx6.c
> +++ b/drivers/pci/controller/dwc/pci-imx6.c
> @@ -41,6 +41,7 @@ enum imx6_pcie_variants {
>  
>  struct imx6_pcie_drvdata {
> >  	enum imx6_pcie_variants variant;
> > > +	int			dbi_length;
>  };
>  
>  struct imx6_pcie {
> @@ -779,6 +780,8 @@ static int imx6_pcie_probe(struct platform_device *pdev)
> >  		break;
> >  	}
>  
> > +	pci->dbi_length = imx6_pcie->drvdata->dbi_length;
> +
> >  	/* Grab GPR config register range */
> >  	imx6_pcie->iomuxc_gpr =
> >  		 syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
> @@ -839,7 +842,7 @@ static void imx6_pcie_shutdown(struct platform_device *pdev)
>  }
>  
>  static const struct imx6_pcie_drvdata drvdata[] = {
> > -	[IMX6Q] = { .variant = IMX6Q },
> > +	[IMX6Q] = { .variant = IMX6Q, .dbi_length = 0x15c },
> >  	[IMX6SX] = { .variant = IMX6SX },
> >  	[IMX6QP] = { .variant = IMX6QP },
> >  	[IMX7D] = { .variant = IMX7D },
Andrey Smirnov Nov. 28, 2018, 12:56 a.m. UTC | #2
On Tue, Nov 20, 2018 at 9:43 AM Stefan Agner <stefan@agner.ch> wrote:
>
> Define the length of the DBI registers. This makes sure that
> the kernel does not access registers beyond that point, avoiding
> the following abort on a i.MX 6Quad:
>   # cat /sys/devices/soc0/soc/1ffc000.pcie/pci0000\:00/0000\:00\:00.0/config
>   [  100.021433] Unhandled fault: imprecise external abort (0x1406) at 0xb6ea7000
>   ...
>   [  100.056423] PC is at dw_pcie_read+0x50/0x84
>   [  100.060790] LR is at dw_pcie_rd_own_conf+0x44/0x48
>   ...

Could this be a regression? Prior to 415b6185c541 ("PCI: imx6: Fix
config read timeout handling") all of the imprecise aborts were caught
and handled via no-op handler. I did an experiment on i.MX6Q board
that I have (ZII RDU2) and adding a simple no-op for imprecise aborts
via

hook_fault_code(16 + 6, imx6q_pcie_no_op_handler, SIGBUS, 0,
                       "imprecise external abort");

seems to "resolve" this problem:

hexdump /sys/devices/soc0/soc/1ffc000.pcie/pci0000\:00/0000\:00\:00.0/config
0000000 16c3 abcd 0547 0010 0001 0604 0010 0001
0000010 0000 01c0 0000 0000 0100 00ff 1010 0000
0000020 0100 01b0 fff0 0000 0000 0000 0000 0000
0000030 0000 0000 0040 0000 0000 0000 012a 0001
0000040 5001 dbc3 0000 0000 0000 0000 0000 0000
0000050 7005 0181 c000 7e27 0000 0000 0000 0000
0000060 0000 0000 0000 0000 0000 0000 0000 0000
0000070 0010 0042 8000 0000 201f 0010 cc11 0013
0000080 0040 3011 0000 0000 03c0 0040 0008 0000
0000090 0000 0000 001f 0000 0000 0000 0002 0000
00000a0 0002 0001 0000 0000 0000 0000 0000 0000
00000b0 0000 0000 0000 0000 0000 0000 0000 0000
*
0000100 0001 1401 0000 0000 0000 0000 2030 0006
0000110 0000 0000 2000 0000 00a0 0000 0000 0000
0000120 0000 0000 0000 0000 0000 0000 0007 0000
0000130 0000 0000 0000 0000 0000 0000 0000 0000
0000140 0002 0001 0000 0000 0000 0000 0000 0000
0000150 0000 0000 00ff 8000 0000 0000 0000 0000
0000160 0000 0000 0000 0000 0000 0000 0000 0000
*
0000700 0076 0163 ffff ffff 0004 0700 2c00 1b2c
0000710 0120 0001 0000 0000 8000 0000 0280 0000
0000720 0000 0000 0001 0000 b611 03d5 0410 0800
0000730 4020 0000 4004 0000 ffff 000f 0000 0000
0000740 000f 0000 0000 0000 c019 0020 c003 0020
0000750 0000 0080 0000 0000 0000 0000 0000 0000
0000760 0000 0000 0000 0000 0000 0000 0000 0000
*
0000800 0000 0000 0000 0000 0000 0000 012c 0000
0000810 0000 0000 0000 0000 0302 0000 0000 0000
0000820 c000 7e27 0000 0000 0001 0000 0000 0000
0000830 0000 0000 0000 0000 0000 0000 0000 0000
*
0000900 0001 0000 0002 0000 0000 8000 0000 01f8
0000910 0000 0000 ffff 01f8 0000 0000 0000 0000
0000920 0000 0000 0000 0000 0000 0000 0000 0000
*
0001000

Maybe a simple fix would be to install such a handler when running on i.MX6Q?

Thanks,
Andrey Smirnov

>
> Signed-off-by: Stefan Agner <stefan@agner.ch>
> ---
>  drivers/pci/controller/dwc/pci-imx6.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
> index 7ac1a639fe91..41d6127b40ad 100644
> --- a/drivers/pci/controller/dwc/pci-imx6.c
> +++ b/drivers/pci/controller/dwc/pci-imx6.c
> @@ -41,6 +41,7 @@ enum imx6_pcie_variants {
>
>  struct imx6_pcie_drvdata {
>         enum imx6_pcie_variants variant;
> +       int                     dbi_length;
>  };
>
>  struct imx6_pcie {
> @@ -779,6 +780,8 @@ static int imx6_pcie_probe(struct platform_device *pdev)
>                 break;
>         }
>
> +       pci->dbi_length = imx6_pcie->drvdata->dbi_length;
> +
>         /* Grab GPR config register range */
>         imx6_pcie->iomuxc_gpr =
>                  syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
> @@ -839,7 +842,7 @@ static void imx6_pcie_shutdown(struct platform_device *pdev)
>  }
>
>  static const struct imx6_pcie_drvdata drvdata[] = {
> -       [IMX6Q] = { .variant = IMX6Q },
> +       [IMX6Q] = { .variant = IMX6Q, .dbi_length = 0x15c },
>         [IMX6SX] = { .variant = IMX6SX },
>         [IMX6QP] = { .variant = IMX6QP },
>         [IMX7D] = { .variant = IMX7D },
> --
> 2.19.1
>
Fabio Estevam Nov. 28, 2018, 1:12 a.m. UTC | #3
Hi Andrey,

On Tue, Nov 27, 2018 at 10:57 PM Andrey Smirnov
<andrew.smirnov@gmail.com> wrote:

> Could this be a regression? Prior to 415b6185c541 ("PCI: imx6: Fix
> config read timeout handling") all of the imprecise aborts were caught
> and handled via no-op handler. I did an experiment on i.MX6Q board
> that I have (ZII RDU2) and adding a simple no-op for imprecise aborts
> via
>
> hook_fault_code(16 + 6, imx6q_pcie_no_op_handler, SIGBUS, 0,
>                        "imprecise external abort");
>
> seems to "resolve" this problem:

Please check https://patchwork.kernel.org/patch/9720313/

This commit fixed a kernel crash on mx6q boards with a PCI switch.

So we can't go back to the simple no-op.
Andrey Smirnov Nov. 28, 2018, 1:28 a.m. UTC | #4
On Tue, Nov 27, 2018 at 5:12 PM Fabio Estevam <festevam@gmail.com> wrote:
>
> Hi Andrey,
>
> On Tue, Nov 27, 2018 at 10:57 PM Andrey Smirnov
> <andrew.smirnov@gmail.com> wrote:
>
> > Could this be a regression? Prior to 415b6185c541 ("PCI: imx6: Fix
> > config read timeout handling") all of the imprecise aborts were caught
> > and handled via no-op handler. I did an experiment on i.MX6Q board
> > that I have (ZII RDU2) and adding a simple no-op for imprecise aborts
> > via
> >
> > hook_fault_code(16 + 6, imx6q_pcie_no_op_handler, SIGBUS, 0,
> >                        "imprecise external abort");
> >
> > seems to "resolve" this problem:
>
> Please check https://patchwork.kernel.org/patch/9720313/
>
> This commit fixed a kernel crash on mx6q boards with a PCI switch.
>
> So we can't go back to the simple no-op.

It's probably not exactly clear form my message, but I wasn't
proposing to go back to a no-op. What I had in mind is having a no-op
handler for imprecise aborts _alongside_ the non-linefetch handlers
that is already there when running against i.MX6Q type of the IP
block.

Thanks,
Andrey Smirnov
Stefan Agner Nov. 28, 2018, 12:45 p.m. UTC | #5
On 28.11.2018 02:28, Andrey Smirnov wrote:
> On Tue, Nov 27, 2018 at 5:12 PM Fabio Estevam <festevam@gmail.com> wrote:
>>
>> Hi Andrey,
>>
>> On Tue, Nov 27, 2018 at 10:57 PM Andrey Smirnov
>> <andrew.smirnov@gmail.com> wrote:
>>
>> > Could this be a regression? Prior to 415b6185c541 ("PCI: imx6: Fix
>> > config read timeout handling") all of the imprecise aborts were caught
>> > and handled via no-op handler. I did an experiment on i.MX6Q board
>> > that I have (ZII RDU2) and adding a simple no-op for imprecise aborts
>> > via
>> >
>> > hook_fault_code(16 + 6, imx6q_pcie_no_op_handler, SIGBUS, 0,
>> >                        "imprecise external abort");

Unsurprisingly, introducing this handler also "fixes" the issue in my
setup.

FWIW, during my investigation with the Thumb2 issue, I was looking at
the abort handler and its history a bit more closely too. I was about to
suggest readding this handler too, you just beat me by some hours :-)

The current 4.9 downstream BSP still has the old fault handler, and
hence this issue does not happen in the downstream BSP.

>> >
>> > seems to "resolve" this problem:
>>
>> Please check https://patchwork.kernel.org/patch/9720313/
>>
>> This commit fixed a kernel crash on mx6q boards with a PCI switch.
>>
>> So we can't go back to the simple no-op.
> 
> It's probably not exactly clear form my message, but I wasn't
> proposing to go back to a no-op. What I had in mind is having a no-op
> handler for imprecise aborts _alongside_ the non-linefetch handlers
> that is already there when running against i.MX6Q type of the IP
> block.
> 

Agreed, it should be alongside the "external abort on non-linefetch"
handler.

I actually encountered another issue when I had a Intel e1000e running
yesterday. Unfortunately I wasn't able to reproduce the issue, so maybe
it was just a fluke. It probably would be solved by the additional
"imprecise external abort" too:

[   37.644300] fec 2188000.ethernet eth0: Link is Down
[   38.077383] Unhandled fault: imprecise external abort (0x1406) at
0xb64e8000
[   38.084638] pgd = ac4709d6
[   38.087434] [b64e8000] *pgd=00000000
[   38.091129] Internal error: : 1406 [#1] PREEMPT SMP ARM
[   38.096508] CPU: 0 PID: 468 Comm: kworker/0:2 Not tainted
4.19.4-00044-ged7a0cc2ef01-dirty #479
[   38.105428] Hardware name: Freescale i.MX6 Quad/DualLite (Device
Tree)
[   38.112143] Workqueue: events e1000_watchdog_task
[   38.116993] PC is at e1000e_update_stats+0x68/0xa7c
[   38.122008] LR is at e1000_watchdog_task+0xe8/0x71c
[   38.127021] pc : [<c0621238>]    lr : [<c0628d0c>]    psr: 60010013
[   38.133449] sp : ed185ea0  ip : 00007374  fp : ec83ece4
[   38.138814] r10: ec71f700  r9 : ec83c500  r8 : ec83c000
[   38.144180] r7 : ec83c924  r6 : ec83c000  r5 : c1104cc8  r4 :
ec83c500
[   38.150875] r3 : f14c4000  r2 : 000003e8  r1 : 00000000  r0 :
ec83c500
[   38.157573] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM 
Segment none
[   38.164890] Control: 10c5387d  Table: 3d1c004a  DAC: 00000051
[   38.170789] Process kworker/0:2 (pid: 468, stack limit = 0xbc71b316)
[   38.177306] Stack: (0xed185ea0 to 0xed186000)
[   38.181794] 5ea0: ef7a9100 c0619ba3 ec0870e0 ec087068 ec0870e0
00000000 60010013 c0619ba3
[   38.190184] 5ec0: ef7a8d00 ec83c54c c1104cc8 ec83e54c ec83c924
ec83c000 ec83c500 ec71f700
[   38.198573] 5ee0: ec83ece4 c0628d0c ecbd16c0 c0c0237c ed185f3c
c0b26de0 ec131c04 c0619ba3
[   38.206966] 5f00: c1153fe4 ec83c54c ecdcc100 ef7a8d00 ef7a9e00
00000000 ec83c550 00000000
[   38.221770] 5f20: ef7a8d00 c0136aec c1103d00 ef7a8d18 ecdcc100
ef7a8d00 ecdcc114 c1103d00
[   38.236801] 5f40: ef7a8d18 ffffe000 00000008 c0136d40 ec521c70
c1176068 c0e4213c ed184000
[   38.251874] 5f60: ecfecfdc ecfecfc0 ed0db1c0 00000000 ed184000
ecdcc100 c0136cfc ec0a3ea4
[   38.267199] 5f80: ecfecfdc c013c810 00000000 ed0db1c0 c013c6c8
00000000 00000000 00000000
[   38.282612] 5fa0: 00000000 00000000 00000000 c01010e8 00000000
00000000 00000000 00000000
[   38.298080] 5fc0: 00000000 00000000 00000000 00000000 00000000
00000000 00000000 00000000
[   38.313792] 5fe0: 00000000 00000000 00000000 00000000 00000013
00000000 00000000 00000000
[   38.329716] [<c0621238>] (e1000e_update_stats) from [<c0628d0c>]
(e1000_watchdog_task+0xe8/0x71c)
[   38.346597] [<c0628d0c>] (e1000_watchdog_task) from [<c0136aec>]
(process_one_work+0x1f0/0x400)
[   38.363493] [<c0136aec>] (process_one_work) from [<c0136d40>]
(worker_thread+0x44/0x584)
[   38.379844] [<c0136d40>] (worker_thread) from [<c013c810>]
(kthread+0x148/0x150)
[   38.395575] [<c013c810>] (kthread) from [<c01010e8>]
(ret_from_fork+0x14/0x2c)
[   38.411119] Exception stack(0xed185fb0 to 0xed185ff8)
[   38.420331] 5fa0:                                     00000000
00000000 00000000 00000000
[   38.436662] 5fc0: 00000000 00000000 00000000 00000000 00000000
00000000 00000000 00000000
[   38.452933] 5fe0: 00000000 00000000 00000000 00000000 00000013
00000000
[   38.463661] Code: e590641c e2833901 e5931000 f57ff04f (e280ad9f)

--
Stefan
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
index 7ac1a639fe91..41d6127b40ad 100644
--- a/drivers/pci/controller/dwc/pci-imx6.c
+++ b/drivers/pci/controller/dwc/pci-imx6.c
@@ -41,6 +41,7 @@  enum imx6_pcie_variants {
 
 struct imx6_pcie_drvdata {
 	enum imx6_pcie_variants variant;
+	int			dbi_length;
 };
 
 struct imx6_pcie {
@@ -779,6 +780,8 @@  static int imx6_pcie_probe(struct platform_device *pdev)
 		break;
 	}
 
+	pci->dbi_length = imx6_pcie->drvdata->dbi_length;
+
 	/* Grab GPR config register range */
 	imx6_pcie->iomuxc_gpr =
 		 syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
@@ -839,7 +842,7 @@  static void imx6_pcie_shutdown(struct platform_device *pdev)
 }
 
 static const struct imx6_pcie_drvdata drvdata[] = {
-	[IMX6Q] = { .variant = IMX6Q },
+	[IMX6Q] = { .variant = IMX6Q, .dbi_length = 0x15c },
 	[IMX6SX] = { .variant = IMX6SX },
 	[IMX6QP] = { .variant = IMX6QP },
 	[IMX7D] = { .variant = IMX7D },