diff mbox

pata_legacy: Allow disabling of legacy PATA device probes on non-PCI systems

Message ID 20170119213737.GB25133@mtj.duckdns.org
State Not Applicable
Delegated to: David Miller
Headers show

Commit Message

Tejun Heo Jan. 19, 2017, 9:37 p.m. UTC
On Wed, Jan 11, 2017 at 08:54:21PM -0500, tedheadster wrote:
> ​Tejun,
>   I'm interested in debugging this. What can I do to help?

Sorry about taking so long.  I suck. :)

Can you please try the following patch and post what the kernel prints
out?

Thanks.
---
 drivers/ata/pata_legacy.c |    2 ++
 drivers/base/core.c       |    9 +++++++++
 include/linux/kobject.h   |    1 +
 lib/kobject.c             |   22 ++++++++++++++++++++++
 4 files changed, 34 insertions(+)

--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

tedheadster Jan. 20, 2017, 5:08 p.m. UTC | #1
Tejun,
  here is the relevant output:

[   39.868755] XXX kobject_get(pata_legacy.0): ref=3++
(ata_tport_add:ata_host_register:ata_host_activate)
[   39.875762] XXX kobject_get(pata_legacy.0): ref=4++
(ata_tport_add:ata_host_register:ata_host_activate)
[   39.877885] XXX kobject_get(pata_legacy.0): ref=5++
(kobject_add:device_add:ata_tport_add)
[   39.971742] scsi host0: pata_legacy
[   39.999736] ata1: PATA max PIO4 cmd 0x1f0 ctl 0x3f6 irq 14
[   40.157745] ata1.00: ATA-4: QUANTUM FIREBALL CR8.4A, A5U.1200, max UDMA/66
[   40.159149] ata1.00: 16514064 sectors, multi 0: LBA
[   40.160111] ata1.00: configured for PIO
[   40.236700] scsi 0:0:0:0: Direct-Access     ATA      QUANTUM
FIREBALL 1200 PQ: 0 ANSI: 5
[   40.337686] sd 0:0:0:0: [sda] 16514064 512-byte logical blocks:
(8.46 GB/7.87 GiB)
[   40.346688] sd 0:0:0:0: [sda] Write Protect is off
[   40.348681] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
[   40.365678] sd 0:0:0:0: [sda] Write cache: enabled, read cache:
enabled, doesn't support DPO or FUA
[   40.367688] sd 0:0:0:0: Attached scsi generic sg0 type 0
[   40.540654]  sda: sda1 sda2 sda3 sda4
[   40.916596] sd 0:0:0:0: [sda] Attached SCSI disk
[   40.956594] XXX kobject_get(pata_legacy.1): ref=3++
(ata_tport_add:ata_host_register:ata_host_activate)
[   40.968587] XXX kobject_get(pata_legacy.1): ref=4++
(ata_tport_add:ata_host_register:ata_host_activate)
[   40.971139] XXX kobject_get(pata_legacy.1): ref=5++
(kobject_add:device_add:ata_tport_add)
[   41.152561] scsi host1: pata_legacy
[   41.227549] ata2: PATA max PIO4 cmd 0x170 ctl 0x376 irq 15
[   41.690486] XXX kobject_put(pata_legacy.1): ref=6--
(device_del:ata_tport_delete:ata_host_detach)
[   41.692548] XXX kobject_put(pata_legacy.1): ref=5--
(ata_tport_delete:ata_host_detach:legacy_init [pata_legacy])
[   41.719476] XXX kobject_put(pata_legacy.1): ref=4--
(klist_put:klist_del:device_del)
[   41.725471] XXX kobject_put(pata_legacy.1): ref=3--
(klist_devices_put:klist_put:klist_del)
[   41.745472] XXX kobject_put(pata_legacy.1): ref=2--
(platform_device_unregister:legacy_init [pata_legacy]:do_one_initcall)
[   41.793466] XXX kobject_get(pata_legacy.2): ref=3++
(ata_tport_add:ata_host_register:ata_host_activate)
[   41.807475] XXX kobject_get(pata_legacy.2): ref=4++
(ata_tport_add:ata_host_register:ata_host_activate)
[   41.809848] XXX kobject_get(pata_legacy.2): ref=5++
(kobject_add:device_add:ata_tport_add)
[   41.991431] scsi host2: pata_legacy
[   42.036428] ata3: PATA max PIO4 cmd 0x1e8 ctl 0x3ee irq 11
[   42.357388] XXX kobject_put(pata_legacy.2): ref=6--
(device_del:ata_tport_delete:ata_host_detach)
[   42.359786] XXX kobject_put(pata_legacy.2): ref=5--
(ata_tport_delete:ata_host_detach:legacy_init [pata_legacy])
[   42.447367] XXX kobject_put(pata_legacy.2): ref=4--
(klist_put:klist_del:device_del)
[   42.470362] XXX kobject_put(pata_legacy.2): ref=3--
(klist_devices_put:klist_put:klist_del)
[   42.510357] XXX kobject_put(pata_legacy.2): ref=2--
(platform_device_unregister:legacy_init [pata_legacy]:do_one_initcall)
[   42.583346] XXX kobject_get(pata_legacy.3): ref=3++
(ata_tport_add:ata_host_register:ata_host_activate)
[   42.592340] XXX kobject_get(pata_legacy.3): ref=4++
(ata_tport_add:ata_host_register:ata_host_activate)
[   42.594415] XXX kobject_get(pata_legacy.3): ref=5++
(kobject_add:device_add:ata_tport_add)
[   42.815304] scsi host3: pata_legacy
[   42.881302] ata4: PATA max PIO4 cmd 0x168 ctl 0x36e irq 10
[   43.292234] XXX kobject_put(pata_legacy.3): ref=6--
(device_del:ata_tport_delete:ata_host_detach)
[   43.294228] XXX kobject_put(pata_legacy.3): ref=5--
(ata_tport_delete:ata_host_detach:legacy_init [pata_legacy])
[   43.340228] XXX kobject_put(pata_legacy.3): ref=4--
(klist_put:klist_del:device_del)
[   43.370225] XXX kobject_put(pata_legacy.3): ref=3--
(klist_devices_put:klist_put:klist_del)
[   43.411220] XXX kobject_put(pata_legacy.3): ref=2--
(platform_device_unregister:legacy_init [pata_legacy]:do_one_initcall)
[   43.599193] genirq: Flags mismatch irq 8. 00000000
(platform[pata_legacy.4]) vs. 00000080 (rtc0)
[   43.645179] XXX kobject_put(pata_legacy.4): ref=3--
(klist_put:klist_del:device_del)
[   43.685178] XXX kobject_put(pata_legacy.4): ref=2--
(klist_devices_put:klist_put:klist_del)
[   43.733174] XXX kobject_put(pata_legacy.4): ref=1--
(platform_device_unregister:legacy_init [pata_legacy]:do_one_initcall)
[   43.736259] XXX kobject_cleanup(pata_legacy.4): rel=device_release
(put_device:platform_device_unregister:legacy_init [pata_legacy])
[   43.738417] platform pata_legacy.4: XXX device_release:
drel=platform_device_release trel=  (null) cdrel=  (null)
[   43.740160] platform pata_legacy.4: XXX device_release:
devres_release_all() done
[   43.950138] XXX kobject_get(pata_legacy.5): ref=3++
(ata_tport_add:ata_host_register:ata_host_activate)
[   43.973137] XXX kobject_get(pata_legacy.5): ref=4++
(ata_tport_add:ata_host_register:ata_host_activate)
[   43.975360] XXX kobject_get(pata_legacy.5): ref=5++
(kobject_add:device_add:ata_tport_add)
[   44.338072] scsi host4: pata_legacy
[   44.389071] ata5: PATA max PIO4 cmd 0x160 ctl 0x366 irq 12
[   44.699020] XXX kobject_put(pata_legacy.5): ref=6--
(device_del:ata_tport_delete:ata_host_detach)
[   44.701270] XXX kobject_put(pata_legacy.5): ref=5--
(ata_tport_delete:ata_host_detach:legacy_init [pata_legacy])
[   44.714021] XXX kobject_put(pata_legacy.5): ref=4--
(klist_put:klist_del:device_del)
[   44.731015] XXX kobject_put(pata_legacy.5): ref=3--
(klist_devices_put:klist_put:klist_del)
[   44.750009] XXX kobject_put(pata_legacy.5): ref=2--
(platform_device_unregister:legacy_init [pata_legacy]:do_one_initcall)

...

[  252.348369] genirq: Flags mismatch irq 10. 00000000 (3c515) vs.
00000000 (platform[pata_legacy.3])
[  252.379341] genirq: Flags mismatch irq 10. 00000000 (3c515) vs.
00000000 (platform[pata_legacy.3])

- Matthew
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo Jan. 20, 2017, 7:19 p.m. UTC | #2
Hello,

Thanks for testing.

On Fri, Jan 20, 2017 at 12:08:10PM -0500, tedheadster wrote:
> kobject_get(pata_legacy.1): ref=3++ (ata_tport_add:ata_host_register:ata_host_activate)
> kobject_get(pata_legacy.1): ref=4++ (ata_tport_add:ata_host_register:ata_host_activate)
> XXX kobject_get(pata_legacy.1): ref=5++ (kobject_add:device_add:ata_tport_add)
> XXX kobject_put(pata_legacy.1): ref=6-- (device_del:ata_tport_delete:ata_host_detach)
> XXX kobject_put(pata_legacy.1): ref=5-- (ata_tport_delete:ata_host_detach:legacy_init [pata_legacy])
> XXX kobject_put(pata_legacy.1): ref=4-- (klist_put:klist_del:device_del)
> XXX kobject_put(pata_legacy.1): ref=3-- (klist_devices_put:klist_put:klist_del)
> XXX kobject_put(pata_legacy.1): ref=2-- (platform_device_unregister:legacy_init [pata_legacy]:do_one_initcall)

So, three gets from the transport path but only two puts.  That seems
to be the culprit.  Looking into it.

Thanks.
Tejun Heo Jan. 20, 2017, 7:38 p.m. UTC | #3
Hello, Gwendal.

On Fri, Jan 20, 2017 at 02:19:46PM -0500, Tejun Heo wrote:
> On Fri, Jan 20, 2017 at 12:08:10PM -0500, tedheadster wrote:
> > kobject_get(pata_legacy.1): ref=3++ (ata_tport_add:ata_host_register:ata_host_activate)
> > kobject_get(pata_legacy.1): ref=4++ (ata_tport_add:ata_host_register:ata_host_activate)
> > XXX kobject_get(pata_legacy.1): ref=5++ (kobject_add:device_add:ata_tport_add)
> > XXX kobject_put(pata_legacy.1): ref=6-- (device_del:ata_tport_delete:ata_host_detach)
> > XXX kobject_put(pata_legacy.1): ref=5-- (ata_tport_delete:ata_host_detach:legacy_init [pata_legacy])
> > XXX kobject_put(pata_legacy.1): ref=4-- (klist_put:klist_del:device_del)
> > XXX kobject_put(pata_legacy.1): ref=3-- (klist_devices_put:klist_put:klist_del)
> > XXX kobject_put(pata_legacy.1): ref=2-- (platform_device_unregister:legacy_init [pata_legacy]:do_one_initcall)
> 
> So, three gets from the transport path but only two puts.  That seems
> to be the culprit.  Looking into it.

So, Matthew discovered that pata_legacy isn't releasing resources
after probe failure.  After some debugging, it turns out that the
transport code is taking three refs but only putting two preventing
the ata port from going away.

Can you please explain why the libata transport code is explicitly
pinning the parent device and then putting it in the release methods?
device_add/del() already gets and puts the parent, so I don't get why
this part is necessary.  Is this intentional?

Also, it looks like there is a ref leak on the transport device
itself.  Its release function never gets called and thus the parent
device (ata_port) stays pinned too.

Thanks.
Gwendal Grignou Jan. 23, 2017, 11:36 p.m. UTC | #4
On Fri, Jan 20, 2017 at 11:38 AM, Tejun Heo <tj@kernel.org> wrote:
> Hello, Gwendal.
>
> On Fri, Jan 20, 2017 at 02:19:46PM -0500, Tejun Heo wrote:
>> On Fri, Jan 20, 2017 at 12:08:10PM -0500, tedheadster wrote:
>> > kobject_get(pata_legacy.1): ref=3++ (ata_tport_add:ata_host_register:ata_host_activate)
>> > kobject_get(pata_legacy.1): ref=4++ (ata_tport_add:ata_host_register:ata_host_activate)
>> > XXX kobject_get(pata_legacy.1): ref=5++ (kobject_add:device_add:ata_tport_add)
>> > XXX kobject_put(pata_legacy.1): ref=6-- (device_del:ata_tport_delete:ata_host_detach)
>> > XXX kobject_put(pata_legacy.1): ref=5-- (ata_tport_delete:ata_host_detach:legacy_init [pata_legacy])
>> > XXX kobject_put(pata_legacy.1): ref=4-- (klist_put:klist_del:device_del)
>> > XXX kobject_put(pata_legacy.1): ref=3-- (klist_devices_put:klist_put:klist_del)
>> > XXX kobject_put(pata_legacy.1): ref=2-- (platform_device_unregister:legacy_init [pata_legacy]:do_one_initcall)
>>
>> So, three gets from the transport path but only two puts.  That seems
>> to be the culprit.  Looking into it.
>
> So, Matthew discovered that pata_legacy isn't releasing resources
> after probe failure.  After some debugging, it turns out that the
> transport code is taking three refs but only putting two preventing
> the ata port from going away.
>
> Can you please explain why the libata transport code is explicitly
> pinning the parent device and then putting it in the release methods?
> device_add/del() already gets and puts the parent, so I don't get why
> this part is necessary.  Is this intentional?
It was intentional. When I wrote the code, I used other transport as
inspiration, in particular drivers/scsi/scsi_transport_sas.c.
For instance, in sas_port_alloc, "port->dev.parent =
get_device(parent);", undo in sas_port_release().
From your debug code, in case of ATA at least, it looks unnecessary.

Moreover, in the error path of ata_XXX_add, a put_device(&link->tdev)
Also,How ata_tport_add() increase the reference twice before calling
device_add()?
>
> Also, it looks like there is a ref leak on the transport device
> itself.  Its release function never gets called and thus the parent
> device (ata_port) stays pinned too.
That's the root cause of pata port not released. Can we run your debug
code one more time, enabling ap->tdev.kobj.release_debug?

Thanks,
Gwendal.
>
> Thanks.
>
> --
> tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
tedheadster Jan. 23, 2017, 11:57 p.m. UTC | #5
Gwendal,

>> Also, it looks like there is a ref leak on the transport device
>> itself.  Its release function never gets called and thus the parent
>> device (ata_port) stays pinned too.
> That's the root cause of pata port not released. Can we run your debug
> code one more time, enabling ap->tdev.kobj.release_debug?
>

I'm building the debug kernel because I have the test hardware, but
I'm not quite sure what I need to do to enable
ap->tdev.kobj.release_debug. Tejun's patch enables kobj.release_debug,
but I'm not sure if that is the same thing you want. Please advise.

- Matthew
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo Jan. 24, 2017, 3:57 p.m. UTC | #6
Hello,

On Mon, Jan 23, 2017 at 03:36:18PM -0800, Gwendal Grignou wrote:
> > Can you please explain why the libata transport code is explicitly
> > pinning the parent device and then putting it in the release methods?
> > device_add/del() already gets and puts the parent, so I don't get why
> > this part is necessary.  Is this intentional?
>
> It was intentional. When I wrote the code, I used other transport as
> inspiration, in particular drivers/scsi/scsi_transport_sas.c.
> For instance, in sas_port_alloc, "port->dev.parent =
> get_device(parent);", undo in sas_port_release().
> From your debug code, in case of ATA at least, it looks unnecessary.

kobjects release their parents on kobject_del() rather than release,
which I don't think is a good design, so if there are code paths which
try to walk up the hierarchy after being deleted but before released,
having the extra ref around release does make sense.  I don't know
whether that's the case for the transport code tho.

Anyways, I was curious but this is a bit tangential to the issue.
This code does make the ata device hang around till the transport
kobject is released and the problem is caused by the transport kobject
not being released, so there's linkage there, but we wanna find out
why the transport kobject isn't being released and fix it.

> Moreover, in the error path of ata_XXX_add, a put_device(&link->tdev)
> Also,How ata_tport_add() increase the reference twice before calling
> device_add()?

Dunno, attribute additions?

> > Also, it looks like there is a ref leak on the transport device
> > itself.  Its release function never gets called and thus the parent
> > device (ata_port) stays pinned too.
>
> That's the root cause of pata port not released. Can we run your debug
> code one more time, enabling ap->tdev.kobj.release_debug?

Yeah, I'll update the patch.

Thanks.
diff mbox

Patch

--- a/drivers/ata/pata_legacy.c
+++ b/drivers/ata/pata_legacy.c
@@ -962,6 +962,8 @@  static __init int legacy_init_one(struct
 	if (IS_ERR(pdev))
 		return PTR_ERR(pdev);
 
+	pdev->dev.kobj.release_debug = true;
+
 	ret = -EBUSY;
 	if (devm_request_region(&pdev->dev, io, 8, "pata_legacy") == NULL ||
 	    devm_request_region(&pdev->dev, io + 0x0206, 1,
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -803,8 +803,17 @@  static void device_release(struct kobjec
 	 * is deleted but alive, so release devres here to avoid
 	 * possible memory leak.
 	 */
+	if (kobj->release_debug)
+		dev_info(dev, "XXX device_release: drel=%pf trel=%pf cdrel=%pf\n",
+			 dev->release,
+			 dev->type ? dev->type->release : NULL,
+			 dev->class ? dev->class->dev_release : NULL);
+
 	devres_release_all(dev);
 
+	if (kobj->release_debug)
+		dev_info(dev, "XXX device_release: devres_release_all() done\n");
+
 	if (dev->release)
 		dev->release(dev);
 	else if (dev->type && dev->type->release)
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -76,6 +76,7 @@  struct kobject {
 	unsigned int state_add_uevent_sent:1;
 	unsigned int state_remove_uevent_sent:1;
 	unsigned int uevent_suppress:1;
+	unsigned int release_debug:1;
 };
 
 extern __printf(2, 3)
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -595,6 +595,13 @@  struct kobject *kobject_get(struct kobje
 			WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
 			       "initialized, yet kobject_get() is being "
 			       "called.\n", kobject_name(kobj), kobj);
+		if (kobj->release_debug)
+			printk("XXX kobject_get(%s): ref=%d++ (%pf:%pf:%pf)\n",
+			       kobject_name(kobj),
+			       atomic_read(&kobj->kref.refcount),
+			       __builtin_return_address(1),
+			       __builtin_return_address(2),
+			       __builtin_return_address(3));
 		kref_get(&kobj->kref);
 	}
 	return kobj;
@@ -620,6 +627,14 @@  static void kobject_cleanup(struct kobje
 	pr_debug("kobject: '%s' (%p): %s, parent %p\n",
 		 kobject_name(kobj), kobj, __func__, kobj->parent);
 
+	if (kobj->release_debug)
+		printk("XXX kobject_cleanup(%s): rel=%pf (%pf:%pf:%pf)\n",
+		       kobject_name(kobj),
+		       (t && t->release) ? t->release : NULL,
+		       __builtin_return_address(1),
+		       __builtin_return_address(2),
+		       __builtin_return_address(3));
+
 	if (t && !t->release)
 		pr_debug("kobject: '%s' (%p): does not have a release() "
 			 "function, it is broken and must be fixed.\n",
@@ -688,6 +703,13 @@  void kobject_put(struct kobject *kobj)
 			WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
 			       "initialized, yet kobject_put() is being "
 			       "called.\n", kobject_name(kobj), kobj);
+		if (kobj->release_debug)
+			printk("XXX kobject_put(%s): ref=%d-- (%pf:%pf:%pf)\n",
+			       kobject_name(kobj),
+			       atomic_read(&kobj->kref.refcount),
+			       __builtin_return_address(1),
+			       __builtin_return_address(2),
+			       __builtin_return_address(3));
 		kref_put(&kobj->kref, kobject_release);
 	}
 }