Patchwork 2.6.31-rc5 regression: hd don't show up

login
register
mail settings
Submitter Tejun Heo
Date Aug. 27, 2009, 11:35 a.m.
Message ID <4A966F7D.60707@kernel.org>
Download mbox | patch
Permalink /patch/32241/
State Not Applicable
Delegated to: David Miller
Headers show

Comments

Tejun Heo - Aug. 27, 2009, 11:35 a.m.
Tim Blechmann wrote:
> On 08/27/2009 10:38 AM, Tejun Heo wrote:
>> Tim Blechmann wrote:
>>>>>>> running 2.6.31-rc5 (7cb7beb31aa3d941833b6a6e553687422c31e4b6 to be
>>>>>>> exact), sometimes some hard disks don't show up.
>>>>>>>
>>>>>>> after booting, my root hd (sda) is mounted to /, while two other hds
>>>>>>> (sdb/sdc) are mounted as a user. sda is always present, but the other
>>>>>>> two sometimes don't show up (i.e. they are not listed in /dev/disk/, nor
>>>>>>> do they have a /dev/sdX link). with 2.6.29 and 2.6.30, all three disks
>>>>>>> are reported correctly.
>>>>>> Can you please attach boot logs of a successful and a failed boot?
>>>>> i have two files attached:
>>>>> - dmesg_good - all hds are available
>>>>> - dmesg_bad - one hd is missing
>>>> Can you please apply the attached patch and post the bad boot log?
>>> attached you find boot logs for both a good and a bad boot
>> Sorry about the long delay.  I somehow marked the message read without
>> actually reading it.
>>
>> I suspected the problem was with getting the wrong classification code
>> or phantom device detection kicking in spuriously.  Looks like the
>> problem happens way before that.  Can you please apply the attached
>> patch and report the result?
> 
> i applied your patch on top of the current linus/master branch and
> currently (after rebooting 5 or 6 times) i cannot reproduce the problem
> any more ...
> however, there is a warning stack trace in the boot log from libata code
> (bootlog attached)

Oops, that was my bad.  This should remove the useless warning.

Thanks.
Tim Blechmann - Aug. 28, 2009, 11:04 a.m.
On 08/27/2009 01:35 PM, Tejun Heo wrote:
> Tim Blechmann wrote:
>> On 08/27/2009 10:38 AM, Tejun Heo wrote:
>>> Tim Blechmann wrote:
>>>>>>>> running 2.6.31-rc5 (7cb7beb31aa3d941833b6a6e553687422c31e4b6 to be
>>>>>>>> exact), sometimes some hard disks don't show up.
>>>>>>>>
>>>>>>>> after booting, my root hd (sda) is mounted to /, while two other hds
>>>>>>>> (sdb/sdc) are mounted as a user. sda is always present, but the other
>>>>>>>> two sometimes don't show up (i.e. they are not listed in /dev/disk/, nor
>>>>>>>> do they have a /dev/sdX link). with 2.6.29 and 2.6.30, all three disks
>>>>>>>> are reported correctly.
>>>>>>> Can you please attach boot logs of a successful and a failed boot?
>>>>>> i have two files attached:
>>>>>> - dmesg_good - all hds are available
>>>>>> - dmesg_bad - one hd is missing
>>>>> Can you please apply the attached patch and post the bad boot log?
>>>> attached you find boot logs for both a good and a bad boot
>>> Sorry about the long delay.  I somehow marked the message read without
>>> actually reading it.
>>>
>>> I suspected the problem was with getting the wrong classification code
>>> or phantom device detection kicking in spuriously.  Looks like the
>>> problem happens way before that.  Can you please apply the attached
>>> patch and report the result?
>>
>> i applied your patch on top of the current linus/master branch and
>> currently (after rebooting 5 or 6 times) i cannot reproduce the problem
>> any more ...
>> however, there is a warning stack trace in the boot log from libata code
>> (bootlog attached)
> 
> Oops, that was my bad.  This should remove the useless warning.

booting the machine today, one hd is missing again ... bootlog attached

hth, tim

Patch

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 072ba5e..876ede2 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3770,6 +3770,7 @@  int sata_link_resume(struct ata_link *link, const unsigned long *params,
 
 	scontrol = (scontrol & 0x0f0) | 0x300;
 
+	ata_link_printk(link, KERN_INFO, "XXX bringing up link\n");
 	if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
 		return rc;
 
@@ -3778,7 +3779,9 @@  int sata_link_resume(struct ata_link *link, const unsigned long *params,
 	 */
 	msleep(200);
 
-	if ((rc = sata_link_debounce(link, params, deadline)))
+	rc = sata_link_debounce(link, params, deadline);
+	ata_link_printk(link, KERN_INFO, "XXX debounced rc=%d\n", rc);
+	if (rc)
 		return rc;
 
 	/* clear SError, some PHYs require this even for SRST to work */
@@ -3904,8 +3907,10 @@  int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
 	if (rc)
 		goto out;
 	/* if link is offline nothing more to do */
-	if (ata_phys_link_offline(link))
+	if (ata_phys_link_offline(link)) {
+		ata_link_printk(link, KERN_INFO, "XXX phys link offline\n");
 		goto out;
+	}
 
 	/* Link is online.  From this point, -ENODEV too is an error. */
 	if (online)
@@ -6060,7 +6065,7 @@  static void async_port_probe(void *data, async_cookie_t cookie)
 
 		ehi->probe_mask |= ATA_ALL_DEVICES;
 		ehi->action |= ATA_EH_RESET | ATA_EH_LPM;
-		ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET;
+		ehi->flags |= ATA_EHI_NO_AUTOPSY/* | ATA_EHI_QUIET*/;
 
 		ap->pflags &= ~ATA_PFLAG_INITIALIZING;
 		ap->pflags |= ATA_PFLAG_LOADING;
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index bbbb1fa..c718d12 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1998,6 +1998,9 @@  unsigned int ata_sff_dev_classify(struct ata_device *dev, int present,
 	if (r_err)
 		*r_err = err;
 
+	ata_dev_printk(dev, KERN_INFO, "XXX CLASSIFY TF %02x/%02x:%02x:%02x:%02x\n",
+		       tf.command, tf.feature, tf.lbal, tf.lbam, tf.lbah);
+
 	/* see if device passed diags: continue and warn later */
 	if (err == 0)
 		/* diagnostic fail : do nothing _YET_ */
@@ -2006,11 +2009,14 @@  unsigned int ata_sff_dev_classify(struct ata_device *dev, int present,
 		/* do nothing */ ;
 	else if ((dev->devno == 0) && (err == 0x81))
 		/* do nothing */ ;
-	else
+	else {
+		ata_dev_printk(dev, KERN_INFO, "XXX diag nodev\n");
 		return ATA_DEV_NONE;
+	}
 
 	/* determine if device is ATA or ATAPI */
 	class = ata_dev_classify(&tf);
+	ata_dev_printk(dev, KERN_INFO, "XXX ata_dev_classify=%d\n", class);
 
 	if (class == ATA_DEV_UNKNOWN) {
 		/* If the device failed diagnostic, it's likely to
@@ -2019,13 +2025,18 @@  unsigned int ata_sff_dev_classify(struct ata_device *dev, int present,
 		 * device signature is invalid with diagnostic
 		 * failure.
 		 */
-		if (present && (dev->horkage & ATA_HORKAGE_DIAGNOSTIC))
+		if (present && (dev->horkage & ATA_HORKAGE_DIAGNOSTIC)) {
+			ata_dev_printk(dev, KERN_INFO, "XXX UNK && present -> ATA\n");
 			class = ATA_DEV_ATA;
-		else
+		} else {
 			class = ATA_DEV_NONE;
+			ata_dev_printk(dev, KERN_INFO, "XXX UNK && !present -> NONE\n");
+		}
 	} else if ((class == ATA_DEV_ATA) &&
-		   (ap->ops->sff_check_status(ap) == 0))
+		   (ap->ops->sff_check_status(ap) == 0)) {
 		class = ATA_DEV_NONE;
+		ata_dev_printk(dev, KERN_INFO, "XXX stat==0 -> NONE\n");
+	}
 
 	return class;
 }