diff mbox

Drives missing at boot

Message ID AANLkTinwJLE=jjwG5zL8UBO91tzDpX36_W9GORKag8RK@mail.gmail.com
State Not Applicable
Delegated to: David Miller
Headers show

Commit Message

Mark Knecht Aug. 2, 2010, 10:07 p.m. UTC
On Thu, Jul 22, 2010 at 5:39 AM, Tejun Heo <tj@kernel.org> wrote:
> Hello,
>
> On 07/21/2010 10:54 PM, Mark Knecht wrote:
>>    Looks like I had a failure today. First one in weeks and only the
>> 3rd or 4th boot with this newer patch file. One of the two drives
>> making a RAID0 wasn't found so /dev/md11 (constructed from /dev/sdd
>> and /dev/sde) couldn't be started. I did a cold reboot and the drive
>> was found.
>>
>>    If it matters, and it probably doesn't, the failure came on a boot
>> which had a scheduled fsck to do of /dev/md5 - my main / drive. I
>> don't see how that would make a difference but I figure why leave the
>> info out. That's why the times are so much larger in the dmesg file.
>> (I think)
>>
>>    dmesg attached. I patched the Gentoo kernel if it makes a
>> difference, same as I did with the earlier patch.
>>
>> mark@c2stable ~ $ uname -a
>> Linux c2stable 2.6.34-gentoo-r2 #1 SMP PREEMPT Sun Jul 18 14:09:48 PDT
>> 2010 x86_64 Intel(R) Core(TM) i7 CPU X 980 @ 3.33GHz GenuineIntel
>> GNU/Linux
>> mark@c2stable ~ $
>
> Hmmm... that's weird.  Can you please make sure the patch is actually
> applied?  Adding a printk("XXX patch applied!\n") near other changes
> usually is easy enough.  Also, can you please apply resume-dbg-1.patch
> too and reproduce the failure and post log?
>
> Thanks.
>
> --
> tejun
>

Hi Tejun,
   I'm finally home and trying to get back to this. I'm really a bad
programmer so I don't know what I've done wrong but it seems patch
isn't happy with me.

c2stable linux # patch --dry-run -p1 <../ata_piix-sidpr-lock.patch
patching file drivers/ata/ata_piix.c
patch: **** malformed patch at line 13:

c2stable linux #

   Here's the change I tried to make to a copy of the file:

c2stable linux # cat ../ata_piix-sidpr-lock.patch
c2stable linux #

   Maybe you can shoot back something that's done correctly and I'll
start testing.

   I've tried booting a few times. I've had 3 cold boot failures so
far. No warm boot failures. Each time it failed on cold boot a warm
boot fixed it.

Thanks,
Mark
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Randy Dunlap Aug. 3, 2010, 6:41 p.m. UTC | #1
On Mon, 2 Aug 2010 15:07:11 -0700 Mark Knecht wrote:

> On Thu, Jul 22, 2010 at 5:39 AM, Tejun Heo <tj@kernel.org> wrote:
> 
> Hi Tejun,
>    I'm finally home and trying to get back to this. I'm really a bad
> programmer so I don't know what I've done wrong but it seems patch
> isn't happy with me.
> 
> c2stable linux # patch --dry-run -p1 <../ata_piix-sidpr-lock.patch
> patching file drivers/ata/ata_piix.c
> patch: **** malformed patch at line 13:

Whenever the patch file was saved on this system, line 13 of it was
split (probably by an email client).  Whenever I see this, I just
join (merge) that line and the next one and try again... sometimes
several lines are malformed and have to be fixed like this.

> 
> c2stable linux #
> 
>    Here's the change I tried to make to a copy of the file:
> 
> c2stable linux # cat ../ata_piix-sidpr-lock.patch
> diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
> index 7409f98..3971bc0 100644
> --- a/drivers/ata/ata_piix.c
> +++ b/drivers/ata/ata_piix.c
> @@ -158,6 +158,7 @@ struct piix_map_db {
>  struct piix_host_priv {
>         const int *map;
>         u32 saved_iocfg;
> +       spinlock_t sidpr_lock;  /* FIXME: remove once locking in EH is fixed */
> +       printk("MWK - ata_sidpr patch applied!\n");
>         void __iomem *sidpr;
>  };
> 
> @@ -951,12 +952,15 @@ static int piix_sidpr_scr_read(struct ata_link *link,
>                                unsigned int reg, u32 *val)
>  {
>         struct piix_host_priv *hpriv = link->ap->host->private_data;
> +       unsigned long flags;
> 
>         if (reg >= ARRAY_SIZE(piix_sidx_map))
>                 return -EINVAL;
> 
> +       spin_lock_irqsave(&hpriv->sidpr_lock, flags);
>         piix_sidpr_sel(link, reg);
>         *val = ioread32(hpriv->sidpr + PIIX_SIDPR_DATA);
> +       spin_unlock_irqrestore(&hpriv->sidpr_lock, flags);
>         return 0;
>  }
> 
> @@ -964,12 +968,15 @@ static int piix_sidpr_scr_write(struct ata_link *link,
>                                 unsigned int reg, u32 val)
>  {
>         struct piix_host_priv *hpriv = link->ap->host->private_data;
> +       unsigned long flags;
> 
>         if (reg >= ARRAY_SIZE(piix_sidx_map))
>                 return -EINVAL;
> 
> +       spin_lock_irqsave(&hpriv->sidpr_lock, flags);
>         piix_sidpr_sel(link, reg);
>         iowrite32(val, hpriv->sidpr + PIIX_SIDPR_DATA);
> +       spin_unlock_irqrestore(&hpriv->sidpr_lock, flags);
>         return 0;
>  }
> 
> @@ -1566,6 +1573,7 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
>         hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL);
>         if (!hpriv)
>                 return -ENOMEM;
> +       spin_lock_init(&hpriv->sidpr_lock);
> 
>         /* Save IOCFG, this will be used for cable detection, quirk
>          * detection and restoration on detach.  This is necessary
> c2stable linux #
> 
>    Maybe you can shoot back something that's done correctly and I'll
> start testing.
> 
>    I've tried booting a few times. I've had 3 cold boot failures so
> far. No warm boot failures. Each time it failed on cold boot a warm
> boot fixed it.


---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mark Knecht Aug. 3, 2010, 6:47 p.m. UTC | #2
On Tue, Aug 3, 2010 at 11:41 AM, Randy Dunlap <randy.dunlap@oracle.com> wrote:
> On Mon, 2 Aug 2010 15:07:11 -0700 Mark Knecht wrote:
>
>> On Thu, Jul 22, 2010 at 5:39 AM, Tejun Heo <tj@kernel.org> wrote:
>>
>> Hi Tejun,
>>    I'm finally home and trying to get back to this. I'm really a bad
>> programmer so I don't know what I've done wrong but it seems patch
>> isn't happy with me.
>>
>> c2stable linux # patch --dry-run -p1 <../ata_piix-sidpr-lock.patch
>> patching file drivers/ata/ata_piix.c
>> patch: **** malformed patch at line 13:
>
> Whenever the patch file was saved on this system, line 13 of it was
> split (probably by an email client).  Whenever I see this, I just
> join (merge) that line and the next one and try again... sometimes
> several lines are malformed and have to be fixed like this.
>

Randy,
   Could very well be what happened. I added line 13 (the printk) by hand

<SNIP - ORIGINAL PATCH FILE>
struct piix_host_priv {
       const int *map;
       u32 saved_iocfg;
+       spinlock_t sidpr_lock;  /* FIXME: remove once locking in EH is fixed */
        void __iomem *sidpr;
};

<SNIP - MY CHANGE BY HAND>
struct piix_host_priv {
       const int *map;
       u32 saved_iocfg;
+       spinlock_t sidpr_lock;  /* FIXME: remove once locking in EH is fixed */
+       printk("MWK - ata_sidpr patch applied!\n");
       void __iomem *sidpr;
};

Maybe I should have just put it on the same line as the previous
spinlock command?

I'll play with it and see if I can get it working.

Thanks,
Mark
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jim Paris Aug. 3, 2010, 6:49 p.m. UTC | #3
Randy Dunlap wrote:
> On Mon, 2 Aug 2010 15:07:11 -0700 Mark Knecht wrote:
> 
> > On Thu, Jul 22, 2010 at 5:39 AM, Tejun Heo <tj@kernel.org> wrote:
> > 
> > Hi Tejun,
> >    I'm finally home and trying to get back to this. I'm really a bad
> > programmer so I don't know what I've done wrong but it seems patch
> > isn't happy with me.
> > 
> > c2stable linux # patch --dry-run -p1 <../ata_piix-sidpr-lock.patch
> > patching file drivers/ata/ata_piix.c
> > patch: **** malformed patch at line 13:
> 
> Whenever the patch file was saved on this system, line 13 of it was
> split (probably by an email client).  Whenever I see this, I just
> join (merge) that line and the next one and try again... sometimes
> several lines are malformed and have to be fixed like this.

Tejun asked Mark to add a printk, and Mark added it directly to the
patch.  Mark, just apply the original patch as-is, and then add the
printk to the source code in ata_piix.c.  You should add it somewhere
like the piix_init_one function, e.g. right before the "Save IOCFG"
comment around line 1575.

-jim

> 
> > 
> > c2stable linux #
> > 
> >    Here's the change I tried to make to a copy of the file:
> > 
> > c2stable linux # cat ../ata_piix-sidpr-lock.patch
> > diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
> > index 7409f98..3971bc0 100644
> > --- a/drivers/ata/ata_piix.c
> > +++ b/drivers/ata/ata_piix.c
> > @@ -158,6 +158,7 @@ struct piix_map_db {
> >  struct piix_host_priv {
> >         const int *map;
> >         u32 saved_iocfg;
> > +       spinlock_t sidpr_lock;  /* FIXME: remove once locking in EH is fixed */
> > +       printk("MWK - ata_sidpr patch applied!\n");
> >         void __iomem *sidpr;
> >  };
> > 
> > @@ -951,12 +952,15 @@ static int piix_sidpr_scr_read(struct ata_link *link,
> >                                unsigned int reg, u32 *val)
> >  {
> >         struct piix_host_priv *hpriv = link->ap->host->private_data;
> > +       unsigned long flags;
> > 
> >         if (reg >= ARRAY_SIZE(piix_sidx_map))
> >                 return -EINVAL;
> > 
> > +       spin_lock_irqsave(&hpriv->sidpr_lock, flags);
> >         piix_sidpr_sel(link, reg);
> >         *val = ioread32(hpriv->sidpr + PIIX_SIDPR_DATA);
> > +       spin_unlock_irqrestore(&hpriv->sidpr_lock, flags);
> >         return 0;
> >  }
> > 
> > @@ -964,12 +968,15 @@ static int piix_sidpr_scr_write(struct ata_link *link,
> >                                 unsigned int reg, u32 val)
> >  {
> >         struct piix_host_priv *hpriv = link->ap->host->private_data;
> > +       unsigned long flags;
> > 
> >         if (reg >= ARRAY_SIZE(piix_sidx_map))
> >                 return -EINVAL;
> > 
> > +       spin_lock_irqsave(&hpriv->sidpr_lock, flags);
> >         piix_sidpr_sel(link, reg);
> >         iowrite32(val, hpriv->sidpr + PIIX_SIDPR_DATA);
> > +       spin_unlock_irqrestore(&hpriv->sidpr_lock, flags);
> >         return 0;
> >  }
> > 
> > @@ -1566,6 +1573,7 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
> >         hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL);
> >         if (!hpriv)
> >                 return -ENOMEM;
> > +       spin_lock_init(&hpriv->sidpr_lock);
> > 
> >         /* Save IOCFG, this will be used for cable detection, quirk
> >          * detection and restoration on detach.  This is necessary
> > c2stable linux #
> > 
> >    Maybe you can shoot back something that's done correctly and I'll
> > start testing.
> > 
> >    I've tried booting a few times. I've had 3 cold boot failures so
> > far. No warm boot failures. Each time it failed on cold boot a warm
> > boot fixed it.
> 
> 
> ---
> ~Randy
> *** Remember to use Documentation/SubmitChecklist when testing your code ***
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ide" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mark Knecht Aug. 3, 2010, 6:53 p.m. UTC | #4
On Tue, Aug 3, 2010 at 11:49 AM, Jim Paris <jim@jtan.com> wrote:
> Randy Dunlap wrote:
>> On Mon, 2 Aug 2010 15:07:11 -0700 Mark Knecht wrote:
>>
>> > On Thu, Jul 22, 2010 at 5:39 AM, Tejun Heo <tj@kernel.org> wrote:
>> >
>> > Hi Tejun,
>> >    I'm finally home and trying to get back to this. I'm really a bad
>> > programmer so I don't know what I've done wrong but it seems patch
>> > isn't happy with me.
>> >
>> > c2stable linux # patch --dry-run -p1 <../ata_piix-sidpr-lock.patch
>> > patching file drivers/ata/ata_piix.c
>> > patch: **** malformed patch at line 13:
>>
>> Whenever the patch file was saved on this system, line 13 of it was
>> split (probably by an email client).  Whenever I see this, I just
>> join (merge) that line and the next one and try again... sometimes
>> several lines are malformed and have to be fixed like this.
>
> Tejun asked Mark to add a printk, and Mark added it directly to the
> patch.  Mark, just apply the original patch as-is, and then add the
> printk to the source code in ata_piix.c.  You should add it somewhere
> like the piix_init_one function, e.g. right before the "Save IOCFG"
> comment around line 1575.
>
> -jim

Thanks Jim. I'll give it a shot.

Cheers,
Mark
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Randy Dunlap Aug. 3, 2010, 6:55 p.m. UTC | #5
On Tue, 3 Aug 2010 11:47:25 -0700 Mark Knecht wrote:

> On Tue, Aug 3, 2010 at 11:41 AM, Randy Dunlap <randy.dunlap@oracle.com> wrote:
> > On Mon, 2 Aug 2010 15:07:11 -0700 Mark Knecht wrote:
> >
> >> On Thu, Jul 22, 2010 at 5:39 AM, Tejun Heo <tj@kernel.org> wrote:
> >>
> >> Hi Tejun,
> >>    I'm finally home and trying to get back to this. I'm really a bad
> >> programmer so I don't know what I've done wrong but it seems patch
> >> isn't happy with me.
> >>
> >> c2stable linux # patch --dry-run -p1 <../ata_piix-sidpr-lock.patch
> >> patching file drivers/ata/ata_piix.c
> >> patch: **** malformed patch at line 13:
> >
> > Whenever the patch file was saved on this system, line 13 of it was
> > split (probably by an email client).  Whenever I see this, I just
> > join (merge) that line and the next one and try again... sometimes
> > several lines are malformed and have to be fixed like this.
> >
> 
> Randy,
>    Could very well be what happened. I added line 13 (the printk) by hand
> 
> <SNIP - ORIGINAL PATCH FILE>
> struct piix_host_priv {
>        const int *map;
>        u32 saved_iocfg;
> +       spinlock_t sidpr_lock;  /* FIXME: remove once locking in EH is fixed */
>         void __iomem *sidpr;
> };
> 
> <SNIP - MY CHANGE BY HAND>
> struct piix_host_priv {
>        const int *map;
>        u32 saved_iocfg;
> +       spinlock_t sidpr_lock;  /* FIXME: remove once locking in EH is fixed */
> +       printk("MWK - ata_sidpr patch applied!\n");
>        void __iomem *sidpr;
> };
> 
> Maybe I should have just put it on the same line as the previous
> spinlock command?
> 
> I'll play with it and see if I can get it working.


Ah, so you added a line to a patch file.  That means that the patch
block header must be changed from something like this:

@@ -964,12 +968,15 @@

to like this:

@@ -964,12 +968,16 @@

Any text following the second "@@" is just a comment so it does not matter.

The ,12  ,15   ,16   are all line counts. The ",12" is the number of lines
in the before version of the patch.  The ",15" or ",16" is the number of lines
in the after version of the patch, so you would need to increase it by 1 if
you added one line.   Or you can just put the printk on the same line as another
part of the patch and it won't matter.  :)


---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mark Knecht Aug. 4, 2010, 4:16 p.m. UTC | #6
On Tue, Aug 3, 2010 at 11:55 AM, Randy Dunlap <randy.dunlap@oracle.com> wrote:
> On Tue, 3 Aug 2010 11:47:25 -0700 Mark Knecht wrote:
>
>> On Tue, Aug 3, 2010 at 11:41 AM, Randy Dunlap <randy.dunlap@oracle.com> wrote:
>> > On Mon, 2 Aug 2010 15:07:11 -0700 Mark Knecht wrote:
>> >
>> >> On Thu, Jul 22, 2010 at 5:39 AM, Tejun Heo <tj@kernel.org> wrote:
>> >>
>> >> Hi Tejun,
>> >>    I'm finally home and trying to get back to this. I'm really a bad
>> >> programmer so I don't know what I've done wrong but it seems patch
>> >> isn't happy with me.
>> >>
>> >> c2stable linux # patch --dry-run -p1 <../ata_piix-sidpr-lock.patch
>> >> patching file drivers/ata/ata_piix.c
>> >> patch: **** malformed patch at line 13:
>> >
>> > Whenever the patch file was saved on this system, line 13 of it was
>> > split (probably by an email client).  Whenever I see this, I just
>> > join (merge) that line and the next one and try again... sometimes
>> > several lines are malformed and have to be fixed like this.
>> >
>>
>> Randy,
>>    Could very well be what happened. I added line 13 (the printk) by hand
>>
>> <SNIP - ORIGINAL PATCH FILE>
>> struct piix_host_priv {
>>        const int *map;
>>        u32 saved_iocfg;
>> +       spinlock_t sidpr_lock;  /* FIXME: remove once locking in EH is fixed */
>>         void __iomem *sidpr;
>> };
>>
>> <SNIP - MY CHANGE BY HAND>
>> struct piix_host_priv {
>>        const int *map;
>>        u32 saved_iocfg;
>> +       spinlock_t sidpr_lock;  /* FIXME: remove once locking in EH is fixed */
>> +       printk("MWK - ata_sidpr patch applied!\n");
>>        void __iomem *sidpr;
>> };
>>
>> Maybe I should have just put it on the same line as the previous
>> spinlock command?
>>
>> I'll play with it and see if I can get it working.
>
>
> Ah, so you added a line to a patch file.  That means that the patch
> block header must be changed from something like this:
>
> @@ -964,12 +968,15 @@
>
> to like this:
>
> @@ -964,12 +968,16 @@
>
> Any text following the second "@@" is just a comment so it does not matter.
>
> The ,12  ,15   ,16   are all line counts. The ",12" is the number of lines
> in the before version of the patch.  The ",15" or ",16" is the number of lines
> in the after version of the patch, so you would need to increase it by 1 if
> you added one line.   Or you can just put the printk on the same line as another
> part of the patch and it won't matter.  :)
>
>
> ---
> ~Randy
> *** Remember to use Documentation/SubmitChecklist when testing your code ***
>

Thanks Randy. This helped.

OK - so I'm going to hang fire on this problem right now. As part of
this process I've updated to a 2.6.35-gentoo kernel and I'm not seeing
the problem at all so far. I've booted about 25 times over the last 3
days - warm and cold boots mixed - and so far no failures. While I've
had times in the past where earlier kernels didn't exhibit the problem
for days I don't remember a time where I got this many good boots in a
row.

As my problem didn't seem to be exactly the same as Paul's, and since
Tejun expressed surprise that his fix for Paul's issue didn't fix
mine, maybe my problem is actually something different and (hopefully)
already fixed.

I'll continue to use the machine and watch for any failures. If I get
one I'll reengage the list and go into debug mode.

Cheers,
Mark
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 7409f98..3971bc0 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -158,6 +158,7 @@  struct piix_map_db {
 struct piix_host_priv {
        const int *map;
        u32 saved_iocfg;
+       spinlock_t sidpr_lock;  /* FIXME: remove once locking in EH is fixed */
+       printk("MWK - ata_sidpr patch applied!\n");
        void __iomem *sidpr;
 };

@@ -951,12 +952,15 @@  static int piix_sidpr_scr_read(struct ata_link *link,
                               unsigned int reg, u32 *val)
 {
        struct piix_host_priv *hpriv = link->ap->host->private_data;
+       unsigned long flags;

        if (reg >= ARRAY_SIZE(piix_sidx_map))
                return -EINVAL;

+       spin_lock_irqsave(&hpriv->sidpr_lock, flags);
        piix_sidpr_sel(link, reg);
        *val = ioread32(hpriv->sidpr + PIIX_SIDPR_DATA);
+       spin_unlock_irqrestore(&hpriv->sidpr_lock, flags);
        return 0;
 }

@@ -964,12 +968,15 @@  static int piix_sidpr_scr_write(struct ata_link *link,
                                unsigned int reg, u32 val)
 {
        struct piix_host_priv *hpriv = link->ap->host->private_data;
+       unsigned long flags;

        if (reg >= ARRAY_SIZE(piix_sidx_map))
                return -EINVAL;

+       spin_lock_irqsave(&hpriv->sidpr_lock, flags);
        piix_sidpr_sel(link, reg);
        iowrite32(val, hpriv->sidpr + PIIX_SIDPR_DATA);
+       spin_unlock_irqrestore(&hpriv->sidpr_lock, flags);
        return 0;
 }

@@ -1566,6 +1573,7 @@  static int __devinit piix_init_one(struct pci_dev *pdev,
        hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL);
        if (!hpriv)
                return -ENOMEM;
+       spin_lock_init(&hpriv->sidpr_lock);

        /* Save IOCFG, this will be used for cable detection, quirk
         * detection and restoration on detach.  This is necessary