diff mbox

[PATCHv2] macio: handle non-block ATAPI DMA transfers the same as block DMA transfers

Message ID 55CD1430.5060107@ilande.co.uk
State New
Headers show

Commit Message

Mark Cave-Ayland Aug. 13, 2015, 10:03 p.m. UTC
On 01/08/15 19:33, Aurelien Jarno wrote:

> On 2015-08-01 17:54, Mark Cave-Ayland wrote:
>> The existing code incorrectly changes the dma_active flag when a non-block
>> transfer has completed leading to a hang on newer versions of Linux because the
>> IDE and DMA engines deadlock waiting for each other.
>>
>> Instead copy the buffer directly to RAM, set the remaining transfer size to 0 and
>> then invoke the ATAPI callback manually once again to correctly finish the
>> transfer in an identical manner to a block transfer.
>>
>> Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
>> ---
> 
> Thanks for the patch, it improves things here. I don't get messages
> anymore, and I get less messages when mounting the CD-ROM, though I
> still get one:
> 
>   [  307.258463] pata-macio 0.00021000:ata-4: timeout flushing DMA
>   [  307.262856] ata2.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x0
>   [  307.262919] ata2.00: BMDMA stat 0x6
>   [  307.263048] sr 1:0:0:0: CDB: Get configuration: 46 00 00 28 00 00 00 00 10 00
>   [  307.263289] ata2.00: cmd a0/01:00:00:10:00/00:00:00:00:00/a0 tag 0 dma 16400 in
>   [  307.263297]          res 41/50:03:00:10:00/00:00:00:00:00/a0 Emask 0x20 (host bus error)
>   [  307.263407] ata2.00: status: { DRDY ERR }
>   [  307.271251] ata2.00: configured for MWDMA2
>   [  307.271824] ata2: EH complete
> 
> The CD-ROM is fully functional though.

Apologies for the delay in getting back to you on this one, however I
finally found some time today to try and work out what is happening.
Attached is a v3 patch that works here for your lenny and wheezy
standard images - please can you test and confirm it works for you?
Debugging is enabled just in case anything unusual shows up during your
testing.

I've made a couple of minor tweaks to v2: firstly I don't call
pmac_ide_atapi_transfer_cb() to go around again, since I was seeing the
dbdma_end() function being called more than once when lba == -1.
Secondly, I've moved the DBDMA flush out of the RUN status check section
so it is called unconditionally, and removed the second version which
was actually causing the problem above by not clearing the FLUSH bit
upon write.

FWIW with this patch applied I still see an error upon mount with your
squeeze image but I believe that this may be a bug in that particular
version of the Linux kernel. With MACIO and DBDMA logging enabled I see
the following when trying to mount the CDROM:


------------ IDE transfer
buffer_size: 14   buffer_index: 0
lba: ffffffff    size: 14
-------------------------
DBDMA: DBDMA_run_bh
DBDMA: writel 0x0000000000000d00 <= 0x80008000
DBDMA: channel 0x1a reg 0x0
DBDMA:     status 0x00008400
DBDMA: readl 0x0000000000000d00 => 0x80008000
DBDMA: channel 0x1a reg 0x0
DBDMA: DBDMA_run_bh
DBDMA: channel_run
dbdma_cmd 0x252c328
    req_count 0x0020
    command 0x3000
    phy_addr 0x0670e000
    cmd_dep 0x00000000
    res_count 0x0000
    xfer_status 0x0000
DBDMA: start_input
DBDMA: addr 0x670e000 key 0x0

pmac_ide_atapi_transfer_cb
DBDMA: dbdma_end
DBDMA: conditional_wait
DBDMA: dbdma_cmdptr_save 0x05ef2000
DBDMA: xfer_status 0x00008400 res_count 0x0000
DBDMA: conditional_interrupt
DBDMA: conditional_branch
DBDMA: dbdma_cmdptr_load 0x05ef2010
DBDMA: channel_run
dbdma_cmd 0x252c328
    req_count 0x0000
    command 0x7000
    phy_addr 0x00000000
    cmd_dep 0x00000000
    res_count 0x0000
    xfer_status 0x0000
DBDMA: readl 0x0000000000000d04 => 0x00008000
DBDMA: channel 0x1a reg 0x1
DBDMA: readl 0x0000000000000d04 => 0x00008000
DBDMA: channel 0x1a reg 0x1
DBDMA: writel 0x0000000000000d00 <= 0x98000000
DBDMA: channel 0x1a reg 0x0
DBDMA:     status 0x00000000
DBDMA: writel 0x0000000000000d00 <= 0xf8000000
DBDMA: channel 0x1a reg 0x0
DBDMA:     status 0x00000000
DBDMA: readl 0x0000000000000d04 => 0x00000000
DBDMA: channel 0x1a reg 0x1
DBDMA: writel 0x0000000000000d0c <= 0x05ef2000
DBDMA: channel 0x1a reg 0x3
DBDMA: dbdma_cmdptr_load 0x05ef2000


------------ IDE transfer
buffer_size: 1000   buffer_index: 0
lba: 21260    size: 1000
-------------------------

..etc..


Tracing through the kernel source I can see that the DMA read error is
being caused by pmac_ide_dma_end() returning a non-zero value:


static int
pmac_ide_dma_end (ide_drive_t *drive)
{
        ide_hwif_t *hwif = drive->hwif;
        pmac_ide_hwif_t *pmif =
                (pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent);
        volatile struct dbdma_regs __iomem *dma = pmif->dma_regs;
        u32 dstat;

        dstat = readl(&dma->status);
        writel(((RUN|WAKE|DEAD) << 16), &dma->control);

        /* verify good dma status. we don't check for ACTIVE beeing 0.
We should...
         * in theory, but with ATAPI decices doing buffer underruns,
that would
         * cause us to disable DMA, which isn't what we want
         */
        return (dstat & (RUN|DEAD)) != RUN;
}


Right at the very end of the CDROM code the problem was being caused by
the very last request (very similar to the initial request above) that
looked like this:


------------ IDE transfer
buffer_size: 14   buffer_index: 0
lba: ffffffff    size: 14
-------------------------
DBDMA: writel 0x0000000000000d00 <= 0x80008000
DBDMA: channel 0x1a reg 0x0
DBDMA:     status 0x00008400
DBDMA: readl 0x0000000000000d00 => 0x80008000
DBDMA: channel 0x1a reg 0x0
DBDMA: DBDMA_run_bh
DBDMA: channel_run
dbdma_cmd 0x252c328
    req_count 0x0020
    command 0x3000
    phy_addr 0x05e10000
    cmd_dep 0x00000000
    res_count 0x0000
    xfer_status 0x0000
DBDMA: start_input
DBDMA: addr 0x5e10000 key 0x0

pmac_ide_atapi_transfer_cb
DBDMA: dbdma_end
DBDMA: conditional_wait
DBDMA: dbdma_cmdptr_save 0x05ef2000
DBDMA: xfer_status 0x00008400 res_count 0x0000
DBDMA: conditional_interrupt
DBDMA: conditional_branch
DBDMA: dbdma_cmdptr_load 0x05ef2010
DBDMA: channel_run
dbdma_cmd 0x252c328
    req_count 0x0000
    command 0x7000
    phy_addr 0x00000000
    cmd_dep 0x00000000
    res_count 0x0000
    xfer_status 0x0000
DBDMA: readl 0x0000000000000d04 => 0x00008000
DBDMA: channel 0x1a reg 0x1
DBDMA: readl 0x0000000000000d04 => 0x00008000
DBDMA: channel 0x1a reg 0x1
DBDMA: writel 0x0000000000000d00 <= 0x98000000
DBDMA: channel 0x1a reg 0x0
DBDMA:     status 0x00000000
DBDMA: writel 0x0000000000000d00 <= 0xf8000000
DBDMA: channel 0x1a reg 0x0
DBDMA:     status 0x00000000
DBDMA: readl 0x0000000000000d04 => 0x00000000
DBDMA: channel 0x1a reg 0x1
DBDMA: writel 0x0000000000000d0c <= 0x05ef2000
DBDMA: channel 0x1a reg 0x3
DBDMA: dbdma_cmdptr_load 0x05ef2000
DBDMA: readl 0x0000000000000d04 => 0x00000000
DBDMA: channel 0x1a reg 0x1

DBDMA: readl 0x0000000000000d04 => 0x00000000
                                   ^^^^^^^^^^
DBDMA: channel 0x1a reg 0x1
DBDMA: writel 0x0000000000000d00 <= 0x98000000


DBDMA: channel 0x1a reg 0x0
DBDMA:     status 0x00000000
DBDMA: writel 0x0000000000000b00 <= 0xf8000000
DBDMA: channel 0x16 reg 0x0
DBDMA:     status 0x00000000
DBDMA: readl 0x0000000000000b04 => 0x00000000
DBDMA: channel 0x16 reg 0x1
DBDMA: writel 0x0000000000000b0c <= 0x05eea000
DBDMA: channel 0x16 reg 0x3
DBDMA: dbdma_cmdptr_load 0x05eea000


For some reason pmac_ide_dma_end() was being called without an active
DMA request and so the RUN bit was clear on entry which was causing the
error flag to be set. Given that this doesn't occur on lenny and wheezy,
I'm inclined to believe that this is a bug in the squeeze kernel.

Anyhow if this survives your testing then I'll repost as 2 separate
patches with a CC to qemu-stable so that this also gets picked up in the
next 2.4 release.


ATB,

Mark.

Comments

John Snow Aug. 13, 2015, 10:59 p.m. UTC | #1
On 08/13/2015 06:03 PM, Mark Cave-Ayland wrote:
> On 01/08/15 19:33, Aurelien Jarno wrote:
> 
>> On 2015-08-01 17:54, Mark Cave-Ayland wrote:
>>> The existing code incorrectly changes the dma_active flag when a non-block
>>> transfer has completed leading to a hang on newer versions of Linux because the
>>> IDE and DMA engines deadlock waiting for each other.
>>>
>>> Instead copy the buffer directly to RAM, set the remaining transfer size to 0 and
>>> then invoke the ATAPI callback manually once again to correctly finish the
>>> transfer in an identical manner to a block transfer.
>>>
>>> Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
>>> ---
>>
>> Thanks for the patch, it improves things here. I don't get messages
>> anymore, and I get less messages when mounting the CD-ROM, though I
>> still get one:
>>
>>   [  307.258463] pata-macio 0.00021000:ata-4: timeout flushing DMA
>>   [  307.262856] ata2.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x0
>>   [  307.262919] ata2.00: BMDMA stat 0x6
>>   [  307.263048] sr 1:0:0:0: CDB: Get configuration: 46 00 00 28 00 00 00 00 10 00
>>   [  307.263289] ata2.00: cmd a0/01:00:00:10:00/00:00:00:00:00/a0 tag 0 dma 16400 in
>>   [  307.263297]          res 41/50:03:00:10:00/00:00:00:00:00/a0 Emask 0x20 (host bus error)
>>   [  307.263407] ata2.00: status: { DRDY ERR }
>>   [  307.271251] ata2.00: configured for MWDMA2
>>   [  307.271824] ata2: EH complete
>>
>> The CD-ROM is fully functional though.
> 
> Apologies for the delay in getting back to you on this one, however I
> finally found some time today to try and work out what is happening.
> Attached is a v3 patch that works here for your lenny and wheezy
> standard images - please can you test and confirm it works for you?
> Debugging is enabled just in case anything unusual shows up during your
> testing.
> 

Awesome! I'm about to hit the road for KVM Forum, but I'll keep my eyes
peeled if someone else reviews this.

Thanks!

--js

> I've made a couple of minor tweaks to v2: firstly I don't call
> pmac_ide_atapi_transfer_cb() to go around again, since I was seeing the
> dbdma_end() function being called more than once when lba == -1.
> Secondly, I've moved the DBDMA flush out of the RUN status check section
> so it is called unconditionally, and removed the second version which
> was actually causing the problem above by not clearing the FLUSH bit
> upon write.
> 
> FWIW with this patch applied I still see an error upon mount with your
> squeeze image but I believe that this may be a bug in that particular
> version of the Linux kernel. With MACIO and DBDMA logging enabled I see
> the following when trying to mount the CDROM:
> 
> 
> ------------ IDE transfer
> buffer_size: 14   buffer_index: 0
> lba: ffffffff    size: 14
> -------------------------
> DBDMA: DBDMA_run_bh
> DBDMA: writel 0x0000000000000d00 <= 0x80008000
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00008400
> DBDMA: readl 0x0000000000000d00 => 0x80008000
> DBDMA: channel 0x1a reg 0x0
> DBDMA: DBDMA_run_bh
> DBDMA: channel_run
> dbdma_cmd 0x252c328
>     req_count 0x0020
>     command 0x3000
>     phy_addr 0x0670e000
>     cmd_dep 0x00000000
>     res_count 0x0000
>     xfer_status 0x0000
> DBDMA: start_input
> DBDMA: addr 0x670e000 key 0x0
> 
> pmac_ide_atapi_transfer_cb
> DBDMA: dbdma_end
> DBDMA: conditional_wait
> DBDMA: dbdma_cmdptr_save 0x05ef2000
> DBDMA: xfer_status 0x00008400 res_count 0x0000
> DBDMA: conditional_interrupt
> DBDMA: conditional_branch
> DBDMA: dbdma_cmdptr_load 0x05ef2010
> DBDMA: channel_run
> dbdma_cmd 0x252c328
>     req_count 0x0000
>     command 0x7000
>     phy_addr 0x00000000
>     cmd_dep 0x00000000
>     res_count 0x0000
>     xfer_status 0x0000
> DBDMA: readl 0x0000000000000d04 => 0x00008000
> DBDMA: channel 0x1a reg 0x1
> DBDMA: readl 0x0000000000000d04 => 0x00008000
> DBDMA: channel 0x1a reg 0x1
> DBDMA: writel 0x0000000000000d00 <= 0x98000000
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00000000
> DBDMA: writel 0x0000000000000d00 <= 0xf8000000
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00000000
> DBDMA: readl 0x0000000000000d04 => 0x00000000
> DBDMA: channel 0x1a reg 0x1
> DBDMA: writel 0x0000000000000d0c <= 0x05ef2000
> DBDMA: channel 0x1a reg 0x3
> DBDMA: dbdma_cmdptr_load 0x05ef2000
> 
> 
> ------------ IDE transfer
> buffer_size: 1000   buffer_index: 0
> lba: 21260    size: 1000
> -------------------------
> 
> ..etc..
> 
> 
> Tracing through the kernel source I can see that the DMA read error is
> being caused by pmac_ide_dma_end() returning a non-zero value:
> 
> 
> static int
> pmac_ide_dma_end (ide_drive_t *drive)
> {
>         ide_hwif_t *hwif = drive->hwif;
>         pmac_ide_hwif_t *pmif =
>                 (pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent);
>         volatile struct dbdma_regs __iomem *dma = pmif->dma_regs;
>         u32 dstat;
> 
>         dstat = readl(&dma->status);
>         writel(((RUN|WAKE|DEAD) << 16), &dma->control);
> 
>         /* verify good dma status. we don't check for ACTIVE beeing 0.
> We should...
>          * in theory, but with ATAPI decices doing buffer underruns,
> that would
>          * cause us to disable DMA, which isn't what we want
>          */
>         return (dstat & (RUN|DEAD)) != RUN;
> }
> 
> 
> Right at the very end of the CDROM code the problem was being caused by
> the very last request (very similar to the initial request above) that
> looked like this:
> 
> 
> ------------ IDE transfer
> buffer_size: 14   buffer_index: 0
> lba: ffffffff    size: 14
> -------------------------
> DBDMA: writel 0x0000000000000d00 <= 0x80008000
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00008400
> DBDMA: readl 0x0000000000000d00 => 0x80008000
> DBDMA: channel 0x1a reg 0x0
> DBDMA: DBDMA_run_bh
> DBDMA: channel_run
> dbdma_cmd 0x252c328
>     req_count 0x0020
>     command 0x3000
>     phy_addr 0x05e10000
>     cmd_dep 0x00000000
>     res_count 0x0000
>     xfer_status 0x0000
> DBDMA: start_input
> DBDMA: addr 0x5e10000 key 0x0
> 
> pmac_ide_atapi_transfer_cb
> DBDMA: dbdma_end
> DBDMA: conditional_wait
> DBDMA: dbdma_cmdptr_save 0x05ef2000
> DBDMA: xfer_status 0x00008400 res_count 0x0000
> DBDMA: conditional_interrupt
> DBDMA: conditional_branch
> DBDMA: dbdma_cmdptr_load 0x05ef2010
> DBDMA: channel_run
> dbdma_cmd 0x252c328
>     req_count 0x0000
>     command 0x7000
>     phy_addr 0x00000000
>     cmd_dep 0x00000000
>     res_count 0x0000
>     xfer_status 0x0000
> DBDMA: readl 0x0000000000000d04 => 0x00008000
> DBDMA: channel 0x1a reg 0x1
> DBDMA: readl 0x0000000000000d04 => 0x00008000
> DBDMA: channel 0x1a reg 0x1
> DBDMA: writel 0x0000000000000d00 <= 0x98000000
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00000000
> DBDMA: writel 0x0000000000000d00 <= 0xf8000000
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00000000
> DBDMA: readl 0x0000000000000d04 => 0x00000000
> DBDMA: channel 0x1a reg 0x1
> DBDMA: writel 0x0000000000000d0c <= 0x05ef2000
> DBDMA: channel 0x1a reg 0x3
> DBDMA: dbdma_cmdptr_load 0x05ef2000
> DBDMA: readl 0x0000000000000d04 => 0x00000000
> DBDMA: channel 0x1a reg 0x1
> 
> DBDMA: readl 0x0000000000000d04 => 0x00000000
>                                    ^^^^^^^^^^
> DBDMA: channel 0x1a reg 0x1
> DBDMA: writel 0x0000000000000d00 <= 0x98000000
> 
> 
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00000000
> DBDMA: writel 0x0000000000000b00 <= 0xf8000000
> DBDMA: channel 0x16 reg 0x0
> DBDMA:     status 0x00000000
> DBDMA: readl 0x0000000000000b04 => 0x00000000
> DBDMA: channel 0x16 reg 0x1
> DBDMA: writel 0x0000000000000b0c <= 0x05eea000
> DBDMA: channel 0x16 reg 0x3
> DBDMA: dbdma_cmdptr_load 0x05eea000
> 
> 
> For some reason pmac_ide_dma_end() was being called without an active
> DMA request and so the RUN bit was clear on entry which was causing the
> error flag to be set. Given that this doesn't occur on lenny and wheezy,
> I'm inclined to believe that this is a bug in the squeeze kernel.
> 
> Anyhow if this survives your testing then I'll repost as 2 separate
> patches with a CC to qemu-stable so that this also gets picked up in the
> next 2.4 release.
> 
> 
> ATB,
> 
> Mark.
>
Aurelien Jarno Aug. 17, 2015, 8:39 p.m. UTC | #2
On 2015-08-13 23:03, Mark Cave-Ayland wrote:
> On 01/08/15 19:33, Aurelien Jarno wrote:
> 
> > On 2015-08-01 17:54, Mark Cave-Ayland wrote:
> >> The existing code incorrectly changes the dma_active flag when a non-block
> >> transfer has completed leading to a hang on newer versions of Linux because the
> >> IDE and DMA engines deadlock waiting for each other.
> >>
> >> Instead copy the buffer directly to RAM, set the remaining transfer size to 0 and
> >> then invoke the ATAPI callback manually once again to correctly finish the
> >> transfer in an identical manner to a block transfer.
> >>
> >> Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
> >> ---

[snip]

> For some reason pmac_ide_dma_end() was being called without an active
> DMA request and so the RUN bit was clear on entry which was causing the
> error flag to be set. Given that this doesn't occur on lenny and wheezy,
> I'm inclined to believe that this is a bug in the squeeze kernel.
> 
> Anyhow if this survives your testing then I'll repost as 2 separate
> patches with a CC to qemu-stable so that this also gets picked up in the
> next 2.4 release.

Thanks for working on that. I don't feel I have enough knowledge of this
part of QEMU to review the patch, that said, I have been able to test
it and I confirm it fixes the issue.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
John Snow Aug. 21, 2015, 7:04 p.m. UTC | #3
On 08/13/2015 03:03 PM, Mark Cave-Ayland wrote:
> On 01/08/15 19:33, Aurelien Jarno wrote:
> 
>> On 2015-08-01 17:54, Mark Cave-Ayland wrote:
>>> The existing code incorrectly changes the dma_active flag when a non-block
>>> transfer has completed leading to a hang on newer versions of Linux because the
>>> IDE and DMA engines deadlock waiting for each other.
>>>
>>> Instead copy the buffer directly to RAM, set the remaining transfer size to 0 and
>>> then invoke the ATAPI callback manually once again to correctly finish the
>>> transfer in an identical manner to a block transfer.
>>>
>>> Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
>>> ---
>>
>> Thanks for the patch, it improves things here. I don't get messages
>> anymore, and I get less messages when mounting the CD-ROM, though I
>> still get one:
>>
>>   [  307.258463] pata-macio 0.00021000:ata-4: timeout flushing DMA
>>   [  307.262856] ata2.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x0
>>   [  307.262919] ata2.00: BMDMA stat 0x6
>>   [  307.263048] sr 1:0:0:0: CDB: Get configuration: 46 00 00 28 00 00 00 00 10 00
>>   [  307.263289] ata2.00: cmd a0/01:00:00:10:00/00:00:00:00:00/a0 tag 0 dma 16400 in
>>   [  307.263297]          res 41/50:03:00:10:00/00:00:00:00:00/a0 Emask 0x20 (host bus error)
>>   [  307.263407] ata2.00: status: { DRDY ERR }
>>   [  307.271251] ata2.00: configured for MWDMA2
>>   [  307.271824] ata2: EH complete
>>
>> The CD-ROM is fully functional though.
> 
> Apologies for the delay in getting back to you on this one, however I
> finally found some time today to try and work out what is happening.
> Attached is a v3 patch that works here for your lenny and wheezy
> standard images - please can you test and confirm it works for you?
> Debugging is enabled just in case anything unusual shows up during your
> testing.
> 
> I've made a couple of minor tweaks to v2: firstly I don't call
> pmac_ide_atapi_transfer_cb() to go around again, since I was seeing the
> dbdma_end() function being called more than once when lba == -1.
> Secondly, I've moved the DBDMA flush out of the RUN status check section
> so it is called unconditionally, and removed the second version which
> was actually causing the problem above by not clearing the FLUSH bit
> upon write.
> 
> FWIW with this patch applied I still see an error upon mount with your
> squeeze image but I believe that this may be a bug in that particular
> version of the Linux kernel. With MACIO and DBDMA logging enabled I see
> the following when trying to mount the CDROM:
> 
> 
> ------------ IDE transfer
> buffer_size: 14   buffer_index: 0
> lba: ffffffff    size: 14
> -------------------------
> DBDMA: DBDMA_run_bh
> DBDMA: writel 0x0000000000000d00 <= 0x80008000
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00008400
> DBDMA: readl 0x0000000000000d00 => 0x80008000
> DBDMA: channel 0x1a reg 0x0
> DBDMA: DBDMA_run_bh
> DBDMA: channel_run
> dbdma_cmd 0x252c328
>     req_count 0x0020
>     command 0x3000
>     phy_addr 0x0670e000
>     cmd_dep 0x00000000
>     res_count 0x0000
>     xfer_status 0x0000
> DBDMA: start_input
> DBDMA: addr 0x670e000 key 0x0
> 
> pmac_ide_atapi_transfer_cb
> DBDMA: dbdma_end
> DBDMA: conditional_wait
> DBDMA: dbdma_cmdptr_save 0x05ef2000
> DBDMA: xfer_status 0x00008400 res_count 0x0000
> DBDMA: conditional_interrupt
> DBDMA: conditional_branch
> DBDMA: dbdma_cmdptr_load 0x05ef2010
> DBDMA: channel_run
> dbdma_cmd 0x252c328
>     req_count 0x0000
>     command 0x7000
>     phy_addr 0x00000000
>     cmd_dep 0x00000000
>     res_count 0x0000
>     xfer_status 0x0000
> DBDMA: readl 0x0000000000000d04 => 0x00008000
> DBDMA: channel 0x1a reg 0x1
> DBDMA: readl 0x0000000000000d04 => 0x00008000
> DBDMA: channel 0x1a reg 0x1
> DBDMA: writel 0x0000000000000d00 <= 0x98000000
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00000000
> DBDMA: writel 0x0000000000000d00 <= 0xf8000000
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00000000
> DBDMA: readl 0x0000000000000d04 => 0x00000000
> DBDMA: channel 0x1a reg 0x1
> DBDMA: writel 0x0000000000000d0c <= 0x05ef2000
> DBDMA: channel 0x1a reg 0x3
> DBDMA: dbdma_cmdptr_load 0x05ef2000
> 
> 
> ------------ IDE transfer
> buffer_size: 1000   buffer_index: 0
> lba: 21260    size: 1000
> -------------------------
> 
> ..etc..
> 
> 
> Tracing through the kernel source I can see that the DMA read error is
> being caused by pmac_ide_dma_end() returning a non-zero value:
> 
> 
> static int
> pmac_ide_dma_end (ide_drive_t *drive)
> {
>         ide_hwif_t *hwif = drive->hwif;
>         pmac_ide_hwif_t *pmif =
>                 (pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent);
>         volatile struct dbdma_regs __iomem *dma = pmif->dma_regs;
>         u32 dstat;
> 
>         dstat = readl(&dma->status);
>         writel(((RUN|WAKE|DEAD) << 16), &dma->control);
> 
>         /* verify good dma status. we don't check for ACTIVE beeing 0.
> We should...
>          * in theory, but with ATAPI decices doing buffer underruns,
> that would
>          * cause us to disable DMA, which isn't what we want
>          */
>         return (dstat & (RUN|DEAD)) != RUN;
> }
> 
> 
> Right at the very end of the CDROM code the problem was being caused by
> the very last request (very similar to the initial request above) that
> looked like this:
> 
> 
> ------------ IDE transfer
> buffer_size: 14   buffer_index: 0
> lba: ffffffff    size: 14
> -------------------------
> DBDMA: writel 0x0000000000000d00 <= 0x80008000
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00008400
> DBDMA: readl 0x0000000000000d00 => 0x80008000
> DBDMA: channel 0x1a reg 0x0
> DBDMA: DBDMA_run_bh
> DBDMA: channel_run
> dbdma_cmd 0x252c328
>     req_count 0x0020
>     command 0x3000
>     phy_addr 0x05e10000
>     cmd_dep 0x00000000
>     res_count 0x0000
>     xfer_status 0x0000
> DBDMA: start_input
> DBDMA: addr 0x5e10000 key 0x0
> 
> pmac_ide_atapi_transfer_cb
> DBDMA: dbdma_end
> DBDMA: conditional_wait
> DBDMA: dbdma_cmdptr_save 0x05ef2000
> DBDMA: xfer_status 0x00008400 res_count 0x0000
> DBDMA: conditional_interrupt
> DBDMA: conditional_branch
> DBDMA: dbdma_cmdptr_load 0x05ef2010
> DBDMA: channel_run
> dbdma_cmd 0x252c328
>     req_count 0x0000
>     command 0x7000
>     phy_addr 0x00000000
>     cmd_dep 0x00000000
>     res_count 0x0000
>     xfer_status 0x0000
> DBDMA: readl 0x0000000000000d04 => 0x00008000
> DBDMA: channel 0x1a reg 0x1
> DBDMA: readl 0x0000000000000d04 => 0x00008000
> DBDMA: channel 0x1a reg 0x1
> DBDMA: writel 0x0000000000000d00 <= 0x98000000
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00000000
> DBDMA: writel 0x0000000000000d00 <= 0xf8000000
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00000000
> DBDMA: readl 0x0000000000000d04 => 0x00000000
> DBDMA: channel 0x1a reg 0x1
> DBDMA: writel 0x0000000000000d0c <= 0x05ef2000
> DBDMA: channel 0x1a reg 0x3
> DBDMA: dbdma_cmdptr_load 0x05ef2000
> DBDMA: readl 0x0000000000000d04 => 0x00000000
> DBDMA: channel 0x1a reg 0x1
> 
> DBDMA: readl 0x0000000000000d04 => 0x00000000
>                                    ^^^^^^^^^^
> DBDMA: channel 0x1a reg 0x1
> DBDMA: writel 0x0000000000000d00 <= 0x98000000
> 
> 
> DBDMA: channel 0x1a reg 0x0
> DBDMA:     status 0x00000000
> DBDMA: writel 0x0000000000000b00 <= 0xf8000000
> DBDMA: channel 0x16 reg 0x0
> DBDMA:     status 0x00000000
> DBDMA: readl 0x0000000000000b04 => 0x00000000
> DBDMA: channel 0x16 reg 0x1
> DBDMA: writel 0x0000000000000b0c <= 0x05eea000
> DBDMA: channel 0x16 reg 0x3
> DBDMA: dbdma_cmdptr_load 0x05eea000
> 
> 
> For some reason pmac_ide_dma_end() was being called without an active
> DMA request and so the RUN bit was clear on entry which was causing the
> error flag to be set. Given that this doesn't occur on lenny and wheezy,
> I'm inclined to believe that this is a bug in the squeeze kernel.
> 
> Anyhow if this survives your testing then I'll repost as 2 separate
> patches with a CC to qemu-stable so that this also gets picked up in the
> next 2.4 release.
> 
> 
> ATB,
> 
> Mark.
> 

Do you want to resend the V3 here directly to the ML with Aurelien
Jarno's R-B?

--js
Mark Cave-Ayland Aug. 22, 2015, 1:11 a.m. UTC | #4
On 21/08/15 20:04, John Snow wrote:

(lots cut)

> Do you want to resend the V3 here directly to the ML with Aurelien
> Jarno's R-B?

I was actually looking at this yesterday, and it seems with some more
testing that only the DBDMA flush part is required. It could be that
fixing the flush semantics prevents the macio device getting into the
incorrect state that makes the macio patch necessary in the first place.

In this case the DBDMA patch should probably go via Alex since it's
fairly specific to the Mac DMA hardware rather than being related just
to the IDE interface.


ATB,

Mark.
John Snow Aug. 24, 2015, 6:43 p.m. UTC | #5
On 08/21/2015 06:11 PM, Mark Cave-Ayland wrote:
> On 21/08/15 20:04, John Snow wrote:
> 
> (lots cut)
> 
>> Do you want to resend the V3 here directly to the ML with Aurelien
>> Jarno's R-B?
> 
> I was actually looking at this yesterday, and it seems with some more
> testing that only the DBDMA flush part is required. It could be that
> fixing the flush semantics prevents the macio device getting into the
> incorrect state that makes the macio patch necessary in the first place.
> 
> In this case the DBDMA patch should probably go via Alex since it's
> fairly specific to the Mac DMA hardware rather than being related just
> to the IDE interface.
> 
> 
> ATB,
> 
> Mark.
> 

Even better! 8-)

Thanks,
--js
diff mbox

Patch

diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 66ac2ba..1d29fb7 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -31,7 +31,7 @@ 
 #include <hw/ide/internal.h>
 
 /* debug MACIO */
-// #define DEBUG_MACIO
+ #define DEBUG_MACIO
 
 #ifdef DEBUG_MACIO
 static const int debug_macio = 1;
@@ -274,8 +274,11 @@  static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
         /* Non-block ATAPI transfer - just copy to RAM */
         s->io_buffer_size = MIN(s->io_buffer_size, io->len);
         cpu_physical_memory_write(io->addr, s->io_buffer, s->io_buffer_size);
+
+        /* End of IDE transfer */
+        s->io_buffer_size = 0;
+        io->len = 0;
         ide_atapi_cmd_ok(s);
-        m->dma_active = false;
         goto done;
     }
 
diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c
index b25e851..6a59dcd 100644
--- a/hw/misc/macio/mac_dbdma.c
+++ b/hw/misc/macio/mac_dbdma.c
@@ -42,7 +42,7 @@ 
 #include "qemu/main-loop.h"
 
 /* debug DBDMA */
-//#define DEBUG_DBDMA
+#define DEBUG_DBDMA
 
 #ifdef DEBUG_DBDMA
 #define DBDMA_DPRINTF(fmt, ...)                                 \
@@ -590,12 +590,13 @@  dbdma_control_write(DBDMA_channel *ch)
     if ((ch->regs[DBDMA_STATUS] & RUN) && !(status & RUN)) {
         /* RUN is cleared */
         status &= ~(ACTIVE|DEAD);
-        if ((status & FLUSH) && ch->flush) {
-            ch->flush(&ch->io);
-            status &= ~FLUSH;
-        }
     }
 
+    if ((status & FLUSH) && ch->flush) {
+        ch->flush(&ch->io);
+        status &= ~FLUSH;
+    }
+    
     DBDMA_DPRINTF("    status 0x%08x\n", status);
 
     ch->regs[DBDMA_STATUS] = status;
@@ -603,9 +604,6 @@  dbdma_control_write(DBDMA_channel *ch)
     if (status & ACTIVE) {
         DBDMA_kick(dbdma_from_ch(ch));
     }
-    if ((status & FLUSH) && ch->flush) {
-        ch->flush(&ch->io);
-    }
 }
 
 static void dbdma_write(void *opaque, hwaddr addr,