Patchwork [03/11] ide: Split out BMDMA code from ATA core

login
register
mail settings
Submitter Alexander Graf
Date Dec. 14, 2010, 12:34 a.m.
Message ID <1292286883-20157-4-git-send-email-agraf@suse.de>
Download mbox | patch
Permalink /patch/75454/
State New
Headers show

Comments

Alexander Graf - Dec. 14, 2010, 12:34 a.m.
The ATA core is currently heavily intertwined with BMDMA code. Let's loosen
that a bit, so we can happily replace the DMA backend with different
implementations.

Signed-off-by: Alexander Graf <agraf@suse.de>

---

v7 -> v8:

  - rewrite as DMA ops

v8 -> v9:

  - fold in: split out irq setting
  - fold in: move header definitions out
  - make dma providers subclass of idedma (kwolf)
  - s/set_status/add_status/g (kwolf)
---
 hw/ide/cmd646.c   |    7 +-
 hw/ide/core.c     |  335 ++++++++++++++---------------------------------------
 hw/ide/internal.h |   69 +++++------
 hw/ide/pci.c      |  289 +++++++++++++++++++++++++++++++++++++++++++++-
 hw/ide/pci.h      |   30 +++++
 hw/ide/piix.c     |    7 +-
 hw/ide/via.c      |    7 +-
 7 files changed, 446 insertions(+), 298 deletions(-)
Kevin Wolf - Dec. 14, 2010, 4:20 p.m.
Am 14.12.2010 01:34, schrieb Alexander Graf:
> The ATA core is currently heavily intertwined with BMDMA code. Let's loosen
> that a bit, so we can happily replace the DMA backend with different
> implementations.
> 
> Signed-off-by: Alexander Graf <agraf@suse.de>

Something in this patch makes qemu abort with a failed assertion on
system_reset.

Kevin
Alexander Graf - Dec. 14, 2010, 11:17 p.m.
On 14.12.2010, at 17:20, Kevin Wolf wrote:

> Am 14.12.2010 01:34, schrieb Alexander Graf:
>> The ATA core is currently heavily intertwined with BMDMA code. Let's loosen
>> that a bit, so we can happily replace the DMA backend with different
>> implementations.
>> 
>> Signed-off-by: Alexander Graf <agraf@suse.de>
> 
> Something in this patch makes qemu abort with a failed assertion on
> system_reset.

Ouch. Thanks for catching that. The reset path was just plain wrong :).


Alex

Patch

diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c
index ea5d2dc..fde0617 100644
--- a/hw/ide/cmd646.c
+++ b/hw/ide/cmd646.c
@@ -167,9 +167,10 @@  static void bmdma_map(PCIDevice *pci_dev, int region_num,
 
     for(i = 0;i < 2; i++) {
         BMDMAState *bm = &d->bmdma[i];
-        d->bus[i].bmdma = bm;
+        bmdma_init(&d->bus[i], bm);
         bm->bus = d->bus+i;
-        qemu_add_vm_change_state_handler(ide_dma_restart_cb, bm);
+        qemu_add_vm_change_state_handler(d->bus[i].dma->ops->restart_cb,
+                                         &bm->dma);
 
         if (i == 0) {
             register_ioport_write(addr, 4, 1, bmdma_writeb_0, d);
@@ -218,7 +219,7 @@  static void cmd646_reset(void *opaque)
 
     for (i = 0; i < 2; i++) {
         ide_bus_reset(&d->bus[i]);
-        ide_dma_reset(&d->bmdma[i]);
+        d->bus[i].dma->ops->reset(&d->bmdma[i].dma);
     }
 }
 
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 5e2fcbd..2d0ad56 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -34,8 +34,6 @@ 
 
 #include <hw/ide/internal.h>
 
-#define IDE_PAGE_SIZE 4096
-
 static const int smart_attributes[][5] = {
     /* id,  flags, val, wrst, thrsh */
     { 0x01, 0x03, 0x64, 0x64, 0x06}, /* raw read */
@@ -61,11 +59,8 @@  static inline int media_is_cd(IDEState *s)
     return (media_present(s) && s->nb_sectors <= CD_MAX_SECTORS);
 }
 
-static void ide_dma_start(IDEState *s, BlockDriverCompletionFunc *dma_cb);
-static void ide_dma_restart(IDEState *s, int is_read);
 static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret);
 static int ide_handle_rw_error(IDEState *s, int error, int op);
-static void ide_flush_cache(IDEState *s);
 
 static void padstr(char *str, const char *src, int len)
 {
@@ -314,11 +309,11 @@  static inline void ide_abort_command(IDEState *s)
 }
 
 static inline void ide_dma_submit_check(IDEState *s,
-          BlockDriverCompletionFunc *dma_cb, BMDMAState *bm)
+          BlockDriverCompletionFunc *dma_cb)
 {
-    if (bm->aiocb)
+    if (s->bus->dma->aiocb)
 	return;
-    dma_cb(bm, -1);
+    dma_cb(s, -1);
 }
 
 /* prepare data transfer and tell what to do after */
@@ -328,8 +323,10 @@  static void ide_transfer_start(IDEState *s, uint8_t *buf, int size,
     s->end_transfer_func = end_transfer_func;
     s->data_ptr = buf;
     s->data_end = buf + size;
-    if (!(s->status & ERR_STAT))
+    if (!(s->status & ERR_STAT)) {
         s->status |= DRQ_STAT;
+    }
+    s->bus->dma->ops->start_transfer(s->bus->dma);
 }
 
 static void ide_transfer_stop(IDEState *s)
@@ -394,7 +391,7 @@  static void ide_rw_error(IDEState *s) {
     ide_set_irq(s->bus);
 }
 
-static void ide_sector_read(IDEState *s)
+void ide_sector_read(IDEState *s)
 {
     int64_t sector_num;
     int ret, n;
@@ -427,58 +424,15 @@  static void ide_sector_read(IDEState *s)
     }
 }
 
-
-/* return 0 if buffer completed */
-static int dma_buf_prepare(BMDMAState *bm, int is_write)
-{
-    IDEState *s = bmdma_active_if(bm);
-    struct {
-        uint32_t addr;
-        uint32_t size;
-    } prd;
-    int l, len;
-
-    qemu_sglist_init(&s->sg, s->nsector / (IDE_PAGE_SIZE / 512) + 1);
-    s->io_buffer_size = 0;
-    for(;;) {
-        if (bm->cur_prd_len == 0) {
-            /* end of table (with a fail safe of one page) */
-            if (bm->cur_prd_last ||
-                (bm->cur_addr - bm->addr) >= IDE_PAGE_SIZE)
-                return s->io_buffer_size != 0;
-            cpu_physical_memory_read(bm->cur_addr, (uint8_t *)&prd, 8);
-            bm->cur_addr += 8;
-            prd.addr = le32_to_cpu(prd.addr);
-            prd.size = le32_to_cpu(prd.size);
-            len = prd.size & 0xfffe;
-            if (len == 0)
-                len = 0x10000;
-            bm->cur_prd_len = len;
-            bm->cur_prd_addr = prd.addr;
-            bm->cur_prd_last = (prd.size & 0x80000000);
-        }
-        l = bm->cur_prd_len;
-        if (l > 0) {
-            qemu_sglist_add(&s->sg, bm->cur_prd_addr, l);
-            bm->cur_prd_addr += l;
-            bm->cur_prd_len -= l;
-            s->io_buffer_size += l;
-        }
-    }
-    return 1;
-}
-
 static void dma_buf_commit(IDEState *s, int is_write)
 {
     qemu_sglist_destroy(&s->sg);
 }
 
-static void ide_dma_set_inactive(BMDMAState *bm)
+static void ide_set_inactive(IDEState *s)
 {
-    bm->status &= ~BM_STATUS_DMAING;
-    bm->dma_cb = NULL;
-    bm->unit = -1;
-    bm->aiocb = NULL;
+    s->bus->dma->aiocb = NULL;
+    s->bus->dma->ops->set_inactive(s->bus->dma);
 }
 
 void ide_dma_error(IDEState *s)
@@ -486,8 +440,8 @@  void ide_dma_error(IDEState *s)
     ide_transfer_stop(s);
     s->error = ABRT_ERR;
     s->status = READY_STAT | ERR_STAT;
-    ide_dma_set_inactive(s->bus->bmdma);
-    s->bus->bmdma->status |= BM_STATUS_INT;
+    ide_set_inactive(s);
+    s->bus->dma->ops->add_status(s->bus->dma, BM_STATUS_INT);
     ide_set_irq(s->bus);
 }
 
@@ -503,8 +457,8 @@  static int ide_handle_rw_error(IDEState *s, int error, int op)
 
     if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
             || action == BLOCK_ERR_STOP_ANY) {
-        s->bus->bmdma->unit = s->unit;
-        s->bus->bmdma->status |= op;
+        s->bus->dma->ops->set_unit(s->bus->dma, s->unit);
+        s->bus->dma->ops->add_status(s->bus->dma, op);
         bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read);
         vm_stop(0);
     } else {
@@ -520,58 +474,9 @@  static int ide_handle_rw_error(IDEState *s, int error, int op)
     return 1;
 }
 
-/* return 0 if buffer completed */
-static int dma_buf_rw(BMDMAState *bm, int is_write)
-{
-    IDEState *s = bmdma_active_if(bm);
-    struct {
-        uint32_t addr;
-        uint32_t size;
-    } prd;
-    int l, len;
-
-    for(;;) {
-        l = s->io_buffer_size - s->io_buffer_index;
-        if (l <= 0)
-            break;
-        if (bm->cur_prd_len == 0) {
-            /* end of table (with a fail safe of one page) */
-            if (bm->cur_prd_last ||
-                (bm->cur_addr - bm->addr) >= IDE_PAGE_SIZE)
-                return 0;
-            cpu_physical_memory_read(bm->cur_addr, (uint8_t *)&prd, 8);
-            bm->cur_addr += 8;
-            prd.addr = le32_to_cpu(prd.addr);
-            prd.size = le32_to_cpu(prd.size);
-            len = prd.size & 0xfffe;
-            if (len == 0)
-                len = 0x10000;
-            bm->cur_prd_len = len;
-            bm->cur_prd_addr = prd.addr;
-            bm->cur_prd_last = (prd.size & 0x80000000);
-        }
-        if (l > bm->cur_prd_len)
-            l = bm->cur_prd_len;
-        if (l > 0) {
-            if (is_write) {
-                cpu_physical_memory_write(bm->cur_prd_addr,
-                                          s->io_buffer + s->io_buffer_index, l);
-            } else {
-                cpu_physical_memory_read(bm->cur_prd_addr,
-                                          s->io_buffer + s->io_buffer_index, l);
-            }
-            bm->cur_prd_addr += l;
-            bm->cur_prd_len -= l;
-            s->io_buffer_index += l;
-        }
-    }
-    return 1;
-}
-
-static void ide_read_dma_cb(void *opaque, int ret)
+void ide_read_dma_cb(void *opaque, int ret)
 {
-    BMDMAState *bm = opaque;
-    IDEState *s = bmdma_active_if(bm);
+    IDEState *s = opaque;
     int n;
     int64_t sector_num;
 
@@ -597,8 +502,8 @@  static void ide_read_dma_cb(void *opaque, int ret)
         s->status = READY_STAT | SEEK_STAT;
         ide_set_irq(s->bus);
     eot:
-        bm->status |= BM_STATUS_INT;
-        ide_dma_set_inactive(bm);
+        s->bus->dma->ops->add_status(s->bus->dma, BM_STATUS_INT);
+        ide_set_inactive(s);
         return;
     }
 
@@ -606,13 +511,13 @@  static void ide_read_dma_cb(void *opaque, int ret)
     n = s->nsector;
     s->io_buffer_index = 0;
     s->io_buffer_size = n * 512;
-    if (dma_buf_prepare(bm, 1) == 0)
+    if (s->bus->dma->ops->prepare_buf(s->bus->dma, 1) == 0)
         goto eot;
 #ifdef DEBUG_AIO
     printf("aio_read: sector_num=%" PRId64 " n=%d\n", sector_num, n);
 #endif
-    bm->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num, ide_read_dma_cb, bm);
-    ide_dma_submit_check(s, ide_read_dma_cb, bm);
+    s->bus->dma->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num, ide_read_dma_cb, s);
+    ide_dma_submit_check(s, ide_read_dma_cb);
 }
 
 static void ide_sector_read_dma(IDEState *s)
@@ -621,7 +526,7 @@  static void ide_sector_read_dma(IDEState *s)
     s->io_buffer_index = 0;
     s->io_buffer_size = 0;
     s->is_read = 1;
-    ide_dma_start(s, ide_read_dma_cb);
+    s->bus->dma->ops->start_dma(s->bus->dma, s, ide_read_dma_cb);
 }
 
 static void ide_sector_write_timer_cb(void *opaque)
@@ -630,7 +535,7 @@  static void ide_sector_write_timer_cb(void *opaque)
     ide_set_irq(s->bus);
 }
 
-static void ide_sector_write(IDEState *s)
+void ide_sector_write(IDEState *s)
 {
     int64_t sector_num;
     int ret, n, n1;
@@ -676,48 +581,9 @@  static void ide_sector_write(IDEState *s)
     }
 }
 
-static void ide_dma_restart_bh(void *opaque)
-{
-    BMDMAState *bm = opaque;
-    int is_read;
-
-    qemu_bh_delete(bm->bh);
-    bm->bh = NULL;
-
-    is_read = !!(bm->status & BM_STATUS_RETRY_READ);
-
-    if (bm->status & BM_STATUS_DMA_RETRY) {
-        bm->status &= ~(BM_STATUS_DMA_RETRY | BM_STATUS_RETRY_READ);
-        ide_dma_restart(bmdma_active_if(bm), is_read);
-    } else if (bm->status & BM_STATUS_PIO_RETRY) {
-        bm->status &= ~(BM_STATUS_PIO_RETRY | BM_STATUS_RETRY_READ);
-        if (is_read) {
-            ide_sector_read(bmdma_active_if(bm));
-        } else {
-            ide_sector_write(bmdma_active_if(bm));
-        }
-    } else if (bm->status & BM_STATUS_RETRY_FLUSH) {
-        ide_flush_cache(bmdma_active_if(bm));
-    }
-}
-
-void ide_dma_restart_cb(void *opaque, int running, int reason)
-{
-    BMDMAState *bm = opaque;
-
-    if (!running)
-        return;
-
-    if (!bm->bh) {
-        bm->bh = qemu_bh_new(ide_dma_restart_bh, bm);
-        qemu_bh_schedule(bm->bh);
-    }
-}
-
-static void ide_write_dma_cb(void *opaque, int ret)
+void ide_write_dma_cb(void *opaque, int ret)
 {
-    BMDMAState *bm = opaque;
-    IDEState *s = bmdma_active_if(bm);
+    IDEState *s = opaque;
     int n;
     int64_t sector_num;
 
@@ -740,21 +606,21 @@  static void ide_write_dma_cb(void *opaque, int ret)
         s->status = READY_STAT | SEEK_STAT;
         ide_set_irq(s->bus);
     eot:
-        bm->status |= BM_STATUS_INT;
-        ide_dma_set_inactive(bm);
+        s->bus->dma->ops->add_status(s->bus->dma, BM_STATUS_INT);
+        ide_set_inactive(s);
         return;
     }
 
     n = s->nsector;
     s->io_buffer_size = n * 512;
     /* launch next transfer */
-    if (dma_buf_prepare(bm, 0) == 0)
+    if (s->bus->dma->ops->prepare_buf(s->bus->dma, 0) == 0)
         goto eot;
 #ifdef DEBUG_AIO
     printf("aio_write: sector_num=%" PRId64 " n=%d\n", sector_num, n);
 #endif
-    bm->aiocb = dma_bdrv_write(s->bs, &s->sg, sector_num, ide_write_dma_cb, bm);
-    ide_dma_submit_check(s, ide_write_dma_cb, bm);
+    s->bus->dma->aiocb = dma_bdrv_write(s->bs, &s->sg, sector_num, ide_write_dma_cb, s);
+    ide_dma_submit_check(s, ide_write_dma_cb);
 }
 
 static void ide_sector_write_dma(IDEState *s)
@@ -763,7 +629,7 @@  static void ide_sector_write_dma(IDEState *s)
     s->io_buffer_index = 0;
     s->io_buffer_size = 0;
     s->is_read = 0;
-    ide_dma_start(s, ide_write_dma_cb);
+    s->bus->dma->ops->start_dma(s->bus->dma, s, ide_write_dma_cb);
 }
 
 void ide_atapi_cmd_ok(IDEState *s)
@@ -813,7 +679,7 @@  static void ide_flush_cb(void *opaque, int ret)
     ide_set_irq(s->bus);
 }
 
-static void ide_flush_cache(IDEState *s)
+void ide_flush_cache(IDEState *s)
 {
     BlockDriverAIOCB *acb;
 
@@ -1003,7 +869,8 @@  static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size)
 
     if (s->atapi_dma) {
     	s->status = READY_STAT | SEEK_STAT | DRQ_STAT;
-	ide_dma_start(s, ide_atapi_cmd_read_dma_cb);
+        s->bus->dma->ops->start_dma(s->bus->dma, s,
+                                   ide_atapi_cmd_read_dma_cb);
     } else {
     	s->status = READY_STAT | SEEK_STAT;
     	ide_atapi_cmd_reply_end(s);
@@ -1029,8 +896,7 @@  static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
 /* XXX: handle read errors */
 static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
 {
-    BMDMAState *bm = opaque;
-    IDEState *s = bmdma_active_if(bm);
+    IDEState *s = opaque;
     int data_offset, n;
 
     if (ret < 0) {
@@ -1056,7 +922,7 @@  static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
 	    s->lba += n;
 	}
         s->packet_transfer_size -= s->io_buffer_size;
-        if (dma_buf_rw(bm, 1) == 0)
+        if (s->bus->dma->ops->rw_buf(s->bus->dma, 1) == 0)
             goto eot;
     }
 
@@ -1065,8 +931,8 @@  static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
         s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD;
         ide_set_irq(s->bus);
     eot:
-        bm->status |= BM_STATUS_INT;
-        ide_dma_set_inactive(bm);
+        s->bus->dma->ops->add_status(s->bus->dma, BM_STATUS_INT);
+        ide_set_inactive(s);
         return;
     }
 
@@ -1085,12 +951,13 @@  static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
 #ifdef DEBUG_AIO
     printf("aio_read_cd: lba=%u n=%d\n", s->lba, n);
 #endif
-    bm->iov.iov_base = (void *)(s->io_buffer + data_offset);
-    bm->iov.iov_len = n * 4 * 512;
-    qemu_iovec_init_external(&bm->qiov, &bm->iov, 1);
-    bm->aiocb = bdrv_aio_readv(s->bs, (int64_t)s->lba << 2, &bm->qiov,
-                               n * 4, ide_atapi_cmd_read_dma_cb, bm);
-    if (!bm->aiocb) {
+    s->bus->dma->iov.iov_base = (void *)(s->io_buffer + data_offset);
+    s->bus->dma->iov.iov_len = n * 4 * 512;
+    qemu_iovec_init_external(&s->bus->dma->qiov, &s->bus->dma->iov, 1);
+    s->bus->dma->aiocb = bdrv_aio_readv(s->bs, (int64_t)s->lba << 2,
+                                       &s->bus->dma->qiov, n * 4,
+                                       ide_atapi_cmd_read_dma_cb, s);
+    if (!s->bus->dma->aiocb) {
         /* Note: media not present is the most likely case */
         ide_atapi_cmd_error(s, SENSE_NOT_READY,
                             ASC_MEDIUM_NOT_PRESENT);
@@ -1111,7 +978,8 @@  static void ide_atapi_cmd_read_dma(IDEState *s, int lba, int nb_sectors,
 
     /* XXX: check if BUSY_STAT should be set */
     s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT;
-    ide_dma_start(s, ide_atapi_cmd_read_dma_cb);
+    s->bus->dma->ops->start_dma(s->bus->dma, s,
+                               ide_atapi_cmd_read_dma_cb);
 }
 
 static void ide_atapi_cmd_read(IDEState *s, int lba, int nb_sectors,
@@ -2696,6 +2564,7 @@  int ide_init_drive(IDEState *s, BlockDriverState *bs,
     } else {
         pstrcpy(s->version, sizeof(s->version), QEMU_VERSION);
     }
+
     ide_reset(s);
     bdrv_set_removable(bs, s->drive_kind == IDE_CD);
     return 0;
@@ -2717,6 +2586,42 @@  static void ide_init1(IDEBus *bus, int unit)
                                            ide_sector_write_timer_cb, s);
 }
 
+static void ide_nop_start(IDEDMA *dma, IDEState *s,
+                          BlockDriverCompletionFunc *cb)
+{
+}
+
+static int ide_nop(IDEDMA *dma)
+{
+    return 0;
+}
+
+static int ide_nop_int(IDEDMA *dma, int x)
+{
+    return 0;
+}
+
+static void ide_nop_restart(void *opaque, int x, int y)
+{
+}
+
+static const IDEDMAOps ide_dma_nop_ops = {
+    .start_dma      = ide_nop_start,
+    .start_transfer = ide_nop,
+    .prepare_buf    = ide_nop_int,
+    .rw_buf         = ide_nop_int,
+    .set_unit       = ide_nop_int,
+    .add_status     = ide_nop_int,
+    .set_inactive   = ide_nop,
+    .restart_cb     = ide_nop_restart,
+    .reset          = ide_nop,
+};
+
+static IDEDMA ide_dma_nop = {
+    .ops = &ide_dma_nop_ops,
+    .aiocb = NULL,
+};
+
 void ide_init2(IDEBus *bus, qemu_irq irq)
 {
     int i;
@@ -2726,6 +2631,7 @@  void ide_init2(IDEBus *bus, qemu_irq irq)
         ide_reset(&bus->ifs[i]);
     }
     bus->irq = irq;
+    bus->dma = &ide_dma_nop;
 }
 
 /* TODO convert users to qdev and remove */
@@ -2749,6 +2655,7 @@  void ide_init2_with_non_qdev_drives(IDEBus *bus, DriveInfo *hd0,
         }
     }
     bus->irq = irq;
+    bus->dma = &ide_dma_nop;
 }
 
 void ide_init_ioport(IDEBus *bus, int iobase, int iobase2)
@@ -2916,73 +2823,3 @@  const VMStateDescription vmstate_ide_bus = {
         VMSTATE_END_OF_LIST()
     }
 };
-
-/***********************************************************/
-/* PCI IDE definitions */
-
-static void ide_dma_start(IDEState *s, BlockDriverCompletionFunc *dma_cb)
-{
-    BMDMAState *bm = s->bus->bmdma;
-    if(!bm)
-        return;
-    bm->unit = s->unit;
-    bm->dma_cb = dma_cb;
-    bm->cur_prd_last = 0;
-    bm->cur_prd_addr = 0;
-    bm->cur_prd_len = 0;
-    bm->sector_num = ide_get_sector(s);
-    bm->nsector = s->nsector;
-    if (bm->status & BM_STATUS_DMAING) {
-        bm->dma_cb(bm, 0);
-    }
-}
-
-static void ide_dma_restart(IDEState *s, int is_read)
-{
-    BMDMAState *bm = s->bus->bmdma;
-    ide_set_sector(s, bm->sector_num);
-    s->io_buffer_index = 0;
-    s->io_buffer_size = 0;
-    s->nsector = bm->nsector;
-    bm->cur_addr = bm->addr;
-
-    if (is_read) {
-        bm->dma_cb = ide_read_dma_cb;
-    } else {
-        bm->dma_cb = ide_write_dma_cb;
-    }
-
-    ide_dma_start(s, bm->dma_cb);
-}
-
-void ide_dma_cancel(BMDMAState *bm)
-{
-    if (bm->status & BM_STATUS_DMAING) {
-        if (bm->aiocb) {
-#ifdef DEBUG_AIO
-            printf("aio_cancel\n");
-#endif
-            bdrv_aio_cancel(bm->aiocb);
-        }
-
-        /* cancel DMA request */
-        ide_dma_set_inactive(bm);
-    }
-}
-
-void ide_dma_reset(BMDMAState *bm)
-{
-#ifdef DEBUG_IDE
-    printf("ide: dma_reset\n");
-#endif
-    ide_dma_cancel(bm);
-    bm->cmd = 0;
-    bm->status = 0;
-    bm->addr = 0;
-    bm->cur_addr = 0;
-    bm->cur_prd_last = 0;
-    bm->cur_prd_addr = 0;
-    bm->cur_prd_len = 0;
-    bm->sector_num = 0;
-    bm->nsector = 0;
-}
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index 029c76c..aadb505 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -20,7 +20,8 @@  typedef struct IDEBus IDEBus;
 typedef struct IDEDevice IDEDevice;
 typedef struct IDEDeviceInfo IDEDeviceInfo;
 typedef struct IDEState IDEState;
-typedef struct BMDMAState BMDMAState;
+typedef struct IDEDMA IDEDMA;
+typedef struct IDEDMAOps IDEDMAOps;
 
 /* Bits of HD_STATUS */
 #define ERR_STAT		0x01
@@ -367,6 +368,11 @@  typedef enum { IDE_HD, IDE_CD, IDE_CFATA } IDEDriveKind;
 
 typedef void EndTransferFunc(IDEState *);
 
+typedef void DMAStartFunc(IDEDMA *, IDEState *, BlockDriverCompletionFunc *);
+typedef int DMAFunc(IDEDMA *);
+typedef int DMAIntFunc(IDEDMA *, int);
+typedef void DMARestartFunc(void *, int, int);
+
 /* NOTE: IDEState represents in fact one drive */
 struct IDEState {
     IDEBus *bus;
@@ -443,13 +449,32 @@  struct IDEState {
     uint8_t *smart_selftest_data;
 };
 
+struct IDEDMAOps {
+    DMAStartFunc *start_dma;
+    DMAFunc *start_transfer;
+    DMAIntFunc *prepare_buf;
+    DMAIntFunc *rw_buf;
+    DMAIntFunc *set_unit;
+    DMAIntFunc *add_status;
+    DMAFunc *set_inactive;
+    DMARestartFunc *restart_cb;
+    DMAFunc *reset;
+};
+
+struct IDEDMA {
+    const struct IDEDMAOps *ops;
+    struct iovec iov;
+    QEMUIOVector qiov;
+    BlockDriverAIOCB *aiocb;
+};
+
 struct IDEBus {
     BusState qbus;
     IDEDevice *master;
     IDEDevice *slave;
-    BMDMAState *bmdma;
     IDEState ifs[2];
     int bus_id;
+    IDEDMA *dma;
     uint8_t unit;
     uint8_t cmd;
     qemu_irq irq;
@@ -480,46 +505,14 @@  struct IDEDeviceInfo {
 #define BM_CMD_START     0x01
 #define BM_CMD_READ      0x08
 
-struct BMDMAState {
-    uint8_t cmd;
-    uint8_t status;
-    uint32_t addr;
-
-    IDEBus *bus;
-    /* current transfer state */
-    uint32_t cur_addr;
-    uint32_t cur_prd_last;
-    uint32_t cur_prd_addr;
-    uint32_t cur_prd_len;
-    uint8_t unit;
-    BlockDriverCompletionFunc *dma_cb;
-    BlockDriverAIOCB *aiocb;
-    struct iovec iov;
-    QEMUIOVector qiov;
-    int64_t sector_num;
-    uint32_t nsector;
-    IORange addr_ioport;
-    QEMUBH *bh;
-};
-
 static inline IDEState *idebus_active_if(IDEBus *bus)
 {
     return bus->ifs + bus->unit;
 }
 
-static inline IDEState *bmdma_active_if(BMDMAState *bmdma)
-{
-    assert(bmdma->unit != (uint8_t)-1);
-    return bmdma->bus->ifs + bmdma->unit;
-}
-
 static inline void ide_set_irq(IDEBus *bus)
 {
-    BMDMAState *bm = bus->bmdma;
     if (!(bus->cmd & IDE_CMD_DISABLE_IRQ)) {
-        if (bm) {
-            bm->status |= BM_STATUS_INT;
-        }
         qemu_irq_raise(bus->irq);
     }
 }
@@ -542,10 +535,7 @@  void ide_bus_reset(IDEBus *bus);
 int64_t ide_get_sector(IDEState *s);
 void ide_set_sector(IDEState *s, int64_t sector_num);
 
-void ide_dma_cancel(BMDMAState *bm);
-void ide_dma_restart_cb(void *opaque, int running, int reason);
 void ide_dma_error(IDEState *s);
-void ide_dma_reset(BMDMAState *bm);
 
 void ide_atapi_cmd_ok(IDEState *s);
 void ide_atapi_cmd_error(IDEState *s, int sense_key, int asc);
@@ -568,6 +558,11 @@  void ide_init2_with_non_qdev_drives(IDEBus *bus, DriveInfo *hd0,
 void ide_init_ioport(IDEBus *bus, int iobase, int iobase2);
 
 void ide_exec_cmd(IDEBus *bus, uint32_t val);
+void ide_read_dma_cb(void *opaque, int ret);
+void ide_write_dma_cb(void *opaque, int ret);
+void ide_sector_write(IDEState *s);
+void ide_sector_read(IDEState *s);
+void ide_flush_cache(IDEState *s);
 
 /* hw/ide/qdev.c */
 void ide_bus_new(IDEBus *idebus, DeviceState *dev, int bus_id);
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index ad406ee..f9141e6 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -33,6 +33,262 @@ 
 
 #include <hw/ide/pci.h>
 
+#define BMDMA_PAGE_SIZE 4096
+
+static void bmdma_start_dma(IDEDMA *dma, IDEState *s,
+                            BlockDriverCompletionFunc *dma_cb)
+{
+    BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
+
+    bm->unit = s->unit;
+    bm->dma_cb = dma_cb;
+    bm->cur_prd_last = 0;
+    bm->cur_prd_addr = 0;
+    bm->cur_prd_len = 0;
+    bm->sector_num = ide_get_sector(s);
+    bm->nsector = s->nsector;
+
+    if (bm->status & BM_STATUS_DMAING) {
+        bm->dma_cb(bmdma_active_if(bm), 0);
+    }
+}
+
+/* return 0 if buffer completed */
+static int bmdma_prepare_buf(IDEDMA *dma, int is_write)
+{
+    BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
+    IDEState *s = bmdma_active_if(bm);
+    struct {
+        uint32_t addr;
+        uint32_t size;
+    } prd;
+    int l, len;
+
+    qemu_sglist_init(&s->sg, s->nsector / (BMDMA_PAGE_SIZE / 512) + 1);
+    s->io_buffer_size = 0;
+    for(;;) {
+        if (bm->cur_prd_len == 0) {
+            /* end of table (with a fail safe of one page) */
+            if (bm->cur_prd_last ||
+                (bm->cur_addr - bm->addr) >= BMDMA_PAGE_SIZE)
+                return s->io_buffer_size != 0;
+            cpu_physical_memory_read(bm->cur_addr, (uint8_t *)&prd, 8);
+            bm->cur_addr += 8;
+            prd.addr = le32_to_cpu(prd.addr);
+            prd.size = le32_to_cpu(prd.size);
+            len = prd.size & 0xfffe;
+            if (len == 0)
+                len = 0x10000;
+            bm->cur_prd_len = len;
+            bm->cur_prd_addr = prd.addr;
+            bm->cur_prd_last = (prd.size & 0x80000000);
+        }
+        l = bm->cur_prd_len;
+        if (l > 0) {
+            qemu_sglist_add(&s->sg, bm->cur_prd_addr, l);
+            bm->cur_prd_addr += l;
+            bm->cur_prd_len -= l;
+            s->io_buffer_size += l;
+        }
+    }
+    return 1;
+}
+
+/* return 0 if buffer completed */
+static int bmdma_rw_buf(IDEDMA *dma, int is_write)
+{
+    BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
+    IDEState *s = bmdma_active_if(bm);
+    struct {
+        uint32_t addr;
+        uint32_t size;
+    } prd;
+    int l, len;
+
+    for(;;) {
+        l = s->io_buffer_size - s->io_buffer_index;
+        if (l <= 0)
+            break;
+        if (bm->cur_prd_len == 0) {
+            /* end of table (with a fail safe of one page) */
+            if (bm->cur_prd_last ||
+                (bm->cur_addr - bm->addr) >= BMDMA_PAGE_SIZE)
+                return 0;
+            cpu_physical_memory_read(bm->cur_addr, (uint8_t *)&prd, 8);
+            bm->cur_addr += 8;
+            prd.addr = le32_to_cpu(prd.addr);
+            prd.size = le32_to_cpu(prd.size);
+            len = prd.size & 0xfffe;
+            if (len == 0)
+                len = 0x10000;
+            bm->cur_prd_len = len;
+            bm->cur_prd_addr = prd.addr;
+            bm->cur_prd_last = (prd.size & 0x80000000);
+        }
+        if (l > bm->cur_prd_len)
+            l = bm->cur_prd_len;
+        if (l > 0) {
+            if (is_write) {
+                cpu_physical_memory_write(bm->cur_prd_addr,
+                                          s->io_buffer + s->io_buffer_index, l);
+            } else {
+                cpu_physical_memory_read(bm->cur_prd_addr,
+                                          s->io_buffer + s->io_buffer_index, l);
+            }
+            bm->cur_prd_addr += l;
+            bm->cur_prd_len -= l;
+            s->io_buffer_index += l;
+        }
+    }
+    return 1;
+}
+
+static int bmdma_set_unit(IDEDMA *dma, int unit)
+{
+    BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
+    bm->unit = unit;
+
+    return 0;
+}
+
+static int bmdma_add_status(IDEDMA *dma, int status)
+{
+    BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
+    bm->status |= status;
+
+    return 0;
+}
+
+static int bmdma_set_inactive(IDEDMA *dma)
+{
+    BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
+
+    bm->status &= ~BM_STATUS_DMAING;
+    bm->dma_cb = NULL;
+    bm->unit = -1;
+
+    return 0;
+}
+
+static void bmdma_restart_dma(BMDMAState *bm, int is_read)
+{
+    IDEState *s = bmdma_active_if(bm);
+
+    ide_set_sector(s, bm->sector_num);
+    s->io_buffer_index = 0;
+    s->io_buffer_size = 0;
+    s->nsector = bm->nsector;
+    bm->cur_addr = bm->addr;
+
+    if (is_read) {
+        bm->dma_cb = ide_read_dma_cb;
+    } else {
+        bm->dma_cb = ide_write_dma_cb;
+    }
+
+    bmdma_start_dma(&bm->dma, s, bm->dma_cb);
+}
+
+static void bmdma_restart_bh(void *opaque)
+{
+    BMDMAState *bm = opaque;
+    int is_read;
+
+    qemu_bh_delete(bm->bh);
+    bm->bh = NULL;
+
+    is_read = !!(bm->status & BM_STATUS_RETRY_READ);
+
+    if (bm->status & BM_STATUS_DMA_RETRY) {
+        bm->status &= ~(BM_STATUS_DMA_RETRY | BM_STATUS_RETRY_READ);
+        bmdma_restart_dma(bm, is_read);
+    } else if (bm->status & BM_STATUS_PIO_RETRY) {
+        bm->status &= ~(BM_STATUS_PIO_RETRY | BM_STATUS_RETRY_READ);
+        if (is_read) {
+            ide_sector_read(bmdma_active_if(bm));
+        } else {
+            ide_sector_write(bmdma_active_if(bm));
+        }
+    } else if (bm->status & BM_STATUS_RETRY_FLUSH) {
+        ide_flush_cache(bmdma_active_if(bm));
+    }
+}
+
+static void bmdma_restart_cb(void *opaque, int running, int reason)
+{
+    IDEDMA *dma = opaque;
+    BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
+
+    if (!running)
+        return;
+
+    if (!bm->bh) {
+        bm->bh = qemu_bh_new(bmdma_restart_bh, &bm->dma);
+        qemu_bh_schedule(bm->bh);
+    }
+}
+
+static void bmdma_cancel(BMDMAState *bm)
+{
+    IDEState *s = bmdma_active_if(bm);
+
+    if (bm->status & BM_STATUS_DMAING) {
+        if (s->bus->dma->aiocb) {
+#ifdef DEBUG_AIO
+            printf("aio_cancel\n");
+#endif
+            bdrv_aio_cancel(s->bus->dma->aiocb);
+        }
+
+        /* cancel DMA request */
+        bmdma_set_inactive(&bm->dma);
+    }
+}
+
+static int bmdma_reset(IDEDMA *dma)
+{
+    BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma);
+
+#ifdef DEBUG_IDE
+    printf("ide: dma_reset\n");
+#endif
+    bmdma_cancel(bm);
+    bm->cmd = 0;
+    bm->status = 0;
+    bm->addr = 0;
+    bm->cur_addr = 0;
+    bm->cur_prd_last = 0;
+    bm->cur_prd_addr = 0;
+    bm->cur_prd_len = 0;
+    bm->sector_num = 0;
+    bm->nsector = 0;
+
+    return 0;
+}
+
+static int bmdma_start_transfer(IDEDMA *dma)
+{
+    return 0;
+}
+
+static void bmdma_irq(void *opaque, int n, int level)
+{
+    BMDMAState *bm = opaque;
+
+    if (!level) {
+        /* pass through lower */
+        qemu_set_irq(bm->irq, level);
+        return;
+    }
+
+    if (bm) {
+        bm->status |= BM_STATUS_INT;
+    }
+
+    /* trigger the real irq */
+    qemu_set_irq(bm->irq, level);
+}
+
 void bmdma_cmd_writeb(void *opaque, uint32_t addr, uint32_t val)
 {
     BMDMAState *bm = opaque;
@@ -55,10 +311,10 @@  void bmdma_cmd_writeb(void *opaque, uint32_t addr, uint32_t val)
              * whole DMA operation will be submitted to disk with a single
              * aio operation with preadv/pwritev.
              */
-            if (bm->aiocb) {
+            if (bm->bus->dma->aiocb) {
                 qemu_aio_flush();
 #ifdef DEBUG_IDE
-                if (bm->aiocb)
+                if (bm->bus->dma->aiocb)
                     printf("ide_dma_cancel: aiocb still pending");
                 if (bm->status & BM_STATUS_DMAING)
                     printf("ide_dma_cancel: BM_STATUS_DMAING still pending");
@@ -70,7 +326,7 @@  void bmdma_cmd_writeb(void *opaque, uint32_t addr, uint32_t val)
                 bm->status |= BM_STATUS_DMAING;
                 /* start dma transfer if possible */
                 if (bm->dma_cb)
-                    bm->dma_cb(bm, 0);
+                    bm->dma_cb(bmdma_active_if(bm), 0);
             }
         }
     }
@@ -198,3 +454,30 @@  void pci_ide_create_devs(PCIDevice *dev, DriveInfo **hd_table)
         ide_create_drive(d->bus+bus[i], unit[i], hd_table[i]);
     }
 }
+
+static const struct IDEDMAOps bmdma_ops = {
+    .start_dma = bmdma_start_dma,
+    .start_transfer = bmdma_start_transfer,
+    .prepare_buf = bmdma_prepare_buf,
+    .rw_buf = bmdma_rw_buf,
+    .set_unit = bmdma_set_unit,
+    .add_status = bmdma_add_status,
+    .set_inactive = bmdma_set_inactive,
+    .restart_cb = bmdma_restart_cb,
+    .reset = bmdma_reset,
+};
+
+void bmdma_init(IDEBus *bus, BMDMAState *bm)
+{
+    qemu_irq *irq;
+
+    if (bus->dma == &bm->dma) {
+        return;
+    }
+
+    bm->dma.ops = &bmdma_ops;
+    bus->dma = &bm->dma;
+    bm->irq = bus->irq;
+    irq = qemu_allocate_irqs(bmdma_irq, bm, 1);
+    bus->irq = *irq;
+}
diff --git a/hw/ide/pci.h b/hw/ide/pci.h
index b81b26c..cd72cba 100644
--- a/hw/ide/pci.h
+++ b/hw/ide/pci.h
@@ -3,6 +3,27 @@ 
 
 #include <hw/ide/internal.h>
 
+typedef struct BMDMAState {
+    IDEDMA dma;
+    uint8_t cmd;
+    uint8_t status;
+    uint32_t addr;
+
+    IDEBus *bus;
+    /* current transfer state */
+    uint32_t cur_addr;
+    uint32_t cur_prd_last;
+    uint32_t cur_prd_addr;
+    uint32_t cur_prd_len;
+    uint8_t unit;
+    BlockDriverCompletionFunc *dma_cb;
+    int64_t sector_num;
+    uint32_t nsector;
+    IORange addr_ioport;
+    QEMUBH *bh;
+    qemu_irq irq;
+} BMDMAState;
+
 typedef struct PCIIDEState {
     PCIDevice dev;
     IDEBus bus[2];
@@ -10,6 +31,15 @@  typedef struct PCIIDEState {
     uint32_t secondary; /* used only for cmd646 */
 } PCIIDEState;
 
+
+static inline IDEState *bmdma_active_if(BMDMAState *bmdma)
+{
+    assert(bmdma->unit != (uint8_t)-1);
+    return bmdma->bus->ifs + bmdma->unit;
+}
+
+
+void bmdma_init(IDEBus *bus, BMDMAState *bm);
 void bmdma_cmd_writeb(void *opaque, uint32_t addr, uint32_t val);
 extern const IORangeOps bmdma_addr_ioport_ops;
 void pci_ide_create_devs(PCIDevice *dev, DriveInfo **hd_table);
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index 1c0cb0c..0c93a0a 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -76,9 +76,10 @@  static void bmdma_map(PCIDevice *pci_dev, int region_num,
 
     for(i = 0;i < 2; i++) {
         BMDMAState *bm = &d->bmdma[i];
-        d->bus[i].bmdma = bm;
+        bmdma_init(&d->bus[i], bm);
         bm->bus = d->bus+i;
-        qemu_add_vm_change_state_handler(ide_dma_restart_cb, bm);
+        qemu_add_vm_change_state_handler(d->bus[i].dma->ops->restart_cb,
+                                         &bm->dma);
 
         register_ioport_write(addr, 1, 1, bmdma_cmd_writeb, bm);
 
@@ -99,7 +100,7 @@  static void piix3_reset(void *opaque)
 
     for (i = 0; i < 2; i++) {
         ide_bus_reset(&d->bus[i]);
-        ide_dma_reset(&d->bmdma[i]);
+        d->bus[i].dma->ops->reset(&d->bmdma[i].dma);
     }
 
     /* TODO: this is the default. do not override. */
diff --git a/hw/ide/via.c b/hw/ide/via.c
index 78857e8..fce0bac 100644
--- a/hw/ide/via.c
+++ b/hw/ide/via.c
@@ -78,9 +78,10 @@  static void bmdma_map(PCIDevice *pci_dev, int region_num,
 
     for(i = 0;i < 2; i++) {
         BMDMAState *bm = &d->bmdma[i];
-        d->bus[i].bmdma = bm;
+        bmdma_init(&d->bus[i], bm);
         bm->bus = d->bus+i;
-        qemu_add_vm_change_state_handler(ide_dma_restart_cb, bm);
+        qemu_add_vm_change_state_handler(d->bus[i].dma->ops->restart_cb,
+                                         &bm->dma);
 
         register_ioport_write(addr, 1, 1, bmdma_cmd_writeb, bm);
 
@@ -101,7 +102,7 @@  static void via_reset(void *opaque)
 
     for (i = 0; i < 2; i++) {
         ide_bus_reset(&d->bus[i]);
-        ide_dma_reset(&d->bmdma[i]);
+        d->bus[i].dma->ops->reset(&d->bmdma[i].dma);
     }
 
     pci_set_word(pci_conf + PCI_COMMAND, PCI_COMMAND_WAIT);