diff mbox

[3/3] ide: add TRIM support

Message ID 20110519085819.GC3679@lst.de
State New
Headers show

Commit Message

Christoph Hellwig May 19, 2011, 8:58 a.m. UTC
Add support for the TRIM subfunction of the ATA Data Set Management command,
and wire it up to the qemu discard infrastructure.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Comments

Kevin Wolf June 10, 2011, 11:47 a.m. UTC | #1
Am 19.05.2011 10:58, schrieb Christoph Hellwig:
> Add support for TRIM sub function of the data set management command,
> and wire it up to the qemu discard infrastructure.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Index: qemu/hw/ide/core.c
> ===================================================================
> --- qemu.orig/hw/ide/core.c	2011-05-18 20:30:20.855172933 +0200
> +++ qemu/hw/ide/core.c	2011-05-18 20:32:26.237625024 +0200
> @@ -124,6 +124,9 @@ static void ide_identify(IDEState *s)
>      put_le16(p + 66, 120);
>      put_le16(p + 67, 120);
>      put_le16(p + 68, 120);
> +    if (dev && dev->conf.discard_granularity) {
> +        put_le16(p + 69, (1 << 14)); /* determinate TRIM behavior */
> +    }
>  
>      if (s->ncq_queues) {
>          put_le16(p + 75, s->ncq_queues - 1);
> @@ -157,6 +160,9 @@ static void ide_identify(IDEState *s)
>      dev = s->unit ? s->bus->slave : s->bus->master;
>      if (dev && dev->conf.physical_block_size)
>          put_le16(p + 106, 0x6000 | get_physical_block_exp(&dev->conf));
> +    if (dev && dev->conf.discard_granularity) {
> +        put_le16(p + 169, 1); /* TRIM support */
> +    }
>  
>      memcpy(s->identify_data, p, sizeof(s->identify_data));
>      s->identify_set = 1;
> @@ -299,6 +305,72 @@ static void ide_set_signature(IDEState *
>      }
>  }
>  
> +typedef struct TrimAIOCB {
> +    BlockDriverAIOCB common;
> +    QEMUBH *bh;
> +    int ret;
> +} TrimAIOCB;
> +
> +static void trim_aio_cancel(BlockDriverAIOCB *acb)
> +{
> +    TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common);
> +
> +    qemu_bh_delete(iocb->bh);
> +    iocb->bh = NULL;
> +    qemu_aio_release(iocb);
> +}
> +
> +static AIOPool trim_aio_pool = {
> +    .aiocb_size         = sizeof(TrimAIOCB),
> +    .cancel             = trim_aio_cancel,
> +};
> +
> +static void ide_trim_bh_cb(void *opaque)
> +{
> +    TrimAIOCB *iocb = opaque;
> +
> +    iocb->common.cb(iocb->common.opaque, iocb->ret);
> +
> +    qemu_bh_delete(iocb->bh);
> +    iocb->bh = NULL;
> +
> +    qemu_aio_release(iocb);
> +}
> +
> +BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs,
> +        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
> +        BlockDriverCompletionFunc *cb, void *opaque)
> +{
> +    TrimAIOCB *iocb;
> +    int i, j, ret;
> +
> +    iocb = qemu_aio_get(&trim_aio_pool, bs, cb, opaque);
> +    iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
> +    iocb->ret = 0;
> +
> +    for (j = 0; j < qiov->niov; j++) {
> +        uint64_t *buffer = qiov->iov[j].iov_base;
> +
> +        for (i = 0; i < qiov->iov[j].iov_len / 8; i++) {
> +            /* 6-byte LBA + 2-byte range per entry */
> +            uint64_t entry = le64_to_cpu(buffer[i]);
> +            uint64_t sector = entry & 0x0000ffffffffffffULL;
> +            uint16_t count = entry >> 48;
> +
> +            if (count == 0)
> +                break;
> +
> +            ret = bdrv_discard(bs, sector * 512, count * 512);

Hm... bdrv_discard wants sector numbers instead of bytes, doesn't it?

If you agree, I'll send out and apply a fixed and rebased version.

Kevin
Kevin Wolf June 10, 2011, 2:04 p.m. UTC | #2
Am 19.05.2011 10:58, schrieb Christoph Hellwig:
> Add support for TRIM sub function of the data set management command,
> and wire it up to the qemu discard infrastructure.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

> Index: qemu/hw/ide/pci.c
> ===================================================================
> --- qemu.orig/hw/ide/pci.c	2011-05-18 20:28:17.153625872 +0200
> +++ qemu/hw/ide/pci.c	2011-05-18 20:33:06.102141553 +0200
> @@ -205,6 +205,9 @@ static void bmdma_restart_bh(void *opaqu
>          }
>      } else if (bm->status & BM_STATUS_RETRY_FLUSH) {
>          ide_flush_cache(bmdma_active_if(bm));
> +    } else if (bm->status & BM_STATUS_RETRY_TRIM) {
> +        bm->status &= ~BM_STATUS_RETRY_TRIM;
> +        bmdma_restart_dma(bm, IDE_DMA_TRIM);
>      }
>  }

Just noticed that this is wrong. BM_STATUS_DMA_RETRY is always set at
the same time, so this is dead code.

Kevin
diff mbox

Patch

Index: qemu/hw/ide/core.c
===================================================================
--- qemu.orig/hw/ide/core.c	2011-05-18 20:30:20.855172933 +0200
+++ qemu/hw/ide/core.c	2011-05-18 20:32:26.237625024 +0200
@@ -124,6 +124,9 @@  static void ide_identify(IDEState *s)
     put_le16(p + 66, 120);
     put_le16(p + 67, 120);
     put_le16(p + 68, 120);
+    if (dev && dev->conf.discard_granularity) {
+        put_le16(p + 69, (1 << 14)); /* determinate TRIM behavior */
+    }
 
     if (s->ncq_queues) {
         put_le16(p + 75, s->ncq_queues - 1);
@@ -157,6 +160,9 @@  static void ide_identify(IDEState *s)
     dev = s->unit ? s->bus->slave : s->bus->master;
     if (dev && dev->conf.physical_block_size)
         put_le16(p + 106, 0x6000 | get_physical_block_exp(&dev->conf));
+    if (dev && dev->conf.discard_granularity) {
+        put_le16(p + 169, 1); /* TRIM support */
+    }
 
     memcpy(s->identify_data, p, sizeof(s->identify_data));
     s->identify_set = 1;
@@ -299,6 +305,72 @@  static void ide_set_signature(IDEState *
     }
 }
 
+typedef struct TrimAIOCB {
+    BlockDriverAIOCB common; /* generic AIOCB part; holds cb and opaque */
+    QEMUBH *bh; /* bottom half that runs ide_trim_bh_cb() */
+    int ret; /* result passed to the completion callback */
+} TrimAIOCB;
+
+static void trim_aio_cancel(BlockDriverAIOCB *acb)
+{
+    TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common);
+    /* .cancel hook of trim_aio_pool: delete the BH, then release the AIOCB */
+    qemu_bh_delete(iocb->bh);
+    iocb->bh = NULL;
+    qemu_aio_release(iocb);
+}
+
+static AIOPool trim_aio_pool = { /* pool used by ide_issue_trim() */
+    .aiocb_size         = sizeof(TrimAIOCB), /* AIOCBs here are TrimAIOCBs */
+    .cancel             = trim_aio_cancel,
+};
+
+static void ide_trim_bh_cb(void *opaque)
+{
+    TrimAIOCB *iocb = opaque;
+    /* BH handler: report the stored result, then free the BH and the AIOCB */
+    iocb->common.cb(iocb->common.opaque, iocb->ret);
+
+    qemu_bh_delete(iocb->bh);
+    iocb->bh = NULL;
+
+    qemu_aio_release(iocb);
+}
+
+BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    TrimAIOCB *iocb;
+    int i, j, ret;
+    /* Discard every range listed in qiov; sector_num/nb_sectors are unused. */
+    iocb = qemu_aio_get(&trim_aio_pool, bs, cb, opaque);
+    iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
+    iocb->ret = 0;
+
+    for (j = 0; j < qiov->niov; j++) {
+        uint64_t *buffer = qiov->iov[j].iov_base;
+
+        for (i = 0; i < qiov->iov[j].iov_len / 8; i++) {
+            /* 6-byte LBA + 2-byte range per entry */
+            uint64_t entry = le64_to_cpu(buffer[i]);
+            uint64_t sector = entry & 0x0000ffffffffffffULL;
+            uint16_t count = entry >> 48;
+
+            if (count == 0)
+                break;
+            /* bdrv_discard() takes a start sector and a sector count */
+            ret = bdrv_discard(bs, sector, count);
+            if (!iocb->ret)
+                iocb->ret = ret; /* keep only the first non-zero result */
+        }
+    }
+    /* cb is not called here; ide_trim_bh_cb() reports iocb->ret later. */
+    qemu_bh_schedule(iocb->bh);
+
+    return &iocb->common;
+}
+
 static inline void ide_abort_command(IDEState *s)
 {
     s->status = READY_STAT | ERR_STAT;
@@ -475,6 +547,9 @@  handle_rw_error:
 
         if (s->dma_cmd == IDE_DMA_READ)
             op |= BM_STATUS_RETRY_READ;
+        else if (s->dma_cmd == IDE_DMA_TRIM)
+            op |= BM_STATUS_RETRY_TRIM;
+
         if (ide_handle_rw_error(s, -ret, op)) {
             return;
         }
@@ -517,6 +592,10 @@  handle_rw_error:
         s->bus->dma->aiocb = dma_bdrv_write(s->bs, &s->sg, sector_num,
                                             ide_dma_cb, s);
         break;
+    case IDE_DMA_TRIM:
+        s->bus->dma->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num,
+                                         ide_issue_trim, ide_dma_cb, s, 1);
+        break;
     }
 
     if (!s->bus->dma->aiocb) {
@@ -817,6 +896,17 @@  void ide_exec_cmd(IDEBus *bus, uint32_t
         return;
 
     switch(val) {
+    case WIN_DSM:
+        switch (s->feature) {
+        case DSM_TRIM:
+            if (!s->bs)
+                goto abort_cmd;
+            ide_sector_start_dma(s, IDE_DMA_TRIM);
+            break;
+        default:
+            goto abort_cmd;
+        }
+        break;
     case WIN_IDENTIFY:
         if (s->bs && s->drive_kind != IDE_CD) {
             if (s->drive_kind != IDE_CFATA)
Index: qemu/hw/ide/internal.h
===================================================================
--- qemu.orig/hw/ide/internal.h	2011-05-18 20:28:17.133626100 +0200
+++ qemu/hw/ide/internal.h	2011-05-18 20:30:58.937624890 +0200
@@ -62,7 +62,11 @@  typedef struct IDEDMAOps IDEDMAOps;
  */
 #define CFA_REQ_EXT_ERROR_CODE		0x03 /* CFA Request Extended Error Code */
 /*
- *	0x04->0x07 Reserved
+ *      0x04->0x05 Reserved
+ */
+#define WIN_DSM                         0x06
+/*
+ *      0x07 Reserved
  */
 #define WIN_SRST			0x08 /* ATAPI soft reset command */
 #define WIN_DEVICE_RESET		0x08
@@ -190,6 +194,9 @@  typedef struct IDEDMAOps IDEDMAOps;
 
 #define IDE_DMA_BUF_SECTORS 256
 
+/* feature values for Data Set Management */
+#define DSM_TRIM                        0x01
+
 #if (IDE_DMA_BUF_SECTORS < MAX_MULT_SECTORS)
 #error "IDE_DMA_BUF_SECTORS must be bigger or equal to MAX_MULT_SECTORS"
 #endif
@@ -382,6 +389,7 @@  struct unreported_events {
 enum ide_dma_cmd {
     IDE_DMA_READ,
     IDE_DMA_WRITE,
+    IDE_DMA_TRIM, /* started by WIN_DSM with the DSM_TRIM feature */
 };
 
 #define ide_cmd_is_read(s) \
@@ -517,6 +525,7 @@  struct IDEDeviceInfo {
 #define BM_STATUS_PIO_RETRY  0x10
 #define BM_STATUS_RETRY_READ  0x20
 #define BM_STATUS_RETRY_FLUSH 0x40
+#define BM_STATUS_RETRY_TRIM 0x80
 
 #define BM_CMD_START     0x01
 #define BM_CMD_READ      0x08
@@ -583,6 +592,9 @@  void ide_transfer_start(IDEState *s, uin
                         EndTransferFunc *end_transfer_func);
 void ide_transfer_stop(IDEState *s);
 void ide_set_inactive(IDEState *s);
+BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque);
 
 /* hw/ide/atapi.c */
 void ide_atapi_cmd(IDEState *s);
Index: qemu/hw/ide/qdev.c
===================================================================
--- qemu.orig/hw/ide/qdev.c	2011-05-18 20:28:17.137626185 +0200
+++ qemu/hw/ide/qdev.c	2011-05-18 20:30:39.387125836 +0200
@@ -125,6 +125,11 @@  static int ide_drive_initfn(IDEDevice *d
     const char *serial;
     DriveInfo *dinfo;
 
+    if (dev->conf.discard_granularity && dev->conf.discard_granularity != 512) {
+        error_report("discard_granularity must be 512 for ide");
+        return -1;
+    }
+
     serial = dev->serial;
     if (!serial) {
         /* try to fall back to value set with legacy -drive serial=... */
Index: qemu/hw/ide/macio.c
===================================================================
--- qemu.orig/hw/ide/macio.c	2011-05-18 20:28:17.145627191 +0200
+++ qemu/hw/ide/macio.c	2011-05-18 20:30:39.387125836 +0200
@@ -154,6 +154,10 @@  static void pmac_ide_transfer_cb(void *o
         m->aiocb = dma_bdrv_write(s->bs, &s->sg, sector_num,
 		                  pmac_ide_transfer_cb, io);
         break;
+    case IDE_DMA_TRIM:
+        m->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num,
+                               ide_issue_trim, pmac_ide_transfer_cb, s, 1);
+        break;
     }
 
     if (!m->aiocb)
Index: qemu/hw/ide/pci.c
===================================================================
--- qemu.orig/hw/ide/pci.c	2011-05-18 20:28:17.153625872 +0200
+++ qemu/hw/ide/pci.c	2011-05-18 20:33:06.102141553 +0200
@@ -205,6 +205,9 @@  static void bmdma_restart_bh(void *opaqu
         }
     } else if (bm->status & BM_STATUS_RETRY_FLUSH) {
         ide_flush_cache(bmdma_active_if(bm));
+    } else if (bm->status & BM_STATUS_RETRY_TRIM) {
+        bm->status &= ~BM_STATUS_RETRY_TRIM;
+        bmdma_restart_dma(bm, IDE_DMA_TRIM);
     }
 }