diff mbox

[4/7] scsi-disk: introduce dma_readv and dma_writev

Message ID 1464008051-6429-5-git-send-email-pbonzini@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini May 23, 2016, 12:54 p.m. UTC
These are replacements for blk_aio_preadv and blk_aio_pwritev that allow
customization of the data path.  They reuse the DMA helpers' DMAIOFunc
callback type, so that the same function can be used in either the
QEMUSGList or the bounce-buffered case.

This customization will be needed in the next patch to do zero-copy
SG_IO on scsi-block.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/scsi/scsi-disk.c | 63 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 52 insertions(+), 11 deletions(-)

Comments

Eric Blake May 23, 2016, 4:09 p.m. UTC | #1
On 05/23/2016 06:54 AM, Paolo Bonzini wrote:
> These are replacements for blk_aio_preadv and blk_aio_pwritev that allow
> customization of the data path.  They reuse the DMA helpers' DMAIOFunc
> callback type, so that the same function can be used in either the
> QEMUSGList or the bounce-buffered case.
> 
> This customization will be needed in the next patch to do zero-copy
> SG_IO on scsi-block.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  hw/scsi/scsi-disk.c | 63 +++++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 52 insertions(+), 11 deletions(-)
> 

>  
> +static
> +BlockAIOCB *scsi_dma_readv(int64_t offset, QEMUIOVector *iov,
> +                           BlockCompletionFunc *cb, void *cb_opaque,
> +                           void *opaque)
> +{
> +    SCSIDiskReq *r = opaque;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);

DO_UPCAST() is poorly named, and I've been trying to reduce its use in
the tree; can we use container_of() instead?

> +    return blk_aio_preadv(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
> +}
> +
> +static
> +BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
> +                            BlockCompletionFunc *cb, void *cb_opaque,
> +                            void *opaque)
> +{
> +    SCSIDiskReq *r = opaque;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +    return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);

Are we ever going to want to pass flags (such as BDRV_REQ_FUA) on to
blk_aio_pwritev(), which would imply a flags argument to
scsi_dma_writev()?  But it doesn't have to happen now, so it's not
holding up my review.

Reviewed-by: Eric Blake <eblake@redhat.com>
Mark Cave-Ayland June 1, 2016, 7:07 p.m. UTC | #2
On 23/05/16 13:54, Paolo Bonzini wrote:

> These are replacements for blk_aio_preadv and blk_aio_pwritev that allow
> customization of the data path.  They reuse the DMA helpers' DMAIOFunc
> callback type, so that the same function can be used in either the
> QEMUSGList or the bounce-buffered case.
> 
> This customization will be needed in the next patch to do zero-copy
> SG_IO on scsi-block.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  hw/scsi/scsi-disk.c | 63 +++++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 52 insertions(+), 11 deletions(-)
> 
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index eaadfd6..4b5db59 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -55,7 +55,21 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
>  
>  #define TYPE_SCSI_DISK_BASE         "scsi-disk-base"
>  
> +#define SCSI_DISK_BASE(obj) \
> +     OBJECT_CHECK(SCSIDiskState, (obj), TYPE_SCSI_DISK_BASE)
> +#define SCSI_DISK_BASE_CLASS(klass) \
> +     OBJECT_CLASS_CHECK(SCSIDiskClass, (klass), TYPE_SCSI_DISK_BASE)
> +#define SCSI_DISK_BASE_GET_CLASS(obj) \
> +     OBJECT_GET_CLASS(SCSIDiskClass, (obj), TYPE_SCSI_DISK_BASE)
> +
>  typedef struct SCSIDiskState SCSIDiskState;
> +typedef struct SCSIDiskClass SCSIDiskClass;
> +
> +typedef struct SCSIDiskClass {
> +    SCSIDeviceClass parent_class;
> +    DMAIOFunc       *dma_readv;
> +    DMAIOFunc       *dma_writev;
> +} SCSIDiskClass;
>  
>  typedef struct SCSIDiskReq {
>      SCSIRequest req;
> @@ -317,6 +331,7 @@ done:
>  static void scsi_do_read(SCSIDiskReq *r, int ret)
>  {
>      SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +    SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
>  
>      assert (r->req.aiocb == NULL);
>  
> @@ -337,16 +352,16 @@ static void scsi_do_read(SCSIDiskReq *r, int ret)
>      if (r->req.sg) {
>          dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
>          r->req.resid -= r->req.sg->size;
> -        r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg,
> -                                    r->sector << BDRV_SECTOR_BITS,
> -                                    scsi_dma_complete, r);
> +        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
> +                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
> +                                  sdc->dma_readv, r, scsi_dma_complete, r,
> +                                  DMA_DIRECTION_FROM_DEVICE);
>      } else {
>          scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
>          block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
>                           r->qiov.size, BLOCK_ACCT_READ);
> -        r->req.aiocb = blk_aio_preadv(s->qdev.conf.blk,
> -                                      r->sector << BDRV_SECTOR_BITS, &r->qiov,
> -                                      0, scsi_read_complete, r);
> +        r->req.aiocb = sdc->dma_readv(r->sector, &r->qiov,
> +                                      scsi_read_complete, r, r);
>      }
>  
>  done:
> @@ -506,6 +521,7 @@ static void scsi_write_data(SCSIRequest *req)
>  {
>      SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
>      SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +    SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
>  
>      /* No data transfer may already be in progress */
>      assert(r->req.aiocb == NULL);
> @@ -542,15 +558,16 @@ static void scsi_write_data(SCSIRequest *req)
>      if (r->req.sg) {
>          dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
>          r->req.resid -= r->req.sg->size;
> -        r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg,
> -                                     r->sector << BDRV_SECTOR_BITS,
>                                       scsi_dma_complete, r);
> +        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
> +                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
> +                                  sdc->dma_writev, r, scsi_dma_complete, r,
> +                                  DMA_DIRECTION_TO_DEVICE);
>      } else {
>          block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
>                           r->qiov.size, BLOCK_ACCT_WRITE);
> -        r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
> -                                       r->sector << BDRV_SECTOR_BITS, &r->qiov,
> -                                       0, scsi_write_complete, r);
> +        r->req.aiocb = sdc->dma_writev(r->sector << BDRV_SECTOR_BITS, &r->qiov,
> +                                       scsi_write_complete, r, r);
>      }
>  }
>  
> @@ -2658,12 +2675,35 @@ static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd,
>  
>  #endif
>  
> +static
> +BlockAIOCB *scsi_dma_readv(int64_t offset, QEMUIOVector *iov,
> +                           BlockCompletionFunc *cb, void *cb_opaque,
> +                           void *opaque)
> +{
> +    SCSIDiskReq *r = opaque;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +    return blk_aio_preadv(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
> +}
> +
> +static
> +BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
> +                            BlockCompletionFunc *cb, void *cb_opaque,
> +                            void *opaque)
> +{
> +    SCSIDiskReq *r = opaque;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +    return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
> +}
> +
>  static void scsi_disk_base_class_initfn(ObjectClass *klass, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(klass);
> +    SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
>  
>      dc->fw_name = "disk";
>      dc->reset = scsi_disk_reset;
> +    sdc->dma_readv = scsi_dma_readv;
> +    sdc->dma_writev = scsi_dma_writev;
>  }
>  
>  static const TypeInfo scsi_disk_base_info = {
> @@ -2671,6 +2711,7 @@ static const TypeInfo scsi_disk_base_info = {
>      .parent        = TYPE_SCSI_DEVICE,
>      .class_init    = scsi_disk_base_class_initfn,
>      .instance_size = sizeof(SCSIDiskState),
> +    .class_size    = sizeof(SCSIDiskClass),
>  };
>  
>  #define DEFINE_SCSI_DISK_PROPERTIES()                                \

Hi Paolo,

This patch appears to break qemu-system-sparc booting from CDROM with
the following command line:

./qemu-system-sparc -cdrom debian-40r4a-sparc-netinst.iso -boot d

Instead of booting straight into SILO, OpenBIOS hangs when trying to
read from the CDROM device.


ATB,

Mark.
zhao xiao qiang June 3, 2016, 2:56 a.m. UTC | #3
在 2016年06月02日 03:07, Mark Cave-Ayland 写道:
> On 23/05/16 13:54, Paolo Bonzini wrote:
>
>> >These are replacements for blk_aio_preadv and blk_aio_pwritev that allow
>> >customization of the data path.  They reuse the DMA helpers' DMAIOFunc
>> >callback type, so that the same function can be used in either the
>> >QEMUSGList or the bounce-buffered case.
>> >
>> >This customization will be needed in the next patch to do zero-copy
>> >SG_IO on scsi-block.
>> >
>> >Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
>> >---
>> >  hw/scsi/scsi-disk.c | 63 +++++++++++++++++++++++++++++++++++++++++++----------
>> >  1 file changed, 52 insertions(+), 11 deletions(-)
>> >
> Hi Paolo,
>
> This patch appears to break qemu-system-sparc booting from CDROM with
> the following command line:
>
> ./qemu-system-sparc -cdrom debian-40r4a-sparc-netinst.iso -boot d
>
> Instead of booting straight into SILO, OpenBIOS hangs when trying to
> read from the CDROM device.
Paolo:
   By using git bisect on master branch , I found this patch also break 
qemu-system-arm from booting.
   command line:
  qemu-system-arm -M versatilepb -kernel vmlinuz-3.2.0-4-versatile 
-initrd initrd.img-3.2.0-4-versatile -hda 
/home/hitmoon/debian_wheezy_armel_standard.qcow2 -append 'root=/dev/sda1'

  The booting process stops at mounting the root partition and after 
timeout droped into a initramfs shell. The device '/dev/sda1' is 
presented . I guess it can not read properly from sda1.
Mark Cave-Ayland June 3, 2016, 5:22 a.m. UTC | #4
On 03/06/16 03:56, xiaoqiang zhao wrote:

> 在 2016年06月02日 03:07, Mark Cave-Ayland 写道:
>> On 23/05/16 13:54, Paolo Bonzini wrote:
>>
>>> >These are replacements for blk_aio_preadv and blk_aio_pwritev that
>>> allow
>>> >customization of the data path.  They reuse the DMA helpers' DMAIOFunc
>>> >callback type, so that the same function can be used in either the
>>> >QEMUSGList or the bounce-buffered case.
>>> >
>>> >This customization will be needed in the next patch to do zero-copy
>>> >SG_IO on scsi-block.
>>> >
>>> >Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
>>> >---
>>> >  hw/scsi/scsi-disk.c | 63
>>> +++++++++++++++++++++++++++++++++++++++++++----------
>>> >  1 file changed, 52 insertions(+), 11 deletions(-)
>>> >
>> Hi Paolo,
>>
>> This patch appears to break qemu-system-sparc booting from CDROM with
>> the following command line:
>>
>> ./qemu-system-sparc -cdrom debian-40r4a-sparc-netinst.iso -boot d
>>
>> Instead of booting straight into SILO, OpenBIOS hangs when trying to
>> read from the CDROM device.
> Paolo:
>   By using git bisect on master branch , I found this patch also break
> qemu-system-arm from booting.
>   command line:
>  qemu-system-arm -M versatilepb -kernel vmlinuz-3.2.0-4-versatile
> -initrd initrd.img-3.2.0-4-versatile -hda
> /home/hitmoon/debian_wheezy_armel_standard.qcow2 -append 'root=/dev/sda1'
> 
>  The booting process stops at mounting the root partition and after
> timeout droped into a initramfs shell. The device '/dev/sda1' is
> presented . I guess it can not read properly from sda1.

I've just sent through a patch which fixes the issue for me - please
test and report back!

Paolo - not sure if it's worth a follow-up patch that renames the
relevant _readv/_writev functions in scsi-disk.c to _preadv/_pwritev to
try and help avoid such confusion in future?


ATB,

Mark.
zhao xiao qiang June 3, 2016, 6:07 a.m. UTC | #5
在 2016年06月03日 13:22, Mark Cave-Ayland 写道:
> On 03/06/16 03:56, xiaoqiang zhao wrote:
>
>> 在 2016年06月02日 03:07, Mark Cave-Ayland 写道:
>>> On 23/05/16 13:54, Paolo Bonzini wrote:
>>>
>>>>> These are replacements for blk_aio_preadv and blk_aio_pwritev that
>>>> allow
>>>>> customization of the data path.  They reuse the DMA helpers' DMAIOFunc
>>>>> callback type, so that the same function can be used in either the
>>>>> QEMUSGList or the bounce-buffered case.
>>>>>
>>>>> This customization will be needed in the next patch to do zero-copy
>>>>> SG_IO on scsi-block.
>>>>>
>>>>> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
>>>>> ---
>>>>>   hw/scsi/scsi-disk.c | 63
>>>> +++++++++++++++++++++++++++++++++++++++++++----------
>>>>>   1 file changed, 52 insertions(+), 11 deletions(-)
>>>>>
>>> Hi Paolo,
>>>
>>> This patch appears to break qemu-system-sparc booting from CDROM with
>>> the following command line:
>>>
>>> ./qemu-system-sparc -cdrom debian-40r4a-sparc-netinst.iso -boot d
>>>
>>> Instead of booting straight into SILO, OpenBIOS hangs when trying to
>>> read from the CDROM device.
>> Paolo:
>>    By using git bisect on master branch , I found this patch also break
>> qemu-system-arm from booting.
>>    command line:
>>   qemu-system-arm -M versatilepb -kernel vmlinuz-3.2.0-4-versatile
>> -initrd initrd.img-3.2.0-4-versatile -hda
>> /home/hitmoon/debian_wheezy_armel_standard.qcow2 -append 'root=/dev/sda1'
>>
>>   The booting process stops at mounting the root partition and after
>> timeout droped into a initramfs shell. The device '/dev/sda1' is
>> presented . I guess it can not read properly from sda1.
> I've just sent through a patch which fixes the issue for me - please
> test and report back!
>
> Paolo - not sure if it's worth a follow-up patch that renames the
> relevant _readv/_writev functions in scsi-disk.c to _preadv/_pwritev to
> try and help avoid such confusion in future?
>
>
> ATB,
>
> Mark.
>
>
Mark:
   I can confirm it works for me !
diff mbox

Patch

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index eaadfd6..4b5db59 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -55,7 +55,21 @@  do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
 
 #define TYPE_SCSI_DISK_BASE         "scsi-disk-base"
 
+#define SCSI_DISK_BASE(obj) \
+     OBJECT_CHECK(SCSIDiskState, (obj), TYPE_SCSI_DISK_BASE)
+#define SCSI_DISK_BASE_CLASS(klass) \
+     OBJECT_CLASS_CHECK(SCSIDiskClass, (klass), TYPE_SCSI_DISK_BASE)
+#define SCSI_DISK_BASE_GET_CLASS(obj) \
+     OBJECT_GET_CLASS(SCSIDiskClass, (obj), TYPE_SCSI_DISK_BASE)
+
 typedef struct SCSIDiskState SCSIDiskState;
+typedef struct SCSIDiskClass SCSIDiskClass;
+
+typedef struct SCSIDiskClass {
+    SCSIDeviceClass parent_class;
+    DMAIOFunc       *dma_readv;
+    DMAIOFunc       *dma_writev;
+} SCSIDiskClass;
 
 typedef struct SCSIDiskReq {
     SCSIRequest req;
@@ -317,6 +331,7 @@  done:
 static void scsi_do_read(SCSIDiskReq *r, int ret)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+    SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
 
     assert (r->req.aiocb == NULL);
 
@@ -337,16 +352,16 @@  static void scsi_do_read(SCSIDiskReq *r, int ret)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
         r->req.resid -= r->req.sg->size;
-        r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg,
-                                    r->sector << BDRV_SECTOR_BITS,
-                                    scsi_dma_complete, r);
+        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
+                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
+                                  sdc->dma_readv, r, scsi_dma_complete, r,
+                                  DMA_DIRECTION_FROM_DEVICE);
     } else {
         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
                          r->qiov.size, BLOCK_ACCT_READ);
-        r->req.aiocb = blk_aio_preadv(s->qdev.conf.blk,
-                                      r->sector << BDRV_SECTOR_BITS, &r->qiov,
-                                      0, scsi_read_complete, r);
+        r->req.aiocb = sdc->dma_readv(r->sector, &r->qiov,
+                                      scsi_read_complete, r, r);
     }
 
 done:
@@ -506,6 +521,7 @@  static void scsi_write_data(SCSIRequest *req)
 {
     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+    SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
 
     /* No data transfer may already be in progress */
     assert(r->req.aiocb == NULL);
@@ -542,15 +558,16 @@  static void scsi_write_data(SCSIRequest *req)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
         r->req.resid -= r->req.sg->size;
-        r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg,
-                                     r->sector << BDRV_SECTOR_BITS,
                                      scsi_dma_complete, r);
+        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
+                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
+                                  sdc->dma_writev, r, scsi_dma_complete, r,
+                                  DMA_DIRECTION_TO_DEVICE);
     } else {
         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
                          r->qiov.size, BLOCK_ACCT_WRITE);
-        r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
-                                       r->sector << BDRV_SECTOR_BITS, &r->qiov,
-                                       0, scsi_write_complete, r);
+        r->req.aiocb = sdc->dma_writev(r->sector << BDRV_SECTOR_BITS, &r->qiov,
+                                       scsi_write_complete, r, r);
     }
 }
 
@@ -2658,12 +2675,35 @@  static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd,
 
 #endif
 
+static
+BlockAIOCB *scsi_dma_readv(int64_t offset, QEMUIOVector *iov,
+                           BlockCompletionFunc *cb, void *cb_opaque,
+                           void *opaque)
+{
+    SCSIDiskReq *r = opaque;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+    return blk_aio_preadv(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
+}
+
+static
+BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
+                            BlockCompletionFunc *cb, void *cb_opaque,
+                            void *opaque)
+{
+    SCSIDiskReq *r = opaque;
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+    return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
+}
+
 static void scsi_disk_base_class_initfn(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
+    SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
 
     dc->fw_name = "disk";
     dc->reset = scsi_disk_reset;
+    sdc->dma_readv = scsi_dma_readv;
+    sdc->dma_writev = scsi_dma_writev;
 }
 
 static const TypeInfo scsi_disk_base_info = {
@@ -2671,6 +2711,7 @@  static const TypeInfo scsi_disk_base_info = {
     .parent        = TYPE_SCSI_DEVICE,
     .class_init    = scsi_disk_base_class_initfn,
     .instance_size = sizeof(SCSIDiskState),
+    .class_size    = sizeof(SCSIDiskClass),
 };
 
 #define DEFINE_SCSI_DISK_PROPERTIES()                                \