scsi-block: Handle error from host devices
diff mbox series

Message ID 20180402032336.31834-1-famz@redhat.com
State New
Headers show
Series
  • scsi-block: Handle error from host devices
Related show

Commit Message

Fam Zheng April 2, 2018, 3:23 a.m. UTC
The callback of blk_aio_ioctl is not sensible to SCSI errors, so
werror=stop doesn't work if ioctl returns 0 but the scsi status is
error.

Peek at the sg_io_hdr_t fields and amend ret to fix that.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 hw/scsi/scsi-disk.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

Comments

Daniel Henrique Barboza April 3, 2018, 8:41 p.m. UTC | #1
Hi Fam,

I've tried this patch and found issues when booting a VM using SCSI 
passthrough. This is the backtrace from gdb from the segfault that 
happens in the middle of kernel boot:

Thread 1 "qemu-system-ppc" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7ffff7ff63a0 (LWP 16830)]
0x0000000000000000 in ?? ()
(gdb) bt
#0  0x0000000000000000 in ?? ()
#1  0x00000001007b8da8 in scsi_block_sgio_cb (opaque=0x10212e580, ret=0) 
at /home/danielhb/qemu/hw/scsi/scsi-disk.c:2772
#2  0x0000000100993f68 in blk_aio_complete (acb=0x101909520) at 
/home/danielhb/qemu/block/block-backend.c:1331
#3  0x0000000100994ccc in blk_aio_ioctl_entry (opaque=0x101909520) at 
/home/danielhb/qemu/block/block-backend.c:1542
#4  0x0000000100ac0954 in coroutine_trampoline (i0=28666944, i1=1) at 
/home/danielhb/qemu/util/coroutine-ucontext.c:116
#5  0x00007ffff789574c in makecontext () at 
../sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S:136
#6  0x0000000000000000 in ?? ()

The segfault is happening at this line:

static void scsi_block_sgio_cb(void *opaque, int ret)
{
     SCSIBlockReq *req = opaque;

     if (!ret &&
         (req->io_header.status ||
          req->io_header.host_status ||
          req->io_header.driver_status)) {
         ret = -EIO;
     }
     req->cb(req->cb_opaque, ret); <-----------------
}


This is happening because inside scsi_block_do_sgio you're not setting 
req->cb, just req->cb_opaque. Setting req->cb made the VM boot again:


--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2851,6 +2851,7 @@ static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq 
*req,
      io_header->usr_ptr = r;
      io_header->flags |= SG_FLAG_DIRECT_IO;

+    req->cb = cb;
      req->cb_opaque = opaque;
      aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header,
                            scsi_block_sgio_cb, req);




Thanks,


Daniel

On 04/02/2018 12:23 AM, Fam Zheng wrote:
> The callback of blk_aio_ioctl is not sensible to SCSI errors, so
> werror=stop doesn't work if ioctl returns 0 but the scsi status is
> error.
>
> Peek at the sg_io_hdr_t fields and amend ret to fix that.
>
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>   hw/scsi/scsi-disk.c | 20 +++++++++++++++++++-
>   1 file changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index f5ab767ab5..2c43830586 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -2651,10 +2651,26 @@ typedef struct SCSIBlockReq {
>       /* Selected bytes of the original CDB, copied into our own CDB.  */
>       uint8_t cmd, cdb1, group_number;
>
> +    BlockCompletionFunc *cb;
> +    void *cb_opaque;
> +
>       /* CDB passed to SG_IO.  */
>       uint8_t cdb[16];
>   } SCSIBlockReq;
>
> +static void scsi_block_sgio_cb(void *opaque, int ret)
> +{
> +    SCSIBlockReq *req = opaque;
> +
> +    if (!ret &&
> +        (req->io_header.status ||
> +         req->io_header.host_status ||
> +         req->io_header.driver_status)) {
> +        ret = -EIO;
> +    }
> +    req->cb(req->cb_opaque, ret);
> +}
> +
>   static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req,
>                                         int64_t offset, QEMUIOVector *iov,
>                                         int direction,
> @@ -2734,7 +2750,9 @@ static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req,
>       io_header->usr_ptr = r;
>       io_header->flags |= SG_FLAG_DIRECT_IO;
>
> -    aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, cb, opaque);
> +    req->cb_opaque = opaque;
> +    aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header,
> +                          scsi_block_sgio_cb, req);
>       assert(aiocb != NULL);
>       return aiocb;
>   }
Fam Zheng April 4, 2018, 1:42 a.m. UTC | #2
On Tue, 04/03 17:41, Daniel Henrique Barboza wrote:
> Hi Fam,
> 
> I've tried this patch and found issues when booting a VM using SCSI
> passthrough. This is the backtrace from gdb from the segfault that happens
> in the middle of kernel boot:
> 
> Thread 1 "qemu-system-ppc" received signal SIGSEGV, Segmentation fault.
> [Switching to Thread 0x7ffff7ff63a0 (LWP 16830)]
> 0x0000000000000000 in ?? ()
> (gdb) bt
> #0  0x0000000000000000 in ?? ()
> #1  0x00000001007b8da8 in scsi_block_sgio_cb (opaque=0x10212e580, ret=0) at
> /home/danielhb/qemu/hw/scsi/scsi-disk.c:2772
> #2  0x0000000100993f68 in blk_aio_complete (acb=0x101909520) at
> /home/danielhb/qemu/block/block-backend.c:1331
> #3  0x0000000100994ccc in blk_aio_ioctl_entry (opaque=0x101909520) at
> /home/danielhb/qemu/block/block-backend.c:1542
> #4  0x0000000100ac0954 in coroutine_trampoline (i0=28666944, i1=1) at
> /home/danielhb/qemu/util/coroutine-ucontext.c:116
> #5  0x00007ffff789574c in makecontext () at
> ../sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S:136
> #6  0x0000000000000000 in ?? ()
> 
> The segfault is happening at this line:
> 
> static void scsi_block_sgio_cb(void *opaque, int ret)
> {
>     SCSIBlockReq *req = opaque;
> 
>     if (!ret &&
>         (req->io_header.status ||
>          req->io_header.host_status ||
>          req->io_header.driver_status)) {
>         ret = -EIO;
>     }
>     req->cb(req->cb_opaque, ret); <-----------------
> }
> 
> 
> This is happening because inside scsi_block_do_sgio you're not setting
> req->cb, just req->cb_opaque. Setting req->cb made the VM boot again:

Oops, thanks. Apparently I failed to test my patch, sorry. :(

Fam

Patch
diff mbox series

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index f5ab767ab5..2c43830586 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2651,10 +2651,26 @@  typedef struct SCSIBlockReq {
     /* Selected bytes of the original CDB, copied into our own CDB.  */
     uint8_t cmd, cdb1, group_number;
 
+    BlockCompletionFunc *cb;
+    void *cb_opaque;
+
     /* CDB passed to SG_IO.  */
     uint8_t cdb[16];
 } SCSIBlockReq;
 
+static void scsi_block_sgio_cb(void *opaque, int ret)
+{
+    SCSIBlockReq *req = opaque;
+
+    if (!ret &&
+        (req->io_header.status ||
+         req->io_header.host_status ||
+         req->io_header.driver_status)) {
+        ret = -EIO;
+    }
+    req->cb(req->cb_opaque, ret);
+}
+
 static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req,
                                       int64_t offset, QEMUIOVector *iov,
                                       int direction,
@@ -2734,7 +2750,9 @@  static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req,
     io_header->usr_ptr = r;
     io_header->flags |= SG_FLAG_DIRECT_IO;
 
-    aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, cb, opaque);
+    req->cb_opaque = opaque;
+    aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header,
+                          scsi_block_sgio_cb, req);
     assert(aiocb != NULL);
     return aiocb;
 }