Message ID | 20180402032336.31834-1-famz@redhat.com |
---|---|
State | New |
Headers | show |
Series | scsi-block: Handle error from host devices | expand |
Hi Fam, I've tried this patch and found issues when booting a VM using SCSI passthrough. This is the backtrace from gdb from the segfault that happens in the middle of kernel boot: Thread 1 "qemu-system-ppc" received signal SIGSEGV, Segmentation fault. [Switching to Thread 0x7ffff7ff63a0 (LWP 16830)] 0x0000000000000000 in ?? () (gdb) bt #0 0x0000000000000000 in ?? () #1 0x00000001007b8da8 in scsi_block_sgio_cb (opaque=0x10212e580, ret=0) at /home/danielhb/qemu/hw/scsi/scsi-disk.c:2772 #2 0x0000000100993f68 in blk_aio_complete (acb=0x101909520) at /home/danielhb/qemu/block/block-backend.c:1331 #3 0x0000000100994ccc in blk_aio_ioctl_entry (opaque=0x101909520) at /home/danielhb/qemu/block/block-backend.c:1542 #4 0x0000000100ac0954 in coroutine_trampoline (i0=28666944, i1=1) at /home/danielhb/qemu/util/coroutine-ucontext.c:116 #5 0x00007ffff789574c in makecontext () at ../sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S:136 #6 0x0000000000000000 in ?? () The segfault is happening at this line: static void scsi_block_sgio_cb(void *opaque, int ret) { SCSIBlockReq *req = opaque; if (!ret && (req->io_header.status || req->io_header.host_status || req->io_header.driver_status)) { ret = -EIO; } req->cb(req->cb_opaque, ret); <----------------- } This is happening because inside scsi_block_do_sgio you're not setting req->cb, just req->cb_opaque. Setting req->cb made the VM boot again: --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2851,6 +2851,7 @@ static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req, io_header->usr_ptr = r; io_header->flags |= SG_FLAG_DIRECT_IO; + req->cb = cb; req->cb_opaque = opaque; aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, scsi_block_sgio_cb, req); Thanks, Daniel On 04/02/2018 12:23 AM, Fam Zheng wrote: > The callback of blk_aio_ioctl is not sensible to SCSI errors, so > werror=stop doesn't work if ioctl returns 0 but the scsi status is > error. > > Peek at the sg_io_hdr_t fields and amend ret to fix that. > > Signed-off-by: Fam Zheng <famz@redhat.com> > --- > hw/scsi/scsi-disk.c | 20 +++++++++++++++++++- > 1 file changed, 19 insertions(+), 1 deletion(-) > > diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c > index f5ab767ab5..2c43830586 100644 > --- a/hw/scsi/scsi-disk.c > +++ b/hw/scsi/scsi-disk.c > @@ -2651,10 +2651,26 @@ typedef struct SCSIBlockReq { > /* Selected bytes of the original CDB, copied into our own CDB. */ > uint8_t cmd, cdb1, group_number; > > + BlockCompletionFunc *cb; > + void *cb_opaque; > + > /* CDB passed to SG_IO. */ > uint8_t cdb[16]; > } SCSIBlockReq; > > +static void scsi_block_sgio_cb(void *opaque, int ret) > +{ > + SCSIBlockReq *req = opaque; > + > + if (!ret && > + (req->io_header.status || > + req->io_header.host_status || > + req->io_header.driver_status)) { > + ret = -EIO; > + } > + req->cb(req->cb_opaque, ret); > +} > + > static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req, > int64_t offset, QEMUIOVector *iov, > int direction, > @@ -2734,7 +2750,9 @@ static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req, > io_header->usr_ptr = r; > io_header->flags |= SG_FLAG_DIRECT_IO; > > - aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, cb, opaque); > + req->cb_opaque = opaque; > + aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, > + scsi_block_sgio_cb, req); > assert(aiocb != NULL); > return aiocb; > }
On Tue, 04/03 17:41, Daniel Henrique Barboza wrote: > Hi Fam, > > I've tried this patch and found issues when booting a VM using SCSI > passthrough. This is the backtrace from gdb from the segfault that happens > in the middle of kernel boot: > > Thread 1 "qemu-system-ppc" received signal SIGSEGV, Segmentation fault. > [Switching to Thread 0x7ffff7ff63a0 (LWP 16830)] > 0x0000000000000000 in ?? () > (gdb) bt > #0 0x0000000000000000 in ?? () > #1 0x00000001007b8da8 in scsi_block_sgio_cb (opaque=0x10212e580, ret=0) at > /home/danielhb/qemu/hw/scsi/scsi-disk.c:2772 > #2 0x0000000100993f68 in blk_aio_complete (acb=0x101909520) at > /home/danielhb/qemu/block/block-backend.c:1331 > #3 0x0000000100994ccc in blk_aio_ioctl_entry (opaque=0x101909520) at > /home/danielhb/qemu/block/block-backend.c:1542 > #4 0x0000000100ac0954 in coroutine_trampoline (i0=28666944, i1=1) at > /home/danielhb/qemu/util/coroutine-ucontext.c:116 > #5 0x00007ffff789574c in makecontext () at > ../sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S:136 > #6 0x0000000000000000 in ?? () > > The segfault is happening at this line: > > static void scsi_block_sgio_cb(void *opaque, int ret) > { > SCSIBlockReq *req = opaque; > > if (!ret && > (req->io_header.status || > req->io_header.host_status || > req->io_header.driver_status)) { > ret = -EIO; > } > req->cb(req->cb_opaque, ret); <----------------- > } > > > This is happening because inside scsi_block_do_sgio you're not setting > req->cb, just req->cb_opaque. Setting req->cb made the VM boot again: Oops, thanks. Apparently I failed to test my patch, sorry. :( Fam
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index f5ab767ab5..2c43830586 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2651,10 +2651,26 @@ typedef struct SCSIBlockReq { /* Selected bytes of the original CDB, copied into our own CDB. */ uint8_t cmd, cdb1, group_number; + BlockCompletionFunc *cb; + void *cb_opaque; + /* CDB passed to SG_IO. */ uint8_t cdb[16]; } SCSIBlockReq; +static void scsi_block_sgio_cb(void *opaque, int ret) +{ + SCSIBlockReq *req = opaque; + + if (!ret && + (req->io_header.status || + req->io_header.host_status || + req->io_header.driver_status)) { + ret = -EIO; + } + req->cb(req->cb_opaque, ret); +} + static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req, int64_t offset, QEMUIOVector *iov, int direction, @@ -2734,7 +2750,9 @@ static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req, io_header->usr_ptr = r; io_header->flags |= SG_FLAG_DIRECT_IO; - aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, cb, opaque); + req->cb_opaque = opaque; + aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, + scsi_block_sgio_cb, req); assert(aiocb != NULL); return aiocb; }
The callback of blk_aio_ioctl is not sensible to SCSI errors, so werror=stop doesn't work if ioctl returns 0 but the scsi status is error. Peek at the sg_io_hdr_t fields and amend ret to fix that. Signed-off-by: Fam Zheng <famz@redhat.com> --- hw/scsi/scsi-disk.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-)