Message ID | 1513385953-5466-1-git-send-email-deepa.srinivasan@oracle.com |
---|---|
State | New |
Headers | show |
Series | [v2] block: Fix qemu crash when using scsi-block | expand |
On 16/12/2017 01:59, Deepa Srinivasan wrote: > Starting qemu with the following arguments causes qemu to segfault: > ... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name= > iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1 > > This patch fixes blk_aio_ioctl() so it does not pass stack addresses to > blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More > details about the bug follow. > > blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the > coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter(). > > When blk_aio_ioctl() is executed from within a coroutine context (e.g. > iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to > the current coroutine's wakeup queue. blk_aio_ioctl() then returns. > > When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer: > .... > BlkRwCo *rwco = &acb->rwco; > > rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > rwco->qiov->iov[0].iov_base); <--- qiov is > invalid here > ... > > In the case when blk_aio_ioctl() is called from a non-coroutine context, > blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls > qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine > execution is complete, control returns to blk_aio_ioctl_entry() after the call > to blk_co_ioctl(). There is no invalid reference after this point, but the > function is still holding on to invalid pointers. > > The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer > rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the > coroutine function casts it to QEMUIOVector or uses the void pointer directly. > > Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com> > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Reviewed-by: Mark Kanda <mark.kanda@oracle.com> > --- > block/block-backend.c | 51 +++++++++++++++++++++++++-------------------------- > 1 file changed, 25 insertions(+), 26 deletions(-) > > diff --git a/block/block-backend.c b/block/block-backend.c > index baef8e7..2d0d9b6 100644 > --- a/block/block-backend.c > +++ b/block/block-backend.c > @@ -1140,7 +1140,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, > typedef struct BlkRwCo { > BlockBackend *blk; > int64_t offset; > - QEMUIOVector *qiov; > + void *iobuf; > int ret; > BdrvRequestFlags flags; > } BlkRwCo; > @@ -1148,17 +1148,19 @@ typedef struct BlkRwCo { > static void blk_read_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > + QEMUIOVector *qiov = rwco->iobuf; > > - rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size, > - rwco->qiov, rwco->flags); > + rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, > + qiov, rwco->flags); > } > > static void blk_write_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > + QEMUIOVector *qiov = rwco->iobuf; > > - rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size, > - rwco->qiov, rwco->flags); > + rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, > + qiov, rwco->flags); > } > > static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, > @@ -1178,7 +1180,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, > rwco = (BlkRwCo) { > .blk = blk, > .offset = offset, > - .qiov = &qiov, > + .iobuf = &qiov, > .flags = flags, > .ret = NOT_DONE, > }; > @@ -1275,7 +1277,7 @@ static void blk_aio_complete_bh(void *opaque) > } > > static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, > - QEMUIOVector *qiov, CoroutineEntry co_entry, > + void *iobuf, CoroutineEntry co_entry, > BdrvRequestFlags flags, > BlockCompletionFunc *cb, void *opaque) > { > @@ -1287,7 +1289,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, > acb->rwco = (BlkRwCo) { > .blk = blk, > .offset = offset, > - .qiov = qiov, > + .iobuf = iobuf, > .flags = flags, > .ret = NOT_DONE, > }; > @@ -1310,10 +1312,11 @@ static void blk_aio_read_entry(void *opaque) > { > BlkAioEmAIOCB *acb = opaque; > BlkRwCo *rwco = &acb->rwco; > + QEMUIOVector *qiov = rwco->iobuf; > > - assert(rwco->qiov->size == acb->bytes); > + assert(qiov->size == acb->bytes); > rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, > - rwco->qiov, rwco->flags); > + qiov, rwco->flags); > blk_aio_complete(acb); > } > > @@ -1321,10 +1324,11 @@ static void blk_aio_write_entry(void *opaque) > { > BlkAioEmAIOCB *acb = opaque; > BlkRwCo *rwco = &acb->rwco; > + QEMUIOVector *qiov = rwco->iobuf; > > - assert(!rwco->qiov || rwco->qiov->size == acb->bytes); > + assert(!qiov || qiov->size == acb->bytes); > rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, > - rwco->qiov, rwco->flags); > + qiov, rwco->flags); > blk_aio_complete(acb); > } > > @@ -1453,8 +1457,10 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) > static void blk_ioctl_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > + QEMUIOVector *qiov = rwco->iobuf; > + > rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > - rwco->qiov->iov[0].iov_base); > + qiov->iov[0].iov_base); > } > > int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) > @@ -1467,24 +1473,15 @@ static void blk_aio_ioctl_entry(void *opaque) > BlkAioEmAIOCB *acb = opaque; > BlkRwCo *rwco = &acb->rwco; > > - rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > - rwco->qiov->iov[0].iov_base); > + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); > + > blk_aio_complete(acb); > } > > BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, > BlockCompletionFunc *cb, void *opaque) > { > - QEMUIOVector qiov; > - struct iovec iov; > - > - iov = (struct iovec) { > - .iov_base = buf, > - .iov_len = 0, > - }; > - qemu_iovec_init_external(&qiov, &iov, 1); > - > - return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque); > + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); > } > > int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) > @@ -1900,7 +1897,9 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc, > static void blk_pdiscard_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size); > + QEMUIOVector *qiov = rwco->iobuf; > + > + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); > } > > int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) > Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
On Fri, Dec 15, 2017 at 04:59:13PM -0800, Deepa Srinivasan wrote: > Starting qemu with the following arguments causes qemu to segfault: > ... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name= > iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1 > > This patch fixes blk_aio_ioctl() so it does not pass stack addresses to > blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More > details about the bug follow. > > blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the > coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter(). > > When blk_aio_ioctl() is executed from within a coroutine context (e.g. > iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to > the current coroutine's wakeup queue. blk_aio_ioctl() then returns. > > When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer: > .... > BlkRwCo *rwco = &acb->rwco; > > rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > rwco->qiov->iov[0].iov_base); <--- qiov is > invalid here > ... > > In the case when blk_aio_ioctl() is called from a non-coroutine context, > blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls > qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine > execution is complete, control returns to blk_aio_ioctl_entry() after the call > to blk_co_ioctl(). There is no invalid reference after this point, but the > function is still holding on to invalid pointers. > > The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer > rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the > coroutine function casts it to QEMUIOVector or uses the void pointer directly. > > Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com> > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Reviewed-by: Mark Kanda <mark.kanda@oracle.com> > --- > block/block-backend.c | 51 +++++++++++++++++++++++++-------------------------- > 1 file changed, 25 insertions(+), 26 deletions(-) Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
On Fri, Dec 15, 2017 at 04:59:13PM -0800, Deepa Srinivasan wrote: > Starting qemu with the following arguments causes qemu to segfault: > ... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name= > iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1 > > This patch fixes blk_aio_ioctl() so it does not pass stack addresses to > blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More > details about the bug follow. > > blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the > coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter(). > > When blk_aio_ioctl() is executed from within a coroutine context (e.g. > iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to > the current coroutine's wakeup queue. blk_aio_ioctl() then returns. > > When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer: > .... > BlkRwCo *rwco = &acb->rwco; > > rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > rwco->qiov->iov[0].iov_base); <--- qiov is > invalid here > ... > > In the case when blk_aio_ioctl() is called from a non-coroutine context, > blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls > qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine > execution is complete, control returns to blk_aio_ioctl_entry() after the call > to blk_co_ioctl(). There is no invalid reference after this point, but the > function is still holding on to invalid pointers. > > The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer > rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the > coroutine function casts it to QEMUIOVector or uses the void pointer directly. > > Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com> > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Reviewed-by: Mark Kanda <mark.kanda@oracle.com> > --- > block/block-backend.c | 51 +++++++++++++++++++++++++-------------------------- Kevin: Ping. Will you take this through your tree? > 1 file changed, 25 insertions(+), 26 deletions(-) > > diff --git a/block/block-backend.c b/block/block-backend.c > index baef8e7..2d0d9b6 100644 > --- a/block/block-backend.c > +++ b/block/block-backend.c > @@ -1140,7 +1140,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, > typedef struct BlkRwCo { > BlockBackend *blk; > int64_t offset; > - QEMUIOVector *qiov; > + void *iobuf; > int ret; > BdrvRequestFlags flags; > } BlkRwCo; > @@ -1148,17 +1148,19 @@ typedef struct BlkRwCo { > static void blk_read_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > + QEMUIOVector *qiov = rwco->iobuf; > > - rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size, > - rwco->qiov, rwco->flags); > + rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, > + qiov, rwco->flags); > } > > static void blk_write_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > + QEMUIOVector *qiov = rwco->iobuf; > > - rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size, > - rwco->qiov, rwco->flags); > + rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, > + qiov, rwco->flags); > } > > static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, > @@ -1178,7 +1180,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, > rwco = (BlkRwCo) { > .blk = blk, > .offset = offset, > - .qiov = &qiov, > + .iobuf = &qiov, > .flags = flags, > .ret = NOT_DONE, > }; > @@ -1275,7 +1277,7 @@ static void blk_aio_complete_bh(void *opaque) > } > > static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, > - QEMUIOVector *qiov, CoroutineEntry co_entry, > + void *iobuf, CoroutineEntry co_entry, > BdrvRequestFlags flags, > BlockCompletionFunc *cb, void *opaque) > { > @@ -1287,7 +1289,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, > acb->rwco = (BlkRwCo) { > .blk = blk, > .offset = offset, > - .qiov = qiov, > + .iobuf = iobuf, > .flags = flags, > .ret = NOT_DONE, > }; > @@ -1310,10 +1312,11 @@ static void blk_aio_read_entry(void *opaque) > { > BlkAioEmAIOCB *acb = opaque; > BlkRwCo *rwco = &acb->rwco; > + QEMUIOVector *qiov = rwco->iobuf; > > - assert(rwco->qiov->size == acb->bytes); > + assert(qiov->size == acb->bytes); > rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, > - rwco->qiov, rwco->flags); > + qiov, rwco->flags); > blk_aio_complete(acb); > } > > @@ -1321,10 +1324,11 @@ static void blk_aio_write_entry(void *opaque) > { > BlkAioEmAIOCB *acb = opaque; > BlkRwCo *rwco = &acb->rwco; > + QEMUIOVector *qiov = rwco->iobuf; > > - assert(!rwco->qiov || rwco->qiov->size == acb->bytes); > + assert(!qiov || qiov->size == acb->bytes); > rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, > - rwco->qiov, rwco->flags); > + qiov, rwco->flags); > blk_aio_complete(acb); > } > > @@ -1453,8 +1457,10 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) > static void blk_ioctl_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > + QEMUIOVector *qiov = rwco->iobuf; > + > rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > - rwco->qiov->iov[0].iov_base); > + qiov->iov[0].iov_base); > } > > int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) > @@ -1467,24 +1473,15 @@ static void blk_aio_ioctl_entry(void *opaque) > BlkAioEmAIOCB *acb = opaque; > BlkRwCo *rwco = &acb->rwco; > > - rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > - rwco->qiov->iov[0].iov_base); > + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); > + > blk_aio_complete(acb); > } > > BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, > BlockCompletionFunc *cb, void *opaque) > { > - QEMUIOVector qiov; > - struct iovec iov; > - > - iov = (struct iovec) { > - .iov_base = buf, > - .iov_len = 0, > - }; > - qemu_iovec_init_external(&qiov, &iov, 1); > - > - return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque); > + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); > } > > int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) > @@ -1900,7 +1897,9 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc, > static void blk_pdiscard_entry(void *opaque) > { > BlkRwCo *rwco = opaque; > - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size); > + QEMUIOVector *qiov = rwco->iobuf; > + > + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); > } > > int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) > -- > 2.7.4 > >
Stefan, Kevin - Ping, to take this patch. Thanks. On 01/29/2018 07:51 AM, Stefan Hajnoczi wrote: > On Fri, Dec 15, 2017 at 04:59:13PM -0800, Deepa Srinivasan wrote: >> Starting qemu with the following arguments causes qemu to segfault: >> ... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name= >> iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1 >> >> This patch fixes blk_aio_ioctl() so it does not pass stack addresses to >> blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More >> details about the bug follow. >> >> blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the >> coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter(). >> >> When blk_aio_ioctl() is executed from within a coroutine context (e.g. >> iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to >> the current coroutine's wakeup queue. blk_aio_ioctl() then returns. >> >> When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer: >> .... >> BlkRwCo *rwco = &acb->rwco; >> >> rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, >> rwco->qiov->iov[0].iov_base); <--- qiov is >> invalid here >> ... >> >> In the case when blk_aio_ioctl() is called from a non-coroutine context, >> blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls >> qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine >> execution is complete, control returns to blk_aio_ioctl_entry() after the call >> to blk_co_ioctl(). There is no invalid reference after this point, but the >> function is still holding on to invalid pointers. >> >> The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer >> rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the >> coroutine function casts it to QEMUIOVector or uses the void pointer directly. >> >> Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com> >> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> >> Reviewed-by: Mark Kanda <mark.kanda@oracle.com> >> --- >> block/block-backend.c | 51 +++++++++++++++++++++++++-------------------------- > Kevin: Ping. Will you take this through your tree? > >> 1 file changed, 25 insertions(+), 26 deletions(-) >> >> diff --git a/block/block-backend.c b/block/block-backend.c >> index baef8e7..2d0d9b6 100644 >> --- a/block/block-backend.c >> +++ b/block/block-backend.c >> @@ -1140,7 +1140,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, >> typedef struct BlkRwCo { >> BlockBackend *blk; >> int64_t offset; >> - QEMUIOVector *qiov; >> + void *iobuf; >> int ret; >> BdrvRequestFlags flags; >> } BlkRwCo; >> @@ -1148,17 +1148,19 @@ typedef struct BlkRwCo { >> static void blk_read_entry(void *opaque) >> { >> BlkRwCo *rwco = opaque; >> + QEMUIOVector *qiov = rwco->iobuf; >> >> - rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size, >> - rwco->qiov, rwco->flags); >> + rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, >> + qiov, rwco->flags); >> } >> >> static void blk_write_entry(void *opaque) >> { >> BlkRwCo *rwco = opaque; >> + QEMUIOVector *qiov = rwco->iobuf; >> >> - rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size, >> - rwco->qiov, rwco->flags); >> + rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, >> + qiov, rwco->flags); >> } >> >> static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, >> @@ -1178,7 +1180,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, >> rwco = (BlkRwCo) { >> .blk = blk, >> .offset = offset, >> - .qiov = &qiov, >> + .iobuf = &qiov, >> .flags = flags, >> .ret = NOT_DONE, >> }; >> @@ -1275,7 +1277,7 @@ static void blk_aio_complete_bh(void *opaque) >> } >> >> static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, >> - QEMUIOVector *qiov, CoroutineEntry co_entry, >> + void *iobuf, CoroutineEntry co_entry, >> BdrvRequestFlags flags, >> BlockCompletionFunc *cb, void *opaque) >> { >> @@ -1287,7 +1289,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, >> acb->rwco = (BlkRwCo) { >> .blk = blk, >> .offset = offset, >> - .qiov = qiov, >> + .iobuf = iobuf, >> .flags = flags, >> .ret = NOT_DONE, >> }; >> @@ -1310,10 +1312,11 @@ static void blk_aio_read_entry(void *opaque) >> { >> BlkAioEmAIOCB *acb = opaque; >> BlkRwCo *rwco = &acb->rwco; >> + QEMUIOVector *qiov = rwco->iobuf; >> >> - assert(rwco->qiov->size == acb->bytes); >> + assert(qiov->size == acb->bytes); >> rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, >> - rwco->qiov, rwco->flags); >> + qiov, rwco->flags); >> blk_aio_complete(acb); >> } >> >> @@ -1321,10 +1324,11 @@ static void blk_aio_write_entry(void *opaque) >> { >> BlkAioEmAIOCB *acb = opaque; >> BlkRwCo *rwco = &acb->rwco; >> + QEMUIOVector *qiov = rwco->iobuf; >> >> - assert(!rwco->qiov || rwco->qiov->size == acb->bytes); >> + assert(!qiov || qiov->size == acb->bytes); >> rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, >> - rwco->qiov, rwco->flags); >> + qiov, rwco->flags); >> blk_aio_complete(acb); >> } >> >> @@ -1453,8 +1457,10 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) >> static void blk_ioctl_entry(void *opaque) >> { >> BlkRwCo *rwco = opaque; >> + QEMUIOVector *qiov = rwco->iobuf; >> + >> rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, >> - rwco->qiov->iov[0].iov_base); >> + qiov->iov[0].iov_base); >> } >> >> int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) >> @@ -1467,24 +1473,15 @@ static void blk_aio_ioctl_entry(void *opaque) >> BlkAioEmAIOCB *acb = opaque; >> BlkRwCo *rwco = &acb->rwco; >> >> - rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, >> - rwco->qiov->iov[0].iov_base); >> + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); >> + >> blk_aio_complete(acb); >> } >> >> BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, >> BlockCompletionFunc *cb, void *opaque) >> { >> - QEMUIOVector qiov; >> - struct iovec iov; >> - >> - iov = (struct iovec) { >> - .iov_base = buf, >> - .iov_len = 0, >> - }; >> - qemu_iovec_init_external(&qiov, &iov, 1); >> - >> - return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque); >> + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); >> } >> >> int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) >> @@ -1900,7 +1897,9 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc, >> static void blk_pdiscard_entry(void *opaque) >> { >> BlkRwCo *rwco = opaque; >> - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size); >> + QEMUIOVector *qiov = rwco->iobuf; >> + >> + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); >> } >> >> int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) >> -- >> 2.7.4 >> >>
On Fri, Dec 15, 2017 at 04:59:13PM -0800, Deepa Srinivasan wrote: > Starting qemu with the following arguments causes qemu to segfault: > ... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name= > iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1 > > This patch fixes blk_aio_ioctl() so it does not pass stack addresses to > blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More > details about the bug follow. > > blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the > coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter(). > > When blk_aio_ioctl() is executed from within a coroutine context (e.g. > iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to > the current coroutine's wakeup queue. blk_aio_ioctl() then returns. > > When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer: > .... > BlkRwCo *rwco = &acb->rwco; > > rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, > rwco->qiov->iov[0].iov_base); <--- qiov is > invalid here > ... > > In the case when blk_aio_ioctl() is called from a non-coroutine context, > blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls > qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine > execution is complete, control returns to blk_aio_ioctl_entry() after the call > to blk_co_ioctl(). There is no invalid reference after this point, but the > function is still holding on to invalid pointers. > > The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer > rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the > coroutine function casts it to QEMUIOVector or uses the void pointer directly. > > Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com> > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Reviewed-by: Mark Kanda <mark.kanda@oracle.com> > --- > block/block-backend.c | 51 +++++++++++++++++++++++++-------------------------- > 1 file changed, 25 insertions(+), 26 deletions(-) There has been no activity after pings, I'm merging this now although technically it should go via Kevin's tree. Thanks, applied to my block-next tree: https://github.com/stefanha/qemu/commits/block-next Stefan
diff --git a/block/block-backend.c b/block/block-backend.c index baef8e7..2d0d9b6 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1140,7 +1140,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, typedef struct BlkRwCo { BlockBackend *blk; int64_t offset; - QEMUIOVector *qiov; + void *iobuf; int ret; BdrvRequestFlags flags; } BlkRwCo; @@ -1148,17 +1148,19 @@ typedef struct BlkRwCo { static void blk_read_entry(void *opaque) { BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; - rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size, - rwco->qiov, rwco->flags); + rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, + qiov, rwco->flags); } static void blk_write_entry(void *opaque) { BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; - rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size, - rwco->qiov, rwco->flags); + rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, + qiov, rwco->flags); } static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, @@ -1178,7 +1180,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, rwco = (BlkRwCo) { .blk = blk, .offset = offset, - .qiov = &qiov, + .iobuf = &qiov, .flags = flags, .ret = NOT_DONE, }; @@ -1275,7 +1277,7 @@ static void blk_aio_complete_bh(void *opaque) } static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, - QEMUIOVector *qiov, CoroutineEntry co_entry, + void *iobuf, CoroutineEntry co_entry, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { @@ -1287,7 +1289,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, acb->rwco = (BlkRwCo) { .blk = blk, .offset = offset, - .qiov = qiov, + .iobuf = iobuf, .flags = flags, .ret = NOT_DONE, }; @@ -1310,10 +1312,11 @@ static void blk_aio_read_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; + QEMUIOVector *qiov = rwco->iobuf; - assert(rwco->qiov->size == acb->bytes); + assert(qiov->size == acb->bytes); rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, - rwco->qiov, rwco->flags); + qiov, rwco->flags); blk_aio_complete(acb); } @@ -1321,10 +1324,11 @@ static void blk_aio_write_entry(void *opaque) { BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; + QEMUIOVector *qiov = rwco->iobuf; - assert(!rwco->qiov || rwco->qiov->size == acb->bytes); + assert(!qiov || qiov->size == acb->bytes); rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, - rwco->qiov, rwco->flags); + qiov, rwco->flags); blk_aio_complete(acb); } @@ -1453,8 +1457,10 @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) static void blk_ioctl_entry(void *opaque) { BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, - rwco->qiov->iov[0].iov_base); + qiov->iov[0].iov_base); } int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) @@ -1467,24 +1473,15 @@ static void blk_aio_ioctl_entry(void *opaque) BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, - rwco->qiov->iov[0].iov_base); + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); + blk_aio_complete(acb); } BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) { - QEMUIOVector qiov; - struct iovec iov; - - iov = (struct iovec) { - .iov_base = buf, - .iov_len = 0, - }; - qemu_iovec_init_external(&qiov, &iov, 1); - - return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque); + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); } int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) @@ -1900,7 +1897,9 @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc, static void blk_pdiscard_entry(void *opaque) { BlkRwCo *rwco = opaque; - rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size); + QEMUIOVector *qiov = rwco->iobuf; + + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); } int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)