Message ID | 20120513160331.5b774c93.yizhouzhou@ict.ac.cn |
---|---|
State | New |
Headers | show |
Am 13.05.2012 10:03, schrieb Zhouyi Zhou: > hi all > > sometimes, qemu/kvm-0.1x will hang in endless loop in qcow2_alloc_cluster_offset. > after some investigation, I found that: > in function posix_aio_process_queue(void *opaque) > 440 ret = qemu_paio_error(acb); > 441 if (ret == ECANCELED) { > 442 /* remove the request */ > 443 *pacb = acb->next; > 444 qemu_aio_release(acb); > 445 result = 1; > 446 } else if (ret != EINPROGRESS) { > in line 444 acb got released but acb->common.opaque does not. > which will be released via guest OS via ide_dma_cancel which > will in turn call qcow_aio_cancel which does not check its argument > is in flight list or not. > The fix is as follows: (debian 6's qemu-kvm-0.12.5) > ####################################### > --- block/qcow2.h~ 2010-07-27 08:43:53.000000000 +0800 > +++ block/qcow2.h 2012-05-13 15:51:39.000000000 +0800 > @@ -143,6 +143,7 @@ > QLIST_HEAD(QCowAioDependencies, QCowAIOCB) dependent_requests; > > QLIST_ENTRY(QCowL2Meta) next_in_flight; > + int inflight; > } QCowL2Meta; > --- block/qcow2.c~ 2012-05-13 15:57:09.000000000 +0800 > +++ block/qcow2.c 2012-05-13 15:57:24.000000000 +0800 > @@ -349,6 +349,10 @@ > QCowAIOCB *acb = (QCowAIOCB *)blockacb; > if (acb->hd_aiocb) > bdrv_aio_cancel(acb->hd_aiocb); > + if (acb->l2meta.inflight) { > + QLIST_REMOVE(&acb->l2meta, next_in_flight); > + acb->l2meta.inflight = 0; > + } > qemu_aio_release(acb); > } > > @@ -506,6 +510,7 @@ > acb->n = 0; > acb->cluster_offset = 0; > acb->l2meta.nb_clusters = 0; > + acb->l2meta.inflight = 0; > QLIST_INIT(&acb->l2meta.dependent_requests); > return acb; > } > @@ -534,6 +539,7 @@ > /* Take the request off the list of running requests */ > if (m->nb_clusters != 0) { > QLIST_REMOVE(m, next_in_flight); > + m->inflight = 0; > } > > /* > @@ -632,6 +638,7 @@ > fail: > if (acb->l2meta.nb_clusters != 0) { > QLIST_REMOVE(&acb->l2meta, next_in_flight); > + acb->l2meta.inflight = 0; > } > done: > if (acb->qiov->niov > 1) > --- block/qcow2-cluster.c~ 
2010-07-27 08:43:53.000000000 +0800 > +++ block/qcow2-cluster.c 2012-05-13 15:53:53.000000000 +0800 > @@ -827,6 +827,7 @@ > m->offset = offset; > m->n_start = n_start; > m->nb_clusters = nb_clusters; > + m->inflight = 1; > > out: > m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end); > > Thanks for investigation > Zhouyi The patch looks reasonable to me. Note however that while it fixes the hang, it still causes cluster leaks. I'm not sure if someone is interested in picking these up for old stable releases. Andreas, I think you were going to take 0.15? The first version that doesn't have the problem is 1.0. Kevin
Am 14.05.2012 14:20, schrieb Kevin Wolf: > Am 13.05.2012 10:03, schrieb Zhouyi Zhou: >> hi all >> >> sometimes, qemu/kvm-0.1x will hang in endless loop in qcow2_alloc_cluster_offset. >> after some investigation, I found that: >> in function posix_aio_process_queue(void *opaque) >> 440 ret = qemu_paio_error(acb); >> 441 if (ret == ECANCELED) { >> 442 /* remove the request */ >> 443 *pacb = acb->next; >> 444 qemu_aio_release(acb); >> 445 result = 1; >> 446 } else if (ret != EINPROGRESS) { >> in line 444 acb got released but acb->common.opaque does not. >> which will be released via guest OS via ide_dma_cancel which >> will in term call qcow_aio_cancel which does not check its argument >> is in flight list or not. >> The fix is as follows: (debian 6's qemu-kvm-0.12.5) >> ####################################### >> --- block/qcow2.h~ 2010-07-27 08:43:53.000000000 +0800 >> +++ block/qcow2.h 2012-05-13 15:51:39.000000000 +0800 >> @@ -143,6 +143,7 @@ >> QLIST_HEAD(QCowAioDependencies, QCowAIOCB) dependent_requests; >> >> QLIST_ENTRY(QCowL2Meta) next_in_flight; >> + int inflight; >> } QCowL2Meta; >> --- block/qcow2.c~ 2012-05-13 15:57:09.000000000 +0800 >> +++ block/qcow2.c 2012-05-13 15:57:24.000000000 +0800 >> @@ -349,6 +349,10 @@ >> QCowAIOCB *acb = (QCowAIOCB *)blockacb; >> if (acb->hd_aiocb) >> bdrv_aio_cancel(acb->hd_aiocb); >> + if (acb->l2meta.inflight) { >> + QLIST_REMOVE(&acb->l2meta, next_in_flight); >> + acb->l2meta.inflight = 0; >> + } >> qemu_aio_release(acb); >> } >> >> @@ -506,6 +510,7 @@ >> acb->n = 0; >> acb->cluster_offset = 0; >> acb->l2meta.nb_clusters = 0; >> + acb->l2meta.inflight = 0; >> QLIST_INIT(&acb->l2meta.dependent_requests); >> return acb; >> } >> @@ -534,6 +539,7 @@ >> /* Take the request off the list of running requests */ >> if (m->nb_clusters != 0) { >> QLIST_REMOVE(m, next_in_flight); >> + m->inflight = 0; >> } >> >> /* >> @@ -632,6 +638,7 @@ >> fail: >> if (acb->l2meta.nb_clusters != 0) { >> QLIST_REMOVE(&acb->l2meta, next_in_flight); >> 
+ acb->l2meta.inflight = 0; >> } >> done: >> if (acb->qiov->niov > 1) >> --- block/qcow2-cluster.c~ 2010-07-27 08:43:53.000000000 +0800 >> +++ block/qcow2-cluster.c 2012-05-13 15:53:53.000000000 +0800 >> @@ -827,6 +827,7 @@ >> m->offset = offset; >> m->n_start = n_start; >> m->nb_clusters = nb_clusters; >> + m->inflight = 1; >> >> out: >> m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end); >> >> Thanks for investigation >> Zhouyi > > The patch looks reasonable to me. Note however that while it fixes the > hang, it still causes cluster leaks. I'm not sure if someone is > interested in picking these up for old stable releases. Andreas, I think > you were going to take 0.15? The first version that doesn't have the > problem is 1.0. Kevin, the policy as I understood it is to cherry-pick patches from qemu.git into qemu-stable-x.y.git. So I don't think me applying this patch to stable-0.15 would be right. I don't spot a particular qcow2 fix among our 0.15 backports that I have now pushed. Do you have a pointer which one(s) would fix this issue so that I can recheck? Zhouyi, could you test git://git.qemu.org/qemu-stable-0.15.git and check if the problem is still there? Thanks, Andreas
Am 12.06.2012 15:33, schrieb Andreas Färber: > Am 14.05.2012 14:20, schrieb Kevin Wolf: >> Am 13.05.2012 10:03, schrieb Zhouyi Zhou: >>> hi all >>> >>> sometimes, qemu/kvm-0.1x will hang in endless loop in qcow2_alloc_cluster_offset. >>> after some investigation, I found that: >>> in function posix_aio_process_queue(void *opaque) >>> 440 ret = qemu_paio_error(acb); >>> 441 if (ret == ECANCELED) { >>> 442 /* remove the request */ >>> 443 *pacb = acb->next; >>> 444 qemu_aio_release(acb); >>> 445 result = 1; >>> 446 } else if (ret != EINPROGRESS) { >>> in line 444 acb got released but acb->common.opaque does not. >>> which will be released via guest OS via ide_dma_cancel which >>> will in term call qcow_aio_cancel which does not check its argument >>> is in flight list or not. >>> The fix is as follows: (debian 6's qemu-kvm-0.12.5) >>> ####################################### >>> --- block/qcow2.h~ 2010-07-27 08:43:53.000000000 +0800 >>> +++ block/qcow2.h 2012-05-13 15:51:39.000000000 +0800 >>> @@ -143,6 +143,7 @@ >>> QLIST_HEAD(QCowAioDependencies, QCowAIOCB) dependent_requests; >>> >>> QLIST_ENTRY(QCowL2Meta) next_in_flight; >>> + int inflight; >>> } QCowL2Meta; >>> --- block/qcow2.c~ 2012-05-13 15:57:09.000000000 +0800 >>> +++ block/qcow2.c 2012-05-13 15:57:24.000000000 +0800 >>> @@ -349,6 +349,10 @@ >>> QCowAIOCB *acb = (QCowAIOCB *)blockacb; >>> if (acb->hd_aiocb) >>> bdrv_aio_cancel(acb->hd_aiocb); >>> + if (acb->l2meta.inflight) { >>> + QLIST_REMOVE(&acb->l2meta, next_in_flight); >>> + acb->l2meta.inflight = 0; >>> + } >>> qemu_aio_release(acb); >>> } >>> >>> @@ -506,6 +510,7 @@ >>> acb->n = 0; >>> acb->cluster_offset = 0; >>> acb->l2meta.nb_clusters = 0; >>> + acb->l2meta.inflight = 0; >>> QLIST_INIT(&acb->l2meta.dependent_requests); >>> return acb; >>> } >>> @@ -534,6 +539,7 @@ >>> /* Take the request off the list of running requests */ >>> if (m->nb_clusters != 0) { >>> QLIST_REMOVE(m, next_in_flight); >>> + m->inflight = 0; >>> } >>> >>> /* >>> @@ -632,6 
+638,7 @@ >>> fail: >>> if (acb->l2meta.nb_clusters != 0) { >>> QLIST_REMOVE(&acb->l2meta, next_in_flight); >>> + acb->l2meta.inflight = 0; >>> } >>> done: >>> if (acb->qiov->niov > 1) >>> --- block/qcow2-cluster.c~ 2010-07-27 08:43:53.000000000 +0800 >>> +++ block/qcow2-cluster.c 2012-05-13 15:53:53.000000000 +0800 >>> @@ -827,6 +827,7 @@ >>> m->offset = offset; >>> m->n_start = n_start; >>> m->nb_clusters = nb_clusters; >>> + m->inflight = 1; >>> >>> out: >>> m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end); >>> >>> Thanks for investigation >>> Zhouyi >> >> The patch looks reasonable to me. Note however that while it fixes the >> hang, it still causes cluster leaks. I'm not sure if someone is >> interested in picking these up for old stable releases. Andreas, I think >> you were going to take 0.15? The first version that doesn't have the >> problem is 1.0. > > Kevin, the policy as I understood it is to cherry-pick patches from > qemu.git into qemu-stable-x.y.git. So I don't think me applying this > patch to stable-0.15 would be right. I don't spot a particular qcow2 fix > among our 0.15 backports that I have now pushed. Do you have a pointer > which one(s) would fix this issue so that I can recheck? It's "fixed" as a side effect of the block layer conversion to coroutines. Not exactly the kind of patches you'd want to cherry-pick for stable-0.15. The better fix for 0.15 could be to backport the new behaviour of coroutine based requests with bdrv_aio_cancel: static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) { qemu_aio_flush(); } Using that as the implementation for qcow2_aio_cancel should be safe and fix this problem. Kevin
Am 12.06.2012 15:44, schrieb Kevin Wolf: > Am 12.06.2012 15:33, schrieb Andreas Färber: >> Am 14.05.2012 14:20, schrieb Kevin Wolf: >>> Am 13.05.2012 10:03, schrieb Zhouyi Zhou: >>>> hi all >>>> >>>> sometimes, qemu/kvm-0.1x will hang in endless loop in qcow2_alloc_cluster_offset. >>>> after some investigation, I found that: >>>> in function posix_aio_process_queue(void *opaque) >>>> 440 ret = qemu_paio_error(acb); >>>> 441 if (ret == ECANCELED) { >>>> 442 /* remove the request */ >>>> 443 *pacb = acb->next; >>>> 444 qemu_aio_release(acb); >>>> 445 result = 1; >>>> 446 } else if (ret != EINPROGRESS) { >>>> in line 444 acb got released but acb->common.opaque does not. >>>> which will be released via guest OS via ide_dma_cancel which >>>> will in term call qcow_aio_cancel which does not check its argument >>>> is in flight list or not. >>>> The fix is as follows: (debian 6's qemu-kvm-0.12.5) >>>> ####################################### >>>> --- block/qcow2.h~ 2010-07-27 08:43:53.000000000 +0800 >>>> +++ block/qcow2.h 2012-05-13 15:51:39.000000000 +0800 >>>> @@ -143,6 +143,7 @@ >>>> QLIST_HEAD(QCowAioDependencies, QCowAIOCB) dependent_requests; >>>> >>>> QLIST_ENTRY(QCowL2Meta) next_in_flight; >>>> + int inflight; >>>> } QCowL2Meta; >>>> --- block/qcow2.c~ 2012-05-13 15:57:09.000000000 +0800 >>>> +++ block/qcow2.c 2012-05-13 15:57:24.000000000 +0800 >>>> @@ -349,6 +349,10 @@ >>>> QCowAIOCB *acb = (QCowAIOCB *)blockacb; >>>> if (acb->hd_aiocb) >>>> bdrv_aio_cancel(acb->hd_aiocb); >>>> + if (acb->l2meta.inflight) { >>>> + QLIST_REMOVE(&acb->l2meta, next_in_flight); >>>> + acb->l2meta.inflight = 0; >>>> + } >>>> qemu_aio_release(acb); >>>> } >>>> >>>> @@ -506,6 +510,7 @@ >>>> acb->n = 0; >>>> acb->cluster_offset = 0; >>>> acb->l2meta.nb_clusters = 0; >>>> + acb->l2meta.inflight = 0; >>>> QLIST_INIT(&acb->l2meta.dependent_requests); >>>> return acb; >>>> } >>>> @@ -534,6 +539,7 @@ >>>> /* Take the request off the list of running requests */ >>>> if (m->nb_clusters != 0) { 
>>>> QLIST_REMOVE(m, next_in_flight); >>>> + m->inflight = 0; >>>> } >>>> >>>> /* >>>> @@ -632,6 +638,7 @@ >>>> fail: >>>> if (acb->l2meta.nb_clusters != 0) { >>>> QLIST_REMOVE(&acb->l2meta, next_in_flight); >>>> + acb->l2meta.inflight = 0; >>>> } >>>> done: >>>> if (acb->qiov->niov > 1) >>>> --- block/qcow2-cluster.c~ 2010-07-27 08:43:53.000000000 +0800 >>>> +++ block/qcow2-cluster.c 2012-05-13 15:53:53.000000000 +0800 >>>> @@ -827,6 +827,7 @@ >>>> m->offset = offset; >>>> m->n_start = n_start; >>>> m->nb_clusters = nb_clusters; >>>> + m->inflight = 1; >>>> >>>> out: >>>> m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end); >>>> >>>> Thanks for investigation >>>> Zhouyi >>> >>> The patch looks reasonable to me. Note however that while it fixes the >>> hang, it still causes cluster leaks. I'm not sure if someone is >>> interested in picking these up for old stable releases. Andreas, I think >>> you were going to take 0.15? The first version that doesn't have the >>> problem is 1.0. >> >> Kevin, the policy as I understood it is to cherry-pick patches from >> qemu.git into qemu-stable-x.y.git. So I don't think me applying this >> patch to stable-0.15 would be right. I don't spot a particular qcow2 fix >> among our 0.15 backports that I have now pushed. Do you have a pointer >> which one(s) would fix this issue so that I can recheck? > > It's "fixed" as a side effect of the block layer conversion to > coroutines. Not exactly the kind of patches you'd want to cherry-pick > for stable-0.15. > > The better fix for 0.15 could be to backport the new behaviour of > coroutine based requests with bdrv_aio_cancel: > > static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) > { > qemu_aio_flush(); > } > > Using that as the implementation for qcow2_aio_cancel should be safe and > fix this problem. Kevin, I have stable-0.15 in a state where I'm about to tag 0.15.2 now. 
The original patch does not have a Signed-off-by nor your Acked-by, so I can't apply it as-is. stable-0.15 does not have coroutines, so I don't understand what exactly you're suggesting as alternative here: Backport the whole coroutine feature including coroutine function above? Or just call qemu_aio_flush() in place of what? This is old qcow2_aio_cancel(): static BlockDriverAIOCB *qcow2_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { BDRVQcowState *s = bs->opaque; int ret; ret = qcow2_cache_flush(bs, s->l2_table_cache); if (ret < 0) { return NULL; } ret = qcow2_cache_flush(bs, s->refcount_block_cache); if (ret < 0) { return NULL; } return bdrv_aio_flush(bs->file, cb, opaque); } Andreas
--- block/qcow2.h~ 2010-07-27 08:43:53.000000000 +0800 +++ block/qcow2.h 2012-05-13 15:51:39.000000000 +0800 @@ -143,6 +143,7 @@ QLIST_HEAD(QCowAioDependencies, QCowAIOCB) dependent_requests; QLIST_ENTRY(QCowL2Meta) next_in_flight; + int inflight; } QCowL2Meta; --- block/qcow2.c~ 2012-05-13 15:57:09.000000000 +0800 +++ block/qcow2.c 2012-05-13 15:57:24.000000000 +0800 @@ -349,6 +349,10 @@ QCowAIOCB *acb = (QCowAIOCB *)blockacb; if (acb->hd_aiocb) bdrv_aio_cancel(acb->hd_aiocb); + if (acb->l2meta.inflight) { + QLIST_REMOVE(&acb->l2meta, next_in_flight); + acb->l2meta.inflight = 0; + } qemu_aio_release(acb); } @@ -506,6 +510,7 @@ acb->n = 0; acb->cluster_offset = 0; acb->l2meta.nb_clusters = 0; + acb->l2meta.inflight = 0; QLIST_INIT(&acb->l2meta.dependent_requests); return acb; } @@ -534,6 +539,7 @@ /* Take the request off the list of running requests */ if (m->nb_clusters != 0) { QLIST_REMOVE(m, next_in_flight); + m->inflight = 0; } /* @@ -632,6 +638,7 @@ fail: if (acb->l2meta.nb_clusters != 0) { QLIST_REMOVE(&acb->l2meta, next_in_flight); + acb->l2meta.inflight = 0; } done: if (acb->qiov->niov > 1) --- block/qcow2-cluster.c~ 2010-07-27 08:43:53.000000000 +0800 +++ block/qcow2-cluster.c 2012-05-13 15:53:53.000000000 +0800 @@ -827,6 +827,7 @@ m->offset = offset; m->n_start = n_start; m->nb_clusters = nb_clusters; + m->inflight = 1; out: m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);