Message ID | 1415807116-8375-1-git-send-email-roger.pau@citrix.com |
---|---|
State | New |
Headers | show |
On 11/12/2014 03:45 PM, Roger Pau Monne wrote: > This patch fixes two issues with persistent grants and the disk PV backend > (Qdisk): > > - Don't use batch mappings when using persistent grants, doing so prevents > unmapping single grants (the whole area has to be unmapped at once). > - Unmap persistent grants before switching to the closed state, so the > frontend can also free them. > > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> > Reported-and-Tested-by: George Dunlap <george.dunlap@eu.citrix.com> > Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > Cc: Kevin Wolf <kwolf@redhat.com> > Cc: Stefan Hajnoczi <stefanha@redhat.com> > Cc: George Dunlap <george.dunlap@eu.citrix.com> CC'ing Konrad and Stefano: This fixes a critical bug that should be a blocker for the Xen 4.5 release. Without this, any backend using qdisk for a PV guest with pygrub (including qcow2 and vhd) will crash dom0. -George > --- > hw/block/xen_disk.c | 35 ++++++++++++++++++++++++----------- > 1 file changed, 24 insertions(+), 11 deletions(-) > > diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c > index 231e9a7..1300c0a 100644 > --- a/hw/block/xen_disk.c > +++ b/hw/block/xen_disk.c > @@ -43,8 +43,6 @@ > > /* ------------------------------------------------------------- */ > > -static int batch_maps = 0; > - > static int max_requests = 32; > > /* ------------------------------------------------------------- */ > @@ -105,6 +103,7 @@ struct XenBlkDev { > blkif_back_rings_t rings; > int more_work; > int cnt_map; > + bool batch_maps; > > /* request lists */ > QLIST_HEAD(inflight_head, ioreq) inflight; > @@ -309,7 +308,7 @@ static void ioreq_unmap(struct ioreq *ioreq) > if (ioreq->num_unmap == 0 || ioreq->mapped == 0) { > return; > } > - if (batch_maps) { > + if (ioreq->blkdev->batch_maps) { > if (!ioreq->pages) { > return; > } > @@ -386,7 +385,7 @@ static int ioreq_map(struct ioreq *ioreq) > new_maps = ioreq->v.niov; > } > > - if (batch_maps && new_maps) { > + if (ioreq->blkdev->batch_maps && new_maps) { > ioreq->pages = xc_gnttab_map_grant_refs > (gnt, new_maps, domids, refs, ioreq->prot); > if (ioreq->pages == NULL) { > @@ -433,7 +432,7 @@ static int ioreq_map(struct ioreq *ioreq) > */ > grant = g_malloc0(sizeof(*grant)); > new_maps--; > - if (batch_maps) { > + if (ioreq->blkdev->batch_maps) { > grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE; > } else { > grant->page = ioreq->page[new_maps]; > @@ -718,7 +717,9 @@ static void blk_alloc(struct XenDevice *xendev) > QLIST_INIT(&blkdev->freelist); > blkdev->bh = qemu_bh_new(blk_bh, blkdev); > if (xen_mode != XEN_EMULATE) { > - batch_maps = 1; > + blkdev->batch_maps = TRUE; > + } else { > + blkdev->batch_maps = FALSE; > } > if (xc_gnttab_set_max_grants(xendev->gnttabdev, > MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) { > @@ -923,6 +924,13 @@ static int blk_connect(struct XenDevice *xendev) > } else { > blkdev->feature_persistent = !!pers; > } > + if (blkdev->feature_persistent) { > + /* > + * Disable batch maps, since that would prevent unmapping > + * single persistent grants. > + */ > + blkdev->batch_maps = FALSE; > + } > > blkdev->protocol = BLKIF_PROTOCOL_NATIVE; > if (blkdev->xendev.protocol) { > @@ -1000,6 +1008,16 @@ static void blk_disconnect(struct XenDevice *xendev) > blkdev->cnt_map--; > blkdev->sring = NULL; > } > + > + /* > + * Unmap persistent grants before switching to the closed state > + * so the frontend can free them. > + */ > + if (blkdev->feature_persistent) { > + g_tree_destroy(blkdev->persistent_gnts); > + assert(blkdev->persistent_gnt_count == 0); > + blkdev->feature_persistent = FALSE; > + } > } > > static int blk_free(struct XenDevice *xendev) > @@ -1011,11 +1029,6 @@ static int blk_free(struct XenDevice *xendev) > blk_disconnect(xendev); > } > > - /* Free persistent grants */ > - if (blkdev->feature_persistent) { > - g_tree_destroy(blkdev->persistent_gnts); > - } > - > while (!QLIST_EMPTY(&blkdev->freelist)) { > ioreq = QLIST_FIRST(&blkdev->freelist); > QLIST_REMOVE(ioreq, list);
On Wed, Nov 12, 2014 at 03:55:51PM +0000, George Dunlap wrote: > On 11/12/2014 03:45 PM, Roger Pau Monne wrote: > >This patch fixes two issues with persistent grants and the disk PV backend > >(Qdisk): > > > > - Don't use batch mappings when using persistent grants, doing so prevents > > unmapping single grants (the whole area has to be unmapped at once). > > - Unmap persistent grants before switching to the closed state, so the > > frontend can also free them. > > > >Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> > >Reported-and-Tested-by: George Dunlap <george.dunlap@eu.citrix.com> > >Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > >Cc: Kevin Wolf <kwolf@redhat.com> > >Cc: Stefan Hajnoczi <stefanha@redhat.com> > >Cc: George Dunlap <george.dunlap@eu.citrix.com> > > CC'ing Konrad and Stefano: This fixes a critical bug that should be a > blocker for the Xen 4.5 release. Without this, any backend using qdisk for > a PV guest with pygrub (including qcow2 and vhd) will crash dom0. Changing the title to reflect that. Stefano? > > -George > > >--- > > hw/block/xen_disk.c | 35 ++++++++++++++++++++++++----------- > > 1 file changed, 24 insertions(+), 11 deletions(-) > > > >diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c > >index 231e9a7..1300c0a 100644 > >--- a/hw/block/xen_disk.c > >+++ b/hw/block/xen_disk.c > >@@ -43,8 +43,6 @@ > > /* ------------------------------------------------------------- */ > >-static int batch_maps = 0; > >- > > static int max_requests = 32; > > /* ------------------------------------------------------------- */ > >@@ -105,6 +103,7 @@ struct XenBlkDev { > > blkif_back_rings_t rings; > > int more_work; > > int cnt_map; > >+ bool batch_maps; > > /* request lists */ > > QLIST_HEAD(inflight_head, ioreq) inflight; > >@@ -309,7 +308,7 @@ static void ioreq_unmap(struct ioreq *ioreq) > > if (ioreq->num_unmap == 0 || ioreq->mapped == 0) { > > return; > > } > >- if (batch_maps) { > >+ if (ioreq->blkdev->batch_maps) { > > if (!ioreq->pages) { > > return; > > } > >@@ -386,7 +385,7 @@ static int ioreq_map(struct ioreq *ioreq) > > new_maps = ioreq->v.niov; > > } > >- if (batch_maps && new_maps) { > >+ if (ioreq->blkdev->batch_maps && new_maps) { > > ioreq->pages = xc_gnttab_map_grant_refs > > (gnt, new_maps, domids, refs, ioreq->prot); > > if (ioreq->pages == NULL) { > >@@ -433,7 +432,7 @@ static int ioreq_map(struct ioreq *ioreq) > > */ > > grant = g_malloc0(sizeof(*grant)); > > new_maps--; > >- if (batch_maps) { > >+ if (ioreq->blkdev->batch_maps) { > > grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE; > > } else { > > grant->page = ioreq->page[new_maps]; > >@@ -718,7 +717,9 @@ static void blk_alloc(struct XenDevice *xendev) > > QLIST_INIT(&blkdev->freelist); > > blkdev->bh = qemu_bh_new(blk_bh, blkdev); > > if (xen_mode != XEN_EMULATE) { > >- batch_maps = 1; > >+ blkdev->batch_maps = TRUE; > >+ } else { > >+ blkdev->batch_maps = FALSE; > > } > > if (xc_gnttab_set_max_grants(xendev->gnttabdev, > > MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) { > >@@ -923,6 +924,13 @@ static int blk_connect(struct XenDevice *xendev) > > } else { > > blkdev->feature_persistent = !!pers; > > } > >+ if (blkdev->feature_persistent) { > >+ /* > >+ * Disable batch maps, since that would prevent unmapping > >+ * single persistent grants. > >+ */ > >+ blkdev->batch_maps = FALSE; > >+ } > > blkdev->protocol = BLKIF_PROTOCOL_NATIVE; > > if (blkdev->xendev.protocol) { > >@@ -1000,6 +1008,16 @@ static void blk_disconnect(struct XenDevice *xendev) > > blkdev->cnt_map--; > > blkdev->sring = NULL; > > } > >+ > >+ /* > >+ * Unmap persistent grants before switching to the closed state > >+ * so the frontend can free them. > >+ */ > >+ if (blkdev->feature_persistent) { > >+ g_tree_destroy(blkdev->persistent_gnts); > >+ assert(blkdev->persistent_gnt_count == 0); > >+ blkdev->feature_persistent = FALSE; > >+ } > > } > > static int blk_free(struct XenDevice *xendev) > >@@ -1011,11 +1029,6 @@ static int blk_free(struct XenDevice *xendev) > > blk_disconnect(xendev); > > } > >- /* Free persistent grants */ > >- if (blkdev->feature_persistent) { > >- g_tree_destroy(blkdev->persistent_gnts); > >- } > >- > > while (!QLIST_EMPTY(&blkdev->freelist)) { > > ioreq = QLIST_FIRST(&blkdev->freelist); > > QLIST_REMOVE(ioreq, list); >
On Wed, 12 Nov 2014, Roger Pau Monne wrote: > This patch fixes two issues with persistent grants and the disk PV backend > (Qdisk): > > - Don't use batch mappings when using persistent grants, doing so prevents > unmapping single grants (the whole area has to be unmapped at once). The real issue is that destroy_grant cannot work with batch_maps. One could reimplement destroy_grant to build a single array with all the grants to unmap and make a single xc_gnttab_munmap call. Do you think that would be feasible? Performance wise, it would certainly be better. > - Unmap persistent grants before switching to the closed state, so the > frontend can also free them. > > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> > Reported-and-Tested-by: George Dunlap <george.dunlap@eu.citrix.com> > Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > Cc: Kevin Wolf <kwolf@redhat.com> > Cc: Stefan Hajnoczi <stefanha@redhat.com> > Cc: George Dunlap <george.dunlap@eu.citrix.com> > --- > hw/block/xen_disk.c | 35 ++++++++++++++++++++++++----------- > 1 file changed, 24 insertions(+), 11 deletions(-) > > diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c > index 231e9a7..1300c0a 100644 > --- a/hw/block/xen_disk.c > +++ b/hw/block/xen_disk.c > @@ -43,8 +43,6 @@ > > /* ------------------------------------------------------------- */ > > -static int batch_maps = 0; > - > static int max_requests = 32; > > /* ------------------------------------------------------------- */ > @@ -105,6 +103,7 @@ struct XenBlkDev { > blkif_back_rings_t rings; > int more_work; > int cnt_map; > + bool batch_maps; > > /* request lists */ > QLIST_HEAD(inflight_head, ioreq) inflight; > @@ -309,7 +308,7 @@ static void ioreq_unmap(struct ioreq *ioreq) > if (ioreq->num_unmap == 0 || ioreq->mapped == 0) { > return; > } > - if (batch_maps) { > + if (ioreq->blkdev->batch_maps) { > if (!ioreq->pages) { > return; > } > @@ -386,7 +385,7 @@ static int ioreq_map(struct ioreq *ioreq) > new_maps = ioreq->v.niov; > } > > - if (batch_maps && new_maps) { > + if (ioreq->blkdev->batch_maps && new_maps) { > ioreq->pages = xc_gnttab_map_grant_refs > (gnt, new_maps, domids, refs, ioreq->prot); > if (ioreq->pages == NULL) { > @@ -433,7 +432,7 @@ static int ioreq_map(struct ioreq *ioreq) > */ > grant = g_malloc0(sizeof(*grant)); > new_maps--; > - if (batch_maps) { > + if (ioreq->blkdev->batch_maps) { > grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE; > } else { > grant->page = ioreq->page[new_maps]; > @@ -718,7 +717,9 @@ static void blk_alloc(struct XenDevice *xendev) > QLIST_INIT(&blkdev->freelist); > blkdev->bh = qemu_bh_new(blk_bh, blkdev); > if (xen_mode != XEN_EMULATE) { > - batch_maps = 1; > + blkdev->batch_maps = TRUE; > + } else { > + blkdev->batch_maps = FALSE; > } true and false, lower capitals > if (xc_gnttab_set_max_grants(xendev->gnttabdev, > MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) { > @@ -923,6 +924,13 @@ static int blk_connect(struct XenDevice *xendev) > } else { > blkdev->feature_persistent = !!pers; > } > + if (blkdev->feature_persistent) { > + /* > + * Disable batch maps, since that would prevent unmapping > + * single persistent grants. > + */ > + blkdev->batch_maps = FALSE; false > + } > > blkdev->protocol = BLKIF_PROTOCOL_NATIVE; > if (blkdev->xendev.protocol) { > @@ -1000,6 +1008,16 @@ static void blk_disconnect(struct XenDevice *xendev) > blkdev->cnt_map--; > blkdev->sring = NULL; > } > + > + /* > + * Unmap persistent grants before switching to the closed state > + * so the frontend can free them. > + */ > + if (blkdev->feature_persistent) { > + g_tree_destroy(blkdev->persistent_gnts); > + assert(blkdev->persistent_gnt_count == 0); > + blkdev->feature_persistent = FALSE; > + } > } > > static int blk_free(struct XenDevice *xendev) > @@ -1011,11 +1029,6 @@ static int blk_free(struct XenDevice *xendev) > blk_disconnect(xendev); > } > > - /* Free persistent grants */ > - if (blkdev->feature_persistent) { > - g_tree_destroy(blkdev->persistent_gnts); > - } > - > while (!QLIST_EMPTY(&blkdev->freelist)) { > ioreq = QLIST_FIRST(&blkdev->freelist); > QLIST_REMOVE(ioreq, list);
El 12/11/14 a les 18.41, Stefano Stabellini ha escrit: > On Wed, 12 Nov 2014, Roger Pau Monne wrote: >> This patch fixes two issues with persistent grants and the disk PV backend >> (Qdisk): >> >> - Don't use batch mappings when using persistent grants, doing so prevents >> unmapping single grants (the whole area has to be unmapped at once). > > The real issue is that destroy_grant cannot work with batch_maps. > One could reimplement destroy_grant to build a single array with all the > grants to unmap and make a single xc_gnttab_munmap call. > > Do you think that would be feasible? Making destroy_grant work with batch maps using the current tree structure is going to be quite complicated, because destroy_grant iterates on every entry on the tree, and doesn't know which grants belong to which regions. IMHO a simpler solution would be to introduce another tree (or list) that keeps track of grant-mapped regions, and on tear down use the data in that list to unmap the regions. This way the current tree will still be used to perform the grant_ref->vaddr translation, but on teardown the newly introduced list would be used instead. In general I was reluctant to do this because not using batch maps with persistent grants should not introduce a noticeable performance regression due to the fact that grants are only mapped once for the whole life-cycle of the virtual disk. Also, if we plan to implement indirect descriptors for Qdisk we really need to be able to unmap single grants in order to purge the list, since in that case it's not possible to keep all possible grants persistently mapped. Since this alternate solution is easy to implement I will send a new patch using this approach, then we can decide what to do. Roger.
Am 12.11.2014 um 18:41 hat Stefano Stabellini geschrieben: > On Wed, 12 Nov 2014, Roger Pau Monne wrote: > > This patch fixes two issues with persistent grants and the disk PV backend > > (Qdisk): > > > > - Don't use batch mappings when using persistent grants, doing so prevents > > unmapping single grants (the whole area has to be unmapped at once). > > The real issue is that destroy_grant cannot work with batch_maps. > One could reimplement destroy_grant to build a single array with all the > grants to unmap and make a single xc_gnttab_munmap call. > > Do you think that would be feasible? > > Performance wise, it would certainly be better. > > > > - Unmap persistent grants before switching to the closed state, so the > > frontend can also free them. > > > > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> > > Reported-and-Tested-by: George Dunlap <george.dunlap@eu.citrix.com> > > Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > > Cc: Kevin Wolf <kwolf@redhat.com> > > Cc: Stefan Hajnoczi <stefanha@redhat.com> > > Cc: George Dunlap <george.dunlap@eu.citrix.com> > > --- > > hw/block/xen_disk.c | 35 ++++++++++++++++++++++++----------- > > 1 file changed, 24 insertions(+), 11 deletions(-) > > > > diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c > > index 231e9a7..1300c0a 100644 > > --- a/hw/block/xen_disk.c > > +++ b/hw/block/xen_disk.c > > @@ -43,8 +43,6 @@ > > > > /* ------------------------------------------------------------- */ > > > > -static int batch_maps = 0; > > - > > static int max_requests = 32; > > > > /* ------------------------------------------------------------- */ > > @@ -105,6 +103,7 @@ struct XenBlkDev { > > blkif_back_rings_t rings; > > int more_work; > > int cnt_map; > > + bool batch_maps; > > > > /* request lists */ > > QLIST_HEAD(inflight_head, ioreq) inflight; > > @@ -309,7 +308,7 @@ static void ioreq_unmap(struct ioreq *ioreq) > > if (ioreq->num_unmap == 0 || ioreq->mapped == 0) { > > return; > > } > > - if (batch_maps) { > > + if (ioreq->blkdev->batch_maps) { > > if (!ioreq->pages) { > > return; > > } > > @@ -386,7 +385,7 @@ static int ioreq_map(struct ioreq *ioreq) > > new_maps = ioreq->v.niov; > > } > > > > - if (batch_maps && new_maps) { > > + if (ioreq->blkdev->batch_maps && new_maps) { > > ioreq->pages = xc_gnttab_map_grant_refs > > (gnt, new_maps, domids, refs, ioreq->prot); > > if (ioreq->pages == NULL) { > > @@ -433,7 +432,7 @@ static int ioreq_map(struct ioreq *ioreq) > > */ > > grant = g_malloc0(sizeof(*grant)); > > new_maps--; > > - if (batch_maps) { > > + if (ioreq->blkdev->batch_maps) { > > grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE; > > } else { > > grant->page = ioreq->page[new_maps]; > > @@ -718,7 +717,9 @@ static void blk_alloc(struct XenDevice *xendev) > > QLIST_INIT(&blkdev->freelist); > > blkdev->bh = qemu_bh_new(blk_bh, blkdev); > > if (xen_mode != XEN_EMULATE) { > > - batch_maps = 1; > > + blkdev->batch_maps = TRUE; > > + } else { > > + blkdev->batch_maps = FALSE; > > } > > true and false, lower capitals Or just blkdev->batch_maps = (xen_mode != XEN_EMULATE); Kevin
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c index 231e9a7..1300c0a 100644 --- a/hw/block/xen_disk.c +++ b/hw/block/xen_disk.c @@ -43,8 +43,6 @@ /* ------------------------------------------------------------- */ -static int batch_maps = 0; - static int max_requests = 32; /* ------------------------------------------------------------- */ @@ -105,6 +103,7 @@ struct XenBlkDev { blkif_back_rings_t rings; int more_work; int cnt_map; + bool batch_maps; /* request lists */ QLIST_HEAD(inflight_head, ioreq) inflight; @@ -309,7 +308,7 @@ static void ioreq_unmap(struct ioreq *ioreq) if (ioreq->num_unmap == 0 || ioreq->mapped == 0) { return; } - if (batch_maps) { + if (ioreq->blkdev->batch_maps) { if (!ioreq->pages) { return; } @@ -386,7 +385,7 @@ static int ioreq_map(struct ioreq *ioreq) new_maps = ioreq->v.niov; } - if (batch_maps && new_maps) { + if (ioreq->blkdev->batch_maps && new_maps) { ioreq->pages = xc_gnttab_map_grant_refs (gnt, new_maps, domids, refs, ioreq->prot); if (ioreq->pages == NULL) { @@ -433,7 +432,7 @@ static int ioreq_map(struct ioreq *ioreq) */ grant = g_malloc0(sizeof(*grant)); new_maps--; - if (batch_maps) { + if (ioreq->blkdev->batch_maps) { grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE; } else { grant->page = ioreq->page[new_maps]; @@ -718,7 +717,9 @@ static void blk_alloc(struct XenDevice *xendev) QLIST_INIT(&blkdev->freelist); blkdev->bh = qemu_bh_new(blk_bh, blkdev); if (xen_mode != XEN_EMULATE) { - batch_maps = 1; + blkdev->batch_maps = TRUE; + } else { + blkdev->batch_maps = FALSE; } if (xc_gnttab_set_max_grants(xendev->gnttabdev, MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) { @@ -923,6 +924,13 @@ static int blk_connect(struct XenDevice *xendev) } else { blkdev->feature_persistent = !!pers; } + if (blkdev->feature_persistent) { + /* + * Disable batch maps, since that would prevent unmapping + * single persistent grants. + */ + blkdev->batch_maps = FALSE; + } blkdev->protocol = BLKIF_PROTOCOL_NATIVE; if (blkdev->xendev.protocol) { @@ -1000,6 +1008,16 @@ static void blk_disconnect(struct XenDevice *xendev) blkdev->cnt_map--; blkdev->sring = NULL; } + + /* + * Unmap persistent grants before switching to the closed state + * so the frontend can free them. + */ + if (blkdev->feature_persistent) { + g_tree_destroy(blkdev->persistent_gnts); + assert(blkdev->persistent_gnt_count == 0); + blkdev->feature_persistent = FALSE; + } } static int blk_free(struct XenDevice *xendev) @@ -1011,11 +1029,6 @@ static int blk_free(struct XenDevice *xendev) blk_disconnect(xendev); } - /* Free persistent grants */ - if (blkdev->feature_persistent) { - g_tree_destroy(blkdev->persistent_gnts); - } - while (!QLIST_EMPTY(&blkdev->freelist)) { ioreq = QLIST_FIRST(&blkdev->freelist); QLIST_REMOVE(ioreq, list);
This patch fixes two issues with persistent grants and the disk PV backend (Qdisk): - Don't use batch mappings when using persistent grants, doing so prevents unmapping single grants (the whole area has to be unmapped at once). - Unmap persistent grants before switching to the closed state, so the frontend can also free them. Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> Reported-and-Tested-by: George Dunlap <george.dunlap@eu.citrix.com> Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Cc: Kevin Wolf <kwolf@redhat.com> Cc: Stefan Hajnoczi <stefanha@redhat.com> Cc: George Dunlap <george.dunlap@eu.citrix.com> --- hw/block/xen_disk.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-)