From patchwork Tue Sep 21 15:21:58 2010
X-Patchwork-Id: 65334
From: Kevin Wolf <kwolf@redhat.com>
To: anthony@codemonkey.ws
Cc: kwolf@redhat.com, qemu-devel@nongnu.org
Date: Tue, 21 Sep 2010 17:21:58 +0200
Message-Id: <1285082522-24407-17-git-send-email-kwolf@redhat.com>
In-Reply-To: <1285082522-24407-1-git-send-email-kwolf@redhat.com>
References: <1285082522-24407-1-git-send-email-kwolf@redhat.com>
Subject: [Qemu-devel] [PATCH 16/20] qcow2: Avoid bounce buffers for AIO read requests

qcow2 used to use bounce buffers for all AIO requests. This not only implies
unnecessary copying, but also unbounded allocations, which should be avoided.

This patch removes bounce buffers from the normal AIO read path and constrains
them to a constant size for encrypted images.
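To illustrate why the remaining allocation is bounded (a simplified sketch of
the logic in qcow_aio_read_cb() in the patch below, with error handling
omitted): for encrypted images each iteration is clamped to at most
QCOW_MAX_CRYPT_CLUSTERS clusters, so the temporary buffer is allocated once
with a fixed size and then reused:

    /* Sketch only: clamp the per-iteration size, then reuse one fixed-size
     * bounce buffer for the AES functions. */
    if (s->crypt_method) {
        acb->cur_nr_sectors = MIN(acb->cur_nr_sectors,
            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);

        if (!acb->cluster_data) {
            /* at most QCOW_MAX_CRYPT_CLUSTERS * cluster_size bytes */
            acb->cluster_data =
                qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
        }
    }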
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cluster.c |    8 ++++-
 block/qcow2.c         |   86 +++++++++++++++++++++++++++++++++---------------
 block/qcow2.h         |    4 +-
 3 files changed, 68 insertions(+), 30 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index cb2e33f..fb4224a 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -350,6 +350,8 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
     BDRVQcowState *s = bs->opaque;
     int ret, index_in_cluster, n, n1;
     uint64_t cluster_offset;
+    struct iovec iov;
+    QEMUIOVector qiov;

     while (nb_sectors > 0) {
         n = nb_sectors;
@@ -364,7 +366,11 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
         if (!cluster_offset) {
             if (bs->backing_hd) {
                 /* read from the base image */
-                n1 = qcow2_backing_read1(bs->backing_hd, sector_num, buf, n);
+                iov.iov_base = buf;
+                iov.iov_len = n * 512;
+                qemu_iovec_init_external(&qiov, &iov, 1);
+
+                n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n);
                 if (n1 > 0) {
                     BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
                     ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
diff --git a/block/qcow2.c b/block/qcow2.c
index a53014d..4a688b4 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -311,8 +311,8 @@ static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
 }

 /* handle reading after the end of the backing file */
-int qcow2_backing_read1(BlockDriverState *bs,
-                  int64_t sector_num, uint8_t *buf, int nb_sectors)
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+                  int64_t sector_num, int nb_sectors)
 {
     int n1;
     if ((sector_num + nb_sectors) <= bs->total_sectors)
@@ -321,7 +321,9 @@ int qcow2_backing_read1(BlockDriverState *bs,
         n1 = 0;
     else
         n1 = bs->total_sectors - sector_num;
-    memset(buf + n1 * 512, 0, 512 * (nb_sectors - n1));
+
+    qemu_iovec_memset(qiov, 0, 512 * (nb_sectors - n1));
+
     return n1;
 }

@@ -333,6 +335,7 @@ typedef struct QCowAIOCB {
     void *orig_buf;
     int remaining_sectors;
     int cur_nr_sectors; /* number of sectors in current iteration */
+    uint64_t bytes_done;
     uint64_t cluster_offset;
     uint8_t *cluster_data;
     BlockDriverAIOCB *hd_aiocb;
@@ -397,15 +400,19 @@ static void qcow_aio_read_cb(void *opaque, int ret)
         /* nothing to do */
     } else {
         if (s->crypt_method) {
-            qcow2_encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
-                            acb->cur_nr_sectors, 0,
-                            &s->aes_decrypt_key);
+            qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data,
+                acb->cluster_data, acb->cur_nr_sectors, 0, &s->aes_decrypt_key);
+            qemu_iovec_reset(&acb->hd_qiov);
+            qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
+                acb->cur_nr_sectors * 512);
+            qemu_iovec_from_buffer(&acb->hd_qiov, acb->cluster_data,
+                512 * acb->cur_nr_sectors);
         }
     }

     acb->remaining_sectors -= acb->cur_nr_sectors;
     acb->sector_num += acb->cur_nr_sectors;
-    acb->buf += acb->cur_nr_sectors * 512;
+    acb->bytes_done += acb->cur_nr_sectors * 512;

     if (acb->remaining_sectors == 0) {
         /* request completed */
@@ -415,6 +422,11 @@ static void qcow_aio_read_cb(void *opaque, int ret)

     /* prepare next AIO request */
     acb->cur_nr_sectors = acb->remaining_sectors;
+    if (s->crypt_method) {
+        acb->cur_nr_sectors = MIN(acb->cur_nr_sectors,
+            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+    }
+
     ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9,
         &acb->cur_nr_sectors, &acb->cluster_offset);
     if (ret < 0) {
@@ -423,15 +435,17 @@ static void qcow_aio_read_cb(void *opaque, int ret)

     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);

+    qemu_iovec_reset(&acb->hd_qiov);
+    qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
+        acb->cur_nr_sectors * 512);
+
     if (!acb->cluster_offset) {
+
         if (bs->backing_hd) {
             /* read from the base image */
-            n1 = qcow2_backing_read1(bs->backing_hd, acb->sector_num,
-                acb->buf, acb->cur_nr_sectors);
+            n1 = qcow2_backing_read1(bs->backing_hd, &acb->hd_qiov,
+                acb->sector_num, acb->cur_nr_sectors);
             if (n1 > 0) {
-                acb->hd_iov.iov_base = (void *)acb->buf;
-                acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
-                qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
                 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                 acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
                                     &acb->hd_qiov, acb->cur_nr_sectors,
@@ -445,7 +459,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
             }
         } else {
             /* Note: in this case, no need to wait */
-            memset(acb->buf, 0, 512 * acb->cur_nr_sectors);
+            qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors);
             ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
             if (ret < 0)
                 goto done;
@@ -454,8 +468,11 @@ static void qcow_aio_read_cb(void *opaque, int ret)
         /* add AIO support for compressed blocks ? */
         if (qcow2_decompress_cluster(bs, acb->cluster_offset) < 0)
             goto done;
-        memcpy(acb->buf, s->cluster_cache + index_in_cluster * 512,
-               512 * acb->cur_nr_sectors);
+
+        qemu_iovec_from_buffer(&acb->hd_qiov,
+            s->cluster_cache + index_in_cluster * 512,
+            512 * acb->cur_nr_sectors);
+
         ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
         if (ret < 0)
             goto done;
@@ -465,9 +482,23 @@ static void qcow_aio_read_cb(void *opaque, int ret)
             goto done;
         }

-        acb->hd_iov.iov_base = (void *)acb->buf;
-        acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
-        qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+        if (s->crypt_method) {
+            /*
+             * For encrypted images, read everything into a temporary
+             * contiguous buffer on which the AES functions can work.
+             */
+            if (!acb->cluster_data) {
+                acb->cluster_data =
+                    qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+            }
+
+            assert(acb->cur_nr_sectors <=
+                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+            qemu_iovec_reset(&acb->hd_qiov);
+            qemu_iovec_add(&acb->hd_qiov, acb->cluster_data,
+                512 * acb->cur_nr_sectors);
+        }
+
         BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
         acb->hd_aiocb = bdrv_aio_readv(bs->file,
                             (acb->cluster_offset >> 9) + index_in_cluster,
@@ -481,11 +512,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
     return;

 done:
-    if (acb->qiov->niov > 1) {
-        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
-        qemu_vfree(acb->orig_buf);
-    }
     acb->common.cb(acb->common.opaque, ret);
+    qemu_iovec_destroy(&acb->hd_qiov);
     qemu_aio_release(acb);
 }

@@ -501,13 +529,17 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
     acb->hd_aiocb = NULL;
     acb->sector_num = sector_num;
     acb->qiov = qiov;
-    if (qiov->niov > 1) {
-        acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
-        if (is_write)
-            qemu_iovec_to_buffer(qiov, acb->buf);
-    } else {
+
+    if (!is_write) {
+        qemu_iovec_init(&acb->hd_qiov, qiov->niov);
+    } else if (qiov->niov == 1) {
         acb->buf = (uint8_t *)qiov->iov->iov_base;
+    } else {
+        acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
+        qemu_iovec_to_buffer(qiov, acb->buf);
     }
+
+    acb->bytes_done = 0;
     acb->remaining_sectors = nb_sectors;
     acb->cur_nr_sectors = 0;
     acb->cluster_offset = 0;
diff --git a/block/qcow2.h b/block/qcow2.h
index 3ff162e..356a34a 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -166,8 +166,8 @@ static inline int64_t align_offset(int64_t offset, int n)
 // FIXME Need qcow2_ prefix to global functions

 /* qcow2.c functions */
-int qcow2_backing_read1(BlockDriverState *bs,
-                  int64_t sector_num, uint8_t *buf, int nb_sectors);
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+                  int64_t sector_num, int nb_sectors);

 /* qcow2-refcount.c functions */
 int qcow2_refcount_init(BlockDriverState *bs);
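
For reference, the common (unencrypted) read path now points hd_qiov directly
at the relevant slice of the caller's vector instead of going through a bounce
buffer; roughly like this (a simplified sketch of qcow_aio_read_cb() above,
with error handling and the backing-file and compressed cases omitted):

    /* Sketch only: build a sub-qiov covering this iteration's slice of the
     * guest request, then read straight into guest memory. */
    qemu_iovec_reset(&acb->hd_qiov);
    qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
        acb->cur_nr_sectors * 512);

    acb->hd_aiocb = bdrv_aio_readv(bs->file,
        (acb->cluster_offset >> 9) + index_in_cluster,
        &acb->hd_qiov, acb->cur_nr_sectors, qcow_aio_read_cb, acb);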