From patchwork Mon Aug 31 20:17:30 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Christoph Hellwig X-Patchwork-Id: 32680 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by bilbo.ozlabs.org (Postfix) with ESMTPS id 266A1B7B65 for ; Tue, 1 Sep 2009 06:23:06 +1000 (EST) Received: from localhost ([127.0.0.1]:36121 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1MiDP5-0005eT-2u for incoming@patchwork.ozlabs.org; Mon, 31 Aug 2009 16:23:03 -0400 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1MiDJq-0003ys-8v for qemu-devel@nongnu.org; Mon, 31 Aug 2009 16:17:38 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1MiDJl-0003yL-9C for qemu-devel@nongnu.org; Mon, 31 Aug 2009 16:17:37 -0400 Received: from [199.232.76.173] (port=51914 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1MiDJl-0003yI-2a for qemu-devel@nongnu.org; Mon, 31 Aug 2009 16:17:33 -0400 Received: from verein.lst.de ([213.95.11.210]:33977) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_3DES_EDE_CBC_SHA1:24) (Exim 4.60) (envelope-from ) id 1MiDJk-0001Fb-Ev for qemu-devel@nongnu.org; Mon, 31 Aug 2009 16:17:32 -0400 Received: from verein.lst.de (localhost [127.0.0.1]) by verein.lst.de (8.12.3/8.12.3/Debian-7.1) with ESMTP id n7VKHUVL004927 (version=TLSv1/SSLv3 cipher=EDH-RSA-DES-CBC3-SHA bits=168 verify=NO) for ; Mon, 31 Aug 2009 22:17:31 +0200 Received: (from hch@localhost) by verein.lst.de (8.12.3/8.12.3/Debian-7.2) id n7VKHU2a004926 for qemu-devel@nongnu.org; Mon, 31 Aug 2009 22:17:30 +0200 Date: Mon, 31 Aug 2009 22:17:30 +0200 From: Christoph Hellwig To: qemu-devel@nongnu.org Message-ID: <20090831201730.GC4874@lst.de> 
References: <20090831201627.GA4811@lst.de> Mime-Version: 1.0 Content-Disposition: inline In-Reply-To: <20090831201627.GA4811@lst.de> User-Agent: Mutt/1.3.28i X-Spam-Score: 0 () X-Scanned-By: MIMEDefang 2.39 X-detected-operating-system: by monty-python.gnu.org: GNU/Linux 2.6 (newer, 2) Subject: [Qemu-devel] [PATCH 3/4] block: add bdrv_aio_flush operation X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Instead of stalling the VCPU while serving a cache flush, try to do it asynchronously. Use our good old helper thread pool to issue an asynchronous fdatasync for raw-posix. Note that while Linux AIO implements a fdatasync operation, it is not useful for us because it isn't actually implemented in an asynchronous fashion. For now only use it in IDE because virtio-blk doesn't implement cache flushing yet (will be fixed in patch 4/4) and the interface between the HBA emulation and scsi-disk will need some changes to accommodate it for scsi (will be a separate patch series). 
Signed-off-by: Christoph Hellwig Index: qemu/block.c =================================================================== --- qemu.orig/block.c 2009-08-31 16:49:54.508542113 -0300 +++ qemu/block.c 2009-08-31 16:49:59.593042021 -0300 @@ -54,6 +54,8 @@ static BlockDriverAIOCB *bdrv_aio_readv_ static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); +static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors); static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num, @@ -138,6 +140,10 @@ void bdrv_register(BlockDriver *bdrv) bdrv->bdrv_read = bdrv_read_em; bdrv->bdrv_write = bdrv_write_em; } + + if (!bdrv->bdrv_aio_flush) + bdrv->bdrv_aio_flush = bdrv_aio_flush_em; + bdrv->next = first_drv; first_drv = bdrv; } @@ -1369,6 +1375,21 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockD return ret; } +BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BlockDriver *drv = bs->drv; + + if (!drv) + return NULL; + + /* + * Note that unlike bdrv_flush the driver is reponsible for flushing a + * backing image if it exists. 
+ */ + return drv->bdrv_aio_flush(bs, cb, opaque); +} + void bdrv_aio_cancel(BlockDriverAIOCB *acb) { acb->pool->cancel(acb); @@ -1459,6 +1480,25 @@ static BlockDriverAIOCB *bdrv_aio_writev return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); } +static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BlockDriverAIOCBSync *acb; + + acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque); + acb->is_write = 1; /* don't bounce in the completion hadler */ + acb->qiov = NULL; + acb->bounce = NULL; + acb->ret = 0; + + if (!acb->bh) + acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); + + bdrv_flush(bs); + qemu_bh_schedule(acb->bh); + return &acb->common; +} + /**************************************************************/ /* sync block device emulation */ Index: qemu/block.h =================================================================== --- qemu.orig/block.h 2009-08-31 16:49:54.516577491 -0300 +++ qemu/block.h 2009-08-31 16:49:59.593042021 -0300 @@ -85,6 +85,8 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDr BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); +BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); void bdrv_aio_cancel(BlockDriverAIOCB *acb); /* sg packet commands */ Index: qemu/block_int.h =================================================================== --- qemu.orig/block_int.h 2009-08-31 16:49:54.512583129 -0300 +++ qemu/block_int.h 2009-08-31 16:49:59.597095469 -0300 @@ -69,6 +69,8 @@ struct BlockDriver { BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); + BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); const char *protocol_name; int 
(*bdrv_truncate)(BlockDriverState *bs, int64_t offset); Index: qemu/hw/ide/core.c =================================================================== --- qemu.orig/hw/ide/core.c 2009-08-31 16:49:54.516577491 -0300 +++ qemu/hw/ide/core.c 2009-08-31 16:49:59.601041920 -0300 @@ -771,6 +771,16 @@ static void ide_atapi_cmd_check_status(I ide_set_irq(s); } +static void ide_flush_cb(void *opaque, int ret) +{ + IDEState *s = opaque; + + /* XXX: how do we signal I/O errors here? */ + + s->status = READY_STAT | SEEK_STAT; + ide_set_irq(s); +} + static inline void cpu_to_ube16(uint8_t *buf, int val) { buf[0] = val >> 8; @@ -1969,9 +1979,9 @@ void ide_ioport_write(void *opaque, uint case WIN_FLUSH_CACHE: case WIN_FLUSH_CACHE_EXT: if (s->bs) - bdrv_flush(s->bs); - s->status = READY_STAT | SEEK_STAT; - ide_set_irq(s); + bdrv_aio_flush(s->bs, ide_flush_cb, s); + else + ide_flush_cb(s, 0); break; case WIN_STANDBY: case WIN_STANDBY2: Index: qemu/block/raw-posix-aio.h =================================================================== --- qemu.orig/block/raw-posix-aio.h 2009-08-27 23:50:52.510770924 -0300 +++ qemu/block/raw-posix-aio.h 2009-08-31 16:49:59.605095368 -0300 @@ -17,8 +17,9 @@ #define QEMU_AIO_READ 0x0001 #define QEMU_AIO_WRITE 0x0002 #define QEMU_AIO_IOCTL 0x0004 +#define QEMU_AIO_FLUSH 0x0008 #define QEMU_AIO_TYPE_MASK \ - (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL) + (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH) /* AIO flags */ #define QEMU_AIO_MISALIGNED 0x1000 Index: qemu/block/raw-posix.c =================================================================== --- qemu.orig/block/raw-posix.c 2009-08-31 16:49:55.513071598 -0300 +++ qemu/block/raw-posix.c 2009-08-31 16:49:59.613070264 -0300 @@ -574,6 +574,18 @@ static BlockDriverAIOCB *raw_aio_writev( cb, opaque, QEMU_AIO_WRITE); } +static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVRawState *s = bs->opaque; + + if (fd_open(bs) < 0) + 
return NULL; + + return paio_submit(bs, s->aio_ctx, s->fd, 0, NULL, 0, + cb, opaque, QEMU_AIO_FLUSH); +} + static void raw_close(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; @@ -749,6 +761,7 @@ static BlockDriver bdrv_raw = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, + .bdrv_aio_flush = raw_aio_flush, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -1002,6 +1015,7 @@ static BlockDriver bdrv_host_device = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, + .bdrv_aio_flush = raw_aio_flush, .bdrv_read = raw_read, .bdrv_write = raw_write, @@ -1096,6 +1110,7 @@ static BlockDriver bdrv_host_floppy = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, + .bdrv_aio_flush = raw_aio_flush, .bdrv_read = raw_read, .bdrv_write = raw_write, @@ -1176,6 +1191,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, + .bdrv_aio_flush = raw_aio_flush, .bdrv_read = raw_read, .bdrv_write = raw_write, @@ -1295,6 +1311,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, + .bdrv_aio_flush = raw_aio_flush, .bdrv_read = raw_read, .bdrv_write = raw_write, Index: qemu/posix-aio-compat.c =================================================================== --- qemu.orig/posix-aio-compat.c 2009-08-27 23:50:52.654237211 -0300 +++ qemu/posix-aio-compat.c 2009-08-31 16:49:59.621095866 -0300 @@ -134,6 +134,16 @@ static size_t handle_aiocb_ioctl(struct return aiocb->aio_nbytes; } +static size_t handle_aiocb_flush(struct qemu_paiocb *aiocb) +{ + int ret; + + ret = fdatasync(aiocb->aio_fildes); + if (ret == -1) + return -errno; + return 0; +} + #ifdef CONFIG_PREADV static ssize_t @@ -330,6 +340,9 @@ static void *aio_thread(void *unused) case QEMU_AIO_WRITE: ret = handle_aiocb_rw(aiocb); break; + case QEMU_AIO_FLUSH: + ret = handle_aiocb_flush(aiocb); + break; case QEMU_AIO_IOCTL: ret = 
handle_aiocb_ioctl(aiocb); break; @@ -530,8 +543,10 @@ BlockDriverAIOCB *paio_submit(BlockDrive acb->aio_type = type; acb->aio_fildes = fd; acb->ev_signo = SIGUSR2; - acb->aio_iov = qiov->iov; - acb->aio_niov = qiov->niov; + if (qiov) { + acb->aio_iov = qiov->iov; + acb->aio_niov = qiov->niov; + } acb->aio_nbytes = nb_sectors * 512; acb->aio_offset = sector_num * 512;