From patchwork Wed Oct 31 15:30:56 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paolo Bonzini X-Patchwork-Id: 195957 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 691DF2C01C8 for ; Thu, 1 Nov 2012 04:36:01 +1100 (EST) Received: from localhost ([::1]:43033 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TTcCt-0000Ln-Hs for incoming@patchwork.ozlabs.org; Wed, 31 Oct 2012 13:35:59 -0400 Received: from eggs.gnu.org ([208.118.235.92]:47065) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TTaIc-0001qI-80 for qemu-devel@nongnu.org; Wed, 31 Oct 2012 11:33:52 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1TTaIW-00061q-Rv for qemu-devel@nongnu.org; Wed, 31 Oct 2012 11:33:46 -0400 Received: from mail-da0-f45.google.com ([209.85.210.45]:43195) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TTaIW-0005YY-Lh for qemu-devel@nongnu.org; Wed, 31 Oct 2012 11:33:40 -0400 Received: by mail-da0-f45.google.com with SMTP id n15so666869dad.4 for ; Wed, 31 Oct 2012 08:33:40 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=sender:from:to:cc:subject:date:message-id:x-mailer:in-reply-to :references; bh=rxMGv5wzZE/F1PGH9NHdV+db6XI92eY9fblaN/mGEIo=; b=UTyUZv1AYU0nw3Ad/O6ukyb2XTbuP9XkVNUr94OO5FwpmGLAok4pPRNRn3sN8+TK1L cAkhIK7UwPnToiW05S6poEUxCAyNLMmjLey2hbK8NwoswAuPK/GB3+3PPnW5OTzfFaMi jnBBRZuopuSurqMOjw5mBRlU3cWdR3l2VWxGGCyd4xlaWBYmbkJHCr6XMkv5o8S3hRTC YXx+vBh5PzE4kE00Dt8BJQYKQQLd2HqmVEe3vplKMMkqQHTCT6EfbrouPsusptlweR1l 129mCKI50SnasrA+vhlTu0L6RzMu24fxsjL94s5gg4hWLEwrZGqzXRHs9Eoawoq6xTCe bwsg== Received: by 10.66.80.133 with SMTP id r5mr103651501pax.24.1351697620255; Wed, 31 Oct 2012 08:33:40 -0700 (PDT) Received: from yakj.usersys.redhat.com (93-34-169-1.ip50.fastwebnet.it. [93.34.169.1]) by mx.google.com with ESMTPS id sz6sm2445230pbc.52.2012.10.31.08.33.36 (version=TLSv1/SSLv3 cipher=OTHER); Wed, 31 Oct 2012 08:33:39 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Date: Wed, 31 Oct 2012 16:30:56 +0100 Message-Id: <1351697456-16107-40-git-send-email-pbonzini@redhat.com> X-Mailer: git-send-email 1.7.12.1 In-Reply-To: <1351697456-16107-1-git-send-email-pbonzini@redhat.com> References: <1351697456-16107-1-git-send-email-pbonzini@redhat.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 209.85.210.45 Cc: aliguori@us.ibm.com, stefanha@redhat.com Subject: [Qemu-devel] [PATCH v2 39/39] raw-win32: implement native asynchronous I/O X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org With the new support for EventNotifiers in the AIO event loop, we can hook a completion port to every opened file and use asynchronous I/O on them. Wine's support is extremely inefficient, also because it really does the I/O synchronously on regular files. (!) But it works, and it is good to keep the Win32 and POSIX ports as similar as possible. Signed-off-by: Paolo Bonzini --- block/Makefile.objs | 2 +- block/raw-aio.h | 10 +++ block/raw-win32.c | 42 ++++++++-- block/win32-aio.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 file modificati, 274 inserzioni(+), 6 rimozioni(-) create mode 100644 block/win32-aio.c diff --git a/block/Makefile.objs b/block/Makefile.objs index 771d341..30ef6ae 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -3,7 +3,7 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o block-obj-y += parallels.o blkdebug.o blkverify.o -block-obj-$(CONFIG_WIN32) += raw-win32.o +block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o diff --git a/block/raw-aio.h b/block/raw-aio.h index b3bb073..e77f361 100644 --- a/block/raw-aio.h +++ b/block/raw-aio.h @@ -35,4 +35,14 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, BlockDriverCompletionFunc *cb, void *opaque, int type); #endif +#ifdef _WIN32 +typedef struct QEMUWin32AIOState QEMUWin32AIOState; +QEMUWin32AIOState *win32_aio_init(void); +int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile); +BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs, + QEMUWin32AIOState *aio, HANDLE hfile, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type); +#endif + #endif /* QEMU_RAW_AIO_H */ diff --git a/block/raw-win32.c b/block/raw-win32.c index ffd86e3..0c05c58 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -36,6 +36,8 @@ #define FTYPE_CD 1 #define FTYPE_HARDDISK 2 +static QEMUWin32AIOState *aio; + typedef struct RawWin32AIOData { BlockDriverState *bs; HANDLE hfile; @@ -50,6 +52,7 @@ typedef struct BDRVRawState { HANDLE hfile; int type; char drive_path[16]; /* format: "d:\" */ + QEMUWin32AIOState *aio; } BDRVRawState; /* @@ -208,6 +211,9 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) } *overlapped = FILE_ATTRIBUTE_NORMAL; + if (flags & BDRV_O_NATIVE_AIO) { + *overlapped |= FILE_FLAG_OVERLAPPED; + } if (flags & BDRV_O_NOCACHE) { *overlapped |= FILE_FLAG_NO_BUFFERING; } @@ -222,6 +228,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) s->type = FTYPE_FILE; raw_parse_flags(flags, &access_flags, &overlapped); + + if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) { + aio = win32_aio_init(); + if (aio == NULL) { + return -EINVAL; + } + } s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, @@ -231,7 +244,16 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) if (err == ERROR_ACCESS_DENIED) return -EACCES; - return -1; + return -EINVAL; + } + + if (flags & BDRV_O_NATIVE_AIO) { + int ret = win32_aio_attach(aio, s->hfile); + if (ret < 0) { + CloseHandle(s->hfile); + return ret; + } + s->aio = aio; } return 0; } @@ -241,8 +263,13 @@ static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; - return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors, - cb, opaque, QEMU_AIO_READ); + if (s->aio) { + return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov, + nb_sectors, cb, opaque, QEMU_AIO_READ); + } else { + return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors, + cb, opaque, QEMU_AIO_READ); + } } static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs, @@ -250,8 +277,13 @@ static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; - return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors, - cb, opaque, QEMU_AIO_WRITE); + if (s->aio) { + return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov, + nb_sectors, cb, opaque, QEMU_AIO_WRITE); + } else { + return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors, + cb, opaque, QEMU_AIO_WRITE); + } } static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs, diff --git a/block/win32-aio.c b/block/win32-aio.c new file mode 100644 index 0000000..c34dc73 --- /dev/null +++ b/block/win32-aio.c @@ -0,0 +1,226 @@ +/* + * Block driver for RAW files (win32) + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "qemu-timer.h" +#include "block_int.h" +#include "module.h" +#include "qemu-common.h" +#include "qemu-aio.h" +#include "raw-aio.h" +#include "event_notifier.h" +#include +#include + +#define FTYPE_FILE 0 +#define FTYPE_CD 1 +#define FTYPE_HARDDISK 2 + +struct QEMUWin32AIOState { + HANDLE hIOCP; + EventNotifier e; + int count; +}; + +typedef struct QEMUWin32AIOCB { + BlockDriverAIOCB common; + struct QEMUWin32AIOState *ctx; + int nbytes; + OVERLAPPED ov; + QEMUIOVector *qiov; + void *buf; + bool is_read; + bool is_linear; +} QEMUWin32AIOCB; + +/* + * Completes an AIO request (calls the callback and frees the ACB). + */ +static void win32_aio_process_completion(QEMUWin32AIOState *s, + QEMUWin32AIOCB *waiocb, DWORD count) +{ + int ret; + s->count--; + + if (waiocb->ov.Internal != 0) { + ret = -EIO; + } else { + ret = 0; + if (count < waiocb->nbytes) { + /* Short reads mean EOF, pad with zeros. */ + if (waiocb->is_read) { + qemu_iovec_memset(waiocb->qiov, count, 0, + waiocb->qiov->size - count); + } else { + ret = -EINVAL; + } + } + } + + if (!waiocb->is_linear) { + if (ret == 0 && waiocb->is_read) { + QEMUIOVector *qiov = waiocb->qiov; + char *p = waiocb->buf; + int i; + + for (i = 0; i < qiov->niov; ++i) { + memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len); + p += qiov->iov[i].iov_len; + } + g_free(waiocb->buf); + } + } + + + waiocb->common.cb(waiocb->common.opaque, ret); + qemu_aio_release(waiocb); +} + +static void win32_aio_completion_cb(EventNotifier *e) +{ + QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e); + DWORD count; + ULONG_PTR key; + OVERLAPPED *ov; + + event_notifier_test_and_clear(&s->e); + while (GetQueuedCompletionStatus(s->hIOCP, &count, &key, &ov, 0)) { + QEMUWin32AIOCB *waiocb = container_of(ov, QEMUWin32AIOCB, ov); + + win32_aio_process_completion(s, waiocb, count); + } +} + +static int win32_aio_flush_cb(EventNotifier *e) +{ + QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e); + + return (s->count > 0) ? 1 : 0; +} + +static void win32_aio_cancel(BlockDriverAIOCB *blockacb) +{ + QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb; + + /* + * CancelIoEx is only supported in Vista and newer. For now, just + * wait for completion. + */ + while (!HasOverlappedIoCompleted(&waiocb->ov)) { + qemu_aio_wait(); + } +} + +static AIOPool win32_aio_pool = { + .aiocb_size = sizeof(QEMUWin32AIOCB), + .cancel = win32_aio_cancel, +}; + +BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs, + QEMUWin32AIOState *aio, HANDLE hfile, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type) +{ + struct QEMUWin32AIOCB *waiocb; + uint64_t offset = sector_num * 512; + DWORD rc; + + waiocb = qemu_aio_get(&win32_aio_pool, bs, cb, opaque); + waiocb->nbytes = nb_sectors * 512; + waiocb->qiov = qiov; + waiocb->is_read = (type == QEMU_AIO_READ); + + if (qiov->niov > 1) { + waiocb->buf = qemu_blockalign(bs, qiov->size); + if (type & QEMU_AIO_WRITE) { + char *p = waiocb->buf; + int i; + + for (i = 0; i < qiov->niov; ++i) { + memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len); + p += qiov->iov[i].iov_len; + } + } + waiocb->is_linear = false; + } else { + waiocb->buf = qiov->iov[0].iov_base; + waiocb->is_linear = true; + } + + waiocb->ov = (OVERLAPPED) { + .Offset = (DWORD) offset, + .OffsetHigh = (DWORD) (offset >> 32), + .hEvent = event_notifier_get_handle(&aio->e) + }; + aio->count++; + + if (type & QEMU_AIO_READ) { + rc = ReadFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov); + } else { + rc = WriteFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov); + } + if(rc == 0 && GetLastError() != ERROR_IO_PENDING) { + goto out_dec_count; + } + return &waiocb->common; + +out_dec_count: + aio->count--; + qemu_aio_release(waiocb); + return NULL; +} + +int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile) +{ + if (CreateIoCompletionPort(hfile, aio->hIOCP, (ULONG_PTR) 0, 0) == NULL) { + return -EINVAL; + } else { + return 0; + } +} + +QEMUWin32AIOState *win32_aio_init(void) +{ + QEMUWin32AIOState *s; + + s = g_malloc0(sizeof(*s)); + if (event_notifier_init(&s->e, false) < 0) { + goto out_free_state; + } + + s->hIOCP = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); + if (s->hIOCP == NULL) { + goto out_close_efd; + } + + qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb, + win32_aio_flush_cb); + + return s; + +out_close_efd: + event_notifier_cleanup(&s->e); +out_free_state: + g_free(s); + return NULL; +}