From patchwork Wed Dec 19 18:29:31 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Liu Yuan X-Patchwork-Id: 207501 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 115B72C0097 for ; Thu, 20 Dec 2012 05:30:03 +1100 (EST) Received: from localhost ([::1]:49893 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TlOP2-0008NN-MP for incoming@patchwork.ozlabs.org; Wed, 19 Dec 2012 13:30:00 -0500 Received: from eggs.gnu.org ([208.118.235.92]:46269) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TlOOv-0008NF-5M for qemu-devel@nongnu.org; Wed, 19 Dec 2012 13:29:54 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1TlOOs-0005CZ-1X for qemu-devel@nongnu.org; Wed, 19 Dec 2012 13:29:53 -0500 Received: from mail-da0-f44.google.com ([209.85.210.44]:55421) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TlOOr-0005CG-NW for qemu-devel@nongnu.org; Wed, 19 Dec 2012 13:29:49 -0500 Received: by mail-da0-f44.google.com with SMTP id z20so1065108dae.17 for ; Wed, 19 Dec 2012 10:29:48 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=x-received:from:to:cc:subject:date:message-id:x-mailer; bh=2KsN+KGqqHmw8u/Q2vODGw8CpKCUE3NXVd6LnPW7L98=; b=fp/1B0g9lzUwlmZWXeV8dP8NXcGD3Y0QST0aAGFavIfFfQo4pylAcz6sgQAWDTh85Q GZXGoeCUMVXMZADWQXUQ9ZI9c4rYlIUbkrxomonO24d35NCtrVdjIqnZq9Dda+08cUIf QIicSm19DoBdABxz9BBIvF+bFHcAcalO6QRJJQC31ueB2kkYVnwUoKJ9OOQtJc2B1kMv XVhA/lcn00cnsdaeyou/lisSGYDh5+wrQVUbQNJhwYcPy1cykY86XYatMIQlcJZ3dVw0 vMzeec4vWLH9ja+ecbnYSPrk2Rvqh75qCisRc6OnPB+t25pFfffq8JZdYBUSFk/o7Ebj oZOQ== X-Received: by 10.68.232.200 with SMTP id 
tq8mr21173988pbc.52.1355941788058; Wed, 19 Dec 2012 10:29:48 -0800 (PST) Received: from localhost.localdomain ([221.217.163.122]) by mx.google.com with ESMTPS id f10sm273698pav.18.2012.12.19.10.29.43 (version=SSLv3 cipher=OTHER); Wed, 19 Dec 2012 10:29:47 -0800 (PST) From: Liu Yuan To: qemu-devel@nongnu.org Date: Thu, 20 Dec 2012 02:29:31 +0800 Message-Id: <1355941771-3418-1-git-send-email-namei.unix@gmail.com> X-Mailer: git-send-email 1.7.9.5 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 209.85.210.44 Cc: Kevin Wolf , Stefan Hajnoczi , MORITA Kazutaka Subject: [Qemu-devel] [PATCH] sheepdog: implement direct write semantics X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org From: Liu Yuan Sheepdog supports both writeback and writethrough writes but does not yet support DIRECTIO semantics, which bypass the cache completely even if the Sheepdog daemon is set up with cache enabled. 
Suppose cache is enabled on the Sheepdog daemon side; the new cache control is: cache=writeback # enable the writeback semantics for write cache=writethrough # enable the writethrough semantics for write cache='directsync | none | off' # disable cache completely Cc: MORITA Kazutaka Cc: Kevin Wolf Cc: Stefan Hajnoczi Signed-off-by: Liu Yuan --- block/sheepdog.c | 67 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index ceabc00..134329a 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -36,7 +36,8 @@ #define SD_FLAG_CMD_WRITE 0x01 #define SD_FLAG_CMD_COW 0x02 -#define SD_FLAG_CMD_CACHE 0x04 +#define SD_FLAG_CMD_CACHE 0x04 /* Writeback mode for cache */ +#define SD_FLAG_CMD_DIRECT 0x08 /* Don't use cache */ #define SD_RES_SUCCESS 0x00 /* Success */ #define SD_RES_UNKNOWN 0x01 /* Unknown error */ @@ -293,7 +294,7 @@ typedef struct BDRVSheepdogState { char name[SD_MAX_VDI_LEN]; bool is_snapshot; - bool cache_enabled; + uint32_t cache_flags; char *addr; char *port; @@ -977,8 +978,8 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, hdr.flags = SD_FLAG_CMD_WRITE | flags; } - if (s->cache_enabled) { - hdr.flags |= SD_FLAG_CMD_CACHE; + if (s->cache_flags) { + hdr.flags |= s->cache_flags; } hdr.oid = oid; @@ -1023,7 +1024,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, static int read_write_object(int fd, char *buf, uint64_t oid, int copies, unsigned int datalen, uint64_t offset, - bool write, bool create, bool cache) + bool write, bool create, uint32_t cache_flags) { SheepdogObjReq hdr; SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr; @@ -1047,9 +1048,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies, hdr.opcode = SD_OP_READ_OBJ; } - if (cache) { - hdr.flags |= SD_FLAG_CMD_CACHE; - } + hdr.flags |= cache_flags; hdr.oid = oid; hdr.data_length = datalen; @@ -1072,18 +1071,19 @@ 
static int read_write_object(int fd, char *buf, uint64_t oid, int copies, } static int read_object(int fd, char *buf, uint64_t oid, int copies, - unsigned int datalen, uint64_t offset, bool cache) + unsigned int datalen, uint64_t offset, + uint32_t cache_flags) { return read_write_object(fd, buf, oid, copies, datalen, offset, false, - false, cache); + false, cache_flags); } static int write_object(int fd, char *buf, uint64_t oid, int copies, unsigned int datalen, uint64_t offset, bool create, - bool cache) + uint32_t cache_flags) { return read_write_object(fd, buf, oid, copies, datalen, offset, true, - create, cache); + create, cache_flags); } static int sd_open(BlockDriverState *bs, const char *filename, int flags) @@ -1118,12 +1118,19 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) goto out; } - s->cache_enabled = true; - s->flush_fd = connect_to_sdog(s->addr, s->port); - if (s->flush_fd < 0) { - error_report("failed to connect"); - ret = s->flush_fd; - goto out; + if (flags & BDRV_O_NOCACHE) { + s->cache_flags = SD_FLAG_CMD_DIRECT; + } else if (flags & BDRV_O_CACHE_WB) { + s->cache_flags = SD_FLAG_CMD_CACHE; + } + + if (s->cache_flags != SD_FLAG_CMD_DIRECT) { + s->flush_fd = connect_to_sdog(s->addr, s->port); + if (s->flush_fd < 0) { + error_report("failed to connect"); + ret = s->flush_fd; + goto out; + } } if (snapid || tag[0] != '\0') { @@ -1140,7 +1147,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) buf = g_malloc(SD_INODE_SIZE); ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0, - s->cache_enabled); + s->cache_flags); closesocket(fd); @@ -1387,7 +1394,7 @@ static void sd_close(BlockDriverState *bs) qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); closesocket(s->fd); - if (s->cache_enabled) { + if (s->cache_flags) { closesocket(s->flush_fd); } g_free(s->addr); @@ -1423,7 +1430,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) datalen = SD_INODE_SIZE - 
sizeof(s->inode.data_vdi_id); s->inode.vdi_size = offset; ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, false, s->cache_enabled); + s->inode.nr_copies, datalen, 0, false, s->cache_flags); close(fd); if (ret < 0) { @@ -1506,7 +1513,7 @@ static int sd_create_branch(BDRVSheepdogState *s) } ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, - SD_INODE_SIZE, 0, s->cache_enabled); + SD_INODE_SIZE, 0, s->cache_flags); closesocket(fd); @@ -1707,7 +1714,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) int ret; unsigned int wlen = 0, rlen = 0; - if (!s->cache_enabled) { + if (s->cache_flags == SD_FLAG_CMD_DIRECT) { return 0; } @@ -1723,7 +1730,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) if (rsp->result == SD_RES_INVALID_PARMS) { dprintf("disable write cache since the server doesn't support it\n"); - s->cache_enabled = false; + s->cache_flags = SD_FLAG_CMD_DIRECT; closesocket(s->flush_fd); return 0; } @@ -1774,7 +1781,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) } ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), - s->inode.nr_copies, datalen, 0, false, s->cache_enabled); + s->inode.nr_copies, datalen, 0, false, s->cache_flags); if (ret < 0) { error_report("failed to write snapshot's inode."); goto cleanup; @@ -1791,7 +1798,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) inode = (SheepdogInode *)g_malloc(datalen); ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid), - s->inode.nr_copies, datalen, 0, s->cache_enabled); + s->inode.nr_copies, datalen, 0, s->cache_flags); if (ret < 0) { error_report("failed to read new inode info. 
%s", strerror(errno)); @@ -1845,7 +1852,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) buf = g_malloc(SD_INODE_SIZE); ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, - SD_INODE_SIZE, 0, s->cache_enabled); + SD_INODE_SIZE, 0, s->cache_flags); closesocket(fd); @@ -1942,7 +1949,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) /* we don't need to read entire object */ ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0, - s->cache_enabled); + s->cache_flags); if (ret) { continue; @@ -2003,11 +2010,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, if (load) { ret = read_object(fd, (char *)data, vmstate_oid, s->inode.nr_copies, data_len, offset, - s->cache_enabled); + s->cache_flags); } else { ret = write_object(fd, (char *)data, vmstate_oid, s->inode.nr_copies, data_len, offset, create, - s->cache_enabled); + s->cache_flags); } if (ret < 0) {