Message ID | 1305981319-32508-1-git-send-email-morita.kazutaka@lab.ntt.co.jp |
---|---|
State | New |
Headers | show |
On Sat, May 21, 2011 at 1:35 PM, MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp> wrote: > +static int sd_prealloc(uint32_t vid, int64_t vdi_size) > +{ > + int fd, ret; > + SheepdogInode *inode; > + char *buf; > + unsigned long idx, max_idx; [...] > + max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE; > + > + for (idx = 0; idx < max_idx; idx++) { Do you want to use uint64_t here instead of unsigned long, which may be too small on 32-bit hosts? Stefan
At Mon, 23 May 2011 10:19:13 +0100, Stefan Hajnoczi wrote: > > On Sat, May 21, 2011 at 1:35 PM, MORITA Kazutaka > <morita.kazutaka@lab.ntt.co.jp> wrote: > > +static int sd_prealloc(uint32_t vid, int64_t vdi_size) > > +{ > > + int fd, ret; > > + SheepdogInode *inode; > > + char *buf; > > + unsigned long idx, max_idx; > [...] > > + max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE; > > + > > + for (idx = 0; idx < max_idx; idx++) { > > Do you want to use uint64_t here instead of unsigned long, which may > be too small on 32-bit hosts? The index of a Sheepdog data object is within 32-bit range, so using an unsigned long is safe here. Thanks, Kazutaka
On Mon, May 23, 2011 at 12:13 PM, MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp> wrote: > At Mon, 23 May 2011 10:19:13 +0100, > Stefan Hajnoczi wrote: >> >> On Sat, May 21, 2011 at 1:35 PM, MORITA Kazutaka >> <morita.kazutaka@lab.ntt.co.jp> wrote: >> > +static int sd_prealloc(uint32_t vid, int64_t vdi_size) >> > +{ >> > + int fd, ret; >> > + SheepdogInode *inode; >> > + char *buf; >> > + unsigned long idx, max_idx; >> [...] >> > + max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE; >> > + >> > + for (idx = 0; idx < max_idx; idx++) { >> >> Do you want to use uint64_t here instead of unsigned long, which may >> be too small on 32-bit hosts? > > The index of a Sheepdog data object is within 32-bit range, so using > an unsigned long is safe here. You are right: #define MAX_DATA_OBJS (UINT64_C(1) << 20) #define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22) #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS) So the max_idx is MAX_DATA_OBJS, which is <32-bit. It just looked suspicious. Stefan
Am 21.05.2011 14:35, schrieb MORITA Kazutaka: > This introduces a qemu-img create option for sheepdog which allows the > data to be preallocated (note that sheepdog always preallocates > metadata). This is necessary to use Sheepdog volumes as a backend > storage for iSCSI target. More information is available at > https://sourceforge.net/apps/trac/sheepdog/wiki/General%20Protocol%20Support > > The option is disabled by default and you need to enable it like the > following: > > qemu-img create sheepdog:test -o preallocation=data 1G > > Signed-off-by: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp> > Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Hm, looks like I forgot about this patch and nobody pinged me... > block/sheepdog.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 files changed, 72 insertions(+), 1 deletions(-) > > diff --git a/block/sheepdog.c b/block/sheepdog.c > index 0392ca8..38ca9aa 100644 > --- a/block/sheepdog.c > +++ b/block/sheepdog.c > @@ -1292,6 +1292,57 @@ static int do_sd_create(char *filename, int64_t vdi_size, > return 0; > } > > +static int sd_prealloc(uint32_t vid, int64_t vdi_size) General comment to this function: Wouldn't it be easier to call the existing bdrv_write function in a loop instead of reimplementing the write manually? Though there may be a reason for it that I'm just missing. > +{ > + int fd, ret; > + SheepdogInode *inode; > + char *buf; > + unsigned long idx, max_idx; > + > + fd = connect_to_sdog(NULL, NULL); > + if (fd < 0) { > + return -EIO; > + } > + > + inode = qemu_malloc(sizeof(*inode)); > + buf = qemu_malloc(SD_DATA_OBJ_SIZE); > + > + ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid), > + 0, sizeof(*inode), 0); No error handling? > + > + max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE; > + > + for (idx = 0; idx < max_idx; idx++) { > + uint64_t oid; > + oid = vid_to_data_oid(vid, idx); > + > + if (inode->data_vdi_id[idx]) { > + ret = read_object(fd, buf, vid_to_vdi_oid(inode->data_vdi_id[idx]), > + 1, SD_DATA_OBJ_SIZE, 0); > + if (ret) > + goto out; Missing braces. Also, what is this if branch doing? Is it to ensure that we don't overwrite existing data? But then, isn't an image always empty when we preallocate it? > + } else { > + memset(buf, 0, SD_DATA_OBJ_SIZE); > + } > + > + ret = write_object(fd, buf, oid, 1, SD_DATA_OBJ_SIZE, 0, 1); > + if (ret) > + goto out; Braces > + > + inode->data_vdi_id[idx] = vid; > + ret = write_object(fd, (char *)inode, vid_to_vdi_oid(vid), > + 1, sizeof(*inode), 0, 0); > + if (ret) > + goto out; Same here > + } > +out: > + free(inode); > + free(buf); > + closesocket(fd); > + > + return ret; > +} > + > static int sd_create(const char *filename, QEMUOptionParameter *options) > { > int ret; > @@ -1301,6 +1352,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) > BDRVSheepdogState s; > char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; > uint32_t snapid; > + int prealloc = 0; > > strstart(filename, "sheepdog:", (const char **)&filename); > > @@ -1317,6 +1369,16 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) > vdi_size = options->value.n; > } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { > backing_file = options->value.s; > + } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { > + if (!options->value.s || !strcmp(options->value.s, "off")) { > + prealloc = 0; > + } else if (!strcmp(options->value.s, "data")) { > + prealloc = 1; > + } else { > + error_report("Invalid preallocation mode: '%s'\n", > + options->value.s); > + return -EINVAL; > + } > } > options++; > } > @@ -1354,7 +1416,11 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) > bdrv_delete(bs); > } > > - return do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port); > + ret = do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port); > + if (!prealloc || ret) > + return ret; And here. > + > + return sd_prealloc(vid, vdi_size); > } > > static void sd_close(BlockDriverState *bs) > @@ -1990,6 +2056,11 @@ static QEMUOptionParameter sd_create_options[] = { > .type = OPT_STRING, > .help = "File name of a base image" > }, > + { > + .name = BLOCK_OPT_PREALLOC, > + .type = OPT_STRING, > + .help = "Preallocation mode (allowed values: off, data)" > + }, > { NULL } > }; In my RFC for qcow2, I called this preallocation mode "full" instead of "data". I don't mind much which we pick, but we should keep it consistent. Any preferences? Kevin
At Fri, 01 Jul 2011 10:29:09 +0200, Kevin Wolf wrote: > > Am 21.05.2011 14:35, schrieb MORITA Kazutaka: > > This introduces a qemu-img create option for sheepdog which allows the > > data to be preallocated (note that sheepdog always preallocates > > metadata). This is necessary to use Sheepdog volumes as a backend > > storage for iSCSI target. More information is available at > > https://sourceforge.net/apps/trac/sheepdog/wiki/General%20Protocol%20Support > > > > The option is disabled by default and you need to enable it like the > > following: > > > > qemu-img create sheepdog:test -o preallocation=data 1G > > > > Signed-off-by: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp> > > Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> > > Hm, looks like I forgot about this patch and nobody pinged me... > > > block/sheepdog.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++- > > 1 files changed, 72 insertions(+), 1 deletions(-) > > > > diff --git a/block/sheepdog.c b/block/sheepdog.c > > index 0392ca8..38ca9aa 100644 > > --- a/block/sheepdog.c > > +++ b/block/sheepdog.c > > @@ -1292,6 +1292,57 @@ static int do_sd_create(char *filename, int64_t vdi_size, > > return 0; > > } > > > > +static int sd_prealloc(uint32_t vid, int64_t vdi_size) > > General comment to this function: Wouldn't it be easier to call the > existing bdrv_write function in a loop instead of reimplementing the > write manually? Though there may be a reason for it that I'm just missing. Yes, it's much easier! I'll rewrite this function with those, thanks. > > > +{ > > + int fd, ret; > > + SheepdogInode *inode; > > + char *buf; > > + unsigned long idx, max_idx; > > + > > + fd = connect_to_sdog(NULL, NULL); > > + if (fd < 0) { > > + return -EIO; > > + } > > + > > + inode = qemu_malloc(sizeof(*inode)); > > + buf = qemu_malloc(SD_DATA_OBJ_SIZE); > > + > > + ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid), > > + 0, sizeof(*inode), 0); > > No error handling? > > > + > > + max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE; > > + > > + for (idx = 0; idx < max_idx; idx++) { > > + uint64_t oid; > > + oid = vid_to_data_oid(vid, idx); > > + > > + if (inode->data_vdi_id[idx]) { > > + ret = read_object(fd, buf, vid_to_vdi_oid(inode->data_vdi_id[idx]), > > + 1, SD_DATA_OBJ_SIZE, 0); > > + if (ret) > > + goto out; > > Missing braces. > > Also, what is this if branch doing? Is it to ensure that we don't > overwrite existing data? But then, isn't an image always empty when we > preallocate it? This branch is for handling a cloned image, which is created with -b option. This branch reads data from the backing file (read_object returns zero when it succeeds) instead of filling buffer with zero. > > > + } else { > > + memset(buf, 0, SD_DATA_OBJ_SIZE); > > + } > > + > > + ret = write_object(fd, buf, oid, 1, SD_DATA_OBJ_SIZE, 0, 1); > > + if (ret) > > + goto out; > > Braces > > > + > > + inode->data_vdi_id[idx] = vid; > > + ret = write_object(fd, (char *)inode, vid_to_vdi_oid(vid), > > + 1, sizeof(*inode), 0, 0); > > + if (ret) > > + goto out; > > Same here > > > + } > > +out: > > + free(inode); > > + free(buf); > > + closesocket(fd); > > + > > + return ret; > > +} > > + > > static int sd_create(const char *filename, QEMUOptionParameter *options) > > { > > int ret; > > @@ -1301,6 +1352,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) > > BDRVSheepdogState s; > > char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; > > uint32_t snapid; > > + int prealloc = 0; > > > > strstart(filename, "sheepdog:", (const char **)&filename); > > > > @@ -1317,6 +1369,16 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) > > vdi_size = options->value.n; > > } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { > > backing_file = options->value.s; > > + } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { > > + if (!options->value.s || !strcmp(options->value.s, "off")) { > > + prealloc = 0; > > + } else if (!strcmp(options->value.s, "data")) { > > + prealloc = 1; > > + } else { > > + error_report("Invalid preallocation mode: '%s'\n", > > + options->value.s); > > + return -EINVAL; > > + } > > } > > options++; > > } > > @@ -1354,7 +1416,11 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) > > bdrv_delete(bs); > > } > > > > - return do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port); > > + ret = do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port); > > + if (!prealloc || ret) > > + return ret; > > And here. > > > + > > + return sd_prealloc(vid, vdi_size); > > } > > > > static void sd_close(BlockDriverState *bs) > > @@ -1990,6 +2056,11 @@ static QEMUOptionParameter sd_create_options[] = { > > .type = OPT_STRING, > > .help = "File name of a base image" > > }, > > + { > > + .name = BLOCK_OPT_PREALLOC, > > + .type = OPT_STRING, > > + .help = "Preallocation mode (allowed values: off, data)" > > + }, > > { NULL } > > }; > > In my RFC for qcow2, I called this preallocation mode "full" instead of > "data". I don't mind much which we pick, but we should keep it > consistent. Any preferences? I don't mind too. I'll use "full" in the v2 patch. Thanks, Kazutaka
Am 05.07.2011 20:21, schrieb MORITA Kazutaka: >>> + >>> + max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE; >>> + >>> + for (idx = 0; idx < max_idx; idx++) { >>> + uint64_t oid; >>> + oid = vid_to_data_oid(vid, idx); >>> + >>> + if (inode->data_vdi_id[idx]) { >>> + ret = read_object(fd, buf, vid_to_vdi_oid(inode->data_vdi_id[idx]), >>> + 1, SD_DATA_OBJ_SIZE, 0); >>> + if (ret) >>> + goto out; >> >> Missing braces. >> >> Also, what is this if branch doing? Is it to ensure that we don't >> overwrite existing data? But then, isn't an image always empty when we >> preallocate it? > > This branch is for handling a cloned image, which is created with -b > option. This branch reads data from the backing file (read_object > returns zero when it succeeds) instead of filling buffer with zero. Oh, I see. You support preallocation even with backing files. And suddenly it makes perfect sense. :-) (Although after completing preallocation, you won't need the backing file any more as all of it has been copied into the image. Maybe we should drop the reference then?) >> In my RFC for qcow2, I called this preallocation mode "full" instead of >> "data". I don't mind much which we pick, but we should keep it >> consistent. Any preferences? > > I don't mind too. I'll use "full" in the v2 patch. Great, thanks. Kevin
At Wed, 06 Jul 2011 09:53:32 +0200, Kevin Wolf wrote: > > Am 05.07.2011 20:21, schrieb MORITA Kazutaka: > >>> + > >>> + max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE; > >>> + > >>> + for (idx = 0; idx < max_idx; idx++) { > >>> + uint64_t oid; > >>> + oid = vid_to_data_oid(vid, idx); > >>> + > >>> + if (inode->data_vdi_id[idx]) { > >>> + ret = read_object(fd, buf, vid_to_vdi_oid(inode->data_vdi_id[idx]), > >>> + 1, SD_DATA_OBJ_SIZE, 0); > >>> + if (ret) > >>> + goto out; > >> > >> Missing braces. > >> > >> Also, what is this if branch doing? Is it to ensure that we don't > >> overwrite existing data? But then, isn't an image always empty when we > >> preallocate it? > > > > This branch is for handling a cloned image, which is created with -b > > option. This branch reads data from the backing file (read_object > > returns zero when it succeeds) instead of filling buffer with zero. > > Oh, I see. You support preallocation even with backing files. And > suddenly it makes perfect sense. :-) > > (Although after completing preallocation, you won't need the backing > file any more as all of it has been copied into the image. Maybe we > should drop the reference then?) Though we can drop it, Sheepdog uses the reference to show the VM image relationships in a tree format as VMware does. So as far as a Sheepdog protocol is concerned, I think we should keep it. Thanks, Kazutaka
diff --git a/block/sheepdog.c b/block/sheepdog.c index 0392ca8..38ca9aa 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1292,6 +1292,57 @@ static int do_sd_create(char *filename, int64_t vdi_size, return 0; } +static int sd_prealloc(uint32_t vid, int64_t vdi_size) +{ + int fd, ret; + SheepdogInode *inode; + char *buf; + unsigned long idx, max_idx; + + fd = connect_to_sdog(NULL, NULL); + if (fd < 0) { + return -EIO; + } + + inode = qemu_malloc(sizeof(*inode)); + buf = qemu_malloc(SD_DATA_OBJ_SIZE); + + ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid), + 0, sizeof(*inode), 0); + + max_idx = (vdi_size + SD_DATA_OBJ_SIZE - 1) / SD_DATA_OBJ_SIZE; + + for (idx = 0; idx < max_idx; idx++) { + uint64_t oid; + oid = vid_to_data_oid(vid, idx); + + if (inode->data_vdi_id[idx]) { + ret = read_object(fd, buf, vid_to_vdi_oid(inode->data_vdi_id[idx]), + 1, SD_DATA_OBJ_SIZE, 0); + if (ret) + goto out; + } else { + memset(buf, 0, SD_DATA_OBJ_SIZE); + } + + ret = write_object(fd, buf, oid, 1, SD_DATA_OBJ_SIZE, 0, 1); + if (ret) + goto out; + + inode->data_vdi_id[idx] = vid; + ret = write_object(fd, (char *)inode, vid_to_vdi_oid(vid), + 1, sizeof(*inode), 0, 0); + if (ret) + goto out; + } +out: + free(inode); + free(buf); + closesocket(fd); + + return ret; +} + static int sd_create(const char *filename, QEMUOptionParameter *options) { int ret; @@ -1301,6 +1352,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) BDRVSheepdogState s; char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; uint32_t snapid; + int prealloc = 0; strstart(filename, "sheepdog:", (const char **)&filename); @@ -1317,6 +1369,16 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) vdi_size = options->value.n; } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { backing_file = options->value.s; + } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { + if (!options->value.s || !strcmp(options->value.s, "off")) { + prealloc = 0; + } else if (!strcmp(options->value.s, "data")) { + prealloc = 1; + } else { + error_report("Invalid preallocation mode: '%s'\n", + options->value.s); + return -EINVAL; + } } options++; } @@ -1354,7 +1416,11 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) bdrv_delete(bs); } - return do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port); + ret = do_sd_create((char *)vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port); + if (!prealloc || ret) + return ret; + + return sd_prealloc(vid, vdi_size); } static void sd_close(BlockDriverState *bs) @@ -1990,6 +2056,11 @@ static QEMUOptionParameter sd_create_options[] = { .type = OPT_STRING, .help = "File name of a base image" }, + { + .name = BLOCK_OPT_PREALLOC, + .type = OPT_STRING, + .help = "Preallocation mode (allowed values: off, data)" + }, { NULL } };