Message ID | 20120721083159.GE1046@in.ibm.com |
---|---|
State | New |
Headers | show |
On Sat, Jul 21, 2012 at 9:31 AM, Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > +typedef struct GlusterAIOCB { > + BlockDriverAIOCB common; > + QEMUIOVector *qiov; The qiov field is unused. > + char *bounce; Unused. > + struct BDRVGlusterState *s; You can get this through common.bs->opaque, but if you like having a shortcut, that's fine. > + int cancelled; bool > +} GlusterAIOCB; > + > +typedef struct GlusterCBKData { > + GlusterAIOCB *acb; > + struct BDRVGlusterState *s; > + int64_t size; > + int ret; > +} GlusterCBKData; I think GlusterCBKData could just be part of GlusterAIOCB. That would simplify the code a little and avoid some malloc/free. > + > +typedef struct BDRVGlusterState { > + struct glfs *glfs; > + int fds[2]; > + int open_flags; > + struct glfs_fd *fd; > + int qemu_aio_count; > + int event_reader_pos; > + GlusterCBKData *event_gcbk; > +} BDRVGlusterState; > + > +#define GLUSTER_FD_READ 0 > +#define GLUSTER_FD_WRITE 1 > + > +static void qemu_gluster_complete_aio(GlusterCBKData *gcbk) > +{ > + GlusterAIOCB *acb = gcbk->acb; > + int ret; > + > + if (acb->cancelled) { Where does cancelled get set? > + qemu_aio_release(acb); > + goto done; > + } > + > + if (gcbk->ret == gcbk->size) { > + ret = 0; /* Success */ > + } else if (gcbk->ret < 0) { > + ret = gcbk->ret; /* Read/Write failed */ > + } else { > + ret = -EINVAL; /* Partial read/write - fail it */ EINVAL is for invalid arguments. EIO would be better. 
> +/* > + * file=protocol:server@port:volname:image > + */ > +static int qemu_gluster_parsename(GlusterConf *c, const char *filename) > +{ > + char *file = g_strdup(filename); > + char *token, *next_token, *saveptr; > + char *token_s, *next_token_s, *saveptr_s; > + int ret = -EINVAL; > + > + /* Discard the protocol */ > + token = strtok_r(file, ":", &saveptr); > + if (!token) { > + goto out; > + } > + > + /* server@port */ > + next_token = strtok_r(NULL, ":", &saveptr); > + if (!next_token) { > + goto out; > + } > + if (strchr(next_token, '@')) { > + token_s = strtok_r(next_token, "@", &saveptr_s); > + if (!token_s) { > + goto out; > + } > + strncpy(c->server, token_s, HOST_NAME_MAX); strncpy(3) will not NUL-terminate when token_s is HOST_NAME_MAX characters long. QEMU has cutils.c:pstrcpy(). When the argument is too long we should probably report an error instead of truncating. Same below. > + next_token_s = strtok_r(NULL, "@", &saveptr_s); > + if (!next_token_s) { > + goto out; > + } > + c->port = atoi(next_token_s); No error checking. If the input is invalid an error message would help the user here. > +static struct glfs *qemu_gluster_init(GlusterConf *c, const char *filename) > +{ > + struct glfs *glfs = NULL; > + int ret; > + > + ret = qemu_gluster_parsename(c, filename); > + if (ret < 0) { > + errno = -ret; > + goto out; > + } > + > + glfs = glfs_new(c->volname); > + if (!glfs) { > + goto out; > + } > + > + ret = glfs_set_volfile_server(glfs, "socket", c->server, c->port); > + if (ret < 0) { > + goto out; > + } > + > + /* > + * TODO: Logging is not necessary but instead nice to have. > + * Can QEMU optionally log into a standard place ? QEMU prints to stderr, can you do that here too? The global log file is not okay, especially when multiple QEMU instances are running. > + * Need to use defines like gf_loglevel_t:GF_LOG_INFO instead of > + * hard coded values like 7 here. 
> + */ > + ret = glfs_set_logging(glfs, "/tmp/qemu-gluster.log", 7); > + if (ret < 0) { > + goto out; > + } > + > + ret = glfs_init(glfs); > + if (ret < 0) { > + goto out; > + } > + return glfs; > + > +out: > + if (glfs) { > + (void)glfs_fini(glfs); > + } > + return NULL; > +} > + > +static int qemu_gluster_open(BlockDriverState *bs, const char *filename, > + int bdrv_flags) > +{ > + BDRVGlusterState *s = bs->opaque; > + GlusterConf *c = g_malloc(sizeof(GlusterConf)); Can this be allocated on the stack? > + int ret; > + > + s->glfs = qemu_gluster_init(c, filename); > + if (!s->glfs) { > + ret = -errno; > + goto out; > + } > + > + s->open_flags |= O_BINARY; Can open_flags be a local variable? > +static int qemu_gluster_create(const char *filename, > + QEMUOptionParameter *options) > +{ > + struct glfs *glfs; > + struct glfs_fd *fd; > + GlusterConf *c = g_malloc(sizeof(GlusterConf)); > + int ret = 0; > + int64_t total_size = 0; > + > + glfs = qemu_gluster_init(c, filename); > + if (!glfs) { > + ret = -errno; > + goto out; > + } > + > + /* Read out options */ > + while (options && options->name) { > + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { > + total_size = options->value.n / BDRV_SECTOR_SIZE; > + } > + options++; > + } > + > + fd = glfs_creat(glfs, c->image, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU); Why set the execute permission bit? > +static void qemu_gluster_close(BlockDriverState *bs) > +{ > + BDRVGlusterState *s = bs->opaque; > + > + if (s->fd) { > + glfs_close(s->fd); > + s->fd = NULL; > + } Why not call glfs_fini() here?
On Sun, Jul 22, 2012 at 04:38:00PM +0100, Stefan Hajnoczi wrote: > On Sat, Jul 21, 2012 at 9:31 AM, Bharata B Rao > <bharata@linux.vnet.ibm.com> wrote: > > +typedef struct GlusterAIOCB { > > + BlockDriverAIOCB common; > > + QEMUIOVector *qiov; > > The qiov field is unused. > > > + char *bounce; > > Unused. Yes, removed these two. > > > + struct BDRVGlusterState *s; > > You can get this through common.bs->opaque, but if you like having a > shortcut, that's fine. > > > + int cancelled; > > bool Ok. > > > +} GlusterAIOCB; > > + > > +typedef struct GlusterCBKData { > > + GlusterAIOCB *acb; > > + struct BDRVGlusterState *s; > > + int64_t size; > > + int ret; > > +} GlusterCBKData; > > I think GlusterCBKData could just be part of GlusterAIOCB. That would > simplify the code a little and avoid some malloc/free. Are you suggesting to put a field GlusterCBKData gcbk; inside GlusterAIOCB and use gcbk from there or Are you suggesting that I make the fields of GlusterCBKData part of GlusterAIOCB and get rid of GlusterCBKData altogether ? This means I would have to pass the GlusterAIOCB to gluster async calls and update its fields from gluster callback routine. I can do this, but I am not sure if you can touch the fields of GlusterAIOCB in non-QEMU threads (gluster callback thread). > > > + > > +typedef struct BDRVGlusterState { > > + struct glfs *glfs; > > + int fds[2]; > > + int open_flags; > > + struct glfs_fd *fd; > > + int qemu_aio_count; > > + int event_reader_pos; > > + GlusterCBKData *event_gcbk; > > +} BDRVGlusterState; > > + > > +#define GLUSTER_FD_READ 0 > > +#define GLUSTER_FD_WRITE 1 > > + > > +static void qemu_gluster_complete_aio(GlusterCBKData *gcbk) > > +{ > > + GlusterAIOCB *acb = gcbk->acb; > > + int ret; > > + > > + if (acb->cancelled) { > > Where does cancelled get set? I realised that I am not supporting bdrv_aio_cancel(). I guess I will have to add support for this in the next version. 
> > > + qemu_aio_release(acb); > > + goto done; > > + } > > + > > + if (gcbk->ret == gcbk->size) { > > + ret = 0; /* Success */ > > + } else if (gcbk->ret < 0) { > > + ret = gcbk->ret; /* Read/Write failed */ > > + } else { > > + ret = -EINVAL; /* Partial read/write - fail it */ > > EINVAL is for invalid arguments. EIO would be better. Ok. > > > +/* > > + * file=protocol:server@port:volname:image > > + */ > > +static int qemu_gluster_parsename(GlusterConf *c, const char *filename) > > +{ > > + char *file = g_strdup(filename); > > + char *token, *next_token, *saveptr; > > + char *token_s, *next_token_s, *saveptr_s; > > + int ret = -EINVAL; > > + > > + /* Discard the protocol */ > > + token = strtok_r(file, ":", &saveptr); > > + if (!token) { > > + goto out; > > + } > > + > > + /* server@port */ > > + next_token = strtok_r(NULL, ":", &saveptr); > > + if (!next_token) { > > + goto out; > > + } > > + if (strchr(next_token, '@')) { > > + token_s = strtok_r(next_token, "@", &saveptr_s); > > + if (!token_s) { > > + goto out; > > + } > > + strncpy(c->server, token_s, HOST_NAME_MAX); > > strncpy(3) will not NUL-terminate when token_s is HOST_NAME_MAX > characters long. QEMU has cutils.c:pstrcpy(). Will use pstrcpy. > > When the argument is too long we should probably report an error > instead of truncating. Or should we let the gluster APIs flag an error for truncated server and volume names? > > Same below. > > > + next_token_s = strtok_r(NULL, "@", &saveptr_s); > > + if (!next_token_s) { > > + goto out; > > + } > > + c->port = atoi(next_token_s); > > No error checking. If the input is invalid an error message would > help the user here. Fixed. 
> > > +static struct glfs *qemu_gluster_init(GlusterConf *c, const char *filename) > > +{ > > + struct glfs *glfs = NULL; > > + int ret; > > + > > + ret = qemu_gluster_parsename(c, filename); > > + if (ret < 0) { > > + errno = -ret; > > + goto out; > > + } > > + > > + glfs = glfs_new(c->volname); > > + if (!glfs) { > > + goto out; > > + } > > + > > + ret = glfs_set_volfile_server(glfs, "socket", c->server, c->port); > > + if (ret < 0) { > > + goto out; > > + } > > + > > + /* > > + * TODO: Logging is not necessary but instead nice to have. > > + * Can QEMU optionally log into a standard place ? > > QEMU prints to stderr, can you do that here too? The global log file > is not okay, especially when multiple QEMU instances are running. Ok, I can do glfs_set_logging(glfs, "/dev/stderr", loglevel); > > > + * Need to use defines like gf_loglevel_t:GF_LOG_INFO instead of > > + * hard coded values like 7 here. > > + */ > > + ret = glfs_set_logging(glfs, "/tmp/qemu-gluster.log", 7); > > + if (ret < 0) { > > + goto out; > > + } > > + > > + ret = glfs_init(glfs); > > + if (ret < 0) { > > + goto out; > > + } > > + return glfs; > > + > > +out: > > + if (glfs) { > > + (void)glfs_fini(glfs); > > + } > > + return NULL; > > +} > > + > > +static int qemu_gluster_open(BlockDriverState *bs, const char *filename, > > + int bdrv_flags) > > +{ > > + BDRVGlusterState *s = bs->opaque; > > + GlusterConf *c = g_malloc(sizeof(GlusterConf)); > > Can this be allocated on the stack? It consists of PATH_MAX(4096), HOST_NAME_MAX(255) and GLUSTERD_MAX_VOLUME_NAME (1000). A bit heavy to be on stack ? > > > + int ret; > > + > > + s->glfs = qemu_gluster_init(c, filename); > > + if (!s->glfs) { > > + ret = -errno; > > + goto out; > > + } > > + > > + s->open_flags |= O_BINARY; > > Can open_flags be a local variable? Yes, fixed. 
> > > +static int qemu_gluster_create(const char *filename, > > + QEMUOptionParameter *options) > > +{ > > + struct glfs *glfs; > > + struct glfs_fd *fd; > > + GlusterConf *c = g_malloc(sizeof(GlusterConf)); > > + int ret = 0; > > + int64_t total_size = 0; > > + > > + glfs = qemu_gluster_init(c, filename); > > + if (!glfs) { > > + ret = -errno; > > + goto out; > > + } > > + > > + /* Read out options */ > > + while (options && options->name) { > > + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { > > + total_size = options->value.n / BDRV_SECTOR_SIZE; > > + } > > + options++; > > + } > > + > > + fd = glfs_creat(glfs, c->image, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU); > > Why set the execute permission bit? Changed to read and write only. > > > +static void qemu_gluster_close(BlockDriverState *bs) > > +{ > > + BDRVGlusterState *s = bs->opaque; > > + > > + if (s->fd) { > > + glfs_close(s->fd); > > + s->fd = NULL; > > + } > > Why not call glfs_fini() here? Missed that, fixed now. Thanks for your comments. Regards, Bharata.
On Mon, Jul 23, 2012 at 9:32 AM, Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > On Sun, Jul 22, 2012 at 04:38:00PM +0100, Stefan Hajnoczi wrote: >> On Sat, Jul 21, 2012 at 9:31 AM, Bharata B Rao >> <bharata@linux.vnet.ibm.com> wrote: >> > +} GlusterAIOCB; >> > + >> > +typedef struct GlusterCBKData { >> > + GlusterAIOCB *acb; >> > + struct BDRVGlusterState *s; >> > + int64_t size; >> > + int ret; >> > +} GlusterCBKData; >> >> I think GlusterCBKData could just be part of GlusterAIOCB. That would >> simplify the code a little and avoid some malloc/free. > > Are you suggesting to put a field > > GlusterCBKData gcbk; > > inside GlusterAIOCB and use gcbk from there or > > Are you suggesting that I make the fields of GlusterCBKData part of > GlusterAIOCB and get rid of GlusterCBKData altogether ? This means I would > have to pass the GlusterAIOCB to gluster async calls and update its fields from > gluster callback routine. I can do this, but I am not sure if you can touch > the fields of GlusterAIOCB in non-QEMU threads (gluster callback thread). The fields in GlusterCBKData could become part of GlusterAIOCB. Different threads can access fields in a struct, they just need to ensure access is synchronized if they touch the same fields. In the case of this code I think there is nothing that requires synchronization beyond the pipe mechanism that you already use to complete processing in a QEMU thread. >> When the argument is too long we should probably report an error >> instead of truncating. > > Or should we let gluster APIs to flag an error with truncated > server and volume names ? What if the truncated name is a valid but different object? For example: Max chars = 5 Objects: "helloworld" "hello" If "helloworld" is truncated to "hello" we get no error back because it's a valid object! We need to either check sizes explicitly without truncating or use a g_strdup() approach without any size limits and let the gfapi functions error out if the input string is too long. 
>> > +static struct glfs *qemu_gluster_init(GlusterConf *c, const char *filename) >> > +{ >> > + struct glfs *glfs = NULL; >> > + int ret; >> > + >> > + ret = qemu_gluster_parsename(c, filename); >> > + if (ret < 0) { >> > + errno = -ret; >> > + goto out; >> > + } >> > + >> > + glfs = glfs_new(c->volname); >> > + if (!glfs) { >> > + goto out; >> > + } >> > + >> > + ret = glfs_set_volfile_server(glfs, "socket", c->server, c->port); >> > + if (ret < 0) { >> > + goto out; >> > + } >> > + >> > + /* >> > + * TODO: Logging is not necessary but instead nice to have. >> > + * Can QEMU optionally log into a standard place ? >> >> QEMU prints to stderr, can you do that here too? The global log file >> is not okay, especially when multiple QEMU instances are running. > > Ok, I can do glfs_set_logging(glfs, "/dev/stderr", loglevel); Yes. I think "-" is best since it is supported by gfapi (libglusterfs/src/logging.c:gf_log_init). /dev/stderr is not POSIX. >> > + * Need to use defines like gf_loglevel_t:GF_LOG_INFO instead of >> > + * hard coded values like 7 here. >> > + */ >> > + ret = glfs_set_logging(glfs, "/tmp/qemu-gluster.log", 7); >> > + if (ret < 0) { >> > + goto out; >> > + } >> > + >> > + ret = glfs_init(glfs); >> > + if (ret < 0) { >> > + goto out; >> > + } >> > + return glfs; >> > + >> > +out: >> > + if (glfs) { >> > + (void)glfs_fini(glfs); >> > + } >> > + return NULL; >> > +} >> > + >> > +static int qemu_gluster_open(BlockDriverState *bs, const char *filename, >> > + int bdrv_flags) >> > +{ >> > + BDRVGlusterState *s = bs->opaque; >> > + GlusterConf *c = g_malloc(sizeof(GlusterConf)); >> >> Can this be allocated on the stack? > > It consists of PATH_MAX(4096), HOST_NAME_MAX(255) and GLUSTERD_MAX_VOLUME_NAME > (1000). A bit heavy to be on stack ? This is userspace, stacks are big but it's up to you. Stefan
diff --git a/block/Makefile.objs b/block/Makefile.objs index b5754d3..a1ae67f 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -9,3 +9,4 @@ block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o +block-obj-$(CONFIG_GLUSTERFS) += gluster.o diff --git a/block/gluster.c b/block/gluster.c new file mode 100644 index 0000000..c33a006 --- /dev/null +++ b/block/gluster.c @@ -0,0 +1,483 @@ +/* + * GlusterFS backend for QEMU + * + * (AIO implementation is derived from block/rbd.c) + * + * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the top-level + * directory. + */ +#include "block_int.h" +#include <glusterfs/api/glfs.h> + +typedef struct GlusterConf { + char server[HOST_NAME_MAX]; + int port; + char volname[128]; /* TODO: use GLUSTERD_MAX_VOLUME_NAME */ + char image[PATH_MAX]; +} GlusterConf; + +typedef struct GlusterAIOCB { + BlockDriverAIOCB common; + QEMUIOVector *qiov; + char *bounce; + struct BDRVGlusterState *s; + int cancelled; +} GlusterAIOCB; + +typedef struct GlusterCBKData { + GlusterAIOCB *acb; + struct BDRVGlusterState *s; + int64_t size; + int ret; +} GlusterCBKData; + +typedef struct BDRVGlusterState { + struct glfs *glfs; + int fds[2]; + int open_flags; + struct glfs_fd *fd; + int qemu_aio_count; + int event_reader_pos; + GlusterCBKData *event_gcbk; +} BDRVGlusterState; + +#define GLUSTER_FD_READ 0 +#define GLUSTER_FD_WRITE 1 + +static void qemu_gluster_complete_aio(GlusterCBKData *gcbk) +{ + GlusterAIOCB *acb = gcbk->acb; + int ret; + + if (acb->cancelled) { + qemu_aio_release(acb); + goto done; + } + + if (gcbk->ret == gcbk->size) { + ret = 0; /* Success */ + } else if (gcbk->ret < 0) { + ret = gcbk->ret; /* Read/Write failed */ + } else { + ret = -EINVAL; /* Partial read/write - fail it */ + } + 
acb->common.cb(acb->common.opaque, ret); + qemu_aio_release(acb); + +done: + g_free(gcbk); +} + +static void qemu_gluster_aio_event_reader(void *opaque) +{ + BDRVGlusterState *s = opaque; + ssize_t ret; + + do { + char *p = (char *)&s->event_gcbk; + + ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos, + sizeof(s->event_gcbk) - s->event_reader_pos); + if (ret > 0) { + s->event_reader_pos += ret; + if (s->event_reader_pos == sizeof(s->event_gcbk)) { + s->event_reader_pos = 0; + qemu_gluster_complete_aio(s->event_gcbk); + s->qemu_aio_count--; + } + } + } while (ret < 0 && errno == EINTR); +} + +static int qemu_gluster_aio_flush_cb(void *opaque) +{ + BDRVGlusterState *s = opaque; + + return (s->qemu_aio_count > 0); +} + +/* + * file=protocol:server@port:volname:image + */ +static int qemu_gluster_parsename(GlusterConf *c, const char *filename) +{ + char *file = g_strdup(filename); + char *token, *next_token, *saveptr; + char *token_s, *next_token_s, *saveptr_s; + int ret = -EINVAL; + + /* Discard the protocol */ + token = strtok_r(file, ":", &saveptr); + if (!token) { + goto out; + } + + /* server@port */ + next_token = strtok_r(NULL, ":", &saveptr); + if (!next_token) { + goto out; + } + if (strchr(next_token, '@')) { + token_s = strtok_r(next_token, "@", &saveptr_s); + if (!token_s) { + goto out; + } + strncpy(c->server, token_s, HOST_NAME_MAX); + next_token_s = strtok_r(NULL, "@", &saveptr_s); + if (!next_token_s) { + goto out; + } + c->port = atoi(next_token_s); + } else { + strncpy(c->server, next_token, HOST_NAME_MAX); + c->port = 0; + } + + /* volname */ + next_token = strtok_r(NULL, ":", &saveptr); + if (!next_token) { + goto out; + } + strncpy(c->volname, next_token, 128); + + /* image */ + next_token = strtok_r(NULL, ":", &saveptr); + if (!next_token) { + goto out; + } + strncpy(c->image, next_token, PATH_MAX); + ret = 0; +out: + g_free(file); + return ret; +} + +static struct glfs *qemu_gluster_init(GlusterConf *c, const char *filename) +{ + struct 
glfs *glfs = NULL; + int ret; + + ret = qemu_gluster_parsename(c, filename); + if (ret < 0) { + errno = -ret; + goto out; + } + + glfs = glfs_new(c->volname); + if (!glfs) { + goto out; + } + + ret = glfs_set_volfile_server(glfs, "socket", c->server, c->port); + if (ret < 0) { + goto out; + } + + /* + * TODO: Logging is not necessary but instead nice to have. + * Can QEMU optionally log into a standard place ? + * Need to use defines like gf_loglevel_t:GF_LOG_INFO instead of + * hard coded values like 7 here. + */ + ret = glfs_set_logging(glfs, "/tmp/qemu-gluster.log", 7); + if (ret < 0) { + goto out; + } + + ret = glfs_init(glfs); + if (ret < 0) { + goto out; + } + return glfs; + +out: + if (glfs) { + (void)glfs_fini(glfs); + } + return NULL; +} + +static int qemu_gluster_open(BlockDriverState *bs, const char *filename, + int bdrv_flags) +{ + BDRVGlusterState *s = bs->opaque; + GlusterConf *c = g_malloc(sizeof(GlusterConf)); + int ret; + + s->glfs = qemu_gluster_init(c, filename); + if (!s->glfs) { + ret = -errno; + goto out; + } + + s->open_flags |= O_BINARY; + s->open_flags &= ~O_ACCMODE; + if (bdrv_flags & BDRV_O_RDWR) { + s->open_flags |= O_RDWR; + } else { + s->open_flags |= O_RDONLY; + } + + if ((bdrv_flags & BDRV_O_NOCACHE)) { + s->open_flags |= O_DIRECT; + } + + s->fd = glfs_open(s->glfs, c->image, s->open_flags); + if (!s->fd) { + ret = -errno; + goto out; + } + + ret = qemu_pipe(s->fds); + if (ret < 0) { + goto out; + } + fcntl(s->fds[0], F_SETFL, O_NONBLOCK); + fcntl(s->fds[1], F_SETFL, O_NONBLOCK); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], + qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s); + g_free(c); + return ret; + +out: + g_free(c); + if (s->fd) { + glfs_close(s->fd); + } + if (s->glfs) { + (void) glfs_fini(s->glfs); + } + return ret; +} + +static int qemu_gluster_create(const char *filename, + QEMUOptionParameter *options) +{ + struct glfs *glfs; + struct glfs_fd *fd; + GlusterConf *c = g_malloc(sizeof(GlusterConf)); + 
int ret = 0; + int64_t total_size = 0; + + glfs = qemu_gluster_init(c, filename); + if (!glfs) { + ret = -errno; + goto out; + } + + /* Read out options */ + while (options && options->name) { + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { + total_size = options->value.n / BDRV_SECTOR_SIZE; + } + options++; + } + + fd = glfs_creat(glfs, c->image, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU); + if (!fd) { + ret = -errno; + } else { + if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { + ret = -errno; + } + if (glfs_close(fd) != 0) { + ret = -errno; + } + } +out: + g_free(c); + if (glfs) { + (void) glfs_fini(glfs); + } + return ret; +} + +static AIOPool gluster_aio_pool = { + .aiocb_size = sizeof(GlusterAIOCB), +}; + +static int qemu_gluster_send_pipe(BDRVGlusterState *s, GlusterCBKData *gcbk) +{ + int ret = 0; + while (1) { + fd_set wfd; + int fd = s->fds[GLUSTER_FD_WRITE]; + + ret = write(fd, (void *)&gcbk, sizeof(gcbk)); + if (ret >= 0) { + break; + } + if (errno == EINTR) { + continue; + } + if (errno != EAGAIN) { + break; + } + + FD_ZERO(&wfd); + FD_SET(fd, &wfd); + do { + ret = select(fd + 1, NULL, &wfd, NULL, NULL); + } while (ret < 0 && errno == EINTR); + } + return ret; +} + +static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) +{ + GlusterCBKData *gcbk = (GlusterCBKData *)arg; + BDRVGlusterState *s = gcbk->s; + + gcbk->ret = ret; + if (qemu_gluster_send_pipe(s, gcbk) < 0) { + error_report("Could not complete read/write/flush from gluster"); + abort(); + } +} + +static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int write) +{ + int ret; + GlusterAIOCB *acb; + GlusterCBKData *gcbk; + BDRVGlusterState *s = bs->opaque; + size_t size; + off_t offset; + + acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque); + acb->qiov = qiov; + acb->s = s; + + offset = sector_num * BDRV_SECTOR_SIZE; + size = nb_sectors * 
BDRV_SECTOR_SIZE; + s->qemu_aio_count++; + + gcbk = g_malloc(sizeof(GlusterCBKData)); + gcbk->acb = acb; + gcbk->s = s; + gcbk->size = size; + + if (write) { + ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, + &gluster_finish_aiocb, gcbk); + } else { + ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0, + &gluster_finish_aiocb, gcbk); + } + + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + g_free(gcbk); + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} + +static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); +} + +static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); +} + +static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + int ret; + GlusterAIOCB *acb; + GlusterCBKData *gcbk; + BDRVGlusterState *s = bs->opaque; + + acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque); + acb->s = s; + s->qemu_aio_count++; + + gcbk = g_malloc(sizeof(GlusterCBKData)); + gcbk->acb = acb; + gcbk->s = s; + gcbk->size = 0; + + ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, gcbk); + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + g_free(gcbk); + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} + +static int64_t qemu_gluster_getlength(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + struct stat st; + int ret; + + ret = glfs_fstat(s->fd, &st); + if (ret < 0) { + return -errno; + } else { + return st.st_size; + } +} + +static void qemu_gluster_close(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + 
+ if (s->fd) { + glfs_close(s->fd); + s->fd = NULL; + } +} + +static QEMUOptionParameter qemu_gluster_create_options[] = { + { + .name = BLOCK_OPT_SIZE, + .type = OPT_SIZE, + .help = "Virtual disk size" + }, + { NULL } +}; + +static BlockDriver bdrv_gluster = { + .format_name = "gluster", + .protocol_name = "gluster", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + + .create_options = qemu_gluster_create_options, +}; + +static void bdrv_gluster_init(void) +{ + bdrv_register(&bdrv_gluster); +} + +block_init(bdrv_gluster_init);