Message ID | 20190704124342.7753-2-mlevitsk@redhat.com |
---|---|
State | New |
Headers | show |
Series | Don't obey the kernel block device max transfer len / max segments for raw block devices | expand |
On Thu, 2019-07-04 at 15:43 +0300, Maxim Levitsky wrote: > Regular kernel block devices (/dev/sda*, /dev/nvme*, etc) don't have > max segment size/max segment count hardware requirements exposed > to the userspace, but rather the kernel block layer > takes care to split the incoming requests that > violate these requirements. > > Allowing the kernel to do the splitting allows qemu to avoid > various overheads that arise otherwise from this. > > This is especially visible in nbd server, > exposing as a raw file, a mostly empty qcow2 image over the net. > In this case most of the reads by the remote user > won't even hit the underlying kernel block device, > and therefore most of the overhead will be in the > nbd traffic which increases significantly with lower max transfer size. > > In addition to that even for local block device > access the peformance improves a bit due to less > traffic between qemu and the kernel when large > transfer sizes are used (e.g for image conversion) > > More info can be found at: > https://bugzilla.redhat.com/show_bug.cgi?id=1647104 > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> > Reviewed-by: Eric Blake <eblake@redhat.com> > --- > block/file-posix.c | 54 ++++++++++++++++++++++++---------------------- > 1 file changed, 28 insertions(+), 26 deletions(-) > > diff --git a/block/file-posix.c b/block/file-posix.c > index ab05b51a66..4479cc7ab4 100644 > --- a/block/file-posix.c > +++ b/block/file-posix.c > @@ -1038,15 +1038,13 @@ static void raw_reopen_abort(BDRVReopenState *state) > s->reopen_state = NULL; > } > > -static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd) > +static int sg_get_max_transfer_length(int fd) > { > #ifdef BLKSECTGET > int max_bytes = 0; > - short max_sectors = 0; > - if (bs->sg && ioctl(fd, BLKSECTGET, &max_bytes) == 0) { > + > + if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) { > return max_bytes; > - } else if (!bs->sg && ioctl(fd, BLKSECTGET, &max_sectors) == 0) { > - return max_sectors << BDRV_SECTOR_BITS; > } else { > return -errno; > } > @@ -1055,25 +1053,31 @@ static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd) > #endif > } > > -static int hdev_get_max_segments(const struct stat *st) > +static int sg_get_max_segments(int fd) > { > #ifdef CONFIG_LINUX > char buf[32]; > const char *end; > - char *sysfspath; > + char *sysfspath = NULL; > int ret; > - int fd = -1; > + int sysfd = -1; > long max_segments; > + struct stat st; > + > + if (fstat(fd, &st)) { > + ret = -errno; > + goto out; > + } > > sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments", > - major(st->st_rdev), minor(st->st_rdev)); > - fd = open(sysfspath, O_RDONLY); > - if (fd == -1) { > + major(st.st_rdev), minor(st.st_rdev)); > + sysfd = open(sysfspath, O_RDONLY); > + if (sysfd == -1) { > ret = -errno; > goto out; > } > do { > - ret = read(fd, buf, sizeof(buf) - 1); > + ret = read(sysfd, buf, sizeof(buf) - 1); > } while (ret == -1 && errno == EINTR); > if (ret < 0) { > ret = -errno; > @@ -1090,8 +1094,8 @@ static int hdev_get_max_segments(const struct stat *st) > } > > out: > - if (fd != -1) { > - close(fd); > + if (sysfd != -1) { > + close(sysfd); > } > g_free(sysfspath); > return ret; > @@ -1103,19 +1107,17 @@ out: > static void raw_refresh_limits(BlockDriverState *bs, Error **errp) > { > BDRVRawState *s = bs->opaque; > - struct stat st; > > - if (!fstat(s->fd, &st)) { > - if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) { > - int ret = hdev_get_max_transfer_length(bs, s->fd); > - if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) { > - bs->bl.max_transfer = pow2floor(ret); > - } > - ret = hdev_get_max_segments(&st); > - if (ret > 0) { > - bs->bl.max_transfer = MIN(bs->bl.max_transfer, > - ret * getpagesize()); > - } > + if (bs->sg) { > + int ret = sg_get_max_transfer_length(s->fd); > + > + if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) { > + bs->bl.max_transfer = pow2floor(ret); > + } > + > + ret = sg_get_max_segments(s->fd); > + if (ret > 0) { > + bs->bl.max_transfer = MIN(bs->bl.max_transfer, ret * getpagesize()); > } > } > Ping. Best regards, Maxim Levitsky
diff --git a/block/file-posix.c b/block/file-posix.c index ab05b51a66..4479cc7ab4 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1038,15 +1038,13 @@ static void raw_reopen_abort(BDRVReopenState *state) s->reopen_state = NULL; } -static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd) +static int sg_get_max_transfer_length(int fd) { #ifdef BLKSECTGET int max_bytes = 0; - short max_sectors = 0; - if (bs->sg && ioctl(fd, BLKSECTGET, &max_bytes) == 0) { + + if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) { return max_bytes; - } else if (!bs->sg && ioctl(fd, BLKSECTGET, &max_sectors) == 0) { - return max_sectors << BDRV_SECTOR_BITS; } else { return -errno; } @@ -1055,25 +1053,31 @@ static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd) #endif } -static int hdev_get_max_segments(const struct stat *st) +static int sg_get_max_segments(int fd) { #ifdef CONFIG_LINUX char buf[32]; const char *end; - char *sysfspath; + char *sysfspath = NULL; int ret; - int fd = -1; + int sysfd = -1; long max_segments; + struct stat st; + + if (fstat(fd, &st)) { + ret = -errno; + goto out; + } sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments", - major(st->st_rdev), minor(st->st_rdev)); - fd = open(sysfspath, O_RDONLY); - if (fd == -1) { + major(st.st_rdev), minor(st.st_rdev)); + sysfd = open(sysfspath, O_RDONLY); + if (sysfd == -1) { ret = -errno; goto out; } do { - ret = read(fd, buf, sizeof(buf) - 1); + ret = read(sysfd, buf, sizeof(buf) - 1); } while (ret == -1 && errno == EINTR); if (ret < 0) { ret = -errno; @@ -1090,8 +1094,8 @@ static int hdev_get_max_segments(const struct stat *st) } out: - if (fd != -1) { - close(fd); + if (sysfd != -1) { + close(sysfd); } g_free(sysfspath); return ret; @@ -1103,19 +1107,17 @@ out: static void raw_refresh_limits(BlockDriverState *bs, Error **errp) { BDRVRawState *s = bs->opaque; - struct stat st; - if (!fstat(s->fd, &st)) { - if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) { - int ret = hdev_get_max_transfer_length(bs, s->fd); - if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) { - bs->bl.max_transfer = pow2floor(ret); - } - ret = hdev_get_max_segments(&st); - if (ret > 0) { - bs->bl.max_transfer = MIN(bs->bl.max_transfer, - ret * getpagesize()); - } + if (bs->sg) { + int ret = sg_get_max_transfer_length(s->fd); + + if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) { + bs->bl.max_transfer = pow2floor(ret); + } + + ret = sg_get_max_segments(s->fd); + if (ret > 0) { + bs->bl.max_transfer = MIN(bs->bl.max_transfer, ret * getpagesize()); } }