diff mbox series

[v4,4/7] file-posix: try BLKSECTGET on block devices too, do not round to power of 2

Message ID 20210608131634.423904-5-pbonzini@redhat.com
State New
Headers show
Series block: file-posix queue | expand

Commit Message

Paolo Bonzini June 8, 2021, 1:16 p.m. UTC
bs->sg is only true for character devices, but block devices can also
be used with scsi-block and scsi-generic.  Unfortunately BLKSECTGET
returns bytes in an int for /dev/sgN devices, and sectors in a short
for block devices, so account for that in the code.

The maximum transfer also need not be a power of 2 (for example I have
seen disks with 1280 KiB maximum transfer) so there's no need to pass
the result through pow2floor.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/file-posix.c | 44 ++++++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

Comments

Eric Blake June 8, 2021, 5:53 p.m. UTC | #1
On Tue, Jun 08, 2021 at 03:16:31PM +0200, Paolo Bonzini wrote:
> bs->sg is only true for character devices, but block devices can also
> be used with scsi-block and scsi-generic.  Unfortunately BLKSECTGET
> returns bytes in an int for /dev/sgN devices, and sectors in a short
> for block devices, so account for that in the code.

Gotta love inconsistent and poorly-documented kernel interfaces! (on my
system, 'man -k BLKSECTGET' had no hits)

> 
> The maximum transfer also need not be a power of 2 (for example I have
> seen disks with 1280 KiB maximum transfer) so there's no need to pass
> the result through pow2floor.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block/file-posix.c | 44 ++++++++++++++++++++++++--------------------
>  1 file changed, 24 insertions(+), 20 deletions(-)
>

Reviewed-by: Eric Blake <eblake@redhat.com>
Maxim Levitsky June 9, 2021, 4:15 p.m. UTC | #2
On Tue, 2021-06-08 at 15:16 +0200, Paolo Bonzini wrote:
> bs->sg is only true for character devices, but block devices can also
> be used with scsi-block and scsi-generic.  Unfortunately BLKSECTGET
> returns bytes in an int for /dev/sgN devices, and sectors in a short
> for block devices, so account for that in the code.
> 
> The maximum transfer also need not be a power of 2 (for example I have
> seen disks with 1280 KiB maximum transfer) so there's no need to pass
> the result through pow2floor.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block/file-posix.c | 44 ++++++++++++++++++++++++--------------------
>  1 file changed, 24 insertions(+), 20 deletions(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index c9746d3eb6..1439293f63 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1149,22 +1149,27 @@ static void raw_reopen_abort(BDRVReopenState *state)
>      s->reopen_state = NULL;
>  }
>  
> -static int sg_get_max_transfer_length(int fd)
> +static int hdev_get_max_hw_transfer(int fd, struct stat *st)
>  {
>  #ifdef BLKSECTGET
> -    int max_bytes = 0;
> -
> -    if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
> -        return max_bytes;
> +    if (S_ISBLK(st->st_mode)) {
> +        unsigned short max_sectors = 0;
> +        if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
> +            return max_sectors * 512;
> +        }
>      } else {
> -        return -errno;
> +        int max_bytes = 0;
> +        if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {

Again, I would use bs->sg for that.

> +            return max_bytes;
> +        }
>      }
> +    return -errno;
>  #else
>      return -ENOSYS;
>  #endif
>  }
>  
> -static int sg_get_max_segments(int fd)
> +static int hdev_get_max_segments(int fd, struct stat *st)
>  {
>  #ifdef CONFIG_LINUX
>      char buf[32];
> @@ -1173,26 +1178,20 @@ static int sg_get_max_segments(int fd)
>      int ret;
>      int sysfd = -1;
>      long max_segments;
> -    struct stat st;
>  
> -    if (fstat(fd, &st)) {
> -        ret = -errno;
> -        goto out;
> -    }
> -
> -    if (S_ISCHR(st.st_mode)) {
> +    if (S_ISCHR(st->st_mode)) {
>          if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
>              return ret;
>          }
>          return -ENOTSUP;
>      }
>  
> -    if (!S_ISBLK(st.st_mode)) {
> +    if (!S_ISBLK(st->st_mode)) {
>          return -ENOTSUP;
>      }
>  
>      sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
> -                                major(st.st_rdev), minor(st.st_rdev));
> +                                major(st->st_rdev), minor(st->st_rdev));
>      sysfd = open(sysfspath, O_RDONLY);
>      if (sysfd == -1) {
>          ret = -errno;
> @@ -1229,15 +1228,20 @@ out:
>  static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
>  {
>      BDRVRawState *s = bs->opaque;
> +    struct stat st;
> +
> +    if (fstat(s->fd, &st)) {
> +        return;
> +    }
>  
> -    if (bs->sg) {
> -        int ret = sg_get_max_transfer_length(s->fd);
> +    if (bs->sg || S_ISBLK(st.st_mode)) {
> +        int ret = hdev_get_max_hw_transfer(s->fd, &st);
>  
>          if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
> -            bs->bl.max_hw_transfer = pow2floor(ret);
> +            bs->bl.max_hw_transfer = ret;
>          }
>  
> -        ret = sg_get_max_segments(s->fd);
> +        ret = hdev_get_max_segments(s->fd, &st);
>          if (ret > 0) {
>              bs->bl.max_iov = ret;
>          }


Roughly speaking, this looks correct, but I might have missed something.

This is roughly the same as the patches from Tom Yan that I carried in my series:

https://www.mail-archive.com/qemu-devel@nongnu.org/msg768258.html
https://www.mail-archive.com/qemu-devel@nongnu.org/msg768262.html


I slightly prefer how he created separate functions for /dev/sg and for all other block devices.
Please take a look.

Also, unrelated to this patch: you are missing the fix I made to the VPD limit emulation. Please consider taking
it into the series:

https://www.mail-archive.com/qemu-devel@nongnu.org/msg768260.html


Best regards,
	Maxim Levitsky
diff mbox series

Patch

diff --git a/block/file-posix.c b/block/file-posix.c
index c9746d3eb6..1439293f63 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1149,22 +1149,27 @@  static void raw_reopen_abort(BDRVReopenState *state)
     s->reopen_state = NULL;
 }
 
-static int sg_get_max_transfer_length(int fd)
+static int hdev_get_max_hw_transfer(int fd, struct stat *st)
 {
 #ifdef BLKSECTGET
-    int max_bytes = 0;
-
-    if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
-        return max_bytes;
+    if (S_ISBLK(st->st_mode)) {
+        unsigned short max_sectors = 0;
+        if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
+            return max_sectors * 512;
+        }
     } else {
-        return -errno;
+        int max_bytes = 0;
+        if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
+            return max_bytes;
+        }
     }
+    return -errno;
 #else
     return -ENOSYS;
 #endif
 }
 
-static int sg_get_max_segments(int fd)
+static int hdev_get_max_segments(int fd, struct stat *st)
 {
 #ifdef CONFIG_LINUX
     char buf[32];
@@ -1173,26 +1178,20 @@  static int sg_get_max_segments(int fd)
     int ret;
     int sysfd = -1;
     long max_segments;
-    struct stat st;
 
-    if (fstat(fd, &st)) {
-        ret = -errno;
-        goto out;
-    }
-
-    if (S_ISCHR(st.st_mode)) {
+    if (S_ISCHR(st->st_mode)) {
         if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
             return ret;
         }
         return -ENOTSUP;
     }
 
-    if (!S_ISBLK(st.st_mode)) {
+    if (!S_ISBLK(st->st_mode)) {
         return -ENOTSUP;
     }
 
     sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
-                                major(st.st_rdev), minor(st.st_rdev));
+                                major(st->st_rdev), minor(st->st_rdev));
     sysfd = open(sysfspath, O_RDONLY);
     if (sysfd == -1) {
         ret = -errno;
@@ -1229,15 +1228,20 @@  out:
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
+    struct stat st;
+
+    if (fstat(s->fd, &st)) {
+        return;
+    }
 
-    if (bs->sg) {
-        int ret = sg_get_max_transfer_length(s->fd);
+    if (bs->sg || S_ISBLK(st.st_mode)) {
+        int ret = hdev_get_max_hw_transfer(s->fd, &st);
 
         if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
-            bs->bl.max_hw_transfer = pow2floor(ret);
+            bs->bl.max_hw_transfer = ret;
         }
 
-        ret = sg_get_max_segments(s->fd);
+        ret = hdev_get_max_segments(s->fd, &st);
         if (ret > 0) {
             bs->bl.max_iov = ret;
         }