diff mbox series

[PULL,v2,02/16] block/file-posix: introduce helper functions for sysfs attributes

Message ID 20230515160506.1776883-3-stefanha@redhat.com
State New
Headers show
Series [PULL,v2,01/16] block/block-common: add zoned device structs | expand

Commit Message

Stefan Hajnoczi May 15, 2023, 4:04 p.m. UTC
From: Sam Li <faithilikerun@gmail.com>

Use get_sysfs_str_val() to get the string value of device
zoned model. Then get_sysfs_zoned_model() can convert it to
BlockZoneModel type of QEMU.

Use get_sysfs_long_val() to get the long value of zoned device
information.

Signed-off-by: Sam Li <faithilikerun@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20230508045533.175575-3-faithilikerun@gmail.com
Message-id: 20230324090605.28361-3-faithilikerun@gmail.com
[Adjust commit message prefix as suggested by Philippe Mathieu-Daudé
<philmd@linaro.org>.
--Stefan]
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/block_int-common.h |   3 +
 block/file-posix.c               | 135 ++++++++++++++++++++++---------
 2 files changed, 100 insertions(+), 38 deletions(-)

Comments

Matthew Rosato May 31, 2023, 6:21 p.m. UTC | #1
On 5/15/23 12:04 PM, Stefan Hajnoczi wrote:
> From: Sam Li <faithilikerun@gmail.com>
> 
> Use get_sysfs_str_val() to get the string value of device
> zoned model. Then get_sysfs_zoned_model() can convert it to
> BlockZoneModel type of QEMU.
> 
> Use get_sysfs_long_val() to get the long value of zoned device
> information.

Hi Stefan, Sam,

I am having an issue on s390x using virtio-blk-{pci,ccw} backed by an NVMe partition, and I've bisected the root cause to this commit. 

I noticed that tests which use the partition e.g. /dev/nvme0n1p1 as a backing device would fail, but those that use the namespace e.g. /dev/nvme0n1 would still succeed.  The root issue appears to be that the block device associated with the partition does not have a "max_segments" attribute, and prior to this patch hdev_get_max_segments() would return -ENOENT in this case.  After this patch, however, QEMU is instead crashing.  It looks like g_file_get_contents is returning 0 with a len == 0 if the specified sysfs path does not exist.  The following diff on top seems to resolve the issue for me:


diff --git a/block/file-posix.c b/block/file-posix.c
index 0ab158efba2..eeb0247c74e 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1243,7 +1243,7 @@ static int get_sysfs_str_val(struct stat *st, const char *attribute,
                                 major(st->st_rdev), minor(st->st_rdev),
                                 attribute);
     ret = g_file_get_contents(sysfspath, val, &len, NULL);
-    if (ret == -1) {
+    if (ret == -1 || len == 0) {
         return -ENOENT;
     }
 



> 
> Signed-off-by: Sam Li <faithilikerun@gmail.com>
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> Reviewed-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
> Acked-by: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> Message-id: 20230508045533.175575-3-faithilikerun@gmail.com
> Message-id: 20230324090605.28361-3-faithilikerun@gmail.com
> [Adjust commit message prefix as suggested by Philippe Mathieu-Daudé
> <philmd@linaro.org>.
> --Stefan]
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  include/block/block_int-common.h |   3 +
>  block/file-posix.c               | 135 ++++++++++++++++++++++---------
>  2 files changed, 100 insertions(+), 38 deletions(-)
> 
> diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
> index 4909876756..c7ca5a83e9 100644
> --- a/include/block/block_int-common.h
> +++ b/include/block/block_int-common.h
> @@ -862,6 +862,9 @@ typedef struct BlockLimits {
>       * an explicit monitor command to load the disk inside the guest).
>       */
>      bool has_variable_length;
> +
> +    /* device zone model */
> +    BlockZoneModel zoned;
>  } BlockLimits;
>  
>  typedef struct BdrvOpBlocker BdrvOpBlocker;
> diff --git a/block/file-posix.c b/block/file-posix.c
> index c7b723368e..97c597a2a0 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1202,15 +1202,89 @@ static int hdev_get_max_hw_transfer(int fd, struct stat *st)
>  #endif
>  }
>  
> -static int hdev_get_max_segments(int fd, struct stat *st)
> +/*
> + * Get a sysfs attribute value as character string.
> + */
> +#ifdef CONFIG_LINUX
> +static int get_sysfs_str_val(struct stat *st, const char *attribute,
> +                             char **val) {
> +    g_autofree char *sysfspath = NULL;
> +    int ret;
> +    size_t len;
> +
> +    if (!S_ISBLK(st->st_mode)) {
> +        return -ENOTSUP;
> +    }
> +
> +    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/%s",
> +                                major(st->st_rdev), minor(st->st_rdev),
> +                                attribute);
> +    ret = g_file_get_contents(sysfspath, val, &len, NULL);
> +    if (ret == -1) {
> +        return -ENOENT;
> +    }
> +
> +    /* The file is ended with '\n' */
> +    char *p;
> +    p = *val;
> +    if (*(p + len - 1) == '\n') {
> +        *(p + len - 1) = '\0';
> +    }
> +    return ret;
> +}
> +#endif
> +
> +static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned)
>  {
> +    g_autofree char *val = NULL;
> +    int ret;
> +
> +    ret = get_sysfs_str_val(st, "zoned", &val);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    if (strcmp(val, "host-managed") == 0) {
> +        *zoned = BLK_Z_HM;
> +    } else if (strcmp(val, "host-aware") == 0) {
> +        *zoned = BLK_Z_HA;
> +    } else if (strcmp(val, "none") == 0) {
> +        *zoned = BLK_Z_NONE;
> +    } else {
> +        return -ENOTSUP;
> +    }
> +    return 0;
> +}
> +
> +/*
> + * Get a sysfs attribute value as a long integer.
> + */
>  #ifdef CONFIG_LINUX
> -    char buf[32];
> +static long get_sysfs_long_val(struct stat *st, const char *attribute)
> +{
> +    g_autofree char *str = NULL;
>      const char *end;
> -    char *sysfspath = NULL;
> +    long val;
> +    int ret;
> +
> +    ret = get_sysfs_str_val(st, attribute, &str);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    /* The file is ended with '\n', pass 'end' to accept that. */
> +    ret = qemu_strtol(str, &end, 10, &val);
> +    if (ret == 0 && end && *end == '\0') {
> +        ret = val;
> +    }
> +    return ret;
> +}
> +#endif
> +
> +static int hdev_get_max_segments(int fd, struct stat *st)
> +{
> +#ifdef CONFIG_LINUX
>      int ret;
> -    int sysfd = -1;
> -    long max_segments;
>  
>      if (S_ISCHR(st->st_mode)) {
>          if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
> @@ -1218,44 +1292,27 @@ static int hdev_get_max_segments(int fd, struct stat *st)
>          }
>          return -ENOTSUP;
>      }
> -
> -    if (!S_ISBLK(st->st_mode)) {
> -        return -ENOTSUP;
> -    }
> -
> -    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
> -                                major(st->st_rdev), minor(st->st_rdev));
> -    sysfd = open(sysfspath, O_RDONLY);
> -    if (sysfd == -1) {
> -        ret = -errno;
> -        goto out;
> -    }
> -    ret = RETRY_ON_EINTR(read(sysfd, buf, sizeof(buf) - 1));
> -    if (ret < 0) {
> -        ret = -errno;
> -        goto out;
> -    } else if (ret == 0) {
> -        ret = -EIO;
> -        goto out;
> -    }
> -    buf[ret] = 0;
> -    /* The file is ended with '\n', pass 'end' to accept that. */
> -    ret = qemu_strtol(buf, &end, 10, &max_segments);
> -    if (ret == 0 && end && *end == '\n') {
> -        ret = max_segments;
> -    }
> -
> -out:
> -    if (sysfd != -1) {
> -        close(sysfd);
> -    }
> -    g_free(sysfspath);
> -    return ret;
> +    return get_sysfs_long_val(st, "max_segments");
>  #else
>      return -ENOTSUP;
>  #endif
>  }
>  
> +static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
> +                                     Error **errp)
> +{
> +    BlockZoneModel zoned;
> +    int ret;
> +
> +    bs->bl.zoned = BLK_Z_NONE;
> +
> +    ret = get_sysfs_zoned_model(st, &zoned);
> +    if (ret < 0 || zoned == BLK_Z_NONE) {
> +        return;
> +    }
> +    bs->bl.zoned = zoned;
> +}
> +
>  static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
>  {
>      BDRVRawState *s = bs->opaque;
> @@ -1297,6 +1354,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
>              bs->bl.max_hw_iov = ret;
>          }
>      }
> +
> +    raw_refresh_zoned_limits(bs, &st, errp);
>  }
>  
>  static int check_for_dasd(int fd)
Sam Li June 2, 2023, 6:18 p.m. UTC | #2
Matthew Rosato <mjrosato@linux.ibm.com> 于2023年6月1日周四 02:21写道:
>
> On 5/15/23 12:04 PM, Stefan Hajnoczi wrote:
> > From: Sam Li <faithilikerun@gmail.com>
> >
> > Use get_sysfs_str_val() to get the string value of device
> > zoned model. Then get_sysfs_zoned_model() can convert it to
> > BlockZoneModel type of QEMU.
> >
> > Use get_sysfs_long_val() to get the long value of zoned device
> > information.
>
> Hi Stefan, Sam,
>
> I am having an issue on s390x using virtio-blk-{pci,ccw} backed by an NVMe partition, and I've bisected the root cause to this commit.
>
> I noticed that tests which use the partition e.g. /dev/nvme0n1p1 as a backing device would fail, but those that use the namespace e.g. /dev/nvme0n1 would still succeed.  The root issue appears to be that the block device associated with the partition does not have a "max_segments" attribute, and prior to this patch hdev_get_max_segments() would return -ENOENT in this case.  After this patch, however, QEMU is instead crashing.  It looks like g_file_get_contents is returning 0 with a len == 0 if the specified sysfs path does not exist.  The following diff on top seems to resolve the issue for me:
>
>
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 0ab158efba2..eeb0247c74e 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1243,7 +1243,7 @@ static int get_sysfs_str_val(struct stat *st, const char *attribute,
>                                  major(st->st_rdev), minor(st->st_rdev),
>                                  attribute);
>      ret = g_file_get_contents(sysfspath, val, &len, NULL);
> -    if (ret == -1) {
> +    if (ret == -1 || len == 0) {
>          return -ENOENT;
>      }
>

Hi Matthew,

Thanks for the information. After some checking, I think the bug here
is that g_file_get_contents() returns a gboolean value, so the error case
returns 0 (FALSE) instead of the -1 my previous code checked for. Can the
following line fix your issue on the s390x device?

+ if (ret == FALSE) {

https://docs.gtk.org/glib/func.file_get_contents.html

Thanks,
Sam




>
>
>
> >
> > Signed-off-by: Sam Li <faithilikerun@gmail.com>
> > Reviewed-by: Hannes Reinecke <hare@suse.de>
> > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> > Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> > Reviewed-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>
> > Acked-by: Kevin Wolf <kwolf@redhat.com>
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > Message-id: 20230508045533.175575-3-faithilikerun@gmail.com
> > Message-id: 20230324090605.28361-3-faithilikerun@gmail.com
> > [Adjust commit message prefix as suggested by Philippe Mathieu-Daudé
> > <philmd@linaro.org>.
> > --Stefan]
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  include/block/block_int-common.h |   3 +
> >  block/file-posix.c               | 135 ++++++++++++++++++++++---------
> >  2 files changed, 100 insertions(+), 38 deletions(-)
> >
> > diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
> > index 4909876756..c7ca5a83e9 100644
> > --- a/include/block/block_int-common.h
> > +++ b/include/block/block_int-common.h
> > @@ -862,6 +862,9 @@ typedef struct BlockLimits {
> >       * an explicit monitor command to load the disk inside the guest).
> >       */
> >      bool has_variable_length;
> > +
> > +    /* device zone model */
> > +    BlockZoneModel zoned;
> >  } BlockLimits;
> >
> >  typedef struct BdrvOpBlocker BdrvOpBlocker;
> > diff --git a/block/file-posix.c b/block/file-posix.c
> > index c7b723368e..97c597a2a0 100644
> > --- a/block/file-posix.c
> > +++ b/block/file-posix.c
> > @@ -1202,15 +1202,89 @@ static int hdev_get_max_hw_transfer(int fd, struct stat *st)
> >  #endif
> >  }
> >
> > -static int hdev_get_max_segments(int fd, struct stat *st)
> > +/*
> > + * Get a sysfs attribute value as character string.
> > + */
> > +#ifdef CONFIG_LINUX
> > +static int get_sysfs_str_val(struct stat *st, const char *attribute,
> > +                             char **val) {
> > +    g_autofree char *sysfspath = NULL;
> > +    int ret;
> > +    size_t len;
> > +
> > +    if (!S_ISBLK(st->st_mode)) {
> > +        return -ENOTSUP;
> > +    }
> > +
> > +    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/%s",
> > +                                major(st->st_rdev), minor(st->st_rdev),
> > +                                attribute);
> > +    ret = g_file_get_contents(sysfspath, val, &len, NULL);
> > +    if (ret == -1) {
> > +        return -ENOENT;
> > +    }
> > +
> > +    /* The file is ended with '\n' */
> > +    char *p;
> > +    p = *val;
> > +    if (*(p + len - 1) == '\n') {
> > +        *(p + len - 1) = '\0';
> > +    }
> > +    return ret;
> > +}
> > +#endif
> > +
> > +static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned)
> >  {
> > +    g_autofree char *val = NULL;
> > +    int ret;
> > +
> > +    ret = get_sysfs_str_val(st, "zoned", &val);
> > +    if (ret < 0) {
> > +        return ret;
> > +    }
> > +
> > +    if (strcmp(val, "host-managed") == 0) {
> > +        *zoned = BLK_Z_HM;
> > +    } else if (strcmp(val, "host-aware") == 0) {
> > +        *zoned = BLK_Z_HA;
> > +    } else if (strcmp(val, "none") == 0) {
> > +        *zoned = BLK_Z_NONE;
> > +    } else {
> > +        return -ENOTSUP;
> > +    }
> > +    return 0;
> > +}
> > +
> > +/*
> > + * Get a sysfs attribute value as a long integer.
> > + */
> >  #ifdef CONFIG_LINUX
> > -    char buf[32];
> > +static long get_sysfs_long_val(struct stat *st, const char *attribute)
> > +{
> > +    g_autofree char *str = NULL;
> >      const char *end;
> > -    char *sysfspath = NULL;
> > +    long val;
> > +    int ret;
> > +
> > +    ret = get_sysfs_str_val(st, attribute, &str);
> > +    if (ret < 0) {
> > +        return ret;
> > +    }
> > +
> > +    /* The file is ended with '\n', pass 'end' to accept that. */
> > +    ret = qemu_strtol(str, &end, 10, &val);
> > +    if (ret == 0 && end && *end == '\0') {
> > +        ret = val;
> > +    }
> > +    return ret;
> > +}
> > +#endif
> > +
> > +static int hdev_get_max_segments(int fd, struct stat *st)
> > +{
> > +#ifdef CONFIG_LINUX
> >      int ret;
> > -    int sysfd = -1;
> > -    long max_segments;
> >
> >      if (S_ISCHR(st->st_mode)) {
> >          if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
> > @@ -1218,44 +1292,27 @@ static int hdev_get_max_segments(int fd, struct stat *st)
> >          }
> >          return -ENOTSUP;
> >      }
> > -
> > -    if (!S_ISBLK(st->st_mode)) {
> > -        return -ENOTSUP;
> > -    }
> > -
> > -    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
> > -                                major(st->st_rdev), minor(st->st_rdev));
> > -    sysfd = open(sysfspath, O_RDONLY);
> > -    if (sysfd == -1) {
> > -        ret = -errno;
> > -        goto out;
> > -    }
> > -    ret = RETRY_ON_EINTR(read(sysfd, buf, sizeof(buf) - 1));
> > -    if (ret < 0) {
> > -        ret = -errno;
> > -        goto out;
> > -    } else if (ret == 0) {
> > -        ret = -EIO;
> > -        goto out;
> > -    }
> > -    buf[ret] = 0;
> > -    /* The file is ended with '\n', pass 'end' to accept that. */
> > -    ret = qemu_strtol(buf, &end, 10, &max_segments);
> > -    if (ret == 0 && end && *end == '\n') {
> > -        ret = max_segments;
> > -    }
> > -
> > -out:
> > -    if (sysfd != -1) {
> > -        close(sysfd);
> > -    }
> > -    g_free(sysfspath);
> > -    return ret;
> > +    return get_sysfs_long_val(st, "max_segments");
> >  #else
> >      return -ENOTSUP;
> >  #endif
> >  }
> >
> > +static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
> > +                                     Error **errp)
> > +{
> > +    BlockZoneModel zoned;
> > +    int ret;
> > +
> > +    bs->bl.zoned = BLK_Z_NONE;
> > +
> > +    ret = get_sysfs_zoned_model(st, &zoned);
> > +    if (ret < 0 || zoned == BLK_Z_NONE) {
> > +        return;
> > +    }
> > +    bs->bl.zoned = zoned;
> > +}
> > +
> >  static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
> >  {
> >      BDRVRawState *s = bs->opaque;
> > @@ -1297,6 +1354,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
> >              bs->bl.max_hw_iov = ret;
> >          }
> >      }
> > +
> > +    raw_refresh_zoned_limits(bs, &st, errp);
> >  }
> >
> >  static int check_for_dasd(int fd)
>
Matthew Rosato June 2, 2023, 6:41 p.m. UTC | #3
On 6/2/23 2:18 PM, Sam Li wrote:
> Matthew Rosato <mjrosato@linux.ibm.com> 于2023年6月1日周四 02:21写道:
>>
>> On 5/15/23 12:04 PM, Stefan Hajnoczi wrote:
>>> From: Sam Li <faithilikerun@gmail.com>
>>>
>>> Use get_sysfs_str_val() to get the string value of device
>>> zoned model. Then get_sysfs_zoned_model() can convert it to
>>> BlockZoneModel type of QEMU.
>>>
>>> Use get_sysfs_long_val() to get the long value of zoned device
>>> information.
>>
>> Hi Stefan, Sam,
>>
>> I am having an issue on s390x using virtio-blk-{pci,ccw} backed by an NVMe partition, and I've bisected the root cause to this commit.
>>
>> I noticed that tests which use the partition e.g. /dev/nvme0n1p1 as a backing device would fail, but those that use the namespace e.g. /dev/nvme0n1 would still succeed.  The root issue appears to be that the block device associated with the partition does not have a "max_segments" attribute, and prior to this patch hdev_get_max_segments() would return -ENOENT in this case.  After this patch, however, QEMU is instead crashing.  It looks like g_file_get_contents is returning 0 with a len == 0 if the specified sysfs path does not exist.  The following diff on top seems to resolve the issue for me:
>>
>>
>> diff --git a/block/file-posix.c b/block/file-posix.c
>> index 0ab158efba2..eeb0247c74e 100644
>> --- a/block/file-posix.c
>> +++ b/block/file-posix.c
>> @@ -1243,7 +1243,7 @@ static int get_sysfs_str_val(struct stat *st, const char *attribute,
>>                                  major(st->st_rdev), minor(st->st_rdev),
>>                                  attribute);
>>      ret = g_file_get_contents(sysfspath, val, &len, NULL);
>> -    if (ret == -1) {
>> +    if (ret == -1 || len == 0) {
>>          return -ENOENT;
>>      }
>>
> 
> Hi Matthew,
> 
> Thanks for the information. After some checking, I think the bug here
> is that g_file_get_contents() returns a gboolean value, so the error case
> returns 0 (FALSE) instead of the -1 my previous code checked for. Can the
> following line fix your issue on the s390x device?
> 
> + if (ret == FALSE) {
> 
> https://docs.gtk.org/glib/func.file_get_contents.html

Hi Sam,

Ah, good point, I didn't notice file_get_contents was meant to be a bool return and wondered why I was getting a return of 0 in the failing case, hence the check for len == 0.

Anyway, yes, I verified that checking for ret == FALSE fixes the issue.  FWIW, along the same line I also checked that this works:

    if (!g_file_get_contents(sysfspath, val, &len, NULL)) {
        return -ENOENT;
    }

which I personally think looks cleaner and matches the other uses of g_file_get_contents in QEMU.  Could also get rid of ret and just return 0 at the bottom of the function.

Thanks,
Matt
Sam Li June 2, 2023, 6:45 p.m. UTC | #4
Matthew Rosato <mjrosato@linux.ibm.com> 于2023年6月3日周六 02:41写道:
>
> On 6/2/23 2:18 PM, Sam Li wrote:
> > Matthew Rosato <mjrosato@linux.ibm.com> 于2023年6月1日周四 02:21写道:
> >>
> >> On 5/15/23 12:04 PM, Stefan Hajnoczi wrote:
> >>> From: Sam Li <faithilikerun@gmail.com>
> >>>
> >>> Use get_sysfs_str_val() to get the string value of device
> >>> zoned model. Then get_sysfs_zoned_model() can convert it to
> >>> BlockZoneModel type of QEMU.
> >>>
> >>> Use get_sysfs_long_val() to get the long value of zoned device
> >>> information.
> >>
> >> Hi Stefan, Sam,
> >>
> >> I am having an issue on s390x using virtio-blk-{pci,ccw} backed by an NVMe partition, and I've bisected the root cause to this commit.
> >>
> >> I noticed that tests which use the partition e.g. /dev/nvme0n1p1 as a backing device would fail, but those that use the namespace e.g. /dev/nvme0n1 would still succeed.  The root issue appears to be that the block device associated with the partition does not have a "max_segments" attribute, and prior to this patch hdev_get_max_segments() would return -ENOENT in this case.  After this patch, however, QEMU is instead crashing.  It looks like g_file_get_contents is returning 0 with a len == 0 if the specified sysfs path does not exist.  The following diff on top seems to resolve the issue for me:
> >>
> >>
> >> diff --git a/block/file-posix.c b/block/file-posix.c
> >> index 0ab158efba2..eeb0247c74e 100644
> >> --- a/block/file-posix.c
> >> +++ b/block/file-posix.c
> >> @@ -1243,7 +1243,7 @@ static int get_sysfs_str_val(struct stat *st, const char *attribute,
> >>                                  major(st->st_rdev), minor(st->st_rdev),
> >>                                  attribute);
> >>      ret = g_file_get_contents(sysfspath, val, &len, NULL);
> >> -    if (ret == -1) {
> >> +    if (ret == -1 || len == 0) {
> >>          return -ENOENT;
> >>      }
> >>
> >
> > Hi Matthew,
> >
> > Thanks for the information. After some checking, I think the bug here
> > is that g_file_get_contents() returns a gboolean value, so the error case
> > returns 0 (FALSE) instead of the -1 my previous code checked for. Can the
> > following line fix your issue on the s390x device?
> >
> > + if (ret == FALSE) {
> >
> > https://docs.gtk.org/glib/func.file_get_contents.html
>
> Hi Sam,
>
> Ah, good point, I didn't notice file_get_contents was meant to be a bool return and wondered why I was getting a return of 0 in the failing case, hence the check for len == 0.
>
> Anyway, yes, I verified that checking for ret == FALSE fixes the issue.  FWIW, along the same line I also checked that this works:
>
>     if (!g_file_get_contents(sysfspath, val, &len, NULL)) {
>         return -ENOENT;
>     }
>
> which I personally think looks cleaner and matches the other uses of g_file_get_contents in QEMU.  Could also get rid of ret and just return 0 at the bottom of the function.

Indeed. I will fix this. Thanks!

Sam
diff mbox series

Patch

diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 4909876756..c7ca5a83e9 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -862,6 +862,9 @@  typedef struct BlockLimits {
      * an explicit monitor command to load the disk inside the guest).
      */
     bool has_variable_length;
+
+    /* device zone model */
+    BlockZoneModel zoned;
 } BlockLimits;
 
 typedef struct BdrvOpBlocker BdrvOpBlocker;
diff --git a/block/file-posix.c b/block/file-posix.c
index c7b723368e..97c597a2a0 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1202,15 +1202,89 @@  static int hdev_get_max_hw_transfer(int fd, struct stat *st)
 #endif
 }
 
-static int hdev_get_max_segments(int fd, struct stat *st)
+/*
+ * Get a sysfs attribute value as character string.
+ */
+#ifdef CONFIG_LINUX
+static int get_sysfs_str_val(struct stat *st, const char *attribute,
+                             char **val) {
+    g_autofree char *sysfspath = NULL;
+    int ret;
+    size_t len;
+
+    if (!S_ISBLK(st->st_mode)) {
+        return -ENOTSUP;
+    }
+
+    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/%s",
+                                major(st->st_rdev), minor(st->st_rdev),
+                                attribute);
+    ret = g_file_get_contents(sysfspath, val, &len, NULL);
+    if (ret == -1) {
+        return -ENOENT;
+    }
+
+    /* The file is ended with '\n' */
+    char *p;
+    p = *val;
+    if (*(p + len - 1) == '\n') {
+        *(p + len - 1) = '\0';
+    }
+    return ret;
+}
+#endif
+
+static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned)
 {
+    g_autofree char *val = NULL;
+    int ret;
+
+    ret = get_sysfs_str_val(st, "zoned", &val);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (strcmp(val, "host-managed") == 0) {
+        *zoned = BLK_Z_HM;
+    } else if (strcmp(val, "host-aware") == 0) {
+        *zoned = BLK_Z_HA;
+    } else if (strcmp(val, "none") == 0) {
+        *zoned = BLK_Z_NONE;
+    } else {
+        return -ENOTSUP;
+    }
+    return 0;
+}
+
+/*
+ * Get a sysfs attribute value as a long integer.
+ */
 #ifdef CONFIG_LINUX
-    char buf[32];
+static long get_sysfs_long_val(struct stat *st, const char *attribute)
+{
+    g_autofree char *str = NULL;
     const char *end;
-    char *sysfspath = NULL;
+    long val;
+    int ret;
+
+    ret = get_sysfs_str_val(st, attribute, &str);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* The file is ended with '\n', pass 'end' to accept that. */
+    ret = qemu_strtol(str, &end, 10, &val);
+    if (ret == 0 && end && *end == '\0') {
+        ret = val;
+    }
+    return ret;
+}
+#endif
+
+static int hdev_get_max_segments(int fd, struct stat *st)
+{
+#ifdef CONFIG_LINUX
     int ret;
-    int sysfd = -1;
-    long max_segments;
 
     if (S_ISCHR(st->st_mode)) {
         if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
@@ -1218,44 +1292,27 @@  static int hdev_get_max_segments(int fd, struct stat *st)
         }
         return -ENOTSUP;
     }
-
-    if (!S_ISBLK(st->st_mode)) {
-        return -ENOTSUP;
-    }
-
-    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
-                                major(st->st_rdev), minor(st->st_rdev));
-    sysfd = open(sysfspath, O_RDONLY);
-    if (sysfd == -1) {
-        ret = -errno;
-        goto out;
-    }
-    ret = RETRY_ON_EINTR(read(sysfd, buf, sizeof(buf) - 1));
-    if (ret < 0) {
-        ret = -errno;
-        goto out;
-    } else if (ret == 0) {
-        ret = -EIO;
-        goto out;
-    }
-    buf[ret] = 0;
-    /* The file is ended with '\n', pass 'end' to accept that. */
-    ret = qemu_strtol(buf, &end, 10, &max_segments);
-    if (ret == 0 && end && *end == '\n') {
-        ret = max_segments;
-    }
-
-out:
-    if (sysfd != -1) {
-        close(sysfd);
-    }
-    g_free(sysfspath);
-    return ret;
+    return get_sysfs_long_val(st, "max_segments");
 #else
     return -ENOTSUP;
 #endif
 }
 
+static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
+                                     Error **errp)
+{
+    BlockZoneModel zoned;
+    int ret;
+
+    bs->bl.zoned = BLK_Z_NONE;
+
+    ret = get_sysfs_zoned_model(st, &zoned);
+    if (ret < 0 || zoned == BLK_Z_NONE) {
+        return;
+    }
+    bs->bl.zoned = zoned;
+}
+
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
@@ -1297,6 +1354,8 @@  static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
             bs->bl.max_hw_iov = ret;
         }
     }
+
+    raw_refresh_zoned_limits(bs, &st, errp);
 }
 
 static int check_for_dasd(int fd)