[RFC v3 3/5] file-posix: introduce get_sysfs_long_val for zoned device information.

Sam Li posted 5 patches 3 years, 7 months ago
Maintainers: Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>, Stefan Hajnoczi <stefanha@redhat.com>, Fam Zheng <fam@euphon.net>
[RFC v3 3/5] file-posix: introduce get_sysfs_long_val for zoned device information.
Posted by Sam Li 3 years, 7 months ago
Use sysfs attribute files to get the zoned device information in case
that ioctl() commands of zone management interface won't work. It can
return long type of value like chunk_sectors, zoned_append_max_bytes,
max_open_zones, max_active_zones.
---
 block/file-posix.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 1b8b0d351f..73c2cdfbca 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1216,15 +1216,19 @@ static int hdev_get_max_hw_transfer(int fd, struct stat *st)
 #endif
 }
 
-static int hdev_get_max_segments(int fd, struct stat *st)
-{
+/*
+ * Get zoned device information (chunk_sectors, zoned_append_max_bytes,
+ * max_open_zones, max_active_zones) through sysfs attribute files.
+ */
+static long get_sysfs_long_val(int fd, struct stat *st,
+                               const char *attribute) {
 #ifdef CONFIG_LINUX
     char buf[32];
     const char *end;
     char *sysfspath = NULL;
     int ret;
     int sysfd = -1;
-    long max_segments;
+    long val;
 
     if (S_ISCHR(st->st_mode)) {
         if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
@@ -1237,8 +1241,9 @@ static int hdev_get_max_segments(int fd, struct stat *st)
         return -ENOTSUP;
     }
 
-    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
-                                major(st->st_rdev), minor(st->st_rdev));
+    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/%s",
+                                major(st->st_rdev), minor(st->st_rdev),
+                                attribute);
     sysfd = open(sysfspath, O_RDONLY);
     if (sysfd == -1) {
         ret = -errno;
@@ -1256,9 +1261,9 @@ static int hdev_get_max_segments(int fd, struct stat *st)
     }
     buf[ret] = 0;
     /* The file is ended with '\n', pass 'end' to accept that. */
-    ret = qemu_strtol(buf, &end, 10, &max_segments);
+    ret = qemu_strtol(buf, &end, 10, &val);
     if (ret == 0 && end && *end == '\n') {
-        ret = max_segments;
+        ret = val;
     }
 
 out:
@@ -1272,6 +1277,15 @@ out:
 #endif
 }
 
+static int hdev_get_max_segments(int fd, struct stat *st) {
+    int ret;
+    ret = get_sysfs_long_val(fd, st, "max_segments");
+    if (ret < 0) {
+        return -1;
+    }
+    return ret;
+}
+
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
@@ -1872,6 +1886,7 @@ static int handle_aiocb_zone_report(void *opaque) {
 
 static int handle_aiocb_zone_mgmt(void *opaque) {
     RawPosixAIOData *aiocb = opaque;
+    BlockDriverState *s = aiocb->bs;
     int fd = aiocb->aio_fildes;
     int64_t offset = aiocb->aio_offset;
     int64_t len = aiocb->aio_nbytes;
@@ -1884,11 +1899,9 @@ static int handle_aiocb_zone_mgmt(void *opaque) {
     int64_t zone_size_mask;
     int ret;
 
-    ret = ioctl(fd, BLKGETZONESZ, &zone_size);
-    if (ret) {
-        return -1;
-    }
-
+    g_autofree struct stat *file = g_new(struct stat, 1);
+    stat(s->filename, file);
+    zone_size = get_sysfs_long_val(fd, file, "chunk_sectors");
     zone_size_mask = zone_size - 1;
     if (offset & zone_size_mask) {
         error_report("offset is not the start of a zone");
-- 
2.35.3
Re: [RFC v3 3/5] file-posix: introduce get_sysfs_long_val for zoned device information.
Posted by Stefan Hajnoczi 3 years, 7 months ago
On Mon, Jun 27, 2022 at 08:19:15AM +0800, Sam Li wrote:
> Use sysfs attribute files to get the zoned device information in case
> that ioctl() commands of zone management interface won't work. It can
> return long type of value like chunk_sectors, zoned_append_max_bytes,
> max_open_zones, max_active_zones.
> ---
>  block/file-posix.c | 37 +++++++++++++++++++++++++------------
>  1 file changed, 25 insertions(+), 12 deletions(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 1b8b0d351f..73c2cdfbca 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1216,15 +1216,19 @@ static int hdev_get_max_hw_transfer(int fd, struct stat *st)
>  #endif
>  }
>  
> -static int hdev_get_max_segments(int fd, struct stat *st)
> -{
> +/*
> + * Get zoned device information (chunk_sectors, zoned_append_max_bytes,
> + * max_open_zones, max_active_zones) through sysfs attribute files.

This function is also used to get max_segments, which is not related to
zoned devices. How about:

  Get a block queue sysfs attribute value.

> + */
> +static long get_sysfs_long_val(int fd, struct stat *st,
> +                               const char *attribute) {
>  #ifdef CONFIG_LINUX
>      char buf[32];
>      const char *end;
>      char *sysfspath = NULL;
>      int ret;
>      int sysfd = -1;
> -    long max_segments;
> +    long val;
>  
>      if (S_ISCHR(st->st_mode)) {
>          if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
> @@ -1237,8 +1241,9 @@ static int hdev_get_max_segments(int fd, struct stat *st)
>          return -ENOTSUP;
>      }
>  
> -    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
> -                                major(st->st_rdev), minor(st->st_rdev));
> +    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/%s",
> +                                major(st->st_rdev), minor(st->st_rdev),
> +                                attribute);
>      sysfd = open(sysfspath, O_RDONLY);
>      if (sysfd == -1) {
>          ret = -errno;
> @@ -1256,9 +1261,9 @@ static int hdev_get_max_segments(int fd, struct stat *st)
>      }
>      buf[ret] = 0;
>      /* The file is ended with '\n', pass 'end' to accept that. */
> -    ret = qemu_strtol(buf, &end, 10, &max_segments);
> +    ret = qemu_strtol(buf, &end, 10, &val);
>      if (ret == 0 && end && *end == '\n') {
> -        ret = max_segments;
> +        ret = val;
>      }
>  
>  out:
> @@ -1272,6 +1277,15 @@ out:
>  #endif
>  }
>  
> +static int hdev_get_max_segments(int fd, struct stat *st) {
> +    int ret;
> +    ret = get_sysfs_long_val(fd, st, "max_segments");
> +    if (ret < 0) {
> +        return -1;

This hides the actual error number. The if statement can be dropped and
the function can be simplified to:

  static int hdev_get_max_segments(int fd, struct stat *st) {
      return get_sysfs_long_val(fd, st, "max_segments");
  }

Whether you want to keep the tiny helper function or inline
get_sysfs_long_val(fd, st, "max_segments") into the caller is up to you.

> +    }
> +    return ret;
> +}
> +
>  static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
>  {
>      BDRVRawState *s = bs->opaque;
> @@ -1872,6 +1886,7 @@ static int handle_aiocb_zone_report(void *opaque) {
>  
>  static int handle_aiocb_zone_mgmt(void *opaque) {
>      RawPosixAIOData *aiocb = opaque;
> +    BlockDriverState *s = aiocb->bs;
>      int fd = aiocb->aio_fildes;
>      int64_t offset = aiocb->aio_offset;
>      int64_t len = aiocb->aio_nbytes;
> @@ -1884,11 +1899,9 @@ static int handle_aiocb_zone_mgmt(void *opaque) {
>      int64_t zone_size_mask;
>      int ret;
>  
> -    ret = ioctl(fd, BLKGETZONESZ, &zone_size);
> -    if (ret) {
> -        return -1;
> -    }
> -
> +    g_autofree struct stat *file = g_new(struct stat, 1);
> +    stat(s->filename, file);

There is no need to allocate struct stat using g_new(). It's a small
struct that can be on the stack.

Also, fstat(fd, &st) is preferred when the file descriptor is already
open because it avoids race conditions due to file renaming/path
traversal.

Here is how this could be written:

  struct stat st;
  if (fstat(fd, &st) < 0) {
      return -errno;
  }

> +    zone_size = get_sysfs_long_val(fd, file, "chunk_sectors");
>      zone_size_mask = zone_size - 1;
>      if (offset & zone_size_mask) {
>          error_report("offset is not the start of a zone");
> -- 
> 2.35.3
> 
Re: [RFC v3 3/5] file-posix: introduce get_sysfs_long_val for zoned device information.
Posted by Sam Li 3 years, 7 months ago
Stefan Hajnoczi <stefanha@redhat.com> 于2022年6月28日周二 16:20写道:
>
> On Mon, Jun 27, 2022 at 08:19:15AM +0800, Sam Li wrote:
> > Use sysfs attribute files to get the zoned device information in case
> > that ioctl() commands of zone management interface won't work. It can
> > return long type of value like chunk_sectors, zoned_append_max_bytes,
> > max_open_zones, max_active_zones.
> > ---
> >  block/file-posix.c | 37 +++++++++++++++++++++++++------------
> >  1 file changed, 25 insertions(+), 12 deletions(-)
> >
> > diff --git a/block/file-posix.c b/block/file-posix.c
> > index 1b8b0d351f..73c2cdfbca 100644
> > --- a/block/file-posix.c
> > +++ b/block/file-posix.c
> > @@ -1216,15 +1216,19 @@ static int hdev_get_max_hw_transfer(int fd, struct stat *st)
> >  #endif
> >  }
> >
> > -static int hdev_get_max_segments(int fd, struct stat *st)
> > -{
> > +/*
> > + * Get zoned device information (chunk_sectors, zoned_append_max_bytes,
> > + * max_open_zones, max_active_zones) through sysfs attribute files.
>
> This function is also used to get max_segments, which is not related to
> zoned devices. How about:
>
>   Get a block queue sysfs attribute value.
>
> > + */
> > +static long get_sysfs_long_val(int fd, struct stat *st,
> > +                               const char *attribute) {
> >  #ifdef CONFIG_LINUX
> >      char buf[32];
> >      const char *end;
> >      char *sysfspath = NULL;
> >      int ret;
> >      int sysfd = -1;
> > -    long max_segments;
> > +    long val;
> >
> >      if (S_ISCHR(st->st_mode)) {
> >          if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
> > @@ -1237,8 +1241,9 @@ static int hdev_get_max_segments(int fd, struct stat *st)
> >          return -ENOTSUP;
> >      }
> >
> > -    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
> > -                                major(st->st_rdev), minor(st->st_rdev));
> > +    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/%s",
> > +                                major(st->st_rdev), minor(st->st_rdev),
> > +                                attribute);
> >      sysfd = open(sysfspath, O_RDONLY);
> >      if (sysfd == -1) {
> >          ret = -errno;
> > @@ -1256,9 +1261,9 @@ static int hdev_get_max_segments(int fd, struct stat *st)
> >      }
> >      buf[ret] = 0;
> >      /* The file is ended with '\n', pass 'end' to accept that. */
> > -    ret = qemu_strtol(buf, &end, 10, &max_segments);
> > +    ret = qemu_strtol(buf, &end, 10, &val);
> >      if (ret == 0 && end && *end == '\n') {
> > -        ret = max_segments;
> > +        ret = val;
> >      }
> >
> >  out:
> > @@ -1272,6 +1277,15 @@ out:
> >  #endif
> >  }
> >
> > +static int hdev_get_max_segments(int fd, struct stat *st) {
> > +    int ret;
> > +    ret = get_sysfs_long_val(fd, st, "max_segments");
> > +    if (ret < 0) {
> > +        return -1;
>
> This hides the actual error number. The if statement can be dropped and
> the function can be simplified to:
>
>   static int hdev_get_max_segments(int fd, struct stat *st) {
>       return get_sysfs_long_val(fd, st, "max_segments");
>   }
>
> Whether you want to keep the tiny helper function or inline
> get_sysfs_long_val(fd, st, "max_segments") into the caller is up to you.
>
> > +    }
> > +    return ret;
> > +}
> > +
> >  static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
> >  {
> >      BDRVRawState *s = bs->opaque;
> > @@ -1872,6 +1886,7 @@ static int handle_aiocb_zone_report(void *opaque) {
> >
> >  static int handle_aiocb_zone_mgmt(void *opaque) {
> >      RawPosixAIOData *aiocb = opaque;
> > +    BlockDriverState *s = aiocb->bs;
> >      int fd = aiocb->aio_fildes;
> >      int64_t offset = aiocb->aio_offset;
> >      int64_t len = aiocb->aio_nbytes;
> > @@ -1884,11 +1899,9 @@ static int handle_aiocb_zone_mgmt(void *opaque) {
> >      int64_t zone_size_mask;
> >      int ret;
> >
> > -    ret = ioctl(fd, BLKGETZONESZ, &zone_size);
> > -    if (ret) {
> > -        return -1;
> > -    }
> > -
> > +    g_autofree struct stat *file = g_new(struct stat, 1);
> > +    stat(s->filename, file);
>
> There is no need to allocate struct stat using g_new(). It's a small
> struct that can be on the stack.
>
> Also, fstat(fd, &st) is preferred when the file descriptor is already
> open because it avoids race conditions due to file renaming/path
> traversal.
>
> Here is how this could be written:
>
>   struct stat st;
>   if (fstat(fd, &st) < 0) {
>       return -errno;
>   }

Thanks for the suggestions!

>
> > +    zone_size = get_sysfs_long_val(fd, file, "chunk_sectors");
> >      zone_size_mask = zone_size - 1;
> >      if (offset & zone_size_mask) {
> >          error_report("offset is not the start of a zone");
> > --
> > 2.35.3
> >
Re: [RFC v3 3/5] file-posix: introduce get_sysfs_long_val for zoned device information.
Posted by Hannes Reinecke 3 years, 7 months ago
On 6/27/22 02:19, Sam Li wrote:
> Use sysfs attribute files to get the zoned device information in case
> that ioctl() commands of zone management interface won't work. It can
> return long type of value like chunk_sectors, zoned_append_max_bytes,
> max_open_zones, max_active_zones.
> ---
>   block/file-posix.c | 37 +++++++++++++++++++++++++------------
>   1 file changed, 25 insertions(+), 12 deletions(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 1b8b0d351f..73c2cdfbca 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1216,15 +1216,19 @@ static int hdev_get_max_hw_transfer(int fd, struct stat *st)
>   #endif
>   }
>   
> -static int hdev_get_max_segments(int fd, struct stat *st)
> -{
> +/*
> + * Get zoned device information (chunk_sectors, zoned_append_max_bytes,
> + * max_open_zones, max_active_zones) through sysfs attribute files.
> + */
> +static long get_sysfs_long_val(int fd, struct stat *st,
> +                               const char *attribute) {
>   #ifdef CONFIG_LINUX
>       char buf[32];
>       const char *end;
>       char *sysfspath = NULL;
>       int ret;
>       int sysfd = -1;
> -    long max_segments;
> +    long val;
>   
>       if (S_ISCHR(st->st_mode)) {
>           if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
> @@ -1237,8 +1241,9 @@ static int hdev_get_max_segments(int fd, struct stat *st)
>           return -ENOTSUP;
>       }
>   
> -    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
> -                                major(st->st_rdev), minor(st->st_rdev));
> +    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/%s",
> +                                major(st->st_rdev), minor(st->st_rdev),
> +                                attribute);
>       sysfd = open(sysfspath, O_RDONLY);
>       if (sysfd == -1) {
>           ret = -errno;
> @@ -1256,9 +1261,9 @@ static int hdev_get_max_segments(int fd, struct stat *st)
>       }
>       buf[ret] = 0;
>       /* The file is ended with '\n', pass 'end' to accept that. */
> -    ret = qemu_strtol(buf, &end, 10, &max_segments);
> +    ret = qemu_strtol(buf, &end, 10, &val);
>       if (ret == 0 && end && *end == '\n') {
> -        ret = max_segments;
> +        ret = val;
>       }
>   
>   out:
> @@ -1272,6 +1277,15 @@ out:
>   #endif
>   }
>   
> +static int hdev_get_max_segments(int fd, struct stat *st) {
> +    int ret;
> +    ret = get_sysfs_long_val(fd, st, "max_segments");
> +    if (ret < 0) {
> +        return -1;
> +    }
> +    return ret;
> +}
> +
>   static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
>   {
>       BDRVRawState *s = bs->opaque;
> @@ -1872,6 +1886,7 @@ static int handle_aiocb_zone_report(void *opaque) {
>   
>   static int handle_aiocb_zone_mgmt(void *opaque) {
>       RawPosixAIOData *aiocb = opaque;
> +    BlockDriverState *s = aiocb->bs;
>       int fd = aiocb->aio_fildes;
>       int64_t offset = aiocb->aio_offset;
>       int64_t len = aiocb->aio_nbytes;
> @@ -1884,11 +1899,9 @@ static int handle_aiocb_zone_mgmt(void *opaque) {
>       int64_t zone_size_mask;
>       int ret;
>   
> -    ret = ioctl(fd, BLKGETZONESZ, &zone_size);
> -    if (ret) {
> -        return -1;
> -    }
> -
> +    g_autofree struct stat *file = g_new(struct stat, 1);
> +    stat(s->filename, file);
> +    zone_size = get_sysfs_long_val(fd, file, "chunk_sectors");
>       zone_size_mask = zone_size - 1;
>       if (offset & zone_size_mask) {
>           error_report("offset is not the start of a zone");

Round of applause.

Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		           Kernel Storage Architect
hare@suse.de			                  +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer

Re: [RFC v3 3/5] file-posix: introduce get_sysfs_long_val for zoned device information.
Posted by Sam Li 3 years, 7 months ago
Hannes Reinecke <hare@suse.de> 于2022年6月27日周一 15:31写道:
>
> On 6/27/22 02:19, Sam Li wrote:
> > Use sysfs attribute files to get the zoned device information in case
> > that ioctl() commands of zone management interface won't work. It can
> > return long type of value like chunk_sectors, zoned_append_max_bytes,
> > max_open_zones, max_active_zones.
> > ---
> >   block/file-posix.c | 37 +++++++++++++++++++++++++------------
> >   1 file changed, 25 insertions(+), 12 deletions(-)
> >
> > diff --git a/block/file-posix.c b/block/file-posix.c
> > index 1b8b0d351f..73c2cdfbca 100644
> > --- a/block/file-posix.c
> > +++ b/block/file-posix.c
> > @@ -1216,15 +1216,19 @@ static int hdev_get_max_hw_transfer(int fd, struct stat *st)
> >   #endif
> >   }
> >
> > -static int hdev_get_max_segments(int fd, struct stat *st)
> > -{
> > +/*
> > + * Get zoned device information (chunk_sectors, zoned_append_max_bytes,
> > + * max_open_zones, max_active_zones) through sysfs attribute files.
> > + */
> > +static long get_sysfs_long_val(int fd, struct stat *st,
> > +                               const char *attribute) {
> >   #ifdef CONFIG_LINUX
> >       char buf[32];
> >       const char *end;
> >       char *sysfspath = NULL;
> >       int ret;
> >       int sysfd = -1;
> > -    long max_segments;
> > +    long val;
> >
> >       if (S_ISCHR(st->st_mode)) {
> >           if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
> > @@ -1237,8 +1241,9 @@ static int hdev_get_max_segments(int fd, struct stat *st)
> >           return -ENOTSUP;
> >       }
> >
> > -    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
> > -                                major(st->st_rdev), minor(st->st_rdev));
> > +    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/%s",
> > +                                major(st->st_rdev), minor(st->st_rdev),
> > +                                attribute);
> >       sysfd = open(sysfspath, O_RDONLY);
> >       if (sysfd == -1) {
> >           ret = -errno;
> > @@ -1256,9 +1261,9 @@ static int hdev_get_max_segments(int fd, struct stat *st)
> >       }
> >       buf[ret] = 0;
> >       /* The file is ended with '\n', pass 'end' to accept that. */
> > -    ret = qemu_strtol(buf, &end, 10, &max_segments);
> > +    ret = qemu_strtol(buf, &end, 10, &val);
> >       if (ret == 0 && end && *end == '\n') {
> > -        ret = max_segments;
> > +        ret = val;
> >       }
> >
> >   out:
> > @@ -1272,6 +1277,15 @@ out:
> >   #endif
> >   }
> >
> > +static int hdev_get_max_segments(int fd, struct stat *st) {
> > +    int ret;
> > +    ret = get_sysfs_long_val(fd, st, "max_segments");
> > +    if (ret < 0) {
> > +        return -1;
> > +    }
> > +    return ret;
> > +}
> > +
> >   static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
> >   {
> >       BDRVRawState *s = bs->opaque;
> > @@ -1872,6 +1886,7 @@ static int handle_aiocb_zone_report(void *opaque) {
> >
> >   static int handle_aiocb_zone_mgmt(void *opaque) {
> >       RawPosixAIOData *aiocb = opaque;
> > +    BlockDriverState *s = aiocb->bs;
> >       int fd = aiocb->aio_fildes;
> >       int64_t offset = aiocb->aio_offset;
> >       int64_t len = aiocb->aio_nbytes;
> > @@ -1884,11 +1899,9 @@ static int handle_aiocb_zone_mgmt(void *opaque) {
> >       int64_t zone_size_mask;
> >       int ret;
> >
> > -    ret = ioctl(fd, BLKGETZONESZ, &zone_size);
> > -    if (ret) {
> > -        return -1;
> > -    }
> > -
> > +    g_autofree struct stat *file = g_new(struct stat, 1);
> > +    stat(s->filename, file);
> > +    zone_size = get_sysfs_long_val(fd, file, "chunk_sectors");
> >       zone_size_mask = zone_size - 1;
> >       if (offset & zone_size_mask) {
> >           error_report("offset is not the start of a zone");
>
> Round of applause.
>
Thanks! It was based on Damien's suggestion.

> Reviewed-by: Hannes Reinecke <hare@suse.de>
>
> Cheers,
>
> Hannes
> --
> Dr. Hannes Reinecke                        Kernel Storage Architect
> hare@suse.de                                      +49 911 74053 688
> SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
> HRB 36809 (AG Nürnberg), GF: Felix Imendörffer