[PATCH v3 4/7] file-posix: try BLKSECTGET on block devices too, do not round to power of 2

Paolo Bonzini posted 7 patches 4 years, 8 months ago
Maintainers: Kevin Wolf <kwolf@redhat.com>, Eric Blake <eblake@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, Markus Armbruster <armbru@redhat.com>, Stefan Hajnoczi <stefanha@redhat.com>, Max Reitz <mreitz@redhat.com>, Fam Zheng <fam@euphon.net>
There is a newer version of this series
[PATCH v3 4/7] file-posix: try BLKSECTGET on block devices too, do not round to power of 2
Posted by Paolo Bonzini 4 years, 8 months ago
bs->sg is only true for character devices, but block devices can also
be used with scsi-block and scsi-generic.  Unfortunately BLKSECTGET
returns bytes in an int for /dev/sgN devices, and sectors in a short
for block devices, so account for that in the code.

The maximum transfer also need not be a power of 2 (for example I have
seen disks with 1280 KiB maximum transfer) so there's no need to pass
the result through pow2floor.

This patch reintroduces the logic that was removed in commit 867eccfed8
("file-posix: Use max transfer length/segment count only for SCSI
passthrough", 2019-07-12).  The removal was motivated by the performance
regression when using a block device's max_transfer with kernel I/O;
the new, separate max_hw_transfer limit avoids reintroducing the same
regression.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/file-posix.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index f55f92d0f5..1439293f63 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1149,22 +1149,27 @@ static void raw_reopen_abort(BDRVReopenState *state)
     s->reopen_state = NULL;
 }
 
-static int sg_get_max_transfer_length(int fd)
+static int hdev_get_max_hw_transfer(int fd, struct stat *st)
 {
 #ifdef BLKSECTGET
-    int max_bytes = 0;
-
-    if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
-        return max_bytes;
+    if (S_ISBLK(st->st_mode)) {
+        unsigned short max_sectors = 0;
+        if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
+            return max_sectors * 512;
+        }
     } else {
-        return -errno;
+        int max_bytes = 0;
+        if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
+            return max_bytes;
+        }
     }
+    return -errno;
 #else
     return -ENOSYS;
 #endif
 }
 
-static int sg_get_max_segments(int fd)
+static int hdev_get_max_segments(int fd, struct stat *st)
 {
 #ifdef CONFIG_LINUX
     char buf[32];
@@ -1173,12 +1178,6 @@ static int sg_get_max_segments(int fd)
     int ret;
     int sysfd = -1;
     long max_segments;
-    struct stat st;
-
-    if (fstat(fd, &st)) {
-        ret = -errno;
-        goto out;
-    }
 
     if (S_ISCHR(st->st_mode)) {
         if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
@@ -1192,7 +1191,7 @@ static int sg_get_max_segments(int fd)
     }
 
     sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
-                                major(st.st_rdev), minor(st.st_rdev));
+                                major(st->st_rdev), minor(st->st_rdev));
     sysfd = open(sysfspath, O_RDONLY);
     if (sysfd == -1) {
         ret = -errno;
@@ -1229,15 +1228,20 @@ out:
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
+    struct stat st;
+
+    if (fstat(s->fd, &st)) {
+        return;
+    }
 
-    if (bs->sg) {
-        int ret = sg_get_max_transfer_length(s->fd);
+    if (bs->sg || S_ISBLK(st.st_mode)) {
+        int ret = hdev_get_max_hw_transfer(s->fd, &st);
 
         if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
-            bs->bl.max_hw_transfer = pow2floor(ret);
+            bs->bl.max_hw_transfer = ret;
         }
 
-        ret = sg_get_max_segments(s->fd);
+        ret = hdev_get_max_segments(s->fd, &st);
         if (ret > 0) {
             bs->bl.max_iov = ret;
         }
-- 
2.31.1



Re: [PATCH v3 4/7] file-posix: try BLKSECTGET on block devices too, do not round to power of 2
Posted by Max Reitz 4 years, 7 months ago
On 03.06.21 15:37, Paolo Bonzini wrote:
> bs->sg is only true for character devices, but block devices can also
> be used with scsi-block and scsi-generic.  Unfortunately BLKSECTGET
> returns bytes in an int for /dev/sgN devices, and sectors in a short
> for block devices, so account for that in the code.
>
> The maximum transfer also need not be a power of 2 (for example I have
> seen disks with 1280 KiB maximum transfer) so there's no need to pass
> the result through pow2floor.
>
> This patch reintroduces the logic that was removed in commit 867eccfed8
> ("file-posix: Use max transfer length/segment count only for SCSI
> passthrough", 2019-07-12).  The removal was motivated by the performance
> regression when using a block device's max_transfer with kernel I/O;
> the new, separate max_hw_transfer limit avoids reintroducing the same
> regression.

(As does the fact that the result of hdev_get_max_segments() is no longer
used to cap max_transfer, but is just stored in max_iov instead.)

> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   block/file-posix.c | 40 ++++++++++++++++++++++------------------
>   1 file changed, 22 insertions(+), 18 deletions(-)
>
> diff --git a/block/file-posix.c b/block/file-posix.c
> index f55f92d0f5..1439293f63 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1149,22 +1149,27 @@ static void raw_reopen_abort(BDRVReopenState *state)
>       s->reopen_state = NULL;
>   }
>   
> -static int sg_get_max_transfer_length(int fd)
> +static int hdev_get_max_hw_transfer(int fd, struct stat *st)
>   {
>   #ifdef BLKSECTGET
> -    int max_bytes = 0;
> -
> -    if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
> -        return max_bytes;
> +    if (S_ISBLK(st->st_mode)) {
> +        unsigned short max_sectors = 0;
> +        if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
> +            return max_sectors * 512;
> +        }
>       } else {
> -        return -errno;
> +        int max_bytes = 0;
> +        if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
> +            return max_bytes;
> +        }
>       }
> +    return -errno;
>   #else
>       return -ENOSYS;
>   #endif
>   }
>   
> -static int sg_get_max_segments(int fd)
> +static int hdev_get_max_segments(int fd, struct stat *st)

OK, now I see why in patch 1 you treated `st` as a pointer. Still, it needs
to be `st.` in patch 1, and should be changed to `st->` only in this patch.

>   {
>   #ifdef CONFIG_LINUX
>       char buf[32];
> @@ -1173,12 +1178,6 @@ static int sg_get_max_segments(int fd)
>       int ret;
>       int sysfd = -1;
>       long max_segments;
> -    struct stat st;
> -
> -    if (fstat(fd, &st)) {
> -        ret = -errno;
> -        goto out;
> -    }
>   
>       if (S_ISCHR(st->st_mode)) {
>           if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
> @@ -1192,7 +1191,7 @@ static int sg_get_max_segments(int fd)
>       }
>   
>       sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
> -                                major(st.st_rdev), minor(st.st_rdev));
> +                                major(st->st_rdev), minor(st->st_rdev));
>       sysfd = open(sysfspath, O_RDONLY);
>       if (sysfd == -1) {
>           ret = -errno;
> @@ -1229,15 +1228,20 @@ out:
>   static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
>   {
>       BDRVRawState *s = bs->opaque;
> +    struct stat st;
> +
> +    if (fstat(s->fd, &st)) {
> +        return;
> +    }

First, I think if we ignore an error, there should be a comment 
explaining why that’s OK.

It is OK here because querying the transfer limits is best-effort.

However, the alignment probes below the following block are completely 
independent of `st`. Therefore, I don’t think we should skip them if 
`fstat()` failed here; only the `if (bs->sg || ...)` block should be 
skipped.

Max

>   
> -    if (bs->sg) {
> -        int ret = sg_get_max_transfer_length(s->fd);
> +    if (bs->sg || S_ISBLK(st.st_mode)) {
> +        int ret = hdev_get_max_hw_transfer(s->fd, &st);
>   
>           if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
> -            bs->bl.max_hw_transfer = pow2floor(ret);
> +            bs->bl.max_hw_transfer = ret;
>           }
>   
> -        ret = sg_get_max_segments(s->fd);
> +        ret = hdev_get_max_segments(s->fd, &st);
>           if (ret > 0) {
>               bs->bl.max_iov = ret;
>           }