[PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices

Simon Rowe posted 2 patches 4 years, 5 months ago
There is a newer version of this series
[PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices
Posted by Simon Rowe 4 years, 5 months ago
Signed-off-by: Simon Rowe <simon.rowe@nutanix.com>
---
 src/util/iohelper.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/util/iohelper.c b/src/util/iohelper.c
index b8810d16d3..e6eb178fde 100644
--- a/src/util/iohelper.c
+++ b/src/util/iohelper.c
@@ -28,6 +28,8 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
 
 #include "virthread.h"
 #include "virfile.h"
@@ -56,6 +58,8 @@ runIO(const char *path, int fd, int oflags)
     unsigned long long total = 0;
     bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0);
     off_t end = 0;
+    struct stat sb;
+    bool isBlockDev = false;
 
 #if WITH_POSIX_MEMALIGN
     if (posix_memalign(&base, alignMask + 1, buflen))
@@ -86,9 +90,11 @@ runIO(const char *path, int fd, int oflags)
         fdinname = "stdin";
         fdout = fd;
         fdoutname = path;
+        if (fstat(fd, &sb) == 0)
+            isBlockDev = S_ISBLK(sb.st_mode);
         /* To make the implementation simpler, we give up on any
          * attempt to use O_DIRECT in a non-trivial manner.  */
-        if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) {
+        if (!isBlockDev && direct && (end = lseek(fd, 0, SEEK_END)) != 0) {
             virReportSystemError(end < 0 ? errno : EINVAL, "%s",
                                  _("O_DIRECT write needs empty seekable file"));
             goto cleanup;
@@ -140,7 +146,7 @@ runIO(const char *path, int fd, int oflags)
                 goto cleanup;
             }
 
-            if (ftruncate(fd, total) < 0) {
+            if (!isBlockDev && ftruncate(fd, total) < 0) {
                 virReportSystemError(errno, _("Unable to truncate %s"), fdoutname);
                 goto cleanup;
             }
-- 
2.22.3

Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices
Posted by Michal Prívozník 4 years, 5 months ago
On 8/20/21 10:39 AM, Simon Rowe wrote:
> Signed-off-by: Simon Rowe <simon.rowe@nutanix.com>
> ---
>  src/util/iohelper.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/src/util/iohelper.c b/src/util/iohelper.c
> index b8810d16d3..e6eb178fde 100644
> --- a/src/util/iohelper.c
> +++ b/src/util/iohelper.c
> @@ -28,6 +28,8 @@
>  #include <unistd.h>
>  #include <fcntl.h>
>  #include <stdlib.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
>  
>  #include "virthread.h"
>  #include "virfile.h"
> @@ -56,6 +58,8 @@ runIO(const char *path, int fd, int oflags)
>      unsigned long long total = 0;
>      bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0);
>      off_t end = 0;
> +    struct stat sb;
> +    bool isBlockDev = false;
>  
>  #if WITH_POSIX_MEMALIGN
>      if (posix_memalign(&base, alignMask + 1, buflen))
> @@ -86,9 +90,11 @@ runIO(const char *path, int fd, int oflags)
>          fdinname = "stdin";
>          fdout = fd;
>          fdoutname = path;
> +        if (fstat(fd, &sb) == 0)
> +            isBlockDev = S_ISBLK(sb.st_mode);
>          /* To make the implementation simpler, we give up on any
>           * attempt to use O_DIRECT in a non-trivial manner.  */
> -        if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) {
> +        if (!isBlockDev && direct && (end = lseek(fd, 0, SEEK_END)) != 0) {
>              virReportSystemError(end < 0 ? errno : EINVAL, "%s",
>                                   _("O_DIRECT write needs empty seekable file"));
>              goto cleanup;
> @@ -140,7 +146,7 @@ runIO(const char *path, int fd, int oflags)
>                  goto cleanup;
>              }
>  
> -            if (ftruncate(fd, total) < 0) {
> +            if (!isBlockDev && ftruncate(fd, total) < 0) {
>                  virReportSystemError(errno, _("Unable to truncate %s"), fdoutname);
>                  goto cleanup;
>              }
> 

IIUC, O_DIRECT is no good for block devices. I wonder whether the caller
(doCoreDump()) should learn whether the path is a block device and don't
set O_DIRECT flag instead of changing iohelper.

Michal

Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices
Posted by Simon Rowe 4 years, 5 months ago
I can change to doing this if it preferred,

Simon
________________________________
From: Michal Prívozník <mprivozn@redhat.com>
Sent: 20 August 2021 16:09
To: Simon Rowe <simon.rowe@nutanix.com>; libvir-list@redhat.com <libvir-list@redhat.com>
Subject: Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices

On 8/20/21 10:39 AM, Simon Rowe wrote:
> Signed-off-by: Simon Rowe <simon.rowe@nutanix.com>
> ---
>  src/util/iohelper.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/src/util/iohelper.c b/src/util/iohelper.c
> index b8810d16d3..e6eb178fde 100644
> --- a/src/util/iohelper.c
> +++ b/src/util/iohelper.c
> @@ -28,6 +28,8 @@
>  #include <unistd.h>
>  #include <fcntl.h>
>  #include <stdlib.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
>
>  #include "virthread.h"
>  #include "virfile.h"
> @@ -56,6 +58,8 @@ runIO(const char *path, int fd, int oflags)
>      unsigned long long total = 0;
>      bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0);
>      off_t end = 0;
> +    struct stat sb;
> +    bool isBlockDev = false;
>
>  #if WITH_POSIX_MEMALIGN
>      if (posix_memalign(&base, alignMask + 1, buflen))
> @@ -86,9 +90,11 @@ runIO(const char *path, int fd, int oflags)
>          fdinname = "stdin";
>          fdout = fd;
>          fdoutname = path;
> +        if (fstat(fd, &sb) == 0)
> +            isBlockDev = S_ISBLK(sb.st_mode);
>          /* To make the implementation simpler, we give up on any
>           * attempt to use O_DIRECT in a non-trivial manner.  */
> -        if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) {
> +        if (!isBlockDev && direct && (end = lseek(fd, 0, SEEK_END)) != 0) {
>              virReportSystemError(end < 0 ? errno : EINVAL, "%s",
>                                   _("O_DIRECT write needs empty seekable file"));
>              goto cleanup;
> @@ -140,7 +146,7 @@ runIO(const char *path, int fd, int oflags)
>                  goto cleanup;
>              }
>
> -            if (ftruncate(fd, total) < 0) {
> +            if (!isBlockDev && ftruncate(fd, total) < 0) {
>                  virReportSystemError(errno, _("Unable to truncate %s"), fdoutname);
>                  goto cleanup;
>              }
>

IIUC, O_DIRECT is no good for block devices. I wonder whether the caller
(doCoreDump()) should learn whether the path is a block device and don't
set O_DIRECT flag instead of changing iohelper.

Michal

Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices
Posted by Simon Rowe 4 years, 5 months ago
O_DIRECT is under control of the caller (though the --bypass-cache option). I am running in a constrained memory environment and when I don't use O_DIRECT the dump stalls at 5% load av hits 41 and the system becomes unresponsive.

Simon
________________________________
From: Michal Prívozník <mprivozn@redhat.com>
Sent: 20 August 2021 16:09
To: Simon Rowe <simon.rowe@nutanix.com>; libvir-list@redhat.com <libvir-list@redhat.com>
Subject: Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices

On 8/20/21 10:39 AM, Simon Rowe wrote:
> Signed-off-by: Simon Rowe <simon.rowe@nutanix.com>
> ---
>  src/util/iohelper.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/src/util/iohelper.c b/src/util/iohelper.c
> index b8810d16d3..e6eb178fde 100644
> --- a/src/util/iohelper.c
> +++ b/src/util/iohelper.c
> @@ -28,6 +28,8 @@
>  #include <unistd.h>
>  #include <fcntl.h>
>  #include <stdlib.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
>
>  #include "virthread.h"
>  #include "virfile.h"
> @@ -56,6 +58,8 @@ runIO(const char *path, int fd, int oflags)
>      unsigned long long total = 0;
>      bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0);
>      off_t end = 0;
> +    struct stat sb;
> +    bool isBlockDev = false;
>
>  #if WITH_POSIX_MEMALIGN
>      if (posix_memalign(&base, alignMask + 1, buflen))
> @@ -86,9 +90,11 @@ runIO(const char *path, int fd, int oflags)
>          fdinname = "stdin";
>          fdout = fd;
>          fdoutname = path;
> +        if (fstat(fd, &sb) == 0)
> +            isBlockDev = S_ISBLK(sb.st_mode);
>          /* To make the implementation simpler, we give up on any
>           * attempt to use O_DIRECT in a non-trivial manner.  */
> -        if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) {
> +        if (!isBlockDev && direct && (end = lseek(fd, 0, SEEK_END)) != 0) {
>              virReportSystemError(end < 0 ? errno : EINVAL, "%s",
>                                   _("O_DIRECT write needs empty seekable file"));
>              goto cleanup;
> @@ -140,7 +146,7 @@ runIO(const char *path, int fd, int oflags)
>                  goto cleanup;
>              }
>
> -            if (ftruncate(fd, total) < 0) {
> +            if (!isBlockDev && ftruncate(fd, total) < 0) {
>                  virReportSystemError(errno, _("Unable to truncate %s"), fdoutname);
>                  goto cleanup;
>              }
>

IIUC, O_DIRECT is no good for block devices. I wonder whether the caller
(doCoreDump()) should learn whether the path is a block device and don't
set O_DIRECT flag instead of changing iohelper.

Michal

Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices
Posted by Michal Prívozník 4 years, 5 months ago
On 8/23/21 4:38 PM, Simon Rowe wrote:
> O_DIRECT is under control of the caller (though the --bypass-cache option). I am running in a constrained memory environment and when I don't use O_DIRECT the dump stalls at 5% load av hits 41 and the system becomes unresponsive.

Fair enough. I thought that block devices are always opened without
poisoning host cache. If they aren't then you patch makes sense.
But, what is your kernel version? Hopefully it's not too old. Is it a
special block device or just some regular disk?

Looking into the kernel code, it does implement O_DIRECT for block
devices:

kernel.git $ git grep -npC10 "\.direct_IO" -- fs/block_dev.c
fs/block_dev.c-1680-}
fs/block_dev.c-1681-
fs/block_dev.c=1682=static const struct address_space_operations def_blk_aops = {
fs/block_dev.c-1683-    .set_page_dirty = __set_page_dirty_buffers,
fs/block_dev.c-1684-    .readpage       = blkdev_readpage,
fs/block_dev.c-1685-    .readahead      = blkdev_readahead,
fs/block_dev.c-1686-    .writepage      = blkdev_writepage,
fs/block_dev.c-1687-    .write_begin    = blkdev_write_begin,
fs/block_dev.c-1688-    .write_end      = blkdev_write_end,
fs/block_dev.c-1689-    .writepages     = blkdev_writepages,
fs/block_dev.c:1690:    .direct_IO      = blkdev_direct_IO,
fs/block_dev.c-1691-    .migratepage    = buffer_migrate_page_norefs,
fs/block_dev.c-1692-    .is_dirty_writeback = buffer_check_dirty_writeback,
fs/block_dev.c-1693-};
fs/block_dev.c-1694-
fs/block_dev.c-1695-#define     BLKDEV_FALLOC_FL_SUPPORTED                                      \
fs/block_dev.c-1696-            (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
fs/block_dev.c-1697-             FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
fs/block_dev.c-1698-
fs/block_dev.c-1699-static long blkdev_fallocate(struct file *file, int mode, loff_t start,
fs/block_dev.c-1700-                         loff_t len)

So it seems like your patch was correct after all. Would you like to
resend this in v2 (I understand that patch 2/2 needs a rework) or do you
want me to push this one right away?

Michal

Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices
Posted by Simon Rowe 4 years, 5 months ago
I'm using 5.4.109. I'll re-submit with changes to the second patch shortly,

thanks
Simon
________________________________
From: Michal Prívozník <mprivozn@redhat.com>
Sent: 23 August 2021 16:04
To: Simon Rowe <simon.rowe@nutanix.com>; libvir-list@redhat.com <libvir-list@redhat.com>
Subject: Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices

On 8/23/21 4:38 PM, Simon Rowe wrote:
> O_DIRECT is under control of the caller (though the --bypass-cache option). I am running in a constrained memory environment and when I don't use O_DIRECT the dump stalls at 5% load av hits 41 and the system becomes unresponsive.

Fair enough. I thought that block devices are always opened without
poisoning host cache. If they aren't then you patch makes sense.
But, what is your kernel version? Hopefully it's not too old. Is it a
special block device or just some regular disk?

Looking into the kernel code, it does implement O_DIRECT for block
devices:

kernel.git $ git grep -npC10 "\.direct_IO" -- fs/block_dev.c
fs/block_dev.c-1680-}
fs/block_dev.c-1681-
fs/block_dev.c=1682=static const struct address_space_operations def_blk_aops = {
fs/block_dev.c-1683-    .set_page_dirty = __set_page_dirty_buffers,
fs/block_dev.c-1684-    .readpage       = blkdev_readpage,
fs/block_dev.c-1685-    .readahead      = blkdev_readahead,
fs/block_dev.c-1686-    .writepage      = blkdev_writepage,
fs/block_dev.c-1687-    .write_begin    = blkdev_write_begin,
fs/block_dev.c-1688-    .write_end      = blkdev_write_end,
fs/block_dev.c-1689-    .writepages     = blkdev_writepages,
fs/block_dev.c:1690:    .direct_IO      = blkdev_direct_IO,
fs/block_dev.c-1691-    .migratepage    = buffer_migrate_page_norefs,
fs/block_dev.c-1692-    .is_dirty_writeback = buffer_check_dirty_writeback,
fs/block_dev.c-1693-};
fs/block_dev.c-1694-
fs/block_dev.c-1695-#define     BLKDEV_FALLOC_FL_SUPPORTED                                      \
fs/block_dev.c-1696-            (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
fs/block_dev.c-1697-             FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
fs/block_dev.c-1698-
fs/block_dev.c-1699-static long blkdev_fallocate(struct file *file, int mode, loff_t start,
fs/block_dev.c-1700-                         loff_t len)

So it seems like your patch was correct after all. Would you like to
resend this in v2 (I understand that patch 2/2 needs a rework) or do you
want me to push this one right away?

Michal

Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices
Posted by Simon Rowe 4 years, 5 months ago
Missed to answer part of your question. I'm using an NBD device backed by a userspace process using nbdkit,

Simon
________________________________
From: Simon Rowe <simon.rowe@nutanix.com>
Sent: 23 August 2021 16:15
To: Michal Prívozník <mprivozn@redhat.com>; libvir-list@redhat.com <libvir-list@redhat.com>
Subject: Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices

I'm using 5.4.109. I'll re-submit with changes to the second patch shortly,

thanks
Simon
________________________________
From: Michal Prívozník <mprivozn@redhat.com>
Sent: 23 August 2021 16:04
To: Simon Rowe <simon.rowe@nutanix.com>; libvir-list@redhat.com <libvir-list@redhat.com>
Subject: Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices

On 8/23/21 4:38 PM, Simon Rowe wrote:
> O_DIRECT is under control of the caller (though the --bypass-cache option). I am running in a constrained memory environment and when I don't use O_DIRECT the dump stalls at 5% load av hits 41 and the system becomes unresponsive.

Fair enough. I thought that block devices are always opened without
poisoning host cache. If they aren't then you patch makes sense.
But, what is your kernel version? Hopefully it's not too old. Is it a
special block device or just some regular disk?

Looking into the kernel code, it does implement O_DIRECT for block
devices:

kernel.git $ git grep -npC10 "\.direct_IO" -- fs/block_dev.c
fs/block_dev.c-1680-}
fs/block_dev.c-1681-
fs/block_dev.c=1682=static const struct address_space_operations def_blk_aops = {
fs/block_dev.c-1683-    .set_page_dirty = __set_page_dirty_buffers,
fs/block_dev.c-1684-    .readpage       = blkdev_readpage,
fs/block_dev.c-1685-    .readahead      = blkdev_readahead,
fs/block_dev.c-1686-    .writepage      = blkdev_writepage,
fs/block_dev.c-1687-    .write_begin    = blkdev_write_begin,
fs/block_dev.c-1688-    .write_end      = blkdev_write_end,
fs/block_dev.c-1689-    .writepages     = blkdev_writepages,
fs/block_dev.c:1690:    .direct_IO      = blkdev_direct_IO,
fs/block_dev.c-1691-    .migratepage    = buffer_migrate_page_norefs,
fs/block_dev.c-1692-    .is_dirty_writeback = buffer_check_dirty_writeback,
fs/block_dev.c-1693-};
fs/block_dev.c-1694-
fs/block_dev.c-1695-#define     BLKDEV_FALLOC_FL_SUPPORTED                                      \
fs/block_dev.c-1696-            (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
fs/block_dev.c-1697-             FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
fs/block_dev.c-1698-
fs/block_dev.c-1699-static long blkdev_fallocate(struct file *file, int mode, loff_t start,
fs/block_dev.c-1700-                         loff_t len)

So it seems like your patch was correct after all. Would you like to
resend this in v2 (I understand that patch 2/2 needs a rework) or do you
want me to push this one right away?

Michal