[PULL 3/6] qemu/osdep: Split qemu_close_all_open_fd() and add fallback

Richard Henderson posted 6 patches 3 months, 3 weeks ago
There is a newer version of this series
[PULL 3/6] qemu/osdep: Split qemu_close_all_open_fd() and add fallback
Posted by Richard Henderson 3 months, 3 weeks ago
From: Clément Léger <cleger@rivosinc.com>

In order to make it cleaner, split qemu_close_all_open_fd() logic into
multiple subfunctions (close with close_range(), with /proc/self/fd and
fallback).

Signed-off-by: Clément Léger <cleger@rivosinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20240802145423.3232974-3-cleger@rivosinc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 util/oslib-posix.c | 50 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 1e867efa47..9b79fc7cff 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -808,27 +808,16 @@ int qemu_msync(void *addr, size_t length, int fd)
     return msync(addr, length, MS_SYNC);
 }
 
-/*
- * Close all open file descriptors.
- */
-void qemu_close_all_open_fd(void)
+static bool qemu_close_all_open_fd_proc(void)
 {
     struct dirent *de;
     int fd, dfd;
     DIR *dir;
 
-#ifdef CONFIG_CLOSE_RANGE
-    int r = close_range(0, ~0U, 0);
-    if (!r) {
-        /* Success, no need to try other ways. */
-        return;
-    }
-#endif
-
     dir = opendir("/proc/self/fd");
     if (!dir) {
         /* If /proc is not mounted, there is nothing that can be done. */
-        return;
+        return false;
     }
     /* Avoid closing the directory. */
     dfd = dirfd(dir);
@@ -840,4 +829,39 @@ void qemu_close_all_open_fd(void)
         }
     }
     closedir(dir);
+
+    return true;
+}
+
+static bool qemu_close_all_open_fd_close_range(void)
+{
+#ifdef CONFIG_CLOSE_RANGE
+    int r = close_range(0, ~0U, 0);
+    if (!r) {
+        /* Success, no need to try other ways. */
+        return true;
+    }
+#endif
+    return false;
+}
+
+static void qemu_close_all_open_fd_fallback(void)
+{
+    int open_max = sysconf(_SC_OPEN_MAX), i;
+
+    /* Fallback */
+    for (i = 0; i < open_max; i++) {
+        close(i);
+    }
+}
+
+/*
+ * Close all open file descriptors.
+ */
+void qemu_close_all_open_fd(void)
+{
+    if (!qemu_close_all_open_fd_close_range() &&
+        !qemu_close_all_open_fd_proc()) {
+        qemu_close_all_open_fd_fallback();
+    }
 }
-- 
2.43.0


Re: [PULL 3/6] qemu/osdep: Split qemu_close_all_open_fd() and add fallback
Posted by Daniel P. Berrangé 2 months, 3 weeks ago
This is already merged, but I have two comments - one improvement
and one bug which we should probably fix before release.

On Mon, Aug 05, 2024 at 10:31:26AM +1000, Richard Henderson wrote:
> From: Clément Léger <cleger@rivosinc.com>
> 
> In order to make it cleaner, split qemu_close_all_open_fd() logic into
> multiple subfunctions (close with close_range(), with /proc/self/fd and
> fallback).
> 
> Signed-off-by: Clément Léger <cleger@rivosinc.com>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> Message-ID: <20240802145423.3232974-3-cleger@rivosinc.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  util/oslib-posix.c | 50 ++++++++++++++++++++++++++++++++++------------
>  1 file changed, 37 insertions(+), 13 deletions(-)
> 
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index 1e867efa47..9b79fc7cff 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -808,27 +808,16 @@ int qemu_msync(void *addr, size_t length, int fd)
>      return msync(addr, length, MS_SYNC);
>  }
>  
> -/*
> - * Close all open file descriptors.
> - */
> -void qemu_close_all_open_fd(void)
> +static bool qemu_close_all_open_fd_proc(void)
>  {
>      struct dirent *de;
>      int fd, dfd;
>      DIR *dir;
>  
> -#ifdef CONFIG_CLOSE_RANGE
> -    int r = close_range(0, ~0U, 0);
> -    if (!r) {
> -        /* Success, no need to try other ways. */
> -        return;
> -    }
> -#endif
> -
>      dir = opendir("/proc/self/fd");

IIUC from previous threads this is valid on Linux and on Solaris.

On FreeBSD & macOS, you need /dev/fd though.

>      if (!dir) {
>          /* If /proc is not mounted, there is nothing that can be done. */
> -        return;
> +        return false;
>      }
>      /* Avoid closing the directory. */
>      dfd = dirfd(dir);
> @@ -840,4 +829,39 @@ void qemu_close_all_open_fd(void)
>          }
>      }
>      closedir(dir);
> +
> +    return true;
> +}
> +
> +static bool qemu_close_all_open_fd_close_range(void)
> +{
> +#ifdef CONFIG_CLOSE_RANGE
> +    int r = close_range(0, ~0U, 0);
> +    if (!r) {
> +        /* Success, no need to try other ways. */
> +        return true;
> +    }
> +#endif
> +    return false;
> +}
> +
> +static void qemu_close_all_open_fd_fallback(void)
> +{
> +    int open_max = sysconf(_SC_OPEN_MAX), i;
> +
> +    /* Fallback */
> +    for (i = 0; i < open_max; i++) {
> +        close(i);
> +    }

I'm told that sysconf(_SC_OPEN_MAX) returns -1 on some versions of
macOS. "Luckily" since we assigned to 'int' rather than 'unsigned int'
this will result in us not closing any FDs in this fallback path,
rather than trying to close several billion FDs (an effective hang).

If _SC_OPEN_MAX returns -1, we should fallback to the OPEN_MAX
constant on macOS (see commit de448e0f26e710e9d2b7fc91393c40ac24b75847
which tackled a similar issue wrt getrlimit), and fallback to perhaps
a hardcoded 1024 on non-macOS.


> +}
> +
> +/*
> + * Close all open file descriptors.
> + */
> +void qemu_close_all_open_fd(void)
> +{
> +    if (!qemu_close_all_open_fd_close_range() &&
> +        !qemu_close_all_open_fd_proc()) {
> +        qemu_close_all_open_fd_fallback();
> +    }
>  }
> -- 
> 2.43.0
> 
> 

With regards,
Daniel

[1] https://github.com/open-mpi/ompi/issues/10358
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|


Re: [PULL 3/6] qemu/osdep: Split qemu_close_all_open_fd() and add fallback
Posted by Richard Henderson 2 months, 3 weeks ago
On 8/28/24 22:48, Daniel P. Berrangé wrote:
>>       dir = opendir("/proc/self/fd");
> 
> IIUC from previous threads this is valid on Linux and on Solaris.
> 
> On FreeBSD & macOS, you need /dev/fd though.

Fair, but importantly, it doesn't do anything *incorrect* those systems: it merely skips 
this method with ENOENT.

>> +    int open_max = sysconf(_SC_OPEN_MAX), i;
>> +
>> +    /* Fallback */
>> +    for (i = 0; i < open_max; i++) {
>> +        close(i);
>> +    }
> 
> I'm told that sysconf(_SC_OPEN_MAX) returns -1 on some versions of
> macOS. "Luckily" since we assigned to 'int' rather than 'unsigned int'
> this will result in us not closing any FDs in this fallback path,
> rather than trying to close several billion FDs (an effective hang).

Ouch.

> 
> If _SC_OPEN_MAX returns -1, we should fallback to the OPEN_MAX
> constant on macOS (see commit de448e0f26e710e9d2b7fc91393c40ac24b75847
> which tackled a similar issue wrt getrlimit), and fallback to perhaps
> a hardcoded 1024 on non-macOS.

I wish the timing on this had been better -- 25 minutes earlier and I would have delayed rc4.

Since macOS simply doesn't close fds, I'm of a mind to release 9.1.0 without this, and fix 
it for 9.1.1.  Thoughts?


r~

Re: [PULL 3/6] qemu/osdep: Split qemu_close_all_open_fd() and add fallback
Posted by Daniel P. Berrangé 2 months, 3 weeks ago
On Thu, Aug 29, 2024 at 08:47:27AM +1000, Richard Henderson wrote:
> On 8/28/24 22:48, Daniel P. Berrangé wrote:
> > >       dir = opendir("/proc/self/fd");
> > 
> > IIUC from previous threads this is valid on Linux and on Solaris.
> > 
> > On FreeBSD & macOS, you need /dev/fd though.
> 
> Fair, but importantly, it doesn't do anything *incorrect* those systems: it
> merely skips this method with ENOENT.
> 
> > > +    int open_max = sysconf(_SC_OPEN_MAX), i;
> > > +
> > > +    /* Fallback */
> > > +    for (i = 0; i < open_max; i++) {
> > > +        close(i);
> > > +    }
> > 
> > I'm told that sysconf(_SC_OPEN_MAX) returns -1 on some versions of
> > macOS. "Luckily" since we assigned to 'int' rather than 'unsigned int'
> > this will result in us not closing any FDs in this fallback path,
> > rather than trying to close several billion FDs (an effective hang).
> 
> Ouch.
> 
> > 
> > If _SC_OPEN_MAX returns -1, we should fallback to the OPEN_MAX
> > constant on macOS (see commit de448e0f26e710e9d2b7fc91393c40ac24b75847
> > which tackled a similar issue wrt getrlimit), and fallback to perhaps
> > a hardcoded 1024 on non-macOS.
> 
> I wish the timing on this had been better -- 25 minutes earlier and I would have delayed rc4.
> 
> Since macOS simply doesn't close fds, I'm of a mind to release 9.1.0 without
> this, and fix it for 9.1.1.  Thoughts?

The original net/tap.c code for closing FDs has the same bug, so in that
area we do NOT have a regression.

The original async teardown code would fail to close FDs as it is looking
for close_range or /proc/$PID/fd, and has no sysconf fallback, so again
no regression there.

IOW, I think this is acceptable to fix in 9.1 stable.

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|


Re: [PULL 3/6] qemu/osdep: Split qemu_close_all_open_fd() and add fallback
Posted by Clément Léger 2 months, 3 weeks ago

On 28/08/2024 14:48, Daniel P. Berrangé wrote:
> This is already merged, but I have two comments - one improvement
> and one bug which we should probably fix before release.
> 
> On Mon, Aug 05, 2024 at 10:31:26AM +1000, Richard Henderson wrote:
>> From: Clément Léger <cleger@rivosinc.com>
>>
>> In order to make it cleaner, split qemu_close_all_open_fd() logic into
>> multiple subfunctions (close with close_range(), with /proc/self/fd and
>> fallback).
>>
>> Signed-off-by: Clément Léger <cleger@rivosinc.com>
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>> Message-ID: <20240802145423.3232974-3-cleger@rivosinc.com>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>  util/oslib-posix.c | 50 ++++++++++++++++++++++++++++++++++------------
>>  1 file changed, 37 insertions(+), 13 deletions(-)
>>
>> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
>> index 1e867efa47..9b79fc7cff 100644
>> --- a/util/oslib-posix.c
>> +++ b/util/oslib-posix.c
>> @@ -808,27 +808,16 @@ int qemu_msync(void *addr, size_t length, int fd)
>>      return msync(addr, length, MS_SYNC);
>>  }
>>  
>> -/*
>> - * Close all open file descriptors.
>> - */
>> -void qemu_close_all_open_fd(void)
>> +static bool qemu_close_all_open_fd_proc(void)
>>  {
>>      struct dirent *de;
>>      int fd, dfd;
>>      DIR *dir;
>>  
>> -#ifdef CONFIG_CLOSE_RANGE
>> -    int r = close_range(0, ~0U, 0);
>> -    if (!r) {
>> -        /* Success, no need to try other ways. */
>> -        return;
>> -    }
>> -#endif
>> -
>>      dir = opendir("/proc/self/fd");
> 
> IIUC from previous threads this is valid on Linux and on Solaris.
> 
> On FreeBSD & macOS, you need /dev/fd though.

Acked.

> 
>>      if (!dir) {
>>          /* If /proc is not mounted, there is nothing that can be done. */
>> -        return;
>> +        return false;
>>      }
>>      /* Avoid closing the directory. */
>>      dfd = dirfd(dir);
>> @@ -840,4 +829,39 @@ void qemu_close_all_open_fd(void)
>>          }
>>      }
>>      closedir(dir);
>> +
>> +    return true;
>> +}
>> +
>> +static bool qemu_close_all_open_fd_close_range(void)
>> +{
>> +#ifdef CONFIG_CLOSE_RANGE
>> +    int r = close_range(0, ~0U, 0);
>> +    if (!r) {
>> +        /* Success, no need to try other ways. */
>> +        return true;
>> +    }
>> +#endif
>> +    return false;
>> +}
>> +
>> +static void qemu_close_all_open_fd_fallback(void)
>> +{
>> +    int open_max = sysconf(_SC_OPEN_MAX), i;
>> +
>> +    /* Fallback */
>> +    for (i = 0; i < open_max; i++) {
>> +        close(i);
>> +    }
> 
> I'm told that sysconf(_SC_OPEN_MAX) returns -1 on some versions of
> macOS. "Luckily" since we assigned to 'int' rather than 'unsigned int'
> this will result in us not closing any FDs in this fallback path,
> rather than trying to close several billion FDs (an effective hang).
> 
> If _SC_OPEN_MAX returns -1, we should fallback to the OPEN_MAX
> constant on macOS (see commit de448e0f26e710e9d2b7fc91393c40ac24b75847
> which tackled a similar issue wrt getrlimit), and fallback to perhaps
> a hardcoded 1024 on non-macOS.

Thanks for catching this, I can submit these fixes except if you already
prepared something though.

Clément

> 
> 
>> +}
>> +
>> +/*
>> + * Close all open file descriptors.
>> + */
>> +void qemu_close_all_open_fd(void)
>> +{
>> +    if (!qemu_close_all_open_fd_close_range() &&
>> +        !qemu_close_all_open_fd_proc()) {
>> +        qemu_close_all_open_fd_fallback();
>> +    }
>>  }
>> -- 
>> 2.43.0
>>
>>
> 
> With regards,
> Daniel
> 
> [1] https://github.com/open-mpi/ompi/issues/10358