[PATCH v7 4/5] qemu/osdep: Add excluded fd parameter to qemu_close_all_open_fd()

Clément Léger posted 5 patches 3 months, 3 weeks ago
There is a newer version of this series
[PATCH v7 4/5] qemu/osdep: Add excluded fd parameter to qemu_close_all_open_fd()
Posted by Clément Léger 3 months, 3 weeks ago
In order for this function to be usable by the tap.c code, add parameters
describing a list of file descriptors that should not be closed.

Signed-off-by: Clément Léger <cleger@rivosinc.com>
---
 include/qemu/osdep.h    |   8 ++-
 system/async-teardown.c |   2 +-
 util/oslib-posix.c      | 107 ++++++++++++++++++++++++++++++++++------
 3 files changed, 98 insertions(+), 19 deletions(-)

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 5cd8517380..0bf6f0a356 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -760,9 +760,13 @@ int qemu_fdatasync(int fd);
 /**
  * qemu_close_all_open_fd:
  *
- * Close all open file descriptors
+ * Close all open file descriptors except the ones supplied in the @skip array
+ *
+ * @skip: array of distinct file descriptors, sorted in ascending order, that
+ *        should not be closed, or NULL if there are none.
+ * @nskip: number of entries in the @skip array or 0 if @skip is NULL.
  */
-void qemu_close_all_open_fd(void);
+void qemu_close_all_open_fd(const int *skip, unsigned int nskip);
 
 /**
  * Sync changes made to the memory mapped file back to the backing
diff --git a/system/async-teardown.c b/system/async-teardown.c
index edf49e1007..9148ee8d04 100644
--- a/system/async-teardown.c
+++ b/system/async-teardown.c
@@ -52,7 +52,7 @@ static int async_teardown_fn(void *arg)
      * Close all file descriptors that might have been inherited from the
      * main qemu process when doing clone, needed to make libvirt happy.
      */
-    qemu_close_all_open_fd();
+    qemu_close_all_open_fd(NULL, 0);
 
     /* Set up a handler for SIGHUP and unblock SIGHUP. */
     sigaction(SIGHUP, &sa, NULL);
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 9b79fc7cff..7583192192 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -808,11 +808,12 @@ int qemu_msync(void *addr, size_t length, int fd)
     return msync(addr, length, MS_SYNC);
 }
 
-static bool qemu_close_all_open_fd_proc(void)
+static bool qemu_close_all_open_fd_proc(const int *skip, unsigned int nskip)
 {
     struct dirent *de;
     int fd, dfd;
     DIR *dir;
+    unsigned int skip_start = 0, skip_end = nskip;
 
     dir = opendir("/proc/self/fd");
     if (!dir) {
@@ -823,8 +824,34 @@ static bool qemu_close_all_open_fd_proc(void)
     dfd = dirfd(dir);
 
     for (de = readdir(dir); de; de = readdir(dir)) {
+        bool close_fd = true;
+
+        if (de->d_name[0] == '.') {
+            continue;
+        }
         fd = atoi(de->d_name);
-        if (fd != dfd) {
+        if (fd == dfd) {
+            close_fd = false;
+            continue;
+        }
+
+        for (unsigned int i = skip_start; i < skip_end; i++) {
+            if (fd < skip[i]) {
+                /* We are below the next skipped fd, break */
+                break;
+            } else if (fd == skip[i]) {
+                close_fd = false;
+                /* Restrict the range as we found fds matching start/end */
+                if (i == skip_start) {
+                    skip_start++;
+                } else if (i == skip_end) {
+                    skip_end--;
+                }
+                break;
+            }
+        }
+
+        if (close_fd) {
             close(fd);
         }
     }
@@ -833,24 +860,68 @@ static bool qemu_close_all_open_fd_proc(void)
     return true;
 }
 
-static bool qemu_close_all_open_fd_close_range(void)
+static bool qemu_close_all_open_fd_close_range(const int *skip,
+                                               unsigned int nskip,
+                                               int open_max)
 {
 #ifdef CONFIG_CLOSE_RANGE
-    int r = close_range(0, ~0U, 0);
-    if (!r) {
-        /* Success, no need to try other ways. */
-        return true;
-    }
-#endif
+    int first = 0, last = open_max;
+    unsigned int cur_skip = 0;
+    int ret;
+
+    do {
+        /* Find the start boundary of the range to close */
+        while (cur_skip < nskip && first == skip[cur_skip]) {
+            cur_skip++;
+            first++;
+        }
+
+        /* Find the upper boundary of the range to close */
+        if (cur_skip < nskip) {
+            last = skip[cur_skip] - 1;
+        }
+        /*
+         * Adjust the maximum fd to close if it's above what the system
+         * supports
+         */
+        if (last > open_max) {
+            last = open_max;
+            /*
+             * We can directly skip the remaining skip fds since the current
+             * one is already above the maximum supported one.
+             */
+            cur_skip = nskip;
+        }
+        /* If last was adjusted, first might now be > last; bail out */
+        if (first > last) {
+            break;
+        }
+
+        ret = close_range(first, last, 0);
+        if (ret < 0) {
+            return false;
+        }
+        first = last + 1;
+        last = open_max;
+    } while (cur_skip < nskip);
+
+    return true;
+#else
     return false;
+#endif
 }
 
-static void qemu_close_all_open_fd_fallback(void)
+static void qemu_close_all_open_fd_fallback(const int *skip, unsigned int nskip,
+                                            int open_max)
 {
-    int open_max = sysconf(_SC_OPEN_MAX), i;
+    unsigned int cur_skip = 0;
 
     /* Fallback */
-    for (i = 0; i < open_max; i++) {
+    for (int i = 0; i < open_max; i++) {
+        if (cur_skip < nskip && i == skip[cur_skip]) {
+            cur_skip++;
+            continue;
+        }
         close(i);
     }
 }
@@ -858,10 +929,14 @@ static void qemu_close_all_open_fd_fallback(void)
 /*
  * Close all open file descriptors.
  */
-void qemu_close_all_open_fd(void)
+void qemu_close_all_open_fd(const int *skip, unsigned int nskip)
 {
-    if (!qemu_close_all_open_fd_close_range() &&
-        !qemu_close_all_open_fd_proc()) {
-        qemu_close_all_open_fd_fallback();
+    int open_max = sysconf(_SC_OPEN_MAX) - 1;
+
+    assert(skip != NULL || nskip == 0);
+
+    if (!qemu_close_all_open_fd_close_range(skip, nskip, open_max) &&
+        !qemu_close_all_open_fd_proc(skip, nskip)) {
+        qemu_close_all_open_fd_fallback(skip, nskip, open_max);
     }
 }
-- 
2.45.2


Re: [PATCH v7 4/5] qemu/osdep: Add excluded fd parameter to qemu_close_all_open_fd()
Posted by Richard Henderson 3 months, 3 weeks ago
On 7/31/24 18:48, Clément Léger wrote:
> @@ -823,8 +824,34 @@ static bool qemu_close_all_open_fd_proc(void)
>       dfd = dirfd(dir);
>   
>       for (de = readdir(dir); de; de = readdir(dir)) {
> +        bool close_fd = true;
> +
> +        if (de->d_name[0] == '.') {
> +            continue;
> +        }
>           fd = atoi(de->d_name);
> -        if (fd != dfd) {
> +        if (fd == dfd) {
> +            close_fd = false;
> +            continue;
> +        }

Assignment to close_fd not used here.

> +
> +        for (unsigned int i = skip_start; i < skip_end; i++) {
> +            if (fd < skip[i]) {
> +                /* We are below the next skipped fd, break */
> +                break;
> +            } else if (fd == skip[i]) {
> +                close_fd = false;
> +                /* Restrict the range as we found fds matching start/end */
> +                if (i == skip_start) {
> +                    skip_start++;
> +                } else if (i == skip_end) {
> +                    skip_end--;
> +                }
> +                break;
> +            }
> +        }
> +
> +        if (close_fd) {
>               close(fd);
>           }
>       }
> @@ -833,24 +860,68 @@ static bool qemu_close_all_open_fd_proc(void)
>       return true;
>   }
>   
> -static bool qemu_close_all_open_fd_close_range(void)
> +static bool qemu_close_all_open_fd_close_range(const int *skip,
> +                                               unsigned int nskip,
> +                                               int open_max)
>   {
>   #ifdef CONFIG_CLOSE_RANGE
> -    int r = close_range(0, ~0U, 0);
> -    if (!r) {
> -        /* Success, no need to try other ways. */
> -        return true;
> -    }
> -#endif
> +    int first = 0, last = open_max;

If this were really _SC_OPEN_MAX, this would be off-by-one.

> -static void qemu_close_all_open_fd_fallback(void)
> +static void qemu_close_all_open_fd_fallback(const int *skip, unsigned int nskip,
> +                                            int open_max)
>   {
> -    int open_max = sysconf(_SC_OPEN_MAX), i;
> +    unsigned int cur_skip = 0;
>   
>       /* Fallback */
> -    for (i = 0; i < open_max; i++) {
> +    for (int i = 0; i < open_max; i++) {

Because this isn't really _SC_OPEN_MAX, this *is* off-by-one.

> -void qemu_close_all_open_fd(void)
> +void qemu_close_all_open_fd(const int *skip, unsigned int nskip)
>   {
> -    if (!qemu_close_all_open_fd_close_range() &&
> -        !qemu_close_all_open_fd_proc()) {
> -        qemu_close_all_open_fd_fallback();
> +    int open_max = sysconf(_SC_OPEN_MAX) - 1;

Better to move the -1 into qemu_close_all_open_fd_close_range where it's actually needed.


r~

Re: [PATCH v7 4/5] qemu/osdep: Add excluded fd parameter to qemu_close_all_open_fd()
Posted by Clément Léger 3 months, 3 weeks ago

On 31/07/2024 23:56, Richard Henderson wrote:
> On 7/31/24 18:48, Clément Léger wrote:
>> @@ -823,8 +824,34 @@ static bool qemu_close_all_open_fd_proc(void)
>>       dfd = dirfd(dir);
>>         for (de = readdir(dir); de; de = readdir(dir)) {
>> +        bool close_fd = true;
>> +
>> +        if (de->d_name[0] == '.') {
>> +            continue;
>> +        }
>>           fd = atoi(de->d_name);
>> -        if (fd != dfd) {
>> +        if (fd == dfd) {
>> +            close_fd = false;
>> +            continue;
>> +        }
> 
> Assignment to close_fd not used here.
> 
>> +
>> +        for (unsigned int i = skip_start; i < skip_end; i++) {
>> +            if (fd < skip[i]) {
>> +                /* We are below the next skipped fd, break */
>> +                break;
>> +            } else if (fd == skip[i]) {
>> +                close_fd = false;
>> +                /* Restrict the range as we found fds matching
>> start/end */
>> +                if (i == skip_start) {
>> +                    skip_start++;
>> +                } else if (i == skip_end) {
>> +                    skip_end--;
>> +                }
>> +                break;
>> +            }
>> +        }
>> +
>> +        if (close_fd) {
>>               close(fd);
>>           }
>>       }
>> @@ -833,24 +860,68 @@ static bool qemu_close_all_open_fd_proc(void)
>>       return true;
>>   }
>>   -static bool qemu_close_all_open_fd_close_range(void)
>> +static bool qemu_close_all_open_fd_close_range(const int *skip,
>> +                                               unsigned int nskip,
>> +                                               int open_max)
>>   {
>>   #ifdef CONFIG_CLOSE_RANGE
>> -    int r = close_range(0, ~0U, 0);
>> -    if (!r) {
>> -        /* Success, no need to try other ways. */
>> -        return true;
>> -    }
>> -#endif
>> +    int first = 0, last = open_max;
> 
> If this were really _SC_OPEN_MAX, this would be off-by-one.
> 
>> -static void qemu_close_all_open_fd_fallback(void)
>> +static void qemu_close_all_open_fd_fallback(const int *skip, unsigned
>> int nskip,
>> +                                            int open_max)
>>   {
>> -    int open_max = sysconf(_SC_OPEN_MAX), i;
>> +    unsigned int cur_skip = 0;
>>         /* Fallback */
>> -    for (i = 0; i < open_max; i++) {
>> +    for (int i = 0; i < open_max; i++) {
> 
> Because this isn't really _SC_OPEN_MAX, this *is* off-by-one.
> 
>> -void qemu_close_all_open_fd(void)
>> +void qemu_close_all_open_fd(const int *skip, unsigned int nskip)
>>   {
>> -    if (!qemu_close_all_open_fd_close_range() &&
>> -        !qemu_close_all_open_fd_proc()) {
>> -        qemu_close_all_open_fd_fallback();
>> +    int open_max = sysconf(_SC_OPEN_MAX) - 1;
> 
> Better to move the -1 into qemu_close_all_open_fd_close_range where it's
> actually needed.

Argh, I had it right in the previous version but broke it when factoring
out the _SC_OPEN_MAX lookup.

I'll fix that,

Thanks,

Clément

> 
> 
> r~