In order for qemu_close_all_open_fd() to be usable by tap.c code, add a
list of file descriptors that should not be closed.
Signed-off-by: Clément Léger <cleger@rivosinc.com>
---
include/qemu/osdep.h | 8 ++-
system/async-teardown.c | 2 +-
util/oslib-posix.c | 107 ++++++++++++++++++++++++++++++++++------
3 files changed, 98 insertions(+), 19 deletions(-)
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 5cd8517380..0bf6f0a356 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -760,9 +760,13 @@ int qemu_fdatasync(int fd);
/**
* qemu_close_all_open_fd:
*
- * Close all open file descriptors
+ * Close all open file descriptors except the ones supplied in the @skip array
+ *
+ * @skip: ordered array of distinct file descriptors that should not be closed
+ * if any, or NULL.
+ * @nskip: number of entries in the @skip array or 0 if @skip is NULL.
*/
-void qemu_close_all_open_fd(void);
+void qemu_close_all_open_fd(const int *skip, unsigned int nskip);
/**
* Sync changes made to the memory mapped file back to the backing
diff --git a/system/async-teardown.c b/system/async-teardown.c
index edf49e1007..9148ee8d04 100644
--- a/system/async-teardown.c
+++ b/system/async-teardown.c
@@ -52,7 +52,7 @@ static int async_teardown_fn(void *arg)
* Close all file descriptors that might have been inherited from the
* main qemu process when doing clone, needed to make libvirt happy.
*/
- qemu_close_all_open_fd();
+ qemu_close_all_open_fd(NULL, 0);
/* Set up a handler for SIGHUP and unblock SIGHUP. */
sigaction(SIGHUP, &sa, NULL);
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 9b79fc7cff..7583192192 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -808,11 +808,12 @@ int qemu_msync(void *addr, size_t length, int fd)
return msync(addr, length, MS_SYNC);
}
-static bool qemu_close_all_open_fd_proc(void)
+static bool qemu_close_all_open_fd_proc(const int *skip, unsigned int nskip)
{
struct dirent *de;
int fd, dfd;
DIR *dir;
+ unsigned int skip_start = 0, skip_end = nskip;
dir = opendir("/proc/self/fd");
if (!dir) {
@@ -823,8 +824,34 @@ static bool qemu_close_all_open_fd_proc(void)
dfd = dirfd(dir);
for (de = readdir(dir); de; de = readdir(dir)) {
+ bool close_fd = true;
+
+ if (de->d_name[0] == '.') {
+ continue;
+ }
fd = atoi(de->d_name);
- if (fd != dfd) {
+ if (fd == dfd) {
+ close_fd = false;
+ continue;
+ }
+
+ for (unsigned int i = skip_start; i < skip_end; i++) {
+ if (fd < skip[i]) {
+ /* We are below the next skipped fd, break */
+ break;
+ } else if (fd == skip[i]) {
+ close_fd = false;
+ /* Restrict the range as we found fds matching start/end */
+ if (i == skip_start) {
+ skip_start++;
+ } else if (i == skip_end) {
+ skip_end--;
+ }
+ break;
+ }
+ }
+
+ if (close_fd) {
close(fd);
}
}
@@ -833,24 +860,68 @@ static bool qemu_close_all_open_fd_proc(void)
return true;
}
-static bool qemu_close_all_open_fd_close_range(void)
+static bool qemu_close_all_open_fd_close_range(const int *skip,
+ unsigned int nskip,
+ int open_max)
{
#ifdef CONFIG_CLOSE_RANGE
- int r = close_range(0, ~0U, 0);
- if (!r) {
- /* Success, no need to try other ways. */
- return true;
- }
-#endif
+ int first = 0, last = open_max;
+ unsigned int cur_skip = 0;
+ int ret;
+
+ do {
+ /* Find the start boundary of the range to close */
+ while (cur_skip < nskip && first == skip[cur_skip]) {
+ cur_skip++;
+ first++;
+ }
+
+ /* Find the upper boundary of the range to close */
+ if (cur_skip < nskip) {
+ last = skip[cur_skip] - 1;
+ }
+ /*
+ * Adjust the maximum fd to close if it's above what the system
+ * supports
+ */
+ if (last > open_max) {
+ last = open_max;
+ /*
+ * We can directly skip the remaining skip fds since the current
+ * one is already above the maximum supported one.
+ */
+ cur_skip = nskip;
+ }
/* If last was adjusted, first might now be > last, bail out */
+ if (first > last) {
+ break;
+ }
+
+ ret = close_range(first, last, 0);
+ if (ret < 0) {
+ return false;
+ }
+ first = last + 1;
+ last = open_max;
+ } while (cur_skip < nskip);
+
+ return true;
+#else
return false;
+#endif
}
-static void qemu_close_all_open_fd_fallback(void)
+static void qemu_close_all_open_fd_fallback(const int *skip, unsigned int nskip,
+ int open_max)
{
- int open_max = sysconf(_SC_OPEN_MAX), i;
+ unsigned int cur_skip = 0;
/* Fallback */
- for (i = 0; i < open_max; i++) {
+ for (int i = 0; i < open_max; i++) {
+ if (cur_skip < nskip && i == skip[cur_skip]) {
+ cur_skip++;
+ continue;
+ }
close(i);
}
}
@@ -858,10 +929,14 @@ static void qemu_close_all_open_fd_fallback(void)
/*
* Close all open file descriptors.
*/
-void qemu_close_all_open_fd(void)
+void qemu_close_all_open_fd(const int *skip, unsigned int nskip)
{
- if (!qemu_close_all_open_fd_close_range() &&
- !qemu_close_all_open_fd_proc()) {
- qemu_close_all_open_fd_fallback();
+ int open_max = sysconf(_SC_OPEN_MAX) - 1;
+
+ assert(skip != NULL || nskip == 0);
+
+ if (!qemu_close_all_open_fd_close_range(skip, nskip, open_max) &&
+ !qemu_close_all_open_fd_proc(skip, nskip)) {
+ qemu_close_all_open_fd_fallback(skip, nskip, open_max);
}
}
--
2.45.2
On 7/31/24 18:48, Clément Léger wrote:
> @@ -823,8 +824,34 @@ static bool qemu_close_all_open_fd_proc(void)
> dfd = dirfd(dir);
>
> for (de = readdir(dir); de; de = readdir(dir)) {
> + bool close_fd = true;
> +
> + if (de->d_name[0] == '.') {
> + continue;
> + }
> fd = atoi(de->d_name);
> - if (fd != dfd) {
> + if (fd == dfd) {
> + close_fd = false;
> + continue;
> + }
Assignment to close_fd not used here.
> +
> + for (unsigned int i = skip_start; i < skip_end; i++) {
> + if (fd < skip[i]) {
> + /* We are below the next skipped fd, break */
> + break;
> + } else if (fd == skip[i]) {
> + close_fd = false;
> + /* Restrict the range as we found fds matching start/end */
> + if (i == skip_start) {
> + skip_start++;
> + } else if (i == skip_end) {
> + skip_end--;
> + }
> + break;
> + }
> + }
> +
> + if (close_fd) {
> close(fd);
> }
> }
> @@ -833,24 +860,68 @@ static bool qemu_close_all_open_fd_proc(void)
> return true;
> }
>
> -static bool qemu_close_all_open_fd_close_range(void)
> +static bool qemu_close_all_open_fd_close_range(const int *skip,
> + unsigned int nskip,
> + int open_max)
> {
> #ifdef CONFIG_CLOSE_RANGE
> - int r = close_range(0, ~0U, 0);
> - if (!r) {
> - /* Success, no need to try other ways. */
> - return true;
> - }
> -#endif
> + int first = 0, last = open_max;
If this were really _SC_OPEN_MAX, this would be off-by-one.
> -static void qemu_close_all_open_fd_fallback(void)
> +static void qemu_close_all_open_fd_fallback(const int *skip, unsigned int nskip,
> + int open_max)
> {
> - int open_max = sysconf(_SC_OPEN_MAX), i;
> + unsigned int cur_skip = 0;
>
> /* Fallback */
> - for (i = 0; i < open_max; i++) {
> + for (int i = 0; i < open_max; i++) {
Because this isn't really _SC_OPEN_MAX, this *is* off-by-one.
> -void qemu_close_all_open_fd(void)
> +void qemu_close_all_open_fd(const int *skip, unsigned int nskip)
> {
> - if (!qemu_close_all_open_fd_close_range() &&
> - !qemu_close_all_open_fd_proc()) {
> - qemu_close_all_open_fd_fallback();
> + int open_max = sysconf(_SC_OPEN_MAX) - 1;
Better to move the -1 into qemu_close_all_open_fd_close_range where it's actually needed.
r~
On 31/07/2024 23:56, Richard Henderson wrote:
> On 7/31/24 18:48, Clément Léger wrote:
>> @@ -823,8 +824,34 @@ static bool qemu_close_all_open_fd_proc(void)
>> dfd = dirfd(dir);
>> for (de = readdir(dir); de; de = readdir(dir)) {
>> + bool close_fd = true;
>> +
>> + if (de->d_name[0] == '.') {
>> + continue;
>> + }
>> fd = atoi(de->d_name);
>> - if (fd != dfd) {
>> + if (fd == dfd) {
>> + close_fd = false;
>> + continue;
>> + }
>
> Assignment to close_fd not used here.
>
>> +
>> + for (unsigned int i = skip_start; i < skip_end; i++) {
>> + if (fd < skip[i]) {
>> + /* We are below the next skipped fd, break */
>> + break;
>> + } else if (fd == skip[i]) {
>> + close_fd = false;
>> + /* Restrict the range as we found fds matching
>> start/end */
>> + if (i == skip_start) {
>> + skip_start++;
>> + } else if (i == skip_end) {
>> + skip_end--;
>> + }
>> + break;
>> + }
>> + }
>> +
>> + if (close_fd) {
>> close(fd);
>> }
>> }
>> @@ -833,24 +860,68 @@ static bool qemu_close_all_open_fd_proc(void)
>> return true;
>> }
>> -static bool qemu_close_all_open_fd_close_range(void)
>> +static bool qemu_close_all_open_fd_close_range(const int *skip,
>> + unsigned int nskip,
>> + int open_max)
>> {
>> #ifdef CONFIG_CLOSE_RANGE
>> - int r = close_range(0, ~0U, 0);
>> - if (!r) {
>> - /* Success, no need to try other ways. */
>> - return true;
>> - }
>> -#endif
>> + int first = 0, last = open_max;
>
> If this were really _SC_OPEN_MAX, this would be off-by-one.
>
>> -static void qemu_close_all_open_fd_fallback(void)
>> +static void qemu_close_all_open_fd_fallback(const int *skip, unsigned
>> int nskip,
>> + int open_max)
>> {
>> - int open_max = sysconf(_SC_OPEN_MAX), i;
>> + unsigned int cur_skip = 0;
>> /* Fallback */
>> - for (i = 0; i < open_max; i++) {
>> + for (int i = 0; i < open_max; i++) {
>
> Because this isn't really _SC_OPEN_MAX, this *is* off-by-one.
>
>> -void qemu_close_all_open_fd(void)
>> +void qemu_close_all_open_fd(const int *skip, unsigned int nskip)
>> {
>> - if (!qemu_close_all_open_fd_close_range() &&
>> - !qemu_close_all_open_fd_proc()) {
>> - qemu_close_all_open_fd_fallback();
>> + int open_max = sysconf(_SC_OPEN_MAX) - 1;
>
> Better to move the -1 into qemu_close_all_open_fd_close_range where it's
> actually needed.
Argh, I had it right in the previous version but messed it up with the
_SC_OPEN_MAX factorization.
I'll fix that,
Thanks,
Clément
>
>
> r~
© 2016 - 2026 Red Hat, Inc.