This introduces a new QEMU API qemu_close_range() that closes all
open file descriptors from first to last (inclusive).
This API will try a more efficient call to close_range(), or walk
through /proc/self/fd, whenever these are possible; otherwise it
falls back to a plain close loop.
Co-developed-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Bin Meng <bmeng@tinylab.org>
---
Changes in v4:
- add 'first > last' check logic
- reorder the ifdefs logic
- change i to unsigned int type
- use qemu_strtoi() instead of atoi()
- limit last upper value to sysconf(_SC_OPEN_MAX) - 1
Changes in v3:
- fix win32 build failure
Changes in v2:
- new patch: "util/osdep: Introduce qemu_close_range()"
include/qemu/osdep.h | 1 +
util/osdep.c | 60 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+)
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index cc61b00ba9..e22434ce10 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -560,6 +560,7 @@ int qemu_open_old(const char *name, int flags, ...);
int qemu_open(const char *name, int flags, Error **errp);
int qemu_create(const char *name, int flags, mode_t mode, Error **errp);
int qemu_close(int fd);
+int qemu_close_range(unsigned int first, unsigned int last);
int qemu_unlink(const char *name);
#ifndef _WIN32
int qemu_dup_flags(int fd, int flags);
diff --git a/util/osdep.c b/util/osdep.c
index e996c4744a..1d8c719b3f 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -411,6 +411,66 @@ int qemu_close(int fd)
return close(fd);
}
+int qemu_close_range(unsigned int first, unsigned int last)
+{ /* Close every fd in [first, last], both ends included. */
+ if (first > last) {
+ errno = EINVAL; /* inverted range: fail with -1 / EINVAL */
+ return -1;
+ }
+ /* Non-Windows hosts only: cap last at sysconf(_SC_OPEN_MAX) - 1. */
+#ifndef _WIN32
+ if (last >= sysconf(_SC_OPEN_MAX)) {
+ last = sysconf(_SC_OPEN_MAX) - 1;
+ }
+#endif
+ /* With CONFIG_CLOSE_RANGE: try one close_range() call first. */
+#ifdef CONFIG_CLOSE_RANGE
+ int r = close_range(first, last, 0);
+ if (!r) {
+ /* close_range() succeeded; on failure fall through to the methods below */
+ return 0;
+ }
+#endif
+ /* On Linux: close only the fds that are listed in /proc/self/fd. */
+#ifdef __linux__
+ DIR *dir = opendir("/proc/self/fd");
+ if (dir) {
+ /* fd backing the directory stream; the loop below must not close it */
+ int dfd = dirfd(dir);
+
+ for (struct dirent *de = readdir(dir); de; de = readdir(dir)) {
+ int fd, ret;
+
+ ret = qemu_strtoi(de->d_name, NULL, 10, &fd);
+ if (ret) {
+ /* qemu_strtoi() reported an error: skip (expected for "." and "..") */
+ continue;
+ }
+ if (fd < first || fd > last) {
+ /* Outside [first, last] (int fd is compared as unsigned): leave it open */
+ continue;
+ }
+ if (fd != dfd) {
+ close(fd);
+ }
+ }
+ closedir(dir);
+
+ return 0;
+ }
+#endif
+
+ /*
+ * Last resort, reached when none of the paths above returned (e.g.
+ * /proc/self/fd could not be opened): close() every number in range.
+ */
+ for (unsigned int i = first; i <= last; i++) {
+ close(i);
+ }
+ /* Errors from the individual close() calls are not reported. */
+ return 0;
+}
+
/*
* Delete a file from the filesystem, unless the filename is /dev/fdset/...
*
--
2.34.1
Bin Meng <bmeng@tinylab.org> writes:
> This introduces a new QEMU API qemu_close_range() that closes all
> open file descriptors from first to last (inclusive).
>
> This API will try a more efficient call to close_range(), or walk
> through /proc/self/fd, whenever these are possible; otherwise it
> falls back to a plain close loop.
>
> Co-developed-by: Zhangjin Wu <falcon@tinylab.org>
> Signed-off-by: Bin Meng <bmeng@tinylab.org>
>
> ---
>
> Changes in v4:
> - add 'first > last' check logic
> - reorder the ifdefs logic
> - change i to unsigned int type
> - use qemu_strtoi() instead of atoi()
> - limit last upper value to sysconf(_SC_OPEN_MAX) - 1
>
> Changes in v3:
> - fix win32 build failure
>
> Changes in v2:
> - new patch: "util/osdep: Introduce qemu_close_range()"
>
> include/qemu/osdep.h | 1 +
> util/osdep.c | 60 ++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 61 insertions(+)
>
> diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> index cc61b00ba9..e22434ce10 100644
> --- a/include/qemu/osdep.h
> +++ b/include/qemu/osdep.h
> @@ -560,6 +560,7 @@ int qemu_open_old(const char *name, int flags, ...);
> int qemu_open(const char *name, int flags, Error **errp);
> int qemu_create(const char *name, int flags, mode_t mode, Error **errp);
> int qemu_close(int fd);
> +int qemu_close_range(unsigned int first, unsigned int last);
> int qemu_unlink(const char *name);
> #ifndef _WIN32
> int qemu_dup_flags(int fd, int flags);
> diff --git a/util/osdep.c b/util/osdep.c
> index e996c4744a..1d8c719b3f 100644
> --- a/util/osdep.c
> +++ b/util/osdep.c
> @@ -411,6 +411,66 @@ int qemu_close(int fd)
> return close(fd);
> }
>
> +int qemu_close_range(unsigned int first, unsigned int last)
> +{
> + if (first > last) {
> + errno = EINVAL;
> + return -1;
> + }
> +
> +#ifndef _WIN32
> + if (last >= sysconf(_SC_OPEN_MAX)) {
> + last = sysconf(_SC_OPEN_MAX) - 1;
> + }
> +#endif
> +
> +#ifdef CONFIG_CLOSE_RANGE
> + int r = close_range(first, last, 0);
This is a TOCTTOU (time-of-check to time-of-use) race if
sysconf(_SC_OPEN_MAX) can change at run time.
Say the caller passes ~0U to @last, like the example program in
close_range()'s manual page does.
Since this is larger than sysconf(_SC_OPEN_MAX), we clamp it to
sysconf(_SC_OPEN_MAX) - 1. If the actual limit increases before we get
to call close_range(), we can fail to close all fds.
This can't happen if we simply drop the clamping.
> + if (!r) {
> + /* Success, no need to try other ways */
> + return 0;
> + }
> +#endif
What are the failure modes of close_range() where the other ways are
worth trying? I asked this in review of v3, and you replied it should
only ever fail when first > last, which you catch before getting here in
v4.
Why not simply return r?
> +
> +#ifdef __linux__
> + DIR *dir = opendir("/proc/self/fd");
> + if (dir) {
> + /* Avoid closing the directory */
> + int dfd = dirfd(dir);
> +
> + for (struct dirent *de = readdir(dir); de; de = readdir(dir)) {
> + int fd, ret;
> +
> + ret = qemu_strtoi(de->d_name, NULL, 10, &fd);
> + if (ret) {
> + /* skip "." and ".." */
Anything that isn't a decimal integer, actually. In practice, that
should be just "." and "..".
> + continue;
> + }
> + if (fd < first || fd > last) {
Not clamping @last is just fine here.
> + /* Exclude the fds outside the target range */
I suggest putting this comment right before the if.
> + continue;
> + }
> + if (fd != dfd) {
> + close(fd);
> + }
Do we still need this check now that we skip "."?
> + }
> + closedir(dir);
> +
> + return 0;
> + }
> +#endif
> +
> + /*
> + * If /proc is not mounted or /proc/self/fd is not supported,
> + * try close() from first to last.
> + */
> + for (unsigned int i = first; i <= last; i++) {
Here, we do need to stop at sysconf(_SC_OPEN_MAX) - 1. I recommend
moving the clamping to just before this loop.
It is still a TOCTTOU race, but that is acceptable here, because this
fallback is fundamentally racy no matter what.
> + close(i);
> + }
> +
> + return 0;
> +}
> +
> /*
> * Delete a file from the filesystem, unless the filename is /dev/fdset/...
> *
© 2016 - 2026 Red Hat, Inc.