From: Juraj Marcin <jmarcin@redhat.com>
With the default TCP stack configuration, it could be even 2 hours
before the connection times out due to the other side not being
reachable. However, in some cases, the application needs to be aware of
a connection issue much sooner.
This is the case, for example, for postcopy live migration. If there is
no traffic from the migration destination guest (server-side) to the
migration source guest (client-side), the destination keeps waiting for
pages indefinitely and does not switch to the postcopy-paused state.
This can happen, for example, if the destination QEMU instance is
started with the '-S' command line option and the machine is not started
yet, or if the machine is idle and produces no new page faults for
not-yet-migrated pages.
This patch introduces new inet socket parameters that control count,
idle period, and interval of TCP keep-alive packets before the
connection is considered broken. These parameters are available on
systems where the respective TCP socket options are defined
(TCP_KEEPCNT, TCP_KEEPIDLE, TCP_KEEPINTVL).
The default value for all is 0, which means the system configuration is
used.
Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
---
meson.build | 6 ++++
qapi/sockets.json | 15 ++++++++
util/qemu-sockets.c | 88 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 109 insertions(+)
diff --git a/meson.build b/meson.build
index 41f68d3806..680f47cf42 100644
--- a/meson.build
+++ b/meson.build
@@ -2734,6 +2734,12 @@ if linux_io_uring.found()
config_host_data.set('HAVE_IO_URING_PREP_WRITEV2',
cc.has_header_symbol('liburing.h', 'io_uring_prep_writev2'))
endif
+config_host_data.set('HAVE_TCP_KEEPCNT',
+ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPCNT'))
+config_host_data.set('HAVE_TCP_KEEPIDLE',
+ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPIDLE'))
+config_host_data.set('HAVE_TCP_KEEPINTVL',
+ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPINTVL'))
# has_member
config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',
diff --git a/qapi/sockets.json b/qapi/sockets.json
index 62797cd027..bb9d298635 100644
--- a/qapi/sockets.json
+++ b/qapi/sockets.json
@@ -59,6 +59,18 @@
# @keep-alive: enable keep-alive when connecting to/listening on this socket.
# (Since 4.2, not supported for listening sockets until 10.1)
#
+# @keep-alive-count: number of keep-alive packets sent before the connection is
+# closed. Only supported for TCP sockets on systems where TCP_KEEPCNT
+# socket option is defined. (Since 10.1)
+#
+# @keep-alive-idle: time in seconds the connection needs to be idle before
+# sending a keepalive packet. Only supported for TCP sockets on systems
+# where TCP_KEEPIDLE socket option is defined. (Since 10.1)
+#
+# @keep-alive-interval: time in seconds between keep-alive packets. Only
+# supported for TCP sockets on systems where TCP_KEEPINTVL is defined.
+# (Since 10.1)
+#
# @mptcp: enable multi-path TCP. (Since 6.1)
#
# Since: 1.3
@@ -71,6 +83,9 @@
'*ipv4': 'bool',
'*ipv6': 'bool',
'*keep-alive': 'bool',
+ '*keep-alive-count': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPCNT' },
+ '*keep-alive-idle': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPIDLE' },
+ '*keep-alive-interval': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPINTVL' },
'*mptcp': { 'type': 'bool', 'if': 'HAVE_IPPROTO_MPTCP' } } }
##
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index fed17a1ffb..8e355b097c 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -217,6 +217,45 @@ static int inet_set_sockopts(int sock, InetSocketAddress *saddr, Error **errp)
"Unable to set keep-alive option on socket");
return -1;
}
+#ifdef HAVE_TCP_KEEPCNT
+ if (saddr->has_keep_alive_count &&
+ saddr->keep_alive_count) {
+ int keep_count = saddr->keep_alive_count;
+ ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &keep_count,
+ sizeof(keep_count));
+ if (ret < 0) {
+ error_setg_errno(errp, errno,
+ "Unable to set TCP keep-alive count option on socket");
+ return -1;
+ }
+ }
+#endif
+#ifdef HAVE_TCP_KEEPIDLE
+ if (saddr->has_keep_alive_idle &&
+ saddr->keep_alive_idle) {
+ int keep_idle = saddr->keep_alive_idle;
+ ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &keep_idle,
+ sizeof(keep_idle));
+ if (ret < 0) {
+ error_setg_errno(errp, errno,
+ "Unable to set TCP keep-alive idle option on socket");
+ return -1;
+ }
+ }
+#endif
+#ifdef HAVE_TCP_KEEPINTVL
+ if (saddr->has_keep_alive_interval &&
+ saddr->keep_alive_interval) {
+ int keep_interval = saddr->keep_alive_interval;
+ ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &keep_interval,
+ sizeof(keep_interval));
+ if (ret < 0) {
+ error_setg_errno(errp, errno,
+ "Unable to set TCP keep-alive interval option on socket");
+ return -1;
+ }
+ }
+#endif
}
return 0;
}
@@ -628,6 +667,22 @@ static int inet_parse_flag(const char *flagname, const char *optstr, bool *val,
return 0;
}
+static int inet_parse_u32(const char *optname, const char *optstr,
+ uint32_t max, uint32_t *val, Error **errp)
+{
+ int pos;
+ if (sscanf(optstr, "%" SCNu32 "%n", val, &pos) != 1 ||
+ (optstr[pos] != '\0' && optstr[pos] != ',')) {
+ error_setg(errp, "error parsing %s argument", optname);
+ return -1;
+ }
+ if (*val > max) {
+ error_setg(errp, "%s is too large", optname);
+ return -1;
+ }
+ return 0;
+}
+
int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
{
const char *optstr, *h;
@@ -700,6 +755,39 @@ int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
}
addr->has_keep_alive = true;
}
+#ifdef HAVE_TCP_KEEPCNT
+ begin = strstr(optstr, ",keep-alive-count=");
+ if (begin) {
+ if (inet_parse_u32("keep-alive-count",
+ begin + strlen(",keep-alive-count="), INT_MAX,
+ &addr->keep_alive_count, errp)) {
+ return -1;
+ }
+ addr->has_keep_alive_count = true;
+ }
+#endif
+#ifdef HAVE_TCP_KEEPIDLE
+ begin = strstr(optstr, ",keep-alive-idle=");
+ if (begin) {
+ if (inet_parse_u32("keep-alive-idle",
+ begin + strlen(",keep-alive-idle="), INT_MAX,
+ &addr->keep_alive_idle, errp)) {
+ return -1;
+ }
+ addr->has_keep_alive_idle = true;
+ }
+#endif
+#ifdef HAVE_TCP_KEEPINTVL
+ begin = strstr(optstr, ",keep-alive-interval=");
+ if (begin) {
+ if (inet_parse_u32("keep-alive-interval",
+ begin + strlen(",keep-alive-interval="), INT_MAX,
+ &addr->keep_alive_interval, errp)) {
+ return -1;
+ }
+ addr->has_keep_alive_interval = true;
+ }
+#endif
#ifdef HAVE_IPPROTO_MPTCP
begin = strstr(optstr, ",mptcp");
if (begin) {
--
2.48.1
On Tue, Apr 08, 2025 at 01:25:04PM +0200, Juraj Marcin wrote:
> From: Juraj Marcin <jmarcin@redhat.com>
>
> With the default TCP stack configuration, it could be even 2 hours
> before the connection times out due to the other side not being
> reachable. However, in some cases, the application needs to be aware of
> a connection issue much sooner.
>
> This is the case, for example, for postcopy live migration. If there is
> no traffic from the migration destination guest (server-side) to the
> migration source guest (client-side), the destination keeps waiting for
> pages indefinitely and does not switch to the postcopy-paused state.
> This can happen, for example, if the destination QEMU instance is
> started with the '-S' command line option and the machine is not started
> yet, or if the machine is idle and produces no new page faults for
> not-yet-migrated pages.
>
> This patch introduces new inet socket parameters that control count,
> idle period, and interval of TCP keep-alive packets before the
> connection is considered broken. These parameters are available on
> systems where the respective TCP socket options are defined
> (TCP_KEEPCNT, TCP_KEEPIDLE, TCP_KEEPINTVL).
>
> The default value for all is 0, which means the system configuration is
> used.
>
> Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
> ---
> meson.build | 6 ++++
> qapi/sockets.json | 15 ++++++++
> util/qemu-sockets.c | 88 +++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 109 insertions(+)
>
> diff --git a/meson.build b/meson.build
> index 41f68d3806..680f47cf42 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -2734,6 +2734,12 @@ if linux_io_uring.found()
> config_host_data.set('HAVE_IO_URING_PREP_WRITEV2',
> cc.has_header_symbol('liburing.h', 'io_uring_prep_writev2'))
> endif
> +config_host_data.set('HAVE_TCP_KEEPCNT',
> + cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPCNT'))
> +config_host_data.set('HAVE_TCP_KEEPIDLE',
> + cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPIDLE'))
> +config_host_data.set('HAVE_TCP_KEEPINTVL',
> + cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPINTVL'))
What platforms are you aware of that do NOT have these
settings available ? I'm wondering if we can just assume
they always exist.
>
> # has_member
> config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',
> diff --git a/qapi/sockets.json b/qapi/sockets.json
> index 62797cd027..bb9d298635 100644
> --- a/qapi/sockets.json
> +++ b/qapi/sockets.json
> @@ -59,6 +59,18 @@
> # @keep-alive: enable keep-alive when connecting to/listening on this socket.
> # (Since 4.2, not supported for listening sockets until 10.1)
> #
> +# @keep-alive-count: number of keep-alive packets sent before the connection is
> +# closed. Only supported for TCP sockets on systems where TCP_KEEPCNT
> +# socket option is defined. (Since 10.1)
> +#
> +# @keep-alive-idle: time in seconds the connection needs to be idle before
> +# sending a keepalive packet. Only supported for TCP sockets on systems
> +# where TCP_KEEPIDLE socket option is defined. (Since 10.1)
> +#
> +# @keep-alive-interval: time in secods between keep-alive packets. Only
Trivial typo s/secods/seconds/
> +# supported for TCP sockets on systems where TCP_KEEPINTVL is defined.
> +# (Since 10.1)
> +#
> # @mptcp: enable multi-path TCP. (Since 6.1)
> #
> # Since: 1.3
> @@ -71,6 +83,9 @@
> '*ipv4': 'bool',
> '*ipv6': 'bool',
> '*keep-alive': 'bool',
> + '*keep-alive-count': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPCNT' },
> + '*keep-alive-idle': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPIDLE' },
> + '*keep-alive-interval': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPINTVL' },
> '*mptcp': { 'type': 'bool', 'if': 'HAVE_IPPROTO_MPTCP' } } }
>
> ##
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Fri, Apr 11, 2025 at 02:54:29PM +0100, Daniel P. Berrangé wrote:
> On Tue, Apr 08, 2025 at 01:25:04PM +0200, Juraj Marcin wrote:
> > From: Juraj Marcin <jmarcin@redhat.com>
> >
> > With the default TCP stack configuration, it could be even 2 hours
> > before the connection times out due to the other side not being
> > reachable. However, in some cases, the application needs to be aware of
> > a connection issue much sooner.
> >
> > This is the case, for example, for postcopy live migration. If there is
> > no traffic from the migration destination guest (server-side) to the
> > migration source guest (client-side), the destination keeps waiting for
> > pages indefinitely and does not switch to the postcopy-paused state.
> > This can happen, for example, if the destination QEMU instance is
> > started with the '-S' command line option and the machine is not started
> > yet, or if the machine is idle and produces no new page faults for
> > not-yet-migrated pages.
> >
> > This patch introduces new inet socket parameters that control count,
> > idle period, and interval of TCP keep-alive packets before the
> > connection is considered broken. These parameters are available on
> > systems where the respective TCP socket options are defined
> > (TCP_KEEPCNT, TCP_KEEPIDLE, TCP_KEEPINTVL).
> >
> > The default value for all is 0, which means the system configuration is
> > used.
> >
> > Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
> > ---
> > meson.build | 6 ++++
> > qapi/sockets.json | 15 ++++++++
> > util/qemu-sockets.c | 88 +++++++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 109 insertions(+)
> >
> > diff --git a/meson.build b/meson.build
> > index 41f68d3806..680f47cf42 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -2734,6 +2734,12 @@ if linux_io_uring.found()
> > config_host_data.set('HAVE_IO_URING_PREP_WRITEV2',
> > cc.has_header_symbol('liburing.h', 'io_uring_prep_writev2'))
> > endif
> > +config_host_data.set('HAVE_TCP_KEEPCNT',
> > + cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPCNT'))
> > +config_host_data.set('HAVE_TCP_KEEPIDLE',
> > + cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPIDLE'))
> > +config_host_data.set('HAVE_TCP_KEEPINTVL',
> > + cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPINTVL'))
>
> What platforms are you aware of that do NOT have these
> settings available ? I'm wondering if we can just assume
> they always exist.
macOS appears to have these, except that 'TCP_KEEPIDLE' is under a
different name 'TCP_KEEPALIVE':
https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/bsd/man/man4/tcp.4#L172
Likewise I see them available in mingw for Windows builds, with both
names
$ grep -r TCP_KEEP /usr/i686-w64-mingw32/sys-root/mingw/include/
/usr/i686-w64-mingw32/sys-root/mingw/include/ws2ipdef.h:#define TCP_KEEPALIVE 3
/usr/i686-w64-mingw32/sys-root/mingw/include/ws2ipdef.h:#define TCP_KEEPCNT 16
/usr/i686-w64-mingw32/sys-root/mingw/include/ws2ipdef.h:#define TCP_KEEPIDLE TCP_KEEPALIVE
/usr/i686-w64-mingw32/sys-root/mingw/include/ws2ipdef.h:#define TCP_KEEPINTVL 17
but your patch wouldn't enable it because it checks netinet/tcp.h
AFAICT, the only platform that matters to QEMU that seems to miss this
is OpenBSD
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
Hi Daniel
On 2025-04-11 16:49, Daniel P. Berrangé wrote:
> On Fri, Apr 11, 2025 at 02:54:29PM +0100, Daniel P. Berrangé wrote:
> > On Tue, Apr 08, 2025 at 01:25:04PM +0200, Juraj Marcin wrote:
> > > From: Juraj Marcin <jmarcin@redhat.com>
> > >
> > > With the default TCP stack configuration, it could be even 2 hours
> > > before the connection times out due to the other side not being
> > > reachable. However, in some cases, the application needs to be aware of
> > > a connection issue much sooner.
> > >
> > > This is the case, for example, for postcopy live migration. If there is
> > > no traffic from the migration destination guest (server-side) to the
> > > migration source guest (client-side), the destination keeps waiting for
> > > pages indefinitely and does not switch to the postcopy-paused state.
> > > This can happen, for example, if the destination QEMU instance is
> > > started with the '-S' command line option and the machine is not started
> > > yet, or if the machine is idle and produces no new page faults for
> > > not-yet-migrated pages.
> > >
> > > This patch introduces new inet socket parameters that control count,
> > > idle period, and interval of TCP keep-alive packets before the
> > > connection is considered broken. These parameters are available on
> > > systems where the respective TCP socket options are defined
> > > (TCP_KEEPCNT, TCP_KEEPIDLE, TCP_KEEPINTVL).
> > >
> > > The default value for all is 0, which means the system configuration is
> > > used.
> > >
> > > Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
> > > ---
> > > meson.build | 6 ++++
> > > qapi/sockets.json | 15 ++++++++
> > > util/qemu-sockets.c | 88 +++++++++++++++++++++++++++++++++++++++++++++
> > > 3 files changed, 109 insertions(+)
> > >
> > > diff --git a/meson.build b/meson.build
> > > index 41f68d3806..680f47cf42 100644
> > > --- a/meson.build
> > > +++ b/meson.build
> > > @@ -2734,6 +2734,12 @@ if linux_io_uring.found()
> > > config_host_data.set('HAVE_IO_URING_PREP_WRITEV2',
> > > cc.has_header_symbol('liburing.h', 'io_uring_prep_writev2'))
> > > endif
> > > +config_host_data.set('HAVE_TCP_KEEPCNT',
> > > + cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPCNT'))
> > > +config_host_data.set('HAVE_TCP_KEEPIDLE',
> > > + cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPIDLE'))
> > > +config_host_data.set('HAVE_TCP_KEEPINTVL',
> > > + cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPINTVL'))
> >
> > What platforms are you aware of that do NOT have these
> > settings available ? I'm wondering if we can just assume
> > they always exist.
>
> macOS appears to have these, except that 'TCP_KEEPIDLE' is under a
> different name 'TCP_KEEPALIVE':
>
> https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/bsd/man/man4/tcp.4#L172
>
> Likewise I see them available in mingw for Windows builds, with both
> names
>
> $ grep -r TCP_KEEP /usr/i686-w64-mingw32/sys-root/mingw/include/
> /usr/i686-w64-mingw32/sys-root/mingw/include/ws2ipdef.h:#define TCP_KEEPALIVE 3
> /usr/i686-w64-mingw32/sys-root/mingw/include/ws2ipdef.h:#define TCP_KEEPCNT 16
> /usr/i686-w64-mingw32/sys-root/mingw/include/ws2ipdef.h:#define TCP_KEEPIDLE TCP_KEEPALIVE
> /usr/i686-w64-mingw32/sys-root/mingw/include/ws2ipdef.h:#define TCP_KEEPINTVL 17
>
> but your patch wouldn't enable it because it checks netinet/tcp.h
>
> AFAICT, the only platform that matters to QEMU that seems to miss this
> is OpenBSD
>
Yes, I couldn't find them in the OpenBSD man-pages. Other BSD variants
support all of them. I will mention it explicitly in the QAPI docs, that
OpenBSD is not supported.
I will also fix it for Windows and Darwin, and submit an updated series.
Thank you!
Best regards,
Juraj Marcin
>
> With regards,
> Daniel
> --
> |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o- https://fstop138.berrange.com :|
> |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
>
© 2016 - 2025 Red Hat, Inc.