Provide the cpr=on option to preserve TAP and vhost descriptors during
cpr-transfer, so the management layer does not need to create a new
device for the target.
Save all fds under the netdev name, ordered by index: the tap device fd
for each queue comes first, followed by the vhost fd for each queue.
Example:
-netdev tap,id=hostnet2,cpr=on
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Signed-off-by: Ben Chaney <bchaney@akamai.com>
---
hw/vfio/device.c | 2 +-
include/migration/cpr.h | 4 +--
migration/cpr.c | 19 +++++++------
net/tap.c | 74 +++++++++++++++++++++++++++++++++++++++----------
qapi/net.json | 6 +++-
5 files changed, 77 insertions(+), 28 deletions(-)
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 086f20f676..cbc8db6a67 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -363,7 +363,7 @@ void vfio_device_free_name(VFIODevice *vbasedev)
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
{
- vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp);
+ vbasedev->fd = get_fd_param(vbasedev->dev->id, str, 0, true, errp);
}
static VFIODeviceIOOps vfio_device_io_ops_ioctl;
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
index d585fadc5b..ded6ceff7c 100644
--- a/include/migration/cpr.h
+++ b/include/migration/cpr.h
@@ -48,8 +48,8 @@ void cpr_state_close(void);
struct QIOChannel *cpr_state_ioc(void);
bool cpr_incoming_needed(void *opaque);
-int cpr_get_fd_param(const char *name, const char *fdname, int index,
- Error **errp);
+int get_fd_param(const char *cpr_name, const char *fdname, int index, bool cpr,
+ Error **errp);
QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp);
diff --git a/migration/cpr.c b/migration/cpr.c
index c0bf93a7ba..f2c40eeba5 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -311,11 +311,12 @@ bool cpr_incoming_needed(void *opaque)
}
/*
- * cpr_get_fd_param: find a descriptor and return its value.
+ * get_fd_param: find a descriptor and return its value.
*
- * @name: CPR name for the descriptor
+ * @cpr_name: CPR name for the descriptor
* @fdname: An integer-valued string, or a name passed to a getfd command
* @index: CPR index of the descriptor
+ * @cpr: cpr is enabled on the associated device
* @errp: returned error message
*
* If CPR is not being performed, then use @fdname to find the fd.
@@ -324,23 +325,23 @@ bool cpr_incoming_needed(void *opaque)
*
* On success returns the fd value, else returns -1.
*/
-int cpr_get_fd_param(const char *name, const char *fdname, int index,
- Error **errp)
+int get_fd_param(const char *cpr_name, const char *fdname, int index,
+ bool cpr, Error **errp)
{
ERRP_GUARD();
int fd;
- if (cpr_is_incoming()) {
- fd = cpr_find_fd(name, index);
+ if (cpr && cpr_is_incoming()) {
+ fd = cpr_find_fd(cpr_name, index);
if (fd < 0) {
error_setg(errp, "cannot find saved value for fd %s", fdname);
}
} else {
fd = monitor_fd_param(monitor_cur(), fdname, errp);
- if (fd >= 0) {
- cpr_save_fd(name, index, fd);
- } else {
+ if (fd < 0) {
error_prepend(errp, "Could not parse object fd %s:", fdname);
+ } else if (cpr) {
+ cpr_save_fd(cpr_name, index, fd);
}
}
return fd;
diff --git a/net/tap.c b/net/tap.c
index 1847167e4f..8875498434 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -35,6 +35,7 @@
#include "net/eth.h"
#include "net/net.h"
#include "clients.h"
+#include "migration/cpr.h"
#include "monitor/monitor.h"
#include "system/system.h"
#include "qapi/error.h"
@@ -80,6 +81,7 @@ typedef struct TAPState {
bool has_uso;
bool has_tunnel;
bool enabled;
+ bool cpr;
VHostNetState *vhost_net;
unsigned host_vnet_hdr_len;
Notifier exit;
@@ -323,6 +325,9 @@ static void tap_cleanup(NetClientState *nc)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
+ if (s->cpr) {
+ cpr_delete_fd_all(nc->name);
+ }
if (s->vhost_net) {
vhost_net_cleanup(s->vhost_net);
g_free(s->vhost_net);
@@ -690,18 +695,24 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
return fd;
}
+/* CPR indices: tap fd for queue q is at q; its vhost fd is at total_fds + q */
+#define TAP_FD_INDEX(queue) ((queue))
+#define TAP_VHOSTFD_INDEX(queue, total_fds) ((queue) + (total_fds))
+
#define MAX_TAP_QUEUES 1024
static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
const char *model, const char *name,
const char *ifname, const char *script,
const char *downscript, const char *vhostfdname,
- int vnet_hdr, int fd, Error **errp)
+ int vnet_hdr, int fd, int index, Error **errp)
{
Error *err = NULL;
TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
+ bool cpr = tap->has_cpr ? tap->cpr : false;
int vhostfd;
+ s->cpr = cpr;
tap_set_sndbuf(s->fd, tap, &err);
if (err) {
error_propagate(errp, err);
@@ -736,7 +747,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
}
if (vhostfdname) {
- vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err);
+ vhostfd = get_fd_param(name, vhostfdname, index, cpr, &err);
if (vhostfd == -1) {
error_propagate(errp, err);
goto failed;
@@ -745,12 +756,21 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
goto failed;
}
} else {
- vhostfd = open("/dev/vhost-net", O_RDWR);
+ vhostfd = cpr ? cpr_find_fd(name, index) : -1;
+ if (vhostfd < 0) {
+ vhostfd = open("/dev/vhost-net", O_RDWR);
+ if (cpr && vhostfd >= 0) {
+ cpr_save_fd(name, index, vhostfd);
+ }
+ }
if (vhostfd < 0) {
error_setg_file_open(errp, errno, "/dev/vhost-net");
goto failed;
}
if (!qemu_set_blocking(vhostfd, false, errp)) {
+ if (!cpr) {
+ close(vhostfd);
+ }
goto failed;
}
}
@@ -776,6 +796,9 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
return;
failed:
+ if (cpr) {
+ cpr_delete_fd_all(name);
+ }
qemu_del_net_client(&s->nc);
}
@@ -808,7 +831,8 @@ static int get_fds(char *str, char *fds[], int max)
int net_init_tap(const Netdev *netdev, const char *name,
NetClientState *peer, Error **errp)
{
- const NetdevTapOptions *tap;
+ const NetdevTapOptions *tap = &netdev->u.tap;
+ bool cpr = tap->has_cpr ? tap->cpr : false;
int fd, vnet_hdr = 0, i = 0, queues;
/* for the no-fd, no-helper case */
const char *script;
@@ -844,7 +868,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
goto out;
}
- fd = monitor_fd_param(monitor_cur(), tap->fd, errp);
+ fd = get_fd_param(name, tap->fd, TAP_FD_INDEX(0), cpr, errp);
if (fd == -1) {
ret = -1;
goto out;
@@ -865,13 +889,15 @@ int net_init_tap(const Netdev *netdev, const char *name,
net_init_tap_one(tap, peer, "tap", name, NULL,
script, downscript,
- vhostfdname, vnet_hdr, fd, &err);
+ vhostfdname, vnet_hdr, fd,
+ TAP_VHOSTFD_INDEX(0, 1), &err);
if (err) {
error_propagate(errp, err);
close(fd);
ret = -1;
goto out;
}
+
} else if (tap->fds) {
char **fds;
char **vhost_fds;
@@ -902,7 +928,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
}
for (i = 0; i < nfds; i++) {
- fd = monitor_fd_param(monitor_cur(), fds[i], errp);
+ fd = get_fd_param(name, fds[i], TAP_FD_INDEX(i), cpr, errp);
if (fd == -1) {
ret = -1;
goto free_fail;
@@ -929,7 +955,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
net_init_tap_one(tap, peer, "tap", name, ifname,
script, downscript,
tap->vhostfds ? vhost_fds[i] : NULL,
- vnet_hdr, fd, &err);
+ vnet_hdr, fd, TAP_VHOSTFD_INDEX(i, nfds), &err);
if (err) {
error_propagate(errp, err);
ret = -1;
@@ -957,9 +983,15 @@ free_fail:
goto out;
}
- fd = net_bridge_run_helper(tap->helper,
- tap->br ?: DEFAULT_BRIDGE_INTERFACE,
- errp);
+ fd = cpr ? cpr_find_fd(name, TAP_FD_INDEX(0)) : -1;
+ if (fd < 0) {
+ fd = net_bridge_run_helper(tap->helper,
+ tap->br ?: DEFAULT_BRIDGE_INTERFACE,
+ errp);
+ if (cpr && fd >= 0) {
+ cpr_save_fd(name, TAP_FD_INDEX(0), fd);
+ }
+ }
if (fd == -1) {
ret = -1;
goto out;
@@ -979,13 +1011,14 @@ free_fail:
net_init_tap_one(tap, peer, "bridge", name, ifname,
script, downscript, vhostfdname,
- vnet_hdr, fd, &err);
+ vnet_hdr, fd, TAP_VHOSTFD_INDEX(0, 1), &err);
if (err) {
error_propagate(errp, err);
close(fd);
ret = -1;
goto out;
}
+
} else {
g_autofree char *default_script = NULL;
g_autofree char *default_downscript = NULL;
@@ -1010,8 +1043,14 @@ free_fail:
}
for (i = 0; i < queues; i++) {
- fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
- ifname, sizeof ifname, queues > 1, errp);
+ fd = cpr ? cpr_find_fd(name, TAP_FD_INDEX(i)) : -1;
+ if (fd < 0) {
+ fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
+ ifname, sizeof ifname, queues > 1, errp);
+ if (cpr && fd >= 0) {
+ cpr_save_fd(name, TAP_FD_INDEX(i), fd);
+ }
+ }
if (fd == -1) {
ret = -1;
goto out;
@@ -1029,7 +1068,9 @@ free_fail:
net_init_tap_one(tap, peer, "tap", name, ifname,
i >= 1 ? "no" : script,
i >= 1 ? "no" : downscript,
- vhostfdname, vnet_hdr, fd, &err);
+ vhostfdname, vnet_hdr,
+ fd, TAP_VHOSTFD_INDEX(i, queues),
+ &err);
if (err) {
error_propagate(errp, err);
close(fd);
@@ -1040,6 +1081,9 @@ free_fail:
}
out:
+ if (ret && cpr) {
+ cpr_delete_fd_all(name);
+ }
return ret;
}
diff --git a/qapi/net.json b/qapi/net.json
index 118bd34965..4b12fca94b 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -355,6 +355,9 @@
# @poll-us: maximum number of microseconds that could be spent on busy
# polling for tap (since 2.7)
#
+# @cpr: preserve this device's state and file descriptors across
+# cpr-transfer to reduce downtime (default: false) (since 11.0)
+#
# Since: 1.2
##
{ 'struct': 'NetdevTapOptions',
@@ -373,7 +376,8 @@
'*vhostfds': 'str',
'*vhostforce': 'bool',
'*queues': 'uint32',
- '*poll-us': 'uint32'} }
+ '*poll-us': 'uint32',
+ '*cpr': 'bool'} }
##
# @NetdevSocketOptions:
--
2.34.1
Ben Chaney <bchaney@akamai.com> writes:
> Provide the cpr=on option to preserve TAP and vhost descriptors during
> cpr-transfer, so the management layer does not need to create a new
> device for the target.
>
> Save all tap fd's in order with the tap device fds saved first,
> and the vhostfd saved after.
>
> Example:
>
> -netdev tap,id=hostnet2,cpr=on
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> Signed-off-by: Ben Chaney <bchaney@akamai.com>
[...]
> diff --git a/qapi/net.json b/qapi/net.json
> index 118bd34965..4b12fca94b 100644
> --- a/qapi/net.json
> +++ b/qapi/net.json
> @@ -355,6 +355,9 @@
> # @poll-us: maximum number of microseconds that could be spent on busy
> # polling for tap (since 2.7)
> #
> +# @cpr: preserve the state of this device and its associated file
> +# descriptors during cpr-transfer for reduced migration downtime
(default: false) (since 11.0)
> +#
> # Since: 1.2
> ##
> { 'struct': 'NetdevTapOptions',
> @@ -373,7 +376,8 @@
> '*vhostfds': 'str',
> '*vhostforce': 'bool',
> '*queues': 'uint32',
> - '*poll-us': 'uint32'} }
> + '*poll-us': 'uint32',
> + '*cpr': 'bool'} }
>
> ##
> # @NetdevSocketOptions:
With that, QAPI schema
Acked-by: Markus Armbruster <armbru@redhat.com>
© 2016 - 2026 Red Hat, Inc.