From: Steve Sistare <steven.sistare@oracle.com>
Provide the cpr=on option to preserve TAP and vhost descriptors during
cpr-transfer, so the management layer does not need to create a new
device for the target.
Save all tap fd's in canonical order, leveraging the index argument of
cpr_save_fd. For the i'th queue, the tap device fd is saved at index 2*i,
and the vhostfd (if any) at index 2*i+1.
tap and vhost fd's are passed by name to the monitor when a NIC is hot
plugged, but the name is not known to qemu after cpr. Allow the manager
to pass -1 for the fd "name" in the new qemu args to indicate that QEMU
should search for a saved value. Example:
-netdev tap,id=hostnet2,fds=-1:-1,vhostfds=-1:-1,cpr=on
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Signed-off-by: Ben Chaney <bchaney@akamai.com>
---
hw/vfio/device.c | 2 +-
include/migration/cpr.h | 2 +-
migration/cpr.c | 11 ++++----
net/tap.c | 73 +++++++++++++++++++++++++++++++++++++++----------
qapi/net.json | 5 +++-
5 files changed, 70 insertions(+), 23 deletions(-)
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 76869828fc..73e622f7b5 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -362,7 +362,7 @@ void vfio_device_free_name(VFIODevice *vbasedev)
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
{
- vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp);
+ vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, true, errp);
}
static VFIODeviceIOOps vfio_device_io_ops_ioctl;
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
index d585fadc5b..68424b4b03 100644
--- a/include/migration/cpr.h
+++ b/include/migration/cpr.h
@@ -48,7 +48,7 @@ void cpr_state_close(void);
struct QIOChannel *cpr_state_ioc(void);
bool cpr_incoming_needed(void *opaque);
-int cpr_get_fd_param(const char *name, const char *fdname, int index,
+int cpr_get_fd_param(const char *name, const char *fdname, int index, bool cpr,
Error **errp);
QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
diff --git a/migration/cpr.c b/migration/cpr.c
index c0bf93a7ba..19bd56339d 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -316,6 +316,7 @@ bool cpr_incoming_needed(void *opaque)
* @name: CPR name for the descriptor
* @fdname: An integer-valued string, or a name passed to a getfd command
* @index: CPR index of the descriptor
+ * @cpr: use cpr
* @errp: returned error message
*
* If CPR is not being performed, then use @fdname to find the fd.
@@ -325,22 +326,22 @@ bool cpr_incoming_needed(void *opaque)
* On success returns the fd value, else returns -1.
*/
int cpr_get_fd_param(const char *name, const char *fdname, int index,
- Error **errp)
+ bool cpr, Error **errp)
{
ERRP_GUARD();
int fd;
- if (cpr_is_incoming()) {
+ if (cpr && cpr_is_incoming()) {
fd = cpr_find_fd(name, index);
if (fd < 0) {
error_setg(errp, "cannot find saved value for fd %s", fdname);
}
} else {
fd = monitor_fd_param(monitor_cur(), fdname, errp);
- if (fd >= 0) {
- cpr_save_fd(name, index, fd);
- } else {
+ if (fd < 0) {
error_prepend(errp, "Could not parse object fd %s:", fdname);
+ } else if (cpr) {
+ cpr_save_fd(name, index, fd);
}
}
return fd;
diff --git a/net/tap.c b/net/tap.c
index 9d480574c3..79e29addd1 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -35,6 +35,7 @@
#include "net/eth.h"
#include "net/net.h"
#include "clients.h"
+#include "migration/cpr.h"
#include "monitor/monitor.h"
#include "system/system.h"
#include "qapi/error.h"
@@ -80,6 +81,7 @@ typedef struct TAPState {
bool has_uso;
bool has_tunnel;
bool enabled;
+ bool cpr;
VHostNetState *vhost_net;
unsigned host_vnet_hdr_len;
Notifier exit;
@@ -323,6 +325,9 @@ static void tap_cleanup(NetClientState *nc)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
+ if (s->cpr) {
+ cpr_delete_fd_all(nc->name);
+ }
if (s->vhost_net) {
vhost_net_cleanup(s->vhost_net);
g_free(s->vhost_net);
@@ -690,18 +695,24 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
return fd;
}
+/* CPR fd's for each queue are saved at these indices */
+#define TAP_FD_INDEX(queue) (2 * (queue) + 0)
+#define TAP_VHOSTFD_INDEX(queue) (2 * (queue) + 1)
+
#define MAX_TAP_QUEUES 1024
static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
const char *model, const char *name,
const char *ifname, const char *script,
const char *downscript, const char *vhostfdname,
- int vnet_hdr, int fd, Error **errp)
+ int vnet_hdr, int fd, int index, Error **errp)
{
Error *err = NULL;
TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
+ bool cpr = tap->has_cpr ? tap->cpr : false;
int vhostfd;
+ s->cpr = cpr;
tap_set_sndbuf(s->fd, tap, &err);
if (err) {
error_propagate(errp, err);
@@ -736,7 +747,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
}
if (vhostfdname) {
- vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err);
+ vhostfd = cpr_get_fd_param(name, vhostfdname, index, cpr, &err);
if (vhostfd == -1) {
error_propagate(errp, err);
goto failed;
@@ -745,13 +756,22 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
goto failed;
}
} else {
- vhostfd = open("/dev/vhost-net", O_RDWR);
+ vhostfd = cpr ? cpr_find_fd(name, index) : -1;
+ if (vhostfd < 0) {
+ vhostfd = open("/dev/vhost-net", O_RDWR);
+ if (cpr && vhostfd >= 0) {
+ cpr_save_fd(name, index, vhostfd);
+ }
+ }
if (vhostfd < 0) {
error_setg_errno(errp, errno,
"tap: open vhost char device failed");
goto failed;
}
if (!qemu_set_blocking(vhostfd, false, errp)) {
+ if (!cpr) {
+ close(vhostfd);
+ }
goto failed;
}
}
@@ -777,6 +797,9 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
return;
failed:
+ if (cpr) {
+ cpr_delete_fd_all(name);
+ }
qemu_del_net_client(&s->nc);
}
@@ -809,7 +832,8 @@ static int get_fds(char *str, char *fds[], int max)
int net_init_tap(const Netdev *netdev, const char *name,
NetClientState *peer, Error **errp)
{
- const NetdevTapOptions *tap;
+ const NetdevTapOptions *tap = &netdev->u.tap;
+ bool cpr = tap->has_cpr ? tap->cpr : false;
int fd, vnet_hdr = 0, i = 0, queues;
/* for the no-fd, no-helper case */
const char *script;
@@ -845,7 +869,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
goto out;
}
- fd = monitor_fd_param(monitor_cur(), tap->fd, errp);
+ fd = cpr_get_fd_param(name, tap->fd, TAP_FD_INDEX(0), cpr, errp);
if (fd == -1) {
ret = -1;
goto out;
@@ -866,13 +890,14 @@ int net_init_tap(const Netdev *netdev, const char *name,
net_init_tap_one(tap, peer, "tap", name, NULL,
script, downscript,
- vhostfdname, vnet_hdr, fd, &err);
+ vhostfdname, vnet_hdr, fd, TAP_VHOSTFD_INDEX(0), &err);
if (err) {
error_propagate(errp, err);
close(fd);
ret = -1;
goto out;
}
+
} else if (tap->fds) {
char **fds;
char **vhost_fds;
@@ -903,7 +928,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
}
for (i = 0; i < nfds; i++) {
- fd = monitor_fd_param(monitor_cur(), fds[i], errp);
+ fd = cpr_get_fd_param(name, fds[i], TAP_FD_INDEX(i), cpr, errp);
if (fd == -1) {
ret = -1;
goto free_fail;
@@ -930,7 +955,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
net_init_tap_one(tap, peer, "tap", name, ifname,
script, downscript,
tap->vhostfds ? vhost_fds[i] : NULL,
- vnet_hdr, fd, &err);
+ vnet_hdr, fd, TAP_VHOSTFD_INDEX(i), &err);
if (err) {
error_propagate(errp, err);
ret = -1;
@@ -958,9 +983,15 @@ free_fail:
goto out;
}
- fd = net_bridge_run_helper(tap->helper,
- tap->br ?: DEFAULT_BRIDGE_INTERFACE,
- errp);
+ fd = cpr ? cpr_find_fd(name, TAP_FD_INDEX(0)) : -1;
+ if (fd < 0) {
+ fd = net_bridge_run_helper(tap->helper,
+ tap->br ?: DEFAULT_BRIDGE_INTERFACE,
+ errp);
+ if (cpr && fd >= 0) {
+ cpr_save_fd(name, TAP_FD_INDEX(0), fd);
+ }
+ }
if (fd == -1) {
ret = -1;
goto out;
@@ -980,13 +1011,14 @@ free_fail:
net_init_tap_one(tap, peer, "bridge", name, ifname,
script, downscript, vhostfdname,
- vnet_hdr, fd, &err);
+ vnet_hdr, fd, TAP_VHOSTFD_INDEX(0), &err);
if (err) {
error_propagate(errp, err);
close(fd);
ret = -1;
goto out;
}
+
} else {
g_autofree char *default_script = NULL;
g_autofree char *default_downscript = NULL;
@@ -1011,8 +1043,14 @@ free_fail:
}
for (i = 0; i < queues; i++) {
- fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
- ifname, sizeof ifname, queues > 1, errp);
+ fd = cpr ? cpr_find_fd(name, TAP_FD_INDEX(i)) : -1;
+ if (fd < 0) {
+ fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
+ ifname, sizeof ifname, queues > 1, errp);
+ if (cpr && fd >= 0) {
+ cpr_save_fd(name, TAP_FD_INDEX(i), fd);
+ }
+ }
if (fd == -1) {
ret = -1;
goto out;
@@ -1030,7 +1068,9 @@ free_fail:
net_init_tap_one(tap, peer, "tap", name, ifname,
i >= 1 ? "no" : script,
i >= 1 ? "no" : downscript,
- vhostfdname, vnet_hdr, fd, &err);
+ vhostfdname, vnet_hdr,
+ fd, TAP_VHOSTFD_INDEX(i),
+ &err);
if (err) {
error_propagate(errp, err);
close(fd);
@@ -1041,6 +1081,9 @@ free_fail:
}
out:
+ if (ret && cpr) {
+ cpr_delete_fd_all(name);
+ }
return ret;
}
diff --git a/qapi/net.json b/qapi/net.json
index 118bd34965..264213b5d9 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -355,6 +355,8 @@
# @poll-us: maximum number of microseconds that could be spent on busy
# polling for tap (since 2.7)
#
+# @cpr: preserve fds and vhostfds during cpr-transfer.
+#
# Since: 1.2
##
{ 'struct': 'NetdevTapOptions',
@@ -373,7 +375,8 @@
'*vhostfds': 'str',
'*vhostforce': 'bool',
'*queues': 'uint32',
- '*poll-us': 'uint32'} }
+ '*poll-us': 'uint32',
+ '*cpr': 'bool'} }
##
# @NetdevSocketOptions:
--
2.34.1
On Wed, Dec 03, 2025 at 01:51:23PM -0500, Ben Chaney wrote:
> From: Steve Sistare <steven.sistare@oracle.com>
>
> Provide the cpr=on option to preserve TAP and vhost descriptors during
> cpr-transfer, so the management layer does not need to create a new
> device for the target.
>
> Save all tap fd's in canonical order, leveraging the index argument of
> cpr_save_fd. For the i'th queue, the tap device fd is saved at index 2*i,
> and the vhostfd (if any) at index 2*i+1.
This interleaving feels risky from the POV of future extensibility.
Although its unlikely that we'll need a third type of FD per queue,
it would be easy to leave this possiblity open.
IOW, IMHO we should save all tap FDs, then all vhostfds with no
interleaving. If we ever get further FDs to save in future, then
they can be set to follow the vhostfds.
>
> tap and vhost fd's are passed by name to the monitor when a NIC is hot
> plugged, but the name is not known to qemu after cpr. Allow the manager
> to pass -1 for the fd "name" in the new qemu args to indicate that QEMU
> should search for a saved value. Example:
>
> -netdev tap,id=hostnet2,fds=-1:-1,vhostfds=-1:-1,cpr=on
This syntax feels redundant. If cpr==off then "fds" must
always be valid, or not specified at all. If cpr=on, then
"fds" will always be -1. I don't see any point in setting
the 'fds' or 'vhostfds' arg at all. It should simply be:
-netdev tap,id=hostnet2,cpr=on
this in turn avoids introducing special syntax for allowing
-1 in 'fds' or 'vhostfds' which Markus was concerned with.
> diff --git a/include/migration/cpr.h b/include/migration/cpr.h
> index d585fadc5b..68424b4b03 100644
> --- a/include/migration/cpr.h
> +++ b/include/migration/cpr.h
> @@ -48,7 +48,7 @@ void cpr_state_close(void);
> struct QIOChannel *cpr_state_ioc(void);
>
> bool cpr_incoming_needed(void *opaque);
> -int cpr_get_fd_param(const char *name, const char *fdname, int index,
> +int cpr_get_fd_param(const char *name, const char *fdname, int index, bool cpr,
> Error **errp);
>
> QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
> diff --git a/migration/cpr.c b/migration/cpr.c
> index c0bf93a7ba..19bd56339d 100644
> --- a/migration/cpr.c
> +++ b/migration/cpr.c
> @@ -316,6 +316,7 @@ bool cpr_incoming_needed(void *opaque)
> * @name: CPR name for the descriptor
> * @fdname: An integer-valued string, or a name passed to a getfd command
> * @index: CPR index of the descriptor
> + * @cpr: use cpr
This feels wierdly redundant too. THe method name already implies
use of 'cpr', and yet we now have another parameter to ask whether
to use 'cpr'. At the very least these semantics deserve a much
better explanation than "@cpr: use cpr", as I don't know what the
intention is here.
> * @errp: returned error message
> *
> * If CPR is not being performed, then use @fdname to find the fd.
> @@ -325,22 +326,22 @@ bool cpr_incoming_needed(void *opaque)
> * On success returns the fd value, else returns -1.
> */
> int cpr_get_fd_param(const char *name, const char *fdname, int index,
> - Error **errp)
> + bool cpr, Error **errp)
> {
> ERRP_GUARD();
> int fd;
>
> - if (cpr_is_incoming()) {
> + if (cpr && cpr_is_incoming()) {
> fd = cpr_find_fd(name, index);
> if (fd < 0) {
> error_setg(errp, "cannot find saved value for fd %s", fdname);
> }
> } else {
> fd = monitor_fd_param(monitor_cur(), fdname, errp);
> - if (fd >= 0) {
> - cpr_save_fd(name, index, fd);
> - } else {
> + if (fd < 0) {
> error_prepend(errp, "Could not parse object fd %s:", fdname);
> + } else if (cpr) {
> + cpr_save_fd(name, index, fd);
> }
> }
> return fd;
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On 12/3/25 19:51, Ben Chaney wrote:
> From: Steve Sistare <steven.sistare@oracle.com>
>
> Provide the cpr=on option to preserve TAP and vhost descriptors during
> cpr-transfer, so the management layer does not need to create a new
> device for the target.
>
> Save all tap fd's in canonical order, leveraging the index argument of
> cpr_save_fd. For the i'th queue, the tap device fd is saved at index 2*i,
> and the vhostfd (if any) at index 2*i+1.
>
> tap and vhost fd's are passed by name to the monitor when a NIC is hot
> plugged, but the name is not known to qemu after cpr. Allow the manager
> to pass -1 for the fd "name" in the new qemu args to indicate that QEMU
> should search for a saved value. Example:
>
> -netdev tap,id=hostnet2,fds=-1:-1,vhostfds=-1:-1,cpr=on
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> Signed-off-by: Ben Chaney <bchaney@akamai.com>
> ---
> hw/vfio/device.c | 2 +-
> include/migration/cpr.h | 2 +-
> migration/cpr.c | 11 ++++----
> net/tap.c | 73 +++++++++++++++++++++++++++++++++++++++----------
> qapi/net.json | 5 +++-
> 5 files changed, 70 insertions(+), 23 deletions(-)
>
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index 76869828fc..73e622f7b5 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -362,7 +362,7 @@ void vfio_device_free_name(VFIODevice *vbasedev)
>
> void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
> {
> - vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp);
> + vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, true, errp);
This looks weird to me.
This calls a cpr* routine with a 'cpr' bool argument that toggles
CPR on or off. It looks a bit hacky. Could you clarify the intention?
C.
> }
>
> static VFIODeviceIOOps vfio_device_io_ops_ioctl;
> diff --git a/include/migration/cpr.h b/include/migration/cpr.h
> index d585fadc5b..68424b4b03 100644
> --- a/include/migration/cpr.h
> +++ b/include/migration/cpr.h
> @@ -48,7 +48,7 @@ void cpr_state_close(void);
> struct QIOChannel *cpr_state_ioc(void);
>
> bool cpr_incoming_needed(void *opaque);
> -int cpr_get_fd_param(const char *name, const char *fdname, int index,
> +int cpr_get_fd_param(const char *name, const char *fdname, int index, bool cpr,
> Error **errp);
>
> QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
> diff --git a/migration/cpr.c b/migration/cpr.c
> index c0bf93a7ba..19bd56339d 100644
> --- a/migration/cpr.c
> +++ b/migration/cpr.c
> @@ -316,6 +316,7 @@ bool cpr_incoming_needed(void *opaque)
> * @name: CPR name for the descriptor
> * @fdname: An integer-valued string, or a name passed to a getfd command
> * @index: CPR index of the descriptor
> + * @cpr: use cpr
> * @errp: returned error message
> *
> * If CPR is not being performed, then use @fdname to find the fd.
> @@ -325,22 +326,22 @@ bool cpr_incoming_needed(void *opaque)
> * On success returns the fd value, else returns -1.
> */
> int cpr_get_fd_param(const char *name, const char *fdname, int index,
> - Error **errp)
> + bool cpr, Error **errp)
> {
> ERRP_GUARD();
> int fd;
>
> - if (cpr_is_incoming()) {
> + if (cpr && cpr_is_incoming()) {
> fd = cpr_find_fd(name, index);
> if (fd < 0) {
> error_setg(errp, "cannot find saved value for fd %s", fdname);
> }
> } else {
> fd = monitor_fd_param(monitor_cur(), fdname, errp);
> - if (fd >= 0) {
> - cpr_save_fd(name, index, fd);
> - } else {
> + if (fd < 0) {
> error_prepend(errp, "Could not parse object fd %s:", fdname);
> + } else if (cpr) {
> + cpr_save_fd(name, index, fd);
> }
> }
> return fd;
> diff --git a/net/tap.c b/net/tap.c
> index 9d480574c3..79e29addd1 100644
> --- a/net/tap.c
> +++ b/net/tap.c
> @@ -35,6 +35,7 @@
> #include "net/eth.h"
> #include "net/net.h"
> #include "clients.h"
> +#include "migration/cpr.h"
> #include "monitor/monitor.h"
> #include "system/system.h"
> #include "qapi/error.h"
> @@ -80,6 +81,7 @@ typedef struct TAPState {
> bool has_uso;
> bool has_tunnel;
> bool enabled;
> + bool cpr;
> VHostNetState *vhost_net;
> unsigned host_vnet_hdr_len;
> Notifier exit;
> @@ -323,6 +325,9 @@ static void tap_cleanup(NetClientState *nc)
> {
> TAPState *s = DO_UPCAST(TAPState, nc, nc);
>
> + if (s->cpr) {
> + cpr_delete_fd_all(nc->name);
> + }
> if (s->vhost_net) {
> vhost_net_cleanup(s->vhost_net);
> g_free(s->vhost_net);
> @@ -690,18 +695,24 @@ static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
> return fd;
> }
>
> +/* CPR fd's for each queue are saved at these indices */
> +#define TAP_FD_INDEX(queue) (2 * (queue) + 0)
> +#define TAP_VHOSTFD_INDEX(queue) (2 * (queue) + 1)
> +
> #define MAX_TAP_QUEUES 1024
>
> static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
> const char *model, const char *name,
> const char *ifname, const char *script,
> const char *downscript, const char *vhostfdname,
> - int vnet_hdr, int fd, Error **errp)
> + int vnet_hdr, int fd, int index, Error **errp)
> {
> Error *err = NULL;
> TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
> + bool cpr = tap->has_cpr ? tap->cpr : false;
> int vhostfd;
>
> + s->cpr = cpr;
> tap_set_sndbuf(s->fd, tap, &err);
> if (err) {
> error_propagate(errp, err);
> @@ -736,7 +747,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
> }
>
> if (vhostfdname) {
> - vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err);
> + vhostfd = cpr_get_fd_param(name, vhostfdname, index, cpr, &err);
> if (vhostfd == -1) {
> error_propagate(errp, err);
> goto failed;
> @@ -745,13 +756,22 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
> goto failed;
> }
> } else {
> - vhostfd = open("/dev/vhost-net", O_RDWR);
> + vhostfd = cpr ? cpr_find_fd(name, index) : -1;
> + if (vhostfd < 0) {
> + vhostfd = open("/dev/vhost-net", O_RDWR);
> + if (cpr && vhostfd >= 0) {
> + cpr_save_fd(name, index, vhostfd);
> + }
> + }
> if (vhostfd < 0) {
> error_setg_errno(errp, errno,
> "tap: open vhost char device failed");
> goto failed;
> }
> if (!qemu_set_blocking(vhostfd, false, errp)) {
> + if (!cpr) {
> + close(vhostfd);
> + }
> goto failed;
> }
> }
> @@ -777,6 +797,9 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
> return;
>
> failed:
> + if (cpr) {
> + cpr_delete_fd_all(name);
> + }
> qemu_del_net_client(&s->nc);
> }
>
> @@ -809,7 +832,8 @@ static int get_fds(char *str, char *fds[], int max)
> int net_init_tap(const Netdev *netdev, const char *name,
> NetClientState *peer, Error **errp)
> {
> - const NetdevTapOptions *tap;
> + const NetdevTapOptions *tap = &netdev->u.tap;
> + bool cpr = tap->has_cpr ? tap->cpr : false;
> int fd, vnet_hdr = 0, i = 0, queues;
> /* for the no-fd, no-helper case */
> const char *script;
> @@ -845,7 +869,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
> goto out;
> }
>
> - fd = monitor_fd_param(monitor_cur(), tap->fd, errp);
> + fd = cpr_get_fd_param(name, tap->fd, TAP_FD_INDEX(0), cpr, errp);
> if (fd == -1) {
> ret = -1;
> goto out;
> @@ -866,13 +890,14 @@ int net_init_tap(const Netdev *netdev, const char *name,
>
> net_init_tap_one(tap, peer, "tap", name, NULL,
> script, downscript,
> - vhostfdname, vnet_hdr, fd, &err);
> + vhostfdname, vnet_hdr, fd, TAP_VHOSTFD_INDEX(0), &err);
> if (err) {
> error_propagate(errp, err);
> close(fd);
> ret = -1;
> goto out;
> }
> +
> } else if (tap->fds) {
> char **fds;
> char **vhost_fds;
> @@ -903,7 +928,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
> }
>
> for (i = 0; i < nfds; i++) {
> - fd = monitor_fd_param(monitor_cur(), fds[i], errp);
> + fd = cpr_get_fd_param(name, fds[i], TAP_FD_INDEX(i), cpr, errp);
> if (fd == -1) {
> ret = -1;
> goto free_fail;
> @@ -930,7 +955,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
> net_init_tap_one(tap, peer, "tap", name, ifname,
> script, downscript,
> tap->vhostfds ? vhost_fds[i] : NULL,
> - vnet_hdr, fd, &err);
> + vnet_hdr, fd, TAP_VHOSTFD_INDEX(i), &err);
> if (err) {
> error_propagate(errp, err);
> ret = -1;
> @@ -958,9 +983,15 @@ free_fail:
> goto out;
> }
>
> - fd = net_bridge_run_helper(tap->helper,
> - tap->br ?: DEFAULT_BRIDGE_INTERFACE,
> - errp);
> + fd = cpr ? cpr_find_fd(name, TAP_FD_INDEX(0)) : -1;
> + if (fd < 0) {
> + fd = net_bridge_run_helper(tap->helper,
> + tap->br ?: DEFAULT_BRIDGE_INTERFACE,
> + errp);
> + if (cpr && fd >= 0) {
> + cpr_save_fd(name, TAP_FD_INDEX(0), fd);
> + }
> + }
> if (fd == -1) {
> ret = -1;
> goto out;
> @@ -980,13 +1011,14 @@ free_fail:
>
> net_init_tap_one(tap, peer, "bridge", name, ifname,
> script, downscript, vhostfdname,
> - vnet_hdr, fd, &err);
> + vnet_hdr, fd, TAP_VHOSTFD_INDEX(0), &err);
> if (err) {
> error_propagate(errp, err);
> close(fd);
> ret = -1;
> goto out;
> }
> +
> } else {
> g_autofree char *default_script = NULL;
> g_autofree char *default_downscript = NULL;
> @@ -1011,8 +1043,14 @@ free_fail:
> }
>
> for (i = 0; i < queues; i++) {
> - fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
> - ifname, sizeof ifname, queues > 1, errp);
> + fd = cpr ? cpr_find_fd(name, TAP_FD_INDEX(i)) : -1;
> + if (fd < 0) {
> + fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
> + ifname, sizeof ifname, queues > 1, errp);
> + if (cpr && fd >= 0) {
> + cpr_save_fd(name, TAP_FD_INDEX(i), fd);
> + }
> + }
> if (fd == -1) {
> ret = -1;
> goto out;
> @@ -1030,7 +1068,9 @@ free_fail:
> net_init_tap_one(tap, peer, "tap", name, ifname,
> i >= 1 ? "no" : script,
> i >= 1 ? "no" : downscript,
> - vhostfdname, vnet_hdr, fd, &err);
> + vhostfdname, vnet_hdr,
> + fd, TAP_VHOSTFD_INDEX(i),
> + &err);
> if (err) {
> error_propagate(errp, err);
> close(fd);
> @@ -1041,6 +1081,9 @@ free_fail:
> }
>
> out:
> + if (ret && cpr) {
> + cpr_delete_fd_all(name);
> + }
> return ret;
> }
>
> diff --git a/qapi/net.json b/qapi/net.json
> index 118bd34965..264213b5d9 100644
> --- a/qapi/net.json
> +++ b/qapi/net.json
> @@ -355,6 +355,8 @@
> # @poll-us: maximum number of microseconds that could be spent on busy
> # polling for tap (since 2.7)
> #
> +# @cpr: preserve fds and vhostfds during cpr-transfer.
> +#
> # Since: 1.2
> ##
> { 'struct': 'NetdevTapOptions',
> @@ -373,7 +375,8 @@
> '*vhostfds': 'str',
> '*vhostforce': 'bool',
> '*queues': 'uint32',
> - '*poll-us': 'uint32'} }
> + '*poll-us': 'uint32',
> + '*cpr': 'bool'} }
>
> ##
> # @NetdevSocketOptions:
>
Ben Chaney <bchaney@akamai.com> writes:
> From: Steve Sistare <steven.sistare@oracle.com>
>
> Provide the cpr=on option to preserve TAP and vhost descriptors during
> cpr-transfer, so the management layer does not need to create a new
> device for the target.
>
> Save all tap fd's in canonical order, leveraging the index argument of
> cpr_save_fd. For the i'th queue, the tap device fd is saved at index 2*i,
> and the vhostfd (if any) at index 2*i+1.
>
> tap and vhost fd's are passed by name to the monitor when a NIC is hot
> plugged, but the name is not known to qemu after cpr. Allow the manager
> to pass -1 for the fd "name" in the new qemu args to indicate that QEMU
> should search for a saved value. Example:
>
> -netdev tap,id=hostnet2,fds=-1:-1,vhostfds=-1:-1,cpr=on
Hmm. See below.
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> Signed-off-by: Ben Chaney <bchaney@akamai.com>
[...]
> diff --git a/qapi/net.json b/qapi/net.json
> index 118bd34965..264213b5d9 100644
> --- a/qapi/net.json
> +++ b/qapi/net.json
> @@ -355,6 +355,8 @@
##
# @NetdevTapOptions:
#
# Used to configure a host TAP network interface backend.
#
# @ifname: interface name
#
# @fd: file descriptor of an already opened tap
#
# @fds: multiple file descriptors of already opened multiqueue capable
# tap
Not this patch's fault: the interface is misguided, and its
documentation inadequate.
@fds is a string of file descriptor names or numbers separated by ':'.
Not documented. I found out by reading the code.
This violates QAPI design principle "no string parsing". It should be
an array of strings.
Aside: get_fds() should use g_strsplit().
Your patch extends the syntax to "file descriptor names or numbers or
"-1" separated by ":". This is problematic.
Before the patch, a file descriptor name or number is interpreted as a
file descriptor number if it starts with a digit. "-1" doesn't, so it's
interpreted as a file descriptor name. Yes, "-1" works as file
descriptor name. I just verified that
{"execute": "getfd", "arguments": {"fdname": "-1"}}
works by changing 'fdname': 'fdname' to 'fdname': '-1' in
tests/qtest/libqtest.c, and running tests/qtest/dbus-display-test with
QTEST_LOG=/dev/stdout. The test passes using file descriptor name "-1".
Aside: not restricting the syntax of identifiers to something sensible
like "begin with a letter, and contain only ASCII letters, digits, and
hyphen" is a mistake we've make again and again.
Your patch changes the interpretation of "-1" from "file descriptor
name" to "saved file descriptor".
If it does so regardless of the value of @cpr, then this is an
incompatible change.
We normally require such changes to go through the deprecation process.
We waive that when we're *confident* not doing so will not inconvenience
any users. Are we here?
If it does so only when @cpr is true, the semantics of "-1" depends on
@cpr. Yuck!
We can accept "yuck!" when the alternatives are no better. Have we
considered any?
Regardless, we clearly need to document syntax and semantics of @fds.
Please fix the doc string before this patch, then have this patch update
it.
#
# @script: script to initialize the interface
#
# @downscript: script to shut down the interface
#
# @br: bridge name (since 2.8)
#
# @helper: command to execute to configure bridge
#
# @sndbuf: send buffer limit. Understands [TGMKkb] suffixes.
#
# @vnet_hdr: enable the IFF_VNET_HDR flag on the tap interface
#
# @vhost: enable vhost-net network accelerator
#
# @vhostfd: file descriptor of an already opened vhost net device
#
# @vhostfds: file descriptors of multiple already opened vhost net
# devices
Likewise.
#
# @vhostforce: vhost on for non-MSIX virtio guests
#
# @queues: number of queues to be created for multiqueue capable tap
#
> # @poll-us: maximum number of microseconds that could be spent on busy
> # polling for tap (since 2.7)
> #
> +# @cpr: preserve fds and vhostfds during cpr-transfer.
The commit message explains things in a lot more detail. Users may not
need to know all that detail. But this feels too terse.
Please don't abbreviate "file descriptors" to "fds" in documentation
prose.
> +#
> # Since: 1.2
> ##
> { 'struct': 'NetdevTapOptions',
> @@ -373,7 +375,8 @@
'data': {
'*ifname': 'str',
'*fd': 'str',
'*fds': 'str',
'*script': 'str',
'*downscript': 'str',
'*br': 'str',
'*helper': 'str',
'*sndbuf': 'size',
'*vnet_hdr': 'bool',
'*vhost': 'bool',
'*vhostfd': 'str',
> '*vhostfds': 'str',
> '*vhostforce': 'bool',
> '*queues': 'uint32',
> - '*poll-us': 'uint32'} }
> + '*poll-us': 'uint32',
> + '*cpr': 'bool'} }
>
> ##
> # @NetdevSocketOptions:
Hi Markus, On Thu, Dec 4, 2025 at 4:09 PM Markus Armbruster <armbru@redhat.com> wrote: > > Ben Chaney <bchaney@akamai.com> writes: > > > From: Steve Sistare <steven.sistare@oracle.com> > > > > Provide the cpr=on option to preserve TAP and vhost descriptors during > > cpr-transfer, so the management layer does not need to create a new > > device for the target. > > > > Save all tap fd's in canonical order, leveraging the index argument of > > cpr_save_fd. For the i'th queue, the tap device fd is saved at index 2*i, > > and the vhostfd (if any) at index 2*i+1. > > > > tap and vhost fd's are passed by name to the monitor when a NIC is hot > > plugged, but the name is not known to qemu after cpr. Allow the manager > > to pass -1 for the fd "name" in the new qemu args to indicate that QEMU > > should search for a saved value. Example: > > > > -netdev tap,id=hostnet2,fds=-1:-1,vhostfds=-1:-1,cpr=on > > Hmm. See below. > > > > > Signed-off-by: Steve Sistare <steven.sistare@oracle.com> > > Signed-off-by: Ben Chaney <bchaney@akamai.com> > > [...] > > > diff --git a/qapi/net.json b/qapi/net.json > > index 118bd34965..264213b5d9 100644 > > --- a/qapi/net.json > > +++ b/qapi/net.json > > @@ -355,6 +355,8 @@ > ## > # @NetdevTapOptions: > # > # Used to configure a host TAP network interface backend. > # > # @ifname: interface name > # > # @fd: file descriptor of an already opened tap > # > # @fds: multiple file descriptors of already opened multiqueue capable > # tap > > Not this patch's fault: the interface is misguided, and its > documentation inadequate. > > @fds is a string of file descriptor names or numbers separated by ':'. > Not documented. I found out by reading the code. > > This violates QAPI design principle "no string parsing". It should be > an array of strings. > I agree with your concern. Just a note that this "fds" was introduced before QAPI if I am not wrong. Thanks
Jason Wang <jasowang@redhat.com> writes: > Hi Markus, > > On Thu, Dec 4, 2025 at 4:09 PM Markus Armbruster <armbru@redhat.com> wrote: >> >> Ben Chaney <bchaney@akamai.com> writes: >> >> > From: Steve Sistare <steven.sistare@oracle.com> >> > >> > Provide the cpr=on option to preserve TAP and vhost descriptors during >> > cpr-transfer, so the management layer does not need to create a new >> > device for the target. >> > >> > Save all tap fd's in canonical order, leveraging the index argument of >> > cpr_save_fd. For the i'th queue, the tap device fd is saved at index 2*i, >> > and the vhostfd (if any) at index 2*i+1. >> > >> > tap and vhost fd's are passed by name to the monitor when a NIC is hot >> > plugged, but the name is not known to qemu after cpr. Allow the manager >> > to pass -1 for the fd "name" in the new qemu args to indicate that QEMU >> > should search for a saved value. Example: >> > >> > -netdev tap,id=hostnet2,fds=-1:-1,vhostfds=-1:-1,cpr=on >> >> Hmm. See below. >> >> > >> > Signed-off-by: Steve Sistare <steven.sistare@oracle.com> >> > Signed-off-by: Ben Chaney <bchaney@akamai.com> >> >> [...] >> >> > diff --git a/qapi/net.json b/qapi/net.json >> > index 118bd34965..264213b5d9 100644 >> > --- a/qapi/net.json >> > +++ b/qapi/net.json >> > @@ -355,6 +355,8 @@ >> ## >> # @NetdevTapOptions: >> # >> # Used to configure a host TAP network interface backend. >> # >> # @ifname: interface name >> # >> # @fd: file descriptor of an already opened tap >> # >> # @fds: multiple file descriptors of already opened multiqueue capable >> # tap >> >> Not this patch's fault: the interface is misguided, and its >> documentation inadequate. >> >> @fds is a string of file descriptor names or numbers separated by ':'. >> Not documented. I found out by reading the code. >> >> This violates QAPI design principle "no string parsing". It should be >> an array of strings. >> > > I agree with your concern. Just a note that this "fds" was introduced > before QAPI if I am not wrong. It's from 2013 (commit 264986e2c8f). QAPI was still young then (netdev_add had been QAPIfied less than a year ago), we had much to learn, and interface review barely happened. All understandable, and no reason to throw shade on anyone involved :)
© 2016 - 2025 Red Hat, Inc.