[PATCH v2 7/8] ch: support restore with net devices

Purna Pavan Chandra posted 8 patches 2 months, 2 weeks ago
[PATCH v2 7/8] ch: support restore with net devices
Posted by Purna Pavan Chandra 2 months, 2 weeks ago
Cloud-hypervisor now supports restoring with new net fds.
Ref: https://github.com/cloud-hypervisor/cloud-hypervisor/pull/6402
So, pass new tap fds via SCM_RIGHTS to CH's restore api.

Signed-off-by: Purna Pavan Chandra <paekkaladevi@linux.microsoft.com>
---
 src/ch/ch_capabilities.c |  6 +++
 src/ch/ch_capabilities.h |  1 +
 src/ch/ch_driver.c       | 29 +++++++-----
 src/ch/ch_monitor.c      | 21 ++++++++-
 src/ch/ch_monitor.h      |  2 +-
 src/ch/ch_process.c      | 98 +++++++++++++++++++++++++++++++++-------
 6 files changed, 126 insertions(+), 31 deletions(-)

diff --git a/src/ch/ch_capabilities.c b/src/ch/ch_capabilities.c
index 5941851500..f3765a8095 100644
--- a/src/ch/ch_capabilities.c
+++ b/src/ch/ch_capabilities.c
@@ -65,6 +65,12 @@ virCHCapsInitCHVersionCaps(int version)
     if (version >= 36000000)
         virCHCapsSet(chCaps, CH_SOCKET_BACKEND_SERIAL_PORT);
 
+    /* Starting v40, Cloud-Hypervisor supports restore with new net fds.
+     * This is required to be able to restore a guest with network config define.
+     * https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v40.0 */
+    if (version >= 40000000)
+        virCHCapsSet(chCaps, CH_RESTORE_WITH_NEW_TAPFDS);
+
     return g_steal_pointer(&chCaps);
 
 }
diff --git a/src/ch/ch_capabilities.h b/src/ch/ch_capabilities.h
index 03932511f6..e5efb14b68 100644
--- a/src/ch/ch_capabilities.h
+++ b/src/ch/ch_capabilities.h
@@ -28,6 +28,7 @@ typedef enum {
     CH_SERIAL_CONSOLE_IN_PARALLEL, /* Serial and Console ports can work in parallel */
     CH_MULTIFD_IN_ADDNET, /* Cloud-hypervisor can accept multiple FDs in add-net api */
     CH_SOCKET_BACKEND_SERIAL_PORT, /* Support Unix socket as a backend for a serial port */
+    CH_RESTORE_WITH_NEW_TAPFDS, /* Cloud-hypervisor support for restore with new net fds */
 
     CH_CAPS_LAST /* this must always be the last item */
 } virCHCapsFlags;
diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c
index fbeac60825..24aed471c5 100644
--- a/src/ch/ch_driver.c
+++ b/src/ch/ch_driver.c
@@ -680,22 +680,24 @@ chDomainDestroy(virDomainPtr dom)
 }
 
 static int
-chDomainSaveAdditionalValidation(virDomainDef *vmdef)
+chDomainSaveRestoreAdditionalValidation(virCHDriver *driver, virDomainDef *vmdef)
 {
-    /*
-    SAVE and RESTORE are functional only without any networking and
-    device passthrough configuration
-    */
-    if (vmdef->nnets > 0) {
-        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
-                       _("cannot save domain with network interfaces"));
-        return -1;
-    }
+    /* SAVE and RESTORE are functional only without any host device
+     * passthrough configuration */
     if  (vmdef->nhostdevs > 0) {
         virReportError(VIR_ERR_OPERATION_INVALID, "%s",
-                       _("cannot save domain with host devices"));
+                       _("cannot save/restore domain with host devices"));
         return -1;
     }
+
+    if (vmdef->nnets > 0) {
+        if (!virBitmapIsBitSet(driver->chCaps, CH_RESTORE_WITH_NEW_TAPFDS)) {
+            virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+                           _("cannot save/restore domain with network devices"));
+            return -1;
+        }
+    }
+
     return 0;
 }
 
@@ -728,7 +730,7 @@ chDoDomainSave(virCHDriver *driver,
     VIR_AUTOCLOSE fd = -1;
     int ret = -1;
 
-    if (chDomainSaveAdditionalValidation(vm->def) < 0)
+    if (chDomainSaveRestoreAdditionalValidation(driver, vm->def) < 0)
         goto end;
 
     domainState = virDomainObjGetState(vm, NULL);
@@ -1087,6 +1089,9 @@ chDomainRestoreFlags(virConnectPtr conn,
     if (virDomainRestoreFlagsEnsureACL(conn, def) < 0)
         goto cleanup;
 
+    if (chDomainSaveRestoreAdditionalValidation(driver, def) < 0)
+        goto cleanup;
+
     if (!(vm = virDomainObjListAdd(driver->domains, &def,
                                    driver->xmlopt,
                                    VIR_DOMAIN_OBJ_LIST_ADD_LIVE |
diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c
index 8d8be1f46e..3af5e7f2d1 100644
--- a/src/ch/ch_monitor.c
+++ b/src/ch/ch_monitor.c
@@ -978,14 +978,31 @@ virCHMonitorSaveVM(virCHMonitor *mon, const char *to)
 }
 
 int
-virCHMonitorBuildRestoreJson(const char *from, char **jsonstr)
+virCHMonitorBuildRestoreJson(virDomainDef *vmdef, const char *from, char **jsonstr)
 {
+    size_t i;
     g_autoptr(virJSONValue) restore_json = virJSONValueNewObject();
-
     g_autofree char *path_url = g_strdup_printf("file://%s", from);
+
     if (virJSONValueObjectAppendString(restore_json, "source_url", path_url))
         return -1;
 
+    /* Pass the netconfig needed to restore with new netfds */
+    if (vmdef->nnets) {
+        g_autoptr(virJSONValue) nets = virJSONValueNewArray();
+        for (i = 0; i < vmdef->nnets; i++) {
+            g_autoptr(virJSONValue) net_json = virJSONValueNewObject();
+            g_autofree char *id = g_strdup_printf("%s_%ld", CH_NET_ID_PREFIX, i);
+            if (virJSONValueObjectAppendString(net_json, "id", id) < 0)
+                return -1;
+            if (virJSONValueObjectAppendNumberInt(net_json, "num_fds", vmdef->nets[i]->driver.virtio.queues))
+                return -1;
+            virJSONValueArrayAppend(nets, &net_json);
+        }
+        if (virJSONValueObjectAppend(restore_json, "net_fds", &nets))
+            return -1;
+    }
+
     if (!(*jsonstr = virJSONValueToString(restore_json, false)))
         return -1;
 
diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h
index 375b7a49a4..02a6685d58 100644
--- a/src/ch/ch_monitor.h
+++ b/src/ch/ch_monitor.h
@@ -127,4 +127,4 @@ int virCHMonitorGetIOThreads(virCHMonitor *mon,
                              virDomainIOThreadInfo ***iothreads);
 int
 virCHMonitorBuildNetJson(virDomainNetDef *netdef, int netindex, char **jsonstr);
-int virCHMonitorBuildRestoreJson(const char *from, char **jsonstr);
+int virCHMonitorBuildRestoreJson(virDomainDef *vmdef, const char *from, char **jsonstr);
diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c
index 0ac0a5c9a2..540bf6801d 100644
--- a/src/ch/ch_process.c
+++ b/src/ch/ch_process.c
@@ -29,6 +29,7 @@
 #include "ch_process.h"
 #include "domain_cgroup.h"
 #include "domain_interface.h"
+#include "viralloc.h"
 #include "virerror.h"
 #include "virfile.h"
 #include "virjson.h"
@@ -716,6 +717,56 @@ chProcessAddNetworkDevices(virCHDriver *driver,
     return 0;
 }
 
+/**
+ * virCHRestoreCreateNetworkDevices:
+ * @driver: pointer to driver structure
+ * @vmdef: pointer to domain definition
+ * @vmtapfds: returned array of FDs of guest interfaces
+ * @nvmtapfds: returned number of network indexes
+ * @nicindexes: returned array of network indexes
+ * @nnicindexes: returned number of network indexes
+ *
+ * Create network devices for the domain. This function is called during
+ * domain restore.
+ *
+ * Returns 0 on success or -1 in case of error
+*/
+static int
+virCHRestoreCreateNetworkDevices(virCHDriver *driver,
+                                 virDomainDef *vmdef,
+                                 int **vmtapfds,
+                                 size_t *nvmtapfds,
+                                 int **nicindexes,
+                                 size_t *nnicindexes)
+{
+    size_t i, j;
+    size_t tapfd_len;
+    size_t index_vmtapfds;
+    for (i = 0; i < vmdef->nnets; i++) {
+        g_autofree int *tapfds = NULL;
+        tapfd_len = vmdef->nets[i]->driver.virtio.queues;
+        if (virCHDomainValidateActualNetDef(vmdef->nets[i]) < 0) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                           _("net definition failed validation"));
+            return -1;
+        }
+        tapfds = g_new0(int, tapfd_len);
+        memset(tapfds, -1, (tapfd_len) * sizeof(int));
+
+        /* Connect Guest interfaces */
+        if (virCHConnetNetworkInterfaces(driver, vmdef, vmdef->nets[i], tapfds,
+                                          nicindexes, nnicindexes) < 0)
+            return -1;
+
+        index_vmtapfds = *nvmtapfds;
+        VIR_EXPAND_N(*vmtapfds, *nvmtapfds, tapfd_len);
+        for (j = 0; j < tapfd_len; j++) {
+            VIR_APPEND_ELEMENT_INPLACE(*vmtapfds, index_vmtapfds, tapfds[j]);
+        }
+    }
+    return 0;
+}
+
 /**
  * virCHProcessStartValidate:
  * @driver: pointer to driver structure
@@ -917,7 +968,12 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from
     g_autofree char *payload = NULL;
     g_autofree char *response = NULL;
     VIR_AUTOCLOSE mon_sockfd = -1;
+    g_autofree int *tapfds = NULL;
+    g_autofree int *nicindexes = NULL;
     size_t payload_len;
+    size_t ntapfds = 0;
+    size_t nnicindexes = 0;
+    int ret = -1;
 
     if (!priv->monitor) {
         /* Get the first monitor connection if not already */
@@ -932,17 +988,7 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from
     vm->def->id = vm->pid;
     priv->machineName = virCHDomainGetMachineName(vm);
 
-    /* Pass 0, NULL as restore only works without networking support */
-    if (virDomainCgroupSetupCgroup("ch", vm,
-                                   0, NULL, /* nnicindexes, nicindexes */
-                                   &priv->cgroup,
-                                   cfg->cgroupControllers,
-                                   0, /*maxThreadsPerProc*/
-                                   priv->driver->privileged,
-                                   priv->machineName) < 0)
-        return -1;
-
-    if (virCHMonitorBuildRestoreJson(from, &payload) < 0) {
+    if (virCHMonitorBuildRestoreJson(vm->def, from, &payload) < 0) {
         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                        _("failed to restore domain"));
         return -1;
@@ -960,20 +1006,40 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from
     if ((mon_sockfd = chMonitorSocketConnect(priv->monitor)) < 0)
         return -1;
 
-    if (virSocketSendMsgWithFDs(mon_sockfd, payload, payload_len, NULL, 0) < 0) {
+    if (virCHRestoreCreateNetworkDevices(driver, vm->def, &tapfds, &ntapfds, &nicindexes, &nnicindexes) < 0)
+        goto cleanup;
+
+    if (virDomainCgroupSetupCgroup("ch", vm,
+                                   nnicindexes, nicindexes,
+                                   &priv->cgroup,
+                                   cfg->cgroupControllers,
+                                   0, /*maxThreadsPerProc*/
+                                   priv->driver->privileged,
+                                   priv->machineName) < 0)
+        goto cleanup;
+
+    /* Bring up netdevs before restoring vm */
+    if (virDomainInterfaceStartDevices(vm->def) < 0)
+        goto cleanup;
+
+    if (virSocketSendMsgWithFDs(mon_sockfd, payload, payload_len, tapfds, ntapfds) < 0) {
         virReportSystemError(errno, "%s",
                              _("Failed to send restore request to CH"));
-        return -1;
+        goto cleanup;
     }
 
     /* Restore is a syncronous operationin CH. so, pass false to wait until there's a reponse */
     if (chSocketProcessHttpResponse(mon_sockfd, false) < 0)
-        return -1;
+        goto cleanup;
 
     if (virCHProcessSetup(vm) < 0)
-        return -1;
+        goto cleanup;
 
     virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_FROM_SNAPSHOT);
+    ret = 0;
 
-    return 0;
+ cleanup:
+    if (tapfds)
+        chCloseFDs(tapfds, ntapfds);
+    return ret;
 }
-- 
2.34.1
Re: [PATCH v2 7/8] ch: support restore with net devices
Posted by Praveen K Paladugu 1 month, 3 weeks ago

On 7/4/2024 6:13 AM, Purna Pavan Chandra wrote:
> Cloud-hypervisor now supports restoring with new net fds.
> Ref: https://github.com/cloud-hypervisor/cloud-hypervisor/pull/6402
> So, pass new tap fds via SCM_RIGHTS to CH's restore api.
> 
> Signed-off-by: Purna Pavan Chandra <paekkaladevi@linux.microsoft.com>
> ---
>   src/ch/ch_capabilities.c |  6 +++
>   src/ch/ch_capabilities.h |  1 +
>   src/ch/ch_driver.c       | 29 +++++++-----
>   src/ch/ch_monitor.c      | 21 ++++++++-
>   src/ch/ch_monitor.h      |  2 +-
>   src/ch/ch_process.c      | 98 +++++++++++++++++++++++++++++++++-------
>   6 files changed, 126 insertions(+), 31 deletions(-)
> 
> diff --git a/src/ch/ch_capabilities.c b/src/ch/ch_capabilities.c
> index 5941851500..f3765a8095 100644
> --- a/src/ch/ch_capabilities.c
> +++ b/src/ch/ch_capabilities.c
> @@ -65,6 +65,12 @@ virCHCapsInitCHVersionCaps(int version)
>       if (version >= 36000000)
>           virCHCapsSet(chCaps, CH_SOCKET_BACKEND_SERIAL_PORT);
>   
> +    /* Starting v40, Cloud-Hypervisor supports restore with new net fds.
> +     * This is required to be able to restore a guest with network config define.
> +     * https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v40.0 */
> +    if (version >= 40000000)
> +        virCHCapsSet(chCaps, CH_RESTORE_WITH_NEW_TAPFDS);
> +
>       return g_steal_pointer(&chCaps);
>   
>   }
> diff --git a/src/ch/ch_capabilities.h b/src/ch/ch_capabilities.h
> index 03932511f6..e5efb14b68 100644
> --- a/src/ch/ch_capabilities.h
> +++ b/src/ch/ch_capabilities.h
> @@ -28,6 +28,7 @@ typedef enum {
>       CH_SERIAL_CONSOLE_IN_PARALLEL, /* Serial and Console ports can work in parallel */
>       CH_MULTIFD_IN_ADDNET, /* Cloud-hypervisor can accept multiple FDs in add-net api */
>       CH_SOCKET_BACKEND_SERIAL_PORT, /* Support Unix socket as a backend for a serial port */
> +    CH_RESTORE_WITH_NEW_TAPFDS, /* Cloud-hypervisor support for restore with new net fds */
>   
>       CH_CAPS_LAST /* this must always be the last item */
>   } virCHCapsFlags;
> diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c
> index fbeac60825..24aed471c5 100644
> --- a/src/ch/ch_driver.c
> +++ b/src/ch/ch_driver.c
> @@ -680,22 +680,24 @@ chDomainDestroy(virDomainPtr dom)
>   }
>   
>   static int
> -chDomainSaveAdditionalValidation(virDomainDef *vmdef)
> +chDomainSaveRestoreAdditionalValidation(virCHDriver *driver, virDomainDef *vmdef)
>   {
> -    /*
> -    SAVE and RESTORE are functional only without any networking and
> -    device passthrough configuration
> -    */
> -    if (vmdef->nnets > 0) {
> -        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> -                       _("cannot save domain with network interfaces"));
> -        return -1;
> -    }
> +    /* SAVE and RESTORE are functional only without any host device
> +     * passthrough configuration */
>       if  (vmdef->nhostdevs > 0) {
>           virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> -                       _("cannot save domain with host devices"));
> +                       _("cannot save/restore domain with host devices"));
>           return -1;
>       }
> +
> +    if (vmdef->nnets > 0) {
> +        if (!virBitmapIsBitSet(driver->chCaps, CH_RESTORE_WITH_NEW_TAPFDS)) {
> +            virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> +                           _("cannot save/restore domain with network devices"));
> +            return -1;
> +        }
> +    }
> +
>       return 0;
>   }
>   
> @@ -728,7 +730,7 @@ chDoDomainSave(virCHDriver *driver,
>       VIR_AUTOCLOSE fd = -1;
>       int ret = -1;
>   
> -    if (chDomainSaveAdditionalValidation(vm->def) < 0)
> +    if (chDomainSaveRestoreAdditionalValidation(driver, vm->def) < 0)
>           goto end;
>   
>       domainState = virDomainObjGetState(vm, NULL);
> @@ -1087,6 +1089,9 @@ chDomainRestoreFlags(virConnectPtr conn,
>       if (virDomainRestoreFlagsEnsureACL(conn, def) < 0)
>           goto cleanup;
>   
> +    if (chDomainSaveRestoreAdditionalValidation(driver, def) < 0)
> +        goto cleanup;
> +
>       if (!(vm = virDomainObjListAdd(driver->domains, &def,
>                                      driver->xmlopt,
>                                      VIR_DOMAIN_OBJ_LIST_ADD_LIVE |
> diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c
> index 8d8be1f46e..3af5e7f2d1 100644
> --- a/src/ch/ch_monitor.c
> +++ b/src/ch/ch_monitor.c
> @@ -978,14 +978,31 @@ virCHMonitorSaveVM(virCHMonitor *mon, const char *to)
>   }
>   
>   int
> -virCHMonitorBuildRestoreJson(const char *from, char **jsonstr)
> +virCHMonitorBuildRestoreJson(virDomainDef *vmdef, const char *from, char **jsonstr)
>   {
> +    size_t i;
>       g_autoptr(virJSONValue) restore_json = virJSONValueNewObject();
> -
>       g_autofree char *path_url = g_strdup_printf("file://%s", from);
> +
>       if (virJSONValueObjectAppendString(restore_json, "source_url", path_url))
>           return -1;
>   
> +    /* Pass the netconfig needed to restore with new netfds */
> +    if (vmdef->nnets) {
> +        g_autoptr(virJSONValue) nets = virJSONValueNewArray();
> +        for (i = 0; i < vmdef->nnets; i++) {
> +            g_autoptr(virJSONValue) net_json = virJSONValueNewObject();
> +            g_autofree char *id = g_strdup_printf("%s_%ld", CH_NET_ID_PREFIX, i);
> +            if (virJSONValueObjectAppendString(net_json, "id", id) < 0)
> +                return -1;
> +            if (virJSONValueObjectAppendNumberInt(net_json, "num_fds", vmdef->nets[i]->driver.virtio.queues))
> +                return -1;
> +            virJSONValueArrayAppend(nets, &net_json);
> +        }
> +        if (virJSONValueObjectAppend(restore_json, "net_fds", &nets))
> +            return -1;
> +    }
> +
>       if (!(*jsonstr = virJSONValueToString(restore_json, false)))
>           return -1;
>   
> diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h
> index 375b7a49a4..02a6685d58 100644
> --- a/src/ch/ch_monitor.h
> +++ b/src/ch/ch_monitor.h
> @@ -127,4 +127,4 @@ int virCHMonitorGetIOThreads(virCHMonitor *mon,
>                                virDomainIOThreadInfo ***iothreads);
>   int
>   virCHMonitorBuildNetJson(virDomainNetDef *netdef, int netindex, char **jsonstr);
> -int virCHMonitorBuildRestoreJson(const char *from, char **jsonstr);
> +int virCHMonitorBuildRestoreJson(virDomainDef *vmdef, const char *from, char **jsonstr);
> diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c
> index 0ac0a5c9a2..540bf6801d 100644
> --- a/src/ch/ch_process.c
> +++ b/src/ch/ch_process.c
> @@ -29,6 +29,7 @@
>   #include "ch_process.h"
>   #include "domain_cgroup.h"
>   #include "domain_interface.h"
> +#include "viralloc.h"
>   #include "virerror.h"
>   #include "virfile.h"
>   #include "virjson.h"
> @@ -716,6 +717,56 @@ chProcessAddNetworkDevices(virCHDriver *driver,
>       return 0;
>   }
>   
> +/**
> + * virCHRestoreCreateNetworkDevices:
> + * @driver: pointer to driver structure
> + * @vmdef: pointer to domain definition
> + * @vmtapfds: returned array of FDs of guest interfaces
> + * @nvmtapfds: returned number of network indexes
> + * @nicindexes: returned array of network indexes
> + * @nnicindexes: returned number of network indexes
> + *
> + * Create network devices for the domain. This function is called during
> + * domain restore.
> + *
> + * Returns 0 on success or -1 in case of error
> +*/
> +static int
> +virCHRestoreCreateNetworkDevices(virCHDriver *driver,
> +                                 virDomainDef *vmdef,
> +                                 int **vmtapfds,
> +                                 size_t *nvmtapfds,
> +                                 int **nicindexes,
> +                                 size_t *nnicindexes)
> +{
> +    size_t i, j;
> +    size_t tapfd_len;
> +    size_t index_vmtapfds;
Instead of duplicating these steps from chProcessAddNetworkDevices,
please consider refactoring them into a different to be used here and
in chProcessAddNetworkDevices.
> +    for (i = 0; i < vmdef->nnets; i++) {
> +        g_autofree int *tapfds = NULL;
> +        tapfd_len = vmdef->nets[i]->driver.virtio.queues;
> +        if (virCHDomainValidateActualNetDef(vmdef->nets[i]) < 0) {
> +            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                           _("net definition failed validation"));
> +            return -1;
> +        }
> +        tapfds = g_new0(int, tapfd_len);
> +        memset(tapfds, -1, (tapfd_len) * sizeof(int));
> +
> +        /* Connect Guest interfaces */
> +        if (virCHConnetNetworkInterfaces(driver, vmdef, vmdef->nets[i], tapfds,
> +                                          nicindexes, nnicindexes) < 0)
> +            return -1;
> +
> +        index_vmtapfds = *nvmtapfds;
> +        VIR_EXPAND_N(*vmtapfds, *nvmtapfds, tapfd_len);
> +        for (j = 0; j < tapfd_len; j++) {
> +            VIR_APPEND_ELEMENT_INPLACE(*vmtapfds, index_vmtapfds, tapfds[j]);
> +        }
> +    }
> +    return 0;
> +}
> +
>   /**
>    * virCHProcessStartValidate:
>    * @driver: pointer to driver structure
> @@ -917,7 +968,12 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from
>       g_autofree char *payload = NULL;
>       g_autofree char *response = NULL;
>       VIR_AUTOCLOSE mon_sockfd = -1;
> +    g_autofree int *tapfds = NULL;
> +    g_autofree int *nicindexes = NULL;
>       size_t payload_len;
> +    size_t ntapfds = 0;
> +    size_t nnicindexes = 0;
> +    int ret = -1;
>   
>       if (!priv->monitor) {
>           /* Get the first monitor connection if not already */
> @@ -932,17 +988,7 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from
>       vm->def->id = vm->pid;
>       priv->machineName = virCHDomainGetMachineName(vm);
>   
> -    /* Pass 0, NULL as restore only works without networking support */
> -    if (virDomainCgroupSetupCgroup("ch", vm,
> -                                   0, NULL, /* nnicindexes, nicindexes */
> -                                   &priv->cgroup,
> -                                   cfg->cgroupControllers,
> -                                   0, /*maxThreadsPerProc*/
> -                                   priv->driver->privileged,
> -                                   priv->machineName) < 0)
> -        return -1;
> -
> -    if (virCHMonitorBuildRestoreJson(from, &payload) < 0) {
> +    if (virCHMonitorBuildRestoreJson(vm->def, from, &payload) < 0) {
>           virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>                          _("failed to restore domain"));
>           return -1;
> @@ -960,20 +1006,40 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from
>       if ((mon_sockfd = chMonitorSocketConnect(priv->monitor)) < 0)
>           return -1;
>   
> -    if (virSocketSendMsgWithFDs(mon_sockfd, payload, payload_len, NULL, 0) < 0) {
> +    if (virCHRestoreCreateNetworkDevices(driver, vm->def, &tapfds, &ntapfds, &nicindexes, &nnicindexes) < 0)
> +        goto cleanup;
> +
> +    if (virDomainCgroupSetupCgroup("ch", vm,
> +                                   nnicindexes, nicindexes,
> +                                   &priv->cgroup,
> +                                   cfg->cgroupControllers,
> +                                   0, /*maxThreadsPerProc*/
> +                                   priv->driver->privileged,
> +                                   priv->machineName) < 0)
> +        goto cleanup;
> +
> +    /* Bring up netdevs before restoring vm */
> +    if (virDomainInterfaceStartDevices(vm->def) < 0)
> +        goto cleanup;
> +
> +    if (virSocketSendMsgWithFDs(mon_sockfd, payload, payload_len, tapfds, ntapfds) < 0) {
>           virReportSystemError(errno, "%s",
>                                _("Failed to send restore request to CH"));
> -        return -1;
> +        goto cleanup;
>       }
> 
After sending the FDs, they have to closed on Libvirt side, even if the
send was successful. Please check chProcessAddNetworkDevices for details
on it.
>       /* Restore is a syncronous operationin CH. so, pass false to wait until there's a reponse */
>       if (chSocketProcessHttpResponse(mon_sockfd, false) < 0)
> -        return -1;
> +        goto cleanup;
>   
>       if (virCHProcessSetup(vm) < 0)
> -        return -1;
> +        goto cleanup;
>   
>       virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_FROM_SNAPSHOT);
> +    ret = 0;
>   
> -    return 0;
> + cleanup:
> +    if (tapfds)
> +        chCloseFDs(tapfds, ntapfds);
> +    return ret;
>   }

-- 
Regards,
Praveen
Re: [PATCH v2 7/8] ch: support restore with net devices
Posted by Purna Pavan Chandra Aekkaladevi 1 month, 1 week ago
On Thu, Jul 25, 2024 at 12:04:47PM -0500, Praveen K Paladugu wrote:
> 
> 
> On 7/4/2024 6:13 AM, Purna Pavan Chandra wrote:
> > Cloud-hypervisor now supports restoring with new net fds.
> > Ref: https://github.com/cloud-hypervisor/cloud-hypervisor/pull/6402
> > So, pass new tap fds via SCM_RIGHTS to CH's restore api.
> > 
> > Signed-off-by: Purna Pavan Chandra <paekkaladevi@linux.microsoft.com>
> > ---
> >   src/ch/ch_capabilities.c |  6 +++
> >   src/ch/ch_capabilities.h |  1 +
> >   src/ch/ch_driver.c       | 29 +++++++-----
> >   src/ch/ch_monitor.c      | 21 ++++++++-
> >   src/ch/ch_monitor.h      |  2 +-
> >   src/ch/ch_process.c      | 98 +++++++++++++++++++++++++++++++++-------
> >   6 files changed, 126 insertions(+), 31 deletions(-)
> > 
> > diff --git a/src/ch/ch_capabilities.c b/src/ch/ch_capabilities.c
> > index 5941851500..f3765a8095 100644
> > --- a/src/ch/ch_capabilities.c
> > +++ b/src/ch/ch_capabilities.c
> > @@ -65,6 +65,12 @@ virCHCapsInitCHVersionCaps(int version)
> >       if (version >= 36000000)
> >           virCHCapsSet(chCaps, CH_SOCKET_BACKEND_SERIAL_PORT);
> > +    /* Starting v40, Cloud-Hypervisor supports restore with new net fds.
> > +     * This is required to be able to restore a guest with network config define.
> > +     * https://github.com/cloud-hypervisor/cloud-hypervisor/releases/tag/v40.0 */
> > +    if (version >= 40000000)
> > +        virCHCapsSet(chCaps, CH_RESTORE_WITH_NEW_TAPFDS);
> > +
> >       return g_steal_pointer(&chCaps);
> >   }
> > diff --git a/src/ch/ch_capabilities.h b/src/ch/ch_capabilities.h
> > index 03932511f6..e5efb14b68 100644
> > --- a/src/ch/ch_capabilities.h
> > +++ b/src/ch/ch_capabilities.h
> > @@ -28,6 +28,7 @@ typedef enum {
> >       CH_SERIAL_CONSOLE_IN_PARALLEL, /* Serial and Console ports can work in parallel */
> >       CH_MULTIFD_IN_ADDNET, /* Cloud-hypervisor can accept multiple FDs in add-net api */
> >       CH_SOCKET_BACKEND_SERIAL_PORT, /* Support Unix socket as a backend for a serial port */
> > +    CH_RESTORE_WITH_NEW_TAPFDS, /* Cloud-hypervisor support for restore with new net fds */
> >       CH_CAPS_LAST /* this must always be the last item */
> >   } virCHCapsFlags;
> > diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c
> > index fbeac60825..24aed471c5 100644
> > --- a/src/ch/ch_driver.c
> > +++ b/src/ch/ch_driver.c
> > @@ -680,22 +680,24 @@ chDomainDestroy(virDomainPtr dom)
> >   }
> >   static int
> > -chDomainSaveAdditionalValidation(virDomainDef *vmdef)
> > +chDomainSaveRestoreAdditionalValidation(virCHDriver *driver, virDomainDef *vmdef)
> >   {
> > -    /*
> > -    SAVE and RESTORE are functional only without any networking and
> > -    device passthrough configuration
> > -    */
> > -    if (vmdef->nnets > 0) {
> > -        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> > -                       _("cannot save domain with network interfaces"));
> > -        return -1;
> > -    }
> > +    /* SAVE and RESTORE are functional only without any host device
> > +     * passthrough configuration */
> >       if  (vmdef->nhostdevs > 0) {
> >           virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> > -                       _("cannot save domain with host devices"));
> > +                       _("cannot save/restore domain with host devices"));
> >           return -1;
> >       }
> > +
> > +    if (vmdef->nnets > 0) {
> > +        if (!virBitmapIsBitSet(driver->chCaps, CH_RESTORE_WITH_NEW_TAPFDS)) {
> > +            virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> > +                           _("cannot save/restore domain with network devices"));
> > +            return -1;
> > +        }
> > +    }
> > +
> >       return 0;
> >   }
> > @@ -728,7 +730,7 @@ chDoDomainSave(virCHDriver *driver,
> >       VIR_AUTOCLOSE fd = -1;
> >       int ret = -1;
> > -    if (chDomainSaveAdditionalValidation(vm->def) < 0)
> > +    if (chDomainSaveRestoreAdditionalValidation(driver, vm->def) < 0)
> >           goto end;
> >       domainState = virDomainObjGetState(vm, NULL);
> > @@ -1087,6 +1089,9 @@ chDomainRestoreFlags(virConnectPtr conn,
> >       if (virDomainRestoreFlagsEnsureACL(conn, def) < 0)
> >           goto cleanup;
> > +    if (chDomainSaveRestoreAdditionalValidation(driver, def) < 0)
> > +        goto cleanup;
> > +
> >       if (!(vm = virDomainObjListAdd(driver->domains, &def,
> >                                      driver->xmlopt,
> >                                      VIR_DOMAIN_OBJ_LIST_ADD_LIVE |
> > diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c
> > index 8d8be1f46e..3af5e7f2d1 100644
> > --- a/src/ch/ch_monitor.c
> > +++ b/src/ch/ch_monitor.c
> > @@ -978,14 +978,31 @@ virCHMonitorSaveVM(virCHMonitor *mon, const char *to)
> >   }
> >   int
> > -virCHMonitorBuildRestoreJson(const char *from, char **jsonstr)
> > +virCHMonitorBuildRestoreJson(virDomainDef *vmdef, const char *from, char **jsonstr)
> >   {
> > +    size_t i;
> >       g_autoptr(virJSONValue) restore_json = virJSONValueNewObject();
> > -
> >       g_autofree char *path_url = g_strdup_printf("file://%s", from);
> > +
> >       if (virJSONValueObjectAppendString(restore_json, "source_url", path_url))
> >           return -1;
> > +    /* Pass the netconfig needed to restore with new netfds */
> > +    if (vmdef->nnets) {
> > +        g_autoptr(virJSONValue) nets = virJSONValueNewArray();
> > +        for (i = 0; i < vmdef->nnets; i++) {
> > +            g_autoptr(virJSONValue) net_json = virJSONValueNewObject();
> > +            g_autofree char *id = g_strdup_printf("%s_%ld", CH_NET_ID_PREFIX, i);
> > +            if (virJSONValueObjectAppendString(net_json, "id", id) < 0)
> > +                return -1;
> > +            if (virJSONValueObjectAppendNumberInt(net_json, "num_fds", vmdef->nets[i]->driver.virtio.queues))
> > +                return -1;
> > +            virJSONValueArrayAppend(nets, &net_json);
> > +        }
> > +        if (virJSONValueObjectAppend(restore_json, "net_fds", &nets))
> > +            return -1;
> > +    }
> > +
> >       if (!(*jsonstr = virJSONValueToString(restore_json, false)))
> >           return -1;
> > diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h
> > index 375b7a49a4..02a6685d58 100644
> > --- a/src/ch/ch_monitor.h
> > +++ b/src/ch/ch_monitor.h
> > @@ -127,4 +127,4 @@ int virCHMonitorGetIOThreads(virCHMonitor *mon,
> >                                virDomainIOThreadInfo ***iothreads);
> >   int
> >   virCHMonitorBuildNetJson(virDomainNetDef *netdef, int netindex, char **jsonstr);
> > -int virCHMonitorBuildRestoreJson(const char *from, char **jsonstr);
> > +int virCHMonitorBuildRestoreJson(virDomainDef *vmdef, const char *from, char **jsonstr);
> > diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c
> > index 0ac0a5c9a2..540bf6801d 100644
> > --- a/src/ch/ch_process.c
> > +++ b/src/ch/ch_process.c
> > @@ -29,6 +29,7 @@
> >   #include "ch_process.h"
> >   #include "domain_cgroup.h"
> >   #include "domain_interface.h"
> > +#include "viralloc.h"
> >   #include "virerror.h"
> >   #include "virfile.h"
> >   #include "virjson.h"
> > @@ -716,6 +717,56 @@ chProcessAddNetworkDevices(virCHDriver *driver,
> >       return 0;
> >   }
> > +/**
> > + * virCHRestoreCreateNetworkDevices:
> > + * @driver: pointer to driver structure
> > + * @vmdef: pointer to domain definition
> > + * @vmtapfds: returned array of FDs of guest interfaces
> > + * @nvmtapfds: returned number of network indexes
> > + * @nicindexes: returned array of network indexes
> > + * @nnicindexes: returned number of network indexes
> > + *
> > + * Create network devices for the domain. This function is called during
> > + * domain restore.
> > + *
> > + * Returns 0 on success or -1 in case of error
> > +*/
> > +static int
> > +virCHRestoreCreateNetworkDevices(virCHDriver *driver,
> > +                                 virDomainDef *vmdef,
> > +                                 int **vmtapfds,
> > +                                 size_t *nvmtapfds,
> > +                                 int **nicindexes,
> > +                                 size_t *nnicindexes)
> > +{
> > +    size_t i, j;
> > +    size_t tapfd_len;
> > +    size_t index_vmtapfds;
> Instead of duplicating these steps from chProcessAddNetworkDevices,
> please consider refactoring them into a different to be used here and
> in chProcessAddNetworkDevices.

chProcessAddNetworkDevices is already refactored and the common code is
moved to new functions. Since the action performed inside the loop are
different here, this can not be refactored further.

> > +    for (i = 0; i < vmdef->nnets; i++) {
> > +        g_autofree int *tapfds = NULL;
> > +        tapfd_len = vmdef->nets[i]->driver.virtio.queues;
> > +        if (virCHDomainValidateActualNetDef(vmdef->nets[i]) < 0) {
> > +            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> > +                           _("net definition failed validation"));
> > +            return -1;
> > +        }
> > +        tapfds = g_new0(int, tapfd_len);
> > +        memset(tapfds, -1, (tapfd_len) * sizeof(int));
> > +
> > +        /* Connect Guest interfaces */
> > +        if (virCHConnetNetworkInterfaces(driver, vmdef, vmdef->nets[i], tapfds,
> > +                                          nicindexes, nnicindexes) < 0)
> > +            return -1;
> > +
> > +        index_vmtapfds = *nvmtapfds;
> > +        VIR_EXPAND_N(*vmtapfds, *nvmtapfds, tapfd_len);
> > +        for (j = 0; j < tapfd_len; j++) {
> > +            VIR_APPEND_ELEMENT_INPLACE(*vmtapfds, index_vmtapfds, tapfds[j]);
> > +        }
> > +    }
> > +    return 0;
> > +}
> > +
> >   /**
> >    * virCHProcessStartValidate:
> >    * @driver: pointer to driver structure
> > @@ -917,7 +968,12 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from
> >       g_autofree char *payload = NULL;
> >       g_autofree char *response = NULL;
> >       VIR_AUTOCLOSE mon_sockfd = -1;
> > +    g_autofree int *tapfds = NULL;
> > +    g_autofree int *nicindexes = NULL;
> >       size_t payload_len;
> > +    size_t ntapfds = 0;
> > +    size_t nnicindexes = 0;
> > +    int ret = -1;
> >       if (!priv->monitor) {
> >           /* Get the first monitor connection if not already */
> > @@ -932,17 +988,7 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from
> >       vm->def->id = vm->pid;
> >       priv->machineName = virCHDomainGetMachineName(vm);
> > -    /* Pass 0, NULL as restore only works without networking support */
> > -    if (virDomainCgroupSetupCgroup("ch", vm,
> > -                                   0, NULL, /* nnicindexes, nicindexes */
> > -                                   &priv->cgroup,
> > -                                   cfg->cgroupControllers,
> > -                                   0, /*maxThreadsPerProc*/
> > -                                   priv->driver->privileged,
> > -                                   priv->machineName) < 0)
> > -        return -1;
> > -
> > -    if (virCHMonitorBuildRestoreJson(from, &payload) < 0) {
> > +    if (virCHMonitorBuildRestoreJson(vm->def, from, &payload) < 0) {
> >           virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> >                          _("failed to restore domain"));
> >           return -1;
> > @@ -960,20 +1006,40 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from
> >       if ((mon_sockfd = chMonitorSocketConnect(priv->monitor)) < 0)
> >           return -1;
> > -    if (virSocketSendMsgWithFDs(mon_sockfd, payload, payload_len, NULL, 0) < 0) {
> > +    if (virCHRestoreCreateNetworkDevices(driver, vm->def, &tapfds, &ntapfds, &nicindexes, &nnicindexes) < 0)
> > +        goto cleanup;
> > +
> > +    if (virDomainCgroupSetupCgroup("ch", vm,
> > +                                   nnicindexes, nicindexes,
> > +                                   &priv->cgroup,
> > +                                   cfg->cgroupControllers,
> > +                                   0, /*maxThreadsPerProc*/
> > +                                   priv->driver->privileged,
> > +                                   priv->machineName) < 0)
> > +        goto cleanup;
> > +
> > +    /* Bring up netdevs before restoring vm */
> > +    if (virDomainInterfaceStartDevices(vm->def) < 0)
> > +        goto cleanup;
> > +
> > +    if (virSocketSendMsgWithFDs(mon_sockfd, payload, payload_len, tapfds, ntapfds) < 0) {
> >           virReportSystemError(errno, "%s",
> >                                _("Failed to send restore request to CH"));
> > -        return -1;
> > +        goto cleanup;
> >       }
> > 
> After sending the FDs, they have to closed on Libvirt side, even if the
> send was successful. Please check chProcessAddNetworkDevices for details
> on it.

This is taken care in the cleanup section.

> >       /* Restore is a syncronous operationin CH. so, pass false to wait until there's a reponse */
> >       if (chSocketProcessHttpResponse(mon_sockfd, false) < 0)
> > -        return -1;
> > +        goto cleanup;
> >       if (virCHProcessSetup(vm) < 0)
> > -        return -1;
> > +        goto cleanup;
> >       virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_FROM_SNAPSHOT);
> > +    ret = 0;
> > -    return 0;
> > + cleanup:
> > +    if (tapfds)
> > +        chCloseFDs(tapfds, ntapfds);
> > +    return ret;
> >   }
> 
> -- 
> Regards,
> Praveen