[RFC] Add basic driver for the Cloud-Hypervisor

William Douglas posted 1 patch 3 years, 8 months ago
Test syntax-check failed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20200827182430.753251-1-william.douglas@intel.com
There is a newer version of this series
include/libvirt/virterror.h |   1 +
libvirt.spec.in             |  32 ++
meson.build                 |   5 +
meson_options.txt           |   1 +
po/POTFILES.in              |   5 +
src/ch/ch_conf.c            | 239 +++++++++
src/ch/ch_conf.h            |  87 ++++
src/ch/ch_domain.c          | 219 +++++++++
src/ch/ch_domain.h          |  68 +++
src/ch/ch_driver.c          | 937 ++++++++++++++++++++++++++++++++++++
src/ch/ch_driver.h          |  24 +
src/ch/ch_monitor.c         | 796 ++++++++++++++++++++++++++++++
src/ch/ch_monitor.h         |  62 +++
src/ch/ch_process.c         | 125 +++++
src/ch/ch_process.h         |  30 ++
src/ch/meson.build          |  44 ++
src/conf/domain_conf.c      |   1 +
src/conf/domain_conf.h      |   1 +
src/meson.build             |   1 +
src/qemu/qemu_command.c     |   1 +
src/remote/remote_daemon.c  |   4 +
src/util/virerror.c         |   1 +
tools/virsh.c               |   3 +
23 files changed, 2687 insertions(+)
create mode 100644 src/ch/ch_conf.c
create mode 100644 src/ch/ch_conf.h
create mode 100644 src/ch/ch_domain.c
create mode 100644 src/ch/ch_domain.h
create mode 100644 src/ch/ch_driver.c
create mode 100644 src/ch/ch_driver.h
create mode 100644 src/ch/ch_monitor.c
create mode 100644 src/ch/ch_monitor.h
create mode 100644 src/ch/ch_process.c
create mode 100644 src/ch/ch_process.h
create mode 100644 src/ch/meson.build
[RFC] Add basic driver for the Cloud-Hypervisor
Posted by William Douglas 3 years, 8 months ago
This patch adds support for the following initial VM actions using the
Cloud-Hypervsior API:
 * vm.create
 * vm.delete
 * vm.boot
 * vm.shutdown
 * vm.reboot
 * vm.pause
 * vm.resume

To use the Cloud-Hypervisor driver, the v0.9.0 (the as of now current)
release of Cloud-Hypervisor is required to be installed.

Signed-off-by: William Douglas <william.douglas@intel.com>
---
 include/libvirt/virterror.h |   1 +
 libvirt.spec.in             |  32 ++
 meson.build                 |   5 +
 meson_options.txt           |   1 +
 po/POTFILES.in              |   5 +
 src/ch/ch_conf.c            | 239 +++++++++
 src/ch/ch_conf.h            |  87 ++++
 src/ch/ch_domain.c          | 219 +++++++++
 src/ch/ch_domain.h          |  68 +++
 src/ch/ch_driver.c          | 937 ++++++++++++++++++++++++++++++++++++
 src/ch/ch_driver.h          |  24 +
 src/ch/ch_monitor.c         | 796 ++++++++++++++++++++++++++++++
 src/ch/ch_monitor.h         |  62 +++
 src/ch/ch_process.c         | 125 +++++
 src/ch/ch_process.h         |  30 ++
 src/ch/meson.build          |  44 ++
 src/conf/domain_conf.c      |   1 +
 src/conf/domain_conf.h      |   1 +
 src/meson.build             |   1 +
 src/qemu/qemu_command.c     |   1 +
 src/remote/remote_daemon.c  |   4 +
 src/util/virerror.c         |   1 +
 tools/virsh.c               |   3 +
 23 files changed, 2687 insertions(+)
 create mode 100644 src/ch/ch_conf.c
 create mode 100644 src/ch/ch_conf.h
 create mode 100644 src/ch/ch_domain.c
 create mode 100644 src/ch/ch_domain.h
 create mode 100644 src/ch/ch_driver.c
 create mode 100644 src/ch/ch_driver.h
 create mode 100644 src/ch/ch_monitor.c
 create mode 100644 src/ch/ch_monitor.h
 create mode 100644 src/ch/ch_process.c
 create mode 100644 src/ch/ch_process.h
 create mode 100644 src/ch/meson.build

diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h
index 0f1c32283d..21faa8f128 100644
--- a/include/libvirt/virterror.h
+++ b/include/libvirt/virterror.h
@@ -136,6 +136,7 @@ typedef enum {
 
     VIR_FROM_TPM = 70,          /* Error from TPM */
     VIR_FROM_BPF = 71,          /* Error from BPF code */
+    VIR_FROM_CH = 72,           /* Error from Cloud-Hypervisor driver */
 
 # ifdef VIR_ENUM_SENTINELS
     VIR_ERR_DOMAIN_LAST
diff --git a/libvirt.spec.in b/libvirt.spec.in
index bb74443484..66edb1fa76 100644
--- a/libvirt.spec.in
+++ b/libvirt.spec.in
@@ -14,6 +14,7 @@
 
 # The hypervisor drivers that run in libvirtd
 %define with_qemu          0%{!?_without_qemu:1}
+%define with_ch            0%{!?_without_ch:1}
 %define with_lxc           0%{!?_without_lxc:1}
 %define with_libxl         0%{!?_without_libxl:1}
 %define with_vbox          0%{!?_without_vbox:1}
@@ -232,6 +233,9 @@ Requires: libvirt-daemon-driver-lxc = %{version}-%{release}
 %if %{with_qemu}
 Requires: libvirt-daemon-driver-qemu = %{version}-%{release}
 %endif
+%if %{with_ch}
+Requires: libvirt-daemon-driver-ch = %{version}-%{release}
+%endif
 # We had UML driver, but we've removed it.
 Obsoletes: libvirt-daemon-driver-uml <= 5.0.0
 Obsoletes: libvirt-daemon-uml <= 5.0.0
@@ -744,6 +748,20 @@ QEMU
 %endif
 
 
+%if %{with_ch}
+%package daemon-driver-ch
+Summary: Cloud-Hypervisor driver plugin for the libvirtd daemon
+Requires: libvirt-daemon = %{version}-%{release}
+Requires: libvirt-libs = %{version}-%{release}
+Requires: /usr/bin/cloud-hypervisor
+
+%description daemon-driver-ch
+The Cloud-Hypervisor driver plugin for the libvirtd daemon,
+providing an implementation of the hypervisor driver APIs
+using Cloud-Hypervisor
+%endif
+
+
 %if %{with_lxc}
 %package daemon-driver-lxc
 Summary: LXC driver plugin for the libvirtd daemon
@@ -1001,6 +1019,12 @@ exit 1
     %define arg_qemu -Ddriver_qemu=disabled
 %endif
 
+%if %{with_ch}
+    %define arg_ch -Ddriver_ch=enabled
+%else
+    %define arg_ch -Ddriver_ch=disabled
+%endif
+
 %if %{with_openvz}
     %define arg_openvz -Ddriver_openvz=enabled
 %else
@@ -1132,6 +1156,7 @@ export SOURCE_DATE_EPOCH=$(stat --printf='%Y' %{_specdir}/%{name}.spec)
 %meson \
            -Drunstatedir=%{_rundir} \
            %{?arg_qemu} \
+           %{?arg_ch} \
            %{?arg_openvz} \
            %{?arg_lxc} \
            %{?arg_vbox} \
@@ -1739,6 +1764,13 @@ exit 0
 %{_mandir}/man1/virt-qemu-run.1*
 %endif
 
+%if %{with_ch}
+%files daemon-driver-ch
+%dir %attr(0700, root, root) %{_localstatedir}/log/libvirt/ch/
+%ghost %dir %{_rundir}/libvirt/ch/
+%{_libdir}/%{name}/connection-driver/libvirt_driver_ch.so
+%endif
+
 %if %{with_lxc}
 %files daemon-driver-lxc
 %config(noreplace) %{_sysconfdir}/sysconfig/virtlxcd
diff --git a/meson.build b/meson.build
index dabd4196e6..a6759cb051 100644
--- a/meson.build
+++ b/meson.build
@@ -1722,6 +1722,10 @@ elif get_option('driver_lxc').enabled()
   error('linux and remote_driver are required for LXC')
 endif
 
+if not get_option('driver_ch').disabled() and host_machine.system() == 'linux' and conf.has('WITH_LIBVIRTD')
+  conf.set('WITH_CH', 1)
+endif
+
 # there's no use compiling the network driver without the libvirt
 # daemon, nor compiling it for macOS, where it breaks the compile
 if not get_option('driver_network').disabled() and conf.has('WITH_LIBVIRTD') and host_machine.system() != 'darwin'
@@ -2369,6 +2373,7 @@ driver_summary = {
   'VBox': conf.has('WITH_VBOX'),
   'libxl': conf.has('WITH_LIBXL'),
   'LXC': conf.has('WITH_LXC'),
+  'Cloud-Hypervisor': conf.has('WITH_CH'),
   'ESX': conf.has('WITH_ESX'),
   'Hyper-V': conf.has('WITH_HYPERV'),
   'vz': conf.has('WITH_VZ'),
diff --git a/meson_options.txt b/meson_options.txt
index 79554c3186..bfb3e79e1a 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -55,6 +55,7 @@ option('driver_interface', type: 'feature', value: 'auto', description: 'host in
 option('driver_libvirtd', type: 'feature', value: 'auto', description: 'libvirtd driver')
 option('driver_libxl', type: 'feature', value: 'auto', description: 'libxenlight driver')
 option('driver_lxc', type: 'feature', value: 'auto', description: 'Linux Container driver')
+option('driver_ch', type: 'feature', value: 'auto', description: 'Cloud-Hypervisor driver')
 option('driver_network', type: 'feature', value: 'auto', description: 'virtual network driver')
 option('driver_openvz', type: 'feature', value: 'auto', description: 'OpenVZ driver')
 option('driver_qemu', type: 'feature', value: 'auto', description: 'QEMU/KVM driver')
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 3d6c20c55f..b12a1b1e56 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -18,6 +18,11 @@
 @SRCDIR@src/bhyve/bhyve_monitor.c
 @SRCDIR@src/bhyve/bhyve_parse_command.c
 @SRCDIR@src/bhyve/bhyve_process.c
+@SRCDIR@src/ch/ch_conf.c
+@SRCDIR@src/ch/ch_domain.c
+@SRCDIR@src/ch/ch_driver.c
+@SRCDIR@src/ch/ch_monitor.c
+@SRCDIR@src/ch/ch_process.c
 @SRCDIR@src/conf/backup_conf.c
 @SRCDIR@src/conf/capabilities.c
 @SRCDIR@src/conf/checkpoint_conf.c
diff --git a/src/ch/ch_conf.c b/src/ch/ch_conf.c
new file mode 100644
index 0000000000..8769b0f7e2
--- /dev/null
+++ b/src/ch/ch_conf.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright Intel Corp. 2020
+ *
+ * ch_driver.h: header file for Cloud-Hypervisor driver functions
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "configmake.h"
+#include "viralloc.h"
+#include "vircommand.h"
+#include "virlog.h"
+#include "virobject.h"
+#include "virstring.h"
+#include "virutil.h"
+
+#include "ch_conf.h"
+#include "ch_domain.h"
+
+#define VIR_FROM_THIS VIR_FROM_CH
+
+VIR_LOG_INIT("ch.ch_conf");
+
+static virClassPtr virCHDriverConfigClass;
+static void virCHDriverConfigDispose(void *obj);
+
+static int virCHConfigOnceInit(void)
+{
+    if (!VIR_CLASS_NEW(virCHDriverConfig, virClassForObject()))
+        return -1;
+
+    return 0;
+}
+
+VIR_ONCE_GLOBAL_INIT(virCHConfig);
+
+
+/* Functions */
+virCapsPtr virCHDriverCapsInit(void)
+{
+    virCapsPtr caps;
+    virCapsGuestPtr guest;
+
+    if ((caps = virCapabilitiesNew(virArchFromHost(),
+                                   false, false)) == NULL)
+        goto cleanup;
+
+    if (!(caps->host.numa = virCapabilitiesHostNUMANewHost()))
+        goto cleanup;
+
+    if (virCapabilitiesInitCaches(caps) < 0)
+        goto cleanup;
+
+    if ((guest = virCapabilitiesAddGuest(caps,
+                                         VIR_DOMAIN_OSTYPE_HVM,
+                                         caps->host.arch,
+                                         NULL,
+                                         NULL,
+                                         0,
+                                         NULL)) == NULL)
+        goto cleanup;
+
+    if (virCapabilitiesAddGuestDomain(guest,
+                                      VIR_DOMAIN_VIRT_CH,
+                                      NULL,
+                                      NULL,
+                                      0,
+                                      NULL) == NULL)
+        goto cleanup;
+
+    return caps;
+
+ cleanup:
+    virObjectUnref(caps);
+    return NULL;
+}
+
+/**
+ * virCHDriverGetCapabilities:
+ *
+ * Get a reference to the virCapsPtr instance for the
+ * driver. If @refresh is true, the capabilities will be
+ * rebuilt first
+ *
+ * The caller must release the reference with virObjetUnref
+ *
+ * Returns: a reference to a virCapsPtr instance or NULL
+ */
+virCapsPtr virCHDriverGetCapabilities(virCHDriverPtr driver,
+                                      bool refresh)
+{
+    virCapsPtr ret;
+    if (refresh) {
+        virCapsPtr caps = NULL;
+        if ((caps = virCHDriverCapsInit()) == NULL)
+            return NULL;
+
+        chDriverLock(driver);
+        virObjectUnref(driver->caps);
+        driver->caps = caps;
+    } else {
+        chDriverLock(driver);
+    }
+
+    ret = virObjectRef(driver->caps);
+    chDriverUnlock(driver);
+    return ret;
+}
+
+virDomainXMLOptionPtr
+chDomainXMLConfInit(virCHDriverPtr driver)
+{
+    virCHDriverDomainDefParserConfig.priv = driver;
+    return virDomainXMLOptionNew(&virCHDriverDomainDefParserConfig,
+                                 &virCHDriverPrivateDataCallbacks,
+                                 NULL, NULL, NULL);
+}
+
+virCHDriverConfigPtr
+virCHDriverConfigNew(void)
+{
+    virCHDriverConfigPtr cfg;
+
+    if (virCHConfigInitialize() < 0)
+        return NULL;
+
+    if (!(cfg = virObjectNew(virCHDriverConfigClass)))
+        return NULL;
+
+    cfg->stateDir = g_strdup(CH_STATE_DIR);
+    cfg->logDir = g_strdup(CH_LOG_DIR);
+
+    return cfg;
+}
+
+virCHDriverConfigPtr virCHDriverGetConfig(virCHDriverPtr driver)
+{
+    virCHDriverConfigPtr cfg;
+    chDriverLock(driver);
+    cfg = virObjectRef(driver->config);
+    chDriverUnlock(driver);
+    return cfg;
+}
+
+static void
+virCHDriverConfigDispose(void *obj)
+{
+    virCHDriverConfigPtr cfg = obj;
+
+    VIR_FREE(cfg->stateDir);
+    VIR_FREE(cfg->logDir);
+}
+
+static int
+chExtractVersionInfo(int *retversion)
+{
+    int ret = -1;
+    unsigned long version;
+    char *help = NULL;
+    char *tmp;
+    virCommandPtr cmd = virCommandNewArgList(CH_CMD, "--version", NULL);
+
+    if (retversion)
+        *retversion = 0;
+
+    virCommandAddEnvString(cmd, "LC_ALL=C");
+    virCommandSetOutputBuffer(cmd, &help);
+
+    if (virCommandRun(cmd, NULL) < 0)
+        goto cleanup;
+
+    tmp = help;
+
+    /* expected format: cloud-hypervisor v<major>.<minor>.<micro> */
+    if ((tmp = STRSKIP(tmp, "cloud-hypervisor v")) == NULL)
+        goto cleanup;
+
+    if (virParseVersionString(tmp, &version, false) < 0)
+        goto cleanup;
+
+    // v0.9.0 is the minimum supported version
+    if ((unsigned int)(version / 1000000) < 1) {
+        if (((unsigned int)((unsigned long)(version % 1000000)) / 1000) < 9) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                           _("Cloud-Hypervisor version is too old (v0.9.0 is the minimum supported version)"));
+            goto cleanup;
+        }
+    }
+
+
+    if (retversion)
+        *retversion = version;
+
+    ret = 0;
+
+ cleanup:
+    virCommandFree(cmd);
+    VIR_FREE(help);
+
+    return ret;
+}
+
+int chExtractVersion(virCHDriverPtr driver)
+{
+    if (driver->version > 0)
+        return 0;
+
+    if (chExtractVersionInfo(&driver->version) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Could not extract Cloud-Hypervisor version"));
+        return -1;
+    }
+
+    return 0;
+}
+
+int chStrToInt(const char *str)
+{
+    int val;
+
+    if (virStrToLong_i(str, NULL, 10, &val) < 0)
+        return 0;
+
+    return val;
+}
diff --git a/src/ch/ch_conf.h b/src/ch/ch_conf.h
new file mode 100644
index 0000000000..04334130f7
--- /dev/null
+++ b/src/ch/ch_conf.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright Intel Corp. 2020
+ *
+ * ch_driver.h: header file for Cloud-Hypervisor driver functions
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "virdomainobjlist.h"
+#include "virthread.h"
+
+#define CH_DRIVER_NAME "CH"
+#define CH_CMD "cloud-hypervisor"
+
+#define CH_STATE_DIR RUNSTATEDIR "/libvirt/ch"
+#define CH_LOG_DIR LOCALSTATEDIR "/log/libvirt/ch"
+
+typedef struct _virCHDriver virCHDriver;
+typedef virCHDriver *virCHDriverPtr;
+
+typedef struct _virCHDriverConfig virCHDriverConfig;
+typedef virCHDriverConfig *virCHDriverConfigPtr;
+
+struct _virCHDriverConfig {
+    virObject parent;
+
+    char *stateDir;
+    char *logDir;
+};
+
+struct _virCHDriver
+{
+    virMutex lock;
+
+    /* Require lock to get a reference on the object,
+     * lockless access thereafter */
+    virCapsPtr caps;
+
+    /* Immutable pointer, Immutable object */
+    virDomainXMLOptionPtr xmlopt;
+
+    /* Immutable pointer, self-locking APIs */
+    virDomainObjListPtr domains;
+
+    /* Cloud-Hypervisor version */
+    int version;
+
+    /* Require lock to get reference on 'config',
+     * then lockless thereafter */
+    virCHDriverConfigPtr config;
+
+    /* pid file FD, ensures two copies of the driver can't use the same root */
+    int lockFD;
+};
+
+virCapsPtr virCHDriverCapsInit(void);
+virCapsPtr virCHDriverGetCapabilities(virCHDriverPtr driver,
+                                      bool refresh);
+virDomainXMLOptionPtr chDomainXMLConfInit(virCHDriverPtr driver);
+virCHDriverConfigPtr virCHDriverConfigNew(void);
+virCHDriverConfigPtr virCHDriverGetConfig(virCHDriverPtr driver);
+int chExtractVersion(virCHDriverPtr driver);
+int chStrToInt(const char *str);
+
+static inline void chDriverLock(virCHDriverPtr driver)
+{
+    virMutexLock(&driver->lock);
+}
+
+static inline void chDriverUnlock(virCHDriverPtr driver)
+{
+    virMutexUnlock(&driver->lock);
+}
diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c
new file mode 100644
index 0000000000..a46641d50d
--- /dev/null
+++ b/src/ch/ch_domain.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright Intel Corp. 2020
+ *
+ * ch_driver.h: header file for Cloud-Hypervisor driver functions
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "ch_domain.h"
+#include "viralloc.h"
+#include "virlog.h"
+#include "virtime.h"
+
+#define VIR_FROM_THIS VIR_FROM_CH
+
+VIR_ENUM_IMPL(virCHDomainJob,
+              CH_JOB_LAST,
+              "none",
+              "query",
+              "destroy",
+              "modify",
+);
+
+VIR_LOG_INIT("ch.ch_domain");
+
+static int
+virCHDomainObjInitJob(virCHDomainObjPrivatePtr priv)
+{
+    memset(&priv->job, 0, sizeof(priv->job));
+
+    if (virCondInit(&priv->job.cond) < 0)
+        return -1;
+
+    return 0;
+}
+
+static void
+virCHDomainObjResetJob(virCHDomainObjPrivatePtr priv)
+{
+    struct virCHDomainJobObj *job = &priv->job;
+
+    job->active = CH_JOB_NONE;
+    job->owner = 0;
+}
+
+static void
+virCHDomainObjFreeJob(virCHDomainObjPrivatePtr priv)
+{
+    ignore_value(virCondDestroy(&priv->job.cond));
+}
+
+/*
+ * obj must be locked before calling, virCHDriverPtr must NOT be locked
+ *
+ * This must be called by anything that will change the VM state
+ * in any way
+ *
+ * Upon successful return, the object will have its ref count increased.
+ * Successful calls must be followed by EndJob eventually.
+ */
+int
+virCHDomainObjBeginJob(virDomainObjPtr obj, enum virCHDomainJob job)
+{
+    virCHDomainObjPrivatePtr priv = obj->privateData;
+    unsigned long long now;
+    unsigned long long then;
+
+    if (virTimeMillisNow(&now) < 0)
+        return -1;
+    then = now + CH_JOB_WAIT_TIME;
+
+    while (priv->job.active) {
+        VIR_DEBUG("Wait normal job condition for starting job: %s",
+                  virCHDomainJobTypeToString(job));
+        if (virCondWaitUntil(&priv->job.cond, &obj->parent.lock, then) < 0)
+            goto error;
+    }
+
+    virCHDomainObjResetJob(priv);
+
+    VIR_DEBUG("Starting job: %s", virCHDomainJobTypeToString(job));
+    priv->job.active = job;
+    priv->job.owner = virThreadSelfID();
+
+    return 0;
+
+ error:
+    VIR_WARN("Cannot start job (%s) for domain %s;"
+             " current job is (%s) owned by (%d)",
+             virCHDomainJobTypeToString(job),
+             obj->def->name,
+             virCHDomainJobTypeToString(priv->job.active),
+             priv->job.owner);
+
+    if (errno == ETIMEDOUT)
+        virReportError(VIR_ERR_OPERATION_TIMEOUT,
+                       "%s", _("cannot acquire state change lock"));
+    else
+        virReportSystemError(errno,
+                             "%s", _("cannot acquire job mutex"));
+    return -1;
+}
+
+/*
+ * obj must be locked and have a reference before calling
+ *
+ * To be called after completing the work associated with the
+ * earlier virCHDomainBeginJob() call
+ */
+void
+virCHDomainObjEndJob(virDomainObjPtr obj)
+{
+    virCHDomainObjPrivatePtr priv = obj->privateData;
+    enum virCHDomainJob job = priv->job.active;
+
+    VIR_DEBUG("Stopping job: %s",
+              virCHDomainJobTypeToString(job));
+
+    virCHDomainObjResetJob(priv);
+    virCondSignal(&priv->job.cond);
+}
+
+static void *
+virCHDomainObjPrivateAlloc(void *opaque G_GNUC_UNUSED)
+{
+    virCHDomainObjPrivatePtr priv;
+
+    if (VIR_ALLOC(priv) < 0)
+        return NULL;
+
+    if (virCHDomainObjInitJob(priv) < 0) {
+        VIR_FREE(priv);
+        return NULL;
+    }
+
+    return priv;
+}
+
+static void
+virCHDomainObjPrivateFree(void *data)
+{
+    virCHDomainObjPrivatePtr priv = data;
+
+    virCHDomainObjFreeJob(priv);
+    VIR_FREE(priv);
+}
+
+static int
+virCHDomainObjPrivateXMLFormat(virBufferPtr buf,
+                               virDomainObjPtr vm)
+{
+    virCHDomainObjPrivatePtr priv = vm->privateData;
+    virBufferAsprintf(buf, "<init pid='%lld'/>\n",
+                      (long long)priv->initpid);
+
+    return 0;
+}
+
+static int
+virCHDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt,
+                              virDomainObjPtr vm,
+                              virDomainDefParserConfigPtr config G_GNUC_UNUSED)
+{
+    virCHDomainObjPrivatePtr priv = vm->privateData;
+    long long thepid;
+
+    if (virXPathLongLong("string(./init[1]/@pid)", ctxt, &thepid) < 0) {
+        VIR_WARN("Failed to load init pid from state %s",
+                 virGetLastErrorMessage());
+        priv->initpid = 0;
+    } else {
+        priv->initpid = thepid;
+    }
+
+    return 0;
+}
+
+virDomainXMLPrivateDataCallbacks virCHDriverPrivateDataCallbacks = {
+    .alloc = virCHDomainObjPrivateAlloc,
+    .free = virCHDomainObjPrivateFree,
+    .format = virCHDomainObjPrivateXMLFormat,
+    .parse  = virCHDomainObjPrivateXMLParse,
+};
+
+static int
+virCHDomainDefPostParse(virDomainDefPtr def,
+                        unsigned int parseFlags G_GNUC_UNUSED,
+                        void *opaque,
+                        void *parseOpaque G_GNUC_UNUSED)
+{
+    virCHDriverPtr driver = opaque;
+    g_autoptr(virCaps) caps = virCHDriverGetCapabilities(driver, false);
+    if (!caps)
+        return -1;
+    if (!virCapabilitiesDomainSupported(caps, def->os.type,
+                                        def->os.arch,
+                                        def->virtType))
+        return -1;
+
+    return 0;
+}
+
+virDomainDefParserConfig virCHDriverDomainDefParserConfig = {
+    .domainPostParseCallback = virCHDomainDefPostParse,
+};
diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h
new file mode 100644
index 0000000000..144f147173
--- /dev/null
+++ b/src/ch/ch_domain.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright Intel Corp. 2020
+ *
+ * ch_driver.h: header file for Cloud-Hypervisor driver functions
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "ch_conf.h"
+#include "ch_monitor.h"
+
+/* Give up waiting for mutex after 30 seconds */
+#define CH_JOB_WAIT_TIME (1000ull * 30)
+
+/* Only 1 job is allowed at any time
+ * A job includes *all* ch.so api, even those just querying
+ * information, not merely actions */
+
+enum virCHDomainJob {
+    CH_JOB_NONE = 0,      /* Always set to 0 for easy if (jobActive) conditions */
+    CH_JOB_QUERY,         /* Doesn't change any state */
+    CH_JOB_DESTROY,       /* Destroys the domain (cannot be masked out) */
+    CH_JOB_MODIFY,        /* May change state */
+    CH_JOB_LAST
+};
+VIR_ENUM_DECL(virCHDomainJob);
+
+
+struct virCHDomainJobObj {
+    virCond cond;                       /* Use to coordinate jobs */
+    enum virCHDomainJob active;        /* Currently running job */
+    int owner;                          /* Thread which set current job */
+};
+
+
+typedef struct _virCHDomainObjPrivate virCHDomainObjPrivate;
+typedef virCHDomainObjPrivate *virCHDomainObjPrivatePtr;
+struct _virCHDomainObjPrivate {
+    pid_t initpid;
+
+    struct virCHDomainJobObj job;
+
+    virCHMonitorPtr monitor;
+};
+
+extern virDomainXMLPrivateDataCallbacks virCHDriverPrivateDataCallbacks;
+extern virDomainDefParserConfig virCHDriverDomainDefParserConfig;
+
+int
+virCHDomainObjBeginJob(virDomainObjPtr obj, enum virCHDomainJob job)
+    G_GNUC_WARN_UNUSED_RESULT;
+
+void
+virCHDomainObjEndJob(virDomainObjPtr obj);
diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c
new file mode 100644
index 0000000000..e5b027f71f
--- /dev/null
+++ b/src/ch/ch_driver.c
@@ -0,0 +1,937 @@
+/*
+ * Copyright Intel Corp. 2020
+ *
+ * ch_driver.h: header file for Cloud-Hypervisor driver functions
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "ch_conf.h"
+#include "ch_domain.h"
+#include "ch_driver.h"
+#include "ch_monitor.h"
+#include "ch_process.h"
+#include "datatypes.h"
+#include "driver.h"
+#include "viraccessapicheck.h"
+#include "viralloc.h"
+#include "virbuffer.h"
+#include "vircommand.h"
+#include "virerror.h"
+#include "virfile.h"
+#include "virlog.h"
+#include "virnetdevtap.h"
+#include "virobject.h"
+#include "virstring.h"
+#include "virtypedparam.h"
+#include "viruri.h"
+#include "virutil.h"
+#include "viruuid.h"
+
+#define VIR_FROM_THIS VIR_FROM_CH
+
+VIR_LOG_INIT("ch.ch_driver");
+
+static int chStateInitialize(bool privileged,
+                             const char *root,
+                             virStateInhibitCallback callback,
+                             void *opaque);
+static int chStateCleanup(void);
+virCHDriverPtr ch_driver = NULL;
+
+static virDomainObjPtr
+chDomObjFromDomain(virDomainPtr domain)
+{
+    virDomainObjPtr vm;
+    virCHDriverPtr driver = domain->conn->privateData;
+    char uuidstr[VIR_UUID_STRING_BUFLEN];
+
+    vm = virDomainObjListFindByUUID(driver->domains, domain->uuid);
+    if (!vm) {
+        virUUIDFormat(domain->uuid, uuidstr);
+        virReportError(VIR_ERR_NO_DOMAIN,
+                       _("no domain with matching uuid '%s' (%s)"),
+                       uuidstr, domain->name);
+        return NULL;
+    }
+
+    return vm;
+}
+
+/* Functions */
+static int
+chConnectURIProbe(char **uri)
+{
+    if (ch_driver == NULL)
+        return 0;
+
+    *uri = g_strdup("ch:///system");
+    return 1;
+}
+
+static virDrvOpenStatus chConnectOpen(virConnectPtr conn,
+                                      virConnectAuthPtr auth G_GNUC_UNUSED,
+                                      virConfPtr conf G_GNUC_UNUSED,
+                                      unsigned int flags)
+{
+    virCheckFlags(VIR_CONNECT_RO, VIR_DRV_OPEN_ERROR);
+
+    /* URI was good, but driver isn't active */
+    if (ch_driver == NULL) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       "%s", _("Cloud-Hypervisor state driver is not active"));
+        return VIR_DRV_OPEN_ERROR;
+    }
+
+    if (virConnectOpenEnsureACL(conn) < 0)
+        return VIR_DRV_OPEN_ERROR;
+
+    conn->privateData = ch_driver;
+
+    return VIR_DRV_OPEN_SUCCESS;
+}
+
+static int chConnectClose(virConnectPtr conn)
+{
+    conn->privateData = NULL;
+    return 0;
+}
+
+static const char *chConnectGetType(virConnectPtr conn)
+{
+    if (virConnectGetTypeEnsureACL(conn) < 0)
+        return NULL;
+
+    return "CH";
+}
+
+static int chConnectGetVersion(virConnectPtr conn,
+                               unsigned long *version)
+{
+    virCHDriverPtr driver = conn->privateData;
+
+    if (virConnectGetVersionEnsureACL(conn) < 0)
+        return -1;
+
+    chDriverLock(driver);
+    *version = driver->version;
+    chDriverUnlock(driver);
+    return 0;
+}
+
+static char *chConnectGetHostname(virConnectPtr conn)
+{
+    if (virConnectGetHostnameEnsureACL(conn) < 0)
+        return NULL;
+
+    return virGetHostname();
+}
+
+static int chConnectNumOfDomains(virConnectPtr conn)
+{
+    virCHDriverPtr driver = conn->privateData;
+
+    if (virConnectNumOfDomainsEnsureACL(conn) < 0)
+        return -1;
+
+    return virDomainObjListNumOfDomains(driver->domains, true,
+                                        virConnectNumOfDomainsCheckACL, conn);
+}
+
+static int chConnectListDomains(virConnectPtr conn, int *ids, int nids)
+{
+    virCHDriverPtr driver = conn->privateData;
+
+    if (virConnectListDomainsEnsureACL(conn) < 0)
+        return -1;
+
+    return virDomainObjListGetActiveIDs(driver->domains, ids, nids,
+                                     virConnectListDomainsCheckACL, conn);
+}
+
+static int
+chConnectListAllDomains(virConnectPtr conn,
+                        virDomainPtr **domains,
+                        unsigned int flags)
+{
+    virCHDriverPtr driver = conn->privateData;
+
+    virCheckFlags(VIR_CONNECT_LIST_DOMAINS_FILTERS_ALL, -1);
+
+    if (virConnectListAllDomainsEnsureACL(conn) < 0)
+        return -1;
+
+    return virDomainObjListExport(driver->domains, conn, domains,
+                                 virConnectListAllDomainsCheckACL, flags);
+}
+
+static int chNodeGetInfo(virConnectPtr conn,
+                         virNodeInfoPtr nodeinfo)
+{
+    if (virNodeGetInfoEnsureACL(conn) < 0)
+        return -1;
+
+    return virCapabilitiesGetNodeInfo(nodeinfo);
+}
+
+static char *chConnectGetCapabilities(virConnectPtr conn)
+{
+    virCHDriverPtr driver = conn->privateData;
+    virCapsPtr caps;
+    char *xml;
+
+    if (virConnectGetCapabilitiesEnsureACL(conn) < 0)
+        return NULL;
+
+    if (!(caps = virCHDriverGetCapabilities(driver, true)))
+        return NULL;
+
+    xml = virCapabilitiesFormatXML(caps);
+
+    virObjectUnref(caps);
+    return xml;
+}
+
+/**
+ * chDomainCreateXML:
+ * @conn: pointer to connection
+ * @xml: XML definition of domain
+ * @flags: bitwise-OR of supported virDomainCreateFlags
+ *
+ * Creates a domain based on xml and starts it
+ *
+ * Returns a new domain object or NULL in case of failure.
+ */
+static virDomainPtr
+chDomainCreateXML(virConnectPtr conn,
+                           const char *xml,
+                           unsigned int flags)
+{
+    virCHDriverPtr driver = conn->privateData;
+    virDomainDefPtr vmdef = NULL;
+    virDomainObjPtr vm = NULL;
+    virDomainPtr dom = NULL;
+    unsigned int parse_flags = VIR_DOMAIN_DEF_PARSE_INACTIVE;
+
+    virCheckFlags(VIR_DOMAIN_START_VALIDATE, NULL);
+
+    if (flags & VIR_DOMAIN_START_VALIDATE)
+        parse_flags |= VIR_DOMAIN_DEF_PARSE_VALIDATE_SCHEMA;
+
+
+    if ((vmdef = virDomainDefParseString(xml, driver->xmlopt,
+                                         NULL, parse_flags)) == NULL)
+        goto cleanup;
+
+    if (virDomainCreateXMLEnsureACL(conn, vmdef) < 0)
+        goto cleanup;
+
+    if (!(vm = virDomainObjListAdd(driver->domains,
+                                   vmdef,
+                                   driver->xmlopt,
+                                   VIR_DOMAIN_OBJ_LIST_ADD_LIVE |
+                                       VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE,
+                                   NULL)))
+        goto cleanup;
+
+    vmdef = NULL;
+    vm->persistent = 1;
+
+    if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0)
+        goto cleanup;
+
+    if (virCHProcessStart(driver, vm, VIR_DOMAIN_RUNNING_BOOTED) < 0)
+        goto cleanup;
+
+    dom = virGetDomain(conn, vm->def->name, vm->def->uuid, vm->def->id);
+
+    virCHDomainObjEndJob(vm);
+
+ cleanup:
+    virDomainDefFree(vmdef);
+    virDomainObjEndAPI(&vm);
+    chDriverUnlock(driver);
+    return dom;
+}
+
+static int
+chDomainCreateWithFlags(virDomainPtr dom, unsigned int flags)
+{
+    virCHDriverPtr driver = dom->conn->privateData;
+    virDomainObjPtr vm;
+    int ret = -1;
+
+    virCheckFlags(0, -1);
+
+    if (!(vm = chDomObjFromDomain(dom)))
+        goto cleanup;
+
+    if (virDomainCreateWithFlagsEnsureACL(dom->conn, vm->def) < 0)
+        goto cleanup;
+
+    if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0)
+        goto cleanup;
+
+    ret = virCHProcessStart(driver, vm, VIR_DOMAIN_RUNNING_BOOTED);
+
+    virCHDomainObjEndJob(vm);
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return ret;
+}
+
+static int
+chDomainCreate(virDomainPtr dom)
+{
+    return chDomainCreateWithFlags(dom, 0);
+}
+
+static virDomainPtr
+chDomainDefineXMLFlags(virConnectPtr conn, const char *xml, unsigned int flags)
+{
+    virCHDriverPtr driver = conn->privateData;
+    virDomainDefPtr vmdef = NULL;
+    virDomainObjPtr vm = NULL;
+    virDomainPtr dom = NULL;
+    unsigned int parse_flags = VIR_DOMAIN_DEF_PARSE_INACTIVE;
+
+    virCheckFlags(VIR_DOMAIN_DEFINE_VALIDATE, NULL);
+
+    if (flags & VIR_DOMAIN_START_VALIDATE)
+        parse_flags |= VIR_DOMAIN_DEF_PARSE_VALIDATE_SCHEMA;
+
+    if ((vmdef = virDomainDefParseString(xml, driver->xmlopt,
+                                         NULL, parse_flags)) == NULL)
+        goto cleanup;
+
+    if (virXMLCheckIllegalChars("name", vmdef->name, "\n") < 0)
+        goto cleanup;
+
+    if (virDomainDefineXMLFlagsEnsureACL(conn, vmdef) < 0)
+        goto cleanup;
+
+    if (!(vm = virDomainObjListAdd(driver->domains, vmdef,
+                                   driver->xmlopt,
+                                   0, NULL)))
+        goto cleanup;
+
+    vmdef = NULL;
+    vm->persistent = 1;
+
+    dom = virGetDomain(conn, vm->def->name, vm->def->uuid, vm->def->id);
+
+ cleanup:
+    virDomainDefFree(vmdef);
+    virDomainObjEndAPI(&vm);
+    return dom;
+}
+
+static virDomainPtr
+chDomainDefineXML(virConnectPtr conn, const char *xml)
+{
+    return chDomainDefineXMLFlags(conn, xml, 0);
+}
+
+static int
+chDomainUndefineFlags(virDomainPtr dom,
+                      unsigned int flags)
+{
+    virCHDriverPtr driver = dom->conn->privateData;
+    virDomainObjPtr vm;
+    int ret = -1;
+
+    virCheckFlags(0, -1);
+
+    if (!(vm = chDomObjFromDomain(dom)))
+        goto cleanup;
+
+    if (virDomainUndefineFlagsEnsureACL(dom->conn, vm->def) < 0)
+        goto cleanup;
+
+    if (!vm->persistent) {
+        virReportError(VIR_ERR_OPERATION_INVALID,
+                       "%s", _("Cannot undefine transient domain"));
+        goto cleanup;
+    }
+
+    if (virDomainObjIsActive(vm)) {
+        vm->persistent = 0;
+    } else {
+        virDomainObjListRemove(driver->domains, vm);
+    }
+
+    ret = 0;
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return ret;
+}
+
+static int
+chDomainUndefine(virDomainPtr dom)
+{
+    return chDomainUndefineFlags(dom, 0);
+}
+
+static int chDomainIsActive(virDomainPtr dom)
+{
+    virCHDriverPtr driver = dom->conn->privateData;
+    virDomainObjPtr vm;
+    int ret = -1;
+
+    chDriverLock(driver);
+    if (!(vm = chDomObjFromDomain(dom)))
+        goto cleanup;
+
+    if (virDomainIsActiveEnsureACL(dom->conn, vm->def) < 0)
+        goto cleanup;
+
+    ret = virDomainObjIsActive(vm);
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    chDriverUnlock(driver);
+    return ret;
+}
+
+static int
+chDomainShutdownFlags(virDomainPtr dom,
+                      unsigned int flags)
+{
+    virCHDomainObjPrivatePtr priv;
+    virDomainObjPtr vm;
+    virDomainState state;
+    int ret = -1;
+
+    virCheckFlags(VIR_DOMAIN_SHUTDOWN_INITCTL |
+                  VIR_DOMAIN_SHUTDOWN_SIGNAL, -1);
+
+    if (!(vm = chDomObjFromDomain(dom)))
+        goto cleanup;
+
+    priv = vm->privateData;
+
+    if (virDomainShutdownFlagsEnsureACL(dom->conn, vm->def, flags) < 0)
+        goto cleanup;
+
+    if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0)
+        goto cleanup;
+
+    if (virDomainObjCheckActive(vm) < 0)
+        goto endjob;
+
+    state = virDomainObjGetState(vm, NULL);
+    if (state != VIR_DOMAIN_RUNNING && state != VIR_DOMAIN_PAUSED) {
+        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+                       _("only can shutdown running/paused domain"));
+        goto endjob;
+    } else {
+        if (virCHMonitorShutdownVM(priv->monitor) < 0) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                        _("failed to shutdown guest VM"));
+            goto endjob;
+        }
+    }
+
+    virDomainObjSetState(vm, VIR_DOMAIN_SHUTDOWN, VIR_DOMAIN_SHUTDOWN_USER);
+
+    ret = 0;
+
+ endjob:
+    virCHDomainObjEndJob(vm);
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return ret;
+}
+
+static int
+chDomainShutdown(virDomainPtr dom)
+{
+    return chDomainShutdownFlags(dom, 0);
+}
+
+
+static int
+chDomainReboot(virDomainPtr dom, unsigned int flags)
+{
+    virCHDomainObjPrivatePtr priv;
+    virDomainObjPtr vm;
+    virDomainState state;
+    int ret = -1;
+
+    virCheckFlags(VIR_DOMAIN_REBOOT_INITCTL |
+                  VIR_DOMAIN_REBOOT_SIGNAL, -1);
+
+    if (!(vm = chDomObjFromDomain(dom)))
+        goto cleanup;
+
+    priv = vm->privateData;
+
+    if (virDomainRebootEnsureACL(dom->conn, vm->def, flags) < 0)
+        goto cleanup;
+
+    if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0)
+        goto cleanup;
+
+    if (virDomainObjCheckActive(vm) < 0)
+        goto endjob;
+
+    state = virDomainObjGetState(vm, NULL);
+    if (state != VIR_DOMAIN_RUNNING && state != VIR_DOMAIN_PAUSED) {
+        virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
+                       _("only can reboot running/paused domain"));
+        goto endjob;
+    } else {
+        if (virCHMonitorRebootVM(priv->monitor) < 0) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                           _("failed to reboot domain"));
+            goto endjob;
+        }
+    }
+
+    if (state == VIR_DOMAIN_RUNNING)
+        virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, VIR_DOMAIN_RUNNING_BOOTED);
+    else
+        virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, VIR_DOMAIN_RUNNING_UNPAUSED);
+
+    ret = 0;
+
+ endjob:
+    virCHDomainObjEndJob(vm);
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return ret;
+}
+
+static int
+chDomainSuspend(virDomainPtr dom)
+{
+    virCHDomainObjPrivatePtr priv;
+    virDomainObjPtr vm;
+    int ret = -1;
+
+    if (!(vm = chDomObjFromDomain(dom)))
+        goto cleanup;
+
+    priv = vm->privateData;
+
+    if (virDomainSuspendEnsureACL(dom->conn, vm->def) < 0)
+        goto cleanup;
+
+    if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0)
+        goto cleanup;
+
+    if (virDomainObjCheckActive(vm) < 0)
+        goto endjob;
+
+    if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_RUNNING) {
+        virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
+                       _("only can suspend running domain"));
+        goto endjob;
+    } else {
+        if (virCHMonitorSuspendVM(priv->monitor) < 0) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                        _("failed to suspend domain"));
+            goto endjob;
+        }
+    }
+
+    virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_USER);
+
+    ret = 0;
+
+ endjob:
+    virCHDomainObjEndJob(vm);
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return ret;
+}
+
+static int
+chDomainResume(virDomainPtr dom)
+{
+    virCHDomainObjPrivatePtr priv;
+    virDomainObjPtr vm;
+    int ret = -1;
+
+    if (!(vm = chDomObjFromDomain(dom)))
+        goto cleanup;
+
+    priv = vm->privateData;
+
+    if (virDomainResumeEnsureACL(dom->conn, vm->def) < 0)
+        goto cleanup;
+
+    if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0)
+        goto cleanup;
+
+    if (virDomainObjCheckActive(vm) < 0)
+        goto endjob;
+
+    if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_PAUSED) {
+        virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
+                       _("only can resume paused domain"));
+        goto endjob;
+    } else {
+        if (virCHMonitorResumeVM(priv->monitor) < 0) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                        _("failed to resume domain"));
+            goto endjob;
+        }
+    }
+
+    virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, VIR_DOMAIN_RUNNING_UNPAUSED);
+
+    ret = 0;
+
+ endjob:
+    virCHDomainObjEndJob(vm);
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return ret;
+}
+
+/**
+ * chDomainDestroyFlags:
+ * @dom: pointer to domain to destroy
+ * @flags: extra flags; not used yet.
+ *
+ * Sends SIGKILL to Cloud-Hypervisor process to terminate it
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int
+chDomainDestroyFlags(virDomainPtr dom, unsigned int flags)
+{
+    virCHDriverPtr driver = dom->conn->privateData;
+    virDomainObjPtr vm;
+    int ret = -1;
+
+    virCheckFlags(0, -1);
+
+    if (!(vm = chDomObjFromDomain(dom)))
+        goto cleanup;
+
+    if (virDomainDestroyFlagsEnsureACL(dom->conn, vm->def) < 0)
+        goto cleanup;
+
+    if (virCHDomainObjBeginJob(vm, CH_JOB_DESTROY) < 0)
+        goto cleanup;
+
+    if (virDomainObjCheckActive(vm) < 0)
+        goto endjob;
+
+    ret = virCHProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_DESTROYED);
+
+ endjob:
+    virCHDomainObjEndJob(vm);
+    if (!vm->persistent)
+        virDomainObjListRemove(driver->domains, vm);
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return ret;
+}
+
+static int
+chDomainDestroy(virDomainPtr dom)
+{
+    return chDomainDestroyFlags(dom, 0);
+}
+
+static virDomainPtr chDomainLookupByID(virConnectPtr conn,
+                                       int id)
+{
+    virCHDriverPtr driver = conn->privateData;
+    virDomainObjPtr vm;
+    virDomainPtr dom = NULL;
+
+    chDriverLock(driver);
+    vm = virDomainObjListFindByID(driver->domains, id);
+    chDriverUnlock(driver);
+
+    if (!vm) {
+        virReportError(VIR_ERR_NO_DOMAIN,
+                       _("no domain with matching id '%d'"), id);
+        goto cleanup;
+    }
+
+    if (virDomainLookupByIDEnsureACL(conn, vm->def) < 0)
+        goto cleanup;
+
+    dom = virGetDomain(conn, vm->def->name, vm->def->uuid, vm->def->id);
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return dom;
+}
+
+static virDomainPtr chDomainLookupByName(virConnectPtr conn,
+                                         const char *name)
+{
+    virCHDriverPtr driver = conn->privateData;
+    virDomainObjPtr vm;
+    virDomainPtr dom = NULL;
+
+    chDriverLock(driver);
+    vm = virDomainObjListFindByName(driver->domains, name);
+    chDriverUnlock(driver);
+
+    if (!vm) {
+        virReportError(VIR_ERR_NO_DOMAIN,
+                       _("no domain with matching name '%s'"), name);
+        goto cleanup;
+    }
+
+    if (virDomainLookupByNameEnsureACL(conn, vm->def) < 0)
+        goto cleanup;
+
+    dom = virGetDomain(conn, vm->def->name, vm->def->uuid, vm->def->id);
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return dom;
+}
+
+static virDomainPtr chDomainLookupByUUID(virConnectPtr conn,
+                                         const unsigned char *uuid)
+{
+    virCHDriverPtr driver = conn->privateData;
+    virDomainObjPtr vm;
+    virDomainPtr dom = NULL;
+
+    chDriverLock(driver);
+    vm = virDomainObjListFindByUUID(driver->domains, uuid);
+    chDriverUnlock(driver);
+
+    if (!vm) {
+        char uuidstr[VIR_UUID_STRING_BUFLEN];
+        virUUIDFormat(uuid, uuidstr);
+        virReportError(VIR_ERR_NO_DOMAIN,
+                       _("no domain with matching uuid '%s'"), uuidstr);
+        goto cleanup;
+    }
+
+    if (virDomainLookupByUUIDEnsureACL(conn, vm->def) < 0)
+        goto cleanup;
+
+    dom = virGetDomain(conn, vm->def->name, vm->def->uuid, vm->def->id);
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return dom;
+}
+
+static int
+chDomainGetState(virDomainPtr dom,
+                 int *state,
+                 int *reason,
+                 unsigned int flags)
+{
+    virDomainObjPtr vm;
+    int ret = -1;
+
+    virCheckFlags(0, -1);
+
+    if (!(vm = chDomObjFromDomain(dom)))
+        goto cleanup;
+
+    if (virDomainGetStateEnsureACL(dom->conn, vm->def) < 0)
+        goto cleanup;
+
+    *state = virDomainObjGetState(vm, reason);
+    ret = 0;
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return ret;
+}
+
+static char *chDomainGetXMLDesc(virDomainPtr dom,
+                                unsigned int flags)
+{
+    virCHDriverPtr driver = dom->conn->privateData;
+    virDomainObjPtr vm;
+    char *ret = NULL;
+
+    virCheckFlags(VIR_DOMAIN_XML_COMMON_FLAGS, NULL);
+
+    if (!(vm = chDomObjFromDomain(dom)))
+        goto cleanup;
+
+    if (virDomainGetXMLDescEnsureACL(dom->conn, vm->def, flags) < 0)
+        goto cleanup;
+
+    ret = virDomainDefFormat(vm->def, driver->xmlopt,
+                             virDomainDefFormatConvertXMLFlags(flags));
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return ret;
+}
+
+static int chDomainGetInfo(virDomainPtr dom,
+                           virDomainInfoPtr info)
+{
+    virDomainObjPtr vm;
+    int ret = -1;
+
+    if (!(vm = chDomObjFromDomain(dom)))
+        goto cleanup;
+
+    if (virDomainGetInfoEnsureACL(dom->conn, vm->def) < 0)
+        goto cleanup;
+
+    info->state = virDomainObjGetState(vm, NULL);
+
+    info->cpuTime = 0;
+
+    info->maxMem = virDomainDefGetMemoryTotal(vm->def);
+    info->memory = vm->def->mem.cur_balloon;
+    info->nrVirtCpu = virDomainDefGetVcpus(vm->def);
+
+    ret = 0;
+
+ cleanup:
+    virDomainObjEndAPI(&vm);
+    return ret;
+}
+
+static int chStateCleanup(void)
+{
+    if (ch_driver == NULL)
+        return -1;
+
+    virObjectUnref(ch_driver->domains);
+    virObjectUnref(ch_driver->xmlopt);
+    virObjectUnref(ch_driver->caps);
+    virObjectUnref(ch_driver->config);
+    virMutexDestroy(&ch_driver->lock);
+    VIR_FREE(ch_driver);
+
+    return 0;
+}
+
+static int chStateInitialize(bool privileged,
+                             const char *root,
+                             virStateInhibitCallback callback G_GNUC_UNUSED,
+                             void *opaque G_GNUC_UNUSED)
+{
+    if (root != NULL) {
+        virReportError(VIR_ERR_INVALID_ARG, "%s",
+                       _("Driver does not support embedded mode"));
+        return -1;
+    }
+
+    /* Check that the user is root, silently disable if not */
+    if (!privileged) {
+        VIR_INFO("Not running privileged, disabling driver");
+        return VIR_DRV_STATE_INIT_SKIPPED;
+    }
+
+    if (VIR_ALLOC(ch_driver) < 0)
+        return VIR_DRV_STATE_INIT_ERROR;
+
+    if (virMutexInit(&ch_driver->lock) < 0) {
+        VIR_FREE(ch_driver);
+        return VIR_DRV_STATE_INIT_ERROR;
+    }
+
+    if (!(ch_driver->domains = virDomainObjListNew()))
+        goto cleanup;
+
+    if (!(ch_driver->caps = virCHDriverCapsInit()))
+        goto cleanup;
+
+    if (!(ch_driver->xmlopt = chDomainXMLConfInit(ch_driver)))
+        goto cleanup;
+
+    if (!(ch_driver->config = virCHDriverConfigNew()))
+        goto cleanup;
+
+    if (chExtractVersion(ch_driver) < 0)
+        goto cleanup;
+
+    return VIR_DRV_STATE_INIT_COMPLETE;
+
+ cleanup:
+    chStateCleanup();
+    return VIR_DRV_STATE_INIT_ERROR;
+}
+
+/* Function Tables */
+static virHypervisorDriver chHypervisorDriver = {
+    .name = "CH",
+    .connectURIProbe = chConnectURIProbe,
+    .connectOpen = chConnectOpen,                           /* 6.7.0 */
+    .connectClose = chConnectClose,                         /* 6.7.0 */
+    .connectGetType = chConnectGetType,                     /* 6.7.0 */
+    .connectGetVersion = chConnectGetVersion,               /* 6.7.0 */
+    .connectGetHostname = chConnectGetHostname,             /* 6.7.0 */
+    .connectNumOfDomains = chConnectNumOfDomains,           /* 6.7.0 */
+    .connectListAllDomains = chConnectListAllDomains,       /* 6.7.0 */
+    .connectListDomains = chConnectListDomains,             /* 6.7.0 */
+    .connectGetCapabilities = chConnectGetCapabilities,     /* 6.7.0 */
+    .domainCreateXML = chDomainCreateXML,                   /* 6.7.0 */
+    .domainCreate = chDomainCreate,                         /* 6.7.0 */
+    .domainCreateWithFlags = chDomainCreateWithFlags,       /* 6.7.0 */
+    .domainShutdown = chDomainShutdown,                     /* 6.7.0 */
+    .domainShutdownFlags = chDomainShutdownFlags,           /* 6.7.0 */
+    .domainReboot = chDomainReboot,                         /* 6.7.0 */
+    .domainSuspend = chDomainSuspend,                       /* 6.7.0 */
+    .domainResume = chDomainResume,                         /* 6.7.0 */
+    .domainDestroy = chDomainDestroy,                       /* 6.7.0 */
+    .domainDestroyFlags = chDomainDestroyFlags,             /* 6.7.0 */
+    .domainDefineXML = chDomainDefineXML,                   /* 6.7.0 */
+    .domainDefineXMLFlags = chDomainDefineXMLFlags,         /* 6.7.0 */
+    .domainUndefine = chDomainUndefine,                     /* 6.7.0 */
+    .domainUndefineFlags = chDomainUndefineFlags,           /* 6.7.0 */
+    .domainLookupByID = chDomainLookupByID,                 /* 6.7.0 */
+    .domainLookupByUUID = chDomainLookupByUUID,             /* 6.7.0 */
+    .domainLookupByName = chDomainLookupByName,             /* 6.7.0 */
+    .domainGetState = chDomainGetState,                     /* 6.7.0 */
+    .domainGetXMLDesc = chDomainGetXMLDesc,                 /* 6.7.0 */
+    .domainGetInfo = chDomainGetInfo,                       /* 6.7.0 */
+    .domainIsActive = chDomainIsActive,                     /* 6.7.0 */
+    .nodeGetInfo = chNodeGetInfo,                           /* 6.7.0 */
+};
+
+static virConnectDriver chConnectDriver = {
+    .localOnly = true,
+    .uriSchemes = (const char *[]){"CH", "Ch", "ch", "Cloud-Hypervisor", NULL},
+    .hypervisorDriver = &chHypervisorDriver,
+};
+
+static virStateDriver chStateDriver = {
+    .name = "CH",
+    .stateInitialize = chStateInitialize,
+    .stateCleanup = chStateCleanup,
+};
+
+int chRegister(void)
+{
+    if (virRegisterConnectDriver(&chConnectDriver, false) < 0)
+        return -1;
+    if (virRegisterStateDriver(&chStateDriver) < 0)
+        return -1;
+    return 0;
+}
diff --git a/src/ch/ch_driver.h b/src/ch/ch_driver.h
new file mode 100644
index 0000000000..0516c91c24
--- /dev/null
+++ b/src/ch/ch_driver.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright Intel Corp. 2020
+ *
+ * ch_driver.h: header file for Cloud-Hypervisor driver functions
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+/* Function declarations */
+int chRegister(void);
diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c
new file mode 100644
index 0000000000..ccef70f719
--- /dev/null
+++ b/src/ch/ch_monitor.c
@@ -0,0 +1,796 @@
+/*
+ * Copyright Intel Corp. 2020
+ *
+ * ch_driver.h: header file for Cloud-Hypervisor driver functions
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <curl/curl.h>
+
+#include "ch_conf.h"
+#include "ch_monitor.h"
+#include "viralloc.h"
+#include "vircommand.h"
+#include "virerror.h"
+#include "virfile.h"
+#include "virjson.h"
+#include "virlog.h"
+#include "virtime.h"
+
+#define VIR_FROM_THIS VIR_FROM_CH
+
+VIR_LOG_INIT("ch.ch_monitor");
+
+static virClassPtr virCHMonitorClass;
+static void virCHMonitorDispose(void *obj);
+
+static int virCHMonitorOnceInit(void)
+{
+    if (!VIR_CLASS_NEW(virCHMonitor, virClassForObjectLockable()))
+        return -1;
+
+    return 0;
+}
+
+VIR_ONCE_GLOBAL_INIT(virCHMonitor);
+
+int virCHMonitorShutdownVMM(virCHMonitorPtr mon);
+int virCHMonitorPutNoContent(virCHMonitorPtr mon, const char *endpoint);
+int virCHMonitorGet(virCHMonitorPtr mon, const char *endpoint);
+int virCHMonitorPingVMM(virCHMonitorPtr mon);
+
+static int
+virCHMonitorBuildCPUJson(virJSONValuePtr content, virDomainDefPtr vmdef)
+{
+    virJSONValuePtr cpus;
+    unsigned int maxvcpus = 0;
+    unsigned int nvcpus = 0;
+    virDomainVcpuDefPtr vcpu;
+    size_t i;
+
+    /* count maximum allowed number vcpus and enabled vcpus when boot.*/
+    maxvcpus = virDomainDefGetVcpusMax(vmdef);
+    for (i = 0; i < maxvcpus; i++) {
+        vcpu = virDomainDefGetVcpu(vmdef, i);
+        if (vcpu->online)
+            nvcpus++;
+    }
+
+    if (maxvcpus != 0 || nvcpus != 0) {
+        cpus = virJSONValueNewObject();
+        if (virJSONValueObjectAppendNumberInt(cpus, "boot_vcpus", nvcpus) < 0)
+            goto cleanup;
+        if (virJSONValueObjectAppendNumberInt(cpus, "max_vcpus", vmdef->maxvcpus) < 0)
+            goto cleanup;
+        if (virJSONValueObjectAppend(content, "cpus", cpus) < 0)
+            goto cleanup;
+    }
+
+    return 0;
+
+ cleanup:
+    virJSONValueFree(cpus);
+    return -1;
+}
+
+static int
+virCHMonitorBuildKernelJson(virJSONValuePtr content, virDomainDefPtr vmdef)
+{
+    virJSONValuePtr kernel;
+
+    if (vmdef->os.kernel == NULL) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Kernel image path in this domain is not defined"));
+        return -1;
+    } else {
+        kernel = virJSONValueNewObject();
+        if (virJSONValueObjectAppendString(kernel, "path", vmdef->os.kernel) < 0)
+            goto cleanup;
+        if (virJSONValueObjectAppend(content, "kernel", kernel) < 0)
+            goto cleanup;
+    }
+
+    return 0;
+
+ cleanup:
+    virJSONValueFree(kernel);
+    return -1;
+}
+
+static int
+virCHMonitorBuildCmdlineJson(virJSONValuePtr content, virDomainDefPtr vmdef)
+{
+    virJSONValuePtr cmdline;
+
+    cmdline = virJSONValueNewObject();
+    if (vmdef->os.cmdline) {
+        if (virJSONValueObjectAppendString(cmdline, "args", vmdef->os.cmdline) < 0)
+            goto cleanup;
+        if (virJSONValueObjectAppend(content, "cmdline", cmdline) < 0)
+            goto cleanup;
+    }
+
+    return 0;
+
+ cleanup:
+    virJSONValueFree(cmdline);
+    return -1;
+}
+
+static int
+virCHMonitorBuildMemoryJson(virJSONValuePtr content, virDomainDefPtr vmdef)
+{
+    virJSONValuePtr memory;
+    unsigned long long total_memory = virDomainDefGetMemoryInitial(vmdef) * 1024;
+
+    if (total_memory != 0) {
+        memory = virJSONValueNewObject();
+        if (virJSONValueObjectAppendNumberUlong(memory, "size", total_memory) < 0)
+            goto cleanup;
+        if (virJSONValueObjectAppend(content, "memory", memory) < 0)
+            goto cleanup;
+    }
+
+    return 0;
+
+ cleanup:
+    virJSONValueFree(memory);
+    return -1;
+}
+
+static int
+virCHMonitorBuildInitramfsJson(virJSONValuePtr content, virDomainDefPtr vmdef)
+{
+    virJSONValuePtr initramfs;
+
+    if (vmdef->os.initrd != NULL) {
+        initramfs = virJSONValueNewObject();
+        if (virJSONValueObjectAppendString(initramfs, "path", vmdef->os.initrd) < 0)
+            goto cleanup;
+        if (virJSONValueObjectAppend(content, "initramfs", initramfs) < 0)
+            goto cleanup;
+    }
+
+    return 0;
+
+ cleanup:
+    virJSONValueFree(initramfs);
+    return -1;
+}
+
+static int
+virCHMonitorBuildDiskJson(virJSONValuePtr disks, virDomainDiskDefPtr diskdef)
+{
+    virJSONValuePtr disk;
+
+    if (diskdef->src != NULL && diskdef->src->path != NULL) {
+        disk = virJSONValueNewObject();
+        if (virJSONValueObjectAppendString(disk, "path", diskdef->src->path) < 0)
+            goto cleanup;
+        if (diskdef->src->readonly) {
+            if (virJSONValueObjectAppendBoolean(disk, "readonly", true) < 0)
+                goto cleanup;
+        }
+        if (virJSONValueArrayAppend(disks, disk) < 0)
+            goto cleanup;
+    }
+
+    return 0;
+
+ cleanup:
+    virJSONValueFree(disk);
+    return -1;
+}
+
+static int
+virCHMonitorBuildDisksJson(virJSONValuePtr content, virDomainDefPtr vmdef)
+{
+    virJSONValuePtr disks;
+    size_t i;
+
+    if (vmdef->ndisks > 0) {
+        disks = virJSONValueNewArray();
+
+        for (i = 0; i < vmdef->ndisks; i++) {
+            if (virCHMonitorBuildDiskJson(disks, vmdef->disks[i]) < 0)
+                goto cleanup;
+        }
+        if (virJSONValueObjectAppend(content, "disks", disks) < 0)
+            goto cleanup;
+    }
+
+    return 0;
+
+ cleanup:
+    virJSONValueFree(disks);
+    return -1;
+}
+
+static int
+virCHMonitorBuildNetJson(virJSONValuePtr nets, virDomainNetDefPtr netdef)
+{
+    virDomainNetType netType = virDomainNetGetActualType(netdef);
+    char macaddr[VIR_MAC_STRING_BUFLEN];
+    virJSONValuePtr net;
+
+    // check net type at first
+    net = virJSONValueNewObject();
+
+    switch (netType) {
+    case VIR_DOMAIN_NET_TYPE_ETHERNET:
+        if (netdef->guestIP.nips == 1) {
+            const virNetDevIPAddr *ip = netdef->guestIP.ips[0];
+            g_autofree char *addr = NULL;
+            virSocketAddr netmask;
+            g_autofree char *netmaskStr = NULL;
+            if (!(addr = virSocketAddrFormat(&ip->address)))
+                goto cleanup;
+            if (virJSONValueObjectAppendString(net, "ip", addr) < 0)
+                goto cleanup;
+
+            if (virSocketAddrPrefixToNetmask(ip->prefix, &netmask, AF_INET) < 0) {
+                virReportError(VIR_ERR_INTERNAL_ERROR,
+                               _("Failed to translate net prefix %d to netmask"),
+                               ip->prefix);
+                goto cleanup;
+            }
+            if (!(netmaskStr = virSocketAddrFormat(&netmask)))
+                goto cleanup;
+            if (virJSONValueObjectAppendString(net, "mask", netmaskStr) < 0)
+                goto cleanup;
+        }
+        break;
+    case VIR_DOMAIN_NET_TYPE_VHOSTUSER:
+        if ((virDomainChrType)netdef->data.vhostuser->type != VIR_DOMAIN_CHR_TYPE_UNIX) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("vhost_user type support UNIX socket in this CH"));
+            goto cleanup;
+        } else {
+            if (virJSONValueObjectAppendString(net, "vhost_socket", netdef->data.vhostuser->data.nix.path) < 0)
+                goto cleanup;
+            if (virJSONValueObjectAppendBoolean(net, "vhost_user", true) < 0)
+                goto cleanup;
+        }
+        break;
+    case VIR_DOMAIN_NET_TYPE_BRIDGE:
+    case VIR_DOMAIN_NET_TYPE_NETWORK:
+    case VIR_DOMAIN_NET_TYPE_DIRECT:
+    case VIR_DOMAIN_NET_TYPE_USER:
+    case VIR_DOMAIN_NET_TYPE_SERVER:
+    case VIR_DOMAIN_NET_TYPE_CLIENT:
+    case VIR_DOMAIN_NET_TYPE_MCAST:
+    case VIR_DOMAIN_NET_TYPE_INTERNAL:
+    case VIR_DOMAIN_NET_TYPE_HOSTDEV:
+    case VIR_DOMAIN_NET_TYPE_UDP:
+    case VIR_DOMAIN_NET_TYPE_LAST:
+    default:
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("Only ethernet and vhost_user type network types are "
+                         "supported in this CH"));
+        goto cleanup;
+    }
+
+    if (netdef->ifname != NULL) {
+        if (virJSONValueObjectAppendString(net, "tap", netdef->ifname) < 0)
+            goto cleanup;
+    }
+    if (virJSONValueObjectAppendString(net, "mac", virMacAddrFormat(&netdef->mac, macaddr)) < 0)
+        goto cleanup;
+
+
+    if (netdef->virtio != NULL) {
+        if (netdef->virtio->iommu == VIR_TRISTATE_SWITCH_ON) {
+            if (virJSONValueObjectAppendBoolean(net, "iommu", true) < 0)
+                goto cleanup;
+        }
+    }
+    if (netdef->driver.virtio.queues) {
+        if (virJSONValueObjectAppendNumberInt(net, "num_queues", netdef->driver.virtio.queues) < 0)
+            goto cleanup;
+    }
+
+    if (netdef->driver.virtio.rx_queue_size || netdef->driver.virtio.tx_queue_size) {
+        if (netdef->driver.virtio.rx_queue_size != netdef->driver.virtio.tx_queue_size) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+               _("virtio rx_queue_size option %d is not same with tx_queue_size %d"),
+               netdef->driver.virtio.rx_queue_size,
+               netdef->driver.virtio.tx_queue_size);
+            goto cleanup;
+        }
+        if (virJSONValueObjectAppendNumberInt(net, "queue_size", netdef->driver.virtio.rx_queue_size) < 0)
+            goto cleanup;
+    }
+
+    if (virJSONValueArrayAppend(nets, net) < 0)
+        goto cleanup;
+
+    return 0;
+
+ cleanup:
+    virJSONValueFree(net);
+    return -1;
+}
+
+static int
+virCHMonitorBuildNetsJson(virJSONValuePtr content, virDomainDefPtr vmdef)
+{
+    virJSONValuePtr nets;
+    size_t i;
+
+    if (vmdef->nnets > 0) {
+        nets = virJSONValueNewArray();
+
+        for (i = 0; i < vmdef->nnets; i++) {
+            if (virCHMonitorBuildNetJson(nets, vmdef->nets[i]) < 0)
+                goto cleanup;
+        }
+        if (virJSONValueObjectAppend(content, "net", nets) < 0)
+            goto cleanup;
+    }
+
+    return 0;
+
+ cleanup:
+    virJSONValueFree(nets);
+    return -1;
+}
+
+static int
+virCHMonitorBuildVMJson(virDomainDefPtr vmdef, char **jsonstr)
+{
+    virJSONValuePtr content = virJSONValueNewObject();
+    int ret = -1;
+
+    if (vmdef == NULL) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("VM is not defined"));
+        goto cleanup;
+    }
+
+    if (virCHMonitorBuildCPUJson(content, vmdef) < 0)
+        goto cleanup;
+
+    if (virCHMonitorBuildMemoryJson(content, vmdef) < 0)
+        goto cleanup;
+
+    if (virCHMonitorBuildKernelJson(content, vmdef) < 0)
+        goto cleanup;
+
+    if (virCHMonitorBuildCmdlineJson(content, vmdef) < 0)
+        goto cleanup;
+
+    if (virCHMonitorBuildInitramfsJson(content, vmdef) < 0)
+        goto cleanup;
+
+    if (virCHMonitorBuildDisksJson(content, vmdef) < 0)
+        goto cleanup;
+
+    if (virCHMonitorBuildNetsJson(content, vmdef) < 0)
+        goto cleanup;
+
+    if (!(*jsonstr = virJSONValueToString(content, false)))
+        goto cleanup;
+
+    ret = 0;
+
+ cleanup:
+    virJSONValueFree(content);
+    return ret;
+}
+
+/* generate command to launch Cloud-Hypervisor socket
+   return -1 - error
+           0 - OK
+   Caller has to free the cmd
+*/
+static virCommandPtr
+chMonitorBuildSocketCmd(virDomainObjPtr vm, const char *socket_path)
+{
+    virCommandPtr cmd;
+
+    if (vm->def == NULL) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("VM is not defined"));
+        return NULL;
+    }
+
+    if (vm->def->emulator != NULL)
+        cmd = virCommandNew(vm->def->emulator);
+    else
+        cmd = virCommandNew(CH_CMD);
+
+    virCommandAddArgList(cmd, "--api-socket", socket_path, NULL);
+
+    return cmd;
+}
+
+virCHMonitorPtr
+virCHMonitorNew(virDomainObjPtr vm, const char *socketdir)
+{
+    virCHMonitorPtr ret = NULL;
+    virCHMonitorPtr mon = NULL;
+    virCommandPtr cmd = NULL;
+    int pings = 0;
+
+    if (virCHMonitorInitialize() < 0)
+        return NULL;
+
+    if (!(mon = virObjectLockableNew(virCHMonitorClass)))
+        return NULL;
+
+    mon->socketpath = g_strdup_printf("%s/%s-socket", socketdir, vm->def->name);
+
+    /* prepare to launch Cloud-Hypervisor socket */
+    if (!(cmd = chMonitorBuildSocketCmd(vm, mon->socketpath)))
+        goto cleanup;
+
+    if (virFileMakePath(socketdir) < 0) {
+        virReportSystemError(errno,
+                             _("Cannot create socket directory '%s'"),
+                             socketdir);
+        goto cleanup;
+    }
+
+    /* launch Cloud-Hypervisor socket */
+    if (virCommandRunAsync(cmd, &mon->pid) < 0)
+        goto cleanup;
+
+    /* get a curl handle */
+    mon->handle = curl_easy_init();
+
+    /* try to ping VMM socket 5 times to make sure it is ready */
+    while (pings < 5) {
+        if (virCHMonitorPingVMM(mon) == 0)
+            break;
+        if (pings == 5)
+            goto cleanup;
+
+        g_usleep(100 * 1000);
+    }
+
+    /* now has its own reference */
+    virObjectRef(mon);
+    mon->vm = virObjectRef(vm);
+
+    ret = mon;
+
+ cleanup:
+    virCommandFree(cmd);
+    return ret;
+}
+
+static void virCHMonitorDispose(void *opaque)
+{
+    virCHMonitorPtr mon = opaque;
+
+    VIR_DEBUG("mon=%p", mon);
+    virObjectUnref(mon->vm);
+}
+
+void virCHMonitorClose(virCHMonitorPtr mon)
+{
+    if (!mon)
+        return;
+
+    if (mon->pid > 0) {
+        /* try cleaning up the Cloud-Hypervisor process */
+        virProcessAbort(mon->pid);
+        mon->pid = 0;
+    }
+
+    if (mon->handle)
+        curl_easy_cleanup(mon->handle);
+
+    if (mon->socketpath) {
+        if (virFileRemove(mon->socketpath, -1, -1) < 0) {
+            VIR_WARN("Unable to remove CH socket file '%s'",
+                     mon->socketpath);
+        }
+        VIR_FREE(mon->socketpath);
+    }
+
+    virObjectUnref(mon);
+    if (mon->vm)
+        virObjectUnref(mon->vm);
+}
+
+
+struct data {
+  char trace_ascii; /* 1 or 0 */
+};
+
+static void dump(const char *text,
+                 FILE *stream,
+                 unsigned char *ptr,
+                 size_t size,
+                 char nohex)
+{
+    size_t i;
+    size_t c;
+
+    unsigned int width = 0x10;
+
+    if (nohex)
+        /* without the hex output, we can fit more on screen */
+        width = 0x40;
+
+    fprintf(stream, "%s, %10.10lu bytes (0x%8.8lx)\n", text, (unsigned long)size,
+            (unsigned long)size);
+
+    for (i = 0; i < size; i += width) {
+
+        fprintf(stream, "%4.4lx: ", (unsigned long)i);
+
+        if (!nohex) {
+            /* hex not disabled, show it */
+            for (c = 0; c < width; c++) {
+                if (i + c < size)
+                    fprintf(stream, "%02x ", ptr[i + c]);
+                else
+                    fputs("   ", stream);
+            }
+        }
+
+        for (c = 0; (c < width) && (i + c < size); c++) {
+            /* check for 0D0A; if found, skip past and start a new line of output */
+            if (nohex && (i + c + 1 < size) && ptr[i + c] == 0x0D &&
+                ptr[i + c + 1] == 0x0A) {
+                i += (c + 2 - width);
+                break;
+            }
+            fprintf(stream, "%c",
+                    (ptr[i + c] >= 0x20) && (ptr[i + c] < 0x80) ? ptr[i + c] : '.');
+            /* check again for 0D0A, to avoid an extra \n if it's at width */
+            if (nohex && (i + c + 2 < size) && ptr[i + c + 1] == 0x0D &&
+                ptr[i + c + 2] == 0x0A) {
+                i += (c + 3 - width);
+                break;
+            }
+        }
+        fputc('\n', stream); /* newline */
+    }
+    fflush(stream);
+}
+
+static int my_trace(CURL *handle,
+                    curl_infotype type,
+                    char *data,
+                    size_t size,
+                    void *userp)
+{
+    struct data *config = (struct data *)userp;
+    const char *text = "";
+    (void)handle; /* prevent compiler warning */
+
+    switch (type) {
+    case CURLINFO_TEXT:
+        fprintf(stderr, "== Info: %s", data);
+        /* FALLTHROUGH */
+    case CURLINFO_END: /* in case a new one is introduced to shock us */
+        break;
+    case CURLINFO_HEADER_OUT:
+        text = "=> Send header";
+        break;
+    case CURLINFO_DATA_OUT:
+        text = "=> Send data";
+        break;
+    case CURLINFO_SSL_DATA_OUT:
+        text = "=> Send SSL data";
+        break;
+    case CURLINFO_HEADER_IN:
+        text = "<= Recv header";
+        break;
+    case CURLINFO_DATA_IN:
+        text = "<= Recv data";
+        break;
+    case CURLINFO_SSL_DATA_IN:
+        text = "<= Recv SSL data";
+        break;
+    }
+
+    dump(text, stderr, (unsigned char *)data, size, config->trace_ascii);
+    return 0;
+}
+
+static int
+virCHMonitorCurlPerform(CURL *handle)
+{
+    CURLcode errorCode;
+    long responseCode = 0;
+
+    struct data config;
+
+    config.trace_ascii = 1; /* enable ascii tracing */
+
+    curl_easy_setopt(handle, CURLOPT_DEBUGFUNCTION, my_trace);
+    curl_easy_setopt(handle, CURLOPT_DEBUGDATA, &config);
+
+    /* the DEBUGFUNCTION has no effect until we enable VERBOSE */
+    curl_easy_setopt(handle, CURLOPT_VERBOSE, 1L);
+
+    errorCode = curl_easy_perform(handle);
+
+    if (errorCode != CURLE_OK) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("curl_easy_perform() returned an error: %s (%d)"),
+                       curl_easy_strerror(errorCode), errorCode);
+        return -1;
+    }
+
+    errorCode = curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE,
+                                  &responseCode);
+
+    if (errorCode != CURLE_OK) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("curl_easy_getinfo(CURLINFO_RESPONSE_CODE) returned an "
+                         "error: %s (%d)"), curl_easy_strerror(errorCode),
+                       errorCode);
+        return -1;
+    }
+
+    if (responseCode < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("curl_easy_getinfo(CURLINFO_RESPONSE_CODE) returned a "
+                         "negative response code"));
+        return -1;
+    }
+
+    return responseCode;
+}
+
+int
+virCHMonitorPutNoContent(virCHMonitorPtr mon, const char *endpoint)
+{
+    char *url;
+    int responseCode = 0;
+    int ret = -1;
+
+    url = g_strdup_printf("%s/%s", URL_ROOT, endpoint);
+
+    virObjectLock(mon);
+
+    /* reset all options of a libcurl session handle at first */
+    curl_easy_reset(mon->handle);
+
+    curl_easy_setopt(mon->handle, CURLOPT_UNIX_SOCKET_PATH, mon->socketpath);
+    curl_easy_setopt(mon->handle, CURLOPT_URL, url);
+    curl_easy_setopt(mon->handle, CURLOPT_PUT, true);
+    curl_easy_setopt(mon->handle, CURLOPT_HTTPHEADER, NULL);
+
+    responseCode = virCHMonitorCurlPerform(mon->handle);
+
+    virObjectUnlock(mon);
+
+    if (responseCode == 200 || responseCode == 204)
+        ret = 0;
+
+    VIR_FREE(url);
+    return ret;
+}
+
+int
+virCHMonitorGet(virCHMonitorPtr mon, const char *endpoint)
+{
+    char *url;
+    int responseCode = 0;
+    int ret = -1;
+
+    url = g_strdup_printf("%s/%s", URL_ROOT, endpoint);
+
+    virObjectLock(mon);
+
+    /* reset all options of a libcurl session handle at first */
+    curl_easy_reset(mon->handle);
+
+    curl_easy_setopt(mon->handle, CURLOPT_UNIX_SOCKET_PATH, mon->socketpath);
+    curl_easy_setopt(mon->handle, CURLOPT_URL, url);
+
+    responseCode = virCHMonitorCurlPerform(mon->handle);
+
+    virObjectUnlock(mon);
+
+    if (responseCode == 200 || responseCode == 204)
+        ret = 0;
+
+    VIR_FREE(url);
+    return ret;
+}
+
+int
+virCHMonitorPingVMM(virCHMonitorPtr mon)
+{
+    return virCHMonitorGet(mon, URL_VMM_PING);
+}
+
+int
+virCHMonitorShutdownVMM(virCHMonitorPtr mon)
+{
+    return virCHMonitorPutNoContent(mon, URL_VMM_SHUTDOWN);
+}
+
+int
+virCHMonitorCreateVM(virCHMonitorPtr mon)
+{
+    g_autofree char *url = NULL;
+    int responseCode = 0;
+    int ret = -1;
+    g_autofree char *payload = NULL;
+    struct curl_slist *headers = NULL;
+
+    url = g_strdup_printf("%s/%s", URL_ROOT, URL_VM_CREATE);
+    headers = curl_slist_append(headers, "Accept: application/json");
+    headers = curl_slist_append(headers, "Content-Type: application/json");
+    headers = curl_slist_append(headers, "Expect:");
+
+    if (virCHMonitorBuildVMJson(mon->vm->def, &payload) != 0)
+        return -1;
+
+    virObjectLock(mon);
+
+    /* reset all options of a libcurl session handle at first */
+    curl_easy_reset(mon->handle);
+
+    curl_easy_setopt(mon->handle, CURLOPT_UNIX_SOCKET_PATH, mon->socketpath);
+    curl_easy_setopt(mon->handle, CURLOPT_URL, url);
+    curl_easy_setopt(mon->handle, CURLOPT_CUSTOMREQUEST, "PUT");
+    curl_easy_setopt(mon->handle, CURLOPT_HTTPHEADER, headers);
+    curl_easy_setopt(mon->handle, CURLOPT_POSTFIELDS, payload);
+
+    responseCode = virCHMonitorCurlPerform(mon->handle);
+
+    virObjectUnlock(mon);
+
+    if (responseCode == 200 || responseCode == 204)
+        ret = 0;
+
+    curl_slist_free_all(headers);
+    VIR_FREE(url);
+    VIR_FREE(payload);
+    return ret;
+}
+
+int
+virCHMonitorBootVM(virCHMonitorPtr mon)
+{
+    return virCHMonitorPutNoContent(mon, URL_VM_BOOT);
+}
+
+int
+virCHMonitorShutdownVM(virCHMonitorPtr mon)
+{
+    return virCHMonitorPutNoContent(mon, URL_VM_SHUTDOWN);
+}
+
+int
+virCHMonitorRebootVM(virCHMonitorPtr mon)
+{
+    return virCHMonitorPutNoContent(mon, URL_VM_REBOOT);
+}
+
+int
+virCHMonitorSuspendVM(virCHMonitorPtr mon)
+{
+    return virCHMonitorPutNoContent(mon, URL_VM_Suspend);
+}
+
+int
+virCHMonitorResumeVM(virCHMonitorPtr mon)
+{
+    return virCHMonitorPutNoContent(mon, URL_VM_RESUME);
+}
diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h
new file mode 100644
index 0000000000..bd6046a50b
--- /dev/null
+++ b/src/ch/ch_monitor.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright Intel Corp. 2020
+ *
+ * ch_driver.h: header file for Cloud-Hypervisor driver functions
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <curl/curl.h>
+
+#include "virobject.h"
+#include "domain_conf.h"
+
+#define URL_ROOT "http://localhost/api/v1"
+#define URL_VMM_SHUTDOWN "vmm.shutdown"
+#define URL_VMM_PING "vmm.ping"
+#define URL_VM_CREATE "vm.create"
+#define URL_VM_DELETE "vm.delete"
+#define URL_VM_BOOT "vm.boot"
+#define URL_VM_SHUTDOWN "vm.shutdown"
+#define URL_VM_REBOOT "vm.reboot"
+#define URL_VM_Suspend "vm.pause"
+#define URL_VM_RESUME "vm.resume"
+
+typedef struct _virCHMonitor virCHMonitor;
+typedef virCHMonitor *virCHMonitorPtr;
+
+struct _virCHMonitor {
+    virObjectLockable parent;
+
+    CURL *handle;
+
+    char *socketpath;
+
+    pid_t pid;
+
+    virDomainObjPtr vm;
+};
+
+virCHMonitorPtr virCHMonitorNew(virDomainObjPtr vm, const char *socketdir);
+void virCHMonitorClose(virCHMonitorPtr mon);
+
+int virCHMonitorCreateVM(virCHMonitorPtr mon);
+int virCHMonitorBootVM(virCHMonitorPtr mon);
+int virCHMonitorShutdownVM(virCHMonitorPtr mon);
+int virCHMonitorRebootVM(virCHMonitorPtr mon);
+int virCHMonitorSuspendVM(virCHMonitorPtr mon);
+int virCHMonitorResumeVM(virCHMonitorPtr mon);
diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c
new file mode 100644
index 0000000000..15f4801549
--- /dev/null
+++ b/src/ch/ch_process.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright Intel Corp. 2020
+ *
+ * ch_driver.h: header file for Cloud-Hypervisor driver functions
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "ch_domain.h"
+#include "ch_monitor.h"
+#include "ch_process.h"
+#include "viralloc.h"
+#include "virerror.h"
+#include "virlog.h"
+
+#define VIR_FROM_THIS VIR_FROM_CH
+
+VIR_LOG_INIT("ch.ch_process");
+
+#define START_SOCKET_POSTFIX ": starting up socket\n"
+#define START_VM_POSTFIX ": starting up vm\n"
+
+
+
+static virCHMonitorPtr virCHProcessConnectMonitor(virCHDriverPtr driver,
+                                                  virDomainObjPtr vm)
+{
+    virCHMonitorPtr monitor = NULL;
+    virCHDriverConfigPtr cfg = virCHDriverGetConfig(driver);
+
+    monitor = virCHMonitorNew(vm, cfg->stateDir);
+
+    virObjectUnref(cfg);
+    return monitor;
+}
+
+/**
+ * virCHProcessStart:
+ * @driver: pointer to driver structure
+ * @vm: pointer to virtual machine structure
+ * @reason: reason for switching vm to running state
+ *
+ * Starts Cloud-Hypervisor listen on a local socket
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+int virCHProcessStart(virCHDriverPtr driver,
+                      virDomainObjPtr vm,
+                      virDomainRunningReason reason)
+{
+    int ret = -1;
+    virCHDomainObjPrivatePtr priv = vm->privateData;
+
+    if (!priv->monitor) {
+        /* And we can get the first monitor connection now too */
+        if (!(priv->monitor = virCHProcessConnectMonitor(driver, vm))) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                        _("failed to create connection to CH socket"));
+            goto cleanup;
+        }
+
+        if (virCHMonitorCreateVM(priv->monitor) < 0) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                        _("failed to create guest VM"));
+            goto cleanup;
+        }
+    }
+
+    if (virCHMonitorBootVM(priv->monitor) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("failed to boot guest VM"));
+        goto cleanup;
+    }
+
+    vm->pid = priv->monitor->pid;
+    vm->def->id = vm->pid;
+    virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
+
+    return 0;
+
+ cleanup:
+    if (ret)
+        virCHProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED);
+
+    return ret;
+}
+
+int virCHProcessStop(virCHDriverPtr driver G_GNUC_UNUSED,
+                     virDomainObjPtr vm,
+                     virDomainShutoffReason reason)
+{
+    virCHDomainObjPrivatePtr priv = vm->privateData;
+
+    VIR_DEBUG("Stopping VM name=%s pid=%d reason=%d",
+              vm->def->name, (int)vm->pid, (int)reason);
+
+    if (priv->monitor) {
+        virCHMonitorClose(priv->monitor);
+        priv->monitor = NULL;
+    }
+
+    vm->pid = -1;
+    vm->def->id = -1;
+
+    virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason);
+
+    return 0;
+}
diff --git a/src/ch/ch_process.h b/src/ch/ch_process.h
new file mode 100644
index 0000000000..22f3c39618
--- /dev/null
+++ b/src/ch/ch_process.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright Intel Corp. 2020
+ *
+ * ch_driver.h: header file for Cloud-Hypervisor driver functions
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "internal.h"
+
+int virCHProcessStart(virCHDriverPtr  driver,
+                      virDomainObjPtr vm,
+                      virDomainRunningReason reason);
+int virCHProcessStop(virCHDriverPtr driver,
+                     virDomainObjPtr vm,
+                     virDomainShutoffReason reason);
diff --git a/src/ch/meson.build b/src/ch/meson.build
new file mode 100644
index 0000000000..e41691bc05
--- /dev/null
+++ b/src/ch/meson.build
@@ -0,0 +1,44 @@
+ch_driver_sources = [
+  'ch_conf.c',
+  'ch_conf.h',
+  'ch_domain.c',
+  'ch_domain.h',
+  'ch_driver.c',
+  'ch_driver.h',
+  'ch_monitor.c',
+  'ch_monitor.h',
+  'ch_process.c',
+  'ch_process.h',
+]
+
+driver_source_files += files(ch_driver_sources)
+
+stateful_driver_source_files += files(ch_driver_sources)
+
+if conf.has('WITH_CH')
+  ch_driver_impl = static_library(
+    'virt_driver_ch_impl',
+    [
+      ch_driver_sources,
+    ],
+    dependencies: [
+      access_dep,
+      curl_dep,
+      log_dep,
+      src_dep,
+    ],
+    include_directories: [
+      conf_inc_dir,
+    ],
+  )
+
+  virt_modules += {
+    'name': 'virt_driver_ch',
+    'link_whole': [
+      ch_driver_impl,
+    ],
+    'link_args': [
+      libvirt_no_undefined,
+    ],
+  }
+endif
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 5d3ae8bb28..11b183ad2c 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -130,6 +130,7 @@ VIR_ENUM_IMPL(virDomainVirt,
               "parallels",
               "bhyve",
               "vz",
+              "ch",
 );
 
 VIR_ENUM_IMPL(virDomainOS,
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 8a0f26f5c0..4dba588728 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -139,6 +139,7 @@ typedef enum {
     VIR_DOMAIN_VIRT_PARALLELS,
     VIR_DOMAIN_VIRT_BHYVE,
     VIR_DOMAIN_VIRT_VZ,
+    VIR_DOMAIN_VIRT_CH,
 
     VIR_DOMAIN_VIRT_LAST
 } virDomainVirtType;
diff --git a/src/meson.build b/src/meson.build
index 5d8deaf548..90fbee39e5 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -272,6 +272,7 @@ subdir('esx')
 subdir('hyperv')
 subdir('libxl')
 subdir('lxc')
+subdir('ch')
 subdir('openvz')
 subdir('qemu')
 subdir('test')
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 6798febf8d..819b381e21 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -6820,6 +6820,7 @@ qemuBuildMachineCommandLine(virCommandPtr cmd,
     case VIR_DOMAIN_VIRT_PARALLELS:
     case VIR_DOMAIN_VIRT_BHYVE:
     case VIR_DOMAIN_VIRT_VZ:
+    case VIR_DOMAIN_VIRT_CH:
     case VIR_DOMAIN_VIRT_NONE:
         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                        _("the QEMU binary does not support %s"),
diff --git a/src/remote/remote_daemon.c b/src/remote/remote_daemon.c
index 1aa9bfc0d2..f8df8de095 100644
--- a/src/remote/remote_daemon.c
+++ b/src/remote/remote_daemon.c
@@ -169,6 +169,10 @@ static int daemonInitialize(void)
     if (virDriverLoadModule("qemu", "qemuRegister", false) < 0)
         return -1;
 # endif
+# ifdef WITH_CH
+    if (virDriverLoadModule("ch", "chRegister", false) < 0)
+        return -1;
+# endif
 # ifdef WITH_LXC
     if (virDriverLoadModule("lxc", "lxcRegister", false) < 0)
         return -1;
diff --git a/src/util/virerror.c b/src/util/virerror.c
index 507a29f50f..9446c908bf 100644
--- a/src/util/virerror.c
+++ b/src/util/virerror.c
@@ -144,6 +144,7 @@ VIR_ENUM_IMPL(virErrorDomain,
 
               "TPM", /* 70 */
               "BPF",
+              "Cloud-Hypervisor Driver",
 );
 
 
diff --git a/tools/virsh.c b/tools/virsh.c
index 06ff5e8336..9555ea3374 100644
--- a/tools/virsh.c
+++ b/tools/virsh.c
@@ -510,6 +510,9 @@ virshShowVersion(vshControl *ctl G_GNUC_UNUSED)
 #ifdef WITH_OPENVZ
     vshPrint(ctl, " OpenVZ");
 #endif
+#ifdef WITH_CH
+    vshPrint(ctl, " Cloud-Hypervisor");
+#endif
 #ifdef WITH_VZ
     vshPrint(ctl, " Virtuozzo");
 #endif
-- 
2.26.2

Re: [RFC] Add basic driver for the Cloud-Hypervisor
Posted by Daniel P. Berrangé 3 years, 7 months ago
On Thu, Aug 27, 2020 at 11:24:32AM -0700, William Douglas wrote:
> This patch adds support for the following initial VM actions using the
> Cloud-Hypervsior API:
>  * vm.create
>  * vm.delete
>  * vm.boot
>  * vm.shutdown
>  * vm.reboot
>  * vm.pause
>  * vm.resume
> 
> To use the Cloud-Hypervisor driver, the v0.9.0 (the as of now current)
> release of Cloud-Hypervisor is required to be installed.

It would be useful to outline the general architecture / approach to
mgmt.

IIUC, cloud-hypervisor is similar to QEMU in that launching a VM just
involves spawning a process for that VM. There is no central daemon
or authority over all cloud-hypervisor VMs, thus libvirtd fills that
role by acting as the mgmt daemon.

In terms of communicating with the process, I'm seeing that basically
everyting is REST API calls with JSON encoded data.

> Signed-off-by: William Douglas <william.douglas@intel.com>
> ---
>  include/libvirt/virterror.h |   1 +
>  libvirt.spec.in             |  32 ++
>  meson.build                 |   5 +
>  meson_options.txt           |   1 +

It would be good to have a drvch.html.in file, well a drvch.rst
file actually to give users an intro to the new driver. It should
probably mention it is an early PoC not really targetting production
usage at this time since there's many key features missing.


> diff --git a/libvirt.spec.in b/libvirt.spec.in
> index bb74443484..66edb1fa76 100644
> --- a/libvirt.spec.in
> +++ b/libvirt.spec.in
> @@ -14,6 +14,7 @@
>  
>  # The hypervisor drivers that run in libvirtd
>  %define with_qemu          0%{!?_without_qemu:1}
> +%define with_ch            0%{!?_without_ch:1}
>  %define with_lxc           0%{!?_without_lxc:1}
>  %define with_libxl         0%{!?_without_libxl:1}
>  %define with_vbox          0%{!?_without_vbox:1}
> @@ -232,6 +233,9 @@ Requires: libvirt-daemon-driver-lxc = %{version}-%{release}
>  %if %{with_qemu}
>  Requires: libvirt-daemon-driver-qemu = %{version}-%{release}
>  %endif
> +%if %{with_ch}
> +Requires: libvirt-daemon-driver-ch = %{version}-%{release}
> +%endif
>  # We had UML driver, but we've removed it.
>  Obsoletes: libvirt-daemon-driver-uml <= 5.0.0
>  Obsoletes: libvirt-daemon-uml <= 5.0.0
> @@ -744,6 +748,20 @@ QEMU
>  %endif
>  
>  
> +%if %{with_ch}
> +%package daemon-driver-ch
> +Summary: Cloud-Hypervisor driver plugin for the libvirtd daemon
> +Requires: libvirt-daemon = %{version}-%{release}
> +Requires: libvirt-libs = %{version}-%{release}
> +Requires: /usr/bin/cloud-hypervisor
> +
> +%description daemon-driver-ch
> +The Cloud-Hypervisor driver plugin for the libvirtd daemon,
> +providing an implementation of the hypervisor driver APIs
> +using Cloud-Hypervisor
> +%endif

The spec file targets Fedora and RHEL, and IIUC, cloud-hypervisor
is not present in either of them. So for now the spec file should
always disable building of the new driver.


> diff --git a/meson.build b/meson.build
> index dabd4196e6..a6759cb051 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1722,6 +1722,10 @@ elif get_option('driver_lxc').enabled()
>    error('linux and remote_driver are required for LXC')
>  endif
>  
> +if not get_option('driver_ch').disabled() and host_machine.system() == 'linux' and conf.has('WITH_LIBVIRTD')
> +  conf.set('WITH_CH', 1)
> +endif

The driver relies on Curl and YAJL, so you'll need to do more there to
automatically disable the driver if either of those is not present.
Take a look a the handling of 'drive_qemu' for a reasonable illustration
for YAJL, which you can adapt and include curl too.

> diff --git a/src/ch/ch_conf.c b/src/ch/ch_conf.c
> new file mode 100644
> index 0000000000..8769b0f7e2
> --- /dev/null
> +++ b/src/ch/ch_conf.c
> @@ -0,0 +1,239 @@
> +/*
> + * Copyright Intel Corp. 2020
> + *
> + * ch_driver.h: header file for Cloud-Hypervisor driver functions

I'm surprised 'ninja test' didn't complain that the filename mentioned
here doesn't match the actual filename. Likewise for other files.


> +/* Functions */
> +virCapsPtr virCHDriverCapsInit(void)
> +{
> +    virCapsPtr caps;
> +    virCapsGuestPtr guest;
> +
> +    if ((caps = virCapabilitiesNew(virArchFromHost(),
> +                                   false, false)) == NULL)
> +        goto cleanup;
> +
> +    if (!(caps->host.numa = virCapabilitiesHostNUMANewHost()))
> +        goto cleanup;
> +
> +    if (virCapabilitiesInitCaches(caps) < 0)
> +        goto cleanup;
> +
> +    if ((guest = virCapabilitiesAddGuest(caps,
> +                                         VIR_DOMAIN_OSTYPE_HVM,
> +                                         caps->host.arch,
> +                                         NULL,
> +                                         NULL,
> +                                         0,
> +                                         NULL)) == NULL)
> +        goto cleanup;
> +
> +    if (virCapabilitiesAddGuestDomain(guest,
> +                                      VIR_DOMAIN_VIRT_CH,

IIUC, cloud hypervisor is still using KVM as the hypervisor,
merely providing a different userspace emulation layer. So
in terms of the libvirt virt type, we should still be using
VIR_DOMAIN_VIRT_KVM. No need to invent a VIR_DOMAIN_VIRT_CH.


> +                                      NULL,
> +                                      NULL,
> +                                      0,
> +                                      NULL) == NULL)
> +        goto cleanup;

This should also be testing of the required binary exists
on the host, and if it does not, then don't claim to support
guest domains.

On 64-bit hosts does CH provide a way to strictly emulate
just a 32-bit CPU, or will it always be 64-bit only ?

If the former, then you need to add the 32-bit equivalent
guest too.

> +
> +    return caps;
> +
> + cleanup:
> +    virObjectUnref(caps);
> +    return NULL;
> +}
> +

> +static void
> +virCHDriverConfigDispose(void *obj)
> +{
> +    virCHDriverConfigPtr cfg = obj;
> +
> +    VIR_FREE(cfg->stateDir);
> +    VIR_FREE(cfg->logDir);

Use g_free for these.

> +}
> +
> +static int
> +chExtractVersionInfo(int *retversion)
> +{
> +    int ret = -1;
> +    unsigned long version;
> +    char *help = NULL;
> +    char *tmp;
> +    virCommandPtr cmd = virCommandNewArgList(CH_CMD, "--version", NULL);
> +
> +    if (retversion)
> +        *retversion = 0;
> +
> +    virCommandAddEnvString(cmd, "LC_ALL=C");
> +    virCommandSetOutputBuffer(cmd, &help);
> +
> +    if (virCommandRun(cmd, NULL) < 0)
> +        goto cleanup;
> +
> +    tmp = help;
> +
> +    /* expected format: cloud-hypervisor v<major>.<minor>.<micro> */
> +    if ((tmp = STRSKIP(tmp, "cloud-hypervisor v")) == NULL)
> +        goto cleanup;
> +
> +    if (virParseVersionString(tmp, &version, false) < 0)
> +        goto cleanup;
> +
> +    // v0.9.0 is the minimum supported version
> +    if ((unsigned int)(version / 1000000) < 1) {
> +        if (((unsigned int)((unsigned long)(version % 1000000)) / 1000) < 9) {
> +            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                           _("Cloud-Hypervisor version is too old (v0.9.0 is the minimum supported version)"));
> +            goto cleanup;
> +        }
> +    }

This is a bit more convulted than needed - just encode the
min version you want as an integer and do a single compare
eg

   #define MIN_VERSION ((0 * 1000000) + (9 * 1000) + (0))

   if (version < MIN_VERSION) {
       ..report error...
   }

> +int chStrToInt(const char *str)
> +{
> +    int val;
> +
> +    if (virStrToLong_i(str, NULL, 10, &val) < 0)
> +        return 0;
> +
> +    return val;
> +}

This method is pretty pointless and its also not used anywhere

> diff --git a/src/ch/ch_conf.h b/src/ch/ch_conf.h
> new file mode 100644
> index 0000000000..04334130f7
> --- /dev/null
> +++ b/src/ch/ch_conf.h
> +#pragma once
> +
> +#include "virdomainobjlist.h"
> +#include "virthread.h"
> +
> +#define CH_DRIVER_NAME "CH"
> +#define CH_CMD "cloud-hypervisor"
> +
> +#define CH_STATE_DIR RUNSTATEDIR "/libvirt/ch"
> +#define CH_LOG_DIR LOCALSTATEDIR "/log/libvirt/ch"
> +
> +typedef struct _virCHDriver virCHDriver;
> +typedef virCHDriver *virCHDriverPtr;
> +
> +typedef struct _virCHDriverConfig virCHDriverConfig;
> +typedef virCHDriverConfig *virCHDriverConfigPtr;
> +
> +struct _virCHDriverConfig {
> +    virObject parent;

FWIW, I'd encourage you to use  GObject rather than virObject
for anything new, as our long term goal is to convert all
code to use GObject alone and eliminate virObject


> diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c
> new file mode 100644
> index 0000000000..a46641d50d
> --- /dev/null
> +++ b/src/ch/ch_domain.c
> @@ -0,0 +1,219 @@

> +static void *
> +virCHDomainObjPrivateAlloc(void *opaque G_GNUC_UNUSED)
> +{
> +    virCHDomainObjPrivatePtr priv;
> +
> +    if (VIR_ALLOC(priv) < 0)
> +        return NULL;
> +
> +    if (virCHDomainObjInitJob(priv) < 0) {
> +        VIR_FREE(priv);
> +        return NULL;
> +    }
> +
> +    return priv;
> +}
> +
> +static void
> +virCHDomainObjPrivateFree(void *data)
> +{
> +    virCHDomainObjPrivatePtr priv = data;
> +
> +    virCHDomainObjFreeJob(priv);
> +    VIR_FREE(priv);
> +}
> +
> +static int
> +virCHDomainObjPrivateXMLFormat(virBufferPtr buf,
> +                               virDomainObjPtr vm)
> +{
> +    virCHDomainObjPrivatePtr priv = vm->privateData;
> +    virBufferAsprintf(buf, "<init pid='%lld'/>\n",
> +                      (long long)priv->initpid);
> +
> +    return 0;
> +}
> +
> +static int
> +virCHDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt,
> +                              virDomainObjPtr vm,
> +                              virDomainDefParserConfigPtr config G_GNUC_UNUSED)
> +{
> +    virCHDomainObjPrivatePtr priv = vm->privateData;
> +    long long thepid;
> +
> +    if (virXPathLongLong("string(./init[1]/@pid)", ctxt, &thepid) < 0) {
> +        VIR_WARN("Failed to load init pid from state %s",
> +                 virGetLastErrorMessage());
> +        priv->initpid = 0;
> +    } else {
> +        priv->initpid = thepid;
> +    }
> +
> +    return 0;
> +}

What is this  initpid needed for ?  The virDomainObjPtr struct
already lets us record a PID for the guest in the <domstatus>
element.


> diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c
> new file mode 100644
> index 0000000000..e5b027f71f
> --- /dev/null
> +++ b/src/ch/ch_driver.c
> @@ -0,0 +1,937 @@
> +/*
> + * Copyright Intel Corp. 2020
> + *
> + * ch_driver.h: header file for Cloud-Hypervisor driver functions
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library.  If not, see
> + * <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <config.h>
> +
> +#include "ch_conf.h"
> +#include "ch_domain.h"
> +#include "ch_driver.h"
> +#include "ch_monitor.h"
> +#include "ch_process.h"
> +#include "datatypes.h"
> +#include "driver.h"
> +#include "viraccessapicheck.h"
> +#include "viralloc.h"
> +#include "virbuffer.h"
> +#include "vircommand.h"
> +#include "virerror.h"
> +#include "virfile.h"
> +#include "virlog.h"
> +#include "virnetdevtap.h"
> +#include "virobject.h"
> +#include "virstring.h"
> +#include "virtypedparam.h"
> +#include "viruri.h"
> +#include "virutil.h"
> +#include "viruuid.h"
> +
> +#define VIR_FROM_THIS VIR_FROM_CH
> +
> +VIR_LOG_INIT("ch.ch_driver");


> +
> +/* Functions */
> +static int
> +chConnectURIProbe(char **uri)
> +{
> +    if (ch_driver == NULL)
> +        return 0;
> +
> +    *uri = g_strdup("ch:///system");
> +    return 1;
> +}

Are you not able to support the ch:///session mode for running
under the user's unprivileged session ?

> +/**
> + * chDomainCreateXML:
> + * @conn: pointer to connection
> + * @xml: XML definition of domain
> + * @flags: bitwise-OR of supported virDomainCreateFlags
> + *
> + * Creates a domain based on xml and starts it
> + *
> + * Returns a new domain object or NULL in case of failure.
> + */
> +static virDomainPtr
> +chDomainCreateXML(virConnectPtr conn,
> +                           const char *xml,
> +                           unsigned int flags)
> +{
> +    virCHDriverPtr driver = conn->privateData;
> +    virDomainDefPtr vmdef = NULL;
> +    virDomainObjPtr vm = NULL;
> +    virDomainPtr dom = NULL;
> +    unsigned int parse_flags = VIR_DOMAIN_DEF_PARSE_INACTIVE;
> +
> +    virCheckFlags(VIR_DOMAIN_START_VALIDATE, NULL);
> +
> +    if (flags & VIR_DOMAIN_START_VALIDATE)
> +        parse_flags |= VIR_DOMAIN_DEF_PARSE_VALIDATE_SCHEMA;
> +
> +
> +    if ((vmdef = virDomainDefParseString(xml, driver->xmlopt,
> +                                         NULL, parse_flags)) == NULL)
> +        goto cleanup;
> +
> +    if (virDomainCreateXMLEnsureACL(conn, vmdef) < 0)
> +        goto cleanup;
> +
> +    if (!(vm = virDomainObjListAdd(driver->domains,
> +                                   vmdef,
> +                                   driver->xmlopt,
> +                                   VIR_DOMAIN_OBJ_LIST_ADD_LIVE |
> +                                       VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE,
> +                                   NULL)))
> +        goto cleanup;
> +
> +    vmdef = NULL;
> +    vm->persistent = 1;

This isn't right - the virDomainCreateXML method is for creating
a transient guest only.

Also from this point onwards, for any failure you need to remove
the dangling guest from the driver->domains list.

> +
> +    if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0)
> +        goto cleanup;
> +
> +    if (virCHProcessStart(driver, vm, VIR_DOMAIN_RUNNING_BOOTED) < 0)
> +        goto cleanup;
> +
> +    dom = virGetDomain(conn, vm->def->name, vm->def->uuid, vm->def->id);
> +
> +    virCHDomainObjEndJob(vm);
> +
> + cleanup:
> +    virDomainDefFree(vmdef);
> +    virDomainObjEndAPI(&vm);
> +    chDriverUnlock(driver);
> +    return dom;
> +}




> +static int
> +chDomainShutdownFlags(virDomainPtr dom,
> +                      unsigned int flags)
> +{
> +    virCHDomainObjPrivatePtr priv;
> +    virDomainObjPtr vm;
> +    virDomainState state;
> +    int ret = -1;
> +
> +    virCheckFlags(VIR_DOMAIN_SHUTDOWN_INITCTL |
> +                  VIR_DOMAIN_SHUTDOWN_SIGNAL, -1);

These two shutdown methods would really only be for container
based virt. When you have a KVM guest, only an APIC based
trigger, or guest agent trigger make conceptual sense.

> +
> +    if (!(vm = chDomObjFromDomain(dom)))
> +        goto cleanup;
> +
> +    priv = vm->privateData;
> +
> +    if (virDomainShutdownFlagsEnsureACL(dom->conn, vm->def, flags) < 0)
> +        goto cleanup;
> +
> +    if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0)
> +        goto cleanup;
> +
> +    if (virDomainObjCheckActive(vm) < 0)
> +        goto endjob;
> +
> +    state = virDomainObjGetState(vm, NULL);
> +    if (state != VIR_DOMAIN_RUNNING && state != VIR_DOMAIN_PAUSED) {
> +        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> +                       _("only can shutdown running/paused domain"));
> +        goto endjob;
> +    } else {
> +        if (virCHMonitorShutdownVM(priv->monitor) < 0) {
> +            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                        _("failed to shutdown guest VM"));
> +            goto endjob;
> +        }
> +    }
> +
> +    virDomainObjSetState(vm, VIR_DOMAIN_SHUTDOWN, VIR_DOMAIN_SHUTDOWN_USER);
> +
> +    ret = 0;
> +
> + endjob:
> +    virCHDomainObjEndJob(vm);
> +
> + cleanup:
> +    virDomainObjEndAPI(&vm);
> +    return ret;
> +}


> +static int
> +chDomainReboot(virDomainPtr dom, unsigned int flags)
> +{
> +    virCHDomainObjPrivatePtr priv;
> +    virDomainObjPtr vm;
> +    virDomainState state;
> +    int ret = -1;
> +
> +    virCheckFlags(VIR_DOMAIN_REBOOT_INITCTL |
> +                  VIR_DOMAIN_REBOOT_SIGNAL, -1);

Same note as above.

> +
> +    if (!(vm = chDomObjFromDomain(dom)))
> +        goto cleanup;
> +
> +    priv = vm->privateData;
> +
> +    if (virDomainRebootEnsureACL(dom->conn, vm->def, flags) < 0)
> +        goto cleanup;
> +
> +    if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0)
> +        goto cleanup;
> +
> +    if (virDomainObjCheckActive(vm) < 0)
> +        goto endjob;
> +
> +    state = virDomainObjGetState(vm, NULL);
> +    if (state != VIR_DOMAIN_RUNNING && state != VIR_DOMAIN_PAUSED) {
> +        virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
> +                       _("only can reboot running/paused domain"));
> +        goto endjob;
> +    } else {
> +        if (virCHMonitorRebootVM(priv->monitor) < 0) {
> +            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                           _("failed to reboot domain"));
> +            goto endjob;
> +        }
> +    }
> +
> +    if (state == VIR_DOMAIN_RUNNING)
> +        virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, VIR_DOMAIN_RUNNING_BOOTED);
> +    else
> +        virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, VIR_DOMAIN_RUNNING_UNPAUSED);
> +
> +    ret = 0;
> +
> + endjob:
> +    virCHDomainObjEndJob(vm);
> +
> + cleanup:
> +    virDomainObjEndAPI(&vm);
> +    return ret;
> +}
> +
> +static int
> +chDomainSuspend(virDomainPtr dom)
> +{
> +    virCHDomainObjPrivatePtr priv;
> +    virDomainObjPtr vm;
> +    int ret = -1;
> +
> +    if (!(vm = chDomObjFromDomain(dom)))
> +        goto cleanup;
> +
> +    priv = vm->privateData;
> +
> +    if (virDomainSuspendEnsureACL(dom->conn, vm->def) < 0)
> +        goto cleanup;
> +
> +    if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0)
> +        goto cleanup;
> +
> +    if (virDomainObjCheckActive(vm) < 0)
> +        goto endjob;
> +
> +    if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_RUNNING) {
> +        virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
> +                       _("only can suspend running domain"));
> +        goto endjob;
> +    } else {
> +        if (virCHMonitorSuspendVM(priv->monitor) < 0) {

Just to explicitly confirm,  "suspend" in libvirt terminology means
pause execution of the guest CPUs, nothing else.   Does that match
semantics from CH ?

> +            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                        _("failed to suspend domain"));
> +            goto endjob;
> +        }
> +    }
> +
> +    virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_USER);
> +
> +    ret = 0;
> +
> + endjob:
> +    virCHDomainObjEndJob(vm);
> +
> + cleanup:
> +    virDomainObjEndAPI(&vm);
> +    return ret;
> +}




> +static int chStateInitialize(bool privileged,
> +                             const char *root,
> +                             virStateInhibitCallback callback G_GNUC_UNUSED,
> +                             void *opaque G_GNUC_UNUSED)
> +{
> +    if (root != NULL) {
> +        virReportError(VIR_ERR_INVALID_ARG, "%s",
> +                       _("Driver does not support embedded mode"));
> +        return -1;
> +    }
> +
> +    /* Check that the user is root, silently disable if not */
> +    if (!privileged) {
> +        VIR_INFO("Not running privileged, disabling driver");
> +        return VIR_DRV_STATE_INIT_SKIPPED;
> +    }

Same question as earlier about why its forbidding non-root usage ?

> +static virConnectDriver chConnectDriver = {
> +    .localOnly = true,
> +    .uriSchemes = (const char *[]){"CH", "Ch", "ch", "Cloud-Hypervisor", NULL},

Your driver URI is "ch:///system", so the only thing that should be in this
list is "ch".

> +    .hypervisorDriver = &chHypervisorDriver,
> +};
> +
> +static virStateDriver chStateDriver = {
> +    .name = "CH",

I'd suggest "cloud-hypervisor" to be a bit clearer

> +    .stateInitialize = chStateInitialize,
> +    .stateCleanup = chStateCleanup,
> +};
> +
> +int chRegister(void)
> +{
> +    if (virRegisterConnectDriver(&chConnectDriver, false) < 0)
> +        return -1;
> +    if (virRegisterStateDriver(&chStateDriver) < 0)
> +        return -1;
> +    return 0;
> +}



> diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c
> new file mode 100644
> index 0000000000..ccef70f719
> --- /dev/null
> +++ b/src/ch/ch_monitor.c

> +static int
> +virCHMonitorBuildCPUJson(virJSONValuePtr content, virDomainDefPtr vmdef)
> +{
> +    virJSONValuePtr cpus;
> +    unsigned int maxvcpus = 0;
> +    unsigned int nvcpus = 0;
> +    virDomainVcpuDefPtr vcpu;
> +    size_t i;
> +
> +    /* count maximum allowed number vcpus and enabled vcpus when boot.*/
> +    maxvcpus = virDomainDefGetVcpusMax(vmdef);
> +    for (i = 0; i < maxvcpus; i++) {
> +        vcpu = virDomainDefGetVcpu(vmdef, i);
> +        if (vcpu->online)
> +            nvcpus++;
> +    }
> +
> +    if (maxvcpus != 0 || nvcpus != 0) {
> +        cpus = virJSONValueNewObject();
> +        if (virJSONValueObjectAppendNumberInt(cpus, "boot_vcpus", nvcpus) < 0)
> +            goto cleanup;
> +        if (virJSONValueObjectAppendNumberInt(cpus, "max_vcpus", vmdef->maxvcpus) < 0)
> +            goto cleanup;
> +        if (virJSONValueObjectAppend(content, "cpus", cpus) < 0)
> +            goto cleanup;
> +    }
> +
> +    return 0;
> +
> + cleanup:
> +    virJSONValueFree(cpus);
> +    return -1;
> +}
> +
> +static int
> +virCHMonitorBuildKernelJson(virJSONValuePtr content, virDomainDefPtr vmdef)
> +{
> +    virJSONValuePtr kernel;
> +
> +    if (vmdef->os.kernel == NULL) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                       _("Kernel image path in this domain is not defined"));
> +        return -1;
> +    } else {
> +        kernel = virJSONValueNewObject();
> +        if (virJSONValueObjectAppendString(kernel, "path", vmdef->os.kernel) < 0)
> +            goto cleanup;
> +        if (virJSONValueObjectAppend(content, "kernel", kernel) < 0)
> +            goto cleanup;
> +    }
> +
> +    return 0;
> +
> + cleanup:
> +    virJSONValueFree(kernel);
> +    return -1;
> +}
> +
> +static int
> +virCHMonitorBuildCmdlineJson(virJSONValuePtr content, virDomainDefPtr vmdef)
> +{
> +    virJSONValuePtr cmdline;
> +
> +    cmdline = virJSONValueNewObject();
> +    if (vmdef->os.cmdline) {
> +        if (virJSONValueObjectAppendString(cmdline, "args", vmdef->os.cmdline) < 0)
> +            goto cleanup;
> +        if (virJSONValueObjectAppend(content, "cmdline", cmdline) < 0)
> +            goto cleanup;
> +    }
> +
> +    return 0;
> +
> + cleanup:
> +    virJSONValueFree(cmdline);
> +    return -1;
> +}

I'm kinda inclined to say it is overkill to have separate methods for kernel,
initrd and cmdline. I'd handle all three at same time since they go hand in
hand with each other.


> +static int
> +virCHMonitorBuildDiskJson(virJSONValuePtr disks, virDomainDiskDefPtr diskdef)
> +{
> +    virJSONValuePtr disk;
> +
> +    if (diskdef->src != NULL && diskdef->src->path != NULL) {

You must validate diskdef->type before accesing any fields for the source
otherwise it is highly liable to crash on unexpected user input. Generally
we'd expect a switch(diskdef->type) and cases to each applicable type you
want to support, with errors raised for others.

> +        disk = virJSONValueNewObject();
> +        if (virJSONValueObjectAppendString(disk, "path", diskdef->src->path) < 0)
> +            goto cleanup;
> +        if (diskdef->src->readonly) {
> +            if (virJSONValueObjectAppendBoolean(disk, "readonly", true) < 0)
> +                goto cleanup;
> +        }
> +        if (virJSONValueArrayAppend(disks, disk) < 0)
> +            goto cleanup;
> +    }
> +
> +    return 0;
> +
> + cleanup:
> +    virJSONValueFree(disk);
> +    return -1;
> +}


> +static int
> +virCHMonitorBuildNetJson(virJSONValuePtr nets, virDomainNetDefPtr netdef)
> +{
> +    virDomainNetType netType = virDomainNetGetActualType(netdef);
> +    char macaddr[VIR_MAC_STRING_BUFLEN];
> +    virJSONValuePtr net;
> +
> +    // check net type at first
> +    net = virJSONValueNewObject();
> +
> +    switch (netType) {
> +    case VIR_DOMAIN_NET_TYPE_ETHERNET:
> +        if (netdef->guestIP.nips == 1) {
> +            const virNetDevIPAddr *ip = netdef->guestIP.ips[0];
> +            g_autofree char *addr = NULL;
> +            virSocketAddr netmask;
> +            g_autofree char *netmaskStr = NULL;
> +            if (!(addr = virSocketAddrFormat(&ip->address)))
> +                goto cleanup;
> +            if (virJSONValueObjectAppendString(net, "ip", addr) < 0)
> +                goto cleanup;
> +
> +            if (virSocketAddrPrefixToNetmask(ip->prefix, &netmask, AF_INET) < 0) {
> +                virReportError(VIR_ERR_INTERNAL_ERROR,
> +                               _("Failed to translate net prefix %d to netmask"),
> +                               ip->prefix);
> +                goto cleanup;
> +            }
> +            if (!(netmaskStr = virSocketAddrFormat(&netmask)))
> +                goto cleanup;
> +            if (virJSONValueObjectAppendString(net, "mask", netmaskStr) < 0)
> +                goto cleanup;
> +        }

else {
....report VIR_ERR_CONFIG_UNSUPPORTED...
}

As a general rule, when building the config from the user's XML, report
as many errors as possible. It is preferrable for apps to see an error
report than to have their config silently ignored.

> +        break;
> +    case VIR_DOMAIN_NET_TYPE_VHOSTUSER:
> +        if ((virDomainChrType)netdef->data.vhostuser->type != VIR_DOMAIN_CHR_TYPE_UNIX) {
> +            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                           _("vhost_user type support UNIX socket in this CH"));
> +            goto cleanup;
> +        } else {
> +            if (virJSONValueObjectAppendString(net, "vhost_socket", netdef->data.vhostuser->data.nix.path) < 0)
> +                goto cleanup;
> +            if (virJSONValueObjectAppendBoolean(net, "vhost_user", true) < 0)
> +                goto cleanup;
> +        }
> +        break;
> +    case VIR_DOMAIN_NET_TYPE_BRIDGE:
> +    case VIR_DOMAIN_NET_TYPE_NETWORK:
> +    case VIR_DOMAIN_NET_TYPE_DIRECT:
> +    case VIR_DOMAIN_NET_TYPE_USER:
> +    case VIR_DOMAIN_NET_TYPE_SERVER:
> +    case VIR_DOMAIN_NET_TYPE_CLIENT:
> +    case VIR_DOMAIN_NET_TYPE_MCAST:
> +    case VIR_DOMAIN_NET_TYPE_INTERNAL:
> +    case VIR_DOMAIN_NET_TYPE_HOSTDEV:
> +    case VIR_DOMAIN_NET_TYPE_UDP:
> +    case VIR_DOMAIN_NET_TYPE_LAST:
> +    default:
> +        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                       _("Only ethernet and vhost_user type network types are "
> +                         "supported in this CH"));
> +        goto cleanup;
> +    }

For _LAST/defalt, use  virReportEnumRangeError




> +static int
> +virCHMonitorBuildVMJson(virDomainDefPtr vmdef, char **jsonstr)
> +{
> +    virJSONValuePtr content = virJSONValueNewObject();
> +    int ret = -1;
> +
> +    if (vmdef == NULL) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                       _("VM is not defined"));
> +        goto cleanup;
> +    }
> +
> +    if (virCHMonitorBuildCPUJson(content, vmdef) < 0)
> +        goto cleanup;
> +
> +    if (virCHMonitorBuildMemoryJson(content, vmdef) < 0)
> +        goto cleanup;
> +
> +    if (virCHMonitorBuildKernelJson(content, vmdef) < 0)
> +        goto cleanup;
> +
> +    if (virCHMonitorBuildCmdlineJson(content, vmdef) < 0)
> +        goto cleanup;
> +
> +    if (virCHMonitorBuildInitramfsJson(content, vmdef) < 0)
> +        goto cleanup;
> +
> +    if (virCHMonitorBuildDisksJson(content, vmdef) < 0)
> +        goto cleanup;
> +
> +    if (virCHMonitorBuildNetsJson(content, vmdef) < 0)
> +        goto cleanup;
> +
> +    if (!(*jsonstr = virJSONValueToString(content, false)))
> +        goto cleanup;
> +
> +    ret = 0;
> +
> + cleanup:
> +    virJSONValueFree(content);
> +    return ret;
> +}

Obviously you're doing the bare minimum here to get something running
which is fine for an initial merge. Ultimately though, it should be
reporting erors for any types of devices you don't wish to support.


> +static virCommandPtr
> +chMonitorBuildSocketCmd(virDomainObjPtr vm, const char *socket_path)
> +{
> +    virCommandPtr cmd;
> +
> +    if (vm->def == NULL) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                       _("VM is not defined"));
> +        return NULL;
> +    }
> +
> +    if (vm->def->emulator != NULL)
> +        cmd = virCommandNew(vm->def->emulator);
> +    else
> +        cmd = virCommandNew(CH_CMD);

If you populate the guest capabilities with the default emulator
path, it should get populated into the virDomainDef, and thus
be able to assume it is non-NULL.

> +
> +    virCommandAddArgList(cmd, "--api-socket", socket_path, NULL);
> +
> +    return cmd;
> +}
> +
> +virCHMonitorPtr
> +virCHMonitorNew(virDomainObjPtr vm, const char *socketdir)
> +{
> +    virCHMonitorPtr ret = NULL;
> +    virCHMonitorPtr mon = NULL;
> +    virCommandPtr cmd = NULL;
> +    int pings = 0;
> +
> +    if (virCHMonitorInitialize() < 0)
> +        return NULL;
> +
> +    if (!(mon = virObjectLockableNew(virCHMonitorClass)))
> +        return NULL;
> +
> +    mon->socketpath = g_strdup_printf("%s/%s-socket", socketdir, vm->def->name);
> +
> +    /* prepare to launch Cloud-Hypervisor socket */
> +    if (!(cmd = chMonitorBuildSocketCmd(vm, mon->socketpath)))
> +        goto cleanup;
> +
> +    if (virFileMakePath(socketdir) < 0) {
> +        virReportSystemError(errno,
> +                             _("Cannot create socket directory '%s'"),
> +                             socketdir);
> +        goto cleanup;
> +    }
> +
> +    /* launch Cloud-Hypervisor socket */
> +    if (virCommandRunAsync(cmd, &mon->pid) < 0)
> +        goto cleanup;
> +
> +    /* get a curl handle */
> +    mon->handle = curl_easy_init();
> +
> +    /* try to ping VMM socket 5 times to make sure it is ready */
> +    while (pings < 5) {
> +        if (virCHMonitorPingVMM(mon) == 0)
> +            break;
> +        if (pings == 5)
> +            goto cleanup;
> +
> +        g_usleep(100 * 1000);
> +    }

This is highly undesirable. Is there no way to launch the CH process
such that the socket is guaranteed to be accepting requests by the
time it has forked into the background ? Or is there a way to pass
in a pre-opened FD for the monitor socket ?

This kind of wait with timeout will cause startup failures due to
timeout under load.

> +
> +    /* now has its own reference */
> +    virObjectRef(mon);
> +    mon->vm = virObjectRef(vm);
> +
> +    ret = mon;
> +
> + cleanup:
> +    virCommandFree(cmd);
> +    return ret;
> +}


> +struct data {
> +  char trace_ascii; /* 1 or 0 */
> +};
> +
> +static void dump(const char *text,
> +                 FILE *stream,
> +                 unsigned char *ptr,
> +                 size_t size,
> +                 char nohex)
> +{
> +    size_t i;
> +    size_t c;
> +
> +    unsigned int width = 0x10;
> +
> +    if (nohex)
> +        /* without the hex output, we can fit more on screen */
> +        width = 0x40;
> +
> +    fprintf(stream, "%s, %10.10lu bytes (0x%8.8lx)\n", text, (unsigned long)size,
> +            (unsigned long)size);
> +
> +    for (i = 0; i < size; i += width) {
> +
> +        fprintf(stream, "%4.4lx: ", (unsigned long)i);
> +
> +        if (!nohex) {
> +            /* hex not disabled, show it */
> +            for (c = 0; c < width; c++) {
> +                if (i + c < size)
> +                    fprintf(stream, "%02x ", ptr[i + c]);
> +                else
> +                    fputs("   ", stream);
> +            }
> +        }
> +
> +        for (c = 0; (c < width) && (i + c < size); c++) {
> +            /* check for 0D0A; if found, skip past and start a new line of output */
> +            if (nohex && (i + c + 1 < size) && ptr[i + c] == 0x0D &&
> +                ptr[i + c + 1] == 0x0A) {
> +                i += (c + 2 - width);
> +                break;
> +            }
> +            fprintf(stream, "%c",
> +                    (ptr[i + c] >= 0x20) && (ptr[i + c] < 0x80) ? ptr[i + c] : '.');
> +            /* check again for 0D0A, to avoid an extra \n if it's at width */
> +            if (nohex && (i + c + 2 < size) && ptr[i + c + 1] == 0x0D &&
> +                ptr[i + c + 2] == 0x0A) {
> +                i += (c + 3 - width);
> +                break;
> +            }
> +        }
> +        fputc('\n', stream); /* newline */
> +    }
> +    fflush(stream);
> +}
> +
> +static int my_trace(CURL *handle,
> +                    curl_infotype type,
> +                    char *data,
> +                    size_t size,
> +                    void *userp)
> +{
> +    struct data *config = (struct data *)userp;
> +    const char *text = "";
> +    (void)handle; /* prevent compiler warning */
> +
> +    switch (type) {
> +    case CURLINFO_TEXT:
> +        fprintf(stderr, "== Info: %s", data);
> +        /* FALLTHROUGH */
> +    case CURLINFO_END: /* in case a new one is introduced to shock us */
> +        break;
> +    case CURLINFO_HEADER_OUT:
> +        text = "=> Send header";
> +        break;
> +    case CURLINFO_DATA_OUT:
> +        text = "=> Send data";
> +        break;
> +    case CURLINFO_SSL_DATA_OUT:
> +        text = "=> Send SSL data";
> +        break;
> +    case CURLINFO_HEADER_IN:
> +        text = "<= Recv header";
> +        break;
> +    case CURLINFO_DATA_IN:
> +        text = "<= Recv data";
> +        break;
> +    case CURLINFO_SSL_DATA_IN:
> +        text = "<= Recv SSL data";
> +        break;
> +    }
> +
> +    dump(text, stderr, (unsigned char *)data, size, config->trace_ascii);
> +    return 0;
> +}

I presume this is left over from when you were debugging this. Printing
to stderr is not really something we would want in the code.


> +static int
> +virCHMonitorCurlPerform(CURL *handle)
> +{
> +    CURLcode errorCode;
> +    long responseCode = 0;
> +
> +    struct data config;
> +
> +    config.trace_ascii = 1; /* enable ascii tracing */
> +
> +    curl_easy_setopt(handle, CURLOPT_DEBUGFUNCTION, my_trace);
> +    curl_easy_setopt(handle, CURLOPT_DEBUGDATA, &config);
> +
> +    /* the DEBUGFUNCTION has no effect until we enable VERBOSE */
> +    curl_easy_setopt(handle, CURLOPT_VERBOSE, 1L);
> +
> +    errorCode = curl_easy_perform(handle);
> +
> +    if (errorCode != CURLE_OK) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR,
> +                       _("curl_easy_perform() returned an error: %s (%d)"),
> +                       curl_easy_strerror(errorCode), errorCode);
> +        return -1;
> +    }
> +
> +    errorCode = curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE,
> +                                  &responseCode);
> +
> +    if (errorCode != CURLE_OK) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR,
> +                       _("curl_easy_getinfo(CURLINFO_RESPONSE_CODE) returned an "
> +                         "error: %s (%d)"), curl_easy_strerror(errorCode),
> +                       errorCode);
> +        return -1;
> +    }
> +
> +    if (responseCode < 0) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                       _("curl_easy_getinfo(CURLINFO_RESPONSE_CODE) returned a "
> +                         "negative response code"));
> +        return -1;
> +    }
> +
> +    return responseCode;
> +}
> +
> +int
> +virCHMonitorPutNoContent(virCHMonitorPtr mon, const char *endpoint)
> +{
> +    char *url;

Change to...

 g_autofree  char *url = NULL;

and...

> +    int responseCode = 0;
> +    int ret = -1;
> +
> +    url = g_strdup_printf("%s/%s", URL_ROOT, endpoint);
> +
> +    virObjectLock(mon);
> +
> +    /* reset all options of a libcurl session handle at first */
> +    curl_easy_reset(mon->handle);
> +
> +    curl_easy_setopt(mon->handle, CURLOPT_UNIX_SOCKET_PATH, mon->socketpath);
> +    curl_easy_setopt(mon->handle, CURLOPT_URL, url);
> +    curl_easy_setopt(mon->handle, CURLOPT_PUT, true);
> +    curl_easy_setopt(mon->handle, CURLOPT_HTTPHEADER, NULL);
> +
> +    responseCode = virCHMonitorCurlPerform(mon->handle);
> +
> +    virObjectUnlock(mon);
> +
> +    if (responseCode == 200 || responseCode == 204)
> +        ret = 0;
> +
> +    VIR_FREE(url);

...you can drop this.

Likewise for any where else you malloc a variable and free it
in the same scope.

> +    return ret;
> +}
> +
> +int
> +virCHMonitorGet(virCHMonitorPtr mon, const char *endpoint)
> +{
> +    char *url;
> +    int responseCode = 0;
> +    int ret = -1;
> +
> +    url = g_strdup_printf("%s/%s", URL_ROOT, endpoint);
> +
> +    virObjectLock(mon);
> +
> +    /* reset all options of a libcurl session handle at first */
> +    curl_easy_reset(mon->handle);
> +
> +    curl_easy_setopt(mon->handle, CURLOPT_UNIX_SOCKET_PATH, mon->socketpath);
> +    curl_easy_setopt(mon->handle, CURLOPT_URL, url);
> +
> +    responseCode = virCHMonitorCurlPerform(mon->handle);
> +
> +    virObjectUnlock(mon);
> +
> +    if (responseCode == 200 || responseCode == 204)
> +        ret = 0;
> +
> +    VIR_FREE(url);
> +    return ret;
> +}


> +int
> +virCHMonitorCreateVM(virCHMonitorPtr mon)
> +{
> +    g_autofree char *url = NULL;
> +    int responseCode = 0;
> +    int ret = -1;
> +    g_autofree char *payload = NULL;
> +    struct curl_slist *headers = NULL;
> +
> +    url = g_strdup_printf("%s/%s", URL_ROOT, URL_VM_CREATE);
> +    headers = curl_slist_append(headers, "Accept: application/json");
> +    headers = curl_slist_append(headers, "Content-Type: application/json");
> +    headers = curl_slist_append(headers, "Expect:");

Is that empty "Expect:" header intentional ?

> +
> +    if (virCHMonitorBuildVMJson(mon->vm->def, &payload) != 0)
> +        return -1;
> +
> +    virObjectLock(mon);
> +
> +    /* reset all options of a libcurl session handle at first */
> +    curl_easy_reset(mon->handle);
> +
> +    curl_easy_setopt(mon->handle, CURLOPT_UNIX_SOCKET_PATH, mon->socketpath);
> +    curl_easy_setopt(mon->handle, CURLOPT_URL, url);
> +    curl_easy_setopt(mon->handle, CURLOPT_CUSTOMREQUEST, "PUT");
> +    curl_easy_setopt(mon->handle, CURLOPT_HTTPHEADER, headers);
> +    curl_easy_setopt(mon->handle, CURLOPT_POSTFIELDS, payload);
> +
> +    responseCode = virCHMonitorCurlPerform(mon->handle);
> +
> +    virObjectUnlock(mon);
> +
> +    if (responseCode == 200 || responseCode == 204)
> +        ret = 0;
> +
> +    curl_slist_free_all(headers);
> +    VIR_FREE(url);
> +    VIR_FREE(payload);
> +    return ret;
> +}


> +struct _virCHMonitor {
> +    virObjectLockable parent;
> +
> +    CURL *handle;

Does this curl handle keep a persistent connection open to the CH
process ?  And is that able to be used to detect immedaitely when
the CH process shuts down or crashes unexpectedly ?

> +
> +    char *socketpath;
> +
> +    pid_t pid;
> +
> +    virDomainObjPtr vm;
> +};
> +


> +int virCHMonitorResumeVM(virCHMonitorPtr mon);
> diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c
> new file mode 100644
> index 0000000000..15f4801549
> --- /dev/null
> +++ b/src/ch/ch_process.c
> @@ -0,0 +1,125 @@
> +/*
> + * Copyright Intel Corp. 2020
> + *
> + * ch_driver.h: header file for Cloud-Hypervisor driver functions
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library.  If not, see
> + * <http://www.gnu.org/licenses/>.
> + */
> +



> diff --git a/src/ch/meson.build b/src/ch/meson.build
> new file mode 100644
> index 0000000000..e41691bc05
> --- /dev/null
> +++ b/src/ch/meson.build
> @@ -0,0 +1,44 @@
> +ch_driver_sources = [
> +  'ch_conf.c',
> +  'ch_conf.h',
> +  'ch_domain.c',
> +  'ch_domain.h',
> +  'ch_driver.c',
> +  'ch_driver.h',
> +  'ch_monitor.c',
> +  'ch_monitor.h',
> +  'ch_process.c',
> +  'ch_process.h',
> +]
> +
> +driver_source_files += files(ch_driver_sources)
> +
> +stateful_driver_source_files += files(ch_driver_sources)
> +
> +if conf.has('WITH_CH')
> +  ch_driver_impl = static_library(
> +    'virt_driver_ch_impl',
> +    [
> +      ch_driver_sources,
> +    ],
> +    dependencies: [
> +      access_dep,
> +      curl_dep,
> +      log_dep,
> +      src_dep,
> +    ],
> +    include_directories: [
> +      conf_inc_dir,
> +    ],
> +  )
> +
> +  virt_modules += {
> +    'name': 'virt_driver_ch',
> +    'link_whole': [
> +      ch_driver_impl,
> +    ],
> +    'link_args': [
> +      libvirt_no_undefined,
> +    ],
> +  }
> +endif

We're moving to a world where each stateful virt driver has its
own daemon. The rules here are enough for libvirtd, but you'll
need to add rules to create a "virtchd" daemon. The qemu/meson.build
will show how - look for "virt_daemons".


> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> index 5d3ae8bb28..11b183ad2c 100644
> --- a/src/conf/domain_conf.c
> +++ b/src/conf/domain_conf.c
> @@ -130,6 +130,7 @@ VIR_ENUM_IMPL(virDomainVirt,
>                "parallels",
>                "bhyve",
>                "vz",
> +              "ch",
>  );
>  
>  VIR_ENUM_IMPL(virDomainOS,
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index 8a0f26f5c0..4dba588728 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -139,6 +139,7 @@ typedef enum {
>      VIR_DOMAIN_VIRT_PARALLELS,
>      VIR_DOMAIN_VIRT_BHYVE,
>      VIR_DOMAIN_VIRT_VZ,
> +    VIR_DOMAIN_VIRT_CH,

As mentioned earlier, not required since you're still using
KVM IIUC.



Overall, this looks like a reasonable start of the driver.

What is the security model for the processes ? The libvirt driver
here is running as root and spawning CH as root. We don't really
want the VM process running as root though. It really needs to
be unprivileged from a DAC POV. Obviously that's quite a bit more
work for you todo, but it should be opssible to share alot of the
security mgmt infrastructure that we already have for QEMU and
LXC drivers. It can manage DAC permissions and apply SELinux or
AppArmor MAC labelling/profiles.

The other big question is around device addressing. If using PCI,
then PCI address assignment logic is critical to ensure consistent
guest ABI. 

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Re: [RFC] Add basic driver for the Cloud-Hypervisor
Posted by Douglas, William 3 years, 7 months ago
On Tue, Sep 15, 2020 at 5:53 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Thu, Aug 27, 2020 at 11:24:32AM -0700, William Douglas wrote:

<snip>

> > +virCHMonitorPtr
> > +virCHMonitorNew(virDomainObjPtr vm, const char *socketdir)
> > +{
> > +    virCHMonitorPtr ret = NULL;
> > +    virCHMonitorPtr mon = NULL;
> > +    virCommandPtr cmd = NULL;
> > +    int pings = 0;
> > +
> > +    if (virCHMonitorInitialize() < 0)
> > +        return NULL;
> > +
> > +    if (!(mon = virObjectLockableNew(virCHMonitorClass)))
> > +        return NULL;
> > +
> > +    mon->socketpath = g_strdup_printf("%s/%s-socket", socketdir, vm->def->name);
> > +
> > +    /* prepare to launch Cloud-Hypervisor socket */
> > +    if (!(cmd = chMonitorBuildSocketCmd(vm, mon->socketpath)))
> > +        goto cleanup;
> > +
> > +    if (virFileMakePath(socketdir) < 0) {
> > +        virReportSystemError(errno,
> > +                             _("Cannot create socket directory '%s'"),
> > +                             socketdir);
> > +        goto cleanup;
> > +    }
> > +
> > +    /* launch Cloud-Hypervisor socket */
> > +    if (virCommandRunAsync(cmd, &mon->pid) < 0)
> > +        goto cleanup;
> > +
> > +    /* get a curl handle */
> > +    mon->handle = curl_easy_init();
> > +
> > +    /* try to ping VMM socket 5 times to make sure it is ready */
> > +    while (pings < 5) {
> > +        if (virCHMonitorPingVMM(mon) == 0)
> > +            break;
> > +        if (pings == 5)
> > +            goto cleanup;
> > +
> > +        g_usleep(100 * 1000);
> > +    }
>
> This is highly undesirable. Is there no way to launch the CH process
> such that the socket is guaranteed to be accepting requests by the
> time it has forked into the background ? Or is there a way to pass
> in a pre-opened FD for the monitor socket ?
>
> This kind of wait with timeout will cause startup failures due to
> timeout under load.

Currently the cloud-hypervisor process doesn't fork into the
background so that was initially what I was thinking to add to
cloud-hypervsior. Would tracking the pid of the cloud-hypervsior
running in the background be required at that point (assuming the
socket path to control the daemon would be known and working given
virCommandRun returns successfully)?

<snip>

> What is the security model for the processes ? The libvirt driver
> here is running as root and spawning CH as root. We don't really
> want the VM process running as root though. It really needs to
> be unprivileged from a DAC POV. Obviously that's quite a bit more
> work for you todo, but it should be opssible to share alot of the
> security mgmt infrastructure that we already have for QEMU and
> LXC drivers. It can manage DAC permissions and apply SELinux or
> AppArmor MAC labelling/profiles.

Currently cloud-hypervisor runs with cap_net_admin permitted and
effective on it. The implementation will be updated to run under the
user's session using a non-root driver (with its own daemon as you had
mentioned earlier).

>
> The other big question is around device addressing. If using PCI,
> then PCI address assignment logic is critical to ensure consistent
> guest ABI.
>

I'll be adding the device passthrough for the actual driver submission
but if you would like to see an updated RFC with that added I could do
so.

> Regards,
> Daniel
> --
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
>

Thank you very much for the very thorough comments, I'll be updating
my patch addressing all your feedback.

William


Re: [RFC] Add basic driver for the Cloud-Hypervisor
Posted by Daniel P. Berrangé 3 years, 7 months ago
On Mon, Sep 21, 2020 at 12:05:48PM -0700, Douglas, William wrote:
> On Tue, Sep 15, 2020 at 5:53 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
> >
> > On Thu, Aug 27, 2020 at 11:24:32AM -0700, William Douglas wrote:
> 
> <snip>
> 
> > > +virCHMonitorPtr
> > > +virCHMonitorNew(virDomainObjPtr vm, const char *socketdir)
> > > +{
> > > +    virCHMonitorPtr ret = NULL;
> > > +    virCHMonitorPtr mon = NULL;
> > > +    virCommandPtr cmd = NULL;
> > > +    int pings = 0;
> > > +
> > > +    if (virCHMonitorInitialize() < 0)
> > > +        return NULL;
> > > +
> > > +    if (!(mon = virObjectLockableNew(virCHMonitorClass)))
> > > +        return NULL;
> > > +
> > > +    mon->socketpath = g_strdup_printf("%s/%s-socket", socketdir, vm->def->name);
> > > +
> > > +    /* prepare to launch Cloud-Hypervisor socket */
> > > +    if (!(cmd = chMonitorBuildSocketCmd(vm, mon->socketpath)))
> > > +        goto cleanup;
> > > +
> > > +    if (virFileMakePath(socketdir) < 0) {
> > > +        virReportSystemError(errno,
> > > +                             _("Cannot create socket directory '%s'"),
> > > +                             socketdir);
> > > +        goto cleanup;
> > > +    }
> > > +
> > > +    /* launch Cloud-Hypervisor socket */
> > > +    if (virCommandRunAsync(cmd, &mon->pid) < 0)
> > > +        goto cleanup;
> > > +
> > > +    /* get a curl handle */
> > > +    mon->handle = curl_easy_init();
> > > +
> > > +    /* try to ping VMM socket 5 times to make sure it is ready */
> > > +    while (pings < 5) {
> > > +        if (virCHMonitorPingVMM(mon) == 0)
> > > +            break;
> > > +        if (pings == 5)
> > > +            goto cleanup;
> > > +
> > > +        g_usleep(100 * 1000);
> > > +    }
> >
> > This is highly undesirable. Is there no way to launch the CH process
> > such that the socket is guaranteed to be accepting requests by the
> > time it has forked into the background ? Or is there a way to pass
> > in a pre-opened FD for the monitor socket ?
> >
> > This kind of wait with timeout will cause startup failures due to
> > timeout under load.
> 
> Currently the cloud-hypervisor process doesn't fork into the
> background so that was initially what I was thinking to add to
> cloud-hypervsior. Would tracking the pid of the cloud-hypervsior
> running in the background be required at that point (assuming the
> socket path to control the daemon would be known and working given
> virCommandRun returns successfully)?

It doesn't especially matter whether cloud-hypervsior forks into
the background itself, or whether libvirt forks it into the
background when launching it.

The important thing is simply to ensure that whichever startup
approach is used can be implemented in a race-free manner without
needing busy-waits or timeouts.

If cloud-hypervisor forks into the background, then it would
need to write a pidfile so libvirt can determine the pid that
ended up running. The act of libvirt waiting for the pidfile
to be written is potentially racy though, so that might not be
be best.

Based on what we learnt from QEMU, I think being able to pass
in a pre-created UNIX listener socket is the best. That lets
libvirt immedately issue a connect() start after forking the
cloud-hypervisor process. If cloud-hypervisor succesfully
starts up, then the client socket becomes live and can be used.
if it fails to startup, then the client socket libvirt has
will get EOF and thus we'll see the startup failure. This
avoids any looping/sleeping/timeout.


> > What is the security model for the processes ? The libvirt driver
> > here is running as root and spawning CH as root. We don't really
> > want the VM process running as root though. It really needs to
> > be unprivileged from a DAC POV. Obviously that's quite a bit more
> > work for you todo, but it should be opssible to share alot of the
> > security mgmt infrastructure that we already have for QEMU and
> > LXC drivers. It can manage DAC permissions and apply SELinux or
> > AppArmor MAC labelling/profiles.
> 
> Currently cloud-hypervisor runs with cap_net_admin permitted and
> effective on it. The implementation will be updated to run under the
> user's session using a non-root driver (with its own daemon as you had
> mentioned earlier).
> 
> >
> > The other big question is around device addressing. If using PCI,
> > then PCI address assignment logic is critical to ensure consistent
> > guest ABI.
> >
> 
> I'll be adding the device passthrough for the actual driver submission
> but if you would like to see an updated RFC with that added I could do
> so.

No need to do that in an initial patch. It is just something I wanted
to raise as an item needed to make it sustainable for live migration
type uses cases in particular.


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Re: [RFC] Add basic driver for the Cloud-Hypervisor
Posted by Douglas, William 3 years, 7 months ago
On Tue, Sep 22, 2020 at 1:11 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Mon, Sep 21, 2020 at 12:05:48PM -0700, Douglas, William wrote:
> > On Tue, Sep 15, 2020 at 5:53 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
> > >
> > > On Thu, Aug 27, 2020 at 11:24:32AM -0700, William Douglas wrote:
> >
> > <snip>
> >
> > > > +virCHMonitorPtr
> > > > +virCHMonitorNew(virDomainObjPtr vm, const char *socketdir)
> > > > +{
> > > > +    virCHMonitorPtr ret = NULL;
> > > > +    virCHMonitorPtr mon = NULL;
> > > > +    virCommandPtr cmd = NULL;
> > > > +    int pings = 0;
> > > > +
> > > > +    if (virCHMonitorInitialize() < 0)
> > > > +        return NULL;
> > > > +
> > > > +    if (!(mon = virObjectLockableNew(virCHMonitorClass)))
> > > > +        return NULL;
> > > > +
> > > > +    mon->socketpath = g_strdup_printf("%s/%s-socket", socketdir, vm->def->name);
> > > > +
> > > > +    /* prepare to launch Cloud-Hypervisor socket */
> > > > +    if (!(cmd = chMonitorBuildSocketCmd(vm, mon->socketpath)))
> > > > +        goto cleanup;
> > > > +
> > > > +    if (virFileMakePath(socketdir) < 0) {
> > > > +        virReportSystemError(errno,
> > > > +                             _("Cannot create socket directory '%s'"),
> > > > +                             socketdir);
> > > > +        goto cleanup;
> > > > +    }
> > > > +
> > > > +    /* launch Cloud-Hypervisor socket */
> > > > +    if (virCommandRunAsync(cmd, &mon->pid) < 0)
> > > > +        goto cleanup;
> > > > +
> > > > +    /* get a curl handle */
> > > > +    mon->handle = curl_easy_init();
> > > > +
> > > > +    /* try to ping VMM socket 5 times to make sure it is ready */
> > > > +    while (pings < 5) {
> > > > +        if (virCHMonitorPingVMM(mon) == 0)
> > > > +            break;
> > > > +        if (pings == 5)
> > > > +            goto cleanup;
> > > > +
> > > > +        g_usleep(100 * 1000);
> > > > +    }
> > >
> > > This is highly undesirable. Is there no way to launch the CH process
> > > such that the socket is guaranteed to be accepting requests by the
> > > time it has forked into the background ? Or is there a way to pass
> > > in a pre-opened FD for the monitor socket ?
> > >
> > > This kind of wait with timeout will cause startup failures due to
> > > timeout under load.
> >
> > Currently the cloud-hypervisor process doesn't fork into the
> > background so that was initially what I was thinking to add to
> > cloud-hypervsior. Would tracking the pid of the cloud-hypervsior
> > running in the background be required at that point (assuming the
> > socket path to control the daemon would be known and working given
> > virCommandRun returns successfully)?
>
> It doesn't especially matter whether cloud-hypervsior forks into
> the background itself, or whether libvirt forks it into the
> background when launching it.
>
> The important thing is simply to ensure that whichever startup
> approach is used can be implemented in a race-free manner without
> needing busy-waits or timeouts.
>
> If cloud-hypervisor forks into the background, then it would
> need to write a pidfile so libvirt can determine the pid that
> ended up running. The act of libvirt waiting for the pidfile
> to be written is potentially racy though, so that might not be
> be best.
>
> Based on what we learnt from QEMU, I think being able to pass
> in a pre-created UNIX listener socket is the best. That lets
> libvirt immedately issue a connect() start after forking the
> cloud-hypervisor process. If cloud-hypervisor succesfully
> starts up, then the client socket becomes live and can be used.
> if it fails to startup, then the client socket libvirt has
> will get EOF and thus we'll see the startup failure. This
> avoids any looping/sleeping/timeout.

I think I'm misreading/missing something from the qemu setup but
looking at qemu_monitor.c's qemuMonitorOpenUnix function I see a retry
connect loop based on a timeout when trying to connect to a socket
(with a comment that leads me to believe it is possible to have the
monitor socket file not yet exist). I was expecting to see something
like forking qemu and passing the fd for the socket that was opened
that qemu should use (and that libvirt connects to) but I think I only
see that for the network socket fds that qemu is supposed to use. If
you could point me in the right direction here I'd appreciate it.


Re: [RFC] Add basic driver for the Cloud-Hypervisor
Posted by Ján Tomko 3 years, 7 months ago
On a Tuesday in 2020, Douglas, William wrote:
>On Tue, Sep 22, 2020 at 1:11 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
>>
>> On Mon, Sep 21, 2020 at 12:05:48PM -0700, Douglas, William wrote:
>> > On Tue, Sep 15, 2020 at 5:53 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
>> > >
>> > > On Thu, Aug 27, 2020 at 11:24:32AM -0700, William Douglas wrote:
>> >
>> > <snip>
>> >
>> > > > +virCHMonitorPtr
>> > > > +virCHMonitorNew(virDomainObjPtr vm, const char *socketdir)
>> > > > +{
>> > > > +    virCHMonitorPtr ret = NULL;
>> > > > +    virCHMonitorPtr mon = NULL;
>> > > > +    virCommandPtr cmd = NULL;
>> > > > +    int pings = 0;
>> > > > +
>> > > > +    if (virCHMonitorInitialize() < 0)
>> > > > +        return NULL;
>> > > > +
>> > > > +    if (!(mon = virObjectLockableNew(virCHMonitorClass)))
>> > > > +        return NULL;
>> > > > +
>> > > > +    mon->socketpath = g_strdup_printf("%s/%s-socket", socketdir, vm->def->name);
>> > > > +
>> > > > +    /* prepare to launch Cloud-Hypervisor socket */
>> > > > +    if (!(cmd = chMonitorBuildSocketCmd(vm, mon->socketpath)))
>> > > > +        goto cleanup;
>> > > > +
>> > > > +    if (virFileMakePath(socketdir) < 0) {
>> > > > +        virReportSystemError(errno,
>> > > > +                             _("Cannot create socket directory '%s'"),
>> > > > +                             socketdir);
>> > > > +        goto cleanup;
>> > > > +    }
>> > > > +
>> > > > +    /* launch Cloud-Hypervisor socket */
>> > > > +    if (virCommandRunAsync(cmd, &mon->pid) < 0)
>> > > > +        goto cleanup;
>> > > > +
>> > > > +    /* get a curl handle */
>> > > > +    mon->handle = curl_easy_init();
>> > > > +
>> > > > +    /* try to ping VMM socket 5 times to make sure it is ready */
>> > > > +    while (pings < 5) {
>> > > > +        if (virCHMonitorPingVMM(mon) == 0)
>> > > > +            break;
>> > > > +        if (pings == 5)
>> > > > +            goto cleanup;
>> > > > +
>> > > > +        g_usleep(100 * 1000);
>> > > > +    }
>> > >
>> > > This is highly undesirable. Is there no way to launch the CH process
>> > > such that the socket is guaranteed to be accepting requests by the
>> > > time it has forked into the background ? Or is there a way to pass
>> > > in a pre-opened FD for the monitor socket ?
>> > >
>> > > This kind of wait with timeout will cause startup failures due to
>> > > timeout under load.
>> >
>> > Currently the cloud-hypervisor process doesn't fork into the
>> > background so that was initially what I was thinking to add to
>> > cloud-hypervsior. Would tracking the pid of the cloud-hypervsior
>> > running in the background be required at that point (assuming the
>> > socket path to control the daemon would be known and working given
>> > virCommandRun returns successfully)?
>>
>> It doesn't especially matter whether cloud-hypervsior forks into
>> the background itself, or whether libvirt forks it into the
>> background when launching it.
>>
>> The important thing is simply to ensure that whichever startup
>> approach is used can be implemented in a race-free manner without
>> needing busy-waits or timeouts.
>>
>> If cloud-hypervisor forks into the background, then it would
>> need to write a pidfile so libvirt can determine the pid that
>> ended up running. The act of libvirt waiting for the pidfile
>> to be written is potentially racy though, so that might not be
>> be best.
>>
>> Based on what we learnt from QEMU, I think being able to pass
>> in a pre-created UNIX listener socket is the best. That lets
>> libvirt immedately issue a connect() start after forking the
>> cloud-hypervisor process. If cloud-hypervisor succesfully
>> starts up, then the client socket becomes live and can be used.
>> if it fails to startup, then the client socket libvirt has
>> will get EOF and thus we'll see the startup failure. This
>> avoids any looping/sleeping/timeout.
>
>I think I'm misreading/missing something from the qemu setup but
>looking at qemu_monitor.c's qemuMonitorOpenUnix function I see a retry
>connect loop based on a timeout when trying to connect to a socket

That loop is guarded by: if (retry)

The important code path:
qemuProcessLaunch
`_ qemuProcessWaitForMonitor
   `_ qemuConnectMonitor

qemuProcessWaitForMonitor sets retry=false if we're dealing with
a recent enough QEMU that supports FD passing for its chardevs

The logic that creates the socket on libvirt's side lives (sadly)
in qemuBuildChrChardevStr.

Jano

>(with a comment that leads me to believe it is possible to have the
>monitor socket file not yet exist). I was expecting to see something
>like forking qemu and passing the fd for the socket that was opened
>that qemu should use (and that libvirt connects to) but I think I only
>see that for the network socket fds that qemu is supposed to use. If
>you could point me in the right direction here I'd appreciate it.
>
>
Re: [RFC] Add basic driver for the Cloud-Hypervisor
Posted by Douglas, William 3 years, 7 months ago
On Tue, Sep 22, 2020 at 12:07 PM Ján Tomko <jtomko@redhat.com> wrote:
>
> On a Tuesday in 2020, Douglas, William wrote:
> >On Tue, Sep 22, 2020 at 1:11 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
> >>
> >> On Mon, Sep 21, 2020 at 12:05:48PM -0700, Douglas, William wrote:
> >> > On Tue, Sep 15, 2020 at 5:53 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
> >> > >
> >> > > On Thu, Aug 27, 2020 at 11:24:32AM -0700, William Douglas wrote:
> >> >
> >> > <snip>
> >> >
> >> > > > +virCHMonitorPtr
> >> > > > +virCHMonitorNew(virDomainObjPtr vm, const char *socketdir)
> >> > > > +{
> >> > > > +    virCHMonitorPtr ret = NULL;
> >> > > > +    virCHMonitorPtr mon = NULL;
> >> > > > +    virCommandPtr cmd = NULL;
> >> > > > +    int pings = 0;
> >> > > > +
> >> > > > +    if (virCHMonitorInitialize() < 0)
> >> > > > +        return NULL;
> >> > > > +
> >> > > > +    if (!(mon = virObjectLockableNew(virCHMonitorClass)))
> >> > > > +        return NULL;
> >> > > > +
> >> > > > +    mon->socketpath = g_strdup_printf("%s/%s-socket", socketdir, vm->def->name);
> >> > > > +
> >> > > > +    /* prepare to launch Cloud-Hypervisor socket */
> >> > > > +    if (!(cmd = chMonitorBuildSocketCmd(vm, mon->socketpath)))
> >> > > > +        goto cleanup;
> >> > > > +
> >> > > > +    if (virFileMakePath(socketdir) < 0) {
> >> > > > +        virReportSystemError(errno,
> >> > > > +                             _("Cannot create socket directory '%s'"),
> >> > > > +                             socketdir);
> >> > > > +        goto cleanup;
> >> > > > +    }
> >> > > > +
> >> > > > +    /* launch Cloud-Hypervisor socket */
> >> > > > +    if (virCommandRunAsync(cmd, &mon->pid) < 0)
> >> > > > +        goto cleanup;
> >> > > > +
> >> > > > +    /* get a curl handle */
> >> > > > +    mon->handle = curl_easy_init();
> >> > > > +
> >> > > > +    /* try to ping VMM socket 5 times to make sure it is ready */
> >> > > > +    while (pings < 5) {
> >> > > > +        if (virCHMonitorPingVMM(mon) == 0)
> >> > > > +            break;
> >> > > > +        if (pings == 5)
> >> > > > +            goto cleanup;
> >> > > > +
> >> > > > +        g_usleep(100 * 1000);
> >> > > > +    }
> >> > >
> >> > > This is highly undesirable. Is there no way to launch the CH process
> >> > > such that the socket is guaranteed to be accepting requests by the
> >> > > time it has forked into the background ? Or is there a way to pass
> >> > > in a pre-opened FD for the monitor socket ?
> >> > >
> >> > > This kind of wait with timeout will cause startup failures due to
> >> > > timeout under load.
> >> >
> >> > Currently the cloud-hypervisor process doesn't fork into the
> >> > background so that was initially what I was thinking to add to
> >> > cloud-hypervsior. Would tracking the pid of the cloud-hypervsior
> >> > running in the background be required at that point (assuming the
> >> > socket path to control the daemon would be known and working given
> >> > virCommandRun returns successfully)?
> >>
> >> It doesn't especially matter whether cloud-hypervsior forks into
> >> the background itself, or whether libvirt forks it into the
> >> background when launching it.
> >>
> >> The important thing is simply to ensure that whichever startup
> >> approach is used can be implemented in a race-free manner without
> >> needing busy-waits or timeouts.
> >>
> >> If cloud-hypervisor forks into the background, then it would
> >> need to write a pidfile so libvirt can determine the pid that
> >> ended up running. The act of libvirt waiting for the pidfile
> >> to be written is potentially racy though, so that might not be
> >> be best.
> >>
> >> Based on what we learnt from QEMU, I think being able to pass
> >> in a pre-created UNIX listener socket is the best. That lets
> >> libvirt immedately issue a connect() start after forking the
> >> cloud-hypervisor process. If cloud-hypervisor succesfully
> >> starts up, then the client socket becomes live and can be used.
> >> if it fails to startup, then the client socket libvirt has
> >> will get EOF and thus we'll see the startup failure. This
> >> avoids any looping/sleeping/timeout.
> >
> >I think I'm misreading/missing something from the qemu setup but
> >looking at qemu_monitor.c's qemuMonitorOpenUnix function I see a retry
> >connect loop based on a timeout when trying to connect to a socket
>
> That loop is guarded by: if (retry)
>
> The important code path:
> qemuProcessLaunch
> `_ qemuProcessWaitForMonitor
>    `_ qemuConnectMonitor
>
> qemuProcessWaitForMonitor sets retry=false if we're dealing with
> a recent enough QEMU that supports FD passing for its chardevs
>
> The logic that creates the socket on libvirt's side lives (sadly)
> in qemuBuildChrChardevStr.
>

Ah I was missing the possibility of retry being set to false (I saw
false would be set on a reconnect I think but didn't look much
further) and the entirety of the qemuBuildChrChardevStr being the key
part of the setup. Thank you very much!