Toggle navigation
:p
atchew
Login
If a domU has a qemu-xen instance attached, it is required to call QEMU's "xen-save-devices-state" method. Without it, the receiving side of a PV or PVH migration may be unable to lock the image: xen be: qdisk-51712: xen be: qdisk-51712: error: Failed to get "write" lock error: Failed to get "write" lock xen be: qdisk-51712: xen be: qdisk-51712: initialise() failed initialise() failed To fix this bug, libxl__domain_suspend_device_model() and libxl__domain_resume_device_model() have to be called not only for HVM, but also if the active device_model is QEMU_XEN. Unfortunately, libxl__domain_build_info_setdefault() hardcodes b_info->device_model_version to QEMU_XEN if it does not know it any better. This breaks domUs without a device_model. libxl__qmp_stop() would wait 10 seconds in qmp_open() for a qemu that will never appear. During this long timeframe the domU remains in state paused on the sending side. As a result, network connections may be dropped. Once this bug is fixed as well, by just removing that assumption, there is no code to actually initialise b_info->device_model_version. There is a helper function libxl__need_xenpv_qemu(), which is used in various places to decide if any device_model has to be spawned. This function cannot be used as is, just to fill b_info->device_model_version, because store_libxl_entry() was already called earlier. Update this function to receive a domid to work with, instead of reading xenstore. Rearrange the code and initialize b_info->device_model_version in libxl__domain_build_info_setdefault() per DOMAIN_TYPE. Update initiate_domain_create() to set b_info->device_model_version if it was not set earlier, using the updated libxl__need_xenpv_qemu(). Introduce LIBXL_DEVICE_MODEL_VERSION_NONE_REQUIRED for PV and PVH that have no need for a device_model. Update existing users of libxl__need_xenpv_qemu() to use b_info->device_model_version to check whether a device_model is needed. 
v02: - update wording in a comment - remove stale goto in domcreate_launch_dm - initialize ret in libxl__need_xenpv_qemu Signed-off-by: Olaf Hering <olaf@aepfle.de> Cc: Roger Pau Monné <roger.pau@citrix.com> Cc: Anthony PERARD <anthony.perard@citrix.com> --- tools/libxl/libxl_create.c | 39 +++++++++++++++++++++++++++++++-------- tools/libxl/libxl_dm.c | 40 +++++++++++++++++++++++----------------- tools/libxl/libxl_dom_suspend.c | 8 ++++++-- tools/libxl/libxl_internal.h | 3 ++- tools/libxl/libxl_types.idl | 1 + 5 files changed, 63 insertions(+), 28 deletions(-) diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index XXXXXXX..XXXXXXX 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -XXX,XX +XXX,XX @@ int libxl__domain_build_info_setdefault(libxl__gc *gc, b_info->device_model_ssidref = SECINITSID_DOMDM; if (!b_info->device_model_version) { - if (b_info->type == LIBXL_DOMAIN_TYPE_HVM) { + switch (b_info->type) { + case LIBXL_DOMAIN_TYPE_HVM: if (libxl_defbool_val(b_info->device_model_stubdomain)) { b_info->device_model_version = LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL; } else { b_info->device_model_version = libxl__default_device_model(gc); } - } else { - b_info->device_model_version = - LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN; + break; + case LIBXL_DOMAIN_TYPE_PV: + case LIBXL_DOMAIN_TYPE_PVH: + default: + /* may be set later */ + break; } if (b_info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) { @@ -XXX,XX +XXX,XX @@ static void initiate_domain_create(libxl__egc *egc, goto error_out; } + if (d_config->b_info.device_model_version + == LIBXL_DEVICE_MODEL_VERSION_UNKNOWN) { + ret = libxl__need_xenpv_qemu(gc, d_config, domid); + if (ret) + d_config->b_info.device_model_version = + LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN; + else + d_config->b_info.device_model_version = + LIBXL_DEVICE_MODEL_VERSION_NONE_REQUIRED; + } + dcs->guest_domid = domid; dcs->sdss.dm.guest_domid = 0; /* means we haven't spawned */ @@ 
-XXX,XX +XXX,XX @@ static void domcreate_launch_dm(libxl__egc *egc, libxl__multidev *multidev, libxl__domain_create_state *dcs = CONTAINER_OF(multidev, *dcs, multidev); STATE_AO_GC(dcs->ao); int i; + bool need_qemu; /* convenience aliases */ const uint32_t domid = dcs->guest_domid; @@ -XXX,XX +XXX,XX @@ static void domcreate_launch_dm(libxl__egc *egc, libxl__multidev *multidev, libxl__device_console_add(gc, domid, &console, state, &device); libxl__device_console_dispose(&console); - ret = libxl__need_xenpv_qemu(gc, d_config); - if (ret < 0) - goto error_out; - if (ret) { + switch (d_config->b_info.device_model_version) { + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: + need_qemu = true; + break; + default: + need_qemu = false; + break; + } + + if (need_qemu) { dcs->sdss.dm.guest_domid = domid; libxl__spawn_local_dm(egc, &dcs->sdss.dm); return; diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c index XXXXXXX..XXXXXXX 100644 --- a/tools/libxl/libxl_dm.c +++ b/tools/libxl/libxl_dm.c @@ -XXX,XX +XXX,XX @@ static void spawn_stub_launch_dm(libxl__egc *egc, libxl__domain_build_state *const d_state = sdss->dm.build_state; libxl__domain_build_state *const stubdom_state = &sdss->dm_state; uint32_t dm_domid = sdss->pvqemu.guest_domid; - int need_qemu; + bool need_qemu; if (ret) { LOGD(ERROR, guest_domid, "error connecting disk devices"); @@ -XXX,XX +XXX,XX @@ static void spawn_stub_launch_dm(libxl__egc *egc, } } - need_qemu = libxl__need_xenpv_qemu(gc, dm_config); + switch (dm_config->b_info.device_model_version) { + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: + need_qemu = true; + break; + default: + need_qemu = false; + break; + } for (i = 0; i < num_console; i++) { libxl__device device; @@ -XXX,XX +XXX,XX @@ static void kill_device_model_uid_cb(libxl__egc *egc, } /* Return 0 if no dm needed, 1 if needed and <0 if error. 
*/ -int libxl__need_xenpv_qemu(libxl__gc *gc, libxl_domain_config *d_config) +int libxl__need_xenpv_qemu(libxl__gc *gc, libxl_domain_config *d_config, uint32_t domid) { - int idx, i, ret, num; - uint32_t domid; + int idx, i, ret = 0, num; const struct libxl_device_type *dt; - ret = libxl__get_domid(gc, &domid); - if (ret) { - LOG(ERROR, "unable to get domain id"); - goto out; - } - if (d_config->num_vfbs > 0 || d_config->num_p9s > 0) { ret = 1; goto out; @@ -XXX,XX +XXX,XX @@ int libxl__dm_check_start(libxl__gc *gc, libxl_domain_config *d_config, uint32_t domid) { int rc; + bool need_qemu; if (libxl__dm_active(gc, domid)) return 0; - rc = libxl__need_xenpv_qemu(gc, d_config); - if (rc < 0) - goto out; - - if (!rc) + switch (d_config->b_info.device_model_version) { + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: + need_qemu = true; + break; + default: + need_qemu = false; + break; + } + if (need_qemu == false) return 0; LOGD(ERROR, domid, "device model required but not running"); rc = ERROR_FAIL; -out: return rc; } diff --git a/tools/libxl/libxl_dom_suspend.c b/tools/libxl/libxl_dom_suspend.c index XXXXXXX..XXXXXXX 100644 --- a/tools/libxl/libxl_dom_suspend.c +++ b/tools/libxl/libxl_dom_suspend.c @@ -XXX,XX +XXX,XX @@ static void domain_suspend_common_guest_suspended(libxl__egc *egc, libxl__ev_xswatch_deregister(gc, &dsps->guest_watch); libxl__ev_time_deregister(gc, &dsps->guest_timeout); - if (dsps->type == LIBXL_DOMAIN_TYPE_HVM) { + if (dsps->type == LIBXL_DOMAIN_TYPE_HVM || + libxl__device_model_version_running(gc, dsps->domid) == + LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) { dsps->callback_device_model_done = domain_suspend_common_done; libxl__domain_suspend_device_model(egc, dsps); /* must be last */ return; @@ -XXX,XX +XXX,XX @@ int libxl__domain_resume(libxl__gc *gc, uint32_t domid, int suspend_cancel) goto out; } - if (type == LIBXL_DOMAIN_TYPE_HVM) { + if (type == LIBXL_DOMAIN_TYPE_HVM || + 
libxl__device_model_version_running(gc, domid) == + LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) { rc = libxl__domain_resume_device_model(gc, domid); if (rc) { LOGD(ERROR, domid, "failed to resume device model:%d", rc); diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index XXXXXXX..XXXXXXX 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -XXX,XX +XXX,XX @@ _hidden int libxl__domain_build(libxl__gc *gc, _hidden const char *libxl__domain_device_model(libxl__gc *gc, const libxl_domain_build_info *info); _hidden int libxl__need_xenpv_qemu(libxl__gc *gc, - libxl_domain_config *d_config); + libxl_domain_config *d_config, + uint32_t domid); _hidden bool libxl__query_qemu_backend(libxl__gc *gc, uint32_t domid, uint32_t backend_id, diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index XXXXXXX..XXXXXXX 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -XXX,XX +XXX,XX @@ libxl_device_model_version = Enumeration("device_model_version", [ (0, "UNKNOWN"), (1, "QEMU_XEN_TRADITIONAL"), # Historical qemu-xen device model (qemu-dm) (2, "QEMU_XEN"), # Upstream based qemu-xen device model + (3, "NONE_REQUIRED"), ]) libxl_console_type = Enumeration("console_type", [ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
If a domU has a qemu-xen instance attached, it is required to call QEMU's "xen-save-devices-state" method. Without it, the receiving side of a PV or PVH migration may be unable to lock the image: xen be: qdisk-51712: xen be: qdisk-51712: error: Failed to get "write" lock error: Failed to get "write" lock xen be: qdisk-51712: xen be: qdisk-51712: initialise() failed initialise() failed To fix this bug, libxl__domain_suspend_device_model() and libxl__domain_resume_device_model() have to be called not only for HVM, but also if the active device_model is QEMU_XEN. Unfortunately, libxl__domain_build_info_setdefault() hardcodes b_info->device_model_version to QEMU_XEN if it does not know it any better. As a result, libxl__device_model_version_running() will return incorrect values. This breaks domUs without a device_model. libxl__qmp_stop() would wait 10 seconds in qmp_open() for a qemu that will never appear. During this long timeframe the domU remains in state paused on the sending side. As a result, network connections may be dropped. Once this bug is fixed as well, by just removing the assumption that every domU has a QEMU_XEN, there is no code to actually initialise b_info->device_model_version. There is a helper function libxl__need_xenpv_qemu(), which is used in various places to decide if a device_model has to be spawned. This function cannot be used as is, just to fill device_model_version, because store_libxl_entry() was already called earlier. Create a new function to set device_model_version. Move existing code from libxl__domain_build_info_setdefault() to cover the HVM case. Add new code to cover the non-HVM case, using libxl__need_xenpv_qemu() to set device_model_version. Update libxl__spawn_stub_dm() and initiate_domain_create() to call the new function prior to libxl__domain_build_info_setdefault() because device_model_version is expected to be initialized. 
libxl_domain_need_memory() needs no update because it does not have a d_config available anyway, and the callers provide a populated b_info. Introduce LIBXL_DEVICE_MODEL_VERSION_NONE_REQUIRED for PV and PVH that have no need for a device_model to make the state explicit. v03: - rearrange code to make sure device_model_version is initialized before store_libxl_entry() is called v02: - update wording in a comment - remove stale goto in domcreate_launch_dm - initialize ret in libxl__need_xenpv_qemu Signed-off-by: Olaf Hering <olaf@aepfle.de> Cc: Roger Pau Monné <roger.pau@citrix.com> Cc: Anthony PERARD <anthony.perard@citrix.com> --- v3 not runtime tested tools/libxl/libxl_create.c | 95 +++++++++++++++++++++++++++-------------- tools/libxl/libxl_dm.c | 2 + tools/libxl/libxl_dom_suspend.c | 8 +++- tools/libxl/libxl_internal.h | 2 + tools/libxl/libxl_types.idl | 1 + 5 files changed, 73 insertions(+), 35 deletions(-) diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index XXXXXXX..XXXXXXX 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -XXX,XX +XXX,XX @@ #include <xen-xsm/flask/flask.h> +int libxl__domain_set_device_model(libxl__gc *gc, libxl_domain_config *d_config) +{ + libxl_domain_build_info *b_info = &d_config->b_info; + int ret; + + if (b_info->device_model_version) + return 0; + + switch (b_info->type) { + case LIBXL_DOMAIN_TYPE_HVM: + if (libxl_defbool_val(b_info->device_model_stubdomain)) { + b_info->device_model_version = + LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL; + } else { + b_info->device_model_version = libxl__default_device_model(gc); + } + break; + default: + ret = libxl__need_xenpv_qemu(gc, d_config); + switch (ret) { + case 1: + d_config->b_info.device_model_version = + LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN; + break; + case 0: + d_config->b_info.device_model_version = + LIBXL_DEVICE_MODEL_VERSION_NONE_REQUIRED; + break; + default: + LOGE(ERROR, "Unable to determine QEMU requisite"); + return 
ERROR_FAIL; + } + } + + if (b_info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) { + const char *dm; + + dm = libxl__domain_device_model(gc, b_info); + ret = access(dm, X_OK); + if (ret < 0) { + /* qemu-xen unavailable, use qemu-xen-traditional */ + if (errno == ENOENT) { + LOGE(INFO, "qemu-xen is unavailable" + ", using qemu-xen-traditional instead"); + b_info->device_model_version = + LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL; + } else { + LOGE(ERROR, "qemu-xen access error"); + return ERROR_FAIL; + } + } + } + + return 0; +} + int libxl__domain_create_info_setdefault(libxl__gc *gc, libxl_domain_create_info *c_info) { @@ -XXX,XX +XXX,XX @@ int libxl__domain_build_info_setdefault(libxl__gc *gc, !b_info->device_model_ssidref) b_info->device_model_ssidref = SECINITSID_DOMDM; - if (!b_info->device_model_version) { - if (b_info->type == LIBXL_DOMAIN_TYPE_HVM) { - if (libxl_defbool_val(b_info->device_model_stubdomain)) { - b_info->device_model_version = - LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL; - } else { - b_info->device_model_version = libxl__default_device_model(gc); - } - } else { - b_info->device_model_version = - LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN; - } - if (b_info->device_model_version - == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) { - const char *dm; - - dm = libxl__domain_device_model(gc, b_info); - rc = access(dm, X_OK); - if (rc < 0) { - /* qemu-xen unavailable, use qemu-xen-traditional */ - if (errno == ENOENT) { - LOGE(INFO, "qemu-xen is unavailable" - ", using qemu-xen-traditional instead"); - b_info->device_model_version = - LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL; - } else { - LOGE(ERROR, "qemu-xen access error"); - return ERROR_FAIL; - } - } - } - } - if (b_info->blkdev_start == NULL) b_info->blkdev_start = libxl__strdup(NOGC, "xvda"); @@ -XXX,XX +XXX,XX @@ static void initiate_domain_create(libxl__egc *egc, goto error_out; } + ret = libxl__domain_set_device_model(gc, d_config); + if (ret) { + LOGD(ERROR, domid, 
"Unable to set domain device model"); + goto error_out; + } + ret = libxl__domain_create_info_setdefault(gc, &d_config->c_info); if (ret) { LOGD(ERROR, domid, "Unable to set domain create info defaults"); diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c index XXXXXXX..XXXXXXX 100644 --- a/tools/libxl/libxl_dm.c +++ b/tools/libxl/libxl_dm.c @@ -XXX,XX +XXX,XX @@ void libxl__spawn_stub_dm(libxl__egc *egc, libxl__stub_dm_spawn_state *sdss) dm_config->c_info.run_hotplug_scripts = guest_config->c_info.run_hotplug_scripts; + ret = libxl__domain_set_device_model(gc, dm_config); + if (ret) goto out; ret = libxl__domain_create_info_setdefault(gc, &dm_config->c_info); if (ret) goto out; ret = libxl__domain_build_info_setdefault(gc, &dm_config->b_info); diff --git a/tools/libxl/libxl_dom_suspend.c b/tools/libxl/libxl_dom_suspend.c index XXXXXXX..XXXXXXX 100644 --- a/tools/libxl/libxl_dom_suspend.c +++ b/tools/libxl/libxl_dom_suspend.c @@ -XXX,XX +XXX,XX @@ static void domain_suspend_common_guest_suspended(libxl__egc *egc, libxl__ev_xswatch_deregister(gc, &dsps->guest_watch); libxl__ev_time_deregister(gc, &dsps->guest_timeout); - if (dsps->type == LIBXL_DOMAIN_TYPE_HVM) { + if (dsps->type == LIBXL_DOMAIN_TYPE_HVM || + libxl__device_model_version_running(gc, dsps->domid) == + LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) { dsps->callback_device_model_done = domain_suspend_common_done; libxl__domain_suspend_device_model(egc, dsps); /* must be last */ return; @@ -XXX,XX +XXX,XX @@ int libxl__domain_resume(libxl__gc *gc, uint32_t domid, int suspend_cancel) goto out; } - if (type == LIBXL_DOMAIN_TYPE_HVM) { + if (type == LIBXL_DOMAIN_TYPE_HVM || + libxl__device_model_version_running(gc, domid) == + LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) { rc = libxl__domain_resume_device_model(gc, domid); if (rc) { LOGD(ERROR, domid, "failed to resume device model:%d", rc); diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index XXXXXXX..XXXXXXX 100644 --- 
a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -XXX,XX +XXX,XX @@ _hidden int libxl__device_nextid(libxl__gc *gc, uint32_t domid, _hidden int libxl__resolve_domid(libxl__gc *gc, const char *name, uint32_t *domid); +_hidden int libxl__domain_set_device_model(libxl__gc *gc, + libxl_domain_config *d_config); /* * For each aggregate type which can be used as an input we provide: * diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index XXXXXXX..XXXXXXX 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -XXX,XX +XXX,XX @@ libxl_device_model_version = Enumeration("device_model_version", [ (0, "UNKNOWN"), (1, "QEMU_XEN_TRADITIONAL"), # Historical qemu-xen device model (qemu-dm) (2, "QEMU_XEN"), # Upstream based qemu-xen device model + (3, "NONE_REQUIRED"), ]) libxl_console_type = Enumeration("console_type", [ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel