[PATCH RFC v3 09/16] qemu: hotplug: Support hot attach and detach block disk along with throttle filters

wucf@linux.ibm.com posted 16 patches 3 months ago
There is a newer version of this series
[PATCH RFC v3 09/16] qemu: hotplug: Support hot attach and detach block disk along with throttle filters
Posted by wucf@linux.ibm.com 3 months ago
From: Chun Feng Wu <wucf@linux.ibm.com>

When attaching disk along with specified throttle groups, those groups will be chained up by parent node name, this change includes service side codes:
* Each filter references one throttle group by group name
* Update "qemuDomainDiskGetTopNodename" to take top throttle node name if disk has throttles
* Each filter has a nodename, and those filters are chained up in sequence
* Filter nodename index is generated by reusing qemuDomainStorageIDNew and current global sequence number is persistented in virDomainObj->privateData(qemuDomainObjPrivate)->nodenameindex
* During hotplug, filter is created through QMP request("blockdev-add" with "driver":"throttle") to QEMU
* Delete filters by "qemuBlockThrottleFiltersDetach"("blockdev-del") when detaching device
* Use "iotune" and "throttlefilters" exclusively for specific disk

Signed-off-by: Chun Feng Wu <wucf@linux.ibm.com>
---
 src/conf/domain_validate.c |   8 +++
 src/qemu/qemu_block.c      | 131 +++++++++++++++++++++++++++++++++++++
 src/qemu/qemu_block.h      |  53 +++++++++++++++
 src/qemu/qemu_command.c    |  84 ++++++++++++++++++++++++
 src/qemu/qemu_command.h    |   9 +++
 src/qemu/qemu_domain.c     |  39 ++++++++++-
 src/qemu/qemu_domain.h     |   8 +++
 src/qemu/qemu_driver.c     |   6 ++
 src/qemu/qemu_hotplug.c    |  33 ++++++++++
 9 files changed, 370 insertions(+), 1 deletion(-)

diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
index 395e036e8f..4cc5ed7577 100644
--- a/src/conf/domain_validate.c
+++ b/src/conf/domain_validate.c
@@ -942,6 +942,14 @@ virDomainDiskDefValidate(const virDomainDef *def,
         return -1;
     }
 
+    if (disk->throttlefilters && (disk->blkdeviotune.group_name ||
+        virDomainBlockIoTuneInfoHasAny(&disk->blkdeviotune))) {
+        virReportError(VIR_ERR_XML_ERROR,
+                       _("block 'throttlefilters' can't be used together with 'iotune' for disk '%1$s'"),
+                       disk->dst);
+        return -1;
+    }
+
     return 0;
 }
 
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
index 738b72d7ea..9b8ff65887 100644
--- a/src/qemu/qemu_block.c
+++ b/src/qemu/qemu_block.c
@@ -2739,6 +2739,137 @@ qemuBlockStorageSourceCreateDetectSize(GHashTable *blockNamedNodeData,
 }
 
 
+void
+qemuBlockThrottleFilterSetNodename(virDomainThrottleFilterDef *filter,
+                                   char *nodename)
+{
+    g_free(filter->nodename);
+    filter->nodename = nodename;
+}
+
+
+const char *
+qemuBlockThrottleFilterGetNodename(virDomainThrottleFilterDef *filter)
+{
+    return filter->nodename;
+}
+
+
+/**
+ * qemuBlockThrottleFilterGetProps:
+ * @filter: throttle filter
+ * @parentNodeName: parent nodename of @filter
+ *
+ * Build "arguments" part of "blockdev-add" QMP cmd.
+ * e.g. {"execute":"blockdev-add", "arguments":{"driver":"throttle",
+ * "node-name":"libvirt-2-filter",  "throttle-group":"limits0",
+ * "file": "libvirt-1-format"}}
+ */
+virJSONValue *
+qemuBlockThrottleFilterGetProps(virDomainThrottleFilterDef *filter,
+                                const char *parentNodeName)
+{
+    g_autoptr(virJSONValue) props = NULL;
+
+    if (virJSONValueObjectAdd(&props,
+                              "s:driver", "throttle",
+                              "s:node-name", qemuBlockThrottleFilterGetNodename(filter),
+                              "s:throttle-group", filter->group_name,
+                              "s:file", parentNodeName,
+                              NULL) < 0)
+        return 0;
+
+    return g_steal_pointer(&props);
+}
+
+
+void
+qemuBlockThrottleFilterAttachDataFree(qemuBlockThrottleFilterAttachData *data)
+{
+    if (!data)
+        return;
+
+    virJSONValueFree(data->filterProps);
+    g_free(data);
+}
+
+
+qemuBlockThrottleFilterAttachData *
+qemuBlockThrottleFilterAttachPrepareBlockdev(virDomainThrottleFilterDef *filter,
+                                             const char *parentNodeName)
+{
+    g_autoptr(qemuBlockThrottleFilterAttachData) data = NULL;
+
+    data = g_new0(qemuBlockThrottleFilterAttachData, 1);
+
+    if (!(data->filterProps = qemuBlockThrottleFilterGetProps(filter, parentNodeName)))
+        return NULL;
+
+    data->filterNodeName = qemuBlockThrottleFilterGetNodename(filter);
+    data->filterAttached = true;
+
+    return g_steal_pointer(&data);
+}
+
+
+void
+qemuBlockThrottleFilterAttachRollback(qemuMonitor *mon,
+                                      qemuBlockThrottleFilterAttachData *data)
+{
+    virErrorPtr orig_err;
+
+    virErrorPreserveLast(&orig_err);
+
+    if (data->filterAttached)
+        ignore_value(qemuMonitorBlockdevDel(mon, data->filterNodeName));
+
+    virErrorRestore(&orig_err);
+}
+
+
+void
+qemuBlockThrottleFiltersDataFree(qemuBlockThrottleFiltersData *data)
+{
+    size_t i;
+
+    if (!data)
+        return;
+
+    for (i = 0; i < data->nfilterdata; i++)
+        qemuBlockThrottleFilterAttachDataFree(data->filterdata[i]);
+
+    g_free(data->filterdata);
+    g_free(data);
+}
+
+
+int
+qemuBlockThrottleFiltersAttach(qemuMonitor *mon,
+                               qemuBlockThrottleFiltersData *data)
+{
+    size_t i;
+
+    for (i = 0; i < data->nfilterdata; i++) {
+        if (qemuMonitorBlockdevAdd(mon, &data->filterdata[i]->filterProps) < 0)
+            return -1;
+        data->filterdata[i]->filterAttached = true;
+    }
+
+    return 0;
+}
+
+
+void
+qemuBlockThrottleFiltersDetach(qemuMonitor *mon,
+                               qemuBlockThrottleFiltersData *data)
+{
+    size_t i;
+
+    for (i = data->nfilterdata; i > 0; i--)
+        qemuBlockThrottleFilterAttachRollback(mon, data->filterdata[i-1]);
+}
+
+
 int
 qemuBlockRemoveImageMetadata(virQEMUDriver *driver,
                              virDomainObj *vm,
diff --git a/src/qemu/qemu_block.h b/src/qemu/qemu_block.h
index f9e961d85d..9888954ce4 100644
--- a/src/qemu/qemu_block.h
+++ b/src/qemu/qemu_block.h
@@ -214,6 +214,59 @@ qemuBlockStorageSourceCreateDetectSize(GHashTable *blockNamedNodeData,
                                        virStorageSource *src,
                                        virStorageSource *templ);
 
+void
+qemuBlockThrottleFilterSetNodename(virDomainThrottleFilterDef *filter,
+                                   char *nodename);
+
+const char *
+qemuBlockThrottleFilterGetNodename(virDomainThrottleFilterDef *filter);
+
+virJSONValue *
+qemuBlockThrottleFilterGetProps(virDomainThrottleFilterDef *filter,
+                                const char *parentNodeName);
+
+typedef struct qemuBlockThrottleFilterAttachData qemuBlockThrottleFilterAttachData;
+struct qemuBlockThrottleFilterAttachData {
+    virJSONValue *filterProps;
+    const char *filterNodeName;
+    bool filterAttached;
+};
+
+qemuBlockThrottleFilterAttachData *
+qemuBlockThrottleFilterAttachPrepareBlockdev(virDomainThrottleFilterDef *throttlefilter,
+                                             const char *parentNodeName);
+
+void
+qemuBlockThrottleFilterAttachDataFree(qemuBlockThrottleFilterAttachData *data);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(qemuBlockThrottleFilterAttachData,
+                              qemuBlockThrottleFilterAttachDataFree);
+
+void
+qemuBlockThrottleFilterAttachRollback(qemuMonitor *mon,
+                                      qemuBlockThrottleFilterAttachData *data);
+
+struct _qemuBlockThrottleFiltersData {
+    qemuBlockThrottleFilterAttachData **filterdata;
+    size_t nfilterdata;
+};
+
+typedef struct _qemuBlockThrottleFiltersData qemuBlockThrottleFiltersData;
+
+void
+qemuBlockThrottleFiltersDataFree(qemuBlockThrottleFiltersData *data);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(qemuBlockThrottleFiltersData,
+                              qemuBlockThrottleFiltersDataFree);
+
+int
+qemuBlockThrottleFiltersAttach(qemuMonitor *mon,
+                               qemuBlockThrottleFiltersData *data);
+
+void
+qemuBlockThrottleFiltersDetach(qemuMonitor *mon,
+                               qemuBlockThrottleFiltersData *data);
+
 int
 qemuBlockRemoveImageMetadata(virQEMUDriver *driver,
                              virDomainObj *vm,
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 2d0eddc79e..5ccae956d3 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -1582,6 +1582,13 @@ qemuDiskConfigBlkdeviotuneEnabled(const virDomainDiskDef *disk)
 }
 
 
+bool
+qemuDiskConfigThrottleFiltersEnabled(const virDomainDiskDef *disk)
+{
+    return disk->nthrottlefilters > 0;
+}
+
+
 /**
  * qemuDiskBusIsSD:
  * @bus: disk bus
@@ -11055,6 +11062,83 @@ qemuBuildStorageSourceChainAttachPrepareBlockdevOne(qemuBlockStorageSourceChainD
 }
 
 
+/**
+ * qemuBuildThrottleFiltersAttachPrepareBlockdevOne:
+ * @data: filter chain data, which consists of array of filters and size of such array
+ * @throttlefilter: new filter to be added into filter array
+ * @parentNodeName: parent nodename for this new throttlefilter, which is used to build "blockdev-add" QMP request
+ *
+ * Build filter node chain to provide more flexibility for block disk I/O limits
+ */
+static int
+qemuBuildThrottleFiltersAttachPrepareBlockdevOne(qemuBlockThrottleFiltersData *data,
+                                                 virDomainThrottleFilterDef *throttlefilter,
+                                                 const char *parentNodeName)
+{
+    g_autoptr(qemuBlockThrottleFilterAttachData) elem = NULL;
+
+    if (!(elem = qemuBlockThrottleFilterAttachPrepareBlockdev(throttlefilter, parentNodeName)))
+        return -1;
+
+    VIR_APPEND_ELEMENT(data->filterdata, data->nfilterdata, elem);
+    return 0;
+}
+
+
+/**
+ * qemuBuildThrottleFiltersAttachPrepareBlockdev:
+ * @disk: domain disk
+ *
+ * Build filter node chain to provide more flexibility for block disk I/O limits
+ */
+qemuBlockThrottleFiltersData *
+qemuBuildThrottleFiltersAttachPrepareBlockdev(virDomainDiskDef *disk)
+{
+    g_autoptr(qemuBlockThrottleFiltersData) data = NULL;
+    size_t i;
+    const char * parentNodeName = NULL;
+    g_autofree char *tmp_nodename = NULL;
+
+    data = g_new0(qemuBlockThrottleFiltersData, 1);
+    /* get starting parentNodename, e.g. libvirt-1-format */
+    parentNodeName = qemuBlockStorageSourceGetEffectiveNodename(disk->src);
+    /* build filterdata, which contains all filters info and sequence info through parentNodeName */
+    for (i = 0; i < disk->nthrottlefilters; i++) {
+        tmp_nodename = g_strdup(parentNodeName);
+        if (qemuBuildThrottleFiltersAttachPrepareBlockdevOne(data, disk->throttlefilters[i], tmp_nodename) < 0)
+            return NULL;
+        parentNodeName = disk->throttlefilters[i]->nodename;
+    }
+
+    return g_steal_pointer(&data);
+}
+
+
+/**
+ * qemuBuildThrottleFiltersDetachPrepareBlockdev:
+ * @disk: domain disk
+ *
+ * Build filters data for later "blockdev-del"
+ */
+qemuBlockThrottleFiltersData *
+qemuBuildThrottleFiltersDetachPrepareBlockdev(virDomainDiskDef *disk)
+{
+    g_autoptr(qemuBlockThrottleFiltersData) data = g_new0(qemuBlockThrottleFiltersData, 1);
+    size_t i;
+
+    /* build filterdata, which contains filters info and sequence info */
+    for (i = 0; i < disk->nthrottlefilters; i++) {
+        g_autoptr(qemuBlockThrottleFilterAttachData) elem = g_new0(qemuBlockThrottleFilterAttachData, 1);
+        /* ignore other fields since the following info are enough for "blockdev-del" */
+        elem->filterNodeName = qemuBlockThrottleFilterGetNodename(disk->throttlefilters[i]);
+        elem->filterAttached = true;
+
+        VIR_APPEND_ELEMENT(data->filterdata, data->nfilterdata, elem);
+    }
+    return g_steal_pointer(&data);
+}
+
+
 /**
  * qemuBuildStorageSourceChainAttachPrepareBlockdev:
  * @top: storage source chain
diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
index dca8877703..ce39acfb2c 100644
--- a/src/qemu/qemu_command.h
+++ b/src/qemu/qemu_command.h
@@ -116,6 +116,15 @@ qemuBlockStorageSourceChainData *
 qemuBuildStorageSourceChainAttachPrepareBlockdevTop(virStorageSource *top,
                                                     virStorageSource *backingStore);
 
+qemuBlockThrottleFiltersData *
+qemuBuildThrottleFiltersAttachPrepareBlockdev(virDomainDiskDef *disk);
+
+qemuBlockThrottleFiltersData *
+qemuBuildThrottleFiltersDetachPrepareBlockdev(virDomainDiskDef *disk);
+
+bool
+qemuDiskConfigThrottleFiltersEnabled(const virDomainDiskDef *disk);
+
 virJSONValue *
 qemuBuildDiskDeviceProps(const virDomainDef *def,
                          virDomainDiskDef *disk,
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 7ba2ea4a5e..2831036e23 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -7989,7 +7989,8 @@ qemuDomainDetermineDiskChain(virQEMUDriver *driver,
  * @disk: disk definition object
  *
  * Returns the pointer to the node-name of the topmost layer used by @disk as
- * backend. Currently returns the nodename of the copy-on-read filter if enabled
+ * backend. Currently returns the nodename of top throttle filter if enabled
+ * or the nodename of the copy-on-read filter if enabled
  * or the nodename of the top image's format driver. Empty disks return NULL.
  * This must be used only with disks instantiated via -blockdev (thus not
  * for SD cards).
@@ -8005,6 +8006,10 @@ qemuDomainDiskGetTopNodename(virDomainDiskDef *disk)
     if (disk->copy_on_read == VIR_TRISTATE_SWITCH_ON)
         return priv->nodeCopyOnRead;
 
+    /* If disk has throttles, take top throttle node name */
+    if (disk->nthrottlefilters > 0)
+        return disk->throttlefilters[disk->nthrottlefilters-1]->nodename;
+
     return qemuBlockStorageSourceGetEffectiveNodename(disk->src);
 }
 
@@ -11336,6 +11341,32 @@ qemuDomainPrepareStorageSourceBlockdevNodename(virDomainDiskDef *disk,
 }
 
 
+int
+qemuDomainPrepareThrottleFilterBlockdevNodename(virDomainThrottleFilterDef *filter,
+                                                const char *nodenameprefix)
+{
+    char *nodename = g_strdup_printf("%s-filter", nodenameprefix);
+
+    qemuBlockThrottleFilterSetNodename(filter, nodename);
+
+    return 0;
+}
+
+
+int
+qemuDomainPrepareThrottleFilterBlockdev(virDomainThrottleFilterDef *filter,
+                                        qemuDomainObjPrivate *priv)
+{
+    g_autofree char *nodenameprefix = NULL;
+
+    filter->id = qemuDomainStorageIDNew(priv);
+
+    nodenameprefix = g_strdup_printf("libvirt-%u", filter->id);
+
+    return qemuDomainPrepareThrottleFilterBlockdevNodename(filter,  nodenameprefix);
+}
+
+
 int
 qemuDomainPrepareStorageSourceBlockdev(virDomainDiskDef *disk,
                                        virStorageSource *src,
@@ -11359,6 +11390,7 @@ qemuDomainPrepareDiskSourceBlockdev(virDomainDiskDef *disk,
 {
     qemuDomainDiskPrivate *diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
     virStorageSource *n;
+    size_t i;
 
     if (disk->copy_on_read == VIR_TRISTATE_SWITCH_ON &&
         !diskPriv->nodeCopyOnRead)
@@ -11369,6 +11401,11 @@ qemuDomainPrepareDiskSourceBlockdev(virDomainDiskDef *disk,
             return -1;
     }
 
+    for (i = 0; i < disk->nthrottlefilters; i++) {
+        if (qemuDomainPrepareThrottleFilterBlockdev(disk->throttlefilters[i], priv) < 0)
+            return -1;
+    }
+
     return 0;
 }
 
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index a3089ea449..38bba4e3ae 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -772,6 +772,14 @@ int qemuDomainPrepareStorageSourceBlockdev(virDomainDiskDef *disk,
                                            qemuDomainObjPrivate *priv,
                                            virQEMUDriverConfig *cfg);
 
+int
+qemuDomainPrepareThrottleFilterBlockdev(virDomainThrottleFilterDef *filter,
+                                        qemuDomainObjPrivate *priv);
+
+int
+qemuDomainPrepareThrottleFilterBlockdevNodename(virDomainThrottleFilterDef *filter,
+                                                const char *nodenameprefix);
+
 void qemuDomainCleanupAdd(virDomainObj *vm,
                           qemuDomainCleanupCallback cb);
 void qemuDomainCleanupRemove(virDomainObj *vm,
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index f7e435d6d5..60989ae693 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -14828,6 +14828,12 @@ qemuDomainDiskBlockIoTuneIsSupported(virDomainDiskDef *disk)
         return false;
     }
 
+    if (disk->throttlefilters) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("block 'iotune' can't be used together with 'throttlefilters' for disk '%1$s'"), disk->dst);
+        return false;
+    }
+
     return true;
 }
 
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index 4a3f4f657e..103b9e9f00 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -657,6 +657,7 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
                             virDomainAsyncJob asyncJob)
 {
     g_autoptr(qemuBlockStorageSourceChainData) data = NULL;
+    g_autoptr(qemuBlockThrottleFiltersData) filterData = NULL;
     qemuDomainObjPrivate *priv = vm->privateData;
     g_autoptr(virJSONValue) devprops = NULL;
     bool extensionDeviceAttached = false;
@@ -695,6 +696,19 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
         if (rc < 0)
             goto rollback;
 
+        /* Setup throttling filters
+        * add additional "blockdev-add"(throttle filter) between "blockdev-add" (qemuBlockStorageSourceChainAttach) and "device_add" (qemuDomainAttachExtensionDevice)
+        */
+        if ((filterData = qemuBuildThrottleFiltersAttachPrepareBlockdev(disk))) {
+            if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
+                return -1;
+            /* QMP requests("blockdev-add" with "driver":"throttle") to QEMU */
+            rc = qemuBlockThrottleFiltersAttach(priv->mon, filterData);
+            qemuDomainObjExitMonitor(vm);
+            if (rc < 0)
+                goto rollback;
+        }
+
         if (disk->transient) {
             g_autoptr(qemuBlockStorageSourceAttachData) backend = NULL;
             g_autoptr(GHashTable) blockNamedNodeData = NULL;
@@ -766,6 +780,8 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
     if (extensionDeviceAttached)
         ignore_value(qemuDomainDetachExtensionDevice(priv->mon, &disk->info));
 
+    qemuBlockThrottleFiltersDetach(priv->mon, filterData);
+
     qemuBlockStorageSourceChainDetach(priv->mon, data);
 
     qemuDomainObjExitMonitor(vm);
@@ -921,6 +937,14 @@ qemuDomainAttachDeviceDiskLiveInternal(virQEMUDriver *driver,
     bool releaseSeclabel = false;
     int ret = -1;
 
+    if (disk->device == VIR_DOMAIN_DISK_DEVICE_CDROM) {
+        if (disk->nthrottlefilters > 0) {
+            virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
+                        _("cdrom device with throttle filters isn't supported"));
+            return -1;
+        }
+    }
+
     if (disk->device == VIR_DOMAIN_DISK_DEVICE_FLOPPY) {
         virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
                        _("floppy device hotplug isn't supported"));
@@ -4499,6 +4523,7 @@ qemuDomainRemoveDiskDevice(virQEMUDriver *driver,
 {
     qemuDomainDiskPrivate *diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
     g_autoptr(qemuBlockStorageSourceChainData) diskBackend = NULL;
+    g_autoptr(qemuBlockThrottleFiltersData) filterData = NULL;
     size_t i;
     qemuDomainObjPrivate *priv = vm->privateData;
     int ret = -1;
@@ -4537,6 +4562,14 @@ qemuDomainRemoveDiskDevice(virQEMUDriver *driver,
         }
     }
 
+    qemuDomainObjEnterMonitor(vm);
+    /* QMP request("blockdev-del") to QEMU to delete throttle filter*/
+    if ((filterData = qemuBuildThrottleFiltersDetachPrepareBlockdev(disk))) {
+        /* "qemuBlockThrottleFiltersDetach" is used in rollback within "qemuDomainAttachDiskGeneric" as well */
+        qemuBlockThrottleFiltersDetach(priv->mon, filterData);
+    }
+    qemuDomainObjExitMonitor(vm);
+
     qemuDomainObjEnterMonitor(vm);
 
     if (diskBackend)
-- 
2.34.1
Re: [PATCH RFC v3 09/16] qemu: hotplug: Support hot attach and detach block disk along with throttle filters
Posted by Peter Krempa 1 month, 1 week ago
On Wed, Jun 12, 2024 at 03:02:17 -0700, wucf@linux.ibm.com wrote:
> From: Chun Feng Wu <wucf@linux.ibm.com>
> 
> When attaching disk along with specified throttle groups, those groups will be chained up by parent node name, this change includes service side codes:
> * Each filter references one throttle group by group name
> * Update "qemuDomainDiskGetTopNodename" to take top throttle node name if disk has throttles
> * Each filter has a nodename, and those filters are chained up in sequence
> * Filter nodename index is generated by reusing qemuDomainStorageIDNew and current global sequence number is persistented in virDomainObj->privateData(qemuDomainObjPrivate)->nodenameindex
> * During hotplug, filter is created through QMP request("blockdev-add" with "driver":"throttle") to QEMU
> * Delete filters by "qemuBlockThrottleFiltersDetach"("blockdev-del") when detaching device
> * Use "iotune" and "throttlefilters" exclusively for specific disk
> 
> Signed-off-by: Chun Feng Wu <wucf@linux.ibm.com>
> ---
>  src/conf/domain_validate.c |   8 +++
>  src/qemu/qemu_block.c      | 131 +++++++++++++++++++++++++++++++++++++
>  src/qemu/qemu_block.h      |  53 +++++++++++++++
>  src/qemu/qemu_command.c    |  84 ++++++++++++++++++++++++
>  src/qemu/qemu_command.h    |   9 +++
>  src/qemu/qemu_domain.c     |  39 ++++++++++-
>  src/qemu/qemu_domain.h     |   8 +++
>  src/qemu/qemu_driver.c     |   6 ++
>  src/qemu/qemu_hotplug.c    |  33 ++++++++++
>  9 files changed, 370 insertions(+), 1 deletion(-)
> 
> diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
> index 395e036e8f..4cc5ed7577 100644
> --- a/src/conf/domain_validate.c
> +++ b/src/conf/domain_validate.c
> @@ -942,6 +942,14 @@ virDomainDiskDefValidate(const virDomainDef *def,
>          return -1;
>      }
>  
> +    if (disk->throttlefilters && (disk->blkdeviotune.group_name ||
> +        virDomainBlockIoTuneInfoHasAny(&disk->blkdeviotune))) {

This is hard to read as you've broken up the line within a nested brace.
It may confuse readers.

Rewrite as:

    if (disk->throttlefilters &&
        (disk->blkdeviotune.group_name ||
        virDomainBlockIoTuneInfoHasAny(&disk->blkdeviotune))) {

> +        virReportError(VIR_ERR_XML_ERROR,

Operation unsupported.

> +                       _("block 'throttlefilters' can't be used together with 'iotune' for disk '%1$s'"),
> +                       disk->dst);
> +        return -1;
> +    }
> +
>      return 0;
>  }
>  
> diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
> index 738b72d7ea..9b8ff65887 100644
> --- a/src/qemu/qemu_block.c
> +++ b/src/qemu/qemu_block.c
> @@ -2739,6 +2739,137 @@ qemuBlockStorageSourceCreateDetectSize(GHashTable *blockNamedNodeData,
>  }
>  
>  
> +void
> +qemuBlockThrottleFilterSetNodename(virDomainThrottleFilterDef *filter,
> +                                   char *nodename)
> +{
> +    g_free(filter->nodename);
> +    filter->nodename = nodename;
> +}
> +
> +
> +const char *
> +qemuBlockThrottleFilterGetNodename(virDomainThrottleFilterDef *filter)
> +{
> +    return filter->nodename;
> +}

All of this helper infrastructure doesn't belong to a patch that is
declaring that it's dealing with the setup of qemu. (I also wrote that
below as I've noticed it there).

It makes this patch too complex and thus hard to review. It's also the
reason it takes me so long. I'm demotivated on such commits as it takes
way too long to go through.

Any setup of nodenames and other libvirt-internal stuff, such as
validatio of config etc should be separated.


> +/**
> + * qemuBlockThrottleFilterGetProps:
> + * @filter: throttle filter
> + * @parentNodeName: parent nodename of @filter
> + *
> + * Build "arguments" part of "blockdev-add" QMP cmd.
> + * e.g. {"execute":"blockdev-add", "arguments":{"driver":"throttle",
> + * "node-name":"libvirt-2-filter",  "throttle-group":"limits0",
> + * "file": "libvirt-1-format"}}

Avoid this JSON in comments, as it's obvious from the code. Rather
describe any special handling if needed.

> + */
> +virJSONValue *
> +qemuBlockThrottleFilterGetProps(virDomainThrottleFilterDef *filter,
> +                                const char *parentNodeName)
> +{
> +    g_autoptr(virJSONValue) props = NULL;
> +
> +    if (virJSONValueObjectAdd(&props,
> +                              "s:driver", "throttle",
> +                              "s:node-name", qemuBlockThrottleFilterGetNodename(filter),
> +                              "s:throttle-group", filter->group_name,
> +                              "s:file", parentNodeName,
> +                              NULL) < 0)
> +        return 0;

return NULL; It looks confusing because 0 is success in our code.

> +
> +    return g_steal_pointer(&props);
> +}

[...]

> +qemuBlockThrottleFilterAttachData *
> +qemuBlockThrottleFilterAttachPrepareBlockdev(virDomainThrottleFilterDef *filter,
> +                                             const char *parentNodeName)
> +{
> +    g_autoptr(qemuBlockThrottleFilterAttachData) data = NULL;
> +
> +    data = g_new0(qemuBlockThrottleFilterAttachData, 1);
> +
> +    if (!(data->filterProps = qemuBlockThrottleFilterGetProps(filter, parentNodeName)))
> +        return NULL;
> +
> +    data->filterNodeName = qemuBlockThrottleFilterGetNodename(filter);
> +    data->filterAttached = true;

Well, if you set this to true right here, it's pointless to even have
the variable.

The code you've copied this from uses the 'Attached' variable so that
it's set only after the object is created in qemu. This allows safe
rollback.

If you set it to true for everything you will attempt to potentially
roll back stuff that was not yet set in qemu, making the variable itself
pointless.

> +
> +    return g_steal_pointer(&data);
> +}
> +
> +
> +void
> +qemuBlockThrottleFilterAttachRollback(qemuMonitor *mon,
> +                                      qemuBlockThrottleFilterAttachData *data)
> +{
> +    virErrorPtr orig_err;
> +
> +    virErrorPreserveLast(&orig_err);
> +
> +    if (data->filterAttached)
> +        ignore_value(qemuMonitorBlockdevDel(mon, data->filterNodeName));

... here ...

> +
> +    virErrorRestore(&orig_err);
> +}
> +
> +
> +void
> +qemuBlockThrottleFiltersDataFree(qemuBlockThrottleFiltersData *data)
> +{
> +    size_t i;
> +
> +    if (!data)
> +        return;
> +
> +    for (i = 0; i < data->nfilterdata; i++)
> +        qemuBlockThrottleFilterAttachDataFree(data->filterdata[i]);
> +
> +    g_free(data->filterdata);
> +    g_free(data);
> +}
> +
> +
> +int
> +qemuBlockThrottleFiltersAttach(qemuMonitor *mon,
> +                               qemuBlockThrottleFiltersData *data)
> +{
> +    size_t i;
> +
> +    for (i = 0; i < data->nfilterdata; i++) {
> +        if (qemuMonitorBlockdevAdd(mon, &data->filterdata[i]->filterProps) < 0)

This function requires the monitor to be locked. If you want to have
this as a separate function you must document it and state this fact.

> +            return -1;
> +        data->filterdata[i]->filterAttached = true;
> +    }
> +
> +    return 0;
> +}
> +
> +
> +void
> +qemuBlockThrottleFiltersDetach(qemuMonitor *mon,
> +                               qemuBlockThrottleFiltersData *data)
> +{
> +    size_t i;
> +
> +    for (i = data->nfilterdata; i > 0; i--)
> +        qemuBlockThrottleFilterAttachRollback(mon, data->filterdata[i-1]);
> +}
> +
> +
>  int
>  qemuBlockRemoveImageMetadata(virQEMUDriver *driver,
>                               virDomainObj *vm,
> diff --git a/src/qemu/qemu_block.h b/src/qemu/qemu_block.h
> index f9e961d85d..9888954ce4 100644
> --- a/src/qemu/qemu_block.h
> +++ b/src/qemu/qemu_block.h
> @@ -214,6 +214,59 @@ qemuBlockStorageSourceCreateDetectSize(GHashTable *blockNamedNodeData,
>                                         virStorageSource *src,
>                                         virStorageSource *templ);
>  
> +void
> +qemuBlockThrottleFilterSetNodename(virDomainThrottleFilterDef *filter,
> +                                   char *nodename);
> +
> +const char *
> +qemuBlockThrottleFilterGetNodename(virDomainThrottleFilterDef *filter);
> +
> +virJSONValue *
> +qemuBlockThrottleFilterGetProps(virDomainThrottleFilterDef *filter,
> +                                const char *parentNodeName);

This function isn't used outside of qemu_block.c so it doesn't need to
be exported.

> +
> +typedef struct qemuBlockThrottleFilterAttachData qemuBlockThrottleFilterAttachData;
> +struct qemuBlockThrottleFilterAttachData {
> +    virJSONValue *filterProps;
> +    const char *filterNodeName;
> +    bool filterAttached;
> +};
> +
> +qemuBlockThrottleFilterAttachData *
> +qemuBlockThrottleFilterAttachPrepareBlockdev(virDomainThrottleFilterDef *throttlefilter,
> +                                             const char *parentNodeName);
> +
> +void
> +qemuBlockThrottleFilterAttachDataFree(qemuBlockThrottleFilterAttachData *data);
> +
> +G_DEFINE_AUTOPTR_CLEANUP_FUNC(qemuBlockThrottleFilterAttachData,
> +                              qemuBlockThrottleFilterAttachDataFree);
> +
> +void
> +qemuBlockThrottleFilterAttachRollback(qemuMonitor *mon,
> +                                      qemuBlockThrottleFilterAttachData *data);
> +
> +struct _qemuBlockThrottleFiltersData {
> +    qemuBlockThrottleFilterAttachData **filterdata;
> +    size_t nfilterdata;
> +};
> +
> +typedef struct _qemuBlockThrottleFiltersData qemuBlockThrottleFiltersData;
> +
> +void
> +qemuBlockThrottleFiltersDataFree(qemuBlockThrottleFiltersData *data);
> +
> +G_DEFINE_AUTOPTR_CLEANUP_FUNC(qemuBlockThrottleFiltersData,
> +                              qemuBlockThrottleFiltersDataFree);
> +
> +int
> +qemuBlockThrottleFiltersAttach(qemuMonitor *mon,
> +                               qemuBlockThrottleFiltersData *data);
> +
> +void
> +qemuBlockThrottleFiltersDetach(qemuMonitor *mon,
> +                               qemuBlockThrottleFiltersData *data);
> +
>  int
>  qemuBlockRemoveImageMetadata(virQEMUDriver *driver,
>                               virDomainObj *vm,
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 2d0eddc79e..5ccae956d3 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -1582,6 +1582,13 @@ qemuDiskConfigBlkdeviotuneEnabled(const virDomainDiskDef *disk)
>  }
>  
>  
> +bool
> +qemuDiskConfigThrottleFiltersEnabled(const virDomainDiskDef *disk)

This function doesn't need to be exported.

> +{
> +    return disk->nthrottlefilters > 0;
> +}
> +
> +
>  /**
>   * qemuDiskBusIsSD:
>   * @bus: disk bus
> @@ -11055,6 +11062,83 @@ qemuBuildStorageSourceChainAttachPrepareBlockdevOne(qemuBlockStorageSourceChainD
>  }
>  
>  
> +/**
> + * qemuBuildThrottleFiltersAttachPrepareBlockdevOne:
> + * @data: filter chain data, which consists of array of filters and size of such array
> + * @throttlefilter: new filter to be added into filter array
> + * @parentNodeName: parent nodename for this new throttlefilter, which is used to build "blockdev-add" QMP request

the stuff after the comma can be dropped.

> + *
> + * Build filter node chain to provide more flexibility for block disk I/O limits
> + */
> +static int
> +qemuBuildThrottleFiltersAttachPrepareBlockdevOne(qemuBlockThrottleFiltersData *data,
> +                                                 virDomainThrottleFilterDef *throttlefilter,
> +                                                 const char *parentNodeName)
> +{
> +    g_autoptr(qemuBlockThrottleFilterAttachData) elem = NULL;
> +
> +    if (!(elem = qemuBlockThrottleFilterAttachPrepareBlockdev(throttlefilter, parentNodeName)))
> +        return -1;
> +
> +    VIR_APPEND_ELEMENT(data->filterdata, data->nfilterdata, elem);
> +    return 0;
> +}
> +
> +
> +/**
> + * qemuBuildThrottleFiltersAttachPrepareBlockdev:
> + * @disk: domain disk
> + *
> + * Build filter node chain to provide more flexibility for block disk I/O limits
> + */
> +qemuBlockThrottleFiltersData *
> +qemuBuildThrottleFiltersAttachPrepareBlockdev(virDomainDiskDef *disk)
> +{
> +    g_autoptr(qemuBlockThrottleFiltersData) data = NULL;
> +    size_t i;
> +    const char * parentNodeName = NULL;
> +    g_autofree char *tmp_nodename = NULL;
> +
> +    data = g_new0(qemuBlockThrottleFiltersData, 1);
> +    /* get starting parentNodename, e.g. libvirt-1-format */
> +    parentNodeName = qemuBlockStorageSourceGetEffectiveNodename(disk->src);
> +    /* build filterdata, which contains all filters info and sequence info through parentNodeName */
> +    for (i = 0; i < disk->nthrottlefilters; i++) {
> +        tmp_nodename = g_strdup(parentNodeName);
> +        if (qemuBuildThrottleFiltersAttachPrepareBlockdevOne(data, disk->throttlefilters[i], tmp_nodename) < 0)
> +            return NULL;

The nodename copied into 'tmp_nodename' is leaked on every iteration.
Also qemuBuildThrottleFiltersAttachPrepareBlockdevOne doesn't modify it
so there's no point in copying it.

> +        parentNodeName = disk->throttlefilters[i]->nodename;
> +    }
> +
> +    return g_steal_pointer(&data);
> +}
> +
> +
> +/**
> + * qemuBuildThrottleFiltersDetachPrepareBlockdev:
> + * @disk: domain disk
> + *
> + * Build filters data for later "blockdev-del"
> + */
> +qemuBlockThrottleFiltersData *
> +qemuBuildThrottleFiltersDetachPrepareBlockdev(virDomainDiskDef *disk)
> +{
> +    g_autoptr(qemuBlockThrottleFiltersData) data = g_new0(qemuBlockThrottleFiltersData, 1);
> +    size_t i;
> +
> +    /* build filterdata, which contains filters info and sequence info */
> +    for (i = 0; i < disk->nthrottlefilters; i++) {
> +        g_autoptr(qemuBlockThrottleFilterAttachData) elem = g_new0(qemuBlockThrottleFilterAttachData, 1);
> +        /* ignore other fields since the following info are enough for "blockdev-del" */
> +        elem->filterNodeName = qemuBlockThrottleFilterGetNodename(disk->throttlefilters[i]);

So I didn't yet see any code that serializes any of this in the status
XML, thus it seems that this will not work after you restart
libvirtd/virtqemud if a VM with filters is running, and then try to
detach disks. You'll need to add that, or base the filter nodename on
something else.

Note that presence of the node name is authoritative and we must not try
to regenerate it. That would hinder changing the node names in the
future.

See how the copyOnRead layer node name is stored.

> +        elem->filterAttached = true;
> +
> +        VIR_APPEND_ELEMENT(data->filterdata, data->nfilterdata, elem);
> +    }
> +    return g_steal_pointer(&data);
> +}
> +
> +
>  /**
>   * qemuBuildStorageSourceChainAttachPrepareBlockdev:
>   * @top: storage source chain


[...]


> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 7ba2ea4a5e..2831036e23 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -7989,7 +7989,8 @@ qemuDomainDetermineDiskChain(virQEMUDriver *driver,
>   * @disk: disk definition object
>   *
>   * Returns the pointer to the node-name of the topmost layer used by @disk as
> - * backend. Currently returns the nodename of the copy-on-read filter if enabled
> + * backend. Currently returns the nodename of top throttle filter if enabled
> + * or the nodename of the copy-on-read filter if enabled
>   * or the nodename of the top image's format driver. Empty disks return NULL.
>   * This must be used only with disks instantiated via -blockdev (thus not
>   * for SD cards).
> @@ -8005,6 +8006,10 @@ qemuDomainDiskGetTopNodename(virDomainDiskDef *disk)
>      if (disk->copy_on_read == VIR_TRISTATE_SWITCH_ON)
>          return priv->nodeCopyOnRead;
>  
> +    /* If disk has throttles, take top throttle node name */
> +    if (disk->nthrottlefilters > 0)
> +        return disk->throttlefilters[disk->nthrottlefilters-1]->nodename;


       return disk->throttlefilters[disk->nthrottlefilters - 1]->nodename;

       (extra spaces around subtraction operator)


>      return qemuBlockStorageSourceGetEffectiveNodename(disk->src);
>  }
>  
> @@ -11336,6 +11341,32 @@ qemuDomainPrepareStorageSourceBlockdevNodename(virDomainDiskDef *disk,
>  }
>  
>  
> +int
> +qemuDomainPrepareThrottleFilterBlockdevNodename(virDomainThrottleFilterDef *filter,

This function is used only in this file so it doesn't need to be
exported. Also it's used only in one place so it can be inlined as it
makes the code harder to follow than necessary.

> +                                                const char *nodenameprefix)
> +{
> +    char *nodename = g_strdup_printf("%s-filter", nodenameprefix);
> +
> +    qemuBlockThrottleFilterSetNodename(filter, nodename);
> +
> +    return 0;
> +}
> +
> +
> +int
> +qemuDomainPrepareThrottleFilterBlockdev(virDomainThrottleFilterDef *filter,
> +                                        qemuDomainObjPrivate *priv)

This function is used only in this module thus it doesn't need to be
exported. Also it can't fail so there's no point in having a return
value.

> +{
> +    g_autofree char *nodenameprefix = NULL;
> +
> +    filter->id = qemuDomainStorageIDNew(priv);
> +
> +    nodenameprefix = g_strdup_printf("libvirt-%u", filter->id);

       nodename = g_strdup_printf("libvirt-%u-filter" ...

Instead of the convoluted mess and the below call.


> +
> +    return qemuDomainPrepareThrottleFilterBlockdevNodename(filter,  nodenameprefix);

[...]

> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index f7e435d6d5..60989ae693 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -14828,6 +14828,12 @@ qemuDomainDiskBlockIoTuneIsSupported(virDomainDiskDef *disk)
>          return false;
>      }
>  
> +    if (disk->throttlefilters) {
> +        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> +                       _("block 'iotune' can't be used together with 'throttlefilters' for disk '%1$s'"), disk->dst);
> +        return false;
> +    }

Preferrably add all this infrastructure which doesn't deal with the
setup of qemu in a separate patch.

This patch is overly complex as it intermixes all of these validation
bits with the actual qemu interaction.

> +
>      return true;
>  }
>  
> diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
> index 4a3f4f657e..103b9e9f00 100644
> --- a/src/qemu/qemu_hotplug.c
> +++ b/src/qemu/qemu_hotplug.c
> @@ -657,6 +657,7 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
>                              virDomainAsyncJob asyncJob)
>  {
>      g_autoptr(qemuBlockStorageSourceChainData) data = NULL;
> +    g_autoptr(qemuBlockThrottleFiltersData) filterData = NULL;
>      qemuDomainObjPrivate *priv = vm->privateData;
>      g_autoptr(virJSONValue) devprops = NULL;
>      bool extensionDeviceAttached = false;
> @@ -695,6 +696,19 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
>          if (rc < 0)
>              goto rollback;
>  
> +        /* Setup throttling filters
> +        * add additional "blockdev-add"(throttle filter) between "blockdev-add" (qemuBlockStorageSourceChainAttach) and "device_add" (qemuDomainAttachExtensionDevice)

Drop above line.

> +        */
> +        if ((filterData = qemuBuildThrottleFiltersAttachPrepareBlockdev(disk))) {
> +            if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
> +                return -1;
> +            /* QMP requests("blockdev-add" with "driver":"throttle") to QEMU */
> +            rc = qemuBlockThrottleFiltersAttach(priv->mon, filterData);
> +            qemuDomainObjExitMonitor(vm);
> +            if (rc < 0)
> +                goto rollback;
> +        }

The ordering is wrong. In the prepare step you are ordering the node
name dependencies such as

 device -> copyOnRead Layer -> throttle -> image chain

OBviously to ensure hierarchy at hotplug they need to be plugged from
the end.

This block here is placed _AFTER_ copyOnRead layer instantiation, so
that will not work with disks with copyOnRead.


> +
>          if (disk->transient) {
>              g_autoptr(qemuBlockStorageSourceAttachData) backend = NULL;
>              g_autoptr(GHashTable) blockNamedNodeData = NULL;
> @@ -766,6 +780,8 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
>      if (extensionDeviceAttached)
>          ignore_value(qemuDomainDetachExtensionDevice(priv->mon, &disk->info));
>  
> +    qemuBlockThrottleFiltersDetach(priv->mon, filterData);
> +
>      qemuBlockStorageSourceChainDetach(priv->mon, data);
>  
>      qemuDomainObjExitMonitor(vm);
> @@ -921,6 +937,14 @@ qemuDomainAttachDeviceDiskLiveInternal(virQEMUDriver *driver,
>      bool releaseSeclabel = false;
>      int ret = -1;
>  
> +    if (disk->device == VIR_DOMAIN_DISK_DEVICE_CDROM) {
> +        if (disk->nthrottlefilters > 0) {
> +            virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
> +                        _("cdrom device with throttle filters isn't supported"));

So and now this is why I don't like the fact that you are doing this on
a per-disk level. On a per-image level (if you'd instantiate 'throttle'
layers when adding the image) this would not be a problem.

I'd strongly prefer if you modify this such that the trottle layers will
be instantiated at per-storage-source level both in XML and in the code.

> +            return -1;
> +        }
> +    }
> +
>      if (disk->device == VIR_DOMAIN_DISK_DEVICE_FLOPPY) {
>          virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
>                         _("floppy device hotplug isn't supported"));
> @@ -4499,6 +4523,7 @@ qemuDomainRemoveDiskDevice(virQEMUDriver *driver,
>  {
>      qemuDomainDiskPrivate *diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
>      g_autoptr(qemuBlockStorageSourceChainData) diskBackend = NULL;
> +    g_autoptr(qemuBlockThrottleFiltersData) filterData = NULL;
>      size_t i;
>      qemuDomainObjPrivate *priv = vm->privateData;
>      int ret = -1;
> @@ -4537,6 +4562,14 @@ qemuDomainRemoveDiskDevice(virQEMUDriver *driver,
>          }
>      }
>  
> +    qemuDomainObjEnterMonitor(vm);
> +    /* QMP request("blockdev-del") to QEMU to delete throttle filter*/
> +    if ((filterData = qemuBuildThrottleFiltersDetachPrepareBlockdev(disk))) {
> +        /* "qemuBlockThrottleFiltersDetach" is used in rollback within "qemuDomainAttachDiskGeneric" as well */
> +        qemuBlockThrottleFiltersDetach(priv->mon, filterData);

In which case this'd be automatic.


> +    }
> +    qemuDomainObjExitMonitor(vm);
> +
>      qemuDomainObjEnterMonitor(vm);
>  
>      if (diskBackend)
> -- 
> 2.34.1
> 

This patch should also contain the corresponding commandline
infrastructure for the filter itself.

The disk backend code is specifically unified by using the intermediate
struct so that it's guaranteed that both hotplug and commandline work
the same. Thus they need to be added simultaenously.
Re: [PATCH RFC v3 09/16] qemu: hotplug: Support hot attach and detach block disk along with throttle filters
Posted by Chun Feng Wu 1 month ago
On 2024/8/9 22:04, Peter Krempa wrote:
> On Wed, Jun 12, 2024 at 03:02:17 -0700,wucf@linux.ibm.com  wrote:
>> From: Chun Feng Wu<wucf@linux.ibm.com>
>>
>> When attaching disk along with specified throttle groups, those groups will be chained up by parent node name, this change includes service side codes:
>> * Each filter references one throttle group by group name
>> * Update "qemuDomainDiskGetTopNodename" to take top throttle node name if disk has throttles
>> * Each filter has a nodename, and those filters are chained up in sequence
>> * Filter nodename index is generated by reusing qemuDomainStorageIDNew and current global sequence number is persistented in virDomainObj->privateData(qemuDomainObjPrivate)->nodenameindex
>> * During hotplug, filter is created through QMP request("blockdev-add" with "driver":"throttle") to QEMU
>> * Delete filters by "qemuBlockThrottleFiltersDetach"("blockdev-del") when detaching device
>> * Use "iotune" and "throttlefilters" exclusively for specific disk
>>
>> Signed-off-by: Chun Feng Wu<wucf@linux.ibm.com>
>> ---
>>   src/conf/domain_validate.c |   8 +++
>>   src/qemu/qemu_block.c      | 131 +++++++++++++++++++++++++++++++++++++
>>   src/qemu/qemu_block.h      |  53 +++++++++++++++
>>   src/qemu/qemu_command.c    |  84 ++++++++++++++++++++++++
>>   src/qemu/qemu_command.h    |   9 +++
>>   src/qemu/qemu_domain.c     |  39 ++++++++++-
>>   src/qemu/qemu_domain.h     |   8 +++
>>   src/qemu/qemu_driver.c     |   6 ++
>>   src/qemu/qemu_hotplug.c    |  33 ++++++++++
>>   9 files changed, 370 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
>> index 395e036e8f..4cc5ed7577 100644
>> --- a/src/conf/domain_validate.c
>> +++ b/src/conf/domain_validate.c
>> @@ -942,6 +942,14 @@ virDomainDiskDefValidate(const virDomainDef *def,
>>           return -1;
>>       }
>>   
>> +    if (disk->throttlefilters && (disk->blkdeviotune.group_name ||
>> +        virDomainBlockIoTuneInfoHasAny(&disk->blkdeviotune))) {
> This is hard to read as you've broken up the line within a nested brace.
> It may confuse readers.
>
> Rewrite as:
>
>      if (disk->throttlefilters &&
>          (disk->blkdeviotune.group_name ||
>          virDomainBlockIoTuneInfoHasAny(&disk->blkdeviotune))) {
>
>> +        virReportError(VIR_ERR_XML_ERROR,
> Operation unsupported.
>
>> +                       _("block 'throttlefilters' can't be used together with 'iotune' for disk '%1$s'"),
>> +                       disk->dst);
>> +        return -1;
>> +    }
>> +
>>       return 0;
>>   }
>>   
>> diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
>> index 738b72d7ea..9b8ff65887 100644
>> --- a/src/qemu/qemu_block.c
>> +++ b/src/qemu/qemu_block.c
>> @@ -2739,6 +2739,137 @@ qemuBlockStorageSourceCreateDetectSize(GHashTable *blockNamedNodeData,
>>   }
>>   
>>   
>> +void
>> +qemuBlockThrottleFilterSetNodename(virDomainThrottleFilterDef *filter,
>> +                                   char *nodename)
>> +{
>> +    g_free(filter->nodename);
>> +    filter->nodename = nodename;
>> +}
>> +
>> +
>> +const char *
>> +qemuBlockThrottleFilterGetNodename(virDomainThrottleFilterDef *filter)
>> +{
>> +    return filter->nodename;
>> +}
> All of this helper infrastructure doesn't belong to a patch that is
> declaring that it's dealing with the setup of qemu. (I also wrote that
> below as I've noticed it there).
>
> It makes this patch too complex and thus hard to review. It's also the
> reason it takes me so long. I'm demotivated on such commits as it takes
> way too long to go through.
>
> Any setup of nodenames and other libvirt-internal stuff, such as
> validatio of config etc should be separated.
>
>
>> +/**
>> + * qemuBlockThrottleFilterGetProps:
>> + * @filter: throttle filter
>> + * @parentNodeName: parent nodename of @filter
>> + *
>> + * Build "arguments" part of "blockdev-add" QMP cmd.
>> + * e.g. {"execute":"blockdev-add", "arguments":{"driver":"throttle",
>> + * "node-name":"libvirt-2-filter",  "throttle-group":"limits0",
>> + * "file": "libvirt-1-format"}}
> Avoid this JSON in comments, as it's obvious from the code. Rather
> describe any special handling if needed.
>
>> + */
>> +virJSONValue *
>> +qemuBlockThrottleFilterGetProps(virDomainThrottleFilterDef *filter,
>> +                                const char *parentNodeName)
>> +{
>> +    g_autoptr(virJSONValue) props = NULL;
>> +
>> +    if (virJSONValueObjectAdd(&props,
>> +                              "s:driver", "throttle",
>> +                              "s:node-name", qemuBlockThrottleFilterGetNodename(filter),
>> +                              "s:throttle-group", filter->group_name,
>> +                              "s:file", parentNodeName,
>> +                              NULL) < 0)
>> +        return 0;
> return NULL; It looks confusing because 0 is success in our code.
>
>> +
>> +    return g_steal_pointer(&props);
>> +}
> [...]
>
>> +qemuBlockThrottleFilterAttachData *
>> +qemuBlockThrottleFilterAttachPrepareBlockdev(virDomainThrottleFilterDef *filter,
>> +                                             const char *parentNodeName)
>> +{
>> +    g_autoptr(qemuBlockThrottleFilterAttachData) data = NULL;
>> +
>> +    data = g_new0(qemuBlockThrottleFilterAttachData, 1);
>> +
>> +    if (!(data->filterProps = qemuBlockThrottleFilterGetProps(filter, parentNodeName)))
>> +        return NULL;
>> +
>> +    data->filterNodeName = qemuBlockThrottleFilterGetNodename(filter);
>> +    data->filterAttached = true;
> Well, if you set this to true right here, it's pointless to even have
> the variable.
>
> The code you've copied this from uses the 'Attached' variable so that
> it's set only after the object is created in qemu. This allows safe
> rollback.
>
> If you set it to true for everything you will attempt to potentially
> roll back stuff that was not yet set in qemu, making the variable itself
> pointless.
>
>> +
>> +    return g_steal_pointer(&data);
>> +}
>> +
>> +
>> +void
>> +qemuBlockThrottleFilterAttachRollback(qemuMonitor *mon,
>> +                                      qemuBlockThrottleFilterAttachData *data)
>> +{
>> +    virErrorPtr orig_err;
>> +
>> +    virErrorPreserveLast(&orig_err);
>> +
>> +    if (data->filterAttached)
>> +        ignore_value(qemuMonitorBlockdevDel(mon, data->filterNodeName));
> ... here ...
>
>> +
>> +    virErrorRestore(&orig_err);
>> +}
>> +
>> +
>> +void
>> +qemuBlockThrottleFiltersDataFree(qemuBlockThrottleFiltersData *data)
>> +{
>> +    size_t i;
>> +
>> +    if (!data)
>> +        return;
>> +
>> +    for (i = 0; i < data->nfilterdata; i++)
>> +        qemuBlockThrottleFilterAttachDataFree(data->filterdata[i]);
>> +
>> +    g_free(data->filterdata);
>> +    g_free(data);
>> +}
>> +
>> +
>> +int
>> +qemuBlockThrottleFiltersAttach(qemuMonitor *mon,
>> +                               qemuBlockThrottleFiltersData *data)
>> +{
>> +    size_t i;
>> +
>> +    for (i = 0; i < data->nfilterdata; i++) {
>> +        if (qemuMonitorBlockdevAdd(mon, &data->filterdata[i]->filterProps) < 0)
> This function requires the monitor to be locked. If you want to have
> this as a separate function you must document it and state this fact.
>
>> +            return -1;
>> +        data->filterdata[i]->filterAttached = true;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +
>> +void
>> +qemuBlockThrottleFiltersDetach(qemuMonitor *mon,
>> +                               qemuBlockThrottleFiltersData *data)
>> +{
>> +    size_t i;
>> +
>> +    for (i = data->nfilterdata; i > 0; i--)
>> +        qemuBlockThrottleFilterAttachRollback(mon, data->filterdata[i-1]);
>> +}
>> +
>> +
>>   int
>>   qemuBlockRemoveImageMetadata(virQEMUDriver *driver,
>>                                virDomainObj *vm,
>> diff --git a/src/qemu/qemu_block.h b/src/qemu/qemu_block.h
>> index f9e961d85d..9888954ce4 100644
>> --- a/src/qemu/qemu_block.h
>> +++ b/src/qemu/qemu_block.h
>> @@ -214,6 +214,59 @@ qemuBlockStorageSourceCreateDetectSize(GHashTable *blockNamedNodeData,
>>                                          virStorageSource *src,
>>                                          virStorageSource *templ);
>>   
>> +void
>> +qemuBlockThrottleFilterSetNodename(virDomainThrottleFilterDef *filter,
>> +                                   char *nodename);
>> +
>> +const char *
>> +qemuBlockThrottleFilterGetNodename(virDomainThrottleFilterDef *filter);
>> +
>> +virJSONValue *
>> +qemuBlockThrottleFilterGetProps(virDomainThrottleFilterDef *filter,
>> +                                const char *parentNodeName);
> This function isn't used outside of qemu_block.c so it doesn't need to
> be exported.
>
>> +
>> +typedef struct qemuBlockThrottleFilterAttachData qemuBlockThrottleFilterAttachData;
>> +struct qemuBlockThrottleFilterAttachData {
>> +    virJSONValue *filterProps;
>> +    const char *filterNodeName;
>> +    bool filterAttached;
>> +};
>> +
>> +qemuBlockThrottleFilterAttachData *
>> +qemuBlockThrottleFilterAttachPrepareBlockdev(virDomainThrottleFilterDef *throttlefilter,
>> +                                             const char *parentNodeName);
>> +
>> +void
>> +qemuBlockThrottleFilterAttachDataFree(qemuBlockThrottleFilterAttachData *data);
>> +
>> +G_DEFINE_AUTOPTR_CLEANUP_FUNC(qemuBlockThrottleFilterAttachData,
>> +                              qemuBlockThrottleFilterAttachDataFree);
>> +
>> +void
>> +qemuBlockThrottleFilterAttachRollback(qemuMonitor *mon,
>> +                                      qemuBlockThrottleFilterAttachData *data);
>> +
>> +struct _qemuBlockThrottleFiltersData {
>> +    qemuBlockThrottleFilterAttachData **filterdata;
>> +    size_t nfilterdata;
>> +};
>> +
>> +typedef struct _qemuBlockThrottleFiltersData qemuBlockThrottleFiltersData;
>> +
>> +void
>> +qemuBlockThrottleFiltersDataFree(qemuBlockThrottleFiltersData *data);
>> +
>> +G_DEFINE_AUTOPTR_CLEANUP_FUNC(qemuBlockThrottleFiltersData,
>> +                              qemuBlockThrottleFiltersDataFree);
>> +
>> +int
>> +qemuBlockThrottleFiltersAttach(qemuMonitor *mon,
>> +                               qemuBlockThrottleFiltersData *data);
>> +
>> +void
>> +qemuBlockThrottleFiltersDetach(qemuMonitor *mon,
>> +                               qemuBlockThrottleFiltersData *data);
>> +
>>   int
>>   qemuBlockRemoveImageMetadata(virQEMUDriver *driver,
>>                                virDomainObj *vm,
>> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
>> index 2d0eddc79e..5ccae956d3 100644
>> --- a/src/qemu/qemu_command.c
>> +++ b/src/qemu/qemu_command.c
>> @@ -1582,6 +1582,13 @@ qemuDiskConfigBlkdeviotuneEnabled(const virDomainDiskDef *disk)
>>   }
>>   
>>   
>> +bool
>> +qemuDiskConfigThrottleFiltersEnabled(const virDomainDiskDef *disk)
> This function doesn't need to be exported.
>
>> +{
>> +    return disk->nthrottlefilters > 0;
>> +}
>> +
>> +
>>   /**
>>    * qemuDiskBusIsSD:
>>    * @bus: disk bus
>> @@ -11055,6 +11062,83 @@ qemuBuildStorageSourceChainAttachPrepareBlockdevOne(qemuBlockStorageSourceChainD
>>   }
>>   
>>   
>> +/**
>> + * qemuBuildThrottleFiltersAttachPrepareBlockdevOne:
>> + * @data: filter chain data, which consists of array of filters and size of such array
>> + * @throttlefilter: new filter to be added into filter array
>> + * @parentNodeName: parent nodename for this new throttlefilter, which is used to build "blockdev-add" QMP request
> the stuff after the comma can be dropped.
>
>> + *
>> + * Build filter node chain to provide more flexibility for block disk I/O limits
>> + */
>> +static int
>> +qemuBuildThrottleFiltersAttachPrepareBlockdevOne(qemuBlockThrottleFiltersData *data,
>> +                                                 virDomainThrottleFilterDef *throttlefilter,
>> +                                                 const char *parentNodeName)
>> +{
>> +    g_autoptr(qemuBlockThrottleFilterAttachData) elem = NULL;
>> +
>> +    if (!(elem = qemuBlockThrottleFilterAttachPrepareBlockdev(throttlefilter, parentNodeName)))
>> +        return -1;
>> +
>> +    VIR_APPEND_ELEMENT(data->filterdata, data->nfilterdata, elem);
>> +    return 0;
>> +}
>> +
>> +
>> +/**
>> + * qemuBuildThrottleFiltersAttachPrepareBlockdev:
>> + * @disk: domain disk
>> + *
>> + * Build filter node chain to provide more flexibility for block disk I/O limits
>> + */
>> +qemuBlockThrottleFiltersData *
>> +qemuBuildThrottleFiltersAttachPrepareBlockdev(virDomainDiskDef *disk)
>> +{
>> +    g_autoptr(qemuBlockThrottleFiltersData) data = NULL;
>> +    size_t i;
>> +    const char * parentNodeName = NULL;
>> +    g_autofree char *tmp_nodename = NULL;
>> +
>> +    data = g_new0(qemuBlockThrottleFiltersData, 1);
>> +    /* get starting parentNodename, e.g. libvirt-1-format */
>> +    parentNodeName = qemuBlockStorageSourceGetEffectiveNodename(disk->src);
>> +    /* build filterdata, which contains all filters info and sequence info through parentNodeName */
>> +    for (i = 0; i < disk->nthrottlefilters; i++) {
>> +        tmp_nodename = g_strdup(parentNodeName);
>> +        if (qemuBuildThrottleFiltersAttachPrepareBlockdevOne(data, disk->throttlefilters[i], tmp_nodename) < 0)
>> +            return NULL;
> The nodename copied into 'tmp_nodename' is leaked on every iteration.
> Also qemuBuildThrottleFiltersAttachPrepareBlockdevOne doesn't modify it
> so there's no point in copying it.
>
>> +        parentNodeName = disk->throttlefilters[i]->nodename;
>> +    }
>> +
>> +    return g_steal_pointer(&data);
>> +}
>> +
>> +
>> +/**
>> + * qemuBuildThrottleFiltersDetachPrepareBlockdev:
>> + * @disk: domain disk
>> + *
>> + * Build filters data for later "blockdev-del"
>> + */
>> +qemuBlockThrottleFiltersData *
>> +qemuBuildThrottleFiltersDetachPrepareBlockdev(virDomainDiskDef *disk)
>> +{
>> +    g_autoptr(qemuBlockThrottleFiltersData) data = g_new0(qemuBlockThrottleFiltersData, 1);
>> +    size_t i;
>> +
>> +    /* build filterdata, which contains filters info and sequence info */
>> +    for (i = 0; i < disk->nthrottlefilters; i++) {
>> +        g_autoptr(qemuBlockThrottleFilterAttachData) elem = g_new0(qemuBlockThrottleFilterAttachData, 1);
>> +        /* ignore other fields since the following info are enough for "blockdev-del" */
>> +        elem->filterNodeName = qemuBlockThrottleFilterGetNodename(disk->throttlefilters[i]);
> So I didn't yet see any code that serializes any of this in the status
> XML, thus it seems that this will not work after you restart
> libvirtd/virtqemud if a VM with filters is running, and then try to
> detach disks. You'll need to add that, or base the filter nodename on
> something else.
>
> Note that presence of the node name is authoritative and we must not try
> to regenerate it. That would hinder changing the node names in the
> future.
>
> See how the copyOnRead layer node name is stored.

Filter node name is generated by rule "libvirt-nodenameindex-filter" in 
method

"qemuDomainPrepareThrottleFilterBlockdev", which is called by

"qemuDomainPrepareDiskSourceBlockdev".

it follows the same way like "libvirt-nodenameindex-format" node.

And I tested restarting libvirtd, vm with throttle works well in this case.

>> +        elem->filterAttached = true;
>> +
>> +        VIR_APPEND_ELEMENT(data->filterdata, data->nfilterdata, elem);
>> +    }
>> +    return g_steal_pointer(&data);
>> +}
>> +
>> +
>>   /**
>>    * qemuBuildStorageSourceChainAttachPrepareBlockdev:
>>    * @top: storage source chain
>
> [...]
>
>
>> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
>> index 7ba2ea4a5e..2831036e23 100644
>> --- a/src/qemu/qemu_domain.c
>> +++ b/src/qemu/qemu_domain.c
>> @@ -7989,7 +7989,8 @@ qemuDomainDetermineDiskChain(virQEMUDriver *driver,
>>    * @disk: disk definition object
>>    *
>>    * Returns the pointer to the node-name of the topmost layer used by @disk as
>> - * backend. Currently returns the nodename of the copy-on-read filter if enabled
>> + * backend. Currently returns the nodename of top throttle filter if enabled
>> + * or the nodename of the copy-on-read filter if enabled
>>    * or the nodename of the top image's format driver. Empty disks return NULL.
>>    * This must be used only with disks instantiated via -blockdev (thus not
>>    * for SD cards).
>> @@ -8005,6 +8006,10 @@ qemuDomainDiskGetTopNodename(virDomainDiskDef *disk)
>>       if (disk->copy_on_read == VIR_TRISTATE_SWITCH_ON)
>>           return priv->nodeCopyOnRead;
>>   
>> +    /* If disk has throttles, take top throttle node name */
>> +    if (disk->nthrottlefilters > 0)
>> +        return disk->throttlefilters[disk->nthrottlefilters-1]->nodename;
>
>         return disk->throttlefilters[disk->nthrottlefilters - 1]->nodename;
>
>         (extra spaces around subtraction operator)
>
>
>>       return qemuBlockStorageSourceGetEffectiveNodename(disk->src);
>>   }
>>   
>> @@ -11336,6 +11341,32 @@ qemuDomainPrepareStorageSourceBlockdevNodename(virDomainDiskDef *disk,
>>   }
>>   
>>   
>> +int
>> +qemuDomainPrepareThrottleFilterBlockdevNodename(virDomainThrottleFilterDef *filter,
> This function is used only in this file so it doesn't need to be
> exported. Also it's used only in one place so it can be inlined as it
> makes the code harder to follow than necessary.
>
>> +                                                const char *nodenameprefix)
>> +{
>> +    char *nodename = g_strdup_printf("%s-filter", nodenameprefix);
>> +
>> +    qemuBlockThrottleFilterSetNodename(filter, nodename);
>> +
>> +    return 0;
>> +}
>> +
>> +
>> +int
>> +qemuDomainPrepareThrottleFilterBlockdev(virDomainThrottleFilterDef *filter,
>> +                                        qemuDomainObjPrivate *priv)
> This function is used only in this module thus it doesn't need to be
> exported. Also it can't fail so there's no point in having a return
> value.
>
>> +{
>> +    g_autofree char *nodenameprefix = NULL;
>> +
>> +    filter->id = qemuDomainStorageIDNew(priv);
>> +
>> +    nodenameprefix = g_strdup_printf("libvirt-%u", filter->id);
>         nodename = g_strdup_printf("libvirt-%u-filter" ...
>
> Instead of the convoluted mess and the below call.
>
>
>> +
>> +    return qemuDomainPrepareThrottleFilterBlockdevNodename(filter,  nodenameprefix);
> [...]
>
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index f7e435d6d5..60989ae693 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -14828,6 +14828,12 @@ qemuDomainDiskBlockIoTuneIsSupported(virDomainDiskDef *disk)
>>           return false;
>>       }
>>   
>> +    if (disk->throttlefilters) {
>> +        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> +                       _("block 'iotune' can't be used together with 'throttlefilters' for disk '%1$s'"), disk->dst);
>> +        return false;
>> +    }
> Preferrably add all this infrastructure which doesn't deal with the
> setup of qemu in a separate patch.
>
> This patch is overly complex as it intermixes all of these validation
> bits with the actual qemu interaction.
>
>> +
>>       return true;
>>   }
>>   
>> diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
>> index 4a3f4f657e..103b9e9f00 100644
>> --- a/src/qemu/qemu_hotplug.c
>> +++ b/src/qemu/qemu_hotplug.c
>> @@ -657,6 +657,7 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
>>                               virDomainAsyncJob asyncJob)
>>   {
>>       g_autoptr(qemuBlockStorageSourceChainData) data = NULL;
>> +    g_autoptr(qemuBlockThrottleFiltersData) filterData = NULL;
>>       qemuDomainObjPrivate *priv = vm->privateData;
>>       g_autoptr(virJSONValue) devprops = NULL;
>>       bool extensionDeviceAttached = false;
>> @@ -695,6 +696,19 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
>>           if (rc < 0)
>>               goto rollback;
>>   
>> +        /* Setup throttling filters
>> +        * add additional "blockdev-add"(throttle filter) between "blockdev-add" (qemuBlockStorageSourceChainAttach) and "device_add" (qemuDomainAttachExtensionDevice)
> Drop above line.
>
>> +        */
>> +        if ((filterData = qemuBuildThrottleFiltersAttachPrepareBlockdev(disk))) {
>> +            if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
>> +                return -1;
>> +            /* QMP requests("blockdev-add" with "driver":"throttle") to QEMU */
>> +            rc = qemuBlockThrottleFiltersAttach(priv->mon, filterData);
>> +            qemuDomainObjExitMonitor(vm);
>> +            if (rc < 0)
>> +                goto rollback;
>> +        }
> The ordering is wrong. In the prepare step you are ordering the node
> name dependencies such as
>
>   device -> copyOnRead Layer -> throttle -> image chain
>
> OBviously to ensure hierarchy at hotplug they need to be plugged from
> the end.
>
> This block here is placed _AFTER_ copyOnRead layer instantiation, so
> that will not work with disks with copyOnRead.

After updating qemuBuildThrottleFiltersAttachPrepareBlockdev and 
qemuDomainDiskGetTopNodename,

order can be adjusted to be "device -> throttle-> copyOnRead Layer-> 
image chain", and throttle

works with copyOnRead, but I am also considering your suggestion about 
per-storage-source

>
>> +
>>           if (disk->transient) {
>>               g_autoptr(qemuBlockStorageSourceAttachData) backend = NULL;
>>               g_autoptr(GHashTable) blockNamedNodeData = NULL;
>> @@ -766,6 +780,8 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
>>       if (extensionDeviceAttached)
>>           ignore_value(qemuDomainDetachExtensionDevice(priv->mon, &disk->info));
>>   
>> +    qemuBlockThrottleFiltersDetach(priv->mon, filterData);
>> +
>>       qemuBlockStorageSourceChainDetach(priv->mon, data);
>>   
>>       qemuDomainObjExitMonitor(vm);
>> @@ -921,6 +937,14 @@ qemuDomainAttachDeviceDiskLiveInternal(virQEMUDriver *driver,
>>       bool releaseSeclabel = false;
>>       int ret = -1;
>>   
>> +    if (disk->device == VIR_DOMAIN_DISK_DEVICE_CDROM) {
>> +        if (disk->nthrottlefilters > 0) {
>> +            virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
>> +                        _("cdrom device with throttle filters isn't supported"));
> So and now this is why I don't like the fact that you are doing this on
> a per-disk level. On a per-image level (if you'd instantiate 'throttle'
> layers when adding the image) this would not be a problem.
>
> I'd strongly prefer if you modify this such that the trottle layers will
> be instantiated at per-storage-source level both in XML and in the code.

regarding per-storage-source, do you mean xml like below? if so, when 
specifying "--throttle-groups" in "attach-disk"

it apply groups on top source(vm1_disk_2.qcow2) only? is there scenario 
to apply throttle-groups on backing store source?

   <disk type='file' device='disk'>
       <driver name='qemu' type='qcow2'/>
       <source file='/virt/disks/vm1_disk_2.qcow2' index='1'>
           <throttlefilters>
             <throttlefilter group='limit0'/>
           </throttlefilters>
       </source>
       <backingStore type='file' index='4'>
         <format type='qcow2'/>
         <source file='/virt/disks/backing.qcow2'>
           <throttlefilters>
             <throttlefilter group='limit1'/>
           </throttlefilters>
         </source>
         <backingStore/>
       </backingStore>
       <target dev='vdc' bus='virtio'/>
       <alias name='virtio-disk2'/>
       <address type='pci' domain='0x0000' bus='0x00' slot='0x06' 
function='0x0'/>
    </disk>
>> +            return -1;
>> +        }
>> +    }
>> +
>>       if (disk->device == VIR_DOMAIN_DISK_DEVICE_FLOPPY) {
>>           virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
>>                          _("floppy device hotplug isn't supported"));
>> @@ -4499,6 +4523,7 @@ qemuDomainRemoveDiskDevice(virQEMUDriver *driver,
>>   {
>>       qemuDomainDiskPrivate *diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
>>       g_autoptr(qemuBlockStorageSourceChainData) diskBackend = NULL;
>> +    g_autoptr(qemuBlockThrottleFiltersData) filterData = NULL;
>>       size_t i;
>>       qemuDomainObjPrivate *priv = vm->privateData;
>>       int ret = -1;
>> @@ -4537,6 +4562,14 @@ qemuDomainRemoveDiskDevice(virQEMUDriver *driver,
>>           }
>>       }
>>   
>> +    qemuDomainObjEnterMonitor(vm);
>> +    /* QMP request("blockdev-del") to QEMU to delete throttle filter*/
>> +    if ((filterData = qemuBuildThrottleFiltersDetachPrepareBlockdev(disk))) {
>> +        /* "qemuBlockThrottleFiltersDetach" is used in rollback within "qemuDomainAttachDiskGeneric" as well */
>> +        qemuBlockThrottleFiltersDetach(priv->mon, filterData);
> In which case this'd be automatic.
>
>
>> +    }
>> +    qemuDomainObjExitMonitor(vm);
>> +
>>       qemuDomainObjEnterMonitor(vm);
>>   
>>       if (diskBackend)
>> -- 
>> 2.34.1
>>
> This patch should also contain the corresponding commandline
> infrastructure for the filter itself.
>
> The disk backend code is specifically unified by using the intermediate
> struct so that it's guaranteed that both hotplug and commandline work
> the same. Thus they need to be added simultaenously.
>
-- 
Thanks and Regards,

Wu
Re: [PATCH RFC v3 09/16] qemu: hotplug: Support hot attach and detach block disk along with throttle filters
Posted by Peter Krempa 1 month ago
On Thu, Aug 15, 2024 at 13:04:47 +0800, Chun Feng Wu wrote:

(I'd suggest you trim irrelevant stuff when responding. It's hard to
find what you've responded to in this massive message .

> On 2024/8/9 22:04, Peter Krempa wrote:
> > On Wed, Jun 12, 2024 at 03:02:17 -0700,wucf@linux.ibm.com  wrote:
> > > From: Chun Feng Wu<wucf@linux.ibm.com>

[...]

> > > + * qemuBuildThrottleFiltersDetachPrepareBlockdev:
> > > + * @disk: domain disk
> > > + *
> > > + * Build filters data for later "blockdev-del"
> > > + */
> > > +qemuBlockThrottleFiltersData *
> > > +qemuBuildThrottleFiltersDetachPrepareBlockdev(virDomainDiskDef *disk)
> > > +{
> > > +    g_autoptr(qemuBlockThrottleFiltersData) data = g_new0(qemuBlockThrottleFiltersData, 1);
> > > +    size_t i;
> > > +
> > > +    /* build filterdata, which contains filters info and sequence info */
> > > +    for (i = 0; i < disk->nthrottlefilters; i++) {
> > > +        g_autoptr(qemuBlockThrottleFilterAttachData) elem = g_new0(qemuBlockThrottleFilterAttachData, 1);
> > > +        /* ignore other fields since the following info are enough for "blockdev-del" */
> > > +        elem->filterNodeName = qemuBlockThrottleFilterGetNodename(disk->throttlefilters[i]);
> > So I didn't yet see any code that serializes any of this in the status
> > XML, thus it seems that this will not work after you restart
> > libvirtd/virtqemud if a VM with filters is running, and then try to
> > detach disks. You'll need to add that, or base the filter nodename on
> > something else.
> > 
> > Note that presence of the node name is authoritative and we must not try
> > to regenerate it. That would hinder changing the node names in the
> > future.
> > 
> > See how the copyOnRead layer node name is stored.
> 
> Filter node name is generated by rule "libvirt-nodenameindex-filter" in
> method
> 
> "qemuDomainPrepareThrottleFilterBlockdev", which is called by
> 
> "qemuDomainPrepareDiskSourceBlockdev".
> 
> it follows the same way like "libvirt-nodenameindex-format" node.
> 
> And I tested restarting libvirtd, vm with throttle works well in this case.

The problem is that the actual error shows only in logs as the detaching
of the backend-nodes is on a code path that can't meaningfully report
errors. Did you check in the logs that:

 - the removal of the throttling node was actually requested
 - that it did have a proper node name

That is on hot-unplug of the disk having such config after you've
restarted libvirt.

The code for other disks always stores the nodenames as-generated in the
status XML (XML describing a running VM) which I don't see in this
series thus I infer it doesn't/can't work.

[...]


> > > @@ -921,6 +937,14 @@ qemuDomainAttachDeviceDiskLiveInternal(virQEMUDriver *driver,
> > >       bool releaseSeclabel = false;
> > >       int ret = -1;
> > > +    if (disk->device == VIR_DOMAIN_DISK_DEVICE_CDROM) {
> > > +        if (disk->nthrottlefilters > 0) {
> > > +            virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
> > > +                        _("cdrom device with throttle filters isn't supported"));
> > So and now this is why I don't like the fact that you are doing this on
> > a per-disk level. On a per-image level (if you'd instantiate 'throttle'
> > layers when adding the image) this would not be a problem.
> > 
> > I'd strongly prefer if you modify this such that the trottle layers will
> > be instantiated at per-storage-source level both in XML and in the code.
> 
> regarding per-storage-source, do you mean xml like below? if so, when
> specifying "--throttle-groups" in "attach-disk"
> 
> it apply groups on top source(vm1_disk_2.qcow2) only? is there scenario to
> apply throttle-groups on backing store source?
> 
>   <disk type='file' device='disk'>
>       <driver name='qemu' type='qcow2'/>
>       <source file='/virt/disks/vm1_disk_2.qcow2' index='1'>
>           <throttlefilters>
>             <throttlefilter group='limit0'/>
>           </throttlefilters>
>       </source>
>       <backingStore type='file' index='4'>
>         <format type='qcow2'/>
>         <source file='/virt/disks/backing.qcow2'>
>           <throttlefilters>
>             <throttlefilter group='limit1'/>
>           </throttlefilters>
>         </source>
>         <backingStore/>
>       </backingStore>
>       <target dev='vdc' bus='virtio'/>
>       <alias name='virtio-disk2'/>
>       <address type='pci' domain='0x0000' bus='0x00' slot='0x06'
> function='0x0'/>
>    </disk>


Yeah, something like this. That allows adding filters on other layers
too.

As I said it depends on how you expect to use this feature, because both
make sense.

I do reckon that in most cases this is an overkill though.

If you decide that this is too complicated, you can use the approach you
did, but then need to store the nodename of the throttle layer on the
disk level in the status XML.
Re: [PATCH RFC v3 09/16] qemu: hotplug: Support hot attach and detach block disk along with throttle filters
Posted by Chun Feng Wu 4 weeks ago
On 2024/8/16 23:14, Peter Krempa wrote:
> On Thu, Aug 15, 2024 at 13:04:47 +0800, Chun Feng Wu wrote:
>
> (I'd suggest you trim irrelevant stuff when responding. It's hard to
> find what you've responded to in this massive message .
>
>> On 2024/8/9 22:04, Peter Krempa wrote:
>>> On Wed, Jun 12, 2024 at 03:02:17 -0700,wucf@linux.ibm.com  wrote:
>>>> From: Chun Feng Wu<wucf@linux.ibm.com>
> [...]
>
>>>> + * qemuBuildThrottleFiltersDetachPrepareBlockdev:
>>>> + * @disk: domain disk
>>>> + *
>>>> + * Build filters data for later "blockdev-del"
>>>> + */
>>>> +qemuBlockThrottleFiltersData *
>>>> +qemuBuildThrottleFiltersDetachPrepareBlockdev(virDomainDiskDef *disk)
>>>> +{
>>>> +    g_autoptr(qemuBlockThrottleFiltersData) data = g_new0(qemuBlockThrottleFiltersData, 1);
>>>> +    size_t i;
>>>> +
>>>> +    /* build filterdata, which contains filters info and sequence info */
>>>> +    for (i = 0; i < disk->nthrottlefilters; i++) {
>>>> +        g_autoptr(qemuBlockThrottleFilterAttachData) elem = g_new0(qemuBlockThrottleFilterAttachData, 1);
>>>> +        /* ignore other fields since the following info are enough for "blockdev-del" */
>>>> +        elem->filterNodeName = qemuBlockThrottleFilterGetNodename(disk->throttlefilters[i]);
>>> So I didn't yet see any code that serializes any of this in the status
>>> XML, thus it seems that this will not work after you restart
>>> libvirtd/virtqemud if a VM with filters is running, and then try to
>>> detach disks. You'll need to add that, or base the filter nodename on
>>> something else.
>>>
>>> Note that presence of the node name is authoritative and we must not try
>>> to regenerate it. That would hinder changing the node names in the
>>> future.
>>>
>>> See how the copyOnRead layer node name is stored.
>> Filter node name is generated by rule "libvirt-nodenameindex-filter" in
>> method
>>
>> "qemuDomainPrepareThrottleFilterBlockdev", which is called by
>>
>> "qemuDomainPrepareDiskSourceBlockdev".
>>
>> it follows the same way like "libvirt-nodenameindex-format" node.
>>
>> And I tested restarting libvirtd, vm with throttle works well in this case.
> The problem is that the actual error shows only in logs as the detaching
> of the backend-nodes is on a code path that can't meaningfully report
> errors. Did you check in the logs that:
>
>   - the removal of the throttling node was actually requested
>   - that it did have a proper node name
>
> That is on hot-unplug of the disk having such config after you've
> restarted libvirt.
>
> The code for other disks always stores the nodenames as-generated in the
> status XML (XML describing a running VM) which I don't see in this
> series thus I infer it doesn't/can't work.
>
> [...]
>
>
>>>> @@ -921,6 +937,14 @@ qemuDomainAttachDeviceDiskLiveInternal(virQEMUDriver *driver,
>>>>        bool releaseSeclabel = false;
>>>>        int ret = -1;
>>>> +    if (disk->device == VIR_DOMAIN_DISK_DEVICE_CDROM) {
>>>> +        if (disk->nthrottlefilters > 0) {
>>>> +            virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
>>>> +                        _("cdrom device with throttle filters isn't supported"));
>>> So and now this is why I don't like the fact that you are doing this on
>>> a per-disk level. On a per-image level (if you'd instantiate 'throttle'
>>> layers when adding the image) this would not be a problem.
>>>
>>> I'd strongly prefer if you modify this such that the trottle layers will
>>> be instantiated at per-storage-source level both in XML and in the code.
>> regarding per-storage-source, do you mean xml like below? if so, when
>> specifying "--throttle-groups" in "attach-disk"
>>
>> it apply groups on top source(vm1_disk_2.qcow2) only? is there scenario to
>> apply throttle-groups on backing store source?
>>
>>    <disk type='file' device='disk'>
>>        <driver name='qemu' type='qcow2'/>
>>        <source file='/virt/disks/vm1_disk_2.qcow2' index='1'>
>>            <throttlefilters>
>>              <throttlefilter group='limit0'/>
>>            </throttlefilters>
>>        </source>
>>        <backingStore type='file' index='4'>
>>          <format type='qcow2'/>
>>          <source file='/virt/disks/backing.qcow2'>
>>            <throttlefilters>
>>              <throttlefilter group='limit1'/>
>>            </throttlefilters>
>>          </source>
>>          <backingStore/>
>>        </backingStore>
>>        <target dev='vdc' bus='virtio'/>
>>        <alias name='virtio-disk2'/>
>>        <address type='pci' domain='0x0000' bus='0x00' slot='0x06'
>> function='0x0'/>
>>     </disk>
>
> Yeah, something like this. That allows adding filters on other layers
> too.
>
> As I said it depends on how you expect to use this feature, because both
> make sense.
>
> I do reckon that in most cases this is an overkill though.
>
> If you decide that this is too complicated, you can use the approach you
> did, but then need to store the nodename of the throttle layer on the
> disk level in the status XML.

sure, then let's stick to current implementation about filter 
chain(per-disk), about status XML, do you mean <privateData> to store 
nodename? if so, does the following xml look good to you?

   <throttlefilters>
     <throttlefilter  group='limit2'>
       <privateData>
         <nodename type='throttle-filter' 
name='0123456789ABCDEF0123456789ABCDE'/>
       </privateData>
     </throttlefilter>
   </throttlefilters>

-- 
Thanks and Regards,

Wu
Re: [PATCH RFC v3 09/16] qemu: hotplug: Support hot attach and detach block disk along with throttle filters
Posted by Peter Krempa 4 weeks ago
On Mon, Aug 19, 2024 at 18:27:46 +0800, Chun Feng Wu wrote:
> 
> On 2024/8/16 23:14, Peter Krempa wrote:
> > On Thu, Aug 15, 2024 at 13:04:47 +0800, Chun Feng Wu wrote:
> > 
> > (I'd suggest you trim irrelevant stuff when responding. It's hard to
> > find what you've responded to in this massive message .
> > 
> > > On 2024/8/9 22:04, Peter Krempa wrote:
> > > > On Wed, Jun 12, 2024 at 03:02:17 -0700,wucf@linux.ibm.com  wrote:
> > > > > From: Chun Feng Wu<wucf@linux.ibm.com>

[...]

> > > it apply groups on top source(vm1_disk_2.qcow2) only? is there scenario to
> > > apply throttle-groups on backing store source?
> > > 
> > >    <disk type='file' device='disk'>
> > >        <driver name='qemu' type='qcow2'/>
> > >        <source file='/virt/disks/vm1_disk_2.qcow2' index='1'>
> > >            <throttlefilters>
> > >              <throttlefilter group='limit0'/>
> > >            </throttlefilters>
> > >        </source>
> > >        <backingStore type='file' index='4'>
> > >          <format type='qcow2'/>
> > >          <source file='/virt/disks/backing.qcow2'>
> > >            <throttlefilters>
> > >              <throttlefilter group='limit1'/>
> > >            </throttlefilters>
> > >          </source>
> > >          <backingStore/>
> > >        </backingStore>
> > >        <target dev='vdc' bus='virtio'/>
> > >        <alias name='virtio-disk2'/>
> > >        <address type='pci' domain='0x0000' bus='0x00' slot='0x06'
> > > function='0x0'/>
> > >     </disk>
> > 
> > Yeah, something like this. That allows adding filters on other layers
> > too.
> > 
> > As I said it depends on how you expect to use this feature, because both
> > make sense.
> > 
> > I do reckon that in most cases this is an overkill though.
> > 
> > If you decide that this is too complicated, you can use the approach you
> > did, but then need to store the nodename of the throttle layer on the
> > disk level in the status XML.
> 
> sure, then let's stick to current implementation about filter
> chain(per-disk), about status XML, do you mean <privateData> to store
> nodename? if so, does the following xml look good to you?
> 
>   <throttlefilters>
>     <throttlefilter  group='limit2'>
>       <privateData>
>         <nodename type='throttle-filter'
> name='0123456789ABCDEF0123456789ABCDE'/>
>       </privateData>
>     </throttlefilter>
>   </throttlefilters>

I suggest you don't add the privateData sub-element to the
'throttlefilters' as it will be very hard to plumb that in. You'd
require private data callbacks which the XML parser (generic) calls from
the qemu driver (specific) to parse this.

I'd rather suggest you use the disk private data section and format them
there. Use the group name as a key to find them as that is (or at least
should be) unique.
Re: [PATCH RFC v3 09/16] qemu: hotplug: Support hot attach and detach block disk along with throttle filters
Posted by Chun Feng Wu 3 weeks, 6 days ago
On 2024/8/19 18:59, Peter Krempa wrote:
> On Mon, Aug 19, 2024 at 18:27:46 +0800, Chun Feng Wu wrote:
>> On 2024/8/16 23:14, Peter Krempa wrote:
>>> On Thu, Aug 15, 2024 at 13:04:47 +0800, Chun Feng Wu wrote:
>>>
>>> (I'd suggest you trim irrelevant stuff when responding. It's hard to
>>> find what you've responded to in this massive message .
>>>
>>>> On 2024/8/9 22:04, Peter Krempa wrote:
>>>>> On Wed, Jun 12, 2024 at 03:02:17 -0700,wucf@linux.ibm.com  wrote:
>>>>>> From: Chun Feng Wu<wucf@linux.ibm.com>
> [...]
>
>>>> it apply groups on top source(vm1_disk_2.qcow2) only? is there scenario to
>>>> apply throttle-groups on backing store source?
>>>>
>>>>     <disk type='file' device='disk'>
>>>>         <driver name='qemu' type='qcow2'/>
>>>>         <source file='/virt/disks/vm1_disk_2.qcow2' index='1'>
>>>>             <throttlefilters>
>>>>               <throttlefilter group='limit0'/>
>>>>             </throttlefilters>
>>>>         </source>
>>>>         <backingStore type='file' index='4'>
>>>>           <format type='qcow2'/>
>>>>           <source file='/virt/disks/backing.qcow2'>
>>>>             <throttlefilters>
>>>>               <throttlefilter group='limit1'/>
>>>>             </throttlefilters>
>>>>           </source>
>>>>           <backingStore/>
>>>>         </backingStore>
>>>>         <target dev='vdc' bus='virtio'/>
>>>>         <alias name='virtio-disk2'/>
>>>>         <address type='pci' domain='0x0000' bus='0x00' slot='0x06'
>>>> function='0x0'/>
>>>>      </disk>
>>> Yeah, something like this. That allows adding filters on other layers
>>> too.
>>>
>>> As I said it depends on how you expect to use this feature, because both
>>> make sense.
>>>
>>> I do reckon that in most cases this is an overkill though.
>>>
>>> If you decide that this is too complicated, you can use the approach you
>>> did, but then need to store the nodename of the throttle layer on the
>>> disk level in the status XML.
>> sure, then let's stick to current implementation about filter
>> chain(per-disk), about status XML, do you mean <privateData> to store
>> nodename? if so, does the following xml look good to you?
>>
>>    <throttlefilters>
>>      <throttlefilter  group='limit2'>
>>        <privateData>
>>          <nodename type='throttle-filter'
>> name='0123456789ABCDEF0123456789ABCDE'/>
>>        </privateData>
>>      </throttlefilter>
>>    </throttlefilters>
> I suggest you don't add the privateData sub-element to the
> 'throttlefilters' as it will be very hard to plumb that in. You'd
> require private data callbacks which the XML parser (generic) calls from
> the qemu driver (specific) to parse this.
>
> I'd rather suggest you use the disk private data section and format them
> there. Use the group name as a key to find them as that is (or at least
> should be) unique.
>
if so, it could be the following xml:

<disk>
   <privateData>
        <nodenames>
            <nodename type='throttle-filter' 
name='libvirt-nodenameindex-filter' key='group_name'/>
        </nodenames>
   </privateData>
</disk>


and

struct _qemuDomainDiskPrivate {
...
GHashTable *throttleFilters;
...
}

any suggestion?

-- 
Thanks and Regards,

Wu
Re: [PATCH RFC v3 09/16] qemu: hotplug: Support hot attach and detach block disk along with throttle filters
Posted by Peter Krempa 3 weeks, 6 days ago
On Tue, Aug 20, 2024 at 22:48:53 +0800, Chun Feng Wu wrote:
> 
> On 2024/8/19 18:59, Peter Krempa wrote:
> > On Mon, Aug 19, 2024 at 18:27:46 +0800, Chun Feng Wu wrote:
> > > On 2024/8/16 23:14, Peter Krempa wrote:
> > > > On Thu, Aug 15, 2024 at 13:04:47 +0800, Chun Feng Wu wrote:
> > > > 
> > > > (I'd suggest you trim irrelevant stuff when responding. It's hard to
> > > > find what you've responded to in this massive message .
> > > > 
> > > > > On 2024/8/9 22:04, Peter Krempa wrote:
> > > > > > On Wed, Jun 12, 2024 at 03:02:17 -0700,wucf@linux.ibm.com  wrote:
> > > > > > > From: Chun Feng Wu<wucf@linux.ibm.com>
> > [...]
> > 
> > > > > it apply groups on top source(vm1_disk_2.qcow2) only? is there scenario to
> > > > > apply throttle-groups on backing store source?
> > > > > 
> > > > >     <disk type='file' device='disk'>
> > > > >         <driver name='qemu' type='qcow2'/>
> > > > >         <source file='/virt/disks/vm1_disk_2.qcow2' index='1'>
> > > > >             <throttlefilters>
> > > > >               <throttlefilter group='limit0'/>
> > > > >             </throttlefilters>
> > > > >         </source>
> > > > >         <backingStore type='file' index='4'>
> > > > >           <format type='qcow2'/>
> > > > >           <source file='/virt/disks/backing.qcow2'>
> > > > >             <throttlefilters>
> > > > >               <throttlefilter group='limit1'/>
> > > > >             </throttlefilters>
> > > > >           </source>
> > > > >           <backingStore/>
> > > > >         </backingStore>
> > > > >         <target dev='vdc' bus='virtio'/>
> > > > >         <alias name='virtio-disk2'/>
> > > > >         <address type='pci' domain='0x0000' bus='0x00' slot='0x06'
> > > > > function='0x0'/>
> > > > >      </disk>
> > > > Yeah, something like this. That allows adding filters on other layers
> > > > too.
> > > > 
> > > > As I said it depends on how you expect to use this feature, because both
> > > > make sense.
> > > > 
> > > > I do reckon that in most cases this is an overkill though.
> > > > 
> > > > If you decide that this is too complicated, you can use the approach you
> > > > did, but then need to store the nodename of the throttle layer on the
> > > > disk level in the status XML.
> > > sure, then let's stick to current implementation about filter
> > > chain(per-disk), about status XML, do you mean <privateData> to store
> > > nodename? if so, does the following xml look good to you?
> > > 
> > >    <throttlefilters>
> > >      <throttlefilter  group='limit2'>
> > >        <privateData>
> > >          <nodename type='throttle-filter'
> > > name='0123456789ABCDEF0123456789ABCDE'/>
> > >        </privateData>
> > >      </throttlefilter>
> > >    </throttlefilters>
> > I suggest you don't add the privateData sub-element to the
> > 'throttlefilters' as it will be very hard to plumb that in. You'd
> > require private data callbacks which the XML parser (generic) calls from
> > the qemu driver (specific) to parse this.
> > 
> > I'd rather suggest you use the disk private data section and format them
> > there. Use the group name as a key to find them as that is (or at least
> > should be) unique.
> > 
> if so, it could be the following xml:
> 
> <disk>
>   <privateData>
>        <nodenames>
>            <nodename type='throttle-filter'
> name='libvirt-nodenameindex-filter' key='group_name'/>
>        </nodenames>
>   </privateData>
> </disk>

Yes. Although I'd name the 'key' field 'group' instead.

> and
> 
> struct _qemuDomainDiskPrivate {
> ...
> GHashTable *throttleFilters;
> ...
> }

This shouldn't be needed. You can store the nodenames in the same data
structure you do now. The formatter will format it from the data
structure and parser will fill it into the appropriate
fields based on the group name.

Yes it's O(N2)-ish complexity but N is small, so having a hash
table doesn't make sense.

In fact you don't want a hash table because you need the
filters ordered in the order defined in the XML and a hash table would
re-order them randomly every time.
Re: [PATCH RFC v3 09/16] qemu: hotplug: Support hot attach and detach block disk along with throttle filters
Posted by Chun Feng Wu 3 weeks, 5 days ago
On 2024/8/20 23:08, Peter Krempa wrote:
> On Tue, Aug 20, 2024 at 22:48:53 +0800, Chun Feng Wu wrote:
>> On 2024/8/19 18:59, Peter Krempa wrote:
>>> On Mon, Aug 19, 2024 at 18:27:46 +0800, Chun Feng Wu wrote:
>>>> On 2024/8/16 23:14, Peter Krempa wrote:
>>>>> On Thu, Aug 15, 2024 at 13:04:47 +0800, Chun Feng Wu wrote:
>>>>>
>>>>> (I'd suggest you trim irrelevant stuff when responding. It's hard to
>>>>> find what you've responded to in this massive message .
>>>>>
>>>>>> On 2024/8/9 22:04, Peter Krempa wrote:
>>>>>>> On Wed, Jun 12, 2024 at 03:02:17 -0700,wucf@linux.ibm.com  wrote:
>>>>>>>> From: Chun Feng Wu<wucf@linux.ibm.com>
>>> [...]
>>>
>>>>>> it apply groups on top source(vm1_disk_2.qcow2) only? is there scenario to
>>>>>> apply throttle-groups on backing store source?
>>>>>>
>>>>>>      <disk type='file' device='disk'>
>>>>>>          <driver name='qemu' type='qcow2'/>
>>>>>>          <source file='/virt/disks/vm1_disk_2.qcow2' index='1'>
>>>>>>              <throttlefilters>
>>>>>>                <throttlefilter group='limit0'/>
>>>>>>              </throttlefilters>
>>>>>>          </source>
>>>>>>          <backingStore type='file' index='4'>
>>>>>>            <format type='qcow2'/>
>>>>>>            <source file='/virt/disks/backing.qcow2'>
>>>>>>              <throttlefilters>
>>>>>>                <throttlefilter group='limit1'/>
>>>>>>              </throttlefilters>
>>>>>>            </source>
>>>>>>            <backingStore/>
>>>>>>          </backingStore>
>>>>>>          <target dev='vdc' bus='virtio'/>
>>>>>>          <alias name='virtio-disk2'/>
>>>>>>          <address type='pci' domain='0x0000' bus='0x00' slot='0x06'
>>>>>> function='0x0'/>
>>>>>>       </disk>
>>>>> Yeah, something like this. That allows adding filters on other layers
>>>>> too.
>>>>>
>>>>> As I said it depends on how you expect to use this feature, because both
>>>>> make sense.
>>>>>
>>>>> I do reckon that in most cases this is an overkill though.
>>>>>
>>>>> If you decide that this is too complicated, you can use the approach you
>>>>> did, but then need to store the nodename of the throttle layer on the
>>>>> disk level in the status XML.
>>>> sure, then let's stick to current implementation about filter
>>>> chain(per-disk), about status XML, do you mean <privateData> to store
>>>> nodename? if so, does the following xml look good to you?
>>>>
>>>>     <throttlefilters>
>>>>       <throttlefilter  group='limit2'>
>>>>         <privateData>
>>>>           <nodename type='throttle-filter'
>>>> name='0123456789ABCDEF0123456789ABCDE'/>
>>>>         </privateData>
>>>>       </throttlefilter>
>>>>     </throttlefilters>
>>> I suggest you don't add the privateData sub-element to the
>>> 'throttlefilters' as it will be very hard to plumb that in. You'd
>>> require private data callbacks which the XML parser (generic) calls from
>>> the qemu driver (specific) to parse this.
>>>
>>> I'd rather suggest you use the disk private data section and format them
>>> there. Use the group name as a key to find them as that is (or at least
>>> should be) unique.
>>>
>> if so, it could be the following xml:
>>
>> <disk>
>>    <privateData>
>>         <nodenames>
>>             <nodename type='throttle-filter'
>> name='libvirt-nodenameindex-filter' key='group_name'/>
>>         </nodenames>
>>    </privateData>
>> </disk>
> Yes. Although I'd name the 'key' field 'group' instead.
>
>> and
>>
>> struct _qemuDomainDiskPrivate {
>> ...
>> GHashTable *throttleFilters;
>> ...
>> }
> This shouldn't be needed. You can store the nodenames in the same data
> structure you do now. The formatter will format it from the data
> structure and parser will fill it into the appropriate
> fields based on the group name.
>
> Yes it's O(N2)-ish complexity but N is small, so having a hash
> table doesn't make sense.
>
> In fact you don't want a hash table because you need the
> filters ordered in the order defined in the XML and a hash table would
> re-order them randomly every time.
Thanks for your suggestion! it makes more sense, I am drafting this part 
in v4

-- 
Thanks and Regards,

Wu
Re: [PATCH RFC v3 09/16] qemu: hotplug: Support hot attach and detach block disk along with throttle filters
Posted by Chun Feng Wu 1 month ago
On 2024/8/15 13:04, Chun Feng Wu wrote:
>
> On 2024/8/9 22:04, Peter Krempa wrote:
>> On Wed, Jun 12, 2024 at 03:02:17 -0700,wucf@linux.ibm.com  wrote:
>>> From: Chun Feng Wu<wucf@linux.ibm.com>
>>>
>>> When attaching disk along with specified throttle groups, those 
>>> groups will be chained up by parent node name, this change includes 
>>> service side codes:
>>> * Each filter references one throttle group by group name
>>> * Update "qemuDomainDiskGetTopNodename" to take top throttle node 
>>> name if disk has throttles
>>> * Each filter has a nodename, and those filters are chained up in 
>>> sequence
>>> * Filter nodename index is generated by reusing 
>>> qemuDomainStorageIDNew and current global sequence number is 
>>> persistented in 
>>> virDomainObj->privateData(qemuDomainObjPrivate)->nodenameindex
>>> * During hotplug, filter is created through QMP 
>>> request("blockdev-add" with "driver":"throttle") to QEMU
>>> * Delete filters by "qemuBlockThrottleFiltersDetach"("blockdev-del") 
>>> when detaching device
>>> * Use "iotune" and "throttlefilters" exclusively for specific disk
>>>
>>> Signed-off-by: Chun Feng Wu<wucf@linux.ibm.com>
>>> ---
>>>   src/conf/domain_validate.c |   8 +++
>>>   src/qemu/qemu_block.c      | 131 
>>> +++++++++++++++++++++++++++++++++++++
>>>   src/qemu/qemu_block.h      |  53 +++++++++++++++
>>>   src/qemu/qemu_command.c    |  84 ++++++++++++++++++++++++
>>>   src/qemu/qemu_command.h    |   9 +++
>>>   src/qemu/qemu_domain.c     |  39 ++++++++++-
>>>   src/qemu/qemu_domain.h     |   8 +++
>>>   src/qemu/qemu_driver.c     |   6 ++
>>>   src/qemu/qemu_hotplug.c    |  33 ++++++++++
>>>   9 files changed, 370 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
>>> index 395e036e8f..4cc5ed7577 100644
>>> --- a/src/conf/domain_validate.c
>>> +++ b/src/conf/domain_validate.c
>>> @@ -942,6 +942,14 @@ virDomainDiskDefValidate(const virDomainDef *def,
>>>           return -1;
>>>       }
>>>   +    if (disk->throttlefilters && (disk->blkdeviotune.group_name ||
>>> + virDomainBlockIoTuneInfoHasAny(&disk->blkdeviotune))) {
>> This is hard to read as you've broken up the line within a nested brace.
>> It may confuse readers.
>>
>> Rewrite as:
>>
>>      if (disk->throttlefilters &&
>>          (disk->blkdeviotune.group_name ||
>> virDomainBlockIoTuneInfoHasAny(&disk->blkdeviotune))) {
>>
>>> + virReportError(VIR_ERR_XML_ERROR,
>> Operation unsupported.
>>
>>> +                       _("block 'throttlefilters' can't be used 
>>> together with 'iotune' for disk '%1$s'"),
>>> +                       disk->dst);
>>> +        return -1;
>>> +    }
>>> +
>>>       return 0;
>>>   }
>>>   diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
>>> index 738b72d7ea..9b8ff65887 100644
>>> --- a/src/qemu/qemu_block.c
>>> +++ b/src/qemu/qemu_block.c
>>> @@ -2739,6 +2739,137 @@ 
>>> qemuBlockStorageSourceCreateDetectSize(GHashTable *blockNamedNodeData,
>>>   }
>>>     +void
>>> +qemuBlockThrottleFilterSetNodename(virDomainThrottleFilterDef *filter,
>>> +                                   char *nodename)
>>> +{
>>> +    g_free(filter->nodename);
>>> +    filter->nodename = nodename;
>>> +}
>>> +
>>> +
>>> +const char *
>>> +qemuBlockThrottleFilterGetNodename(virDomainThrottleFilterDef *filter)
>>> +{
>>> +    return filter->nodename;
>>> +}
>> All of this helper infrastructure doesn't belong to a patch that is
>> declaring that it's dealing with the setup of qemu. (I also wrote that
>> below as I've noticed it there).
>>
>> It makes this patch too complex and thus hard to review. It's also the
>> reason it takes me so long. I'm demotivated on such commits as it takes
>> way too long to go through.
>>
>> Any setup of nodenames and other libvirt-internal stuff, such as
>> validatio of config etc should be separated.
>>
>>
>>> +/**
>>> + * qemuBlockThrottleFilterGetProps:
>>> + * @filter: throttle filter
>>> + * @parentNodeName: parent nodename of @filter
>>> + *
>>> + * Build "arguments" part of "blockdev-add" QMP cmd.
>>> + * e.g. {"execute":"blockdev-add", "arguments":{"driver":"throttle",
>>> + * "node-name":"libvirt-2-filter", "throttle-group":"limits0",
>>> + * "file": "libvirt-1-format"}}
>> Avoid this JSON in comments, as it's obvious from the code. Rather
>> describe any special handling if needed.
>>
>>> + */
>>> +virJSONValue *
>>> +qemuBlockThrottleFilterGetProps(virDomainThrottleFilterDef *filter,
>>> +                                const char *parentNodeName)
>>> +{
>>> +    g_autoptr(virJSONValue) props = NULL;
>>> +
>>> +    if (virJSONValueObjectAdd(&props,
>>> +                              "s:driver", "throttle",
>>> +                              "s:node-name", 
>>> qemuBlockThrottleFilterGetNodename(filter),
>>> +                              "s:throttle-group", filter->group_name,
>>> +                              "s:file", parentNodeName,
>>> +                              NULL) < 0)
>>> +        return 0;
>> return NULL; It looks confusing because 0 is success in our code.
>>
>>> +
>>> +    return g_steal_pointer(&props);
>>> +}
>> [...]
>>
>>> +qemuBlockThrottleFilterAttachData *
>>> +qemuBlockThrottleFilterAttachPrepareBlockdev(virDomainThrottleFilterDef 
>>> *filter,
>>> +                                             const char 
>>> *parentNodeName)
>>> +{
>>> +    g_autoptr(qemuBlockThrottleFilterAttachData) data = NULL;
>>> +
>>> +    data = g_new0(qemuBlockThrottleFilterAttachData, 1);
>>> +
>>> +    if (!(data->filterProps = 
>>> qemuBlockThrottleFilterGetProps(filter, parentNodeName)))
>>> +        return NULL;
>>> +
>>> +    data->filterNodeName = qemuBlockThrottleFilterGetNodename(filter);
>>> +    data->filterAttached = true;
>> Well, if you set this to true right here, it's pointless to even have
>> the variable.
>>
>> The code you've copied this from uses the 'Attached' variable so that
>> it's set only after the object is created in qemu. This allows safe
>> rollback.
>>
>> If you set it to true for everything you will attempt to potentially
>> roll back stuff that was not yet set in qemu, making the variable itself
>> pointless.
>>
>>> +
>>> +    return g_steal_pointer(&data);
>>> +}
>>> +
>>> +
>>> +void
>>> +qemuBlockThrottleFilterAttachRollback(qemuMonitor *mon,
>>> + qemuBlockThrottleFilterAttachData *data)
>>> +{
>>> +    virErrorPtr orig_err;
>>> +
>>> +    virErrorPreserveLast(&orig_err);
>>> +
>>> +    if (data->filterAttached)
>>> +        ignore_value(qemuMonitorBlockdevDel(mon, 
>>> data->filterNodeName));
>> ... here ...
>>
>>> +
>>> +    virErrorRestore(&orig_err);
>>> +}
>>> +
>>> +
>>> +void
>>> +qemuBlockThrottleFiltersDataFree(qemuBlockThrottleFiltersData *data)
>>> +{
>>> +    size_t i;
>>> +
>>> +    if (!data)
>>> +        return;
>>> +
>>> +    for (i = 0; i < data->nfilterdata; i++)
>>> + qemuBlockThrottleFilterAttachDataFree(data->filterdata[i]);
>>> +
>>> +    g_free(data->filterdata);
>>> +    g_free(data);
>>> +}
>>> +
>>> +
>>> +int
>>> +qemuBlockThrottleFiltersAttach(qemuMonitor *mon,
>>> +                               qemuBlockThrottleFiltersData *data)
>>> +{
>>> +    size_t i;
>>> +
>>> +    for (i = 0; i < data->nfilterdata; i++) {
>>> +        if (qemuMonitorBlockdevAdd(mon, 
>>> &data->filterdata[i]->filterProps) < 0)
>> This function requires the monitor to be locked. If you want to have
>> this as a separate function you must document it and state this fact.
>>
>>> +            return -1;
>>> +        data->filterdata[i]->filterAttached = true;
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +
>>> +void
>>> +qemuBlockThrottleFiltersDetach(qemuMonitor *mon,
>>> +                               qemuBlockThrottleFiltersData *data)
>>> +{
>>> +    size_t i;
>>> +
>>> +    for (i = data->nfilterdata; i > 0; i--)
>>> +        qemuBlockThrottleFilterAttachRollback(mon, 
>>> data->filterdata[i-1]);
>>> +}
>>> +
>>> +
>>>   int
>>>   qemuBlockRemoveImageMetadata(virQEMUDriver *driver,
>>>                                virDomainObj *vm,
>>> diff --git a/src/qemu/qemu_block.h b/src/qemu/qemu_block.h
>>> index f9e961d85d..9888954ce4 100644
>>> --- a/src/qemu/qemu_block.h
>>> +++ b/src/qemu/qemu_block.h
>>> @@ -214,6 +214,59 @@ 
>>> qemuBlockStorageSourceCreateDetectSize(GHashTable *blockNamedNodeData,
>>>                                          virStorageSource *src,
>>>                                          virStorageSource *templ);
>>>   +void
>>> +qemuBlockThrottleFilterSetNodename(virDomainThrottleFilterDef *filter,
>>> +                                   char *nodename);
>>> +
>>> +const char *
>>> +qemuBlockThrottleFilterGetNodename(virDomainThrottleFilterDef 
>>> *filter);
>>> +
>>> +virJSONValue *
>>> +qemuBlockThrottleFilterGetProps(virDomainThrottleFilterDef *filter,
>>> +                                const char *parentNodeName);
>> This function isn't used outside of qemu_block.c so it doesn't need to
>> be exported.
>>
>>> +
>>> +typedef struct qemuBlockThrottleFilterAttachData 
>>> qemuBlockThrottleFilterAttachData;
>>> +struct qemuBlockThrottleFilterAttachData {
>>> +    virJSONValue *filterProps;
>>> +    const char *filterNodeName;
>>> +    bool filterAttached;
>>> +};
>>> +
>>> +qemuBlockThrottleFilterAttachData *
>>> +qemuBlockThrottleFilterAttachPrepareBlockdev(virDomainThrottleFilterDef 
>>> *throttlefilter,
>>> +                                             const char 
>>> *parentNodeName);
>>> +
>>> +void
>>> +qemuBlockThrottleFilterAttachDataFree(qemuBlockThrottleFilterAttachData 
>>> *data);
>>> +
>>> +G_DEFINE_AUTOPTR_CLEANUP_FUNC(qemuBlockThrottleFilterAttachData,
>>> + qemuBlockThrottleFilterAttachDataFree);
>>> +
>>> +void
>>> +qemuBlockThrottleFilterAttachRollback(qemuMonitor *mon,
>>> + qemuBlockThrottleFilterAttachData *data);
>>> +
>>> +struct _qemuBlockThrottleFiltersData {
>>> +    qemuBlockThrottleFilterAttachData **filterdata;
>>> +    size_t nfilterdata;
>>> +};
>>> +
>>> +typedef struct _qemuBlockThrottleFiltersData 
>>> qemuBlockThrottleFiltersData;
>>> +
>>> +void
>>> +qemuBlockThrottleFiltersDataFree(qemuBlockThrottleFiltersData *data);
>>> +
>>> +G_DEFINE_AUTOPTR_CLEANUP_FUNC(qemuBlockThrottleFiltersData,
>>> + qemuBlockThrottleFiltersDataFree);
>>> +
>>> +int
>>> +qemuBlockThrottleFiltersAttach(qemuMonitor *mon,
>>> +                               qemuBlockThrottleFiltersData *data);
>>> +
>>> +void
>>> +qemuBlockThrottleFiltersDetach(qemuMonitor *mon,
>>> +                               qemuBlockThrottleFiltersData *data);
>>> +
>>>   int
>>>   qemuBlockRemoveImageMetadata(virQEMUDriver *driver,
>>>                                virDomainObj *vm,
>>> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
>>> index 2d0eddc79e..5ccae956d3 100644
>>> --- a/src/qemu/qemu_command.c
>>> +++ b/src/qemu/qemu_command.c
>>> @@ -1582,6 +1582,13 @@ qemuDiskConfigBlkdeviotuneEnabled(const 
>>> virDomainDiskDef *disk)
>>>   }
>>>     +bool
>>> +qemuDiskConfigThrottleFiltersEnabled(const virDomainDiskDef *disk)
>> This function doesn't need to be exported.
>>
>>> +{
>>> +    return disk->nthrottlefilters > 0;
>>> +}
>>> +
>>> +
>>>   /**
>>>    * qemuDiskBusIsSD:
>>>    * @bus: disk bus
>>> @@ -11055,6 +11062,83 @@ 
>>> qemuBuildStorageSourceChainAttachPrepareBlockdevOne(qemuBlockStorageSourceChainD
>>>   }
>>>     +/**
>>> + * qemuBuildThrottleFiltersAttachPrepareBlockdevOne:
>>> + * @data: filter chain data, which consists of array of filters and 
>>> size of such array
>>> + * @throttlefilter: new filter to be added into filter array
>>> + * @parentNodeName: parent nodename for this new throttlefilter, 
>>> which is used to build "blockdev-add" QMP request
>> the stuff after the comma can be dropped.
>>
>>> + *
>>> + * Build filter node chain to provide more flexibility for block 
>>> disk I/O limits
>>> + */
>>> +static int
>>> +qemuBuildThrottleFiltersAttachPrepareBlockdevOne(qemuBlockThrottleFiltersData 
>>> *data,
>>> + virDomainThrottleFilterDef *throttlefilter,
>>> +                                                 const char 
>>> *parentNodeName)
>>> +{
>>> +    g_autoptr(qemuBlockThrottleFilterAttachData) elem = NULL;
>>> +
>>> +    if (!(elem = 
>>> qemuBlockThrottleFilterAttachPrepareBlockdev(throttlefilter, 
>>> parentNodeName)))
>>> +        return -1;
>>> +
>>> +    VIR_APPEND_ELEMENT(data->filterdata, data->nfilterdata, elem);
>>> +    return 0;
>>> +}
>>> +
>>> +
>>> +/**
>>> + * qemuBuildThrottleFiltersAttachPrepareBlockdev:
>>> + * @disk: domain disk
>>> + *
>>> + * Build filter node chain to provide more flexibility for block 
>>> disk I/O limits
>>> + */
>>> +qemuBlockThrottleFiltersData *
>>> +qemuBuildThrottleFiltersAttachPrepareBlockdev(virDomainDiskDef *disk)
>>> +{
>>> +    g_autoptr(qemuBlockThrottleFiltersData) data = NULL;
>>> +    size_t i;
>>> +    const char * parentNodeName = NULL;
>>> +    g_autofree char *tmp_nodename = NULL;
>>> +
>>> +    data = g_new0(qemuBlockThrottleFiltersData, 1);
>>> +    /* get starting parentNodename, e.g. libvirt-1-format */
>>> +    parentNodeName = 
>>> qemuBlockStorageSourceGetEffectiveNodename(disk->src);
>>> +    /* build filterdata, which contains all filters info and 
>>> sequence info through parentNodeName */
>>> +    for (i = 0; i < disk->nthrottlefilters; i++) {
>>> +        tmp_nodename = g_strdup(parentNodeName);
>>> +        if (qemuBuildThrottleFiltersAttachPrepareBlockdevOne(data, 
>>> disk->throttlefilters[i], tmp_nodename) < 0)
>>> +            return NULL;
>> The nodename copied into 'tmp_nodename' is leaked on every iteration.
>> Also qemuBuildThrottleFiltersAttachPrepareBlockdevOne doesn't modify it
>> so there's no point in copying it.
>>
>>> +        parentNodeName = disk->throttlefilters[i]->nodename;
>>> +    }
>>> +
>>> +    return g_steal_pointer(&data);
>>> +}
>>> +
>>> +
>>> +/**
>>> + * qemuBuildThrottleFiltersDetachPrepareBlockdev:
>>> + * @disk: domain disk
>>> + *
>>> + * Build filters data for later "blockdev-del"
>>> + */
>>> +qemuBlockThrottleFiltersData *
>>> +qemuBuildThrottleFiltersDetachPrepareBlockdev(virDomainDiskDef *disk)
>>> +{
>>> +    g_autoptr(qemuBlockThrottleFiltersData) data = 
>>> g_new0(qemuBlockThrottleFiltersData, 1);
>>> +    size_t i;
>>> +
>>> +    /* build filterdata, which contains filters info and sequence 
>>> info */
>>> +    for (i = 0; i < disk->nthrottlefilters; i++) {
>>> +        g_autoptr(qemuBlockThrottleFilterAttachData) elem = 
>>> g_new0(qemuBlockThrottleFilterAttachData, 1);
>>> +        /* ignore other fields since the following info are enough 
>>> for "blockdev-del" */
>>> +        elem->filterNodeName = 
>>> qemuBlockThrottleFilterGetNodename(disk->throttlefilters[i]);
>> So I didn't yet see any code that serializes any of this in the status
>> XML, thus it seems that this will not work after you restart
>> libvirtd/virtqemud if a VM with filters is running, and then try to
>> detach disks. You'll need to add that, or base the filter nodename on
>> something else.
>>
>> Note that presence of the node name is authoritative and we must not try
>> to regenerate it. That would hinder changing the node names in the
>> future.
>>
>> See how the copyOnRead layer node name is stored.
>
> Filter node name is generated by rule "libvirt-nodenameindex-filter" 
> in method
>
> "qemuDomainPrepareThrottleFilterBlockdev", which is called by
>
> "qemuDomainPrepareDiskSourceBlockdev".
>
> it follows the same way like "libvirt-nodenameindex-format" node.
>
> And I tested restarting libvirtd, vm with throttle works well in this 
> case.
>
>>> +        elem->filterAttached = true;
>>> +
>>> +        VIR_APPEND_ELEMENT(data->filterdata, data->nfilterdata, elem);
>>> +    }
>>> +    return g_steal_pointer(&data);
>>> +}
>>> +
>>> +
>>>   /**
>>>    * qemuBuildStorageSourceChainAttachPrepareBlockdev:
>>>    * @top: storage source chain
>>
>> [...]
>>
>>
>>> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
>>> index 7ba2ea4a5e..2831036e23 100644
>>> --- a/src/qemu/qemu_domain.c
>>> +++ b/src/qemu/qemu_domain.c
>>> @@ -7989,7 +7989,8 @@ qemuDomainDetermineDiskChain(virQEMUDriver 
>>> *driver,
>>>    * @disk: disk definition object
>>>    *
>>>    * Returns the pointer to the node-name of the topmost layer used 
>>> by @disk as
>>> - * backend. Currently returns the nodename of the copy-on-read 
>>> filter if enabled
>>> + * backend. Currently returns the nodename of top throttle filter 
>>> if enabled
>>> + * or the nodename of the copy-on-read filter if enabled
>>>    * or the nodename of the top image's format driver. Empty disks 
>>> return NULL.
>>>    * This must be used only with disks instantiated via -blockdev 
>>> (thus not
>>>    * for SD cards).
>>> @@ -8005,6 +8006,10 @@ qemuDomainDiskGetTopNodename(virDomainDiskDef 
>>> *disk)
>>>       if (disk->copy_on_read == VIR_TRISTATE_SWITCH_ON)
>>>           return priv->nodeCopyOnRead;
>>>   +    /* If disk has throttles, take top throttle node name */
>>> +    if (disk->nthrottlefilters > 0)
>>> +        return 
>>> disk->throttlefilters[disk->nthrottlefilters-1]->nodename;
>>
>>         return disk->throttlefilters[disk->nthrottlefilters - 
>> 1]->nodename;
>>
>>         (extra spaces around subtraction operator)
>>
>>
>>>       return qemuBlockStorageSourceGetEffectiveNodename(disk->src);
>>>   }
>>>   @@ -11336,6 +11341,32 @@ 
>>> qemuDomainPrepareStorageSourceBlockdevNodename(virDomainDiskDef *disk,
>>>   }
>>>     +int
>>> +qemuDomainPrepareThrottleFilterBlockdevNodename(virDomainThrottleFilterDef 
>>> *filter,
>> This function is used only in this file so it doesn't need to be
>> exported. Also it's used only in one place so it can be inlined as it
>> makes the code harder to follow than necessary.
>>
>>> + const char *nodenameprefix)
>>> +{
>>> +    char *nodename = g_strdup_printf("%s-filter", nodenameprefix);
>>> +
>>> +    qemuBlockThrottleFilterSetNodename(filter, nodename);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +
>>> +int
>>> +qemuDomainPrepareThrottleFilterBlockdev(virDomainThrottleFilterDef 
>>> *filter,
>>> +                                        qemuDomainObjPrivate *priv)
>> This function is used only in this module thus it doesn't need to be
>> exported. Also it can't fail so there's no point in having a return
>> value.
>>
>>> +{
>>> +    g_autofree char *nodenameprefix = NULL;
>>> +
>>> +    filter->id = qemuDomainStorageIDNew(priv);
>>> +
>>> +    nodenameprefix = g_strdup_printf("libvirt-%u", filter->id);
>>         nodename = g_strdup_printf("libvirt-%u-filter" ...
>>
>> Instead of the convoluted mess and the below call.
>>
>>
>>> +
>>> +    return qemuDomainPrepareThrottleFilterBlockdevNodename(filter, 
>>> nodenameprefix);
>> [...]
>>
>>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>>> index f7e435d6d5..60989ae693 100644
>>> --- a/src/qemu/qemu_driver.c
>>> +++ b/src/qemu/qemu_driver.c
>>> @@ -14828,6 +14828,12 @@ 
>>> qemuDomainDiskBlockIoTuneIsSupported(virDomainDiskDef *disk)
>>>           return false;
>>>       }
>>>   +    if (disk->throttlefilters) {
>>> +        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>>> +                       _("block 'iotune' can't be used together 
>>> with 'throttlefilters' for disk '%1$s'"), disk->dst);
>>> +        return false;
>>> +    }
>> Preferrably add all this infrastructure which doesn't deal with the
>> setup of qemu in a separate patch.
>>
>> This patch is overly complex as it intermixes all of these validation
>> bits with the actual qemu interaction.
>>
>>> +
>>>       return true;
>>>   }
>>>   diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
>>> index 4a3f4f657e..103b9e9f00 100644
>>> --- a/src/qemu/qemu_hotplug.c
>>> +++ b/src/qemu/qemu_hotplug.c
>>> @@ -657,6 +657,7 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
>>>                               virDomainAsyncJob asyncJob)
>>>   {
>>>       g_autoptr(qemuBlockStorageSourceChainData) data = NULL;
>>> +    g_autoptr(qemuBlockThrottleFiltersData) filterData = NULL;
>>>       qemuDomainObjPrivate *priv = vm->privateData;
>>>       g_autoptr(virJSONValue) devprops = NULL;
>>>       bool extensionDeviceAttached = false;
>>> @@ -695,6 +696,19 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
>>>           if (rc < 0)
>>>               goto rollback;
>>>   +        /* Setup throttling filters
>>> +        * add additional "blockdev-add"(throttle filter) between 
>>> "blockdev-add" (qemuBlockStorageSourceChainAttach) and "device_add" 
>>> (qemuDomainAttachExtensionDevice)
>> Drop above line.
>>
>>> +        */
>>> +        if ((filterData = 
>>> qemuBuildThrottleFiltersAttachPrepareBlockdev(disk))) {
>>> +            if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
>>> +                return -1;
>>> +            /* QMP requests("blockdev-add" with 
>>> "driver":"throttle") to QEMU */
>>> +            rc = qemuBlockThrottleFiltersAttach(priv->mon, 
>>> filterData);
>>> +            qemuDomainObjExitMonitor(vm);
>>> +            if (rc < 0)
>>> +                goto rollback;
>>> +        }
>> The ordering is wrong. In the prepare step you are ordering the node
>> name dependencies such as
>>
>>   device -> copyOnRead Layer -> throttle -> image chain
>>
>> OBviously to ensure hierarchy at hotplug they need to be plugged from
>> the end.
>>
>> This block here is placed _AFTER_ copyOnRead layer instantiation, so
>> that will not work with disks with copyOnRead.
>
> After updating qemuBuildThrottleFiltersAttachPrepareBlockdev and 
> qemuDomainDiskGetTopNodename,
>
> order can be adjusted to be "device -> throttle-> copyOnRead Layer-> 
> image chain", and throttle
>
> works with copyOnRead, but I am also considering your suggestion about 
> per-storage-source
>
>>
>>> +
>>>           if (disk->transient) {
>>>               g_autoptr(qemuBlockStorageSourceAttachData) backend = 
>>> NULL;
>>>               g_autoptr(GHashTable) blockNamedNodeData = NULL;
>>> @@ -766,6 +780,8 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
>>>       if (extensionDeviceAttached)
>>> ignore_value(qemuDomainDetachExtensionDevice(priv->mon, &disk->info));
>>>   +    qemuBlockThrottleFiltersDetach(priv->mon, filterData);
>>> +
>>>       qemuBlockStorageSourceChainDetach(priv->mon, data);
>>>         qemuDomainObjExitMonitor(vm);
>>> @@ -921,6 +937,14 @@ 
>>> qemuDomainAttachDeviceDiskLiveInternal(virQEMUDriver *driver,
>>>       bool releaseSeclabel = false;
>>>       int ret = -1;
>>>   +    if (disk->device == VIR_DOMAIN_DISK_DEVICE_CDROM) {
>>> +        if (disk->nthrottlefilters > 0) {
>>> +            virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
>>> +                        _("cdrom device with throttle filters isn't 
>>> supported"));
>> So and now this is why I don't like the fact that you are doing this on
>> a per-disk level. On a per-image level (if you'd instantiate 'throttle'
>> layers when adding the image) this would not be a problem.
>>
>> I'd strongly prefer if you modify this such that the trottle layers will
>> be instantiated at per-storage-source level both in XML and in the code.
>
> regarding per-storage-source, do you mean xml like below? if so, when 
> specifying "--throttle-groups" in "attach-disk"
>
> it apply groups on top source(vm1_disk_2.qcow2) only? is there 
> scenario to apply throttle-groups on backing store source?
>
>   <disk type='file' device='disk'>
>       <driver name='qemu' type='qcow2'/>
>       <source file='/virt/disks/vm1_disk_2.qcow2' index='1'>
>           <throttlefilters>
>             <throttlefilter group='limit0'/>
>           </throttlefilters>
>       </source>
>       <backingStore type='file' index='4'>
>         <format type='qcow2'/>
>         <source file='/virt/disks/backing.qcow2'>
>           <throttlefilters>
>             <throttlefilter group='limit1'/>
>           </throttlefilters>
>         </source>
>         <backingStore/>
>       </backingStore>
>       <target dev='vdc' bus='virtio'/>
>       <alias name='virtio-disk2'/>
>       <address type='pci' domain='0x0000' bus='0x00' slot='0x06' 
> function='0x0'/>
>    </disk>
and regarding code part for per-storage-source, do you mean preparing 
throttle filter chain json within 
"qemuBuildStorageSourceChainAttachPrepareBlockdev(virStorageSource 
*top)"? if so, current 
"qemuBuildStorageSourceChainAttachPrepareBlockdev" doesn't include 
copy-on-read layer, however, throttlefilter chain depends on it for top 
source, in that case, should preparation of copy-on-read be moved into 
"qemuBuildStorageSourceChainAttachPrepareBlockdev" as well? if yes, 
current parameter "virStorageSource *top" is not enough, also, do you 
see any concern about updating 
"qemuBuildStorageSourceChainAttachPrepareBlockdev" for throttle chain, 
it seems it has a lot of callers
>>> +            return -1;
>>> +        }
>>> +    }
>>> +
>>>       if (disk->device == VIR_DOMAIN_DISK_DEVICE_FLOPPY) {
>>>           virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
>>>                          _("floppy device hotplug isn't supported"));
>>> @@ -4499,6 +4523,7 @@ qemuDomainRemoveDiskDevice(virQEMUDriver *driver,
>>>   {
>>>       qemuDomainDiskPrivate *diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
>>>       g_autoptr(qemuBlockStorageSourceChainData) diskBackend = NULL;
>>> +    g_autoptr(qemuBlockThrottleFiltersData) filterData = NULL;
>>>       size_t i;
>>>       qemuDomainObjPrivate *priv = vm->privateData;
>>>       int ret = -1;
>>> @@ -4537,6 +4562,14 @@ qemuDomainRemoveDiskDevice(virQEMUDriver 
>>> *driver,
>>>           }
>>>       }
>>>   +    qemuDomainObjEnterMonitor(vm);
>>> +    /* QMP request("blockdev-del") to QEMU to delete throttle filter*/
>>> +    if ((filterData = 
>>> qemuBuildThrottleFiltersDetachPrepareBlockdev(disk))) {
>>> +        /* "qemuBlockThrottleFiltersDetach" is used in rollback 
>>> within "qemuDomainAttachDiskGeneric" as well */
>>> +        qemuBlockThrottleFiltersDetach(priv->mon, filterData);
>> In which case this'd be automatic.
>>
>>
>>> +    }
>>> +    qemuDomainObjExitMonitor(vm);
>>> +
>>>       qemuDomainObjEnterMonitor(vm);
>>>         if (diskBackend)
>>> -- 
>>> 2.34.1
>>>
>> This patch should also contain the corresponding commandline
>> infrastructure for the filter itself.
>>
>> The disk backend code is specifically unified by using the intermediate
>> struct so that it's guaranteed that both hotplug and commandline work
>> the same. Thus they need to be added simultaenously.
>>
-- 
Thanks and Regards,

Wu
Re: [PATCH RFC v3 09/16] qemu: hotplug: Support hot attach and detach block disk along with throttle filters
Posted by Peter Krempa 1 month ago
On Fri, Aug 16, 2024 at 11:51:17 +0800, Chun Feng Wu wrote:
> 
> On 2024/8/15 13:04, Chun Feng Wu wrote:
> > 
> > On 2024/8/9 22:04, Peter Krempa wrote:
> > > On Wed, Jun 12, 2024 at 03:02:17 -0700,wucf@linux.ibm.com  wrote:
> > > > From: Chun Feng Wu<wucf@linux.ibm.com>

[...]

> > > I'd strongly prefer if you modify this such that the trottle layers will
> > > be instantiated at per-storage-source level both in XML and in the code.
> > 
> > regarding per-storage-source, do you mean xml like below? if so, when
> > specifying "--throttle-groups" in "attach-disk"
> > 
> > it apply groups on top source(vm1_disk_2.qcow2) only? is there scenario
> > to apply throttle-groups on backing store source?
> > 
> >   <disk type='file' device='disk'>
> >       <driver name='qemu' type='qcow2'/>
> >       <source file='/virt/disks/vm1_disk_2.qcow2' index='1'>
> >           <throttlefilters>
> >             <throttlefilter group='limit0'/>
> >           </throttlefilters>
> >       </source>
> >       <backingStore type='file' index='4'>
> >         <format type='qcow2'/>
> >         <source file='/virt/disks/backing.qcow2'>
> >           <throttlefilters>
> >             <throttlefilter group='limit1'/>
> >           </throttlefilters>
> >         </source>
> >         <backingStore/>
> >       </backingStore>
> >       <target dev='vdc' bus='virtio'/>
> >       <alias name='virtio-disk2'/>
> >       <address type='pci' domain='0x0000' bus='0x00' slot='0x06'
> > function='0x0'/>
> >    </disk>
> and regarding code part for per-storage-source, do you mean preparing
> throttle filter chain json within
> "qemuBuildStorageSourceChainAttachPrepareBlockdev(virStorageSource *top)"?
> if so, current "qemuBuildStorageSourceChainAttachPrepareBlockdev" doesn't
> include copy-on-read layer, however, throttlefilter chain depends on it for

Yeah, this would necessarily mean that copy-on-read is on top of the
throttle layer.


> top source, in that case, should preparation of copy-on-read be moved into
> "qemuBuildStorageSourceChainAttachPrepareBlockdev" as well? if yes, current

No. There's no sense to have multiple copy-on-read layers.

> parameter "virStorageSource *top" is not enough, also, do you see any
> concern about updating "qemuBuildStorageSourceChainAttachPrepareBlockdev"
> for throttle chain, it seems it has a lot of callers


As I've said in previous reply, having it on per-node level may be
overkill. If you don't ever forsee to use this differently then maybe
let's stick to the per-disk config, by just keeping proper ordering and
remembering the node names.