[PATCH 1/2] Add discard_no_unref option for qcow2 images

Jean-Louis Dupond posted 2 patches 1 year, 5 months ago
There is a newer version of this series
[PATCH 1/2] Add discard_no_unref option for qcow2 images
Posted by Jean-Louis Dupond 1 year, 5 months ago
Qemu 8.1.0 will add discard_no_unref option for qcow2 images.
When this option is enabled (default=false), then it will no longer
unreference clusters when guest does a discard, but it will just free
the blocks (useful for incremental backups for example) and pass the
discard to the lower layer.

This was implemented to avoid fragmentation within the qcow2 image.

Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
---
 docs/formatdomain.rst                         |  6 +++
 src/conf/domain_conf.c                        |  8 +++
 src/conf/domain_conf.h                        |  1 +
 src/conf/domain_validate.c                    | 15 ++++++
 src/conf/schemas/domaincommon.rng             |  8 +++
 src/conf/storage_source_conf.c                |  1 +
 src/conf/storage_source_conf.h                |  1 +
 src/qemu/qemu_block.c                         | 11 ++---
 src/qemu/qemu_domain.c                        |  1 +
 src/qemu/qemu_driver.c                        |  4 +-
 src/vz/vz_utils.c                             |  6 +++
 tests/qemusecuritytest.c                      |  1 +
 .../disk-discard_no_unref.x86_64-latest.args  | 39 +++++++++++++++
 .../disk-discard_no_unref.xml                 | 39 +++++++++++++++
 tests/qemuxml2argvtest.c                      |  1 +
 .../disk-discard_no_unref.x86_64-latest.xml   | 49 +++++++++++++++++++
 tests/qemuxml2xmltest.c                       |  1 +
 17 files changed, 185 insertions(+), 7 deletions(-)
 create mode 100644 tests/qemuxml2argvdata/disk-discard_no_unref.x86_64-latest.args
 create mode 100644 tests/qemuxml2argvdata/disk-discard_no_unref.xml
 create mode 100644 tests/qemuxml2xmloutdata/disk-discard_no_unref.x86_64-latest.xml

diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
index c3526439bf..6e89f8e01f 100644
--- a/docs/formatdomain.rst
+++ b/docs/formatdomain.rst
@@ -3284,6 +3284,12 @@ paravirtualized driver is specified via the ``disk`` element.
       format driver of the ``qemu`` hypervisor can be controlled via the
       ``max_size`` subelement (see example below).
 
+      The optional ``discard_no_unref`` attribute can be set to control the way
+      the ``qemu`` hypervisor handles guest discard commands inside the qcow2
+      image. When enabled, a discard from within the guest will not cause the
+      qcow2 image to remove the cluster references, but will still send the
+      discard command to its lower layer. :since:`Since 9.5.0`
+
       In the majority of cases the default configuration used by the hypervisor
       is sufficient so modifying this setting should not be necessary. For
       specifics on how the metadata cache of ``qcow2`` in ``qemu`` behaves refer
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 0edb1bda9d..f28cb64eb5 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -7826,6 +7826,10 @@ virDomainDiskDefDriverParseXML(virDomainDiskDef *def,
     if (virXMLPropUInt(cur, "queue_size", 10, VIR_XML_PROP_NONE, &def->queue_size) < 0)
         return -1;
 
+    if (virXMLPropTristateSwitch(cur, "discard_no_unref", VIR_XML_PROP_NONE,
+                                 &def->discard_no_unref) < 0)
+        return -1;
+
     return 0;
 }
 
@@ -22501,6 +22505,10 @@ virDomainDiskDefFormatDriver(virBuffer *buf,
         virBufferAsprintf(&attrBuf, " detect_zeroes='%s'",
                           virDomainDiskDetectZeroesTypeToString(disk->detect_zeroes));
 
+    if (disk->discard_no_unref)
+        virBufferAsprintf(&attrBuf, " discard_no_unref='%s'",
+                          virTristateSwitchTypeToString(disk->discard_no_unref));
+
     if (disk->queues)
         virBufferAsprintf(&attrBuf, " queues='%u'", disk->queues);
 
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 629e32c39f..cddaa3824d 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -606,6 +606,7 @@ struct _virDomainDiskDef {
     virDomainDiskDiscard discard;
     unsigned int iothread; /* unused = 0, > 0 specific thread # */
     virDomainDiskDetectZeroes detect_zeroes;
+    virTristateSwitch discard_no_unref;
     char *domain_name; /* backend domain name */
     unsigned int queues;
     unsigned int queue_size;
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
index 80d6a2ffd9..34508b1474 100644
--- a/src/conf/domain_validate.c
+++ b/src/conf/domain_validate.c
@@ -379,6 +379,12 @@ virDomainDiskVhostUserValidate(const virDomainDiskDef *disk)
         return -1;
     }
 
+    if (disk->discard_no_unref) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("discard_no_unref is not supported with vhostuser disk"));
+        return -1;
+    }
+
     /* Unsupported driver elements */
 
     if (disk->src->metadataCacheMaxSize > 0) {
@@ -921,6 +927,15 @@ virDomainDiskDefValidate(const virDomainDef *def,
         return -1;
     }
 
+    if (disk->discard_no_unref == VIR_TRISTATE_SWITCH_ON) {
+        if (disk->src->readonly) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           _("discard_no_unref is not compatible with read-only disk '%1$s'"),
+                           disk->dst);
+            return -1;
+        }
+    }
+
     return 0;
 }
 
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
index c1725bb511..0b49fbcfeb 100644
--- a/src/conf/schemas/domaincommon.rng
+++ b/src/conf/schemas/domaincommon.rng
@@ -2514,6 +2514,9 @@
       <optional>
         <ref name="detect_zeroes"/>
       </optional>
+      <optional>
+        <ref name="discard_no_unref"/>
+      </optional>
       <optional>
         <attribute name="queues">
           <ref name="positiveInteger"/>
@@ -2629,6 +2632,11 @@
       </choice>
     </attribute>
   </define>
+  <define name="discard_no_unref">
+    <attribute name="discard_no_unref">
+      <ref name="virOnOff"/>
+    </attribute>
+  </define>
   <define name="controller">
     <element name="controller">
       <optional>
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
index 99061e4df7..dcac3a8ff6 100644
--- a/src/conf/storage_source_conf.c
+++ b/src/conf/storage_source_conf.c
@@ -809,6 +809,7 @@ virStorageSourceCopy(const virStorageSource *src,
     def->cachemode = src->cachemode;
     def->discard = src->discard;
     def->detect_zeroes = src->detect_zeroes;
+    def->discard_no_unref = src->discard_no_unref;
     def->sslverify = src->sslverify;
     def->readahead = src->readahead;
     def->timeout = src->timeout;
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
index c6187dda59..f13e7c756a 100644
--- a/src/conf/storage_source_conf.h
+++ b/src/conf/storage_source_conf.h
@@ -399,6 +399,7 @@ struct _virStorageSource {
     int cachemode; /* enum virDomainDiskCache */
     int discard; /* enum virDomainDiskDiscard */
     int detect_zeroes; /* enum virDomainDiskDetectZeroes */
+    virTristateSwitch discard_no_unref;
 
     bool floppyimg; /* set to true if the storage source is going to be used
                        as a source for floppy drive */
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
index 8b2159f845..dcdf883926 100644
--- a/src/qemu/qemu_block.c
+++ b/src/qemu/qemu_block.c
@@ -1117,12 +1117,11 @@ qemuBlockStorageSourceGetFormatQcow2Props(virStorageSource *src,
      * see: qemu.git/docs/qcow2-cache.txt
      * https://git.qemu.org/?p=qemu.git;a=blob;f=docs/qcow2-cache.txt
      */
-    if (src->metadataCacheMaxSize > 0) {
-        if (virJSONValueObjectAdd(&props,
-                                  "U:cache-size", src->metadataCacheMaxSize,
-                                  NULL) < 0)
-            return -1;
-    }
+    if (virJSONValueObjectAdd(&props,
+                              "P:cache-size", src->metadataCacheMaxSize,
+                              "T:discard-no-unref", src->discard_no_unref,
+                              NULL) < 0)
+        return -1;
 
     return 0;
 }
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 8f77e8fc58..a22dac9884 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -11022,6 +11022,7 @@ qemuDomainPrepareDiskSourceData(virDomainDiskDef *disk,
     src->iomode = disk->iomode;
     src->cachemode = disk->cachemode;
     src->discard = disk->discard;
+    src->discard_no_unref = disk->discard_no_unref;
 
     if (disk->device == VIR_DOMAIN_DISK_DEVICE_FLOPPY)
         src->floppyimg = true;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 56f4cd6197..f4aaa28ba3 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -14225,8 +14225,10 @@ qemuDomainBlockCopyCommon(virDomainObj *vm,
      * into the topmost virStorage source of the disk chain.
      * Since 'mirror' has the ambition to replace it we need to propagate
      * it into the mirror too. We do it directly as otherwise we'd need
-     * to modify all callers of 'qemuDomainPrepareStorageSourceBlockdev' */
+     * to modify all callers of 'qemuDomainPrepareStorageSourceBlockdev'
+     * Same for discard_no_unref */
     mirror->detect_zeroes = disk->detect_zeroes;
+    mirror->discard_no_unref = disk->discard_no_unref;
 
     /* If reusing an external image that includes a backing file but the user
      * did not enumerate the chain in the XML we need to detect the chain */
diff --git a/src/vz/vz_utils.c b/src/vz/vz_utils.c
index 2db1146149..7db7dbd419 100644
--- a/src/vz/vz_utils.c
+++ b/src/vz/vz_utils.c
@@ -347,6 +347,12 @@ vzCheckDiskUnsupportedParams(virDomainDiskDef *disk)
         return -1;
     }
 
+    if (disk->discard_no_unref) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("Disk discard_no_unref is not supported by vz driver."));
+        return -1;
+    }
+
     if (disk->startupPolicy != VIR_DOMAIN_STARTUP_POLICY_DEFAULT) {
         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                        _("Setting up disk startup policy is not "
diff --git a/tests/qemusecuritytest.c b/tests/qemusecuritytest.c
index e2989c05aa..51c309cc57 100644
--- a/tests/qemusecuritytest.c
+++ b/tests/qemusecuritytest.c
@@ -212,6 +212,7 @@ mymain(void)
     DO_TEST_DOMAIN("disk-cdrom-tray");
     DO_TEST_DOMAIN("disk-copy_on_read");
     DO_TEST_DOMAIN("disk-detect-zeroes");
+    DO_TEST_DOMAIN("disk-discard_no_unref");
     DO_TEST_DOMAIN("disk-error-policy");
     DO_TEST_DOMAIN("disk-floppy");
     DO_TEST_DOMAIN("disk-floppy-q35");
diff --git a/tests/qemuxml2argvdata/disk-discard_no_unref.x86_64-latest.args b/tests/qemuxml2argvdata/disk-discard_no_unref.x86_64-latest.args
new file mode 100644
index 0000000000..4c493c2660
--- /dev/null
+++ b/tests/qemuxml2argvdata/disk-discard_no_unref.x86_64-latest.args
@@ -0,0 +1,39 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/var/lib/libvirt/qemu/domain--1-test \
+USER=test \
+LOGNAME=test \
+XDG_DATA_HOME=/var/lib/libvirt/qemu/domain--1-test/.local/share \
+XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain--1-test/.cache \
+XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-test/.config \
+/usr/bin/qemu-system-x86_64 \
+-name guest=test,debug-threads=on \
+-S \
+-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain--1-test/master-key.aes"}' \
+-machine pc,usb=off,dump-guest-core=off,memory-backend=pc.ram,acpi=off \
+-accel tcg \
+-cpu qemu64 \
+-m 1024 \
+-object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":1073741824}' \
+-overcommit mem-lock=off \
+-smp 1,sockets=1,cores=1,threads=1 \
+-uuid 92d7a226-cfae-425b-a6d3-00bbf9ec5c9e \
+-display none \
+-no-user-config \
+-nodefaults \
+-chardev socket,id=charmonitor,fd=1729,server=on,wait=off \
+-mon chardev=charmonitor,id=monitor,mode=control \
+-rtc base=utc \
+-no-shutdown \
+-boot menu=on,strict=on \
+-device '{"driver":"piix3-usb-uhci","id":"usb","bus":"pci.0","addr":"0x1.0x2"}' \
+-blockdev '{"driver":"file","filename":"/var/lib/libvirt/images/f14.img","node-name":"libvirt-2-storage","auto-read-only":true,"discard":"unmap"}' \
+-blockdev '{"node-name":"libvirt-2-format","read-only":false,"discard":"unmap","driver":"qcow2","discard-no-unref":true,"file":"libvirt-2-storage"}' \
+-device '{"driver":"virtio-blk-pci","bus":"pci.0","addr":"0x4","drive":"libvirt-2-format","id":"virtio-disk0","bootindex":2}' \
+-blockdev '{"driver":"file","filename":"/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso","node-name":"libvirt-1-storage","auto-read-only":true,"discard":"unmap"}' \
+-blockdev '{"node-name":"libvirt-1-format","read-only":true,"discard":"ignore","driver":"raw","file":"libvirt-1-storage"}' \
+-device '{"driver":"ide-cd","bus":"ide.1","unit":0,"drive":"libvirt-1-format","id":"ide0-1-0","bootindex":1}' \
+-audiodev '{"id":"audio1","driver":"none"}' \
+-device '{"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.0","addr":"0x2"}' \
+-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \
+-msg timestamp=on
diff --git a/tests/qemuxml2argvdata/disk-discard_no_unref.xml b/tests/qemuxml2argvdata/disk-discard_no_unref.xml
new file mode 100644
index 0000000000..34d61b9879
--- /dev/null
+++ b/tests/qemuxml2argvdata/disk-discard_no_unref.xml
@@ -0,0 +1,39 @@
+<domain type='qemu'>
+  <name>test</name>
+  <uuid>92d7a226-cfae-425b-a6d3-00bbf9ec5c9e</uuid>
+  <memory unit='KiB'>1048576</memory>
+  <currentMemory unit='KiB'>1048576</currentMemory>
+  <vcpu placement='static'>1</vcpu>
+  <os>
+    <type arch='x86_64' machine='pc'>hvm</type>
+    <boot dev='cdrom'/>
+    <boot dev='hd'/>
+    <bootmenu enable='yes'/>
+  </os>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>restart</on_crash>
+  <devices>
+    <emulator>/usr/bin/qemu-system-x86_64</emulator>
+    <!-- For this disk, intentionally stress parser resilience to
+         atypical ordering -->
+    <disk device='disk'>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
+      <source file='/var/lib/libvirt/images/f14.img'/>
+      <target dev='vda' bus='virtio'/>
+      <driver discard='unmap' discard_no_unref='on' name='qemu' type='qcow2'/>
+    </disk>
+    <disk type='file' device='cdrom'>
+      <driver discard='ignore'/>
+      <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/>
+      <target dev='hdc' bus='ide'/>
+      <readonly/>
+      <address type='drive' controller='0' bus='1' target='0' unit='0'/>
+    </disk>
+    <controller type='usb' index='0'/>
+    <controller type='ide' index='0'/>
+    <controller type='pci' index='0' model='pci-root'/>
+    <memballoon model='virtio'/>
+  </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index d914d8cbea..2dda9c4501 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -1245,6 +1245,7 @@ mymain(void)
     DO_TEST_CAPS_LATEST("disk-copy_on_read");
     DO_TEST_CAPS_LATEST("disk-discard");
     DO_TEST_CAPS_LATEST("disk-detect-zeroes");
+    DO_TEST_CAPS_LATEST("disk-discard_no_unref");
     DO_TEST_CAPS_LATEST("disk-snapshot");
     DO_TEST_CAPS_LATEST_PARSE_ERROR("disk-same-targets");
     DO_TEST_CAPS_LATEST_PARSE_ERROR("disk-missing-target-invalid");
diff --git a/tests/qemuxml2xmloutdata/disk-discard_no_unref.x86_64-latest.xml b/tests/qemuxml2xmloutdata/disk-discard_no_unref.x86_64-latest.xml
new file mode 100644
index 0000000000..c2b49d4785
--- /dev/null
+++ b/tests/qemuxml2xmloutdata/disk-discard_no_unref.x86_64-latest.xml
@@ -0,0 +1,49 @@
+<domain type='qemu'>
+  <name>test</name>
+  <uuid>92d7a226-cfae-425b-a6d3-00bbf9ec5c9e</uuid>
+  <memory unit='KiB'>1048576</memory>
+  <currentMemory unit='KiB'>1048576</currentMemory>
+  <vcpu placement='static'>1</vcpu>
+  <os>
+    <type arch='x86_64' machine='pc'>hvm</type>
+    <boot dev='cdrom'/>
+    <boot dev='hd'/>
+    <bootmenu enable='yes'/>
+  </os>
+  <cpu mode='custom' match='exact' check='none'>
+    <model fallback='forbid'>qemu64</model>
+  </cpu>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>restart</on_crash>
+  <devices>
+    <emulator>/usr/bin/qemu-system-x86_64</emulator>
+    <disk type='file' device='disk'>
+      <driver name='qemu' type='qcow2' discard='unmap' discard_no_unref='on'/>
+      <source file='/var/lib/libvirt/images/f14.img'/>
+      <target dev='vda' bus='virtio'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
+    </disk>
+    <disk type='file' device='cdrom'>
+      <driver name='qemu' type='raw' discard='ignore'/>
+      <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/>
+      <target dev='hdc' bus='ide'/>
+      <readonly/>
+      <address type='drive' controller='0' bus='1' target='0' unit='0'/>
+    </disk>
+    <controller type='usb' index='0' model='piix3-uhci'>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/>
+    </controller>
+    <controller type='ide' index='0'>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/>
+    </controller>
+    <controller type='pci' index='0' model='pci-root'/>
+    <input type='mouse' bus='ps2'/>
+    <input type='keyboard' bus='ps2'/>
+    <audio id='1' type='none'/>
+    <memballoon model='virtio'>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/>
+    </memballoon>
+  </devices>
+</domain>
diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c
index fb6fa3579f..6bdcb4bff5 100644
--- a/tests/qemuxml2xmltest.c
+++ b/tests/qemuxml2xmltest.c
@@ -599,6 +599,7 @@ mymain(void)
 
     DO_TEST_CAPS_LATEST("disk-discard");
     DO_TEST_CAPS_LATEST("disk-detect-zeroes");
+    DO_TEST_CAPS_LATEST("disk-discard_no_unref");
 
     DO_TEST_NOCAPS("disk-serial");
 
-- 
2.41.0
Re: [PATCH 1/2] Add discard_no_unref option for qcow2 images
Posted by Peter Krempa 1 year, 5 months ago
On Tue, Jun 06, 2023 at 11:54:40 +0200, Jean-Louis Dupond wrote:
> Qemu 8.1.0 will add discard_no_unref option for qcow2 images.
> When this option is enabled (default=false), then it will no longer
> unreference clusters when guest does a discard, but it will just free
> the blocks (useful for incremental backups for example) and pass the
> discard to the lower layer.
> 
> This was implemented to avoid fragmentation within the qcow2 image.
> 
> Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
> ---
>  docs/formatdomain.rst                         |  6 +++
>  src/conf/domain_conf.c                        |  8 +++
>  src/conf/domain_conf.h                        |  1 +
>  src/conf/domain_validate.c                    | 15 ++++++
>  src/conf/schemas/domaincommon.rng             |  8 +++
>  src/conf/storage_source_conf.c                |  1 +
>  src/conf/storage_source_conf.h                |  1 +
>  src/qemu/qemu_block.c                         | 11 ++---
>  src/qemu/qemu_domain.c                        |  1 +
>  src/qemu/qemu_driver.c                        |  4 +-
>  src/vz/vz_utils.c                             |  6 +++
>  tests/qemusecuritytest.c                      |  1 +
>  .../disk-discard_no_unref.x86_64-latest.args  | 39 +++++++++++++++
>  .../disk-discard_no_unref.xml                 | 39 +++++++++++++++
>  tests/qemuxml2argvtest.c                      |  1 +
>  .../disk-discard_no_unref.x86_64-latest.xml   | 49 +++++++++++++++++++
>  tests/qemuxml2xmltest.c                       |  1 +
>  17 files changed, 185 insertions(+), 7 deletions(-)
>  create mode 100644 tests/qemuxml2argvdata/disk-discard_no_unref.x86_64-latest.args
>  create mode 100644 tests/qemuxml2argvdata/disk-discard_no_unref.xml
>  create mode 100644 tests/qemuxml2xmloutdata/disk-discard_no_unref.x86_64-latest.xml
> 
> diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
> index c3526439bf..6e89f8e01f 100644
> --- a/docs/formatdomain.rst
> +++ b/docs/formatdomain.rst
> @@ -3284,6 +3284,12 @@ paravirtualized driver is specified via the ``disk`` element.
>        format driver of the ``qemu`` hypervisor can be controlled via the
>        ``max_size`` subelement (see example below).
>  
> +      The optional ``discard_no_unref`` attribute can be set to control the way
> +      the ``qemu`` hypervisor handles guest discard commands inside the qcow2
> +      image. When enabled, a discard from within the guest will not cause the
> +      qcow2 image to remove the cluster references, but will still send the
> +      discard command to its lower layer. :since:`Since 9.5.0`

I'm not quite sure that I understand from this description what it
actually does.

I understand that if a discard request from the guest gets to qemu, the
QCOW2 image keeps the reference, but the backing image unreferences it?

What would this be useful for?

> +
>        In the majority of cases the default configuration used by the hypervisor
>        is sufficient so modifying this setting should not be necessary. For
>        specifics on how the metadata cache of ``qcow2`` in ``qemu`` behaves refer

[...]

> diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
> index 80d6a2ffd9..34508b1474 100644
> --- a/src/conf/domain_validate.c
> +++ b/src/conf/domain_validate.c
> @@ -379,6 +379,12 @@ virDomainDiskVhostUserValidate(const virDomainDiskDef *disk)
>          return -1;
>      }
>  
> +    if (disk->discard_no_unref) {
> +        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> +                       _("discard_no_unref is not supported with vhostuser disk"));
> +        return -1;
> +    }

[1]

> +
>      /* Unsupported driver elements */
>  
>      if (disk->src->metadataCacheMaxSize > 0) {
> @@ -921,6 +927,15 @@ virDomainDiskDefValidate(const virDomainDef *def,
>          return -1;
>      }
>  
> +    if (disk->discard_no_unref == VIR_TRISTATE_SWITCH_ON) {
> +        if (disk->src->readonly) {
> +            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> +                           _("discard_no_unref is not compatible with read-only disk '%1$s'"),
> +                           disk->dst);
> +            return -1;
> +        }
> +    }
> +

Validation that the flag is used with qcow2 is missing. If you add that
[1] becomes pointless as vhost-user doesn't work with 'qcow2'


> diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
> index c1725bb511..0b49fbcfeb 100644
> --- a/src/conf/schemas/domaincommon.rng
> +++ b/src/conf/schemas/domaincommon.rng
> @@ -2514,6 +2514,9 @@
>        <optional>
>          <ref name="detect_zeroes"/>
>        </optional>
> +      <optional>
> +        <ref name="discard_no_unref"/>
> +      </optional>

You can declare the attribute directly here, having a definition for it
doesn't make too much sense.



> diff --git a/tests/qemuxml2argvdata/disk-discard_no_unref.xml b/tests/qemuxml2argvdata/disk-discard_no_unref.xml
> new file mode 100644
> index 0000000000..34d61b9879
> --- /dev/null
> +++ b/tests/qemuxml2argvdata/disk-discard_no_unref.xml
> @@ -0,0 +1,39 @@
> +<domain type='qemu'>
> +  <name>test</name>
> +  <uuid>92d7a226-cfae-425b-a6d3-00bbf9ec5c9e</uuid>
> +  <memory unit='KiB'>1048576</memory>
> +  <currentMemory unit='KiB'>1048576</currentMemory>
> +  <vcpu placement='static'>1</vcpu>
> +  <os>
> +    <type arch='x86_64' machine='pc'>hvm</type>
> +    <boot dev='cdrom'/>
> +    <boot dev='hd'/>
> +    <bootmenu enable='yes'/>
> +  </os>
> +  <clock offset='utc'/>
> +  <on_poweroff>destroy</on_poweroff>
> +  <on_reboot>restart</on_reboot>
> +  <on_crash>restart</on_crash>
> +  <devices>
> +    <emulator>/usr/bin/qemu-system-x86_64</emulator>
> +    <!-- For this disk, intentionally stress parser resilience to
> +         atypical ordering -->
> +    <disk device='disk'>
> +      <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
> +      <source file='/var/lib/libvirt/images/f14.img'/>
> +      <target dev='vda' bus='virtio'/>
> +      <driver discard='unmap' discard_no_unref='on' name='qemu' type='qcow2'/>
> +    </disk>
> +    <disk type='file' device='cdrom'>
> +      <driver discard='ignore'/>
> +      <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/>
> +      <target dev='hdc' bus='ide'/>
> +      <readonly/>
> +      <address type='drive' controller='0' bus='1' target='0' unit='0'/>
> +    </disk>

Please drop this irrelevant disk
Re: [PATCH 1/2] Add discard_no_unref option for qcow2 images
Posted by Jean-Louis Dupond 1 year, 5 months ago
On 8/06/2023 16:36, Peter Krempa wrote:
> On Tue, Jun 06, 2023 at 11:54:40 +0200, Jean-Louis Dupond wrote:
>> Qemu 8.1.0 will add discard_no_unref option for qcow2 images.
>> When this option is enabled (default=false), then it will no longer
>> unreference clusters when guest does a discard, but it will just free
>> the blocks (useful for incremental backups for example) and pass the
>> discard to the lower layer.
>>
>> This was implemented to avoid fragmentation within the qcow2 image.
>>
>> Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
>> ---
>>   docs/formatdomain.rst                         |  6 +++
>>   src/conf/domain_conf.c                        |  8 +++
>>   src/conf/domain_conf.h                        |  1 +
>>   src/conf/domain_validate.c                    | 15 ++++++
>>   src/conf/schemas/domaincommon.rng             |  8 +++
>>   src/conf/storage_source_conf.c                |  1 +
>>   src/conf/storage_source_conf.h                |  1 +
>>   src/qemu/qemu_block.c                         | 11 ++---
>>   src/qemu/qemu_domain.c                        |  1 +
>>   src/qemu/qemu_driver.c                        |  4 +-
>>   src/vz/vz_utils.c                             |  6 +++
>>   tests/qemusecuritytest.c                      |  1 +
>>   .../disk-discard_no_unref.x86_64-latest.args  | 39 +++++++++++++++
>>   .../disk-discard_no_unref.xml                 | 39 +++++++++++++++
>>   tests/qemuxml2argvtest.c                      |  1 +
>>   .../disk-discard_no_unref.x86_64-latest.xml   | 49 +++++++++++++++++++
>>   tests/qemuxml2xmltest.c                       |  1 +
>>   17 files changed, 185 insertions(+), 7 deletions(-)
>>   create mode 100644 tests/qemuxml2argvdata/disk-discard_no_unref.x86_64-latest.args
>>   create mode 100644 tests/qemuxml2argvdata/disk-discard_no_unref.xml
>>   create mode 100644 tests/qemuxml2xmloutdata/disk-discard_no_unref.x86_64-latest.xml
>>
>> diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
>> index c3526439bf..6e89f8e01f 100644
>> --- a/docs/formatdomain.rst
>> +++ b/docs/formatdomain.rst
>> @@ -3284,6 +3284,12 @@ paravirtualized driver is specified via the ``disk`` element.
>>         format driver of the ``qemu`` hypervisor can be controlled via the
>>         ``max_size`` subelement (see example below).
>>   
>> +      The optional ``discard_no_unref`` attribute can be set to control the way
>> +      the ``qemu`` hypervisor handles guest discard commands inside the qcow2
>> +      image. When enabled, a discard from within the guest will not cause the
>> +      qcow2 image to remove the cluster references, but will still send the
>> +      discard command to its lower layer. :since:`Since 9.5.0`
> I'm not quite sure that I understand from this description what it
> actually does.
>
> I understand that if a discard request from the guest gets to qemu, the
> QCOW2 image keeps the reference, but the backing image unreferences it?
>
> What would this be useful for?
When a guest does a discard request, it will normally unreference the 
discarded block in the qcow2 image.
Now this creates a lot of fragmentation in the image, and next to that 
causes the image to grow sometimes over 110%
of its virtual size. Because when allocating a new block, it always 
needs a continuous block, and because of the
default behavior, there are a lot of small holes in the qcow2 image 
which doesn't fit the new allocations.
But if we enable this option, then the clusters will not be 
unreferenced, which will fix the fragmentation,
as there will be no holes anymore in the qcow2 image.
Some more info in https://gitlab.com/qemu-project/qemu/-/issues/1621

>> +
>>         In the majority of cases the default configuration used by the hypervisor
>>         is sufficient so modifying this setting should not be necessary. For
>>         specifics on how the metadata cache of ``qcow2`` in ``qemu`` behaves refer
> [...]
>
>> diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
>> index 80d6a2ffd9..34508b1474 100644
>> --- a/src/conf/domain_validate.c
>> +++ b/src/conf/domain_validate.c
>> @@ -379,6 +379,12 @@ virDomainDiskVhostUserValidate(const virDomainDiskDef *disk)
>>           return -1;
>>       }
>>   
>> +    if (disk->discard_no_unref) {
>> +        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
>> +                       _("discard_no_unref is not supported with vhostuser disk"));
>> +        return -1;
>> +    }
> [1]
>
>> +
>>       /* Unsupported driver elements */
>>   
>>       if (disk->src->metadataCacheMaxSize > 0) {
>> @@ -921,6 +927,15 @@ virDomainDiskDefValidate(const virDomainDef *def,
>>           return -1;
>>       }
>>   
>> +    if (disk->discard_no_unref == VIR_TRISTATE_SWITCH_ON) {
>> +        if (disk->src->readonly) {
>> +            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>> +                           _("discard_no_unref is not compatible with read-only disk '%1$s'"),
>> +                           disk->dst);
>> +            return -1;
>> +        }
>> +    }
>> +
> Validation that the flag is used with qcow2 is missing. If you add that
> [1] becomes pointless as vhost-user doesn't work with 'qcow2'
>
Fixed
>> diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
>> index c1725bb511..0b49fbcfeb 100644
>> --- a/src/conf/schemas/domaincommon.rng
>> +++ b/src/conf/schemas/domaincommon.rng
>> @@ -2514,6 +2514,9 @@
>>         <optional>
>>           <ref name="detect_zeroes"/>
>>         </optional>
>> +      <optional>
>> +        <ref name="discard_no_unref"/>
>> +      </optional>
> You can declare the attribute directly here, having a definition for it
> doesn't make too much sense.
>
As the others were all with a reference, I did it also for this one. But 
fixed.
>
>> diff --git a/tests/qemuxml2argvdata/disk-discard_no_unref.xml b/tests/qemuxml2argvdata/disk-discard_no_unref.xml
>> new file mode 100644
>> index 0000000000..34d61b9879
>> --- /dev/null
>> +++ b/tests/qemuxml2argvdata/disk-discard_no_unref.xml
>> @@ -0,0 +1,39 @@
>> +<domain type='qemu'>
>> +  <name>test</name>
>> +  <uuid>92d7a226-cfae-425b-a6d3-00bbf9ec5c9e</uuid>
>> +  <memory unit='KiB'>1048576</memory>
>> +  <currentMemory unit='KiB'>1048576</currentMemory>
>> +  <vcpu placement='static'>1</vcpu>
>> +  <os>
>> +    <type arch='x86_64' machine='pc'>hvm</type>
>> +    <boot dev='cdrom'/>
>> +    <boot dev='hd'/>
>> +    <bootmenu enable='yes'/>
>> +  </os>
>> +  <clock offset='utc'/>
>> +  <on_poweroff>destroy</on_poweroff>
>> +  <on_reboot>restart</on_reboot>
>> +  <on_crash>restart</on_crash>
>> +  <devices>
>> +    <emulator>/usr/bin/qemu-system-x86_64</emulator>
>> +    <!-- For this disk, intentionally stress parser resilience to
>> +         atypical ordering -->
>> +    <disk device='disk'>
>> +      <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
>> +      <source file='/var/lib/libvirt/images/f14.img'/>
>> +      <target dev='vda' bus='virtio'/>
>> +      <driver discard='unmap' discard_no_unref='on' name='qemu' type='qcow2'/>
>> +    </disk>
>> +    <disk type='file' device='cdrom'>
>> +      <driver discard='ignore'/>
>> +      <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/>
>> +      <target dev='hdc' bus='ide'/>
>> +      <readonly/>
>> +      <address type='drive' controller='0' bus='1' target='0' unit='0'/>
>> +    </disk>
> Please drop this irrelevant disk
>
Fixed
I'll post a v2 version asap.


Thanks
Jean-Louis