[libvirt] [PATCH] qemuBuildMemoryBackendStr: Handle one more corner case

Michal Privoznik posted 1 patch 6 years, 8 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/6d9d7f5e3994296a26f5c6160d73455500671d2f.1502204645.git.mprivozn@redhat.com
src/qemu/qemu_command.c                            | 22 ++++++--
.../qemuxml2argv-hugepages-pages7.args             | 35 ++++++++++++
.../qemuxml2argv-hugepages-pages7.xml              | 65 ++++++++++++++++++++++
tests/qemuxml2argvtest.c                           |  2 +
.../qemuxml2xmlout-hugepages-pages7.xml            |  1 +
tests/qemuxml2xmltest.c                            |  1 +
6 files changed, 120 insertions(+), 6 deletions(-)
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages7.args
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages7.xml
create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-hugepages-pages7.xml
[libvirt] [PATCH] qemuBuildMemoryBackendStr: Handle one more corner case
Posted by Michal Privoznik 6 years, 8 months ago
https://bugzilla.redhat.com/show_bug.cgi?id=1458638

This code is so complicated because we allow enabling the same
bits at many places. Just like in this case: huge pages can be
enabled by global <hugepages/> element under <memoryBacking> or
on per <memory/> basis. To complicate things a bit more, users
are allowed to not specify any specific page size in which case
the default page size is used. And this is what is causing this
bug. If no page size is specified, @pagesize is keeping value of
zero throughout whole function. Therefore we need yet another
boolean to hold [use, don't use] information as we can't sue
@pagesize for that.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
 src/qemu/qemu_command.c                            | 22 ++++++--
 .../qemuxml2argv-hugepages-pages7.args             | 35 ++++++++++++
 .../qemuxml2argv-hugepages-pages7.xml              | 65 ++++++++++++++++++++++
 tests/qemuxml2argvtest.c                           |  2 +
 .../qemuxml2xmlout-hugepages-pages7.xml            |  1 +
 tests/qemuxml2xmltest.c                            |  1 +
 6 files changed, 120 insertions(+), 6 deletions(-)
 create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages7.args
 create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages7.xml
 create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-hugepages-pages7.xml

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 604650596..68fc13706 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3250,8 +3250,6 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps,
                           virBitmapPtr autoNodeset,
                           bool force)
 {
-    virDomainHugePagePtr master_hugepage = NULL;
-    virDomainHugePagePtr hugepage = NULL;
     virDomainNumatuneMemMode mode;
     const long system_page_size = virGetSystemPageSizeKB();
     virDomainMemoryAccess memAccess = mem->access;
@@ -3264,6 +3262,13 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps,
     bool nodeSpecified = virDomainNumatuneNodeSpecified(def->numa, mem->targetNode);
     unsigned long long pagesize = mem->pagesize;
     bool needHugepage = !!pagesize;
+    bool useHugepage = !!pagesize;
+
+    /* The difference between @needHugepage and @useHugepage is that the latter
+     * is true whenever huge page is defined for the current memory cell.
+     * Either directly, or transitively via global domain huge pages. The
+     * former is true whenever "memory-backend-file" must be used to satisfy
+     * @useHugepage. */
 
     *backendProps = NULL;
     *backendType = NULL;
@@ -3290,6 +3295,8 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps,
         mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT;
 
     if (pagesize == 0) {
+        virDomainHugePagePtr master_hugepage = NULL;
+        virDomainHugePagePtr hugepage = NULL;
         bool thisHugepage = false;
 
         /* Find the huge page size we want to use */
@@ -3327,8 +3334,10 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps,
             hugepage = master_hugepage;
         }
 
-        if (hugepage)
+        if (hugepage) {
             pagesize = hugepage->size;
+            useHugepage = true;
+        }
     }
 
     if (pagesize == system_page_size) {
@@ -3336,17 +3345,18 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps,
          * of regular system page size, it's as if they
          * hasn't specified any huge pages at all. */
         pagesize = 0;
-        hugepage = NULL;
+        needHugepage = false;
+        useHugepage = false;
     }
 
     if (!(props = virJSONValueNewObject()))
         return -1;
 
-    if (pagesize || mem->nvdimmPath || memAccess ||
+    if (useHugepage || mem->nvdimmPath || memAccess ||
         def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) {
         *backendType = "memory-backend-file";
 
-        if (pagesize) {
+        if (useHugepage) {
             if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &memPath) < 0)
                 goto cleanup;
             prealloc = true;
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages7.args b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages7.args
new file mode 100644
index 000000000..e4229dce6
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages7.args
@@ -0,0 +1,35 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-x86_64 \
+-name fedora \
+-S \
+-M pc-i440fx-2.3 \
+-m size=1048576k,slots=16,maxmem=1099511627776k \
+-smp 2,sockets=2,cores=1,threads=1 \
+-mem-prealloc \
+-mem-path /dev/hugepages2M/libvirt/qemu/-1-fedora \
+-numa node,nodeid=0,cpus=0-1,mem=1024 \
+-object memory-backend-file,id=memdimm0,prealloc=yes,\
+mem-path=/dev/hugepages1G/libvirt/qemu/-1-fedora,size=1073741824,\
+host-nodes=1-3,policy=bind \
+-device pc-dimm,node=0,memdev=memdimm0,id=dimm0,slot=0 \
+-object memory-backend-file,id=memdimm1,prealloc=yes,\
+mem-path=/dev/hugepages2M/libvirt/qemu/-1-fedora,share=no,size=536870912 \
+-device pc-dimm,node=0,memdev=memdimm1,id=dimm1,slot=1 \
+-uuid 63840878-0deb-4095-97e6-fc444d9bc9fa \
+-nographic \
+-nodefaults \
+-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-fedora/monitor.sock,\
+server,nowait \
+-mon chardev=charmonitor,id=monitor,mode=readline \
+-boot c \
+-usb \
+-drive file=/var/lib/libvirt/images/fedora.qcow2,format=qcow2,if=none,\
+id=drive-virtio-disk0 \
+-device virtio-blk-pci,bus=pci.0,addr=0x7,drive=drive-virtio-disk0,\
+id=virtio-disk0 \
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages7.xml b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages7.xml
new file mode 100644
index 000000000..d75cf5afa
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages7.xml
@@ -0,0 +1,65 @@
+<domain type='qemu'>
+  <name>fedora</name>
+  <uuid>63840878-0deb-4095-97e6-fc444d9bc9fa</uuid>
+  <maxMemory slots='16' unit='KiB'>1099511627776</maxMemory>
+  <memory unit='KiB'>2621439</memory>
+  <currentMemory unit='KiB'>2621439</currentMemory>
+  <memoryBacking>
+    <hugepages/>
+  </memoryBacking>
+  <vcpu placement='static'>2</vcpu>
+  <os>
+    <type arch='x86_64' machine='pc-i440fx-2.3'>hvm</type>
+    <boot dev='hd'/>
+  </os>
+  <features>
+    <acpi/>
+    <apic/>
+    <pae/>
+  </features>
+  <cpu>
+    <numa>
+      <cell id='0' cpus='0-1' memory='1048576' unit='KiB'/>
+    </numa>
+  </cpu>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>restart</on_crash>
+  <devices>
+    <emulator>/usr/bin/qemu-system-x86_64</emulator>
+    <disk type='file' device='disk'>
+      <driver name='qemu' type='qcow2'/>
+      <source file='/var/lib/libvirt/images/fedora.qcow2'/>
+      <target dev='vda' bus='virtio'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x07' function='0x0'/>
+    </disk>
+    <controller type='usb' index='0'>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/>
+    </controller>
+    <controller type='pci' index='0' model='pci-root'/>
+    <input type='mouse' bus='ps2'/>
+    <input type='keyboard' bus='ps2'/>
+    <memballoon model='virtio'>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
+    </memballoon>
+    <memory model='dimm'>
+      <source>
+        <nodemask>1-3</nodemask>
+        <pagesize unit='KiB'>1048576</pagesize>
+      </source>
+      <target>
+        <size unit='KiB'>1048576</size>
+        <node>0</node>
+      </target>
+      <address type='dimm' slot='0'/>
+    </memory>
+    <memory model='dimm' access='private'>
+      <target>
+        <size unit='KiB'>524287</size>
+        <node>0</node>
+      </target>
+      <address type='dimm' slot='1'/>
+    </memory>
+  </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index 655c89bc3..993805231 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -844,6 +844,8 @@ mymain(void)
             QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE);
     DO_TEST("hugepages-pages5", QEMU_CAPS_MEM_PATH);
     DO_TEST("hugepages-pages6", NONE);
+    DO_TEST("hugepages-pages7", QEMU_CAPS_MEM_PATH,
+            QEMU_CAPS_DEVICE_PC_DIMM, QEMU_CAPS_OBJECT_MEMORY_FILE);
     DO_TEST("hugepages-memaccess", QEMU_CAPS_OBJECT_MEMORY_FILE,
             QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_DEVICE_PC_DIMM,
             QEMU_CAPS_NUMA);
diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-hugepages-pages7.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-hugepages-pages7.xml
new file mode 120000
index 000000000..9aafa6e95
--- /dev/null
+++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-hugepages-pages7.xml
@@ -0,0 +1 @@
+../qemuxml2argvdata/qemuxml2argv-hugepages-pages7.xml
\ No newline at end of file
diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c
index 0d549adec..311b71356 100644
--- a/tests/qemuxml2xmltest.c
+++ b/tests/qemuxml2xmltest.c
@@ -433,6 +433,7 @@ mymain(void)
     DO_TEST("hugepages-pages4", NONE);
     DO_TEST("hugepages-pages5", NONE);
     DO_TEST("hugepages-pages6", NONE);
+    DO_TEST("hugepages-pages7", NONE);
     DO_TEST("hugepages-shared", NONE);
     DO_TEST("hugepages-memaccess", NONE);
     DO_TEST("hugepages-memaccess2", NONE);
-- 
2.13.0

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH] qemuBuildMemoryBackendStr: Handle one more corner case
Posted by Martin Kletzander 6 years, 8 months ago
On Tue, Aug 08, 2017 at 05:04:15PM +0200, Michal Privoznik wrote:
>https://bugzilla.redhat.com/show_bug.cgi?id=1458638
>
>This code is so complicated because we allow enabling the same
>bits at many places. Just like in this case: huge pages can be
>enabled by global <hugepages/> element under <memoryBacking> or
>on per <memory/> basis. To complicate things a bit more, users
>are allowed to not specify any specific page size in which case

s/to not specify/not to specify/

reads a bit nicely, or even

s/to not specify any/to specify no/ or "allowed to omit the page size"

Choice is yours

Reviewed-by: Martin Kletzander <mkletzan@redhat.com>

One more note that I told you in person, but I can't resist the urge to
share it more widely.  The code is gross because it can't be made much
better due to the fact that we keep the parsed data the same way they
are in the XML.  I think this could be made nicer if we were to fill all
the information during parsing.

When parsing /memory/hugepages we would fill in missing information
right away.  Then, when parsing /cpu/numa/cell we could copy that
information to the specific cells (if missing) and because the whole
definition is already available from parsing /memory/hugepages there's
less lookups.  Finally, when parsing /devices/memory/source we can
directly get missing info from the numa cell if nodemask is present or
global hugepages if it is not, and we don't have to lookup anything,
because it is already guaranteed to be filled in for the previous parts.

I hope it's understandable.  What I hope even more is that someone
refactors the code this way =)
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH] qemuBuildMemoryBackendStr: Handle one more corner case
Posted by Michal Privoznik 6 years, 8 months ago
On 08/10/2017 03:44 PM, Martin Kletzander wrote:
> On Tue, Aug 08, 2017 at 05:04:15PM +0200, Michal Privoznik wrote:
>> https://bugzilla.redhat.com/show_bug.cgi?id=1458638
>>
>> This code is so complicated because we allow enabling the same
>> bits at many places. Just like in this case: huge pages can be
>> enabled by global <hugepages/> element under <memoryBacking> or
>> on per <memory/> basis. To complicate things a bit more, users
>> are allowed to not specify any specific page size in which case
> 
> s/to not specify/not to specify/
> 
> reads a bit nicely, or even
> 
> s/to not specify any/to specify no/ or "allowed to omit the page size"

I'm going with the latter.

> 
> Choice is yours
> 
> Reviewed-by: Martin Kletzander <mkletzan@redhat.com>

Pushed, thanks.

> 
> One more note that I told you in person, but I can't resist the urge to
> share it more widely.  The code is gross because it can't be made much
> better due to the fact that we keep the parsed data the same way they
> are in the XML.  I think this could be made nicer if we were to fill all
> the information during parsing.
> 
> When parsing /memory/hugepages we would fill in missing information
> right away.  Then, when parsing /cpu/numa/cell we could copy that
> information to the specific cells (if missing) and because the whole
> definition is already available from parsing /memory/hugepages there's
> less lookups.  Finally, when parsing /devices/memory/source we can
> directly get missing info from the numa cell if nodemask is present or
> global hugepages if it is not, and we don't have to lookup anything,
> because it is already guaranteed to be filled in for the previous parts.
> 
> I hope it's understandable.  What I hope even more is that someone
> refactors the code this way =)

Right. One thing though that I haven't realized during our conversation
is that we would need to keep track if the info is supplied by user or
by this algorithm. The problem is the following: users can specify
<memoryBacking> <hugepages/> </memoryBacking>  in which case so called
default huge page size is used. The default is whatever page size is
printed in /proc/meminfo. For instance, on my local machine:

$ cat /proc/meminfo | grep Hugepagesize:
Hugepagesize:       2048 kB

$ cat /proc/cpuinfo | grep -o pdpe1gb
pdpe1gb

So the default huge page size is 2M but the processor supports also 1GB.
However, on different machine the default may be something else. 2MB
might not even be accessible there. Therefore if we would fill that info
during parsing we might be screwed. Alternatively, if admin have 2MB
enabled during define, but then prohibits 2MB in favour of 1GB, we are
back in the problem. Hope it makes sense what I'm saying.

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH] qemuBuildMemoryBackendStr: Handle one more corner case
Posted by Martin Kletzander 6 years, 8 months ago
On Thu, Aug 10, 2017 at 05:43:13PM +0200, Michal Privoznik wrote:
>On 08/10/2017 03:44 PM, Martin Kletzander wrote:
>> On Tue, Aug 08, 2017 at 05:04:15PM +0200, Michal Privoznik wrote:
>>> https://bugzilla.redhat.com/show_bug.cgi?id=1458638
>>>
>>> This code is so complicated because we allow enabling the same
>>> bits at many places. Just like in this case: huge pages can be
>>> enabled by global <hugepages/> element under <memoryBacking> or
>>> on per <memory/> basis. To complicate things a bit more, users
>>> are allowed to not specify any specific page size in which case
>>
>> s/to not specify/not to specify/
>>
>> reads a bit nicely, or even
>>
>> s/to not specify any/to specify no/ or "allowed to omit the page size"
>
>I'm going with the latter.
>
>>
>> Choice is yours
>>
>> Reviewed-by: Martin Kletzander <mkletzan@redhat.com>
>
>Pushed, thanks.
>
>>
>> One more note that I told you in person, but I can't resist the urge to
>> share it more widely.  The code is gross because it can't be made much
>> better due to the fact that we keep the parsed data the same way they
>> are in the XML.  I think this could be made nicer if we were to fill all
>> the information during parsing.
>>
>> When parsing /memory/hugepages we would fill in missing information
>> right away.  Then, when parsing /cpu/numa/cell we could copy that
>> information to the specific cells (if missing) and because the whole
>> definition is already available from parsing /memory/hugepages there's
>> less lookups.  Finally, when parsing /devices/memory/source we can
>> directly get missing info from the numa cell if nodemask is present or
>> global hugepages if it is not, and we don't have to lookup anything,
>> because it is already guaranteed to be filled in for the previous parts.
>>
>> I hope it's understandable.  What I hope even more is that someone
>> refactors the code this way =)
>
>Right. One thing though that I haven't realized during our conversation
>is that we would need to keep track if the info is supplied by user or
>by this algorithm. The problem is the following: users can specify
><memoryBacking> <hugepages/> </memoryBacking>  in which case so called
>default huge page size is used. The default is whatever page size is
>printed in /proc/meminfo. For instance, on my local machine:
>
>$ cat /proc/meminfo | grep Hugepagesize:
>Hugepagesize:       2048 kB
>
>$ cat /proc/cpuinfo | grep -o pdpe1gb
>pdpe1gb
>
>So the default huge page size is 2M but the processor supports also 1GB.
>However, on different machine the default may be something else. 2MB
>might not even be accessible there. Therefore if we would fill that info
>during parsing we might be screwed. Alternatively, if admin have 2MB
>enabled during define, but then prohibits 2MB in favour of 1GB, we are
>back in the problem. Hope it makes sense what I'm saying.
>

You don't have to copy the size, just whatever information you might
have.  We can talk it through later or I can explain myself with the
code, but I don't want to do the rework right now, to be honest.

>Michal
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list