[libvirt] [PATCH 2/4] conf: Move hugepage XML validation check out of qemu_command

Pavel Hrdina posted 4 patches 7 years, 7 months ago
There is a newer version of this series
[libvirt] [PATCH 2/4] conf: Move hugepage XML validation check out of qemu_command
Posted by Pavel Hrdina 7 years, 7 months ago
We can safely validate the hugepage nodeset attribute at a define time.
This validation is not done for already existing domains when the daemon
is restarted.

All the changes to the tests are necessary because we move the error
from domain start into XML parse.

Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
---
 src/conf/domain_conf.c                        | 32 +++++++++++++++++
 src/qemu/qemu_command.c                       | 34 -------------------
 .../seclabel-dynamic-none-relabel.xml         |  2 +-
 tests/qemuxml2argvtest.c                      | 16 +++++----
 .../qemuxml2xmloutdata/hugepages-pages10.xml  | 30 ----------------
 tests/qemuxml2xmloutdata/hugepages-pages4.xml |  1 -
 tests/qemuxml2xmloutdata/hugepages-pages9.xml | 31 -----------------
 .../seclabel-dynamic-none-relabel.xml         |  2 +-
 tests/qemuxml2xmltest.c                       |  3 --
 9 files changed, 43 insertions(+), 108 deletions(-)
 delete mode 100644 tests/qemuxml2xmloutdata/hugepages-pages10.xml
 delete mode 120000 tests/qemuxml2xmloutdata/hugepages-pages4.xml
 delete mode 100644 tests/qemuxml2xmloutdata/hugepages-pages9.xml

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 7396616eda..20d67e7854 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -6104,6 +6104,35 @@ virDomainDefLifecycleActionValidate(const virDomainDef *def)
 }
 
 
+static int
+virDomainDefMemtuneValidate(const virDomainDef *def)
+{
+    const virDomainMemtune *mem = &(def->mem);
+    size_t i;
+    ssize_t pos = virDomainNumaGetNodeCount(def->numa) - 1;
+
+    for (i = 0; i < mem->nhugepages; i++) {
+        ssize_t nextBit;
+
+        if (!mem->hugepages[i].nodemask) {
+            /* This is the master hugepage to use. Skip it as it has no
+             * nodemask anyway. */
+            continue;
+        }
+
+        nextBit = virBitmapNextSetBit(mem->hugepages[i].nodemask, pos);
+        if (nextBit >= 0) {
+            virReportError(VIR_ERR_XML_DETAIL,
+                           _("hugepages: node %zd not found"),
+                           nextBit);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+
 static int
 virDomainDefValidateInternal(const virDomainDef *def)
 {
@@ -6139,6 +6168,9 @@ virDomainDefValidateInternal(const virDomainDef *def)
     if (virDomainDefLifecycleActionValidate(def) < 0)
         return -1;
 
+    if (virDomainDefMemtuneValidate(def) < 0)
+        return -1;
+
     return 0;
 }
 
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 44ae8dcef7..a0b829628a 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -7470,16 +7470,6 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
     if (!def->mem.nhugepages)
         return 0;
 
-    if (def->mem.hugepages[0].nodemask) {
-        ssize_t next_bit = virBitmapNextSetBit(def->mem.hugepages[0].nodemask, -1);
-        if (next_bit >= 0) {
-            virReportError(VIR_ERR_XML_DETAIL,
-                           _("hugepages: node %zd not found"),
-                           next_bit);
-            return -1;
-        }
-    }
-
     /* There is one special case: if user specified "huge"
      * pages of regular system pages size.
      * And there is nothing to do in this case.
@@ -7612,30 +7602,6 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
     if (!virDomainNumatuneNodesetIsAvailable(def->numa, priv->autoNodeset))
         goto cleanup;
 
-    for (i = 0; i < def->mem.nhugepages; i++) {
-        ssize_t next_bit, pos = 0;
-
-        if (!def->mem.hugepages[i].nodemask) {
-            /* This is the master hugepage to use. Skip it as it has no
-             * nodemask anyway. */
-            continue;
-        }
-
-        if (ncells) {
-            /* Fortunately, we allow only guest NUMA nodes to be continuous
-             * starting from zero. */
-            pos = ncells - 1;
-        }
-
-        next_bit = virBitmapNextSetBit(def->mem.hugepages[i].nodemask, pos);
-        if (next_bit >= 0) {
-            virReportError(VIR_ERR_XML_DETAIL,
-                           _("hugepages: node %zd not found"),
-                           next_bit);
-            goto cleanup;
-        }
-    }
-
     if (VIR_ALLOC_N(nodeBackends, ncells) < 0)
         goto cleanup;
 
diff --git a/tests/qemuxml2argvdata/seclabel-dynamic-none-relabel.xml b/tests/qemuxml2argvdata/seclabel-dynamic-none-relabel.xml
index 47f253b5f7..e954250009 100644
--- a/tests/qemuxml2argvdata/seclabel-dynamic-none-relabel.xml
+++ b/tests/qemuxml2argvdata/seclabel-dynamic-none-relabel.xml
@@ -5,7 +5,7 @@
   <currentMemory unit='KiB'>262144</currentMemory>
   <memoryBacking>
     <hugepages>
-      <page size='2048' unit='KiB' nodeset='0'/>
+      <page size='2048' unit='KiB'/>
     </hugepages>
   </memoryBacking>
   <vcpu placement='static'>4</vcpu>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index 7236e184b8..15f9fb7b11 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -968,18 +968,20 @@ mymain(void)
             QEMU_CAPS_OBJECT_MEMORY_RAM,
             QEMU_CAPS_OBJECT_MEMORY_FILE);
     DO_TEST_PARSE_ERROR("hugepages-memaccess-invalid", NONE);
-    DO_TEST_FAILURE("hugepages-pages4",
-            QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE);
+    DO_TEST_PARSE_ERROR("hugepages-pages4",
+                        QEMU_CAPS_OBJECT_MEMORY_RAM,
+                        QEMU_CAPS_OBJECT_MEMORY_FILE);
     DO_TEST("hugepages-pages5", NONE);
     DO_TEST("hugepages-pages6", NONE);
     DO_TEST("hugepages-pages7",
             QEMU_CAPS_DEVICE_PC_DIMM, QEMU_CAPS_OBJECT_MEMORY_FILE,
             QEMU_CAPS_OBJECT_MEMORY_FILE_DISCARD);
-    DO_TEST_FAILURE("hugepages-pages8",
-                    QEMU_CAPS_DEVICE_PC_DIMM, QEMU_CAPS_OBJECT_MEMORY_FILE,
-                    QEMU_CAPS_OBJECT_MEMORY_FILE_DISCARD);
-    DO_TEST_FAILURE("hugepages-pages9", NONE);
-    DO_TEST_FAILURE("hugepages-pages10", NONE);
+    DO_TEST_PARSE_ERROR("hugepages-pages8",
+                        QEMU_CAPS_DEVICE_PC_DIMM,
+                        QEMU_CAPS_OBJECT_MEMORY_FILE,
+                        QEMU_CAPS_OBJECT_MEMORY_FILE_DISCARD);
+    DO_TEST_PARSE_ERROR("hugepages-pages9", NONE);
+    DO_TEST_PARSE_ERROR("hugepages-pages10", NONE);
     DO_TEST("hugepages-memaccess", QEMU_CAPS_OBJECT_MEMORY_FILE,
             QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_DEVICE_PC_DIMM,
             QEMU_CAPS_NUMA);
diff --git a/tests/qemuxml2xmloutdata/hugepages-pages10.xml b/tests/qemuxml2xmloutdata/hugepages-pages10.xml
deleted file mode 100644
index 4a85ddffad..0000000000
--- a/tests/qemuxml2xmloutdata/hugepages-pages10.xml
+++ /dev/null
@@ -1,30 +0,0 @@
-<domain type='qemu'>
-  <name>SomeDummyHugepagesGuest</name>
-  <uuid>ef1bdff4-27f3-4e85-a807-5fb4d58463cc</uuid>
-  <memory unit='KiB'>1048576</memory>
-  <currentMemory unit='KiB'>1048576</currentMemory>
-  <memoryBacking>
-    <hugepages>
-      <page size='2048' unit='KiB' nodeset='0'/>
-    </hugepages>
-  </memoryBacking>
-  <vcpu placement='static'>2</vcpu>
-  <os>
-    <type arch='i686' machine='pc'>hvm</type>
-    <boot dev='hd'/>
-  </os>
-  <clock offset='utc'/>
-  <on_poweroff>destroy</on_poweroff>
-  <on_reboot>restart</on_reboot>
-  <on_crash>destroy</on_crash>
-  <devices>
-    <emulator>/usr/bin/qemu-system-i686</emulator>
-    <controller type='usb' index='0'>
-      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/>
-    </controller>
-    <controller type='pci' index='0' model='pci-root'/>
-    <input type='mouse' bus='ps2'/>
-    <input type='keyboard' bus='ps2'/>
-    <memballoon model='none'/>
-  </devices>
-</domain>
diff --git a/tests/qemuxml2xmloutdata/hugepages-pages4.xml b/tests/qemuxml2xmloutdata/hugepages-pages4.xml
deleted file mode 120000
index 127e66e64f..0000000000
--- a/tests/qemuxml2xmloutdata/hugepages-pages4.xml
+++ /dev/null
@@ -1 +0,0 @@
-../qemuxml2argvdata/hugepages-pages4.xml
\ No newline at end of file
diff --git a/tests/qemuxml2xmloutdata/hugepages-pages9.xml b/tests/qemuxml2xmloutdata/hugepages-pages9.xml
deleted file mode 100644
index 8f380c46df..0000000000
--- a/tests/qemuxml2xmloutdata/hugepages-pages9.xml
+++ /dev/null
@@ -1,31 +0,0 @@
-<domain type='qemu'>
-  <name>SomeDummyHugepagesGuest</name>
-  <uuid>ef1bdff4-27f3-4e85-a807-5fb4d58463cc</uuid>
-  <memory unit='KiB'>1048576</memory>
-  <currentMemory unit='KiB'>1048576</currentMemory>
-  <memoryBacking>
-    <hugepages>
-      <page size='2048' unit='KiB' nodeset='0'/>
-      <page size='1048576' unit='KiB'/>
-    </hugepages>
-  </memoryBacking>
-  <vcpu placement='static'>2</vcpu>
-  <os>
-    <type arch='i686' machine='pc'>hvm</type>
-    <boot dev='hd'/>
-  </os>
-  <clock offset='utc'/>
-  <on_poweroff>destroy</on_poweroff>
-  <on_reboot>restart</on_reboot>
-  <on_crash>destroy</on_crash>
-  <devices>
-    <emulator>/usr/bin/qemu-system-i686</emulator>
-    <controller type='usb' index='0'>
-      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/>
-    </controller>
-    <controller type='pci' index='0' model='pci-root'/>
-    <input type='mouse' bus='ps2'/>
-    <input type='keyboard' bus='ps2'/>
-    <memballoon model='none'/>
-  </devices>
-</domain>
diff --git a/tests/qemuxml2xmloutdata/seclabel-dynamic-none-relabel.xml b/tests/qemuxml2xmloutdata/seclabel-dynamic-none-relabel.xml
index 050967b4ee..bfa66b8deb 100644
--- a/tests/qemuxml2xmloutdata/seclabel-dynamic-none-relabel.xml
+++ b/tests/qemuxml2xmloutdata/seclabel-dynamic-none-relabel.xml
@@ -5,7 +5,7 @@
   <currentMemory unit='KiB'>262144</currentMemory>
   <memoryBacking>
     <hugepages>
-      <page size='2048' unit='KiB' nodeset='0'/>
+      <page size='2048' unit='KiB'/>
     </hugepages>
   </memoryBacking>
   <vcpu placement='static'>4</vcpu>
diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c
index ae11fbe60c..a70516ada1 100644
--- a/tests/qemuxml2xmltest.c
+++ b/tests/qemuxml2xmltest.c
@@ -334,12 +334,9 @@ mymain(void)
     DO_TEST("hugepages-pages", NONE);
     DO_TEST("hugepages-pages2", NONE);
     DO_TEST("hugepages-pages3", NONE);
-    DO_TEST("hugepages-pages4", NONE);
     DO_TEST("hugepages-pages5", NONE);
     DO_TEST("hugepages-pages6", NONE);
     DO_TEST("hugepages-pages7", NONE);
-    DO_TEST("hugepages-pages9", NONE);
-    DO_TEST("hugepages-pages10", NONE);
     DO_TEST("hugepages-shared", NONE);
     DO_TEST("hugepages-memaccess", NONE);
     DO_TEST("hugepages-memaccess2", NONE);
-- 
2.17.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH 2/4] conf: Move hugepage XML validation check out of qemu_command
Posted by Michal Privoznik 7 years, 7 months ago
On 07/11/2018 10:22 AM, Pavel Hrdina wrote:
> We can safely validate the hugepage nodeset attribute at a define time.
> This validation is not done for already existing domains when the daemon
> is restarted.
> 
> All the changes to the tests are necessary because we move the error
> from domain start into XML parse.
> 
> Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
> ---
>  src/conf/domain_conf.c                        | 32 +++++++++++++++++
>  src/qemu/qemu_command.c                       | 34 -------------------
>  .../seclabel-dynamic-none-relabel.xml         |  2 +-
>  tests/qemuxml2argvtest.c                      | 16 +++++----
>  .../qemuxml2xmloutdata/hugepages-pages10.xml  | 30 ----------------
>  tests/qemuxml2xmloutdata/hugepages-pages4.xml |  1 -
>  tests/qemuxml2xmloutdata/hugepages-pages9.xml | 31 -----------------
>  .../seclabel-dynamic-none-relabel.xml         |  2 +-
>  tests/qemuxml2xmltest.c                       |  3 --
>  9 files changed, 43 insertions(+), 108 deletions(-)
>  delete mode 100644 tests/qemuxml2xmloutdata/hugepages-pages10.xml
>  delete mode 120000 tests/qemuxml2xmloutdata/hugepages-pages4.xml
>  delete mode 100644 tests/qemuxml2xmloutdata/hugepages-pages9.xml
> 
> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> index 7396616eda..20d67e7854 100644
> --- a/src/conf/domain_conf.c
> +++ b/src/conf/domain_conf.c
> @@ -6104,6 +6104,35 @@ virDomainDefLifecycleActionValidate(const virDomainDef *def)
>  }
>  
>  
> +static int
> +virDomainDefMemtuneValidate(const virDomainDef *def)
> +{
> +    const virDomainMemtune *mem = &(def->mem);
> +    size_t i;
> +    ssize_t pos = virDomainNumaGetNodeCount(def->numa) - 1;
> +
> +    for (i = 0; i < mem->nhugepages; i++) {
> +        ssize_t nextBit;
> +
> +        if (!mem->hugepages[i].nodemask) {
> +            /* This is the master hugepage to use. Skip it as it has no
> +             * nodemask anyway. */
> +            continue;
> +        }
> +
> +        nextBit = virBitmapNextSetBit(mem->hugepages[i].nodemask, pos);
> +        if (nextBit >= 0) {

I think its fair to enable hugepages for node #0 which is always there
(even if not configured in domain XML). Just try to run 'numactl -H'
from a domain that has no <numa/> in its XML.

> +            virReportError(VIR_ERR_XML_DETAIL,
> +                           _("hugepages: node %zd not found"),
> +                           nextBit);
> +            return -1;
> +        }
> +    }

Also, I see that you're removing hugepages-pages9 test from xml2xml
test. But that is needed only because you disallowed nodeset='0' for
nonnuma domain. The real problem there is that the default page size has
no numa node to apply to, not nodeset='0'. I guess we need to check for
that too (or do we want to?)

> +
> +    return 0;
> +}
> +

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH 2/4] conf: Move hugepage XML validation check out of qemu_command
Posted by Pavel Hrdina 7 years, 7 months ago
On Wed, Jul 11, 2018 at 05:05:07PM +0200, Michal Privoznik wrote:
> On 07/11/2018 10:22 AM, Pavel Hrdina wrote:
> > We can safely validate the hugepage nodeset attribute at a define time.
> > This validation is not done for already existing domains when the daemon
> > is restarted.
> > 
> > All the changes to the tests are necessary because we move the error
> > from domain start into XML parse.
> > 
> > Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
> > ---
> >  src/conf/domain_conf.c                        | 32 +++++++++++++++++
> >  src/qemu/qemu_command.c                       | 34 -------------------
> >  .../seclabel-dynamic-none-relabel.xml         |  2 +-
> >  tests/qemuxml2argvtest.c                      | 16 +++++----
> >  .../qemuxml2xmloutdata/hugepages-pages10.xml  | 30 ----------------
> >  tests/qemuxml2xmloutdata/hugepages-pages4.xml |  1 -
> >  tests/qemuxml2xmloutdata/hugepages-pages9.xml | 31 -----------------
> >  .../seclabel-dynamic-none-relabel.xml         |  2 +-
> >  tests/qemuxml2xmltest.c                       |  3 --
> >  9 files changed, 43 insertions(+), 108 deletions(-)
> >  delete mode 100644 tests/qemuxml2xmloutdata/hugepages-pages10.xml
> >  delete mode 120000 tests/qemuxml2xmloutdata/hugepages-pages4.xml
> >  delete mode 100644 tests/qemuxml2xmloutdata/hugepages-pages9.xml
> > 
> > diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> > index 7396616eda..20d67e7854 100644
> > --- a/src/conf/domain_conf.c
> > +++ b/src/conf/domain_conf.c
> > @@ -6104,6 +6104,35 @@ virDomainDefLifecycleActionValidate(const virDomainDef *def)
> >  }
> >  
> >  
> > +static int
> > +virDomainDefMemtuneValidate(const virDomainDef *def)
> > +{
> > +    const virDomainMemtune *mem = &(def->mem);
> > +    size_t i;
> > +    ssize_t pos = virDomainNumaGetNodeCount(def->numa) - 1;
> > +
> > +    for (i = 0; i < mem->nhugepages; i++) {
> > +        ssize_t nextBit;
> > +
> > +        if (!mem->hugepages[i].nodemask) {
> > +            /* This is the master hugepage to use. Skip it as it has no
> > +             * nodemask anyway. */
> > +            continue;
> > +        }
> > +
> > +        nextBit = virBitmapNextSetBit(mem->hugepages[i].nodemask, pos);
> > +        if (nextBit >= 0) {
> 
> I think its fair to enable hugepages for node #0 which is always there
> (even if not configured in domain XML). Just try to run 'numactl -H'
> from a domain that has no <numa/> in its XML.

Well yes, linux always assumes that there is at least one NUMA node
but other systems might not consider it the same.

> 
> > +            virReportError(VIR_ERR_XML_DETAIL,
> > +                           _("hugepages: node %zd not found"),
> > +                           nextBit);
> > +            return -1;
> > +        }
> > +    }
> 
> Also, I see that you're removing hugepages-pages9 test from xml2xml
> test. But that is needed only because you disallowed nodeset='0' for
> nonnuma domain. The real problem there is that the default page size has

That is already disallowed but only once you try to start such domain,
I'm just moving this check from start time to parse time.

If you look into qemuxml2argvtest.c you will see that hugepages-pages9
is expected to fail.

> no numa node to apply to, not nodeset='0'. I guess we need to check for
> that too (or do we want to?)

That is yet different issue that can be addressed but it should not
block this patch.

Pavel
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH 2/4] conf: Move hugepage XML validation check out of qemu_command
Posted by Michal Privoznik 7 years, 7 months ago
On 07/11/2018 05:25 PM, Pavel Hrdina wrote:
> On Wed, Jul 11, 2018 at 05:05:07PM +0200, Michal Privoznik wrote:
>> On 07/11/2018 10:22 AM, Pavel Hrdina wrote:
>>> We can safely validate the hugepage nodeset attribute at a define time.
>>> This validation is not done for already existing domains when the daemon
>>> is restarted.
>>>
>>> All the changes to the tests are necessary because we move the error
>>> from domain start into XML parse.
>>>
>>> Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
>>> ---
>>>  src/conf/domain_conf.c                        | 32 +++++++++++++++++
>>>  src/qemu/qemu_command.c                       | 34 -------------------
>>>  .../seclabel-dynamic-none-relabel.xml         |  2 +-
>>>  tests/qemuxml2argvtest.c                      | 16 +++++----
>>>  .../qemuxml2xmloutdata/hugepages-pages10.xml  | 30 ----------------
>>>  tests/qemuxml2xmloutdata/hugepages-pages4.xml |  1 -
>>>  tests/qemuxml2xmloutdata/hugepages-pages9.xml | 31 -----------------
>>>  .../seclabel-dynamic-none-relabel.xml         |  2 +-
>>>  tests/qemuxml2xmltest.c                       |  3 --
>>>  9 files changed, 43 insertions(+), 108 deletions(-)
>>>  delete mode 100644 tests/qemuxml2xmloutdata/hugepages-pages10.xml
>>>  delete mode 120000 tests/qemuxml2xmloutdata/hugepages-pages4.xml
>>>  delete mode 100644 tests/qemuxml2xmloutdata/hugepages-pages9.xml
>>>
>>> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
>>> index 7396616eda..20d67e7854 100644
>>> --- a/src/conf/domain_conf.c
>>> +++ b/src/conf/domain_conf.c
>>> @@ -6104,6 +6104,35 @@ virDomainDefLifecycleActionValidate(const virDomainDef *def)
>>>  }
>>>  
>>>  
>>> +static int
>>> +virDomainDefMemtuneValidate(const virDomainDef *def)
>>> +{
>>> +    const virDomainMemtune *mem = &(def->mem);
>>> +    size_t i;
>>> +    ssize_t pos = virDomainNumaGetNodeCount(def->numa) - 1;
>>> +
>>> +    for (i = 0; i < mem->nhugepages; i++) {
>>> +        ssize_t nextBit;
>>> +
>>> +        if (!mem->hugepages[i].nodemask) {
>>> +            /* This is the master hugepage to use. Skip it as it has no
>>> +             * nodemask anyway. */
>>> +            continue;
>>> +        }
>>> +
>>> +        nextBit = virBitmapNextSetBit(mem->hugepages[i].nodemask, pos);
>>> +        if (nextBit >= 0) {
>>
>> I think its fair to enable hugepages for node #0 which is always there
>> (even if not configured in domain XML). Just try to run 'numactl -H'
>> from a domain that has no <numa/> in its XML.
> 
> Well yes, linux always assumes that there is at least one NUMA node
> but other systems might not consider it the same.

I don't think the assumption is limited to Linux only. Even Windows
behave the same. For instance the following example shows that on
non-NUMA machine there is NUMA node #0.

https://docs.microsoft.com/en-us/windows/desktop/Memory/allocating-memory-from-a-numa-node

> 
>>
>>> +            virReportError(VIR_ERR_XML_DETAIL,
>>> +                           _("hugepages: node %zd not found"),
>>> +                           nextBit);
>>> +            return -1;
>>> +        }
>>> +    }
>>
>> Also, I see that you're removing hugepages-pages9 test from xml2xml
>> test. But that is needed only because you disallowed nodeset='0' for
>> nonnuma domain. The real problem there is that the default page size has
> 
> That is already disallowed but only once you try to start such domain,
> I'm just moving this check from start time to parse time.

Yes because we have a bug in the code. So when you introduced the test
it was doomed to fail.

> 
> If you look into qemuxml2argvtest.c you will see that hugepages-pages9
> is expected to fail.
> 
>> no numa node to apply to, not nodeset='0'. I guess we need to check for
>> that too (or do we want to?)
> 
> That is yet different issue that can be addressed but it should not
> block this patch.

Well, maybe. I'm not saying your patches are wrong. Apart from allowing
nodeset='0' (which I think we should do, but I don't have that much of a
strong opinion there).

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH 2/4] conf: Move hugepage XML validation check out of qemu_command
Posted by Pavel Hrdina 7 years, 7 months ago
On Wed, Jul 11, 2018 at 05:47:58PM +0200, Michal Privoznik wrote:
> On 07/11/2018 05:25 PM, Pavel Hrdina wrote:
> > On Wed, Jul 11, 2018 at 05:05:07PM +0200, Michal Privoznik wrote:
> >> On 07/11/2018 10:22 AM, Pavel Hrdina wrote:
> >>> We can safely validate the hugepage nodeset attribute at a define time.
> >>> This validation is not done for already existing domains when the daemon
> >>> is restarted.
> >>>
> >>> All the changes to the tests are necessary because we move the error
> >>> from domain start into XML parse.
> >>>
> >>> Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
> >>> ---
> >>>  src/conf/domain_conf.c                        | 32 +++++++++++++++++
> >>>  src/qemu/qemu_command.c                       | 34 -------------------
> >>>  .../seclabel-dynamic-none-relabel.xml         |  2 +-
> >>>  tests/qemuxml2argvtest.c                      | 16 +++++----
> >>>  .../qemuxml2xmloutdata/hugepages-pages10.xml  | 30 ----------------
> >>>  tests/qemuxml2xmloutdata/hugepages-pages4.xml |  1 -
> >>>  tests/qemuxml2xmloutdata/hugepages-pages9.xml | 31 -----------------
> >>>  .../seclabel-dynamic-none-relabel.xml         |  2 +-
> >>>  tests/qemuxml2xmltest.c                       |  3 --
> >>>  9 files changed, 43 insertions(+), 108 deletions(-)
> >>>  delete mode 100644 tests/qemuxml2xmloutdata/hugepages-pages10.xml
> >>>  delete mode 120000 tests/qemuxml2xmloutdata/hugepages-pages4.xml
> >>>  delete mode 100644 tests/qemuxml2xmloutdata/hugepages-pages9.xml
> >>>
> >>> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> >>> index 7396616eda..20d67e7854 100644
> >>> --- a/src/conf/domain_conf.c
> >>> +++ b/src/conf/domain_conf.c
> >>> @@ -6104,6 +6104,35 @@ virDomainDefLifecycleActionValidate(const virDomainDef *def)
> >>>  }
> >>>  
> >>>  
> >>> +static int
> >>> +virDomainDefMemtuneValidate(const virDomainDef *def)
> >>> +{
> >>> +    const virDomainMemtune *mem = &(def->mem);
> >>> +    size_t i;
> >>> +    ssize_t pos = virDomainNumaGetNodeCount(def->numa) - 1;
> >>> +
> >>> +    for (i = 0; i < mem->nhugepages; i++) {
> >>> +        ssize_t nextBit;
> >>> +
> >>> +        if (!mem->hugepages[i].nodemask) {
> >>> +            /* This is the master hugepage to use. Skip it as it has no
> >>> +             * nodemask anyway. */
> >>> +            continue;
> >>> +        }
> >>> +
> >>> +        nextBit = virBitmapNextSetBit(mem->hugepages[i].nodemask, pos);
> >>> +        if (nextBit >= 0) {
> >>
> >> I think its fair to enable hugepages for node #0 which is always there
> >> (even if not configured in domain XML). Just try to run 'numactl -H'
> >> from a domain that has no <numa/> in its XML.
> > 
> > Well yes, linux always assumes that there is at least one NUMA node
> > but other systems might not consider it the same.
> 
> I don't think the assumption is limited to Linux only. Even Windows
> behave the same. For instance the following example shows that on
> non-NUMA machine there is NUMA node #0.
> 
> https://docs.microsoft.com/en-us/windows/desktop/Memory/allocating-memory-from-a-numa-node

Well if we can change the assumption into a fact I'm definitely for
that change to consider all guest to have at least one NUMA node.
I was trying to lookup some documentation/specification but failed
to do so.

> 
> > 
> >>
> >>> +            virReportError(VIR_ERR_XML_DETAIL,
> >>> +                           _("hugepages: node %zd not found"),
> >>> +                           nextBit);
> >>> +            return -1;
> >>> +        }
> >>> +    }
> >>
> >> Also, I see that you're removing hugepages-pages9 test from xml2xml
> >> test. But that is needed only because you disallowed nodeset='0' for
> >> nonnuma domain. The real problem there is that the default page size has
> > 
> > That is already disallowed but only once you try to start such domain,
> > I'm just moving this check from start time to parse time.
> 
> Yes because we have a bug in the code. So when you introduced the test
> it was doomed to fail.

This test case should fail every time because it is invalid
configuration.  You have non-NUMA guest with two different pages
and also specific node configured for one page.

> > 
> > If you look into qemuxml2argvtest.c you will see that hugepages-pages9
> > is expected to fail.
> > 
> >> no numa node to apply to, not nodeset='0'. I guess we need to check for
> >> that too (or do we want to?)
> > 
> > That is yet different issue that can be addressed but it should not
> > block this patch.
> 
> Well, maybe. I'm not saying your patches are wrong. Apart from allowing
> nodeset='0' (which I think we should do, but I don't have that much of a
> strong opinion there).

To make it clear I'll summarize all the possible combinations and how it
should work so we are on the same page.

Pavel
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH 2/4] conf: Move hugepage XML validation check out of qemu_command
Posted by Pavel Hrdina 7 years, 6 months ago
On Wed, Jul 11, 2018 at 06:03:08PM +0200, Pavel Hrdina wrote:
> To make it clear I'll summarize all the possible combinations and how it
> should work so we are on the same page.

originally: before commit [1]
now: after commit [1] (current master)
expect: what this patch series should fix


======= Non-NUMA guests =======


* Only one hugepage specified without any nodeset

    <memoryBacking>
      <hugepages>
        <page size='1048576' unit='KiB'/>
      </hugepages>
    </memoryBacking>

    This is correct, was always working and we should not change it.

    originally: works
    now: works
    expected: works


* Only one hugapage specified with nodeset

    <memoryBacking>
      <hugepages>
        <page size='1048576' unit='KiB' nodeset='0'/>
      </hugepages>
    </memoryBacking>

    This is questionable since there is no guest NUMA node specified,
    but on the other hand we can consider non-NUMA guest to have exactly
    one NUMA node.

    This was working but has been broken by commit [1]  which tried to
    fix a case where you are trying to specify non-existing NUMA node.

    Because of that assumption we can consider this as valid XML even
    though there is no NUMA node specified [2].  There are two possible
    solutions:

        - we can leave the XML intact

        - we can silently remove the 'nodeset' attribute to 'fix' the
          XML definition

    originally: works
    now: fails
    expect: works


    <memoryBacking>
      <hugepages>
        <page size='1048576' unit='KiB' nodeset='1'/>
      </hugepages>
    </memoryBacking>

    If the nodeset is != 0 it should newer work becuase there is no
    guest NUMA topology and even if we take into account the assumption
    that there is always one NUMA node it is still invalid.

    originally: works
    now: fails
    expect: fails


* One hugepage with specific nodeset and second default hugepage

    <memoryBacking>
      <hugepages>
        <page size='1048576' unit='KiB' nodeset='0'/>
        <page size='2048' unit='KiB'/>
      </hugepages>
    </memoryBacking>

    This was working but was 'fixed' by commit [1] because the code
    checks only the first hugepage and uses only the first hugepage.

    It should never worked because it doesn't make any sense, there
    is no guest NUMA node configured and even if we take into account
    the assumption that non-NUMA guest has one NUMA node there is need
    for the default page size.

    originally: works
    now: fails
    expect: fails


    There is yet another issue with the current state in libvirt, if
    you swap the order of pages:

    <memoryBacking>
      <hugepages>
        <page size='2048' unit='KiB'/>
        <page size='1048576' unit='KiB' nodeset='0'/>
      </hugepages>
    </memoryBacking>

    it will work even with current libvirt with the fix [1].  The reason
    is that code in qemuBuildMemPathStr() function takes into account
    only the first page size so it depends on the order of elements
    which is wrong.

    We should not allow any of these two configurations.  Setting
    nodeset to != 0 will should not make any difference.

    originally: works
    now: works
    expect: fails


====== NUMA guest ======


* Only one hugepage specified without any nodeset

    <memoryBacking>
      <hugepages>
        <page size='2048' unit='KiB'/>
      </hugepages>
    </memoryBacking>
    ...
    <cpu mode='host-passthrough' check='none'>
      <topology sockets='1' cores='4' threads='2'/>
      <numa>
        <cell id='0' cpus='0-3' memory='4194304' unit='KiB'/>
        <cell id='1' cpus='4-7' memory='4194304' unit='KiB'/>
      </numa>
    </cpu>

    originally: works
    now: works
    expect: works


* Only one hugapage specified with nodeset

    <memoryBacking>
      <hugepages>
        <page size='2048' unit='KiB' nodeset='0'/>
      </hugepages>
    </memoryBacking>
    ...
    <cpu mode='host-passthrough' check='none'>
      <topology sockets='1' cores='4' threads='2'/>
      <numa>
        <cell id='0' cpus='0-3' memory='4194304' unit='KiB'/>
        <cell id='1' cpus='4-7' memory='4194304' unit='KiB'/>
      </numa>
    </cpu>

    All possible combinations for the nodeset attribute are allowed
    if it always corresponds to existing guest NUMA node:

        <page size='2048' unit='KiB' nodeset='1'/>

        or

        <page size='2048' unit='KiB' nodeset='0,1'/>

    originally: works
    now: works
    expect: works


    <memoryBacking>
      <hugepages>
        <page size='2048' unit='KiB' nodeset='2'/>
      </hugepages>
    </memoryBacking>
    ...
    <cpu mode='host-passthrough' check='none'>
      <topology sockets='1' cores='4' threads='2'/>
      <numa>
        <cell id='0' cpus='0-3' memory='4194304' unit='KiB'/>
        <cell id='1' cpus='4-7' memory='4194304' unit='KiB'/>
      </numa>
    </cpu>

    There is invalid guest NUMA node specified for the hugepage.

    originally: fails
    now: fails
    expect: fails

* One hugepage with specific nodeset and second default hugepage

    <memoryBacking>
      <hugepages>
        <page size='1048576' unit='KiB' nodeset='0'/>
        <page size='2048' unit='KiB'/>
      </hugepages>
    </memoryBacking>
    ...
    <cpu mode='host-passthrough' check='none'>
      <topology sockets='1' cores='4' threads='2'/>
      <numa>
        <cell id='0' cpus='0-3' memory='4194304' unit='KiB'/>
        <cell id='1' cpus='4-7' memory='4194304' unit='KiB'/>
      </numa>
    </cpu>

    There are two guest NUMA nodes where we have default hugepage size
    configured and for NUMA node '0' we have a different size.

    originally: works
    now: works
    expect: works


    <memoryBacking>
      <hugepages>
        <page size='1048576' unit='KiB' nodeset='0,1'/>
        <page size='2048' unit='KiB'/>
      </hugepages>
    </memoryBacking>
    ...
    <cpu mode='host-passthrough' check='none'>
      <topology sockets='1' cores='4' threads='2'/>
      <numa>
        <cell id='0' cpus='0-3' memory='4194304' unit='KiB'/>
        <cell id='1' cpus='4-7' memory='4194304' unit='KiB'/>
      </numa>
    </cpu>

    originally: works
    now: works
    expect: fails ???

    In this situation all the guest NUMA nodes are covered by the
    hugepage size with specified nodeset attribute.  The default one
    is not used at all so is pointless here.
    
    The difference between this and non-NUMA guest is that if we change
    the order it will still work as expected, it doesn't depend on the
    order of elements.  However, we might consider is as invalid
    configuration because there is no need to have the default page size
    configured at all.


* Multiple combination of default and specific hugepage sizes

    There are some restriction if we use multiple page sizes:
        
        - There cannot be two different default hugepage sizes

        - Two different page elements cannot have the same guest NUMA
          node specified in nodeset attribute

        - hugepages are not allowed if memory backing allocation is set
          to 'ondemand' (not documented)

        - hugepages are not allowed if memory backing source is set to
          'anonymous' (not documented)


I hope that there is no other configuration that we need to care about.

Pavel

[1] <fa6bdf6afa878b8d7c5ed71664ee72be8967cdc5>
[2] <https://bugzilla.redhat.com/show_bug.cgi?id=1591235>
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH 2/4] conf: Move hugepage XML validation check out of qemu_command
Posted by Michal Privoznik 7 years, 6 months ago
On 07/13/2018 02:02 PM, Pavel Hrdina wrote:
> On Wed, Jul 11, 2018 at 06:03:08PM +0200, Pavel Hrdina wrote:
>> To make it clear I'll summarize all the possible combinations and how it
>> should work so we are on the same page.
> 
> originally: before commit [1]
> now: after commit [1] (current master)
> expect: what this patch series should fix
> 
> 
> ======= Non-NUMA guests =======
> 
> 
> * Only one hugepage specified without any nodeset
> 
>     <memoryBacking>
>       <hugepages>
>         <page size='1048576' unit='KiB'/>
>       </hugepages>
>     </memoryBacking>
> 
>     This is correct, was always working and we should not change it.
> 
>     originally: works
>     now: works
>     expected: works
> 
> 
> * Only one hugapage specified with nodeset
> 
>     <memoryBacking>
>       <hugepages>
>         <page size='1048576' unit='KiB' nodeset='0'/>
>       </hugepages>
>     </memoryBacking>
> 
>     This is questionable since there is no guest NUMA node specified,
>     but on the other hand we can consider non-NUMA guest to have exactly
>     one NUMA node.
> 
>     This was working but has been broken by commit [1]  which tried to
>     fix a case where you are trying to specify non-existing NUMA node.
> 
>     Because of that assumption we can consider this as valid XML even
>     though there is no NUMA node specified [2].  There are two possible
>     solutions:
> 
>         - we can leave the XML intact
> 
>         - we can silently remove the 'nodeset' attribute to 'fix' the
>           XML definition
> 
>     originally: works
>     now: fails
>     expect: works
> 
> 
>     <memoryBacking>
>       <hugepages>
>         <page size='1048576' unit='KiB' nodeset='1'/>
>       </hugepages>
>     </memoryBacking>
> 
>     If the nodeset is != 0 it should newer work becuase there is no
>     guest NUMA topology and even if we take into account the assumption
>     that there is always one NUMA node it is still invalid.
> 
>     originally: works
>     now: fails
>     expect: fails
> 
> 
> * One hugepage with specific nodeset and second default hugepage
> 
>     <memoryBacking>
>       <hugepages>
>         <page size='1048576' unit='KiB' nodeset='0'/>
>         <page size='2048' unit='KiB'/>
>       </hugepages>
>     </memoryBacking>
> 
>     This was working but was 'fixed' by commit [1] because the code
>     checks only the first hugepage and uses only the first hugepage.
> 
>     It should never worked because it doesn't make any sense, there
>     is no guest NUMA node configured and even if we take into account
>     the assumption that non-NUMA guest has one NUMA node there is need
>     for the default page size.
> 
>     originally: works
>     now: fails
>     expect: fails
> 
> 
>     There is yet another issue with the current state in libvirt, if
>     you swap the order of pages:
> 
>     <memoryBacking>
>       <hugepages>
>         <page size='2048' unit='KiB'/>
>         <page size='1048576' unit='KiB' nodeset='0'/>
>       </hugepages>
>     </memoryBacking>
> 
>     it will work even with current libvirt with the fix [1].  The reason
>     is that code in qemuBuildMemPathStr() function takes into account
>     only the first page size so it depends on the order of elements
>     which is wrong.
> 
>     We should not allow any of these two configurations.  Setting
>     nodeset to != 0 will should not make any difference.
> 
>     originally: works
>     now: works
>     expect: fails
> 
> 
> ====== NUMA guest ======
> 
> 
> * Only one hugepage specified without any nodeset
> 
>     <memoryBacking>
>       <hugepages>
>         <page size='2048' unit='KiB'/>
>       </hugepages>
>     </memoryBacking>
>     ...
>     <cpu mode='host-passthrough' check='none'>
>       <topology sockets='1' cores='4' threads='2'/>
>       <numa>
>         <cell id='0' cpus='0-3' memory='4194304' unit='KiB'/>
>         <cell id='1' cpus='4-7' memory='4194304' unit='KiB'/>
>       </numa>
>     </cpu>
> 
>     originally: works
>     now: works
>     expect: works
> 
> 
> * Only one hugapage specified with nodeset
> 
>     <memoryBacking>
>       <hugepages>
>         <page size='2048' unit='KiB' nodeset='0'/>
>       </hugepages>
>     </memoryBacking>
>     ...
>     <cpu mode='host-passthrough' check='none'>
>       <topology sockets='1' cores='4' threads='2'/>
>       <numa>
>         <cell id='0' cpus='0-3' memory='4194304' unit='KiB'/>
>         <cell id='1' cpus='4-7' memory='4194304' unit='KiB'/>
>       </numa>
>     </cpu>
> 
>     All possible combinations for the nodeset attribute are allowed
>     if it always corresponds to existing guest NUMA node:
> 
>         <page size='2048' unit='KiB' nodeset='1'/>
> 
>         or
> 
>         <page size='2048' unit='KiB' nodeset='0,1'/>
> 
>     originally: works
>     now: works
>     expect: works
> 
> 
>     <memoryBacking>
>       <hugepages>
>         <page size='2048' unit='KiB' nodeset='2'/>
>       </hugepages>
>     </memoryBacking>
>     ...
>     <cpu mode='host-passthrough' check='none'>
>       <topology sockets='1' cores='4' threads='2'/>
>       <numa>
>         <cell id='0' cpus='0-3' memory='4194304' unit='KiB'/>
>         <cell id='1' cpus='4-7' memory='4194304' unit='KiB'/>
>       </numa>
>     </cpu>
> 
>     There is invalid guest NUMA node specified for the hugepage.
> 
>     originally: fails
>     now: fails
>     expect: fails
> 
> * One hugepage with specific nodeset and second default hugepage
> 
>     <memoryBacking>
>       <hugepages>
>         <page size='1048576' unit='KiB' nodeset='0'/>
>         <page size='2048' unit='KiB'/>
>       </hugepages>
>     </memoryBacking>
>     ...
>     <cpu mode='host-passthrough' check='none'>
>       <topology sockets='1' cores='4' threads='2'/>
>       <numa>
>         <cell id='0' cpus='0-3' memory='4194304' unit='KiB'/>
>         <cell id='1' cpus='4-7' memory='4194304' unit='KiB'/>
>       </numa>
>     </cpu>
> 
>     There are two guest NUMA nodes where we have default hugepage size
>     configured and for NUMA node '0' we have a different size.
> 
>     originally: works
>     now: works
>     expect: works
> 
> 
>     <memoryBacking>
>       <hugepages>
>         <page size='1048576' unit='KiB' nodeset='0,1'/>
>         <page size='2048' unit='KiB'/>
>       </hugepages>
>     </memoryBacking>
>     ...
>     <cpu mode='host-passthrough' check='none'>
>       <topology sockets='1' cores='4' threads='2'/>
>       <numa>
>         <cell id='0' cpus='0-3' memory='4194304' unit='KiB'/>
>         <cell id='1' cpus='4-7' memory='4194304' unit='KiB'/>
>       </numa>
>     </cpu>
> 
>     originally: works
>     now: works
>     expect: fails ???
> 
>     In this situation all the guest NUMA nodes are covered by the
>     hugepage size with specified nodeset attribute.  The default one
>     is not used at all so is pointless here.
>     
>     The difference between this and non-NUMA guest is that if we change
>     the order it will still work as expected, it doesn't depend on the
>     order of elements.  However, we might consider is as invalid
>     configuration because there is no need to have the default page size
>     configured at all.
> 
> 
> * Multiple combination of default and specific hugepage sizes
> 
>     There are some restriction if we use multiple page sizes:
>         
>         - There cannot be two different default hugepage sizes
> 
>         - Two different page elements cannot have the same guest NUMA
>           node specified in nodeset attribute
> 
>         - hugepages are not allowed if memory backing allocation is set
>           to 'ondemand' (not documented)
> 
>         - hugepages are not allowed if memory backing source is set to
>           'anonymous' (not documented)
> 
> 
> I hope that there is no other configuration that we need to care about.
> 

Okay, let's make our lives simpler. I retract my suggestion to allow
nodeset=0 for non-NUMA guest. Let's do it how you describe here.

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list