Let's make it easier to pin threads created via a ThreadContext to
all host CPUs currently belonging to a given set of host NUMA nodes --
which is the common case.
"node-affinity" is simply a shortcut for setting "cpu-affinity" manually
to the list of host CPUs belonging to the set of host nodes. This property
can only be written.
A simple QEMU example to set the CPU affinity to host node 1 on a system
with two nodes, 24 CPUs each, whereby odd-numbered host CPUs belong to
host node 1:
qemu-system-x86_64 -S \
-object thread-context,id=tc1,node-affinity=1
And we can query the cpu-affinity via HMP/QMP:
(qemu) qom-get tc1 cpu-affinity
[
1,
3,
5,
7,
9,
11,
13,
15,
17,
19,
21,
23,
25,
27,
29,
31,
33,
35,
37,
39,
41,
43,
45,
47
]
We cannot query the node-affinity:
(qemu) qom-get tc1 node-affinity
Error: Insufficient permission to perform this operation
But note that due to dynamic library loading this example will not work
before we actually make use of thread_context_create_thread() in QEMU
code, because the type will otherwise not get registered. We'll wire
this up next to make it work.
Note that if the host CPUs for a host node change due do CPU hot(un)plug
CPU onlining/offlining (i.e., lscpu output changes) after the ThreadContext
was started, the CPU affinity will not get updated.
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20221014134720.168738-5-david@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
qapi/qom.json | 9 ++++-
util/meson.build | 2 +-
util/thread-context.c | 84 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 93 insertions(+), 2 deletions(-)
diff --git a/qapi/qom.json b/qapi/qom.json
index 8013ba4b82..20b5735d78 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -839,10 +839,17 @@
# threads created in the thread context (default: QEMU main
# thread CPU affinity)
#
+# @node-affinity: the list of host node numbers that will be resolved to a
+# list of host CPU numbers used as CPU affinity. This is a
+# shortcut for specifying the list of host CPU numbers
+# belonging to the host nodes manually by setting
+# @cpu-affinity. (default: QEMU main thread affinity)
+#
# Since: 7.2
##
{ 'struct': 'ThreadContextProperties',
- 'data': { '*cpu-affinity': ['uint16'] } }
+ 'data': { '*cpu-affinity': ['uint16'],
+ '*node-affinity': ['uint16'] } }
##
diff --git a/util/meson.build b/util/meson.build
index e97cd2d779..c0a7bc54d4 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -1,5 +1,5 @@
util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c'))
-util_ss.add(files('thread-context.c'))
+util_ss.add(files('thread-context.c'), numa)
if not config_host_data.get('CONFIG_ATOMIC64')
util_ss.add(files('atomic64.c'))
endif
diff --git a/util/thread-context.c b/util/thread-context.c
index c921905396..4138245332 100644
--- a/util/thread-context.c
+++ b/util/thread-context.c
@@ -21,6 +21,10 @@
#include "qemu/module.h"
#include "qemu/bitmap.h"
+#ifdef CONFIG_NUMA
+#include <numa.h>
+#endif
+
enum {
TC_CMD_NONE = 0,
TC_CMD_STOP,
@@ -88,6 +92,11 @@ static void thread_context_set_cpu_affinity(Object *obj, Visitor *v,
int nbits = 0, ret;
Error *err = NULL;
+ if (tc->init_cpu_bitmap) {
+ error_setg(errp, "Mixing CPU and node affinity not supported");
+ return;
+ }
+
visit_type_uint16List(v, name, &host_cpus, &err);
if (err) {
error_propagate(errp, err);
@@ -159,6 +168,79 @@ static void thread_context_get_cpu_affinity(Object *obj, Visitor *v,
qapi_free_uint16List(host_cpus);
}
+static void thread_context_set_node_affinity(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+#ifdef CONFIG_NUMA
+ const int nbits = numa_num_possible_cpus();
+ ThreadContext *tc = THREAD_CONTEXT(obj);
+ uint16List *l, *host_nodes = NULL;
+ unsigned long *bitmap = NULL;
+ struct bitmask *tmp_cpus;
+ Error *err = NULL;
+ int ret, i;
+
+ if (tc->init_cpu_bitmap) {
+ error_setg(errp, "Mixing CPU and node affinity not supported");
+ return;
+ }
+
+ visit_type_uint16List(v, name, &host_nodes, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+
+ if (!host_nodes) {
+ error_setg(errp, "Node list is empty");
+ goto out;
+ }
+
+ bitmap = bitmap_new(nbits);
+ tmp_cpus = numa_allocate_cpumask();
+ for (l = host_nodes; l; l = l->next) {
+ numa_bitmask_clearall(tmp_cpus);
+ ret = numa_node_to_cpus(l->value, tmp_cpus);
+ if (ret) {
+ /* We ignore any errors, such as impossible nodes. */
+ continue;
+ }
+ for (i = 0; i < nbits; i++) {
+ if (numa_bitmask_isbitset(tmp_cpus, i)) {
+ set_bit(i, bitmap);
+ }
+ }
+ }
+ numa_free_cpumask(tmp_cpus);
+
+ if (bitmap_empty(bitmap, nbits)) {
+ error_setg(errp, "The nodes select no CPUs");
+ goto out;
+ }
+
+ if (tc->thread_id != -1) {
+ /*
+ * Note: we won't be adjusting the affinity of any thread that is still
+ * around for now, but only the affinity of the context thread.
+ */
+ ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits);
+ if (ret) {
+ error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
+ }
+ } else {
+ tc->init_cpu_bitmap = bitmap;
+ bitmap = NULL;
+ tc->init_cpu_nbits = nbits;
+ }
+out:
+ g_free(bitmap);
+ qapi_free_uint16List(host_nodes);
+#else
+ error_setg(errp, "NUMA node affinity is not supported by this QEMU");
+#endif
+}
+
static void thread_context_get_thread_id(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
@@ -208,6 +290,8 @@ static void thread_context_class_init(ObjectClass *oc, void *data)
object_class_property_add(oc, "cpu-affinity", "int",
thread_context_get_cpu_affinity,
thread_context_set_cpu_affinity, NULL, NULL);
+ object_class_property_add(oc, "node-affinity", "int", NULL,
+ thread_context_set_node_affinity, NULL, NULL);
}
static void thread_context_instance_init(Object *obj)
--
2.37.3
Hi,
turning pages back in time,
noticed that in recent qemu-img binaries we include an ELF dependency on libnuma.so that seems unused.
I think it stems from this commit:
commit 10218ae6d006f76410804cc4dc690085b3d008b5
Author: David Hildenbrand <david@redhat.com>
Date: Fri Oct 14 15:47:17 2022 +0200
util: Add write-only "node-affinity" property for ThreadContext
possibly this hunk?
diff --git a/util/meson.build b/util/meson.build
index e97cd2d779..c0a7bc54d4 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -1,5 +1,5 @@
util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c'))
-util_ss.add(files('thread-context.c'))
+util_ss.add(files('thread-context.c'), numa)
if not config_host_data.get('CONFIG_ATOMIC64')
util_ss.add(files('atomic64.c'))
endif
I wonder if there is some conditional we could use to avoid the apparently useless dependency to libnuma in the qemu-img binary?
Ciao,
Claudio
On 10/28/22 11:52, David Hildenbrand wrote:
> Let's make it easier to pin threads created via a ThreadContext to
> all host CPUs currently belonging to a given set of host NUMA nodes --
> which is the common case.
>
> "node-affinity" is simply a shortcut for setting "cpu-affinity" manually
> to the list of host CPUs belonging to the set of host nodes. This property
> can only be written.
>
> A simple QEMU example to set the CPU affinity to host node 1 on a system
> with two nodes, 24 CPUs each, whereby odd-numbered host CPUs belong to
> host node 1:
> qemu-system-x86_64 -S \
> -object thread-context,id=tc1,node-affinity=1
>
> And we can query the cpu-affinity via HMP/QMP:
> (qemu) qom-get tc1 cpu-affinity
> [
> 1,
> 3,
> 5,
> 7,
> 9,
> 11,
> 13,
> 15,
> 17,
> 19,
> 21,
> 23,
> 25,
> 27,
> 29,
> 31,
> 33,
> 35,
> 37,
> 39,
> 41,
> 43,
> 45,
> 47
> ]
>
> We cannot query the node-affinity:
> (qemu) qom-get tc1 node-affinity
> Error: Insufficient permission to perform this operation
>
> But note that due to dynamic library loading this example will not work
> before we actually make use of thread_context_create_thread() in QEMU
> code, because the type will otherwise not get registered. We'll wire
> this up next to make it work.
>
> Note that if the host CPUs for a host node change due do CPU hot(un)plug
> CPU onlining/offlining (i.e., lscpu output changes) after the ThreadContext
> was started, the CPU affinity will not get updated.
>
> Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
> Acked-by: Markus Armbruster <armbru@redhat.com>
> Message-Id: <20221014134720.168738-5-david@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
> qapi/qom.json | 9 ++++-
> util/meson.build | 2 +-
> util/thread-context.c | 84 +++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 93 insertions(+), 2 deletions(-)
>
> diff --git a/qapi/qom.json b/qapi/qom.json
> index 8013ba4b82..20b5735d78 100644
> --- a/qapi/qom.json
> +++ b/qapi/qom.json
> @@ -839,10 +839,17 @@
> # threads created in the thread context (default: QEMU main
> # thread CPU affinity)
> #
> +# @node-affinity: the list of host node numbers that will be resolved to a
> +# list of host CPU numbers used as CPU affinity. This is a
> +# shortcut for specifying the list of host CPU numbers
> +# belonging to the host nodes manually by setting
> +# @cpu-affinity. (default: QEMU main thread affinity)
> +#
> # Since: 7.2
> ##
> { 'struct': 'ThreadContextProperties',
> - 'data': { '*cpu-affinity': ['uint16'] } }
> + 'data': { '*cpu-affinity': ['uint16'],
> + '*node-affinity': ['uint16'] } }
>
>
> ##
> diff --git a/util/meson.build b/util/meson.build
> index e97cd2d779..c0a7bc54d4 100644
> --- a/util/meson.build
> +++ b/util/meson.build
> @@ -1,5 +1,5 @@
> util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c'))
> -util_ss.add(files('thread-context.c'))
> +util_ss.add(files('thread-context.c'), numa)
> if not config_host_data.get('CONFIG_ATOMIC64')
> util_ss.add(files('atomic64.c'))
> endif
> diff --git a/util/thread-context.c b/util/thread-context.c
> index c921905396..4138245332 100644
> --- a/util/thread-context.c
> +++ b/util/thread-context.c
> @@ -21,6 +21,10 @@
> #include "qemu/module.h"
> #include "qemu/bitmap.h"
>
> +#ifdef CONFIG_NUMA
> +#include <numa.h>
> +#endif
> +
> enum {
> TC_CMD_NONE = 0,
> TC_CMD_STOP,
> @@ -88,6 +92,11 @@ static void thread_context_set_cpu_affinity(Object *obj, Visitor *v,
> int nbits = 0, ret;
> Error *err = NULL;
>
> + if (tc->init_cpu_bitmap) {
> + error_setg(errp, "Mixing CPU and node affinity not supported");
> + return;
> + }
> +
> visit_type_uint16List(v, name, &host_cpus, &err);
> if (err) {
> error_propagate(errp, err);
> @@ -159,6 +168,79 @@ static void thread_context_get_cpu_affinity(Object *obj, Visitor *v,
> qapi_free_uint16List(host_cpus);
> }
>
> +static void thread_context_set_node_affinity(Object *obj, Visitor *v,
> + const char *name, void *opaque,
> + Error **errp)
> +{
> +#ifdef CONFIG_NUMA
> + const int nbits = numa_num_possible_cpus();
> + ThreadContext *tc = THREAD_CONTEXT(obj);
> + uint16List *l, *host_nodes = NULL;
> + unsigned long *bitmap = NULL;
> + struct bitmask *tmp_cpus;
> + Error *err = NULL;
> + int ret, i;
> +
> + if (tc->init_cpu_bitmap) {
> + error_setg(errp, "Mixing CPU and node affinity not supported");
> + return;
> + }
> +
> + visit_type_uint16List(v, name, &host_nodes, &err);
> + if (err) {
> + error_propagate(errp, err);
> + return;
> + }
> +
> + if (!host_nodes) {
> + error_setg(errp, "Node list is empty");
> + goto out;
> + }
> +
> + bitmap = bitmap_new(nbits);
> + tmp_cpus = numa_allocate_cpumask();
> + for (l = host_nodes; l; l = l->next) {
> + numa_bitmask_clearall(tmp_cpus);
> + ret = numa_node_to_cpus(l->value, tmp_cpus);
> + if (ret) {
> + /* We ignore any errors, such as impossible nodes. */
> + continue;
> + }
> + for (i = 0; i < nbits; i++) {
> + if (numa_bitmask_isbitset(tmp_cpus, i)) {
> + set_bit(i, bitmap);
> + }
> + }
> + }
> + numa_free_cpumask(tmp_cpus);
> +
> + if (bitmap_empty(bitmap, nbits)) {
> + error_setg(errp, "The nodes select no CPUs");
> + goto out;
> + }
> +
> + if (tc->thread_id != -1) {
> + /*
> + * Note: we won't be adjusting the affinity of any thread that is still
> + * around for now, but only the affinity of the context thread.
> + */
> + ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits);
> + if (ret) {
> + error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
> + }
> + } else {
> + tc->init_cpu_bitmap = bitmap;
> + bitmap = NULL;
> + tc->init_cpu_nbits = nbits;
> + }
> +out:
> + g_free(bitmap);
> + qapi_free_uint16List(host_nodes);
> +#else
> + error_setg(errp, "NUMA node affinity is not supported by this QEMU");
> +#endif
> +}
> +
> static void thread_context_get_thread_id(Object *obj, Visitor *v,
> const char *name, void *opaque,
> Error **errp)
> @@ -208,6 +290,8 @@ static void thread_context_class_init(ObjectClass *oc, void *data)
> object_class_property_add(oc, "cpu-affinity", "int",
> thread_context_get_cpu_affinity,
> thread_context_set_cpu_affinity, NULL, NULL);
> + object_class_property_add(oc, "node-affinity", "int", NULL,
> + thread_context_set_node_affinity, NULL, NULL);
> }
>
> static void thread_context_instance_init(Object *obj)
On 05.02.24 11:14, Claudio Fontana wrote:
> Hi,
Hi Claudio,
>
> turning pages back in time,
>
> noticed that in recent qemu-img binaries we include an ELF dependency on libnuma.so that seems unused.
>
> I think it stems from this commit:
>
> commit 10218ae6d006f76410804cc4dc690085b3d008b5
> Author: David Hildenbrand <david@redhat.com>
> Date: Fri Oct 14 15:47:17 2022 +0200
>
> util: Add write-only "node-affinity" property for ThreadContext
>
>
> possibly this hunk?
>
> diff --git a/util/meson.build b/util/meson.build
> index e97cd2d779..c0a7bc54d4 100644
> --- a/util/meson.build
> +++ b/util/meson.build
> @@ -1,5 +1,5 @@
> util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c'))
> -util_ss.add(files('thread-context.c'))
> +util_ss.add(files('thread-context.c'), numa)
> if not config_host_data.get('CONFIG_ATOMIC64')
> util_ss.add(files('atomic64.c'))
> endif
>
>
> I wonder if there is some conditional we could use to avoid the apparently useless dependency to libnuma in the qemu-img binary?
the simplest change is probably moving the thread-context stuff out of
util (as you say, it's currently only used by QEMU itself).
--
Cheers,
David / dhildenb
Hello David,
It would seem to me that a lot of the calling code like qemu_prealloc_mem for example
should be sysemu-only, not used for tools, or user mode either right?
And the thread_context.c itself should also be sysemu-only, correct?
Thanks,
Claudio
On 2/5/24 15:15, David Hildenbrand wrote:
> On 05.02.24 11:14, Claudio Fontana wrote:
>> Hi,
>
> Hi Claudio,
>
>>
>> turning pages back in time,
>>
>> noticed that in recent qemu-img binaries we include an ELF dependency on libnuma.so that seems unused.
>>
>> I think it stems from this commit:
>>
>> commit 10218ae6d006f76410804cc4dc690085b3d008b5
>> Author: David Hildenbrand <david@redhat.com>
>> Date: Fri Oct 14 15:47:17 2022 +0200
>>
>> util: Add write-only "node-affinity" property for ThreadContext
>>
>>
>> possibly this hunk?
>>
>> diff --git a/util/meson.build b/util/meson.build
>> index e97cd2d779..c0a7bc54d4 100644
>> --- a/util/meson.build
>> +++ b/util/meson.build
>> @@ -1,5 +1,5 @@
>> util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c'))
>> -util_ss.add(files('thread-context.c'))
>> +util_ss.add(files('thread-context.c'), numa)
>> if not config_host_data.get('CONFIG_ATOMIC64')
>> util_ss.add(files('atomic64.c'))
>> endif
>>
>>
>> I wonder if there is some conditional we could use to avoid the apparently useless dependency to libnuma in the qemu-img binary?
>
> the simplest change is probably moving the thread-context stuff out of
> util (as you say, it's currently only used by QEMU itself).
>
On 05.02.24 17:13, Claudio Fontana wrote: > Hello David, > Hi, > It would seem to me that a lot of the calling code like qemu_prealloc_mem for example > should be sysemu-only, not used for tools, or user mode either right? > > And the thread_context.c itself should also be sysemu-only, correct? Yes, both should currently only get used for sysemu only. Memory backends and virtio-mem are sysemu-only. -- Cheers, David / dhildenb
© 2016 - 2026 Red Hat, Inc.