Add vcpu affinity to the dom0less bindings. Example:
dom1 {
...
cpus = <4>;
vcpu0 {
compatible = "xen,vcpu-affinity";
id = <0>;
hard-affinity = "4-7";
};
vcpu1 {
compatible = "xen,vcpu-affinity";
id = <1>;
hard-affinity = "0-3";
};
vcpu2 {
compatible = "xen,vcpu-affinity";
id = <2>;
hard-affinity = "1,6";
};
...
Note that the property hard-affinity is optional. It is possible to add
other properties in the future not only to specify soft affinity, but
also to provide more precise methods for configuring affinity. For
instance, on ARM the MPIDR could be use to specify the pCPU. For now, it
is left to the future.
Signed-off-by: Xenia Ragiadakou <xenia.ragiadakou@amd.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
---
Changes in v3:
- improve commit message
- improve binding doc
- add panic on invalid pCPU
- move parsing to a separate function
diff --git a/docs/misc/arm/device-tree/booting.txt b/docs/misc/arm/device-tree/booting.txt
index 9c881baccc..10e55c825c 100644
--- a/docs/misc/arm/device-tree/booting.txt
+++ b/docs/misc/arm/device-tree/booting.txt
@@ -324,6 +324,27 @@ The ramdisk sub-node has the following properties:
property because it will be created by the UEFI stub on boot.
This option is needed only when UEFI boot is used.
+Under the "xen,domain" compatible node, it is possible optionally to add
+one or more sub-nodes to configure vCPU affinity. The vCPU affinity
+sub-node has the following properties:
+
+- compatible
+
+ "xen,vcpu-affinity"
+
+- id
+
+ A 32-bit integer that specifies the vCPU id. 0 is the first vCPU.
+ The last vCPU is cpus-1, where "cpus" is the number of vCPUs
+ specified with the "cpus" property under the "xen,domain" node.
+
+- hard-affinity
+
+ Optional. A string specifying the hard affinity configuration for the
+ vCPU: a comma-separated list of pCPUs or ranges of pCPUs is used.
+ Ranges are hyphen-separated intervals (such as `0-4`) and are inclusive
+ on both sides. The numbers refer to pCPU ids.
+
Example
=======
diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
index 49d1f14d65..e364820189 100644
--- a/xen/arch/arm/dom0less-build.c
+++ b/xen/arch/arm/dom0less-build.c
@@ -810,6 +810,68 @@ static int __init construct_domU(struct domain *d,
return rc;
}
+static void __init domain_vcpu_affinity(struct domain *d,
+ const struct dt_device_node *node)
+{
+ const char *hard_affinity_str = NULL;
+ struct dt_device_node *np;
+ uint32_t val;
+ int rc;
+
+ dt_for_each_child_node(node, np)
+ {
+ const char *s;
+ struct vcpu *v;
+ cpumask_t affinity;
+
+ if ( !dt_device_is_compatible(np, "xen,vcpu-affinity") )
+ continue;
+
+ if ( !dt_property_read_u32(np, "id", &val) )
+ continue;
+
+ if ( val >= d->max_vcpus )
+ panic("Invalid vcpu_id %u for domain %s\n", val, dt_node_name(node));
+
+ v = d->vcpu[val];
+ rc = dt_property_read_string(np, "hard-affinity", &hard_affinity_str);
+ if ( rc < 0 )
+ continue;
+
+ s = hard_affinity_str;
+ cpumask_clear(&affinity);
+ while ( *s != '\0' )
+ {
+ unsigned int start, end;
+
+ start = simple_strtoul(s, &s, 0);
+
+ if ( *s == '-' ) /* Range */
+ {
+ s++;
+ end = simple_strtoul(s, &s, 0);
+ }
+ else /* Single value */
+ end = start;
+
+ if ( end >= nr_cpu_ids )
+ panic("Invalid pCPU %u for domain %s\n", end, dt_node_name(node));
+
+ for ( ; start <= end; start++ )
+ cpumask_set_cpu(start, &affinity);
+
+ if ( *s == ',' )
+ s++;
+ else if ( *s != '\0' )
+ break;
+ }
+
+ rc = vcpu_set_hard_affinity(v, &affinity);
+ if ( rc )
+ panic("vcpu%d: failed to set hard affinity\n", v->vcpu_id);
+ }
+}
+
void __init create_domUs(void)
{
struct dt_device_node *node;
@@ -992,6 +1054,8 @@ void __init create_domUs(void)
if ( rc )
panic("Could not set up domain %s (rc = %d)\n",
dt_node_name(node), rc);
+
+ domain_vcpu_affinity(d, node);
}
}
Hi Stefano,
On 18/02/2025 20:29, Stefano Stabellini wrote:
> Add vcpu affinity to the dom0less bindings. Example:
>
> dom1 {
> ...
> cpus = <4>;
> vcpu0 {
> compatible = "xen,vcpu-affinity";
I would prefer if the compatible is "xen,vcpu". This would allow us to
extend for anything that vCPU specific. I would also look less odd if
someone ...
> id = <0>;
> hard-affinity = "4-7";
... doesn't specify hard-affinity which is optional.
> };
> vcpu1 {
> compatible = "xen,vcpu-affinity";
> id = <1>;
> hard-affinity = "0-3";
NIT: This example is exactly the same as vcpu0. How about changing to a
list of range/single value? This would make clear that a mix is possible.
> };
> vcpu2 {
> compatible = "xen,vcpu-affinity";
> id = <2>;
> hard-affinity = "1,6";
> };
> ...
>
> Note that the property hard-affinity is optional. It is possible to add
> other properties in the future not only to specify soft affinity, but
> also to provide more precise methods for configuring affinity. For
> instance, on ARM the MPIDR could be use to specify the pCPU. For now, it
> is left to the future.
>
> Signed-off-by: Xenia Ragiadakou <xenia.ragiadakou@amd.com>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
> ---
> Changes in v3:
> - improve commit message
> - improve binding doc
> - add panic on invalid pCPU
> - move parsing to a separate function
>
> diff --git a/docs/misc/arm/device-tree/booting.txt b/docs/misc/arm/device-tree/booting.txt
> index 9c881baccc..10e55c825c 100644
> --- a/docs/misc/arm/device-tree/booting.txt
> +++ b/docs/misc/arm/device-tree/booting.txt
> @@ -324,6 +324,27 @@ The ramdisk sub-node has the following properties:
> property because it will be created by the UEFI stub on boot.
> This option is needed only when UEFI boot is used.
>
> +Under the "xen,domain" compatible node, it is possible optionally to add
> +one or more sub-nodes to configure vCPU affinity. The vCPU affinity
> +sub-node has the following properties:
> +
> +- compatible
> +
> + "xen,vcpu-affinity"
> +
> +- id
> +
> + A 32-bit integer that specifies the vCPU id. 0 is the first vCPU.
> + The last vCPU is cpus-1, where "cpus" is the number of vCPUs
> + specified with the "cpus" property under the "xen,domain" node.
I think it would be worth mentioning that each node must have a unique
ID. It is not necessary to check in the code, but it would avoid the
question of what happen if someone specify twice the VCPU with different
affinity.
> +
> +- hard-affinity
> +
> + Optional. A string specifying the hard affinity configuration for the
> + vCPU: a comma-separated list of pCPUs or ranges of pCPUs is used.
> + Ranges are hyphen-separated intervals (such as `0-4`) and are inclusive
> + on both sides. The numbers refer to pCPU ids.
Technically MPIDRs are pCPUs ID. So I would add "logical" in front of
pCPU ids to make clear what IDs are we talking about
> +
>
> Example
> =======
No update to the example?
> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
> index 49d1f14d65..e364820189 100644
> --- a/xen/arch/arm/dom0less-build.c
> +++ b/xen/arch/arm/dom0less-build.c
> @@ -810,6 +810,68 @@ static int __init construct_domU(struct domain *d,
> return rc;
> }
>
> +static void __init domain_vcpu_affinity(struct domain *d,
> + const struct dt_device_node *node)
> +{> + const char *hard_affinity_str = NULL;
> + struct dt_device_node *np;
> + uint32_t val;
> + int rc;
Can you expain why 'rc', 'val', 'hard_affinity_str' are defined outside
of the loop when ...
> +
> + dt_for_each_child_node(node, np)
> + {
> + const char *s;
> + struct vcpu *v;
> + cpumask_t affinity;
... they are not? Yet they have the same property (i.e. only called
within the loop).
> +
> + if ( !dt_device_is_compatible(np, "xen,vcpu-affinity") )
> + continue;
> +
> + if ( !dt_property_read_u32(np, "id", &val) )
Looking at the binding you wrote, "id" is mandatory. So I think we
should throw an error if it is not present.
> + continue;
> +> + if ( val >= d->max_vcpus )
> + panic("Invalid vcpu_id %u for domain %s\n", val, dt_node_name(node));
NIT: Maybe print the maximum number of vCPUs? This would make easier to
know what's wrong with the ID.
> +
> + v = d->vcpu[val];
> + rc = dt_property_read_string(np, "hard-affinity", &hard_affinity_str);
> + if ( rc < 0 )
> + continue;
> +
> + s = hard_affinity_str;
OOI, you don't seem to use hard_affinity_str afterwards, so why can't we
use 'hard_affinity_str' directly and avoid an extra variable?
> + cpumask_clear(&affinity);
> + while ( *s != '\0' )
> + {
> + unsigned int start, end;
> +
> + start = simple_strtoul(s, &s, 0);
> +
> + if ( *s == '-' ) /* Range */
> + {
> + s++;
> + end = simple_strtoul(s, &s, 0);
> + }
> + else /* Single value */
> + end = start;
> +
> + if ( end >= nr_cpu_ids )
> + panic("Invalid pCPU %u for domain %s\n", end, dt_node_name(node));
> +
> + for ( ; start <= end; start++ )
> + cpumask_set_cpu(start, &affinity);
> +
> + if ( *s == ',' )
> + s++;
> + else if ( *s != '\0' )
> + break;
NIT: We seem to have various place in Xen parsing range (e.g.
init_boot_pages()). Could we provide an helper to avoid duplicating the
code?
> + }
> +
> + rc = vcpu_set_hard_affinity(v, &affinity);
> + if ( rc )
> + panic("vcpu%d: failed to set hard affinity\n", v->vcpu_id);
Can we print the domain name like you do before and also 'rc'? This
would help any debugging.
> + }
> +}
> +
> void __init create_domUs(void)
> {
> struct dt_device_node *node;
> @@ -992,6 +1054,8 @@ void __init create_domUs(void)
> if ( rc )
> panic("Could not set up domain %s (rc = %d)\n",
> dt_node_name(node), rc);
> +
> + domain_vcpu_affinity(d, node);
Shouldn't this call be part of construct_domU?
> }
> }
>
Cheers,
--
Julien Grall
On Wed, 19 Feb 2025, Julien Grall wrote:
> Hi Stefano,
>
> On 18/02/2025 20:29, Stefano Stabellini wrote:
> > Add vcpu affinity to the dom0less bindings. Example:
> >
> > dom1 {
> > ...
> > cpus = <4>;
> > vcpu0 {
> > compatible = "xen,vcpu-affinity";
>
> I would prefer if the compatible is "xen,vcpu". This would allow us to extend
> for anything that vCPU specific. I would also look less odd if someone ...
>
> > id = <0>;
> > hard-affinity = "4-7";
>
> ... doesn't specify hard-affinity which is optional.
>
> > };
> > vcpu1 {
> > compatible = "xen,vcpu-affinity";
> > id = <1>;
> > hard-affinity = "0-3";
>
> NIT: This example is exactly the same as vcpu0. How about changing to a list
> of range/single value? This would make clear that a mix is possible.
>
> > };
> > vcpu2 {
> > compatible = "xen,vcpu-affinity";
> > id = <2>;
> > hard-affinity = "1,6";
> > };
> > ...
> >
> > Note that the property hard-affinity is optional. It is possible to add
> > other properties in the future not only to specify soft affinity, but
> > also to provide more precise methods for configuring affinity. For
> > instance, on ARM the MPIDR could be use to specify the pCPU. For now, it
> > is left to the future.
> >
> > Signed-off-by: Xenia Ragiadakou <xenia.ragiadakou@amd.com>
> > Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
> > ---
> > Changes in v3:
> > - improve commit message
> > - improve binding doc
> > - add panic on invalid pCPU
> > - move parsing to a separate function
> >
> > diff --git a/docs/misc/arm/device-tree/booting.txt
> > b/docs/misc/arm/device-tree/booting.txt
> > index 9c881baccc..10e55c825c 100644
> > --- a/docs/misc/arm/device-tree/booting.txt
> > +++ b/docs/misc/arm/device-tree/booting.txt
> > @@ -324,6 +324,27 @@ The ramdisk sub-node has the following properties:
> > property because it will be created by the UEFI stub on boot.
> > This option is needed only when UEFI boot is used.
> > +Under the "xen,domain" compatible node, it is possible optionally to add
> > +one or more sub-nodes to configure vCPU affinity. The vCPU affinity
> > +sub-node has the following properties:
> > +
> > +- compatible
> > +
> > + "xen,vcpu-affinity"
> > +
> > +- id
> > +
> > + A 32-bit integer that specifies the vCPU id. 0 is the first vCPU.
> > + The last vCPU is cpus-1, where "cpus" is the number of vCPUs
> > + specified with the "cpus" property under the "xen,domain" node.
>
> I think it would be worth mentioning that each node must have a unique ID. It
> is not necessary to check in the code, but it would avoid the question of what
> happen if someone specify twice the VCPU with different affinity.
>
> > +
> > +- hard-affinity
> > +
> > + Optional. A string specifying the hard affinity configuration for the
> > + vCPU: a comma-separated list of pCPUs or ranges of pCPUs is used.
> > + Ranges are hyphen-separated intervals (such as `0-4`) and are inclusive
> > + on both sides. The numbers refer to pCPU ids.
>
> Technically MPIDRs are pCPUs ID. So I would add "logical" in front of pCPU ids
> to make clear what IDs are we talking about
>
> > +
> > Example
> > =======
>
> No update to the example?
>
> > diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
> > index 49d1f14d65..e364820189 100644
> > --- a/xen/arch/arm/dom0less-build.c
> > +++ b/xen/arch/arm/dom0less-build.c
> > @@ -810,6 +810,68 @@ static int __init construct_domU(struct domain *d,
> > return rc;
> > }
> > +static void __init domain_vcpu_affinity(struct domain *d,
> > + const struct dt_device_node *node)
> > +{> + const char *hard_affinity_str = NULL;
> > + struct dt_device_node *np;
> > + uint32_t val;
> > + int rc;
>
> Can you expain why 'rc', 'val', 'hard_affinity_str' are defined outside of the
> loop when ...
>
> > +
> > + dt_for_each_child_node(node, np)
> > + {
> > + const char *s;
> > + struct vcpu *v;
> > + cpumask_t affinity;
>
> ... they are not? Yet they have the same property (i.e. only called within the
> loop).
>
> > +
> > + if ( !dt_device_is_compatible(np, "xen,vcpu-affinity") )
> > + continue;
> > +
> > + if ( !dt_property_read_u32(np, "id", &val) )
>
> Looking at the binding you wrote, "id" is mandatory. So I think we should
> throw an error if it is not present.
>
> > + continue;
> > +> + if ( val >= d->max_vcpus )
> > + panic("Invalid vcpu_id %u for domain %s\n", val,
> > dt_node_name(node));
>
> NIT: Maybe print the maximum number of vCPUs? This would make easier to know
> what's wrong with the ID.
>
> > +
> > + v = d->vcpu[val];
> > + rc = dt_property_read_string(np, "hard-affinity",
> > &hard_affinity_str);
> > + if ( rc < 0 )
> > + continue;
> > +
> > + s = hard_affinity_str;
>
> OOI, you don't seem to use hard_affinity_str afterwards, so why can't we use
> 'hard_affinity_str' directly and avoid an extra variable?
>
> > + cpumask_clear(&affinity);
> > + while ( *s != '\0' )
> > + {
> > + unsigned int start, end;
> > +
> > + start = simple_strtoul(s, &s, 0);
> > +
> > + if ( *s == '-' ) /* Range */
> > + {
> > + s++;
> > + end = simple_strtoul(s, &s, 0);
> > + }
> > + else /* Single value */
> > + end = start;
> > +
> > + if ( end >= nr_cpu_ids )
> > + panic("Invalid pCPU %u for domain %s\n", end,
> > dt_node_name(node));
> > +
> > + for ( ; start <= end; start++ )
> > + cpumask_set_cpu(start, &affinity);
> > +
> > + if ( *s == ',' )
> > + s++;
> > + else if ( *s != '\0' )
> > + break;
>
> NIT: We seem to have various place in Xen parsing range (e.g.
> init_boot_pages()). Could we provide an helper to avoid duplicating the code?
Hi Julien,
Many thanks for the review, I addressed all the comments, except for
this NIT
On Tue, Feb 18, 2025 at 12:29:24PM -0800, Stefano Stabellini wrote:
> Add vcpu affinity to the dom0less bindings. Example:
>
> dom1 {
> ...
> cpus = <4>;
> vcpu0 {
> compatible = "xen,vcpu-affinity";
> id = <0>;
> hard-affinity = "4-7";
> };
> vcpu1 {
> compatible = "xen,vcpu-affinity";
> id = <1>;
> hard-affinity = "0-3";
> };
> vcpu2 {
> compatible = "xen,vcpu-affinity";
> id = <2>;
> hard-affinity = "1,6";
> };
> ...
>
> Note that the property hard-affinity is optional. It is possible to add
> other properties in the future not only to specify soft affinity, but
> also to provide more precise methods for configuring affinity. For
> instance, on ARM the MPIDR could be use to specify the pCPU. For now, it
> is left to the future.
>
> Signed-off-by: Xenia Ragiadakou <xenia.ragiadakou@amd.com>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
Sorry to be picky, just an unrelated nit, but usually the first SoB
matches the "From:" field in the patch, or a commit body "From:" tag,
for example:
https://lore.kernel.org/xen-devel/20250124120112.56678-2-roger.pau@citrix.com/
Thanks, Roger.
On 2/18/25 9:29 PM, Stefano Stabellini wrote:
> Add vcpu affinity to the dom0less bindings. Example:
>
> dom1 {
> ...
> cpus = <4>;
> vcpu0 {
> compatible = "xen,vcpu-affinity";
> id = <0>;
> hard-affinity = "4-7";
> };
> vcpu1 {
> compatible = "xen,vcpu-affinity";
> id = <1>;
> hard-affinity = "0-3";
> };
> vcpu2 {
> compatible = "xen,vcpu-affinity";
> id = <2>;
> hard-affinity = "1,6";
> };
> ...
>
> Note that the property hard-affinity is optional. It is possible to add
> other properties in the future not only to specify soft affinity, but
> also to provide more precise methods for configuring affinity. For
> instance, on ARM the MPIDR could be use to specify the pCPU. For now, it
> is left to the future.
I think we want this to add to CHANGELOG.
Thanks.
~ Oleksii
>
> Signed-off-by: Xenia Ragiadakou<xenia.ragiadakou@amd.com>
> Signed-off-by: Stefano Stabellini<stefano.stabellini@amd.com>
> ---
> Changes in v3:
> - improve commit message
> - improve binding doc
> - add panic on invalid pCPU
> - move parsing to a separate function
>
> diff --git a/docs/misc/arm/device-tree/booting.txt b/docs/misc/arm/device-tree/booting.txt
> index 9c881baccc..10e55c825c 100644
> --- a/docs/misc/arm/device-tree/booting.txt
> +++ b/docs/misc/arm/device-tree/booting.txt
> @@ -324,6 +324,27 @@ The ramdisk sub-node has the following properties:
> property because it will be created by the UEFI stub on boot.
> This option is needed only when UEFI boot is used.
>
> +Under the "xen,domain" compatible node, it is possible optionally to add
> +one or more sub-nodes to configure vCPU affinity. The vCPU affinity
> +sub-node has the following properties:
> +
> +- compatible
> +
> + "xen,vcpu-affinity"
> +
> +- id
> +
> + A 32-bit integer that specifies the vCPU id. 0 is the first vCPU.
> + The last vCPU is cpus-1, where "cpus" is the number of vCPUs
> + specified with the "cpus" property under the "xen,domain" node.
> +
> +- hard-affinity
> +
> + Optional. A string specifying the hard affinity configuration for the
> + vCPU: a comma-separated list of pCPUs or ranges of pCPUs is used.
> + Ranges are hyphen-separated intervals (such as `0-4`) and are inclusive
> + on both sides. The numbers refer to pCPU ids.
> +
>
> Example
> =======
> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
> index 49d1f14d65..e364820189 100644
> --- a/xen/arch/arm/dom0less-build.c
> +++ b/xen/arch/arm/dom0less-build.c
> @@ -810,6 +810,68 @@ static int __init construct_domU(struct domain *d,
> return rc;
> }
>
> +static void __init domain_vcpu_affinity(struct domain *d,
> + const struct dt_device_node *node)
> +{
> + const char *hard_affinity_str = NULL;
> + struct dt_device_node *np;
> + uint32_t val;
> + int rc;
> +
> + dt_for_each_child_node(node, np)
> + {
> + const char *s;
> + struct vcpu *v;
> + cpumask_t affinity;
> +
> + if ( !dt_device_is_compatible(np, "xen,vcpu-affinity") )
> + continue;
> +
> + if ( !dt_property_read_u32(np, "id", &val) )
> + continue;
> +
> + if ( val >= d->max_vcpus )
> + panic("Invalid vcpu_id %u for domain %s\n", val, dt_node_name(node));
> +
> + v = d->vcpu[val];
> + rc = dt_property_read_string(np, "hard-affinity", &hard_affinity_str);
> + if ( rc < 0 )
> + continue;
> +
> + s = hard_affinity_str;
> + cpumask_clear(&affinity);
> + while ( *s != '\0' )
> + {
> + unsigned int start, end;
> +
> + start = simple_strtoul(s, &s, 0);
> +
> + if ( *s == '-' ) /* Range */
> + {
> + s++;
> + end = simple_strtoul(s, &s, 0);
> + }
> + else /* Single value */
> + end = start;
> +
> + if ( end >= nr_cpu_ids )
> + panic("Invalid pCPU %u for domain %s\n", end, dt_node_name(node));
> +
> + for ( ; start <= end; start++ )
> + cpumask_set_cpu(start, &affinity);
> +
> + if ( *s == ',' )
> + s++;
> + else if ( *s != '\0' )
> + break;
> + }
> +
> + rc = vcpu_set_hard_affinity(v, &affinity);
> + if ( rc )
> + panic("vcpu%d: failed to set hard affinity\n", v->vcpu_id);
> + }
> +}
> +
> void __init create_domUs(void)
> {
> struct dt_device_node *node;
> @@ -992,6 +1054,8 @@ void __init create_domUs(void)
> if ( rc )
> panic("Could not set up domain %s (rc = %d)\n",
> dt_node_name(node), rc);
> +
> + domain_vcpu_affinity(d, node);
> }
> }
>
>
© 2016 - 2025 Red Hat, Inc.