Make alloc_dom0_vcpu0() viable as a general vcpu0 allocator. Keep
behaviour on any hwdom/ctldom identical to that dom0 used to have, and
make non-dom0 have auto node affinity.
Rename the function to alloc_dom_vcpu0() to reflect this change in
scope, and move the prototype to asm/domain.h from xen/domain.h as it's
only used in x86.
Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
Signed-off-by: Alejandro Vallejo <alejandro.garciavallejo@amd.com>
---
xen/arch/x86/dom0_build.c | 12 ++++++++----
xen/arch/x86/include/asm/dom0_build.h | 5 +++++
xen/arch/x86/setup.c | 6 ++++--
xen/include/xen/domain.h | 1 -
4 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
index 0b467fd4a4..dfae7f888f 100644
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -254,12 +254,16 @@ unsigned int __init dom0_max_vcpus(void)
return max_vcpus;
}
-struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
+struct vcpu *__init alloc_dom_vcpu0(struct domain *d)
{
- dom0->node_affinity = dom0_nodes;
- dom0->auto_node_affinity = !dom0_nr_pxms;
+ d->auto_node_affinity = true;
+ if ( is_hardware_domain(d) || is_control_domain(d) )
+ {
+ d->node_affinity = dom0_nodes;
+ d->auto_node_affinity = !dom0_nr_pxms;
+ }
- return vcpu_create(dom0, 0);
+ return vcpu_create(d, 0);
}
#ifdef CONFIG_SHADOW_PAGING
diff --git a/xen/arch/x86/include/asm/dom0_build.h b/xen/arch/x86/include/asm/dom0_build.h
index ff021c24af..46bfd111f2 100644
--- a/xen/arch/x86/include/asm/dom0_build.h
+++ b/xen/arch/x86/include/asm/dom0_build.h
@@ -23,6 +23,11 @@ unsigned long dom0_paging_pages(const struct domain *d,
void dom0_update_physmap(bool compat, unsigned long pfn,
unsigned long mfn, unsigned long vphysmap_s);
+/* general domain construction */
+
+/* Create the first vCPU of a domain. Sets up node affinity as a side effect */
+struct vcpu *alloc_dom_vcpu0(struct domain *d);
+
#endif /* _DOM0_BUILD_H_ */
/*
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index c6890669b9..77a8ca60c3 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -37,6 +37,7 @@
#include <asm/bzimage.h>
#include <asm/cpu-policy.h>
#include <asm/desc.h>
+#include <asm/dom0_build.h>
#include <asm/e820.h>
#include <asm/edd.h>
#include <asm/genapic.h>
@@ -1054,9 +1055,11 @@ static struct domain *__init create_dom0(struct boot_info *bi)
if ( IS_ERR(d) )
panic("Error creating d%u: %ld\n", bd->domid, PTR_ERR(d));
+ bd->d = d;
+
init_dom0_cpuid_policy(d);
- if ( alloc_dom0_vcpu0(d) == NULL )
+ if ( alloc_dom_vcpu0(d) == NULL )
panic("Error creating %pdv0\n", d);
cmdline_size = domain_cmdline_size(bi, bd);
@@ -1093,7 +1096,6 @@ static struct domain *__init create_dom0(struct boot_info *bi)
bd->cmdline = cmdline;
}
- bd->d = d;
if ( construct_dom0(bd) != 0 )
panic("Could not construct domain 0\n");
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index e10baf2615..bf1fc6227f 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -24,7 +24,6 @@ struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id);
unsigned int dom0_max_vcpus(void);
int parse_arch_dom0_param(const char *s, const char *e);
-struct vcpu *alloc_dom0_vcpu0(struct domain *dom0);
int vcpu_reset(struct vcpu *v);
int vcpu_up(struct vcpu *v);
--
2.43.0
On 17.07.2025 19:51, Alejandro Vallejo wrote:
> Make alloc_dom0_vcpu0() viable as a general vcpu0 allocator. Keep
> behaviour on any hwdom/ctldom identical to that dom0 used to have, and
> make non-dom0 have auto node affinity.
>
> Rename the function to alloc_dom_vcpu0() to reflect this change in
> scope, and move the prototype to asm/domain.h from xen/domain.h as it's
> only used in x86.
Which makes we wonder what's really x86-specific about it. Yes, the use of
...
> --- a/xen/arch/x86/dom0_build.c
> +++ b/xen/arch/x86/dom0_build.c
> @@ -254,12 +254,16 @@ unsigned int __init dom0_max_vcpus(void)
> return max_vcpus;
> }
>
> -struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
> +struct vcpu *__init alloc_dom_vcpu0(struct domain *d)
> {
> - dom0->node_affinity = dom0_nodes;
> - dom0->auto_node_affinity = !dom0_nr_pxms;
> + d->auto_node_affinity = true;
> + if ( is_hardware_domain(d) || is_control_domain(d) )
> + {
> + d->node_affinity = dom0_nodes;
> + d->auto_node_affinity = !dom0_nr_pxms;
... dom0_nr_pxms here perhaps is. Yet that could be sorted e.g. by making
this a function parameter.
> --- a/xen/arch/x86/include/asm/dom0_build.h
> +++ b/xen/arch/x86/include/asm/dom0_build.h
> @@ -23,6 +23,11 @@ unsigned long dom0_paging_pages(const struct domain *d,
> void dom0_update_physmap(bool compat, unsigned long pfn,
> unsigned long mfn, unsigned long vphysmap_s);
>
> +/* general domain construction */
Nit: Comment style.
> @@ -1054,9 +1055,11 @@ static struct domain *__init create_dom0(struct boot_info *bi)
> if ( IS_ERR(d) )
> panic("Error creating d%u: %ld\n", bd->domid, PTR_ERR(d));
>
> + bd->d = d;
> +
> init_dom0_cpuid_policy(d);
>
> - if ( alloc_dom0_vcpu0(d) == NULL )
> + if ( alloc_dom_vcpu0(d) == NULL )
> panic("Error creating %pdv0\n", d);
>
> cmdline_size = domain_cmdline_size(bi, bd);
> @@ -1093,7 +1096,6 @@ static struct domain *__init create_dom0(struct boot_info *bi)
> bd->cmdline = cmdline;
> }
>
> - bd->d = d;
> if ( construct_dom0(bd) != 0 )
> panic("Could not construct domain 0\n");
Isn't this movement of the bd->d assignment entirely unrelated?
Jan
On Tue Jul 22, 2025 at 4:45 PM CEST, Jan Beulich wrote:
> On 17.07.2025 19:51, Alejandro Vallejo wrote:
>> Make alloc_dom0_vcpu0() viable as a general vcpu0 allocator. Keep
>> behaviour on any hwdom/ctldom identical to that dom0 used to have, and
>> make non-dom0 have auto node affinity.
>>
>> Rename the function to alloc_dom_vcpu0() to reflect this change in
>> scope, and move the prototype to asm/domain.h from xen/domain.h as it's
>> only used in x86.
>
> Which makes we wonder what's really x86-specific about it. Yes, the use of
> ...
Mostly that it's the only arch doing it.
>
>> --- a/xen/arch/x86/dom0_build.c
>> +++ b/xen/arch/x86/dom0_build.c
>> @@ -254,12 +254,16 @@ unsigned int __init dom0_max_vcpus(void)
>> return max_vcpus;
>> }
>>
>> -struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
>> +struct vcpu *__init alloc_dom_vcpu0(struct domain *d)
>> {
>> - dom0->node_affinity = dom0_nodes;
>> - dom0->auto_node_affinity = !dom0_nr_pxms;
>> + d->auto_node_affinity = true;
>> + if ( is_hardware_domain(d) || is_control_domain(d) )
>> + {
>> + d->node_affinity = dom0_nodes;
>> + d->auto_node_affinity = !dom0_nr_pxms;
>
> ... dom0_nr_pxms here perhaps is. Yet that could be sorted e.g. by making
> this a function parameter.
ARM doesn't dabble with affinities while allocating the first vcpu. It's a
straight vcpu_create(). We could definitely inline setting that affinity
setting and forego the function altogether. I'm not a big fan of functions
with non-obvious side-effects, and this is one such case.
>
>> --- a/xen/arch/x86/include/asm/dom0_build.h
>> +++ b/xen/arch/x86/include/asm/dom0_build.h
>> @@ -23,6 +23,11 @@ unsigned long dom0_paging_pages(const struct domain *d,
>> void dom0_update_physmap(bool compat, unsigned long pfn,
>> unsigned long mfn, unsigned long vphysmap_s);
>>
>> +/* general domain construction */
>
> Nit: Comment style.
Ack
>
>> @@ -1054,9 +1055,11 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>> if ( IS_ERR(d) )
>> panic("Error creating d%u: %ld\n", bd->domid, PTR_ERR(d));
>>
>> + bd->d = d;
>> +
>> init_dom0_cpuid_policy(d);
>>
>> - if ( alloc_dom0_vcpu0(d) == NULL )
>> + if ( alloc_dom_vcpu0(d) == NULL )
>> panic("Error creating %pdv0\n", d);
>>
>> cmdline_size = domain_cmdline_size(bi, bd);
>> @@ -1093,7 +1096,6 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>> bd->cmdline = cmdline;
>> }
>>
>> - bd->d = d;
>> if ( construct_dom0(bd) != 0 )
>> panic("Could not construct domain 0\n");
>
> Isn't this movement of the bd->d assignment entirely unrelated?
>
> Jan
The prior incarnation of this patch (see Daniel's RFC) took a boot_domain, I
think, for which the change would be required. It indeed doesn't seem to be
required any longer.
Cheers,
Alejandro
On 2025-07-17 13:51, Alejandro Vallejo wrote:
> Make alloc_dom0_vcpu0() viable as a general vcpu0 allocator. Keep
> behaviour on any hwdom/ctldom identical to that dom0 used to have, and
> make non-dom0 have auto node affinity.
>
> Rename the function to alloc_dom_vcpu0() to reflect this change in
> scope, and move the prototype to asm/domain.h from xen/domain.h as it's
> only used in x86.
>
> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
> Signed-off-by: Alejandro Vallejo <alejandro.garciavallejo@amd.com>
> ---
> xen/arch/x86/dom0_build.c | 12 ++++++++----
> xen/arch/x86/include/asm/dom0_build.h | 5 +++++
> xen/arch/x86/setup.c | 6 ++++--
> xen/include/xen/domain.h | 1 -
> 4 files changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
> index 0b467fd4a4..dfae7f888f 100644
> --- a/xen/arch/x86/dom0_build.c
> +++ b/xen/arch/x86/dom0_build.c
> @@ -254,12 +254,16 @@ unsigned int __init dom0_max_vcpus(void)
> return max_vcpus;
> }
>
> -struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
> +struct vcpu *__init alloc_dom_vcpu0(struct domain *d)
> {
> - dom0->node_affinity = dom0_nodes;
> - dom0->auto_node_affinity = !dom0_nr_pxms;
> + d->auto_node_affinity = true;
> + if ( is_hardware_domain(d) || is_control_domain(d) )
Do we want dom0 options to apply to:
hardware or control
just hardware
just dom0 (hardware && control && xenstore)
?
I think "just dom0" may make the most sense. My next preference is just
hardware. Control I think should be mostly a domU except for having
is_privileged = true;
The rest of the patch looks good.
Regards,
Jason
On Thu, 17 Jul 2025, Jason Andryuk wrote:
> On 2025-07-17 13:51, Alejandro Vallejo wrote:
> > Make alloc_dom0_vcpu0() viable as a general vcpu0 allocator. Keep
> > behaviour on any hwdom/ctldom identical to that dom0 used to have, and
> > make non-dom0 have auto node affinity.
> >
> > Rename the function to alloc_dom_vcpu0() to reflect this change in
> > scope, and move the prototype to asm/domain.h from xen/domain.h as it's
> > only used in x86.
> >
> > Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
> > Signed-off-by: Alejandro Vallejo <alejandro.garciavallejo@amd.com>
> > ---
> > xen/arch/x86/dom0_build.c | 12 ++++++++----
> > xen/arch/x86/include/asm/dom0_build.h | 5 +++++
> > xen/arch/x86/setup.c | 6 ++++--
> > xen/include/xen/domain.h | 1 -
> > 4 files changed, 17 insertions(+), 7 deletions(-)
> >
> > diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
> > index 0b467fd4a4..dfae7f888f 100644
> > --- a/xen/arch/x86/dom0_build.c
> > +++ b/xen/arch/x86/dom0_build.c
> > @@ -254,12 +254,16 @@ unsigned int __init dom0_max_vcpus(void)
> > return max_vcpus;
> > }
> > -struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
> > +struct vcpu *__init alloc_dom_vcpu0(struct domain *d)
> > {
> > - dom0->node_affinity = dom0_nodes;
> > - dom0->auto_node_affinity = !dom0_nr_pxms;
> > + d->auto_node_affinity = true;
> > + if ( is_hardware_domain(d) || is_control_domain(d) )
>
> Do we want dom0 options to apply to:
> hardware or control
> just hardware
> just dom0 (hardware && control && xenstore)
>
> ?
>
> I think "just dom0" may make the most sense. My next preference is just
> hardware. Control I think should be mostly a domU except for having
> is_privileged = true;
Great question. Certainly dom0 options, such as dom0_mem, should not
apply to Control, and certainly they should apply to regular Dom0.
The interesting question is whether they should apply to the Hardware
Domain. Some of the Dom0 options make sense for the Hardware Domain and
there isn't an equivalent config option available via Dom0less bindings.
I am not thinking about the dom0_* options but things like nmi=dom0. For
simplicity and ease of use I would say they should apply to the Hardware
Domain.
In any case, I think the strategy should apply to all architectures.
On 18.07.2025 02:04, Stefano Stabellini wrote:
> On Thu, 17 Jul 2025, Jason Andryuk wrote:
>> On 2025-07-17 13:51, Alejandro Vallejo wrote:
>>> Make alloc_dom0_vcpu0() viable as a general vcpu0 allocator. Keep
>>> behaviour on any hwdom/ctldom identical to that dom0 used to have, and
>>> make non-dom0 have auto node affinity.
>>>
>>> Rename the function to alloc_dom_vcpu0() to reflect this change in
>>> scope, and move the prototype to asm/domain.h from xen/domain.h as it's
>>> only used in x86.
>>>
>>> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
>>> Signed-off-by: Alejandro Vallejo <alejandro.garciavallejo@amd.com>
>>> ---
>>> xen/arch/x86/dom0_build.c | 12 ++++++++----
>>> xen/arch/x86/include/asm/dom0_build.h | 5 +++++
>>> xen/arch/x86/setup.c | 6 ++++--
>>> xen/include/xen/domain.h | 1 -
>>> 4 files changed, 17 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
>>> index 0b467fd4a4..dfae7f888f 100644
>>> --- a/xen/arch/x86/dom0_build.c
>>> +++ b/xen/arch/x86/dom0_build.c
>>> @@ -254,12 +254,16 @@ unsigned int __init dom0_max_vcpus(void)
>>> return max_vcpus;
>>> }
>>> -struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
>>> +struct vcpu *__init alloc_dom_vcpu0(struct domain *d)
>>> {
>>> - dom0->node_affinity = dom0_nodes;
>>> - dom0->auto_node_affinity = !dom0_nr_pxms;
>>> + d->auto_node_affinity = true;
>>> + if ( is_hardware_domain(d) || is_control_domain(d) )
>>
>> Do we want dom0 options to apply to:
>> hardware or control
>> just hardware
>> just dom0 (hardware && control && xenstore)
>>
>> ?
>>
>> I think "just dom0" may make the most sense. My next preference is just
>> hardware. Control I think should be mostly a domU except for having
>> is_privileged = true;
>
> Great question. Certainly dom0 options, such as dom0_mem, should not
> apply to Control, and certainly they should apply to regular Dom0.
>
> The interesting question is whether they should apply to the Hardware
> Domain. Some of the Dom0 options make sense for the Hardware Domain and
> there isn't an equivalent config option available via Dom0less bindings.
> I am not thinking about the dom0_* options but things like nmi=dom0. For
> simplicity and ease of use I would say they should apply to the Hardware
> Domain.
Interesting indeed. So far we more or less aliased hwdom == dom0.
Jan
On Fri Jul 18, 2025 at 8:42 AM CEST, Jan Beulich wrote:
> On 18.07.2025 02:04, Stefano Stabellini wrote:
>> On Thu, 17 Jul 2025, Jason Andryuk wrote:
>>> On 2025-07-17 13:51, Alejandro Vallejo wrote:
>>>> Make alloc_dom0_vcpu0() viable as a general vcpu0 allocator. Keep
>>>> behaviour on any hwdom/ctldom identical to that dom0 used to have, and
>>>> make non-dom0 have auto node affinity.
>>>>
>>>> Rename the function to alloc_dom_vcpu0() to reflect this change in
>>>> scope, and move the prototype to asm/domain.h from xen/domain.h as it's
>>>> only used in x86.
>>>>
>>>> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
>>>> Signed-off-by: Alejandro Vallejo <alejandro.garciavallejo@amd.com>
>>>> ---
>>>> xen/arch/x86/dom0_build.c | 12 ++++++++----
>>>> xen/arch/x86/include/asm/dom0_build.h | 5 +++++
>>>> xen/arch/x86/setup.c | 6 ++++--
>>>> xen/include/xen/domain.h | 1 -
>>>> 4 files changed, 17 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
>>>> index 0b467fd4a4..dfae7f888f 100644
>>>> --- a/xen/arch/x86/dom0_build.c
>>>> +++ b/xen/arch/x86/dom0_build.c
>>>> @@ -254,12 +254,16 @@ unsigned int __init dom0_max_vcpus(void)
>>>> return max_vcpus;
>>>> }
>>>> -struct vcpu *__init alloc_dom0_vcpu0(struct domain *dom0)
>>>> +struct vcpu *__init alloc_dom_vcpu0(struct domain *d)
>>>> {
>>>> - dom0->node_affinity = dom0_nodes;
>>>> - dom0->auto_node_affinity = !dom0_nr_pxms;
>>>> + d->auto_node_affinity = true;
>>>> + if ( is_hardware_domain(d) || is_control_domain(d) )
>>>
>>> Do we want dom0 options to apply to:
>>> hardware or control
>>> just hardware
>>> just dom0 (hardware && control && xenstore)
>>>
>>> ?
>>>
>>> I think "just dom0" may make the most sense. My next preference is just
>>> hardware. Control I think should be mostly a domU except for having
>>> is_privileged = true;
I could get behind just hardware, but I don't think the xenstore cap bears much
importance in this.
>>
>> Great question. Certainly dom0 options, such as dom0_mem, should not
>> apply to Control, and certainly they should apply to regular Dom0.
But what is a regular dom0, when not booted regularly? What is it that makes
dom0 quack like a dom0 and not like a domU? It may very well be the case that
it's the regularity of dom0 that makes it a dom0, and cannot be sensibly
described in the presence of split ctl/hw domains.
>>
>> The interesting question is whether they should apply to the Hardware
>> Domain. Some of the Dom0 options make sense for the Hardware Domain and
>> there isn't an equivalent config option available via Dom0less bindings.
Well, bindings can be easily created. nmi= in particular can be trivially
directed anywhere via a boolean binding attached to any domain.
The fact that the bindings are less granular than the dom0 cmdline due
to not being needed until now.
>> I am not thinking about the dom0_* options but things like nmi=dom0. For
>> simplicity and ease of use I would say they should apply to the Hardware
>> Domain.
That's a fun case. So when nmi=dom0 the report goes to the hwdom even if it's
not dom0 (i.e: late hwdom).
>
> Interesting indeed. So far we more or less aliased hwdom == dom0.
>
> Jan
Some arguments do want to be adjusted one way or the other spec_ctrl.c makes
heavy assumptions about there not being any hwdom/ctldom separation, and both
having domain_id == 0. There are other cases.
Yet another option is to single-out the Hyperlaunch/dom0less case and never
apply dom0 commandline overrides there (dom0_*=). It'd be a flag in
boot_domain. Might even allow us to compile them out altogether for
dom0less-only configurations (e.g: CONFIG_DOM0LESS_BOOT && !CONFIG_DOM0_BOOT, or
something like that).
Thoughts?
Cheers,
Alejandro
On Fri, 18 Jul 2025, Alejandro Vallejo wrote: > Some arguments do want to be adjusted one way or the other spec_ctrl.c makes > heavy assumptions about there not being any hwdom/ctldom separation, and both > having domain_id == 0. There are other cases. > > Yet another option is to single-out the Hyperlaunch/dom0less case and never > apply dom0 commandline overrides there (dom0_*=). It'd be a flag in > boot_domain. Might even allow us to compile them out altogether for > dom0less-only configurations (e.g: CONFIG_DOM0LESS_BOOT && !CONFIG_DOM0_BOOT, or > something like that). > > Thoughts? I have been reviewing this in more detail, including the WIP draft that Alejandro has not yet submitted to xen-devel, which completes the hyperlaunch/dom0less enablement on x86. I think we should apply all dom0 command line arguments exactly as they are to both classic Dom0 and the hardware domain (and only to the hardware domain). This is the simplest approach and the only one that would work with the current code.
© 2016 - 2025 Red Hat, Inc.