From: Denis Mukhin <dmkhn@proton.me>
From: Denis Mukhin <dmukhin@ford.com>
Remove the hardcoded domain ID 0 allocation for hardware domain and replace it
with a call to get_initial_domain_id() (returns the value of hardware_domid on
Arm).
Update domid_alloc(DOMID_INVALID) case to ensure that get_initial_domain_id()
ID is skipped during domain ID allocation to cover domU case in dom0less
configuration. That also fixes a potential issue with re-using ID#0 for domUs
when get_initial_domain_id() returns non-zero.
Signed-off-by: Denis Mukhin <dmukhin@ford.com>
---
Changes since v8:
- rebased 
---
 xen/arch/arm/domain_build.c             | 4 ++--
 xen/common/device-tree/dom0less-build.c | 9 +++------
 xen/common/domain.c                     | 4 ++--
 3 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index e9d563c269..0ad80b020a 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -2035,9 +2035,9 @@ void __init create_dom0(void)
     if ( !llc_coloring_enabled )
         flags |= CDF_directmap;
 
-    domid = domid_alloc(0);
+    domid = domid_alloc(get_initial_domain_id());
     if ( domid == DOMID_INVALID )
-        panic("Error allocating domain ID 0\n");
+        panic("Error allocating domain ID %d\n", get_initial_domain_id());
 
     dom0 = domain_create(domid, &dom0_cfg, flags);
     if ( IS_ERR(dom0) )
diff --git a/xen/common/device-tree/dom0less-build.c b/xen/common/device-tree/dom0less-build.c
index a509f8fecd..9a6015f4ce 100644
--- a/xen/common/device-tree/dom0less-build.c
+++ b/xen/common/device-tree/dom0less-build.c
@@ -974,14 +974,11 @@ void __init create_domUs(void)
 
         arch_create_domUs(node, &d_cfg, flags);
 
-        /*
-         * The variable max_init_domid is initialized with zero, so here it's
-         * very important to use the pre-increment operator to call
-         * domain_create() with a domid > 0. (domid == 0 is reserved for Dom0)
-         */
-        domid = domid_alloc(++max_init_domid);
+        domid = domid_alloc(DOMID_INVALID);
         if ( domid == DOMID_INVALID )
             panic("Error allocating ID for domain %s\n", dt_node_name(node));
+        if ( max_init_domid < domid )
+            max_init_domid = domid;
 
         d = domain_create(domid, &d_cfg, flags);
         if ( IS_ERR(d) )
diff --git a/xen/common/domain.c b/xen/common/domain.c
index ae0c44fcbb..129b4fcb37 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -2423,8 +2423,8 @@ domid_t domid_alloc(domid_t domid)
     else
     {
         static domid_t domid_last;
-        /* NB: account for late hwdom case, skip ID#0 */
-        const domid_t reserved_domid = 0;
+        /* NB: account for late hwdom case */
+        const domid_t reserved_domid = get_initial_domain_id();
         const bool reserved = __test_and_set_bit(reserved_domid, domid_bitmap);
 
         domid = find_next_zero_bit(domid_bitmap, DOMID_FIRST_RESERVED,
-- 
2.34.1Hi Denis,
On 28/05/2025 23:50, dmkhn@proton.me wrote:
> From: Denis Mukhin <dmkhn@proton.me>
> 
> From: Denis Mukhin <dmukhin@ford.com>
> 
> Remove the hardcoded domain ID 0 allocation for hardware domain and replace it
> with a call to get_initial_domain_id() (returns the value of hardware_domid on
> Arm).
I am not entirely why this is done. Are you intending to pass a 
different domain ID? If so...
> 
> Update domid_alloc(DOMID_INVALID) case to ensure that get_initial_domain_id()
> ID is skipped during domain ID allocation to cover domU case in dom0less
> configuration. That also fixes a potential issue with re-using ID#0 for domUs
> when get_initial_domain_id() returns non-zero.
> 
> Signed-off-by: Denis Mukhin <dmukhin@ford.com>
> ---
> Changes since v8:
> - rebased
> ---
>   xen/arch/arm/domain_build.c             | 4 ++--
>   xen/common/device-tree/dom0less-build.c | 9 +++------
>   xen/common/domain.c                     | 4 ++--
>   3 files changed, 7 insertions(+), 10 deletions(-)
> 
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index e9d563c269..0ad80b020a 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -2035,9 +2035,9 @@ void __init create_dom0(void)
... naming like create_dom0() probably wants to be renamed.
That said, I am not convinced a domain other than 0 should have full 
privilege by default. So I would argue it should stay as ...
>       if ( !llc_coloring_enabled )
>           flags |= CDF_directmap;
>   
> -    domid = domid_alloc(0);
> +    domid = domid_alloc(get_initial_domain_id());
... 0.
>       if ( domid == DOMID_INVALID )
> -        panic("Error allocating domain ID 0\n");
> +        panic("Error allocating domain ID %d\n", get_initial_domain_id());
>   
>       dom0 = domain_create(domid, &dom0_cfg, flags);
>       if ( IS_ERR(dom0) )
> diff --git a/xen/common/device-tree/dom0less-build.c b/xen/common/device-tree/dom0less-build.c
> index a509f8fecd..9a6015f4ce 100644
> --- a/xen/common/device-tree/dom0less-build.c
> +++ b/xen/common/device-tree/dom0less-build.c
> @@ -974,14 +974,11 @@ void __init create_domUs(void)
>   
>           arch_create_domUs(node, &d_cfg, flags);
>   
> -        /*
> -         * The variable max_init_domid is initialized with zero, so here it's
> -         * very important to use the pre-increment operator to call
> -         * domain_create() with a domid > 0. (domid == 0 is reserved for Dom0)
> -         */
> -        domid = domid_alloc(++max_init_domid);
> +        domid = domid_alloc(DOMID_INVALID);
>           if ( domid == DOMID_INVALID )
>               panic("Error allocating ID for domain %s\n", dt_node_name(node));
> +        if ( max_init_domid < domid )
> +            max_init_domid = domid;
>   
>           d = domain_create(domid, &d_cfg, flags);
>           if ( IS_ERR(d) )
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index ae0c44fcbb..129b4fcb37 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -2423,8 +2423,8 @@ domid_t domid_alloc(domid_t domid)
>       else
>       {
>           static domid_t domid_last;
> -        /* NB: account for late hwdom case, skip ID#0 */
> -        const domid_t reserved_domid = 0;
> +        /* NB: account for late hwdom case */
> +        const domid_t reserved_domid = get_initial_domain_id();
This is somewhat confusing to modify domid_alloc() in a patch that is 
meant to modify only the Arm allocation. Can you clarify why this can't 
be done earlier?
>           const bool reserved = __test_and_set_bit(reserved_domid, domid_bitmap);
>   
>           domid = find_next_zero_bit(domid_bitmap, DOMID_FIRST_RESERVED,
Cheers,
-- 
Julien Grall
                
            Hi Denis,
On 05/06/2025 23:05, Julien Grall wrote:
> Hi Denis,
> 
> On 28/05/2025 23:50, dmkhn@proton.me wrote:
>> From: Denis Mukhin <dmkhn@proton.me>
>>
>> From: Denis Mukhin <dmukhin@ford.com>
>>
>> Remove the hardcoded domain ID 0 allocation for hardware domain and 
>> replace it
>> with a call to get_initial_domain_id() (returns the value of 
>> hardware_domid on
>> Arm).
> 
> I am not entirely why this is done. Are you intending to pass a 
> different domain ID? If so...
> 
>>
>> Update domid_alloc(DOMID_INVALID) case to ensure that 
>> get_initial_domain_id()
>> ID is skipped during domain ID allocation to cover domU case in dom0less
>> configuration. That also fixes a potential issue with re-using ID#0 
>> for domUs
>> when get_initial_domain_id() returns non-zero.
>>
>> Signed-off-by: Denis Mukhin <dmukhin@ford.com>
>> ---
>> Changes since v8:
>> - rebased
>> ---
>>   xen/arch/arm/domain_build.c             | 4 ++--
>>   xen/common/device-tree/dom0less-build.c | 9 +++------
>>   xen/common/domain.c                     | 4 ++--
>>   3 files changed, 7 insertions(+), 10 deletions(-)
>>
>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>> index e9d563c269..0ad80b020a 100644
>> --- a/xen/arch/arm/domain_build.c
>> +++ b/xen/arch/arm/domain_build.c
>> @@ -2035,9 +2035,9 @@ void __init create_dom0(void)
> 
> ... naming like create_dom0() probably wants to be renamed.
> 
> That said, I am not convinced a domain other than 0 should have full 
> privilege by default. So I would argue it should stay as ...
> 
>>       if ( !llc_coloring_enabled )
>>           flags |= CDF_directmap;
>> -    domid = domid_alloc(0);
>> +    domid = domid_alloc(get_initial_domain_id());
> 
> ... 0.
Looking at the implementation of get_initial_domain_id(), I noticed the 
behavior was changed for x86 by [1].
Before, get_initial_domain_id() was returning 0 except for the PV shim.
But now, it would could return the domain ID specified on the command 
line (via hardware_dom).
 From my understanding, the goal of the command line was to create the 
hardware domain *after* boot. So initially we create dom0 and then 
initialize the hardware domain. With the patch below, this has changed.
However, from the commit message, I don't understand why. It seems like 
we broke late hwdom?
For instance, late_hwdom_init() has the following assert:
     dom0 = rcu_lock_domain_by_id(0);
     ASSERT(dom0 != NULL);
Jan, I saw you were involved in the review of the series. Any idea why 
this was changed?
Cheers,
[1] https://lore.kernel.org/all/20250306075819.154361-1-dmkhn@proton.me/
-- 
Julien Grall
                
            On 06.06.2025 23:29, Julien Grall wrote: > Hi Denis, > > On 05/06/2025 23:05, Julien Grall wrote: >> Hi Denis, >> >> On 28/05/2025 23:50, dmkhn@proton.me wrote: >>> From: Denis Mukhin <dmkhn@proton.me> >>> >>> From: Denis Mukhin <dmukhin@ford.com> >>> >>> Remove the hardcoded domain ID 0 allocation for hardware domain and replace it >>> with a call to get_initial_domain_id() (returns the value of hardware_domid on >>> Arm). >> >> I am not entirely why this is done. Are you intending to pass a different domain ID? If so... >> >>> >>> Update domid_alloc(DOMID_INVALID) case to ensure that get_initial_domain_id() >>> ID is skipped during domain ID allocation to cover domU case in dom0less >>> configuration. That also fixes a potential issue with re-using ID#0 for domUs >>> when get_initial_domain_id() returns non-zero. >>> >>> Signed-off-by: Denis Mukhin <dmukhin@ford.com> >>> --- >>> Changes since v8: >>> - rebased >>> --- >>> xen/arch/arm/domain_build.c | 4 ++-- >>> xen/common/device-tree/dom0less-build.c | 9 +++------ >>> xen/common/domain.c | 4 ++-- >>> 3 files changed, 7 insertions(+), 10 deletions(-) >>> >>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c >>> index e9d563c269..0ad80b020a 100644 >>> --- a/xen/arch/arm/domain_build.c >>> +++ b/xen/arch/arm/domain_build.c >>> @@ -2035,9 +2035,9 @@ void __init create_dom0(void) >> >> ... naming like create_dom0() probably wants to be renamed. >> >> That said, I am not convinced a domain other than 0 should have full privilege by default. So I would argue it should stay as ... >> >>> if ( !llc_coloring_enabled ) >>> flags |= CDF_directmap; >>> - domid = domid_alloc(0); >>> + domid = domid_alloc(get_initial_domain_id()); >> >> ... 0. > > Looking at the implementation of get_initial_domain_id(), I noticed the behavior was changed for x86 by [1]. > > Before, get_initial_domain_id() was returning 0 except for the PV shim. > But now, it would could return the domain ID specified on the command line (via hardware_dom). > > From my understanding, the goal of the command line was to create the hardware domain *after* boot. So initially we create dom0 and then initialize the hardware domain. With the patch below, this has changed. > > However, from the commit message, I don't understand why. It seems like we broke late hwdom? > > For instance, late_hwdom_init() has the following assert: > > dom0 = rcu_lock_domain_by_id(0); > ASSERT(dom0 != NULL); > > Jan, I saw you were involved in the review of the series. Any idea why this was changed? I simply overlooked this aspect when looking at the change. You're right, things were broken there. Unless a simple and clean fix can be made relatively soon, I think this simply needs reverting (which may mean to revert any later commits that depend on that). I can't help noting that in this console rework there were way too many issues, and I fear more than just this one may have slipped through. I therefore wonder whether taken as a whole this was/is worth both the submitter's and all the reviewers' time. Jan > [1] https://lore.kernel.org/all/20250306075819.154361-1-dmkhn@proton.me/ >
On Tue, Jun 10, 2025 at 08:53:12AM +0200, Jan Beulich wrote: > On 06.06.2025 23:29, Julien Grall wrote: > > Hi Denis, > > > > On 05/06/2025 23:05, Julien Grall wrote: > >> Hi Denis, > >> > >> On 28/05/2025 23:50, dmkhn@proton.me wrote: > >>> From: Denis Mukhin <dmkhn@proton.me> > >>> > >>> From: Denis Mukhin <dmukhin@ford.com> > >>> > >>> Remove the hardcoded domain ID 0 allocation for hardware domain and replace it > >>> with a call to get_initial_domain_id() (returns the value of hardware_domid on > >>> Arm). > >> > >> I am not entirely why this is done. Are you intending to pass a different domain ID? If so... > >> > >>> > >>> Update domid_alloc(DOMID_INVALID) case to ensure that get_initial_domain_id() > >>> ID is skipped during domain ID allocation to cover domU case in dom0less > >>> configuration. That also fixes a potential issue with re-using ID#0 for domUs > >>> when get_initial_domain_id() returns non-zero. > >>> > >>> Signed-off-by: Denis Mukhin <dmukhin@ford.com> > >>> --- > >>> Changes since v8: > >>> - rebased > >>> --- > >>> xen/arch/arm/domain_build.c | 4 ++-- > >>> xen/common/device-tree/dom0less-build.c | 9 +++------ > >>> xen/common/domain.c | 4 ++-- > >>> 3 files changed, 7 insertions(+), 10 deletions(-) > >>> > >>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > >>> index e9d563c269..0ad80b020a 100644 > >>> --- a/xen/arch/arm/domain_build.c > >>> +++ b/xen/arch/arm/domain_build.c > >>> @@ -2035,9 +2035,9 @@ void __init create_dom0(void) > >> > >> ... naming like create_dom0() probably wants to be renamed. > >> > >> That said, I am not convinced a domain other than 0 should have full privilege by default. So I would argue it should stay as ... > >> > >>> if ( !llc_coloring_enabled ) > >>> flags |= CDF_directmap; > >>> - domid = domid_alloc(0); > >>> + domid = domid_alloc(get_initial_domain_id()); > >> > >> ... 0. > > > > Looking at the implementation of get_initial_domain_id(), I noticed the behavior was changed for x86 by [1]. > > > > Before, get_initial_domain_id() was returning 0 except for the PV shim. > > But now, it would could return the domain ID specified on the command line (via hardware_dom). > > > > From my understanding, the goal of the command line was to create the hardware domain *after* boot. So initially we create dom0 and then initialize the hardware domain. With the patch below, this has changed. > > > > However, from the commit message, I don't understand why. It seems like we broke late hwdom? > > > > For instance, late_hwdom_init() has the following assert: > > > > dom0 = rcu_lock_domain_by_id(0); > > ASSERT(dom0 != NULL); > > > > Jan, I saw you were involved in the review of the series. Any idea why this was changed? > > I simply overlooked this aspect when looking at the change. You're right, things > were broken there. Unless a simple and clean fix can be made relatively soon, I > think this simply needs reverting (which may mean to revert any later commits > that depend on that). I can't help noting that in this console rework there were > way too many issues, and I fear more than just this one may have slipped > through. I therefore wonder whether taken as a whole this was/is worth both the > submitter's and all the reviewers' time. Yes, sorry, I overlooked late_hwdom_init() modification. IMO, the clean fix would be adding another command line parameter `control_domid` (with default value 0), make get_initial_domain_id() return it instead of current `hardware_domid` and update late_hwdom_init() to use `control_domid` insted of open-coded 0. That should align with the effort of splitting priveleged dom0 into control and hardware domains: control domain will be the first domain ID allocated, followed by the hardware domain. > > Jan > > > [1] https://lore.kernel.org/all/20250306075819.154361-1-dmkhn@proton.me/ > > >
On 10.06.2025 10:02, dmkhn@proton.me wrote: > On Tue, Jun 10, 2025 at 08:53:12AM +0200, Jan Beulich wrote: >> On 06.06.2025 23:29, Julien Grall wrote: >>> Hi Denis, >>> >>> On 05/06/2025 23:05, Julien Grall wrote: >>>> Hi Denis, >>>> >>>> On 28/05/2025 23:50, dmkhn@proton.me wrote: >>>>> From: Denis Mukhin <dmkhn@proton.me> >>>>> >>>>> From: Denis Mukhin <dmukhin@ford.com> >>>>> >>>>> Remove the hardcoded domain ID 0 allocation for hardware domain and replace it >>>>> with a call to get_initial_domain_id() (returns the value of hardware_domid on >>>>> Arm). >>>> >>>> I am not entirely why this is done. Are you intending to pass a different domain ID? If so... >>>> >>>>> >>>>> Update domid_alloc(DOMID_INVALID) case to ensure that get_initial_domain_id() >>>>> ID is skipped during domain ID allocation to cover domU case in dom0less >>>>> configuration. That also fixes a potential issue with re-using ID#0 for domUs >>>>> when get_initial_domain_id() returns non-zero. >>>>> >>>>> Signed-off-by: Denis Mukhin <dmukhin@ford.com> >>>>> --- >>>>> Changes since v8: >>>>> - rebased >>>>> --- >>>>> xen/arch/arm/domain_build.c | 4 ++-- >>>>> xen/common/device-tree/dom0less-build.c | 9 +++------ >>>>> xen/common/domain.c | 4 ++-- >>>>> 3 files changed, 7 insertions(+), 10 deletions(-) >>>>> >>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c >>>>> index e9d563c269..0ad80b020a 100644 >>>>> --- a/xen/arch/arm/domain_build.c >>>>> +++ b/xen/arch/arm/domain_build.c >>>>> @@ -2035,9 +2035,9 @@ void __init create_dom0(void) >>>> >>>> ... naming like create_dom0() probably wants to be renamed. >>>> >>>> That said, I am not convinced a domain other than 0 should have full privilege by default. So I would argue it should stay as ... >>>> >>>>> if ( !llc_coloring_enabled ) >>>>> flags |= CDF_directmap; >>>>> - domid = domid_alloc(0); >>>>> + domid = domid_alloc(get_initial_domain_id()); >>>> >>>> ... 0. >>> >>> Looking at the implementation of get_initial_domain_id(), I noticed the behavior was changed for x86 by [1]. >>> >>> Before, get_initial_domain_id() was returning 0 except for the PV shim. >>> But now, it would could return the domain ID specified on the command line (via hardware_dom). >>> >>> From my understanding, the goal of the command line was to create the hardware domain *after* boot. So initially we create dom0 and then initialize the hardware domain. With the patch below, this has changed. >>> >>> However, from the commit message, I don't understand why. It seems like we broke late hwdom? >>> >>> For instance, late_hwdom_init() has the following assert: >>> >>> dom0 = rcu_lock_domain_by_id(0); >>> ASSERT(dom0 != NULL); >>> >>> Jan, I saw you were involved in the review of the series. Any idea why this was changed? >> >> I simply overlooked this aspect when looking at the change. You're right, things >> were broken there. Unless a simple and clean fix can be made relatively soon, I >> think this simply needs reverting (which may mean to revert any later commits >> that depend on that). I can't help noting that in this console rework there were >> way too many issues, and I fear more than just this one may have slipped >> through. I therefore wonder whether taken as a whole this was/is worth both the >> submitter's and all the reviewers' time. > > Yes, sorry, I overlooked late_hwdom_init() modification. > > IMO, the clean fix would be adding another command line parameter > `control_domid` (with default value 0), make get_initial_domain_id() return it > instead of current `hardware_domid` and update late_hwdom_init() to use > `control_domid` insted of open-coded 0. No, no new command line option will address this. Original behavior needs to be restored (either by correcting the earlier change or, as said, be reverting). Jan
On Tue, Jun 10, 2025 at 10:26:22AM +0200, Jan Beulich wrote: > On 10.06.2025 10:02, dmkhn@proton.me wrote: > > On Tue, Jun 10, 2025 at 08:53:12AM +0200, Jan Beulich wrote: > >> On 06.06.2025 23:29, Julien Grall wrote: > >>> Hi Denis, > >>> > >>> On 05/06/2025 23:05, Julien Grall wrote: > >>>> Hi Denis, > >>>> > >>>> On 28/05/2025 23:50, dmkhn@proton.me wrote: > >>>>> From: Denis Mukhin <dmkhn@proton.me> > >>>>> > >>>>> From: Denis Mukhin <dmukhin@ford.com> > >>>>> > >>>>> Remove the hardcoded domain ID 0 allocation for hardware domain and replace it > >>>>> with a call to get_initial_domain_id() (returns the value of hardware_domid on > >>>>> Arm). > >>>> > >>>> I am not entirely why this is done. Are you intending to pass a different domain ID? If so... > >>>> > >>>>> > >>>>> Update domid_alloc(DOMID_INVALID) case to ensure that get_initial_domain_id() > >>>>> ID is skipped during domain ID allocation to cover domU case in dom0less > >>>>> configuration. That also fixes a potential issue with re-using ID#0 for domUs > >>>>> when get_initial_domain_id() returns non-zero. > >>>>> > >>>>> Signed-off-by: Denis Mukhin <dmukhin@ford.com> > >>>>> --- > >>>>> Changes since v8: > >>>>> - rebased > >>>>> --- > >>>>> xen/arch/arm/domain_build.c | 4 ++-- > >>>>> xen/common/device-tree/dom0less-build.c | 9 +++------ > >>>>> xen/common/domain.c | 4 ++-- > >>>>> 3 files changed, 7 insertions(+), 10 deletions(-) > >>>>> > >>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > >>>>> index e9d563c269..0ad80b020a 100644 > >>>>> --- a/xen/arch/arm/domain_build.c > >>>>> +++ b/xen/arch/arm/domain_build.c > >>>>> @@ -2035,9 +2035,9 @@ void __init create_dom0(void) > >>>> > >>>> ... naming like create_dom0() probably wants to be renamed. > >>>> > >>>> That said, I am not convinced a domain other than 0 should have full privilege by default. So I would argue it should stay as ... > >>>> > >>>>> if ( !llc_coloring_enabled ) > >>>>> flags |= CDF_directmap; > >>>>> - domid = domid_alloc(0); > >>>>> + domid = domid_alloc(get_initial_domain_id()); > >>>> > >>>> ... 0. > >>> > >>> Looking at the implementation of get_initial_domain_id(), I noticed the behavior was changed for x86 by [1]. > >>> > >>> Before, get_initial_domain_id() was returning 0 except for the PV shim. > >>> But now, it would could return the domain ID specified on the command line (via hardware_dom). > >>> > >>> From my understanding, the goal of the command line was to create the hardware domain *after* boot. So initially we create dom0 and then initialize the hardware domain. With the patch below, this has changed. > >>> > >>> However, from the commit message, I don't understand why. It seems like we broke late hwdom? > >>> > >>> For instance, late_hwdom_init() has the following assert: > >>> > >>> dom0 = rcu_lock_domain_by_id(0); > >>> ASSERT(dom0 != NULL); > >>> > >>> Jan, I saw you were involved in the review of the series. Any idea why this was changed? > >> > >> I simply overlooked this aspect when looking at the change. You're right, things > >> were broken there. Unless a simple and clean fix can be made relatively soon, I > >> think this simply needs reverting (which may mean to revert any later commits > >> that depend on that). I can't help noting that in this console rework there were > >> way too many issues, and I fear more than just this one may have slipped > >> through. I therefore wonder whether taken as a whole this was/is worth both the > >> submitter's and all the reviewers' time. > > > > Yes, sorry, I overlooked late_hwdom_init() modification. > > > > IMO, the clean fix would be adding another command line parameter > > `control_domid` (with default value 0), make get_initial_domain_id() return it > > instead of current `hardware_domid` and update late_hwdom_init() to use > > `control_domid` insted of open-coded 0. > > No, no new command line option will address this. Original behavior needs to be > restored (either by correcting the earlier change or, as said, be reverting). Correction of the earlier change: https://gitlab.com/xen-project/people/dmukhin/xen/-/commit/b7f194650307a08a9e6da5aa9fdd1f8a9afd67eb re: command line option: I meant something like this: https://gitlab.com/xen-project/people/dmukhin/xen/-/commit/51a519de6ea73ff3b650fd9bd4f3c5c63f64c069 > > Jan
Hi Denis, On 10/06/2025 10:04, dmkhn@proton.me wrote: > On Tue, Jun 10, 2025 at 10:26:22AM +0200, Jan Beulich wrote: >> On 10.06.2025 10:02, dmkhn@proton.me wrote: >>> On Tue, Jun 10, 2025 at 08:53:12AM +0200, Jan Beulich wrote: >>>> On 06.06.2025 23:29, Julien Grall wrote: >>>>> Hi Denis, >>>>> >>>>> On 05/06/2025 23:05, Julien Grall wrote: >>>>>> Hi Denis, >>>>>> >>>>>> On 28/05/2025 23:50, dmkhn@proton.me wrote: >>>>>>> From: Denis Mukhin <dmkhn@proton.me> >>>>>>> >>>>>>> From: Denis Mukhin <dmukhin@ford.com> >>>>>>> >>>>>>> Remove the hardcoded domain ID 0 allocation for hardware domain and replace it >>>>>>> with a call to get_initial_domain_id() (returns the value of hardware_domid on >>>>>>> Arm). >>>>>> >>>>>> I am not entirely why this is done. Are you intending to pass a different domain ID? If so... >>>>>> >>>>>>> >>>>>>> Update domid_alloc(DOMID_INVALID) case to ensure that get_initial_domain_id() >>>>>>> ID is skipped during domain ID allocation to cover domU case in dom0less >>>>>>> configuration. That also fixes a potential issue with re-using ID#0 for domUs >>>>>>> when get_initial_domain_id() returns non-zero. >>>>>>> >>>>>>> Signed-off-by: Denis Mukhin <dmukhin@ford.com> >>>>>>> --- >>>>>>> Changes since v8: >>>>>>> - rebased >>>>>>> --- >>>>>>> xen/arch/arm/domain_build.c | 4 ++-- >>>>>>> xen/common/device-tree/dom0less-build.c | 9 +++------ >>>>>>> xen/common/domain.c | 4 ++-- >>>>>>> 3 files changed, 7 insertions(+), 10 deletions(-) >>>>>>> >>>>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c >>>>>>> index e9d563c269..0ad80b020a 100644 >>>>>>> --- a/xen/arch/arm/domain_build.c >>>>>>> +++ b/xen/arch/arm/domain_build.c >>>>>>> @@ -2035,9 +2035,9 @@ void __init create_dom0(void) >>>>>> >>>>>> ... naming like create_dom0() probably wants to be renamed. >>>>>> >>>>>> That said, I am not convinced a domain other than 0 should have full privilege by default. So I would argue it should stay as ... >>>>>> >>>>>>> if ( !llc_coloring_enabled ) >>>>>>> flags |= CDF_directmap; >>>>>>> - domid = domid_alloc(0); >>>>>>> + domid = domid_alloc(get_initial_domain_id()); >>>>>> >>>>>> ... 0. >>>>> >>>>> Looking at the implementation of get_initial_domain_id(), I noticed the behavior was changed for x86 by [1]. >>>>> >>>>> Before, get_initial_domain_id() was returning 0 except for the PV shim. >>>>> But now, it would could return the domain ID specified on the command line (via hardware_dom). >>>>> >>>>> From my understanding, the goal of the command line was to create the hardware domain *after* boot. So initially we create dom0 and then initialize the hardware domain. With the patch below, this has changed. >>>>> >>>>> However, from the commit message, I don't understand why. It seems like we broke late hwdom? >>>>> >>>>> For instance, late_hwdom_init() has the following assert: >>>>> >>>>> dom0 = rcu_lock_domain_by_id(0); >>>>> ASSERT(dom0 != NULL); >>>>> >>>>> Jan, I saw you were involved in the review of the series. Any idea why this was changed? >>>> >>>> I simply overlooked this aspect when looking at the change. You're right, things >>>> were broken there. Unless a simple and clean fix can be made relatively soon, I >>>> think this simply needs reverting (which may mean to revert any later commits >>>> that depend on that). I can't help noting that in this console rework there were >>>> way too many issues, and I fear more than just this one may have slipped >>>> through. I therefore wonder whether taken as a whole this was/is worth both the >>>> submitter's and all the reviewers' time. >>> >>> Yes, sorry, I overlooked late_hwdom_init() modification. >>> >>> IMO, the clean fix would be adding another command line parameter >>> `control_domid` (with default value 0), make get_initial_domain_id() return it >>> instead of current `hardware_domid` and update late_hwdom_init() to use >>> `control_domid` insted of open-coded 0. >> >> No, no new command line option will address this. Original behavior needs to be >> restored (either by correcting the earlier change or, as said, be reverting). > > Correction of the earlier change: > > https://gitlab.com/xen-project/people/dmukhin/xen/-/commit/b7f194650307a08a9e6da5aa9fdd1f8a9afd67eb > > re: command line option: I meant something like this: > > https://gitlab.com/xen-project/people/dmukhin/xen/-/commit/51a519de6ea73ff3b650fd9bd4f3c5c63f64c069 I am with Jan here. This used to work before without "control_domid", so we ought to keep the same. But even if we are ok to break compatibility, I don't see the value of "control_domid". The implication of setting "hardware_domid" is you will have a separate control domain. At which point, why would it matter to specify the domain ID? Cheers, -- Julien Grall
On Tue, Jun 10, 2025 at 10:44:39AM +0100, Julien Grall wrote: > Hi Denis, > > On 10/06/2025 10:04, dmkhn@proton.me wrote: > > On Tue, Jun 10, 2025 at 10:26:22AM +0200, Jan Beulich wrote: > >> On 10.06.2025 10:02, dmkhn@proton.me wrote: > >>> On Tue, Jun 10, 2025 at 08:53:12AM +0200, Jan Beulich wrote: > >>>> On 06.06.2025 23:29, Julien Grall wrote: > >>>>> Hi Denis, > >>>>> > >>>>> On 05/06/2025 23:05, Julien Grall wrote: > >>>>>> Hi Denis, > >>>>>> > >>>>>> On 28/05/2025 23:50, dmkhn@proton.me wrote: > >>>>>>> From: Denis Mukhin <dmkhn@proton.me> > >>>>>>> > >>>>>>> From: Denis Mukhin <dmukhin@ford.com> > >>>>>>> > >>>>>>> Remove the hardcoded domain ID 0 allocation for hardware domain and replace it > >>>>>>> with a call to get_initial_domain_id() (returns the value of hardware_domid on > >>>>>>> Arm). > >>>>>> > >>>>>> I am not entirely why this is done. Are you intending to pass a different domain ID? If so... > >>>>>> > >>>>>>> > >>>>>>> Update domid_alloc(DOMID_INVALID) case to ensure that get_initial_domain_id() > >>>>>>> ID is skipped during domain ID allocation to cover domU case in dom0less > >>>>>>> configuration. That also fixes a potential issue with re-using ID#0 for domUs > >>>>>>> when get_initial_domain_id() returns non-zero. > >>>>>>> > >>>>>>> Signed-off-by: Denis Mukhin <dmukhin@ford.com> > >>>>>>> --- > >>>>>>> Changes since v8: > >>>>>>> - rebased > >>>>>>> --- > >>>>>>> xen/arch/arm/domain_build.c | 4 ++-- > >>>>>>> xen/common/device-tree/dom0less-build.c | 9 +++------ > >>>>>>> xen/common/domain.c | 4 ++-- > >>>>>>> 3 files changed, 7 insertions(+), 10 deletions(-) > >>>>>>> > >>>>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > >>>>>>> index e9d563c269..0ad80b020a 100644 > >>>>>>> --- a/xen/arch/arm/domain_build.c > >>>>>>> +++ b/xen/arch/arm/domain_build.c > >>>>>>> @@ -2035,9 +2035,9 @@ void __init create_dom0(void) > >>>>>> > >>>>>> ... naming like create_dom0() probably wants to be renamed. > >>>>>> > >>>>>> That said, I am not convinced a domain other than 0 should have full privilege by default. So I would argue it should stay as ... > >>>>>> > >>>>>>> if ( !llc_coloring_enabled ) > >>>>>>> flags |= CDF_directmap; > >>>>>>> - domid = domid_alloc(0); > >>>>>>> + domid = domid_alloc(get_initial_domain_id()); > >>>>>> > >>>>>> ... 0. > >>>>> > >>>>> Looking at the implementation of get_initial_domain_id(), I noticed the behavior was changed for x86 by [1]. > >>>>> > >>>>> Before, get_initial_domain_id() was returning 0 except for the PV shim. > >>>>> But now, it would could return the domain ID specified on the command line (via hardware_dom). > >>>>> > >>>>> From my understanding, the goal of the command line was to create the hardware domain *after* boot. So initially we create dom0 and then initialize the hardware domain. With the patch below, this has changed. > >>>>> > >>>>> However, from the commit message, I don't understand why. It seems like we broke late hwdom? > >>>>> > >>>>> For instance, late_hwdom_init() has the following assert: > >>>>> > >>>>> dom0 = rcu_lock_domain_by_id(0); > >>>>> ASSERT(dom0 != NULL); > >>>>> > >>>>> Jan, I saw you were involved in the review of the series. Any idea why this was changed? > >>>> > >>>> I simply overlooked this aspect when looking at the change. You're right, things > >>>> were broken there. Unless a simple and clean fix can be made relatively soon, I > >>>> think this simply needs reverting (which may mean to revert any later commits > >>>> that depend on that). I can't help noting that in this console rework there were > >>>> way too many issues, and I fear more than just this one may have slipped > >>>> through. I therefore wonder whether taken as a whole this was/is worth both the > >>>> submitter's and all the reviewers' time. > >>> > >>> Yes, sorry, I overlooked late_hwdom_init() modification. > >>> > >>> IMO, the clean fix would be adding another command line parameter > >>> `control_domid` (with default value 0), make get_initial_domain_id() return it > >>> instead of current `hardware_domid` and update late_hwdom_init() to use > >>> `control_domid` insted of open-coded 0. > >> > >> No, no new command line option will address this. Original behavior needs to be > >> restored (either by correcting the earlier change or, as said, be reverting). > > > > Correction of the earlier change: > > > > https://gitlab.com/xen-project/people/dmukhin/xen/-/commit/b7f194650307a08a9e6da5aa9fdd1f8a9afd67eb > > > > re: command line option: I meant something like this: > > > > https://gitlab.com/xen-project/people/dmukhin/xen/-/commit/51a519de6ea73ff3b650fd9bd4f3c5c63f64c069 > > I am with Jan here. This used to work before without "control_domid", so > we ought to keep the same. OK, thanks for the feedback! I will post the correction of the earlier change. > > But even if we are ok to break compatibility, I don't see the value of > "control_domid". The implication of setting "hardware_domid" is you will > have a separate control domain. At which point, why would it matter to > specify the domain ID? I thought there's a plan to use symbols in the ongoing hyperlauch reworks, but as later Jason explains, this is not the case. > > Cheers, > > -- > Julien Grall >
+Jason On Tue, 10 Jun 2025, Julien Grall wrote: > But even if we are ok to break compatibility, I don't see the value of > "control_domid". The implication of setting "hardware_domid" is you will > have a separate control domain. At which point, why would it matter to specify > the domain ID? I just wanted to say that while we (AMD) are looking for a hardware domain / control domain separation for safety reasons, I don't think we have a need to specify the domid for either one. Jason, is that correct?
On 2025-06-10 14:33, Stefano Stabellini wrote: > +Jason > > On Tue, 10 Jun 2025, Julien Grall wrote: >> But even if we are ok to break compatibility, I don't see the value of >> "control_domid". The implication of setting "hardware_domid" is you will >> have a separate control domain. At which point, why would it matter to specify >> the domain ID? > > I just wanted to say that while we (AMD) are looking for a hardware > domain / control domain separation for safety reasons, I don't think we > have a need to specify the domid for either one. Specifying domids isn't really necessary, but it can be convenience or usability improvement with dom0less/Hyperlaunch. But I don't think control_domid is necessary. hardware_domid is not used for dom0less/Hyperlaunch with split control and hardware domains. The "capabilities" device tree (DT) property specifies the role of the domain. Hyperlaunch lets you specify a domid in the DT - there is some auto-allocation logic, but I haven't use it. dom0less doesn't allow specifying a domid today, but it could. dom0less domains are assigned domids sequentially, so you can determine it from the order in the DT. Knowing the domids can be helpful for configuring userspace, and that only really matters for dom0less/Hyperlaunch. e.g. xenstored wants to know which domid is control. I think it's nice to have a domid property so that you know when configuring the system which domain is which. You can explicitly read the domid out of the DT and know what it is. Since dom0 userspace needs to refer to domids, this make it clear which domain is which for, as an example, connecting disks. hardware_domid= is the way of enabling later hardware domain functionality. dom0 boots as dom0. When it creates domid == hardware_domid, that new domain becomes the hardware domain, and dom0 loses hwdom and becomes just the control domain. It's a legacy feature and was a workaround for when Xen could only create a single domain at boot. Regards, Jason
On Tue, Jun 10, 2025 at 05:37:33PM -0400, Jason Andryuk wrote: > On 2025-06-10 14:33, Stefano Stabellini wrote: > > +Jason > > > > On Tue, 10 Jun 2025, Julien Grall wrote: > >> But even if we are ok to break compatibility, I don't see the value of > >> "control_domid". The implication of setting "hardware_domid" is you will > >> have a separate control domain. At which point, why would it matter to specify > >> the domain ID? > > > > I just wanted to say that while we (AMD) are looking for a hardware > > domain / control domain separation for safety reasons, I don't think we > > have a need to specify the domid for either one. > > Specifying domids isn't really necessary, but it can be convenience or > usability improvement with dom0less/Hyperlaunch. But I don't think > control_domid is necessary. > > hardware_domid is not used for dom0less/Hyperlaunch with split control > and hardware domains. The "capabilities" device tree (DT) property > specifies the role of the domain. > > Hyperlaunch lets you specify a domid in the DT - there is some > auto-allocation logic, but I haven't use it. dom0less doesn't allow > specifying a domid today, but it could. dom0less domains are assigned > domids sequentially, so you can determine it from the order in the DT. > > Knowing the domids can be helpful for configuring userspace, and that > only really matters for dom0less/Hyperlaunch. e.g. xenstored wants to > know which domid is control. > > I think it's nice to have a domid property so that you know when > configuring the system which domain is which. You can explicitly read > the domid out of the DT and know what it is. Since dom0 userspace needs > to refer to domids, this make it clear which domain is which for, as an > example, connecting disks. > > hardware_domid= is the way of enabling later hardware domain > functionality. dom0 boots as dom0. When it creates domid == > hardware_domid, that new domain becomes the hardware domain, and dom0 > loses hwdom and becomes just the control domain. It's a legacy feature > and was a workaround for when Xen could only create a single domain at boot. Thanks for explanation! > > Regards, > Jason
On Wed, 28 May 2025, dmkhn@proton.me wrote:
> From: Denis Mukhin <dmkhn@proton.me>
> 
> From: Denis Mukhin <dmukhin@ford.com>
> 
> Remove the hardcoded domain ID 0 allocation for hardware domain and replace it
> with a call to get_initial_domain_id() (returns the value of hardware_domid on
> Arm).
> 
> Update domid_alloc(DOMID_INVALID) case to ensure that get_initial_domain_id()
> ID is skipped during domain ID allocation to cover domU case in dom0less
> configuration. That also fixes a potential issue with re-using ID#0 for domUs
> when get_initial_domain_id() returns non-zero.
It looks like this sentence should be removed from the commit message as
not valid anymore.
Aside from that, the code changes below as clear.
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
> Signed-off-by: Denis Mukhin <dmukhin@ford.com>
> ---
> Changes since v8:
> - rebased 
> ---
>  xen/arch/arm/domain_build.c             | 4 ++--
>  xen/common/device-tree/dom0less-build.c | 9 +++------
>  xen/common/domain.c                     | 4 ++--
>  3 files changed, 7 insertions(+), 10 deletions(-)
> 
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index e9d563c269..0ad80b020a 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -2035,9 +2035,9 @@ void __init create_dom0(void)
>      if ( !llc_coloring_enabled )
>          flags |= CDF_directmap;
>  
> -    domid = domid_alloc(0);
> +    domid = domid_alloc(get_initial_domain_id());
>      if ( domid == DOMID_INVALID )
> -        panic("Error allocating domain ID 0\n");
> +        panic("Error allocating domain ID %d\n", get_initial_domain_id());
>  
>      dom0 = domain_create(domid, &dom0_cfg, flags);
>      if ( IS_ERR(dom0) )
> diff --git a/xen/common/device-tree/dom0less-build.c b/xen/common/device-tree/dom0less-build.c
> index a509f8fecd..9a6015f4ce 100644
> --- a/xen/common/device-tree/dom0less-build.c
> +++ b/xen/common/device-tree/dom0less-build.c
> @@ -974,14 +974,11 @@ void __init create_domUs(void)
>  
>          arch_create_domUs(node, &d_cfg, flags);
>  
> -        /*
> -         * The variable max_init_domid is initialized with zero, so here it's
> -         * very important to use the pre-increment operator to call
> -         * domain_create() with a domid > 0. (domid == 0 is reserved for Dom0)
> -         */
> -        domid = domid_alloc(++max_init_domid);
> +        domid = domid_alloc(DOMID_INVALID);
>          if ( domid == DOMID_INVALID )
>              panic("Error allocating ID for domain %s\n", dt_node_name(node));
> +        if ( max_init_domid < domid )
> +            max_init_domid = domid;
>  
>          d = domain_create(domid, &d_cfg, flags);
>          if ( IS_ERR(d) )
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index ae0c44fcbb..129b4fcb37 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -2423,8 +2423,8 @@ domid_t domid_alloc(domid_t domid)
>      else
>      {
>          static domid_t domid_last;
> -        /* NB: account for late hwdom case, skip ID#0 */
> -        const domid_t reserved_domid = 0;
> +        /* NB: account for late hwdom case */
> +        const domid_t reserved_domid = get_initial_domain_id();
>          const bool reserved = __test_and_set_bit(reserved_domid, domid_bitmap);
>  
>          domid = find_next_zero_bit(domid_bitmap, DOMID_FIRST_RESERVED,
> -- 
> 2.34.1
> 
>
                
            © 2016 - 2025 Red Hat, Inc.