[PATCH v10 1/3] xen/domain: unify domain ID allocation

dmkhn@proton.me posted 3 patches 4 months, 1 week ago
There is a newer version of this series
[PATCH v10 1/3] xen/domain: unify domain ID allocation
Posted by dmkhn@proton.me 4 months, 1 week ago
From: Denis Mukhin <dmukhin@ford.com>

Currently, there are two different domain ID allocation implementations:

  1) Sequential IDs allocation in dom0less Arm code based on max_init_domid;

  2) Sequential IDs allocation in XEN_DOMCTL_createdomain; does not use
     max_init_domid (both Arm and x86).

The domain ID allocation covers dom0 or late hwdom, predefined domains,
post-boot domains, excluding Xen system domains (domid >=
DOMID_FIRST_RESERVED).

It makes sense to have a common helper code for such task across architectures
(Arm and x86) and between dom0less / toolstack domU allocation.

Note, fixing dependency on max_init_domid is out of scope of this patch.

Wrap the domain ID allocation as an arch-independent function domid_alloc() in
common/domain.c based on the bitmap.

Allocation algorithm:
- If an explicit domain ID is provided, verify its availability and use it if
  ID is not used;
- If DOMID_INVALID is provided, search the range [1..DOMID_FIRST_RESERVED-1],
  starting from the last used ID. IDs are not wrapped around in dom0less case.
  Implementation guarantees that two consecutive calls will never return the
  same ID. ID#0 is reserved for the first boot domain (currently, dom0) and
  excluded from allocation range.

Remove is_free_domid() helper as it is not needed now.

No functional change intended.

Signed-off-by: Denis Mukhin <dmukhin@ford.com>
---
Changes from v9:
- dropped unrelated message formatting from create_dom0()
- no wraparound of IDs in dom0less case
- fixed ID#0 treatment

Link to v9: https://lore.kernel.org/r/20250528225030.2652166-2-dmukhin@ford.com
---
 xen/arch/arm/domain_build.c             |  7 ++-
 xen/arch/x86/setup.c                    |  7 ++-
 xen/common/device-tree/dom0less-build.c | 17 +++---
 xen/common/domain.c                     | 75 +++++++++++++++++++++++++
 xen/common/domctl.c                     | 42 ++------------
 xen/include/xen/domain.h                |  3 +
 6 files changed, 102 insertions(+), 49 deletions(-)

diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index 4ff161887ec3..9fa5143eb98c 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -2049,6 +2049,7 @@ void __init create_dom0(void)
         .grant_opts = XEN_DOMCTL_GRANT_version(opt_gnttab_max_version),
     };
     unsigned int flags = CDF_privileged | CDF_hardware;
+    domid_t domid;
     int rc;
 
     /* The vGIC for DOM0 is exactly emulating the hardware GIC */
@@ -2073,7 +2074,11 @@ void __init create_dom0(void)
     if ( !llc_coloring_enabled )
         flags |= CDF_directmap;
 
-    dom0 = domain_create(0, &dom0_cfg, flags);
+    domid = domid_alloc(0);
+    if ( domid == DOMID_INVALID )
+        panic("Error allocating domain ID 0\n");
+
+    dom0 = domain_create(domid, &dom0_cfg, flags);
     if ( IS_ERR(dom0) )
         panic("Error creating domain 0 (rc = %ld)\n", PTR_ERR(dom0));
 
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index f32efa7c6045..7adb92d78a18 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1044,8 +1044,11 @@ static struct domain *__init create_dom0(struct boot_info *bi)
     if ( iommu_enabled )
         dom0_cfg.flags |= XEN_DOMCTL_CDF_iommu;
 
-    /* Create initial domain.  Not d0 for pvshim. */
-    bd->domid = get_initial_domain_id();
+    /* Allocate initial domain ID.  Not d0 for pvshim. */
+    bd->domid = domid_alloc(get_initial_domain_id());
+    if ( bd->domid == DOMID_INVALID )
+        panic("Error allocating domain ID %d\n", get_initial_domain_id());
+
     d = domain_create(bd->domid, &dom0_cfg,
                       pv_shim ? 0 : CDF_privileged | CDF_hardware);
     if ( IS_ERR(d) )
diff --git a/xen/common/device-tree/dom0less-build.c b/xen/common/device-tree/dom0less-build.c
index 3d503c697337..576fdfa6a19a 100644
--- a/xen/common/device-tree/dom0less-build.c
+++ b/xen/common/device-tree/dom0less-build.c
@@ -839,15 +839,13 @@ void __init create_domUs(void)
         struct xen_domctl_createdomain d_cfg = {0};
         unsigned int flags = 0U;
         bool has_dtb = false;
+        domid_t domid;
         uint32_t val;
         int rc;
 
         if ( !dt_device_is_compatible(node, "xen,domain") )
             continue;
 
-        if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
-            panic("No more domain IDs available\n");
-
         d_cfg.max_evtchn_port = 1023;
         d_cfg.max_grant_frames = -1;
         d_cfg.max_maptrack_frames = -1;
@@ -965,12 +963,13 @@ void __init create_domUs(void)
 
         arch_create_domUs(node, &d_cfg, flags);
 
-        /*
-         * The variable max_init_domid is initialized with zero, so here it's
-         * very important to use the pre-increment operator to call
-         * domain_create() with a domid > 0. (domid == 0 is reserved for Dom0)
-         */
-        d = domain_create(++max_init_domid, &d_cfg, flags);
+        domid = domid_alloc(DOMID_INVALID);
+        if ( domid == DOMID_INVALID )
+            panic("Error allocating ID for domain %s\n", dt_node_name(node));
+        if ( max_init_domid < domid )
+            max_init_domid = domid;
+
+        d = domain_create(domid, &d_cfg, flags);
         if ( IS_ERR(d) )
             panic("Error creating domain %s (rc = %ld)\n",
                   dt_node_name(node), PTR_ERR(d));
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 434d32901b1b..be022c720b13 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -66,6 +66,14 @@ DEFINE_RCU_READ_LOCK(domlist_read_lock);
 static struct domain *domain_hash[DOMAIN_HASH_SIZE];
 struct domain *domain_list;
 
+/*
+ * Domain ID allocator.
+ * Covers dom0 or late hwdom, predefined domains, post-boot domains; excludes
+ * Xen system domains (ID >= DOMID_FIRST_RESERVED).
+ */
+static DEFINE_SPINLOCK(domid_lock);
+static DECLARE_BITMAP(domid_bitmap, DOMID_FIRST_RESERVED);
+
 /*
  * Insert a domain into the domlist/hash.  This allows the domain to be looked
  * up by domid, and therefore to be the subject of hypercalls/etc.
@@ -1452,6 +1460,8 @@ void domain_destroy(struct domain *d)
 
     TRACE_TIME(TRC_DOM0_DOM_REM, d->domain_id);
 
+    domid_free(d->domain_id);
+
     /* Remove from the domlist/hash. */
     domlist_remove(d);
 
@@ -2433,6 +2443,71 @@ void thaw_domains(void)
     rcu_read_unlock(&domlist_read_lock);
 }
 
+domid_t domid_alloc(domid_t domid)
+{
+    static domid_t domid_last;
+
+    spin_lock(&domid_lock);
+
+    /* Exact match. */
+    if ( domid < DOMID_FIRST_RESERVED )
+    {
+        if ( __test_and_set_bit(domid, domid_bitmap) )
+            domid = DOMID_INVALID;
+    }
+    /*
+     * Exhaustive search.
+     *
+     * Domain ID#0 is reserved for the first boot domain (e.g. control domain)
+     * and excluded from allocation.
+     *
+     * In dom0less build, domains are not dynamically destroyed, so there's no
+     * need to do a wraparound of the IDs.
+     */
+#ifdef CONFIG_DOM0LESS_BOOT
+    else if ( domid_last + 1 >= DOMID_FIRST_RESERVED )
+    {
+        domid = DOMID_INVALID;
+    }
+#endif
+    else
+    {
+        domid = find_next_zero_bit(domid_bitmap,
+                                   DOMID_FIRST_RESERVED,
+                                   domid_last + 1);
+#ifdef CONFIG_DOM0LESS_BOOT
+        if ( domid == DOMID_FIRST_RESERVED )
+            domid = find_next_zero_bit(domid_bitmap,
+                                       DOMID_FIRST_RESERVED,
+                                       1);
+#endif
+
+        if ( domid < DOMID_FIRST_RESERVED )
+        {
+            __set_bit(domid, domid_bitmap);
+            domid_last = domid;
+        }
+        else
+        {
+            domid = DOMID_INVALID;
+        }
+    }
+
+    spin_unlock(&domid_lock);
+
+    return domid;
+}
+
+void domid_free(domid_t domid)
+{
+    if ( domid < DOMID_FIRST_RESERVED )
+    {
+        spin_lock(&domid_lock);
+        __clear_bit(domid, domid_bitmap);
+        spin_unlock(&domid_lock);
+    }
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index bfe2e1f9f057..8ef0c147c9b0 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -49,20 +49,6 @@ static int xenctl_bitmap_to_nodemask(nodemask_t *nodemask,
                                    MAX_NUMNODES);
 }
 
-static inline int is_free_domid(domid_t dom)
-{
-    struct domain *d;
-
-    if ( dom >= DOMID_FIRST_RESERVED )
-        return 0;
-
-    if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
-        return 1;
-
-    rcu_unlock_domain(d);
-    return 0;
-}
-
 void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
 {
     struct vcpu *v;
@@ -421,36 +407,18 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 
     case XEN_DOMCTL_createdomain:
     {
-        domid_t        dom;
-        static domid_t rover = 0;
+        domid_t domid = domid_alloc(op->domain);
 
-        dom = op->domain;
-        if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
+        if ( domid == DOMID_INVALID )
         {
             ret = -EEXIST;
-            if ( !is_free_domid(dom) )
-                break;
-        }
-        else
-        {
-            for ( dom = rover + 1; dom != rover; dom++ )
-            {
-                if ( dom == DOMID_FIRST_RESERVED )
-                    dom = 1;
-                if ( is_free_domid(dom) )
-                    break;
-            }
-
-            ret = -ENOMEM;
-            if ( dom == rover )
-                break;
-
-            rover = dom;
+            break;
         }
 
-        d = domain_create(dom, &op->u.createdomain, false);
+        d = domain_create(domid, &op->u.createdomain, false);
         if ( IS_ERR(d) )
         {
+            domid_free(domid);
             ret = PTR_ERR(d);
             d = NULL;
             break;
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index e10baf2615fd..8aab05ae93c8 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -38,6 +38,9 @@ void arch_get_domain_info(const struct domain *d,
 
 domid_t get_initial_domain_id(void);
 
+domid_t domid_alloc(domid_t domid);
+void domid_free(domid_t domid);
+
 /* CDF_* constant. Internal flags for domain creation. */
 /* Is this a privileged domain? */
 #define CDF_privileged           (1U << 0)
-- 
2.34.1
Re: [PATCH v10 1/3] xen/domain: unify domain ID allocation
Posted by Alejandro Vallejo 3 months, 2 weeks ago
Hi,

Sorry I'm so late to this. I have a few suggestions to improve the ergonomics
of domid handling in dom0less/Hyperlaunch.

On Mon Jun 23, 2025 at 8:28 PM CEST, dmkhn wrote:
> From: Denis Mukhin <dmukhin@ford.com>
>
> Currently, there are two different domain ID allocation implementations:
>
>   1) Sequential IDs allocation in dom0less Arm code based on max_init_domid;
>
>   2) Sequential IDs allocation in XEN_DOMCTL_createdomain; does not use
>      max_init_domid (both Arm and x86).
>
> The domain ID allocation covers dom0 or late hwdom, predefined domains,
> post-boot domains, excluding Xen system domains (domid >=
> DOMID_FIRST_RESERVED).
>
> It makes sense to have a common helper code for such task across architectures
> (Arm and x86) and between dom0less / toolstack domU allocation.
>
> Note, fixing dependency on max_init_domid is out of scope of this patch.
>
> Wrap the domain ID allocation as an arch-independent function domid_alloc() in
> common/domain.c based on the bitmap.
>
> Allocation algorithm:
> - If an explicit domain ID is provided, verify its availability and use it if
>   ID is not used;
> - If DOMID_INVALID is provided, search the range [1..DOMID_FIRST_RESERVED-1],
>   starting from the last used ID. IDs are not wrapped around in dom0less case.
>   Implementation guarantees that two consecutive calls will never return the
>   same ID. ID#0 is reserved for the first boot domain (currently, dom0) and
>   excluded from allocation range.
>
> Remove is_free_domid() helper as it is not needed now.
>
> No functional change intended.
>
> Signed-off-by: Denis Mukhin <dmukhin@ford.com>
> ---
> Changes from v9:
> - dropped unrelated message formatting from create_dom0()
> - no wraparound of IDs in dom0less case
> - fixed ID#0 treatment
>
> Link to v9: https://lore.kernel.org/r/20250528225030.2652166-2-dmukhin@ford.com
> ---
>  xen/arch/arm/domain_build.c             |  7 ++-
>  xen/arch/x86/setup.c                    |  7 ++-
>  xen/common/device-tree/dom0less-build.c | 17 +++---
>  xen/common/domain.c                     | 75 +++++++++++++++++++++++++
>  xen/common/domctl.c                     | 42 ++------------
>  xen/include/xen/domain.h                |  3 +
>  6 files changed, 102 insertions(+), 49 deletions(-)
>
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index 4ff161887ec3..9fa5143eb98c 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -2049,6 +2049,7 @@ void __init create_dom0(void)
>          .grant_opts = XEN_DOMCTL_GRANT_version(opt_gnttab_max_version),
>      };
>      unsigned int flags = CDF_privileged | CDF_hardware;
> +    domid_t domid;
>      int rc;
>  
>      /* The vGIC for DOM0 is exactly emulating the hardware GIC */
> @@ -2073,7 +2074,11 @@ void __init create_dom0(void)
>      if ( !llc_coloring_enabled )
>          flags |= CDF_directmap;
>  
> -    dom0 = domain_create(0, &dom0_cfg, flags);
> +    domid = domid_alloc(0);

The way I´d expect domid_alloc() to be used is twofold:

  1. "Give me this specific domid"

for which this interface looks fine, perhaps renamed to domid_alloc_exact(domid)

  2. "Give me any domid"

for which we'd benefit more from a domid_alloc()

This removes the heuristics from the interface. Worst-case execution remains the
same, under 500 iterations. (32K minus a little bit, checked 64bits at a time).

> +    if ( domid == DOMID_INVALID )
> +        panic("Error allocating domain ID 0\n");
> +
> +    dom0 = domain_create(domid, &dom0_cfg, flags);
>      if ( IS_ERR(dom0) )
>          panic("Error creating domain 0 (rc = %ld)\n", PTR_ERR(dom0));
>  
> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> index f32efa7c6045..7adb92d78a18 100644
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -1044,8 +1044,11 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>      if ( iommu_enabled )
>          dom0_cfg.flags |= XEN_DOMCTL_CDF_iommu;
>  
> -    /* Create initial domain.  Not d0 for pvshim. */
> -    bd->domid = get_initial_domain_id();
> +    /* Allocate initial domain ID.  Not d0 for pvshim. */
> +    bd->domid = domid_alloc(get_initial_domain_id());
> +    if ( bd->domid == DOMID_INVALID )
> +        panic("Error allocating domain ID %d\n", get_initial_domain_id());
> +
>      d = domain_create(bd->domid, &dom0_cfg,
>                        pv_shim ? 0 : CDF_privileged | CDF_hardware);
>      if ( IS_ERR(d) )
> diff --git a/xen/common/device-tree/dom0less-build.c b/xen/common/device-tree/dom0less-build.c
> index 3d503c697337..576fdfa6a19a 100644
> --- a/xen/common/device-tree/dom0less-build.c
> +++ b/xen/common/device-tree/dom0less-build.c
> @@ -839,15 +839,13 @@ void __init create_domUs(void)
>          struct xen_domctl_createdomain d_cfg = {0};
>          unsigned int flags = 0U;
>          bool has_dtb = false;
> +        domid_t domid;
>          uint32_t val;
>          int rc;
>  
>          if ( !dt_device_is_compatible(node, "xen,domain") )
>              continue;
>  
> -        if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
> -            panic("No more domain IDs available\n");
> -
>          d_cfg.max_evtchn_port = 1023;
>          d_cfg.max_grant_frames = -1;
>          d_cfg.max_maptrack_frames = -1;
> @@ -965,12 +963,13 @@ void __init create_domUs(void)
>  
>          arch_create_domUs(node, &d_cfg, flags);
>  
> -        /*
> -         * The variable max_init_domid is initialized with zero, so here it's
> -         * very important to use the pre-increment operator to call
> -         * domain_create() with a domid > 0. (domid == 0 is reserved for Dom0)
> -         */
> -        d = domain_create(++max_init_domid, &d_cfg, flags);
> +        domid = domid_alloc(DOMID_INVALID);
> +        if ( domid == DOMID_INVALID )
> +            panic("Error allocating ID for domain %s\n", dt_node_name(node));
> +        if ( max_init_domid < domid )
> +            max_init_domid = domid;
> +
> +        d = domain_create(domid, &d_cfg, flags);
>          if ( IS_ERR(d) )
>              panic("Error creating domain %s (rc = %ld)\n",
>                    dt_node_name(node), PTR_ERR(d));
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 434d32901b1b..be022c720b13 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -66,6 +66,14 @@ DEFINE_RCU_READ_LOCK(domlist_read_lock);
>  static struct domain *domain_hash[DOMAIN_HASH_SIZE];
>  struct domain *domain_list;
>  
> +/*
> + * Domain ID allocator.
> + * Covers dom0 or late hwdom, predefined domains, post-boot domains; excludes
> + * Xen system domains (ID >= DOMID_FIRST_RESERVED).
> + */
> +static DEFINE_SPINLOCK(domid_lock);
> +static DECLARE_BITMAP(domid_bitmap, DOMID_FIRST_RESERVED);
> +
>  /*
>   * Insert a domain into the domlist/hash.  This allows the domain to be looked
>   * up by domid, and therefore to be the subject of hypercalls/etc.
> @@ -1452,6 +1460,8 @@ void domain_destroy(struct domain *d)
>  
>      TRACE_TIME(TRC_DOM0_DOM_REM, d->domain_id);
>  
> +    domid_free(d->domain_id);
> +

Shouldn't this go after domlist_remove()? Otherwise fun things might happen
if a domid is allocated while the old domain that still keeps the old domid
is still in its hash.

The domctl lock (maybe) protects this case implicitly, but it's probably better
to destroy things in a reasonable order.

>      /* Remove from the domlist/hash. */
>      domlist_remove(d);
>  
> @@ -2433,6 +2443,71 @@ void thaw_domains(void)
>      rcu_read_unlock(&domlist_read_lock);
>  }
>  
> +domid_t domid_alloc(domid_t domid)
> +{
> +    static domid_t domid_last;
> +
> +    spin_lock(&domid_lock);
> +
> +    /* Exact match. */
> +    if ( domid < DOMID_FIRST_RESERVED )
> +    {
> +        if ( __test_and_set_bit(domid, domid_bitmap) )
> +            domid = DOMID_INVALID;
> +    }
> +    /*
> +     * Exhaustive search.
> +     *
> +     * Domain ID#0 is reserved for the first boot domain (e.g. control domain)
> +     * and excluded from allocation.
> +     *
> +     * In dom0less build, domains are not dynamically destroyed, so there's no
> +     * need to do a wraparound of the IDs.
> +     */
> +#ifdef CONFIG_DOM0LESS_BOOT

These ifdef guards are problematic. The fact that a platform supports dom0less
doesn't mean that every boot is dom0less (I can boot a non-dom0less system on
a dom0less-capable Xen).

Furthermore, the rationale for panicking on wraparound is because of exhaustion,
but you do have a proper bitmap here to do proper exhaustive search, so IMO,
this branch is not necessary.

> +    else if ( domid_last + 1 >= DOMID_FIRST_RESERVED )
> +    {
> +        domid = DOMID_INVALID;
> +    }
> +#endif
> +    else
> +    {
> +        domid = find_next_zero_bit(domid_bitmap,
> +                                   DOMID_FIRST_RESERVED,
> +                                   domid_last + 1);
> +#ifdef CONFIG_DOM0LESS_BOOT
> +        if ( domid == DOMID_FIRST_RESERVED )
> +            domid = find_next_zero_bit(domid_bitmap,
> +                                       DOMID_FIRST_RESERVED,
> +                                       1);

nit: I'd say 0 is fair game. On Hyperlaunch (and soon dom0less) it'll be possible
to have a domU with domid=0 and a hwdom/ctldom with domids != 0 via the domid
property on the DTB.

Starting from 1 might be slightly saner for defence in depth, so it really is
a nit. I don't think being cautious about dom0 is necessarily a bad thing.

> +#endif
> +
> +        if ( domid < DOMID_FIRST_RESERVED )
> +        {
> +            __set_bit(domid, domid_bitmap);
> +            domid_last = domid;

Rather than setting domid_last here, I'd move it right before the spin_unlock()
gated by "if ( domid != DOMID_INVALID )". That'd also bump domid_last in the
exact match case.

It also allows to drop the (then) redundant braces.

> +        }
> +        else
> +        {

nit: redundant braces

> +            domid = DOMID_INVALID;
> +        }
> +    }
> +
> +    spin_unlock(&domid_lock);
> +
> +    return domid;
> +}
> +
> +void domid_free(domid_t domid)
> +{
> +    if ( domid < DOMID_FIRST_RESERVED )
> +    {
> +        spin_lock(&domid_lock);
> +        __clear_bit(domid, domid_bitmap);
> +        spin_unlock(&domid_lock);
> +    }
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
> index bfe2e1f9f057..8ef0c147c9b0 100644
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -49,20 +49,6 @@ static int xenctl_bitmap_to_nodemask(nodemask_t *nodemask,
>                                     MAX_NUMNODES);
>  }
>  
> -static inline int is_free_domid(domid_t dom)
> -{
> -    struct domain *d;
> -
> -    if ( dom >= DOMID_FIRST_RESERVED )
> -        return 0;
> -
> -    if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
> -        return 1;
> -
> -    rcu_unlock_domain(d);
> -    return 0;
> -}

Good riddance. This is racy without the domctl lock.

> -
>  void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
>  {
>      struct vcpu *v;
> @@ -421,36 +407,18 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>  
>      case XEN_DOMCTL_createdomain:
>      {
> -        domid_t        dom;
> -        static domid_t rover = 0;
> +        domid_t domid = domid_alloc(op->domain);
>  
> -        dom = op->domain;
> -        if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
> +        if ( domid == DOMID_INVALID )
>          {
>              ret = -EEXIST;

nit: IMO. If createdomain didn't set domctl.domid, we shouldn't return EEXIST,
     but ENOSPC. It's a very impossible case, so I don't care much though.

> -            if ( !is_free_domid(dom) )
> -                break;
> -        }
> -        else
> -        {
> -            for ( dom = rover + 1; dom != rover; dom++ )
> -            {
> -                if ( dom == DOMID_FIRST_RESERVED )
> -                    dom = 1;
> -                if ( is_free_domid(dom) )
> -                    break;
> -            }
> -
> -            ret = -ENOMEM;
> -            if ( dom == rover )
> -                break;
> -
> -            rover = dom;
> +            break;
>          }
>  
> -        d = domain_create(dom, &op->u.createdomain, false);
> +        d = domain_create(domid, &op->u.createdomain, false);
>          if ( IS_ERR(d) )
>          {
> +            domid_free(domid);
>              ret = PTR_ERR(d);
>              d = NULL;
>              break;
> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
> index e10baf2615fd..8aab05ae93c8 100644
> --- a/xen/include/xen/domain.h
> +++ b/xen/include/xen/domain.h
> @@ -38,6 +38,9 @@ void arch_get_domain_info(const struct domain *d,
>  
>  domid_t get_initial_domain_id(void);
>  
> +domid_t domid_alloc(domid_t domid);
> +void domid_free(domid_t domid);
> +
>  /* CDF_* constant. Internal flags for domain creation. */
>  /* Is this a privileged domain? */
>  #define CDF_privileged           (1U << 0)

Cheers,
Alejandro
Re: [PATCH v10 1/3] xen/domain: unify domain ID allocation
Posted by dmkhn@proton.me 3 months, 1 week ago
On Thu, Jul 17, 2025 at 12:27:48PM +0200, Alejandro Vallejo wrote:
> Hi,
> 
> Sorry I'm so late to this. I have a few suggestions to improve the ergonomics
> of domid handling in dom0less/Hyperlaunch.

Thanks for the feedback!

> 
> On Mon Jun 23, 2025 at 8:28 PM CEST, dmkhn wrote:
> > From: Denis Mukhin <dmukhin@ford.com>
> >
> > Currently, there are two different domain ID allocation implementations:
> >
> >   1) Sequential IDs allocation in dom0less Arm code based on max_init_domid;
> >
> >   2) Sequential IDs allocation in XEN_DOMCTL_createdomain; does not use
> >      max_init_domid (both Arm and x86).
> >
> > The domain ID allocation covers dom0 or late hwdom, predefined domains,
> > post-boot domains, excluding Xen system domains (domid >=
> > DOMID_FIRST_RESERVED).
> >
> > It makes sense to have a common helper code for such task across architectures
> > (Arm and x86) and between dom0less / toolstack domU allocation.
> >
> > Note, fixing dependency on max_init_domid is out of scope of this patch.
> >
> > Wrap the domain ID allocation as an arch-independent function domid_alloc() in
> > common/domain.c based on the bitmap.
> >
> > Allocation algorithm:
> > - If an explicit domain ID is provided, verify its availability and use it if
> >   ID is not used;
> > - If DOMID_INVALID is provided, search the range [1..DOMID_FIRST_RESERVED-1],
> >   starting from the last used ID. IDs are not wrapped around in dom0less case.
> >   Implementation guarantees that two consecutive calls will never return the
> >   same ID. ID#0 is reserved for the first boot domain (currently, dom0) and
> >   excluded from allocation range.
> >
> > Remove is_free_domid() helper as it is not needed now.
> >
> > No functional change intended.
> >
> > Signed-off-by: Denis Mukhin <dmukhin@ford.com>
> > ---
> > Changes from v9:
> > - dropped unrelated message formatting from create_dom0()
> > - no wraparound of IDs in dom0less case
> > - fixed ID#0 treatment
> >
> > Link to v9: https://lore.kernel.org/r/20250528225030.2652166-2-dmukhin@ford.com
> > ---
> >  xen/arch/arm/domain_build.c             |  7 ++-
> >  xen/arch/x86/setup.c                    |  7 ++-
> >  xen/common/device-tree/dom0less-build.c | 17 +++---
> >  xen/common/domain.c                     | 75 +++++++++++++++++++++++++
> >  xen/common/domctl.c                     | 42 ++------------
> >  xen/include/xen/domain.h                |  3 +
> >  6 files changed, 102 insertions(+), 49 deletions(-)
> >
> > diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> > index 4ff161887ec3..9fa5143eb98c 100644
> > --- a/xen/arch/arm/domain_build.c
> > +++ b/xen/arch/arm/domain_build.c
> > @@ -2049,6 +2049,7 @@ void __init create_dom0(void)
> >          .grant_opts = XEN_DOMCTL_GRANT_version(opt_gnttab_max_version),
> >      };
> >      unsigned int flags = CDF_privileged | CDF_hardware;
> > +    domid_t domid;
> >      int rc;
> >
> >      /* The vGIC for DOM0 is exactly emulating the hardware GIC */
> > @@ -2073,7 +2074,11 @@ void __init create_dom0(void)
> >      if ( !llc_coloring_enabled )
> >          flags |= CDF_directmap;
> >
> > -    dom0 = domain_create(0, &dom0_cfg, flags);
> > +    domid = domid_alloc(0);
> 
> The way I´d expect domid_alloc() to be used is twofold:
> 
>   1. "Give me this specific domid"
> 
> for which this interface looks fine, perhaps renamed to domid_alloc_exact(domid)
> 
>   2. "Give me any domid"
> 
> for which we'd benefit more from a domid_alloc()
> 
> This removes the heuristics from the interface. Worst-case execution remains the
> same, under 500 iterations. (32K minus a little bit, checked 64bits at a time).

I think we've settled on the domid_alloc() with partitioned values:
- exact ID allocation within [0..DOMID_FIRST_RESERVED-1]
  if input value is within the range
- exhaustive search within the range of [1..DOMID_FIRST_RESERVED-1] (skipping
  reserved ID#0) if the input value is DOMID_INVALID

I was thinking about having two calls originally, but with splitting the APIs,
do_domctl() should have an extra check for the range to re-direct to the
proper alloc variant. In current implementation it is not needed.

> 
> > +    if ( domid == DOMID_INVALID )
> > +        panic("Error allocating domain ID 0\n");
> > +
> > +    dom0 = domain_create(domid, &dom0_cfg, flags);
> >      if ( IS_ERR(dom0) )
> >          panic("Error creating domain 0 (rc = %ld)\n", PTR_ERR(dom0));
> >
> > diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> > index f32efa7c6045..7adb92d78a18 100644
> > --- a/xen/arch/x86/setup.c
> > +++ b/xen/arch/x86/setup.c
> > @@ -1044,8 +1044,11 @@ static struct domain *__init create_dom0(struct boot_info *bi)
> >      if ( iommu_enabled )
> >          dom0_cfg.flags |= XEN_DOMCTL_CDF_iommu;
> >
> > -    /* Create initial domain.  Not d0 for pvshim. */
> > -    bd->domid = get_initial_domain_id();
> > +    /* Allocate initial domain ID.  Not d0 for pvshim. */
> > +    bd->domid = domid_alloc(get_initial_domain_id());
> > +    if ( bd->domid == DOMID_INVALID )
> > +        panic("Error allocating domain ID %d\n", get_initial_domain_id());
> > +
> >      d = domain_create(bd->domid, &dom0_cfg,
> >                        pv_shim ? 0 : CDF_privileged | CDF_hardware);
> >      if ( IS_ERR(d) )
> > diff --git a/xen/common/device-tree/dom0less-build.c b/xen/common/device-tree/dom0less-build.c
> > index 3d503c697337..576fdfa6a19a 100644
> > --- a/xen/common/device-tree/dom0less-build.c
> > +++ b/xen/common/device-tree/dom0less-build.c
> > @@ -839,15 +839,13 @@ void __init create_domUs(void)
> >          struct xen_domctl_createdomain d_cfg = {0};
> >          unsigned int flags = 0U;
> >          bool has_dtb = false;
> > +        domid_t domid;
> >          uint32_t val;
> >          int rc;
> >
> >          if ( !dt_device_is_compatible(node, "xen,domain") )
> >              continue;
> >
> > -        if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
> > -            panic("No more domain IDs available\n");
> > -
> >          d_cfg.max_evtchn_port = 1023;
> >          d_cfg.max_grant_frames = -1;
> >          d_cfg.max_maptrack_frames = -1;
> > @@ -965,12 +963,13 @@ void __init create_domUs(void)
> >
> >          arch_create_domUs(node, &d_cfg, flags);
> >
> > -        /*
> > -         * The variable max_init_domid is initialized with zero, so here it's
> > -         * very important to use the pre-increment operator to call
> > -         * domain_create() with a domid > 0. (domid == 0 is reserved for Dom0)
> > -         */
> > -        d = domain_create(++max_init_domid, &d_cfg, flags);
> > +        domid = domid_alloc(DOMID_INVALID);
> > +        if ( domid == DOMID_INVALID )
> > +            panic("Error allocating ID for domain %s\n", dt_node_name(node));
> > +        if ( max_init_domid < domid )
> > +            max_init_domid = domid;
> > +
> > +        d = domain_create(domid, &d_cfg, flags);
> >          if ( IS_ERR(d) )
> >              panic("Error creating domain %s (rc = %ld)\n",
> >                    dt_node_name(node), PTR_ERR(d));
> > diff --git a/xen/common/domain.c b/xen/common/domain.c
> > index 434d32901b1b..be022c720b13 100644
> > --- a/xen/common/domain.c
> > +++ b/xen/common/domain.c
> > @@ -66,6 +66,14 @@ DEFINE_RCU_READ_LOCK(domlist_read_lock);
> >  static struct domain *domain_hash[DOMAIN_HASH_SIZE];
> >  struct domain *domain_list;
> >
> > +/*
> > + * Domain ID allocator.
> > + * Covers dom0 or late hwdom, predefined domains, post-boot domains; excludes
> > + * Xen system domains (ID >= DOMID_FIRST_RESERVED).
> > + */
> > +static DEFINE_SPINLOCK(domid_lock);
> > +static DECLARE_BITMAP(domid_bitmap, DOMID_FIRST_RESERVED);
> > +
> >  /*
> >   * Insert a domain into the domlist/hash.  This allows the domain to be looked
> >   * up by domid, and therefore to be the subject of hypercalls/etc.
> > @@ -1452,6 +1460,8 @@ void domain_destroy(struct domain *d)
> >
> >      TRACE_TIME(TRC_DOM0_DOM_REM, d->domain_id);
> >
> > +    domid_free(d->domain_id);
> > +
> 
> Shouldn't this go after domlist_remove()? Otherwise fun things might happen
> if a domid is allocated while the old domain that still keeps the old domid
> is still in its hash.

Yep, it should! Thanks for the catch.

> 
> The domctl lock (maybe) protects this case implicitly, but it's probably better
> to destroy things in a reasonable order.
> 
> >      /* Remove from the domlist/hash. */
> >      domlist_remove(d);
> >
> > @@ -2433,6 +2443,71 @@ void thaw_domains(void)
> >      rcu_read_unlock(&domlist_read_lock);
> >  }
> >
> > +domid_t domid_alloc(domid_t domid)
> > +{
> > +    static domid_t domid_last;
> > +
> > +    spin_lock(&domid_lock);
> > +
> > +    /* Exact match. */
> > +    if ( domid < DOMID_FIRST_RESERVED )
> > +    {
> > +        if ( __test_and_set_bit(domid, domid_bitmap) )
> > +            domid = DOMID_INVALID;
> > +    }
> > +    /*
> > +     * Exhaustive search.
> > +     *
> > +     * Domain ID#0 is reserved for the first boot domain (e.g. control domain)
> > +     * and excluded from allocation.
> > +     *
> > +     * In dom0less build, domains are not dynamically destroyed, so there's no
> > +     * need to do a wraparound of the IDs.
> > +     */
> > +#ifdef CONFIG_DOM0LESS_BOOT
> 
> These ifdef guards are problematic. The fact that a platform supports dom0less
> doesn't mean that every boot is dom0less (I can boot a non-dom0less system on
> a dom0less-capable Xen).

These #ifdefs are meant to align the code with the current Arm behavior, but
there will be correction.

There was v9 feedback around create_domUs() on that:
  https://lore.kernel.org/all/d0829041-1375-4161-b2c4-f8dffadbb657@xen.org/

> 
> Furthermore, the rationale for panicking on wraparound is because of exhaustion,
> but you do have a proper bitmap here to do proper exhaustive search, so IMO,
> this branch is not necessary.
> 
> > +    else if ( domid_last + 1 >= DOMID_FIRST_RESERVED )
> > +    {
> > +        domid = DOMID_INVALID;
> > +    }
> > +#endif
> > +    else
> > +    {
> > +        domid = find_next_zero_bit(domid_bitmap,
> > +                                   DOMID_FIRST_RESERVED,
> > +                                   domid_last + 1);
> > +#ifdef CONFIG_DOM0LESS_BOOT
> > +        if ( domid == DOMID_FIRST_RESERVED )
> > +            domid = find_next_zero_bit(domid_bitmap,
> > +                                       DOMID_FIRST_RESERVED,
> > +                                       1);
> 
> nit: I'd say 0 is fair game. On Hyperlaunch (and soon dom0less) it'll be possible
> to have a domU with domid=0 and a hwdom/ctldom with domids != 0 via the domid
> property on the DTB.
> 
> Starting from 1 might be slightly saner for defence in depth, so it really is
> a nit. I don't think being cautious about dom0 is necessarily a bad thing.

I kept 1 to ensure ID#0 is reserved for dom0.

There was v9 feedback around domid_alloc() on that:
  https://lore.kernel.org/all/d0829041-1375-4161-b2c4-f8dffadbb657@xen.org/

> 
> > +#endif
> > +
> > +        if ( domid < DOMID_FIRST_RESERVED )
> > +        {
> > +            __set_bit(domid, domid_bitmap);
> > +            domid_last = domid;
> 
> Rather than setting domid_last here, I'd move it right before the spin_unlock()
> gated by "if ( domid != DOMID_INVALID )". That'd also bump domid_last in the
> exact match case.
> 
> It also allows to drop the (then) redundant braces.
> 
> > +        }
> > +        else
> > +        {
> 
> nit: redundant braces

Ack.

> 
> > +            domid = DOMID_INVALID;
> > +        }
> > +    }
> > +
> > +    spin_unlock(&domid_lock);
> > +
> > +    return domid;
> > +}
> > +
> > +void domid_free(domid_t domid)
> > +{
> > +    if ( domid < DOMID_FIRST_RESERVED )
> > +    {
> > +        spin_lock(&domid_lock);
> > +        __clear_bit(domid, domid_bitmap);
> > +        spin_unlock(&domid_lock);
> > +    }
> > +}
> > +
> >  /*
> >   * Local variables:
> >   * mode: C
> > diff --git a/xen/common/domctl.c b/xen/common/domctl.c
> > index bfe2e1f9f057..8ef0c147c9b0 100644
> > --- a/xen/common/domctl.c
> > +++ b/xen/common/domctl.c
> > @@ -49,20 +49,6 @@ static int xenctl_bitmap_to_nodemask(nodemask_t *nodemask,
> >                                     MAX_NUMNODES);
> >  }
> >
> > -static inline int is_free_domid(domid_t dom)
> > -{
> > -    struct domain *d;
> > -
> > -    if ( dom >= DOMID_FIRST_RESERVED )
> > -        return 0;
> > -
> > -    if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
> > -        return 1;
> > -
> > -    rcu_unlock_domain(d);
> > -    return 0;
> > -}
> 
> Good riddance. This is racy without the domctl lock.
> 
> > -
> >  void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
> >  {
> >      struct vcpu *v;
> > @@ -421,36 +407,18 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
> >
> >      case XEN_DOMCTL_createdomain:
> >      {
> > -        domid_t        dom;
> > -        static domid_t rover = 0;
> > +        domid_t domid = domid_alloc(op->domain);
> >
> > -        dom = op->domain;
> > -        if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
> > +        if ( domid == DOMID_INVALID )
> >          {
> >              ret = -EEXIST;
> 
> nit: IMO. If createdomain didn't set domctl.domid, we shouldn't return EEXIST,
>      but ENOSPC. It's a very impossible case, so I don't care much though.

I agree, but that will be behavior change which I want to avoid.
I kept -EEXIST because I am not sure how users treat the return value.

> 
> > -            if ( !is_free_domid(dom) )
> > -                break;
> > -        }
> > -        else
> > -        {
> > -            for ( dom = rover + 1; dom != rover; dom++ )
> > -            {
> > -                if ( dom == DOMID_FIRST_RESERVED )
> > -                    dom = 1;
> > -                if ( is_free_domid(dom) )
> > -                    break;
> > -            }
> > -
> > -            ret = -ENOMEM;
> > -            if ( dom == rover )
> > -                break;
> > -
> > -            rover = dom;
> > +            break;
> >          }
> >
> > -        d = domain_create(dom, &op->u.createdomain, false);
> > +        d = domain_create(domid, &op->u.createdomain, false);
> >          if ( IS_ERR(d) )
> >          {
> > +            domid_free(domid);
> >              ret = PTR_ERR(d);
> >              d = NULL;
> >              break;
> > diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
> > index e10baf2615fd..8aab05ae93c8 100644
> > --- a/xen/include/xen/domain.h
> > +++ b/xen/include/xen/domain.h
> > @@ -38,6 +38,9 @@ void arch_get_domain_info(const struct domain *d,
> >
> >  domid_t get_initial_domain_id(void);
> >
> > +domid_t domid_alloc(domid_t domid);
> > +void domid_free(domid_t domid);
> > +
> >  /* CDF_* constant. Internal flags for domain creation. */
> >  /* Is this a privileged domain? */
> >  #define CDF_privileged           (1U << 0)
> 
> Cheers,
> Alejandro
> 
Re: [PATCH v10 1/3] xen/domain: unify domain ID allocation
Posted by Jan Beulich 4 months, 1 week ago
On 23.06.2025 20:28, dmkhn@proton.me wrote:
> @@ -2433,6 +2443,71 @@ void thaw_domains(void)
>      rcu_read_unlock(&domlist_read_lock);
>  }
>  
> +domid_t domid_alloc(domid_t domid)
> +{
> +    static domid_t domid_last;
> +
> +    spin_lock(&domid_lock);
> +
> +    /* Exact match. */
> +    if ( domid < DOMID_FIRST_RESERVED )
> +    {
> +        if ( __test_and_set_bit(domid, domid_bitmap) )
> +            domid = DOMID_INVALID;
> +    }
> +    /*
> +     * Exhaustive search.
> +     *
> +     * Domain ID#0 is reserved for the first boot domain (e.g. control domain)
> +     * and excluded from allocation.
> +     *
> +     * In dom0less build, domains are not dynamically destroyed, so there's no
> +     * need to do a wraparound of the IDs.
> +     */
> +#ifdef CONFIG_DOM0LESS_BOOT
> +    else if ( domid_last + 1 >= DOMID_FIRST_RESERVED )
> +    {
> +        domid = DOMID_INVALID;
> +    }
> +#endif

With this, ...

> +    else
> +    {
> +        domid = find_next_zero_bit(domid_bitmap,
> +                                   DOMID_FIRST_RESERVED,
> +                                   domid_last + 1);
> +#ifdef CONFIG_DOM0LESS_BOOT

... was this meant to be #ifndef?

> +        if ( domid == DOMID_FIRST_RESERVED )

This needs to be >=.

> +            domid = find_next_zero_bit(domid_bitmap,
> +                                       DOMID_FIRST_RESERVED,
> +                                       1);
> +#endif
> +
> +        if ( domid < DOMID_FIRST_RESERVED )
> +        {
> +            __set_bit(domid, domid_bitmap);
> +            domid_last = domid;
> +        }
> +        else
> +        {
> +            domid = DOMID_INVALID;
> +        }
> +    }
> +
> +    spin_unlock(&domid_lock);
> +
> +    return domid;
> +}
> +
> +void domid_free(domid_t domid)
> +{
> +    if ( domid < DOMID_FIRST_RESERVED )

Is this legitimate to happen? IOW doesn't this want to be some kind of
assertion?

Jan