To add more flexibility in system configuration, add the new
DOMAIN_CAPS_DEVICE_MODEL flag and XEN_DOMCTL_CDF_device_model.
This new flag corresponds to allowing XSM_DM_PRIV for the domain.  This
will enable running device model emulators (QEMU) from the assigned
domain for multiple target domains.
Stubdoms are assigned a target, which allows the stubdom to serve as the
device model for a single domain.  This new flag allows a single domain
to provide emulators for multiple guests.
The specific scenario is a disaggregated system with the hardware domain
providing device models for multiple guest domains.
The OCaml code needs the flag added in as well.
Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
---
 tools/ocaml/libs/xc/xenctrl.ml          |  1 +
 tools/ocaml/libs/xc/xenctrl.mli         |  1 +
 xen/arch/arm/domain.c                   |  3 ++-
 xen/common/device-tree/dom0less-build.c |  3 +++
 xen/common/domain.c                     |  3 ++-
 xen/include/public/bootfdt.h            | 12 ++++++++++--
 xen/include/public/domctl.h             |  4 +++-
 xen/include/xen/sched.h                 |  9 +++++++++
 xen/include/xsm/dummy.h                 |  2 ++
 9 files changed, 33 insertions(+), 5 deletions(-)
diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
index 2690f9a923..ef0c5dca2a 100644
--- a/tools/ocaml/libs/xc/xenctrl.ml
+++ b/tools/ocaml/libs/xc/xenctrl.ml
@@ -70,6 +70,7 @@ type domain_create_flag =
   | CDF_IOMMU
   | CDF_NESTED_VIRT
   | CDF_VPMU
+  | CDF_DEVICE_MODEL
 
 type domain_create_iommu_opts =
   | IOMMU_NO_SHAREPT
diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli
index febbe1f6ae..c0156fa5c6 100644
--- a/tools/ocaml/libs/xc/xenctrl.mli
+++ b/tools/ocaml/libs/xc/xenctrl.mli
@@ -63,6 +63,7 @@ type domain_create_flag =
   | CDF_IOMMU
   | CDF_NESTED_VIRT
   | CDF_VPMU
+  | CDF_DEVICE_MODEL
 
 type domain_create_iommu_opts =
   | IOMMU_NO_SHAREPT
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 45aeb8bddc..12fda0762f 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -612,7 +612,8 @@ int arch_sanitise_domain_config(struct xen_domctl_createdomain *config)
     unsigned int max_vcpus;
     unsigned int flags_required = (XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap);
     unsigned int flags_optional = (XEN_DOMCTL_CDF_iommu | XEN_DOMCTL_CDF_vpmu |
-                                   XEN_DOMCTL_CDF_xs_domain );
+                                   XEN_DOMCTL_CDF_xs_domain |
+                                   XEN_DOMCTL_CDF_device_model);
     unsigned int sve_vl_bits = sve_decode_vl(config->arch.sve_vl);
 
     if ( (config->flags & ~flags_optional) != flags_required )
diff --git a/xen/common/device-tree/dom0less-build.c b/xen/common/device-tree/dom0less-build.c
index 3d503c6973..993ff32f5e 100644
--- a/xen/common/device-tree/dom0less-build.c
+++ b/xen/common/device-tree/dom0less-build.c
@@ -884,6 +884,9 @@ void __init create_domUs(void)
                 d_cfg.flags |= XEN_DOMCTL_CDF_xs_domain;
                 d_cfg.max_evtchn_port = -1;
             }
+
+            if ( val & DOMAIN_CAPS_DEVICE_MODEL )
+                d_cfg.flags |= XEN_DOMCTL_CDF_device_model;
         }
 
         if ( dt_find_property(node, "xen,static-mem", NULL) )
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 153cd75340..ab2c8f864d 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -721,7 +721,8 @@ static int sanitise_domain_config(struct xen_domctl_createdomain *config)
          ~(XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap |
            XEN_DOMCTL_CDF_s3_integrity | XEN_DOMCTL_CDF_oos_off |
            XEN_DOMCTL_CDF_xs_domain | XEN_DOMCTL_CDF_iommu |
-           XEN_DOMCTL_CDF_nested_virt | XEN_DOMCTL_CDF_vpmu) )
+           XEN_DOMCTL_CDF_nested_virt | XEN_DOMCTL_CDF_vpmu |
+           XEN_DOMCTL_CDF_device_model) )
     {
         dprintk(XENLOG_INFO, "Unknown CDF flags %#x\n", config->flags);
         return -EINVAL;
diff --git a/xen/include/public/bootfdt.h b/xen/include/public/bootfdt.h
index 86c46b42a9..c6b5afc76a 100644
--- a/xen/include/public/bootfdt.h
+++ b/xen/include/public/bootfdt.h
@@ -25,7 +25,15 @@
 #define DOMAIN_CAPS_HARDWARE (1U << 1)
 /* Xenstore domain. */
 #define DOMAIN_CAPS_XENSTORE (1U << 2)
-#define DOMAIN_CAPS_MASK     (DOMAIN_CAPS_CONTROL | DOMAIN_CAPS_HARDWARE | \
-                              DOMAIN_CAPS_XENSTORE)
+/*
+ * Device model capability allows the use of the dm_op hypercalls to provide
+ * the device model emulation (run QEMU) for other domains.  This is a
+ * subset of the Control capability which can be granted to the
+ * Hardware domain for running QEMU.
+ */
+#define DOMAIN_CAPS_DEVICE_MODEL (1U << 3)
+
+#define DOMAIN_CAPS_MASK     (DOMAIN_CAPS_CONTROL  | DOMAIN_CAPS_HARDWARE | \
+                              DOMAIN_CAPS_XENSTORE | DOMAIN_CAPS_DEVICE_MODEL )
 
 #endif /* __XEN_PUBLIC_BOOTFDT_H__ */
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 5b2063eed9..2280489be2 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -66,9 +66,11 @@ struct xen_domctl_createdomain {
 #define XEN_DOMCTL_CDF_nested_virt    (1U << _XEN_DOMCTL_CDF_nested_virt)
 /* Should we expose the vPMU to the guest? */
 #define XEN_DOMCTL_CDF_vpmu           (1U << 7)
+/* Allow domain to provide device model for multiple other domains */
+#define XEN_DOMCTL_CDF_device_model   (1U << 8)
 
 /* Max XEN_DOMCTL_CDF_* constant.  Used for ABI checking. */
-#define XEN_DOMCTL_CDF_MAX XEN_DOMCTL_CDF_vpmu
+#define XEN_DOMCTL_CDF_MAX XEN_DOMCTL_CDF_device_model
 
     uint32_t flags;
 
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index fe53d4fab7..9863603d93 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -1148,6 +1148,15 @@ static always_inline bool is_control_domain(const struct domain *d)
     return evaluate_nospec(d->is_privileged);
 }
 
+/* This check is for functionality specific to a device model domain */
+static always_inline bool is_dm_domain(const struct domain *d)
+{
+    if ( IS_ENABLED(CONFIG_PV_SHIM_EXCLUSIVE) )
+        return false;
+
+    return evaluate_nospec(d->options & XEN_DOMCTL_CDF_device_model);
+}
+
 #define VM_ASSIST(d, t) (test_bit(VMASST_TYPE_ ## t, &(d)->vm_assist))
 
 static always_inline bool is_pv_domain(const struct domain *d)
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index d8df3f66c4..477fadaefd 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -91,6 +91,8 @@ static always_inline int xsm_default_action(
             return 0;
         fallthrough;
     case XSM_DM_PRIV:
+        if ( is_dm_domain(src) )
+            return 0;
         if ( target && evaluate_nospec(src->target == target) )
             return 0;
         fallthrough;
-- 
2.49.0
On 11.06.2025 00:57, Jason Andryuk wrote: > To add more flexibility in system configuration add the new > DOMAIN_CAPS_DEVICE_MODEL flag and XEN_DOMCTL_CDF_device_model. > > Thie new flag corresponds to allowing XSM_DM_PRIV for the domain. This > will enable running device model emulators (QEMU) from the assigne > domain for multiple target domains. > > Stubdoms assign target allowing the stubdom to serve as the device > model for a single domain. This new flag allows the single domain to > provide emulators for multiple guests. > > The specific scenario is a disaggregated system with the hardware domain > providing device models for muitple guest domains. Why the hardware domain? Unless a DM also needs access to some of the physical hardware, it ought to run in a separate domain. Conceivably such a domain could service multiple guests, so maybe the "single target" concept presently used for stubdom simply needed extending? Jan
On Wed, 11 Jun 2025, Jan Beulich wrote: > On 11.06.2025 00:57, Jason Andryuk wrote: > > To add more flexibility in system configuration add the new > > DOMAIN_CAPS_DEVICE_MODEL flag and XEN_DOMCTL_CDF_device_model. > > > > Thie new flag corresponds to allowing XSM_DM_PRIV for the domain. This > > will enable running device model emulators (QEMU) from the assigne > > domain for multiple target domains. > > > > Stubdoms assign target allowing the stubdom to serve as the device > > model for a single domain. This new flag allows the single domain to > > provide emulators for multiple guests. > > > > The specific scenario is a disaggregated system with the hardware domain > > providing device models for muitple guest domains. > > Why the hardware domain? Unless a DM also needs access to some of the > physical hardware, it ought to run in a separate domain. Conceivably > such a domain could service multiply guests, so maybe the "single > target" concept presently used for stubdom simply needed extending? Not necessarily. While it is possible to have driver domains, it is not the default configuration. In a default configuration, the hardware domain gets all the hardware by default and therefore will also run the PV backends and Virtio backends. The Virtio backends require DM hypercalls. Let me elaborate further. In the datacenter, we have Dom0 typically with all the hardware, the backends (PV and Virtio), and also the toolstack. Then all other domains are created dynamically by the toolstack. Driver domains are possible but not very common. In automotive/embedded, the total number of domains is static, so we can create them using dom0less. We don't need the toolstack to create VMs. Also, we have safety concerns, so we want to take away as much privileges as possible from Dom0. This is easy because thanks to dom0less, we don't need the toolstack and we don't need to create VMs dynamically. 
So the model is that Dom0 becomes the hardware domain: it has all the drivers and backends but it is not privileged in the sense of creating/destroying other VMs. If a user wants to have Dom0 "super powers", they can create an optional Control Domain. The Control Domain is expected to be tiny, such as XTF or Zephyr. It will have the ability that Dom0 used to have but without the drivers. From a privilege perspective, the Control Domain could create additional VMs, but in automotive/embedded it is not expected to be a use-case because the total number of VMs is still static. So your point about driver domains. Yes, one can have driver domains the same way that one can have driver domains in the datacenter but it is not the default. The new default for embedded is what I described above and I think it is a very widely applicable concept across industries: automotive, industrial, robotics, etc. and also across vendors: AMD, Xilinx, Renesas, EPAM, ARM, etc.
On 14.06.2025 00:47, Stefano Stabellini wrote: > On Wed, 11 Jun 2025, Jan Beulich wrote: >> On 11.06.2025 00:57, Jason Andryuk wrote: >>> To add more flexibility in system configuration add the new >>> DOMAIN_CAPS_DEVICE_MODEL flag and XEN_DOMCTL_CDF_device_model. >>> >>> Thie new flag corresponds to allowing XSM_DM_PRIV for the domain. This >>> will enable running device model emulators (QEMU) from the assigne >>> domain for multiple target domains. >>> >>> Stubdoms assign target allowing the stubdom to serve as the device >>> model for a single domain. This new flag allows the single domain to >>> provide emulators for multiple guests. >>> >>> The specific scenario is a disaggregated system with the hardware domain >>> providing device models for muitple guest domains. >> >> Why the hardware domain? Unless a DM also needs access to some of the >> physical hardware, it ought to run in a separate domain. Conceivably >> such a domain could service multiply guests, so maybe the "single >> target" concept presently used for stubdom simply needed extending? > > Not necessarily. While it is possible to have driver domains, it is not > the default configuration. > > In a default configuration, the hardware domain gets all the hardware by > default and therefore will also run the PV backends and Virtio backends. > The Virtio backends require DM hypercalls. Let me elaborate further. > > In the datacenter, we have Dom0 typically with all the hardware, the > backends (PV and Virtio), and also the toolstack. Then all other domains > are created dynamically by the toolstack. Driver domains are possible > but not very common. > > In automotive/embedded, the total number of domains is static, so we can > create them using dom0less. We don't need the toolstack to create VMs. > Also, we have safety concerns, so we want to take away as much > privileges as possible from Dom0. At least purely by the wording, this ... 
> This is easy because thanks to > dom0less, we don't need the toolstack and we don't need to create VMs > dynamically. > > So the model is that Dom0 becomes the hardware domain: it has all the > drivers and backends but it is not privileged in the sense of > creating/destroying other VMs. If a user wants to have Dom0 "super > powers", they can create an optional Control Domain. The Control Domain > is expected to be tiny, such as XTF or Zephyr. It will have the ability > that Dom0 used to have but without the drivers. From a privilege > perspective, the Control Domain could create additional VMs, but in > automotive/embedded it is not expected to be a use-case because the > total number of VMs is still static. > > So your point about driver domains. Yes, one can have driver domains the > same way that one can have driver domains in the datacenter but it is > not the default. ... kind of contradicts this: Running e.g. qemu in Dom0 gives Dom0 quite a bit of extra privilege. (And no, the term "driver domain" does not describe a domain running DMs, imo.) Jan > The new default for embedded is what I described above > and I think it is a very widely applicable concept across industries: > automotive, industrial, robotics, etc. and also across vendors: AMD, > Xilinx, Renesas, EPAM, ARM, etc.
On Mon, 16 Jun 2025, Jan Beulich wrote: > On 14.06.2025 00:47, Stefano Stabellini wrote: > > On Wed, 11 Jun 2025, Jan Beulich wrote: > >> On 11.06.2025 00:57, Jason Andryuk wrote: > >>> To add more flexibility in system configuration add the new > >>> DOMAIN_CAPS_DEVICE_MODEL flag and XEN_DOMCTL_CDF_device_model. > >>> > >>> Thie new flag corresponds to allowing XSM_DM_PRIV for the domain. This > >>> will enable running device model emulators (QEMU) from the assigne > >>> domain for multiple target domains. > >>> > >>> Stubdoms assign target allowing the stubdom to serve as the device > >>> model for a single domain. This new flag allows the single domain to > >>> provide emulators for multiple guests. > >>> > >>> The specific scenario is a disaggregated system with the hardware domain > >>> providing device models for muitple guest domains. > >> > >> Why the hardware domain? Unless a DM also needs access to some of the > >> physical hardware, it ought to run in a separate domain. Conceivably > >> such a domain could service multiply guests, so maybe the "single > >> target" concept presently used for stubdom simply needed extending? > > > > Not necessarily. While it is possible to have driver domains, it is not > > the default configuration. > > > > In a default configuration, the hardware domain gets all the hardware by > > default and therefore will also run the PV backends and Virtio backends. > > The Virtio backends require DM hypercalls. Let me elaborate further. > > > > In the datacenter, we have Dom0 typically with all the hardware, the > > backends (PV and Virtio), and also the toolstack. Then all other domains > > are created dynamically by the toolstack. Driver domains are possible > > but not very common. > > > > In automotive/embedded, the total number of domains is static, so we can > > create them using dom0less. We don't need the toolstack to create VMs. 
> > Also, we have safety concerns, so we want to take away as much > > privileges as possible from Dom0. > > At least purely by the wording, this ... > > > This is easy because thanks to > > dom0less, we don't need the toolstack and we don't need to create VMs > > dynamically. > > > > So the model is that Dom0 becomes the hardware domain: it has all the > > drivers and backends but it is not privileged in the sense of > > creating/destroying other VMs. If a user wants to have Dom0 "super > > powers", they can create an optional Control Domain. The Control Domain > > is expected to be tiny, such as XTF or Zephyr. It will have the ability > > that Dom0 used to have but without the drivers. From a privilege > > perspective, the Control Domain could create additional VMs, but in > > automotive/embedded it is not expected to be a use-case because the > > total number of VMs is still static. > > > > So your point about driver domains. Yes, one can have driver domains the > > same way that one can have driver domains in the datacenter but it is > > not the default. > > ... kind of contradicts this: Running e.g. qemu in Dom0 gives Dom0 quite > a bit of extra privilege. Yes, in an ideal world that would not be necessary. However, in automotive Virtio has become the standard. While there are efforts ongoing to rework the Virtio protocol to have a better security/safety profile, we need to provide something that works today. Even PV drivers are not perfect in that regard because I don't think we can claim they are free from interference but that is another topic. In order to provide something that works today, we need to have support for virtio backends in the hardware domain. Like you said, that gives quite a bit of extra privilege to the hardware domain which is not acceptable when targeting a "safe" VM such as the Control Domain. Thus, we have another series to restrict DM and foreign mapping hypercalls from being able to target "safe" domains. 
In other words, the patch series will prevent the hardware domain from being able to target the Control Domain or another DomU configured as "safe" with DM hypercalls or foreign mapping hypercalls. It will be up to the user to decide which domUs the hardware domain will be able to target. That way, the user will still be able to configure one or more VMs to be completely protected from interference from the hardware domain at the cost of having no (traditional) virtio devices.
On 6/13/25 18:47, Stefano Stabellini wrote: > On Wed, 11 Jun 2025, Jan Beulich wrote: >> On 11.06.2025 00:57, Jason Andryuk wrote: >>> To add more flexibility in system configuration add the new >>> DOMAIN_CAPS_DEVICE_MODEL flag and XEN_DOMCTL_CDF_device_model. >>> >>> Thie new flag corresponds to allowing XSM_DM_PRIV for the domain. This >>> will enable running device model emulators (QEMU) from the assigne >>> domain for multiple target domains. >>> >>> Stubdoms assign target allowing the stubdom to serve as the device >>> model for a single domain. This new flag allows the single domain to >>> provide emulators for multiple guests. >>> >>> The specific scenario is a disaggregated system with the hardware domain >>> providing device models for muitple guest domains. >> >> Why the hardware domain? Unless a DM also needs access to some of the >> physical hardware, it ought to run in a separate domain. Conceivably >> such a domain could service multiply guests, so maybe the "single >> target" concept presently used for stubdom simply needed extending? > > Not necessarily. While it is possible to have driver domains, it is not > the default configuration. > > In a default configuration, the hardware domain gets all the hardware by > default and therefore will also run the PV backends and Virtio backends. > The Virtio backends require DM hypercalls. Let me elaborate further. > > In the datacenter, we have Dom0 typically with all the hardware, the > backends (PV and Virtio), and also the toolstack. Then all other domains > are created dynamically by the toolstack. Driver domains are possible > but not very common. > > In automotive/embedded, the total number of domains is static, so we can > create them using dom0less. We don't need the toolstack to create VMs. > Also, we have safety concerns, so we want to take away as much > privileges as possible from Dom0. 
This is easy because thanks to > dom0less, we don't need the toolstack and we don't need to create VMs > dynamically. > > So the model is that Dom0 becomes the hardware domain: it has all the > drivers and backends but it is not privileged in the sense of > creating/destroying other VMs. If a user wants to have Dom0 "super > powers", they can create an optional Control Domain. The Control Domain > is expected to be tiny, such as XTF or Zephyr. It will have the ability > that Dom0 used to have but without the drivers. From a privilege > perspective, the Control Domain could create additional VMs, but in > automotive/embedded it is not expected to be a use-case because the > total number of VMs is still static. > > So your point about driver domains. Yes, one can have driver domains the > same way that one can have driver domains in the datacenter but it is > not the default. The new default for embedded is what I described above > and I think it is a very widely applicable concept across industries: > automotive, industrial, robotics, etc. and also across vendors: AMD, > Xilinx, Renesas, EPAM, ARM, etc. I think the benefits of this are much reduced as long as the hardware domain is not strongly isolated from the other domains, in the sense that the hardware domain being able to compromise other domains is not considered a security vulnerability. Specifically, in safety-critical scenarios the hardware domain (which, to the best of my understanding, generally runs Linux) must not be able to compromise any of the safety- critical domains. This is, of course, achievable, but my understanding is that it isn't something guaranteed by upstream Xen. Rather, each user must ensure it by assigning any hardware that could compromise Xen to the control domain or a quarantine domain. Could this be included in documentation? -- Sincerely, Demi Marie Obenour (she/her/hers)
On Fri, 13 Jun 2025, Demi Marie Obenour wrote: > On 6/13/25 18:47, Stefano Stabellini wrote: > > On Wed, 11 Jun 2025, Jan Beulich wrote: > >> On 11.06.2025 00:57, Jason Andryuk wrote: > >>> To add more flexibility in system configuration add the new > >>> DOMAIN_CAPS_DEVICE_MODEL flag and XEN_DOMCTL_CDF_device_model. > >>> > >>> Thie new flag corresponds to allowing XSM_DM_PRIV for the domain. This > >>> will enable running device model emulators (QEMU) from the assigne > >>> domain for multiple target domains. > >>> > >>> Stubdoms assign target allowing the stubdom to serve as the device > >>> model for a single domain. This new flag allows the single domain to > >>> provide emulators for multiple guests. > >>> > >>> The specific scenario is a disaggregated system with the hardware domain > >>> providing device models for muitple guest domains. > >> > >> Why the hardware domain? Unless a DM also needs access to some of the > >> physical hardware, it ought to run in a separate domain. Conceivably > >> such a domain could service multiply guests, so maybe the "single > >> target" concept presently used for stubdom simply needed extending? > > > > Not necessarily. While it is possible to have driver domains, it is not > > the default configuration. > > > > In a default configuration, the hardware domain gets all the hardware by > > default and therefore will also run the PV backends and Virtio backends. > > The Virtio backends require DM hypercalls. Let me elaborate further. > > > > In the datacenter, we have Dom0 typically with all the hardware, the > > backends (PV and Virtio), and also the toolstack. Then all other domains > > are created dynamically by the toolstack. Driver domains are possible > > but not very common. > > > > In automotive/embedded, the total number of domains is static, so we can > > create them using dom0less. We don't need the toolstack to create VMs. 
> > Also, we have safety concerns, so we want to take away as much > > privileges as possible from Dom0. This is easy because thanks to > > dom0less, we don't need the toolstack and we don't need to create VMs > > dynamically. > > > > So the model is that Dom0 becomes the hardware domain: it has all the > > drivers and backends but it is not privileged in the sense of > > creating/destroying other VMs. If a user wants to have Dom0 "super > > powers", they can create an optional Control Domain. The Control Domain > > is expected to be tiny, such as XTF or Zephyr. It will have the ability > > that Dom0 used to have but without the drivers. From a privilege > > perspective, the Control Domain could create additional VMs, but in > > automotive/embedded it is not expected to be a use-case because the > > total number of VMs is still static. > > > > So your point about driver domains. Yes, one can have driver domains the > > same way that one can have driver domains in the datacenter but it is > > not the default. The new default for embedded is what I described above > > and I think it is a very widely applicable concept across industries: > > automotive, industrial, robotics, etc. and also across vendors: AMD, > > Xilinx, Renesas, EPAM, ARM, etc. > > I think the benefits of this are much reduced as long as the hardware > domain is not strongly isolated from the other domains, in the sense that > the hardware domain being able to compromise other domains is not > considered a security vulnerability. Specifically, in safety-critical > scenarios the hardware domain (which, to the best of my understanding, > generally runs Linux) must not be able to compromise any of the safety- > critical domains. > > This is, of course, achievable, but my understanding is that it isn't > something guaranteed by upstream Xen. Rather, each user must ensure > it by assigning any hardware that could compromise Xen to the control > domain or a quarantine domain. 
> > Could this be included in documentation? Yes, I agree that it should be included in the documentation.
On 2025-06-11 09:24, Jan Beulich wrote: > On 11.06.2025 00:57, Jason Andryuk wrote: >> To add more flexibility in system configuration add the new >> DOMAIN_CAPS_DEVICE_MODEL flag and XEN_DOMCTL_CDF_device_model. >> >> Thie new flag corresponds to allowing XSM_DM_PRIV for the domain. This >> will enable running device model emulators (QEMU) from the assigne >> domain for multiple target domains. >> >> Stubdoms assign target allowing the stubdom to serve as the device >> model for a single domain. This new flag allows the single domain to >> provide emulators for multiple guests. >> >> The specific scenario is a disaggregated system with the hardware domain >> providing device models for muitple guest domains. > > Why the hardware domain? Unless a DM also needs access to some of the > physical hardware, it ought to run in a separate domain. Conceivably > such a domain could service multiply guests, so maybe the "single > target" concept presently used for stubdom simply needed extending? One configuration is the hardware domain running QEMU for the virtio-gpu. In an earlier iteration, I allowed XSM_DM_PRIV for is_hardware_domain(). Rightfully, there was some questioning of that hardcoding. Adding a new flag allows it to be configurable. Maybe target could be extended. I was thinking that could be left for the stubdom case as it is today. i.e. a 1-1 device model. But a 1-N case could be handled this way. Today dom0 XSM_DM_PRIV falls through to is_control_domain(). The idea was to place a new check directly corresponding to XSM_DM_PRIV. Regards, Jason
Acked-by: Christian Lindig <christian.lindig@cloud.com>
> On 10 Jun 2025, at 23:57, Jason Andryuk <jason.andryuk@amd.com> wrote:
> 
> To add more flexibility in system configuration add the new
> DOMAIN_CAPS_DEVICE_MODEL flag and XEN_DOMCTL_CDF_device_model.
> 
> Thie new flag corresponds to allowing XSM_DM_PRIV for the domain.  This
> will enable running device model emulators (QEMU) from the assigne
> domain for multiple target domains.
> 
> Stubdoms assign target allowing the stubdom to serve as the device
> model for a single domain.  This new flag allows the single domain to
> provide emulators for multiple guests.
> 
> The specific scenario is a disaggregated system with the hardware domain
> providing device models for muitple guest domains.
> 
> The OCaml code needs the flag added in as well.
> 
> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
> ---
> tools/ocaml/libs/xc/xenctrl.ml          |  1 +
> tools/ocaml/libs/xc/xenctrl.mli         |  1 +
> xen/arch/arm/domain.c                   |  3 ++-
> xen/common/device-tree/dom0less-build.c |  3 +++
> xen/common/domain.c                     |  3 ++-
> xen/include/public/bootfdt.h            | 12 ++++++++++--
> xen/include/public/domctl.h             |  4 +++-
> xen/include/xen/sched.h                 |  9 +++++++++
> xen/include/xsm/dummy.h                 |  2 ++
> 9 files changed, 33 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
> index 2690f9a923..ef0c5dca2a 100644
> --- a/tools/ocaml/libs/xc/xenctrl.ml
> +++ b/tools/ocaml/libs/xc/xenctrl.ml
> @@ -70,6 +70,7 @@ type domain_create_flag =
>   | CDF_IOMMU
>   | CDF_NESTED_VIRT
>   | CDF_VPMU
> +  | CDF_DEVICE_MODEL
> 
> type domain_create_iommu_opts =
>   | IOMMU_NO_SHAREPT
> diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli
> index febbe1f6ae..c0156fa5c6 100644
> --- a/tools/ocaml/libs/xc/xenctrl.mli
> +++ b/tools/ocaml/libs/xc/xenctrl.mli
> @@ -63,6 +63,7 @@ type domain_create_flag =
>   | CDF_IOMMU
>   | CDF_NESTED_VIRT
>   | CDF_VPMU
> +  | CDF_DEVICE_MODEL
> 
> type domain_create_iommu_opts =
>   | IOMMU_NO_SHAREPT
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 45aeb8bddc..12fda0762f 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -612,7 +612,8 @@ int arch_sanitise_domain_config(struct xen_domctl_createdomain *config)
>     unsigned int max_vcpus;
>     unsigned int flags_required = (XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap);
>     unsigned int flags_optional = (XEN_DOMCTL_CDF_iommu | XEN_DOMCTL_CDF_vpmu |
> -                                   XEN_DOMCTL_CDF_xs_domain );
> +                                   XEN_DOMCTL_CDF_xs_domain |
> +                                   XEN_DOMCTL_CDF_device_model);
>     unsigned int sve_vl_bits = sve_decode_vl(config->arch.sve_vl);
> 
>     if ( (config->flags & ~flags_optional) != flags_required )
> diff --git a/xen/common/device-tree/dom0less-build.c b/xen/common/device-tree/dom0less-build.c
> index 3d503c6973..993ff32f5e 100644
> --- a/xen/common/device-tree/dom0less-build.c
> +++ b/xen/common/device-tree/dom0less-build.c
> @@ -884,6 +884,9 @@ void __init create_domUs(void)
>                 d_cfg.flags |= XEN_DOMCTL_CDF_xs_domain;
>                 d_cfg.max_evtchn_port = -1;
>             }
> +
> +            if ( val & DOMAIN_CAPS_DEVICE_MODEL )
> +                d_cfg.flags |= XEN_DOMCTL_CDF_device_model;
>         }
> 
>         if ( dt_find_property(node, "xen,static-mem", NULL) )
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 153cd75340..ab2c8f864d 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -721,7 +721,8 @@ static int sanitise_domain_config(struct xen_domctl_createdomain *config)
>          ~(XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap |
>            XEN_DOMCTL_CDF_s3_integrity | XEN_DOMCTL_CDF_oos_off |
>            XEN_DOMCTL_CDF_xs_domain | XEN_DOMCTL_CDF_iommu |
> -           XEN_DOMCTL_CDF_nested_virt | XEN_DOMCTL_CDF_vpmu) )
> +           XEN_DOMCTL_CDF_nested_virt | XEN_DOMCTL_CDF_vpmu |
> +           XEN_DOMCTL_CDF_device_model) )
>     {
>         dprintk(XENLOG_INFO, "Unknown CDF flags %#x\n", config->flags);
>         return -EINVAL;
> diff --git a/xen/include/public/bootfdt.h b/xen/include/public/bootfdt.h
> index 86c46b42a9..c6b5afc76a 100644
> --- a/xen/include/public/bootfdt.h
> +++ b/xen/include/public/bootfdt.h
> @@ -25,7 +25,15 @@
> #define DOMAIN_CAPS_HARDWARE (1U << 1)
> /* Xenstore domain. */
> #define DOMAIN_CAPS_XENSTORE (1U << 2)
> -#define DOMAIN_CAPS_MASK     (DOMAIN_CAPS_CONTROL | DOMAIN_CAPS_HARDWARE | \
> -                              DOMAIN_CAPS_XENSTORE)
> +/*
> + * Device model capability allows the use of the dm_op hypercalls to provide
> + * the device model emulation (run QEMU) for other domains.  This is a
> + * subset of the Control capability which can be granted to the
> + * Hardware domain for running QEMU.
> + */
> +#define DOMAIN_CAPS_DEVICE_MODEL (1U << 3)
> +
> +#define DOMAIN_CAPS_MASK     (DOMAIN_CAPS_CONTROL  | DOMAIN_CAPS_HARDWARE | \
> +                              DOMAIN_CAPS_XENSTORE | DOMAIN_CAPS_DEVICE_MODEL )
> 
> #endif /* __XEN_PUBLIC_BOOTFDT_H__ */
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 5b2063eed9..2280489be2 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -66,9 +66,11 @@ struct xen_domctl_createdomain {
> #define XEN_DOMCTL_CDF_nested_virt    (1U << _XEN_DOMCTL_CDF_nested_virt)
> /* Should we expose the vPMU to the guest? */
> #define XEN_DOMCTL_CDF_vpmu           (1U << 7)
> +/* Allow domain to provide device model for multiple other domains */
> +#define XEN_DOMCTL_CDF_device_model   (1U << 8)
> 
> /* Max XEN_DOMCTL_CDF_* constant.  Used for ABI checking. */
> -#define XEN_DOMCTL_CDF_MAX XEN_DOMCTL_CDF_vpmu
> +#define XEN_DOMCTL_CDF_MAX XEN_DOMCTL_CDF_device_model
> 
>     uint32_t flags;
> 
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index fe53d4fab7..9863603d93 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -1148,6 +1148,15 @@ static always_inline bool is_control_domain(const struct domain *d)
>     return evaluate_nospec(d->is_privileged);
> }
> 
> +/* This check is for functionality specific to a device model domain */
> +static always_inline bool is_dm_domain(const struct domain *d)
> +{
> +    if ( IS_ENABLED(CONFIG_PV_SHIM_EXCLUSIVE) )
> +        return false;
> +
> +    return evaluate_nospec(d->options & XEN_DOMCTL_CDF_device_model);
> +}
> +
> #define VM_ASSIST(d, t) (test_bit(VMASST_TYPE_ ## t, &(d)->vm_assist))
> 
> static always_inline bool is_pv_domain(const struct domain *d)
> diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
> index d8df3f66c4..477fadaefd 100644
> --- a/xen/include/xsm/dummy.h
> +++ b/xen/include/xsm/dummy.h
> @@ -91,6 +91,8 @@ static always_inline int xsm_default_action(
>             return 0;
>         fallthrough;
>     case XSM_DM_PRIV:
> +        if ( is_dm_domain(src) )
> +            return 0;
>         if ( target && evaluate_nospec(src->target == target) )
>             return 0;
>         fallthrough;
> -- 
> 2.49.0
> 
                
            © 2016 - 2025 Red Hat, Inc.