[PATCH 05/23] xen/arm: Add capabilities to dom0less

Jason Andryuk posted 23 patches 3 days ago
[PATCH 05/23] xen/arm: Add capabilities to dom0less
Posted by Jason Andryuk 3 days ago
Add capabilities property to dom0less to allow building a
disaggregated system.

Introduce bootfdt.h to contain these constants.

When using the hardware or xenstore capabilities, adjust the grant and
event channel limits similar to dom0.

Also for the hardware domain, set directmap and iommu.  This brings its
configuration in line with a dom0.

Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
---
There is overlap with hyperlaunch.  The numeric values are the same.
Hyperlaunch doesn't expose the values in a public header as done here.
Is this to be expected for dom0less?  It seems most of dom0less isn't in
a header, but just in docs.

Hyperlaunch uses BUILD_CAPS_, but I chose DOMAIN_CAPS_ since there are
domain-level capabilities.

Only a single xenstore and hardware domain make sense.  A check to limit
to only a single hardware domain is in place - building two breaks.  But
nothing prevents the dom0less configuration from only having multiple
xenstore domains.  Each xenstore domain would have slightly more
permissions, but only the last one would be used.
---
 docs/misc/arm/device-tree/booting.txt | 11 ++++++++++
 xen/arch/arm/dom0less-build.c         | 29 +++++++++++++++++++++++++++
 xen/arch/arm/domain.c                 |  3 ++-
 xen/include/public/bootfdt.h          | 27 +++++++++++++++++++++++++
 4 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 xen/include/public/bootfdt.h

diff --git a/docs/misc/arm/device-tree/booting.txt b/docs/misc/arm/device-tree/booting.txt
index ac781c9cc8..490c792ddf 100644
--- a/docs/misc/arm/device-tree/booting.txt
+++ b/docs/misc/arm/device-tree/booting.txt
@@ -167,6 +167,17 @@ with the following properties:
     Refer to docs/misc/cache_coloring.rst for syntax. This option is applicable
     only to Arm64 guests.
 
+- capabilities
+    Optional.  A bit field of domain capabilities for a disaggregated
+    system.  A traditional dom0 has all all of these capabilities, and a
+    domU has none of them.
+
+    0x1 DOMAIN_CAPS_CONTROL  - A privileged, control domain
+    0x2 DOMAIN_CAPS_HARDWARE - The hardware domain - there can be only 1
+    0x4 DOMAIN_CAPS_XENSTORE - The xenstore domain - there can be only 1
+
+    The default is no capabilities.
+
 - vpl011
 
     An empty property to enable/disable a virtual pl011 for the guest to
diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
index 5a7871939b..068bf99294 100644
--- a/xen/arch/arm/dom0less-build.c
+++ b/xen/arch/arm/dom0less-build.c
@@ -12,6 +12,7 @@
 #include <xen/sizes.h>
 #include <xen/vmap.h>
 
+#include <public/bootfdt.h>
 #include <public/io/xs_wire.h>
 
 #include <asm/arm64/sve.h>
@@ -994,6 +995,34 @@ void __init create_domUs(void)
         if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
             panic("No more domain IDs available\n");
 
+        if ( dt_property_read_u32(node, "capabilities", &val) )
+        {
+            if ( val & ~DOMAIN_CAPS_MASK )
+                panic("Invalid capabilities (%"PRIx32")\n", val);
+
+            if ( val & DOMAIN_CAPS_CONTROL )
+                flags |= CDF_privileged;
+
+            if ( val & DOMAIN_CAPS_HARDWARE )
+            {
+                if ( hardware_domain )
+                    panic("Only 1 hardware domain can be specified! (%pd)\n",
+                           hardware_domain);
+
+                d_cfg.max_grant_frames = gnttab_dom0_frames();
+                d_cfg.max_evtchn_port = -1;
+                flags |= CDF_hardware;
+                flags |= CDF_directmap;
+                iommu = true;
+            }
+
+            if ( val & DOMAIN_CAPS_XENSTORE )
+            {
+                d_cfg.flags |= XEN_DOMCTL_CDF_xs_domain;
+                d_cfg.max_evtchn_port = -1;
+            }
+        }
+
         if ( dt_find_property(node, "xen,static-mem", NULL) )
         {
             if ( llc_coloring_enabled )
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 3ba959f866..dc4b4e84c1 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -608,7 +608,8 @@ int arch_sanitise_domain_config(struct xen_domctl_createdomain *config)
 {
     unsigned int max_vcpus;
     unsigned int flags_required = (XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap);
-    unsigned int flags_optional = (XEN_DOMCTL_CDF_iommu | XEN_DOMCTL_CDF_vpmu);
+    unsigned int flags_optional = (XEN_DOMCTL_CDF_iommu | XEN_DOMCTL_CDF_vpmu |
+                                   XEN_DOMCTL_CDF_xs_domain );
     unsigned int sve_vl_bits = sve_decode_vl(config->arch.sve_vl);
 
     if ( (config->flags & ~flags_optional) != flags_required )
diff --git a/xen/include/public/bootfdt.h b/xen/include/public/bootfdt.h
new file mode 100644
index 0000000000..4e87aca8ac
--- /dev/null
+++ b/xen/include/public/bootfdt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Xen Device Tree boot information
+ *
+ * Information for configuring Xen domains created at boot time.
+ */
+
+#ifndef __XEN_PUBLIC_BOOTFDT_H__
+#define __XEN_PUBLIC_BOOTFDT_H__
+
+/* Domain Capabilities specified in the "capabilities" property.  Use of
+ * this property allows splitting up the monolithic dom0 into separate,
+ * less privileged components.  A regular domU has no capabilities
+ * (which is the default if nothing is specified).  A traditional dom0
+ * has all three capabilities.*/
+
+/* Control/Privileged domain capable of affecting other domains. */
+#define DOMAIN_CAPS_CONTROL  (1 << 0)
+/* Hardware domain controlling physical hardware.  Typically providing
+ * backends to other domains.  */
+#define DOMAIN_CAPS_HARDWARE (1 << 1)
+/* Xenstore domain. */
+#define DOMAIN_CAPS_XENSTORE (1 << 2)
+#define DOMAIN_CAPS_MASK     (DOMAIN_CAPS_CONTROL | DOMAIN_CAPS_HARDWARE | \
+                              DOMAIN_CAPS_XENSTORE)
+
+#endif /* __XEN_PUBLIC_BOOTFDT_H__ */
-- 
2.48.1
Re: [PATCH 05/23] xen/arm: Add capabilities to dom0less
Posted by Julien Grall 2 days, 13 hours ago
Hi,

On 06/03/2025 22:03, Jason Andryuk wrote:
> Add capabilities property to dom0less to allow building a
> disaggregated system.
> 
> Introduce bootfdt.h to contain these constants.
> 
> When using the hardware or xenstore capabilities, adjust the grant and
> event channel limits similar to dom0.
 > > Also for the hardware domain, set directmap and iommu.  This brings its
> configuration in line with a dom0.

Looking the device tree bindings, a user would be allowed to disable 
"passthrough" or even "directmap". This means, we would never be able to 
disable "directmap" for the hardware domain in the future with the 
existing property (this is to avoid break backwards compatibility).

Instead, I think we should check what the user provided and confirm this 
is matching our expectation for an hardware domain.

That said, I am not entirely sure why we should force directmap for the 
HW domain. We are starting from a clean slate, so I think it would be 
better to have by default no directmap and imposing the presence of an 
IOMMU in the system.

Lastly, can you provide an example of what the hardware domain DT node 
would looke like?

> 
> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
> ---
> There is overlap with hyperlaunch.  The numeric values are the same.
> Hyperlaunch doesn't expose the values in a public header as done here.
> Is this to be expected for dom0less?  It seems most of dom0less isn't in
> a header, but just in docs.
> 
> Hyperlaunch uses BUILD_CAPS_, but I chose DOMAIN_CAPS_ since there are
> domain-level capabilities.
> 
> Only a single xenstore and hardware domain make sense.  A check to limit
> to only a single hardware domain is in place - building two breaks.  But
> nothing prevents the dom0less configuration from only having multiple
> xenstore domains.  Each xenstore domain would have slightly more
> permissions, but only the last one would be used.
 > --->   docs/misc/arm/device-tree/booting.txt | 11 ++++++++++
>   xen/arch/arm/dom0less-build.c         | 29 +++++++++++++++++++++++++++
>   xen/arch/arm/domain.c                 |  3 ++-
>   xen/include/public/bootfdt.h          | 27 +++++++++++++++++++++++++
>   4 files changed, 69 insertions(+), 1 deletion(-)
>   create mode 100644 xen/include/public/bootfdt.h
> 
> diff --git a/docs/misc/arm/device-tree/booting.txt b/docs/misc/arm/device-tree/booting.txt
> index ac781c9cc8..490c792ddf 100644
> --- a/docs/misc/arm/device-tree/booting.txt
> +++ b/docs/misc/arm/device-tree/booting.txt
> @@ -167,6 +167,17 @@ with the following properties:
>       Refer to docs/misc/cache_coloring.rst for syntax. This option is applicable
>       only to Arm64 guests.
>   
> +- capabilities
> +    Optional.  A bit field of domain capabilities for a disaggregated
> +    system.  A traditional dom0 has all all of these capabilities, and a
> +    domU has none of them.
> +
> +    0x1 DOMAIN_CAPS_CONTROL  - A privileged, control domain
> +    0x2 DOMAIN_CAPS_HARDWARE - The hardware domain - there can be only 1
> +    0x4 DOMAIN_CAPS_XENSTORE - The xenstore domain - there can be only 1
> +
> +    The default is no capabilities.
> +
>   - vpl011
>   
>       An empty property to enable/disable a virtual pl011 for the guest to
> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
> index 5a7871939b..068bf99294 100644
> --- a/xen/arch/arm/dom0less-build.c
> +++ b/xen/arch/arm/dom0less-build.c
> @@ -12,6 +12,7 @@
>   #include <xen/sizes.h>
>   #include <xen/vmap.h>
>   
> +#include <public/bootfdt.h>
>   #include <public/io/xs_wire.h>
>   
>   #include <asm/arm64/sve.h>
> @@ -994,6 +995,34 @@ void __init create_domUs(void)
>           if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
>               panic("No more domain IDs available\n");
>   
> +        if ( dt_property_read_u32(node, "capabilities", &val) )
> +        {
> +            if ( val & ~DOMAIN_CAPS_MASK )
> +                panic("Invalid capabilities (%"PRIx32")\n", val);
> +
> +            if ( val & DOMAIN_CAPS_CONTROL )
> +                flags |= CDF_privileged;
> +
> +            if ( val & DOMAIN_CAPS_HARDWARE )
> +            {
> +                if ( hardware_domain )
> +                    panic("Only 1 hardware domain can be specified! (%pd)\n",
> +                           hardware_domain);
> +
> +                d_cfg.max_grant_frames = gnttab_dom0_frames();
> +                d_cfg.max_evtchn_port = -1;

What about d_cfg.arch.nr_spis? Are we expecting the user to pass "nr_spis"?

> +                flags |= CDF_hardware;
> +                flags |= CDF_directmap;
 > +                iommu = true;> +            }
> +
> +            if ( val & DOMAIN_CAPS_XENSTORE )
> +            {
> +                d_cfg.flags |= XEN_DOMCTL_CDF_xs_domain;
> +                d_cfg.max_evtchn_port = -1;
> +            }
> +        }
> +
>           if ( dt_find_property(node, "xen,static-mem", NULL) )
>           {
>               if ( llc_coloring_enabled )
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 3ba959f866..dc4b4e84c1 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -608,7 +608,8 @@ int arch_sanitise_domain_config(struct xen_domctl_createdomain *config)
>   {
>       unsigned int max_vcpus;
>       unsigned int flags_required = (XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap);
> -    unsigned int flags_optional = (XEN_DOMCTL_CDF_iommu | XEN_DOMCTL_CDF_vpmu);
> +    unsigned int flags_optional = (XEN_DOMCTL_CDF_iommu | XEN_DOMCTL_CDF_vpmu |
> +                                   XEN_DOMCTL_CDF_xs_domain );
>       unsigned int sve_vl_bits = sve_decode_vl(config->arch.sve_vl);
>   
>       if ( (config->flags & ~flags_optional) != flags_required )
> diff --git a/xen/include/public/bootfdt.h b/xen/include/public/bootfdt.h
> new file mode 100644
> index 0000000000..4e87aca8ac
> --- /dev/null
> +++ b/xen/include/public/bootfdt.h
> @@ -0,0 +1,27 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Xen Device Tree boot information
> + *
> + * Information for configuring Xen domains created at boot time.
> + */
> +
> +#ifndef __XEN_PUBLIC_BOOTFDT_H__
> +#define __XEN_PUBLIC_BOOTFDT_H__
> +
> +/* Domain Capabilities specified in the "capabilities" property.  Use of
> + * this property allows splitting up the monolithic dom0 into separate,
> + * less privileged components.  A regular domU has no capabilities
> + * (which is the default if nothing is specified).  A traditional dom0
> + * has all three capabilities.*/
> +
> +/* Control/Privileged domain capable of affecting other domains. */
> +#define DOMAIN_CAPS_CONTROL  (1 << 0)
> +/* Hardware domain controlling physical hardware.  Typically providing
> + * backends to other domains.  */
> +#define DOMAIN_CAPS_HARDWARE (1 << 1)
> +/* Xenstore domain. */
> +#define DOMAIN_CAPS_XENSTORE (1 << 2)
> +#define DOMAIN_CAPS_MASK     (DOMAIN_CAPS_CONTROL | DOMAIN_CAPS_HARDWARE | \
> +                              DOMAIN_CAPS_XENSTORE)
> +
> +#endif /* __XEN_PUBLIC_BOOTFDT_H__ */

-- 
Julien Grall
Re: [PATCH 05/23] xen/arm: Add capabilities to dom0less
Posted by Jason Andryuk 2 days, 4 hours ago
On 2025-03-07 04:01, Julien Grall wrote:
> Hi,
> 
> On 06/03/2025 22:03, Jason Andryuk wrote:
>> Add capabilities property to dom0less to allow building a
>> disaggregated system.
>>
>> Introduce bootfdt.h to contain these constants.
>>
>> When using the hardware or xenstore capabilities, adjust the grant and
>> event channel limits similar to dom0.
>  > > Also for the hardware domain, set directmap and iommu.  This brings 
> its
>> configuration in line with a dom0.
> 
> Looking the device tree bindings, a user would be allowed to disable 
> "passthrough" or even "directmap". This means, we would never be able to 
> disable "directmap" for the hardware domain in the future with the 
> existing property (this is to avoid break backwards compatibility).
> 
> Instead, I think we should check what the user provided and confirm this 
> is matching our expectation for an hardware domain.
 >
> That said, I am not entirely sure why we should force directmap for the 
> HW domain. We are starting from a clean slate, so I think it would be 
> better to have by default no directmap and imposing the presence of an 
> IOMMU in the system.

Ok, it seems like directmap is not necessary.  It was helpful early on 
to get things booting, but I think it's no longer necessary after 
factoring out construct_hwdom().

What exactly do you mean by imposing with respect to the iommu?  Require 
one, or mirror the dom0 code and set it for the hwdom?

     if ( iommu_enabled )
         dom0_cfg.flags |= XEN_DOMCTL_CDF_iommu;

> Lastly, can you provide an example of what the hardware domain DT node 
> would looke like?

I've attached a dump of /sys/firmware/fdt from hwdom.  (This is direct 
mapped).

>> --- a/xen/arch/arm/dom0less-build.c
>> +++ b/xen/arch/arm/dom0less-build.c
>> @@ -12,6 +12,7 @@
>>   #include <xen/sizes.h>
>>   #include <xen/vmap.h>
>> +#include <public/bootfdt.h>
>>   #include <public/io/xs_wire.h>
>>   #include <asm/arm64/sve.h>
>> @@ -994,6 +995,34 @@ void __init create_domUs(void)
>>           if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
>>               panic("No more domain IDs available\n");
>> +        if ( dt_property_read_u32(node, "capabilities", &val) )
>> +        {
>> +            if ( val & ~DOMAIN_CAPS_MASK )
>> +                panic("Invalid capabilities (%"PRIx32")\n", val);
>> +
>> +            if ( val & DOMAIN_CAPS_CONTROL )
>> +                flags |= CDF_privileged;
>> +
>> +            if ( val & DOMAIN_CAPS_HARDWARE )
>> +            {
>> +                if ( hardware_domain )
>> +                    panic("Only 1 hardware domain can be specified! 
>> (%pd)\n",
>> +                           hardware_domain);
>> +
>> +                d_cfg.max_grant_frames = gnttab_dom0_frames();
>> +                d_cfg.max_evtchn_port = -1;
> 
> What about d_cfg.arch.nr_spis? Are we expecting the user to pass "nr_spis"?

Further down, when nr_spis isn't specified in the DT, it defaults to:
     d_cfg.arch.nr_spis = gic_number_lines() - 32;

Dom0 does:
     /*
      * Xen vGIC supports a maximum of 992 interrupt lines.
      * 32 are substracted to cover local IRQs.
      */
     dom0_cfg.arch.nr_spis = min(gic_number_lines(), (unsigned int) 992) 
- 32;
     if ( gic_number_lines() > 992 )
         printk(XENLOG_WARNING "Maximum number of vGIC IRQs exceeded.\n");

So I think it's fine as-is?  I could add the min() and warning if you 
think it's necessary.

Regards,
Jason/dts-v1/;

/ {
	interrupt-parent = <0x8005>;
	dma-coherent;
	model = "linux,dummy-virt";
	#size-cells = <0x02>;
	#address-cells = <0x02>;
	compatible = "linux,dummy-virt";

	platform-bus@c000000 {
		interrupt-parent = <0x8005>;
		ranges = <0x00 0x00 0xc000000 0x2000000>;
		#address-cells = <0x01>;
		#size-cells = <0x01>;
		compatible = "qemu,platform", "simple-bus";
	};

	fw-cfg@9020000 {
		dma-coherent;
		reg = <0x00 0x9020000 0x00 0x18>;
		compatible = "qemu,fw-cfg-mmio";
	};

	virtio_mmio@a000000 {
		dma-coherent;
		interrupts = <0x00 0x10 0x01>;
		reg = <0x00 0xa000000 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a000200 {
		dma-coherent;
		interrupts = <0x00 0x11 0x01>;
		reg = <0x00 0xa000200 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a000400 {
		dma-coherent;
		interrupts = <0x00 0x12 0x01>;
		reg = <0x00 0xa000400 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a000600 {
		dma-coherent;
		interrupts = <0x00 0x13 0x01>;
		reg = <0x00 0xa000600 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a000800 {
		dma-coherent;
		interrupts = <0x00 0x14 0x01>;
		reg = <0x00 0xa000800 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a000a00 {
		dma-coherent;
		interrupts = <0x00 0x15 0x01>;
		reg = <0x00 0xa000a00 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a000c00 {
		dma-coherent;
		interrupts = <0x00 0x16 0x01>;
		reg = <0x00 0xa000c00 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a000e00 {
		dma-coherent;
		interrupts = <0x00 0x17 0x01>;
		reg = <0x00 0xa000e00 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a001000 {
		dma-coherent;
		interrupts = <0x00 0x18 0x01>;
		reg = <0x00 0xa001000 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a001200 {
		dma-coherent;
		interrupts = <0x00 0x19 0x01>;
		reg = <0x00 0xa001200 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a001400 {
		dma-coherent;
		interrupts = <0x00 0x1a 0x01>;
		reg = <0x00 0xa001400 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a001600 {
		dma-coherent;
		interrupts = <0x00 0x1b 0x01>;
		reg = <0x00 0xa001600 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a001800 {
		dma-coherent;
		interrupts = <0x00 0x1c 0x01>;
		reg = <0x00 0xa001800 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a001a00 {
		dma-coherent;
		interrupts = <0x00 0x1d 0x01>;
		reg = <0x00 0xa001a00 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a001c00 {
		dma-coherent;
		interrupts = <0x00 0x1e 0x01>;
		reg = <0x00 0xa001c00 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a001e00 {
		dma-coherent;
		interrupts = <0x00 0x1f 0x01>;
		reg = <0x00 0xa001e00 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a002000 {
		dma-coherent;
		interrupts = <0x00 0x20 0x01>;
		reg = <0x00 0xa002000 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a002200 {
		dma-coherent;
		interrupts = <0x00 0x21 0x01>;
		reg = <0x00 0xa002200 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a002400 {
		dma-coherent;
		interrupts = <0x00 0x22 0x01>;
		reg = <0x00 0xa002400 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a002600 {
		dma-coherent;
		interrupts = <0x00 0x23 0x01>;
		reg = <0x00 0xa002600 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a002800 {
		dma-coherent;
		interrupts = <0x00 0x24 0x01>;
		reg = <0x00 0xa002800 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a002a00 {
		dma-coherent;
		interrupts = <0x00 0x25 0x01>;
		reg = <0x00 0xa002a00 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a002c00 {
		dma-coherent;
		interrupts = <0x00 0x26 0x01>;
		reg = <0x00 0xa002c00 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a002e00 {
		dma-coherent;
		interrupts = <0x00 0x27 0x01>;
		reg = <0x00 0xa002e00 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a003000 {
		dma-coherent;
		interrupts = <0x00 0x28 0x01>;
		reg = <0x00 0xa003000 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a003200 {
		dma-coherent;
		interrupts = <0x00 0x29 0x01>;
		reg = <0x00 0xa003200 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a003400 {
		dma-coherent;
		interrupts = <0x00 0x2a 0x01>;
		reg = <0x00 0xa003400 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a003600 {
		dma-coherent;
		interrupts = <0x00 0x2b 0x01>;
		reg = <0x00 0xa003600 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a003800 {
		dma-coherent;
		interrupts = <0x00 0x2c 0x01>;
		reg = <0x00 0xa003800 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a003a00 {
		dma-coherent;
		interrupts = <0x00 0x2d 0x01>;
		reg = <0x00 0xa003a00 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a003c00 {
		dma-coherent;
		interrupts = <0x00 0x2e 0x01>;
		reg = <0x00 0xa003c00 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	virtio_mmio@a003e00 {
		dma-coherent;
		interrupts = <0x00 0x2f 0x01>;
		reg = <0x00 0xa003e00 0x00 0x200>;
		compatible = "virtio,mmio";
	};

	gpio-keys {
		compatible = "gpio-keys";

		poweroff {
			gpios = <0x8007 0x03 0x00>;
			linux,code = <0x74>;
			label = "GPIO Key Poweroff";
		};
	};

	pl061@9030000 {
		phandle = <0x8007>;
		clock-names = "apb_pclk";
		clocks = <0x8000>;
		interrupts = <0x00 0x07 0x04>;
		gpio-controller;
		#gpio-cells = <0x02>;
		compatible = "arm,pl061", "arm,primecell";
		reg = <0x00 0x9030000 0x00 0x1000>;
		status = "disabled";
	};

	pcie@10000000 {
		interrupt-map-mask = <0x1800 0x00 0x00 0x07>;
		interrupt-map = <0x00 0x00 0x00 0x01 0x8005 0x00 0x00 0x00 0x03 0x04 0x00 0x00 0x00 0x02 0x8005 0x00 0x00 0x00 0x04 0x04 0x00 0x00 0x00 0x03 0x8005 0x00 0x00 0x00 0x05 0x04 0x00 0x00 0x00 0x04 0x8005 0x00 0x00 0x00 0x06 0x04 0x800 0x00 0x00 0x01 0x8005 0x00 0x00 0x00 0x04 0x04 0x800 0x00 0x00 0x02 0x8005 0x00 0x00 0x00 0x05 0x04 0x800 0x00 0x00 0x03 0x8005 0x00 0x00 0x00 0x06 0x04 0x800 0x00 0x00 0x04 0x8005 0x00 0x00 0x00 0x03 0x04 0x1000 0x00 0x00 0x01 0x8005 0x00 0x00 0x00 0x05 0x04 0x1000 0x00 0x00 0x02 0x8005 0x00 0x00 0x00 0x06 0x04 0x1000 0x00 0x00 0x03 0x8005 0x00 0x00 0x00 0x03 0x04 0x1000 0x00 0x00 0x04 0x8005 0x00 0x00 0x00 0x04 0x04 0x1800 0x00 0x00 0x01 0x8005 0x00 0x00 0x00 0x06 0x04 0x1800 0x00 0x00 0x02 0x8005 0x00 0x00 0x00 0x03 0x04 0x1800 0x00 0x00 0x03 0x8005 0x00 0x00 0x00 0x04 0x04 0x1800 0x00 0x00 0x04 0x8005 0x00 0x00 0x00 0x05 0x04>;
		#interrupt-cells = <0x01>;
		ranges = <0x1000000 0x00 0x00 0x00 0x3eff0000 0x00 0x10000 0x2000000 0x00 0x10000000 0x00 0x10000000 0x00 0x2eff0000 0x3000000 0x80 0x00 0x80 0x00 0x80 0x00>;
		reg = <0x40 0x10000000 0x00 0x10000000>;
		msi-map = <0x00 0x8006 0x00 0x10000>;
		dma-coherent;
		bus-range = <0x00 0xff>;
		linux,pci-domain = <0x00>;
		#size-cells = <0x02>;
		#address-cells = <0x03>;
		device_type = "pci";
		compatible = "pci-host-ecam-generic";
	};

	pl031@9010000 {
		clock-names = "apb_pclk";
		clocks = <0x8000>;
		interrupts = <0x00 0x02 0x04>;
		reg = <0x00 0x9010000 0x00 0x1000>;
		compatible = "arm,pl031", "arm,primecell";
	};

	intc@8000000 {
		phandle = <0x8005>;
		#address-cells = <0x02>;
		#size-cells = <0x02>;
		#interrupt-cells = <0x03>;
		interrupt-controller;
		compatible = "arm,gic-v3";
		#redistributor-regions = <0x01>;
		reg = <0x00 0x8000000 0x00 0x10000 0x00 0x80a0000 0x00 0xf60000>;
	};

	flash@0 {
		bank-width = <0x04>;
		reg = <0x00 0x00 0x00 0x4000000 0x00 0x4000000 0x00 0x4000000>;
		compatible = "cfi-flash";
	};

	timer {
		compatible = "arm,armv8-timer";
		interrupts = <0x01 0x0d 0xf08 0x01 0x0e 0xf08 0x01 0x0b 0xf08>;
		interrupt-parent = <0x8005>;
	};

	apb-pclk {
		phandle = <0x8000>;
		clock-output-names = "clk24mhz";
		clock-frequency = <0x16e3600>;
		#clock-cells = <0x00>;
		compatible = "fixed-clock";
	};

	aliases {
		serial0 = "/pl011@9000000";
	};

	chosen {
		u-boot,version = "2024.10";
		#size-cells = <0x02>;
		#address-cells = <0x02>;
		kaslr-seed = <0xcfbd6cf4 0xaf233870>;
		bootargs = "console=hvc0 console=ttyAMA0 earlycon=xen earlyprintk=xen clk_ignore_unused";
		linux,initrd-start = <0x00 0xc8200000>;
		linux,initrd-end = <0x00 0xcbd2faef>;
	};

	hypervisor {
		compatible = "xen,xen-4.21", "xen,xen";
		reg = <0x00 0x4d200000 0x00 0x40000 0x00 0x40000000 0x00 0xd000000 0x00 0x4d400000 0x00 0x72a00000 0x00 0xe0000000 0x00 0xfe00000 0x00 0xf8000000 0x00 0x32600000 0x01 0x2f400000 0x00 0x10a00000>;
		interrupts = <0x01 0x00 0xf08>;
		interrupt-parent = <0x8005>;
	};

	psci {
		compatible = "arm,psci-1.0", "arm,psci-0.2", "arm,psci";
		method = "hvc";
		cpu_off = <0x01>;
		cpu_on = <0x02>;
	};

	cpus {
		#address-cells = <0x01>;
		#size-cells = <0x00>;

		cpu@0 {
			compatible = "arm,cortex-a57";
			device_type = "cpu";
			reg = <0x00>;
			enable-method = "psci";
		};
	};

	memory@c0000000 {
		device_type = "memory";
		reg = <0x00 0xc0000000 0x00 0x20000000 0x00 0xf0000000 0x00 0x8000000 0x01 0x2a800000 0x00 0x3800000 0x01 0x2f000000 0x00 0x400000>;
	};
};
Re: [PATCH 05/23] xen/arm: Add capabilities to dom0less
Posted by Julien Grall 2 days, 1 hour ago
Hi Jason,

On 07/03/2025 17:58, Jason Andryuk wrote:
> On 2025-03-07 04:01, Julien Grall wrote:
>> Hi,
>>
>> On 06/03/2025 22:03, Jason Andryuk wrote:
>>> Add capabilities property to dom0less to allow building a
>>> disaggregated system.
>>>
>>> Introduce bootfdt.h to contain these constants.
>>>
>>> When using the hardware or xenstore capabilities, adjust the grant and
>>> event channel limits similar to dom0.
>>  > > Also for the hardware domain, set directmap and iommu.  This 
>> brings its
>>> configuration in line with a dom0.
>>
>> Looking the device tree bindings, a user would be allowed to disable 
>> "passthrough" or even "directmap". This means, we would never be able 
>> to disable "directmap" for the hardware domain in the future with the 
>> existing property (this is to avoid break backwards compatibility).
>>
>> Instead, I think we should check what the user provided and confirm 
>> this is matching our expectation for an hardware domain.
>  >
>> That said, I am not entirely sure why we should force directmap for 
>> the HW domain. We are starting from a clean slate, so I think it would 
>> be better to have by default no directmap and imposing the presence of 
>> an IOMMU in the system.
> 
> Ok, it seems like directmap is not necessary.  It was helpful early on 
> to get things booting, but I think it's no longer necessary after 
> factoring out construct_hwdom().
> 
> What exactly do you mean by imposing with respect to the iommu?  Require 
> one, or mirror the dom0 code and set it for the hwdom?
> 
>      if ( iommu_enabled )
>          dom0_cfg.flags |= XEN_DOMCTL_CDF_iommu;

I mean requires one. Without it, you would need to set directmap and I 
don't think this should be allowed (at least for now) for the HW domain.

> 
>> Lastly, can you provide an example of what the hardware domain DT node 
>> would looke like?
> 
> I've attached a dump of /sys/firmware/fdt from hwdom.  (This is direct 
> mapped).

Sorry if this was not clear, I am asking for the configuration you wrote 
in the host DT for create the hardware domain. I am interested to know 
which properties you set...

> 
>>> --- a/xen/arch/arm/dom0less-build.c
>>> +++ b/xen/arch/arm/dom0less-build.c
>>> @@ -12,6 +12,7 @@
>>>   #include <xen/sizes.h>
>>>   #include <xen/vmap.h>
>>> +#include <public/bootfdt.h>
>>>   #include <public/io/xs_wire.h>
>>>   #include <asm/arm64/sve.h>
>>> @@ -994,6 +995,34 @@ void __init create_domUs(void)
>>>           if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
>>>               panic("No more domain IDs available\n");
>>> +        if ( dt_property_read_u32(node, "capabilities", &val) )
>>> +        {
>>> +            if ( val & ~DOMAIN_CAPS_MASK )
>>> +                panic("Invalid capabilities (%"PRIx32")\n", val);
>>> +
>>> +            if ( val & DOMAIN_CAPS_CONTROL )
>>> +                flags |= CDF_privileged;
>>> +
>>> +            if ( val & DOMAIN_CAPS_HARDWARE )
>>> +            {
>>> +                if ( hardware_domain )
>>> +                    panic("Only 1 hardware domain can be specified! 
>>> (%pd)\n",
>>> +                           hardware_domain);
>>> +
>>> +                d_cfg.max_grant_frames = gnttab_dom0_frames();
>>> +                d_cfg.max_evtchn_port = -1;
>>
>> What about d_cfg.arch.nr_spis? Are we expecting the user to pass 
>> "nr_spis"?
> 
> Further down, when nr_spis isn't specified in the DT, it defaults to:
>      d_cfg.arch.nr_spis = gic_number_lines() - 32;

Thanks. One further question, what if the user pass "nr_spis" for the HW 
domain. Wouldn't this result to more issue further down the line?

> 
> Dom0 does:
>      /*
>       * Xen vGIC supports a maximum of 992 interrupt lines.
>       * 32 are substracted to cover local IRQs.
>       */
>      dom0_cfg.arch.nr_spis = min(gic_number_lines(), (unsigned int) 992) 
> - 32;
>      if ( gic_number_lines() > 992 )
>          printk(XENLOG_WARNING "Maximum number of vGIC IRQs exceeded.\n");
> 
> So I think it's fine as-is?  I could add the min() and warning if you 
> think it's necessary.

Regardless this discussion, I wonder why we didn't add the min(...) here 
like for dom0 because we are using the same vGIC emulation. So the max 
SPIs supported is the same...

What I am trying to understand is whether it is ok to allow the user to 
select "nr_spis", "vpl011" & co if they are either not honored (like for 
vpl011) or could introduce further issue (like for nr_spis as the HW 
domain is supposed to have the same configuration as dom0).

I also don't think it is a good idea to silently ignore what the user 
requested. So far, on Arm, we mainly decided to reject/panic early if 
the setup is not correct.

Cheers,

-- 
Julien Grall


Re: [PATCH 05/23] xen/arm: Add capabilities to dom0less
Posted by Stefano Stabellini 1 day, 21 hours ago
On Fri, 7 Mar 2025, Julien Grall wrote:
> > What exactly do you mean by imposing with respect to the iommu?  Require
> > one, or mirror the dom0 code and set it for the hwdom?
> > 
> >      if ( iommu_enabled )
> >          dom0_cfg.flags |= XEN_DOMCTL_CDF_iommu;
> 
> I mean requires one. Without it, you would need to set directmap and I don't
> think this should be allowed (at least for now) for the HW domain.

I think the directmap should be optional for the hardware domain. In
practice, we already have not direct-mapped dom0 today when the user
enables cache coloring.
Re: [PATCH 05/23] xen/arm: Add capabilities to dom0less
Posted by Jason Andryuk 1 day, 22 hours ago
On 2025-03-07 16:21, Julien Grall wrote:
> Hi Jason,
> 
> On 07/03/2025 17:58, Jason Andryuk wrote:
>> On 2025-03-07 04:01, Julien Grall wrote:
>>> Hi,
>>>
>>> On 06/03/2025 22:03, Jason Andryuk wrote:
>>>> Add capabilities property to dom0less to allow building a
>>>> disaggregated system.
>>>>
>>>> Introduce bootfdt.h to contain these constants.
>>>>
>>>> When using the hardware or xenstore capabilities, adjust the grant and
>>>> event channel limits similar to dom0.
>>>  > > Also for the hardware domain, set directmap and iommu.  This 
>>> brings its
>>>> configuration in line with a dom0.
>>>
>>> Looking the device tree bindings, a user would be allowed to disable 
>>> "passthrough" or even "directmap". This means, we would never be able 
>>> to disable "directmap" for the hardware domain in the future with the 
>>> existing property (this is to avoid break backwards compatibility).
>>>
>>> Instead, I think we should check what the user provided and confirm 
>>> this is matching our expectation for an hardware domain.
>>  >
>>> That said, I am not entirely sure why we should force directmap for 
>>> the HW domain. We are starting from a clean slate, so I think it 
>>> would be better to have by default no directmap and imposing the 
>>> presence of an IOMMU in the system.
>>
>> Ok, it seems like directmap is not necessary.  It was helpful early on 
>> to get things booting, but I think it's no longer necessary after 
>> factoring out construct_hwdom().
>>
>> What exactly do you mean by imposing with respect to the iommu?  
>> Require one, or mirror the dom0 code and set it for the hwdom?
>>
>>      if ( iommu_enabled )
>>          dom0_cfg.flags |= XEN_DOMCTL_CDF_iommu;
> 
> I mean requires one. Without it, you would need to set directmap and I 
> don't think this should be allowed (at least for now) for the HW domain.
> 
>>
>>> Lastly, can you provide an example of what the hardware domain DT 
>>> node would looke like?
>>
>> I've attached a dump of /sys/firmware/fdt from hwdom.  (This is direct 
>> mapped).
> 
> Sorry if this was not clear, I am asking for the configuration you wrote 
> in the host DT for create the hardware domain. I am interested to know 
> which properties you set...

I've attached the u-boot fdt commands which generate the DT.  Hopefully 
that works for you.

>>
>>>> --- a/xen/arch/arm/dom0less-build.c
>>>> +++ b/xen/arch/arm/dom0less-build.c
>>>> @@ -12,6 +12,7 @@
>>>>   #include <xen/sizes.h>
>>>>   #include <xen/vmap.h>
>>>> +#include <public/bootfdt.h>
>>>>   #include <public/io/xs_wire.h>
>>>>   #include <asm/arm64/sve.h>
>>>> @@ -994,6 +995,34 @@ void __init create_domUs(void)
>>>>           if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
>>>>               panic("No more domain IDs available\n");
>>>> +        if ( dt_property_read_u32(node, "capabilities", &val) )
>>>> +        {
>>>> +            if ( val & ~DOMAIN_CAPS_MASK )
>>>> +                panic("Invalid capabilities (%"PRIx32")\n", val);
>>>> +
>>>> +            if ( val & DOMAIN_CAPS_CONTROL )
>>>> +                flags |= CDF_privileged;
>>>> +
>>>> +            if ( val & DOMAIN_CAPS_HARDWARE )
>>>> +            {
>>>> +                if ( hardware_domain )
>>>> +                    panic("Only 1 hardware domain can be specified! 
>>>> (%pd)\n",
>>>> +                           hardware_domain);
>>>> +
>>>> +                d_cfg.max_grant_frames = gnttab_dom0_frames();
>>>> +                d_cfg.max_evtchn_port = -1;
>>>
>>> What about d_cfg.arch.nr_spis? Are we expecting the user to pass 
>>> "nr_spis"?
>>
>> Further down, when nr_spis isn't specified in the DT, it defaults to:
>>      d_cfg.arch.nr_spis = gic_number_lines() - 32;
> 
> Thanks. One further question, what if the user pass "nr_spis" for the HW 
> domain. Wouldn't this result to more issue further down the line?

I'm not familiar with ARM, so I'll to whatever is best.  I did put the 
capabilities first, thinking it would set defaults, and then later 
options could override them.

>>
>> Dom0 does:
>>      /*
>>       * Xen vGIC supports a maximum of 992 interrupt lines.
>>       * 32 are substracted to cover local IRQs.
>>       */
>>      dom0_cfg.arch.nr_spis = min(gic_number_lines(), (unsigned int) 
>> 992) - 32;
>>      if ( gic_number_lines() > 992 )
>>          printk(XENLOG_WARNING "Maximum number of vGIC IRQs exceeded. 
>> \n");
>>
>> So I think it's fine as-is?  I could add the min() and warning if you 
>> think it's necessary.
> 
> Regardless this discussion, I wonder why we didn't add the min(...) here 
> like for dom0 because we are using the same vGIC emulation. So the max 
> SPIs supported is the same...
> 
> What I am trying to understand is whether it is ok to allow the user to 
> select "nr_spis", "vpl011" & co if they are either not honored (like for 
> vpl011) or could introduce further issue (like for nr_spis as the HW 
> domain is supposed to have the same configuration as dom0).
> 
> I also don't think it is a good idea to silently ignore what the user 
> requested. So far, on Arm, we mainly decided to reject/panic early if 
> the setup is not correct.

Again, I'll do whatever is best.

Regards,
Jasontftpb 0x40400000 Image
tftpb 0x42000000 dom0-rootfs.cpio.gz
tftpb 0x45C00000 Image
tftpb 0x47800000 initrd
tftpb 0x47A00000 Image
tftpb 0x49600000 domU-rootfs.cpio.gz
tftpb 0x4D200000 xen
tftpb 0x4D400000 virt-gicv3.dtb
fdt addr 0x4D400000
fdt resize 1024
fdt set /chosen \#address-cells <0x2>
fdt set /chosen \#size-cells <0x2>
fdt set /chosen xen,xen-bootargs "console=dtuart dtuart=serial0   bootscrub=0 vwfi=native sched=null"
fdt mknod /chosen domU0
fdt set /chosen/domU0 compatible "xen,domain"
fdt set /chosen/domU0 \#address-cells <0x2>
fdt set /chosen/domU0 \#size-cells <0x2>
fdt set /chosen/domU0 memory <0x0 0xAF000 >
fdt set /chosen/domU0 cpus <1>
fdt set /chosen/domU0 vpl011
fdt set /chosen/domU0 capabilities <6>
fdt set /chosen/domU0 passthrough ""
fdt mknod /chosen/domU0 module@40400000
fdt set /chosen/domU0/module@40400000 compatible  "multiboot,kernel" "multiboot,module"
fdt set /chosen/domU0/module@40400000 reg <0x0 0x40400000 0x0 0x1A80000 >
fdt set /chosen/domU0/module@40400000 bootargs "console=hvc0 console=ttyAMA0 earlycon=xen earlyprintk=xen clk_ignore_unused"
fdt mknod /chosen/domU0 module@42000000
fdt set /chosen/domU0/module@42000000 compatible  "multiboot,ramdisk" "multiboot,module"
fdt set /chosen/domU0/module@42000000 reg <0x0 0x42000000 0x0 0x3B2EFB6 >
fdt mknod /chosen domU1
fdt set /chosen/domU1 compatible "xen,domain"
fdt set /chosen/domU1 \#address-cells <0x2>
fdt set /chosen/domU1 \#size-cells <0x2>
fdt set /chosen/domU1 memory <0x0 0x64000 >
fdt set /chosen/domU1 cpus <2>
fdt set /chosen/domU1 vpl011
fdt set /chosen/domU1 xen,enhanced "enabled"
fdt set /chosen/domU1 capabilities <0>
fdt mknod /chosen/domU1 module@45C00000
fdt set /chosen/domU1/module@45C00000 compatible  "multiboot,kernel" "multiboot,module"
fdt set /chosen/domU1/module@45C00000 reg <0x0 0x45C00000 0x0 0x1A80000 >
fdt set /chosen/domU1/module@45C00000 bootargs "console=ttyAMA0"
fdt mknod /chosen/domU1 module@47800000
fdt set /chosen/domU1/module@47800000 compatible  "multiboot,ramdisk" "multiboot,module"
fdt set /chosen/domU1/module@47800000 reg <0x0 0x47800000 0x0 0x151205 >
fdt mknod /chosen domU2
fdt set /chosen/domU2 compatible "xen,domain"
fdt set /chosen/domU2 \#address-cells <0x2>
fdt set /chosen/domU2 \#size-cells <0x2>
fdt set /chosen/domU2 memory <0x0 0xAF000 >
fdt set /chosen/domU2 cpus <1>
fdt set /chosen/domU2 vpl011
fdt set /chosen/domU2 xen,enhanced "enabled"
fdt set /chosen/domU2 capabilities <1>
fdt mknod /chosen/domU2 module@47A00000
fdt set /chosen/domU2/module@47A00000 compatible  "multiboot,kernel" "multiboot,module"
fdt set /chosen/domU2/module@47A00000 reg <0x0 0x47A00000 0x0 0x1A80000 >
fdt set /chosen/domU2/module@47A00000 bootargs "console=ttyAMA0"
fdt mknod /chosen/domU2 module@49600000
fdt set /chosen/domU2/module@49600000 compatible  "multiboot,ramdisk" "multiboot,module"
fdt set /chosen/domU2/module@49600000 reg <0x0 0x49600000 0x0 0x3B2F182 >
setenv fdt_high 0xffffffffffffffff
booti 0x4D200000 - 0x4D400000
Re: [PATCH 05/23] xen/arm: Add capabilities to dom0less
Posted by Julien Grall 1 day, 9 hours ago
Hi Jason,

On 08/03/2025 00:02, Jason Andryuk wrote:
> On 2025-03-07 16:21, Julien Grall wrote:
>> Hi Jason,
>>
>> On 07/03/2025 17:58, Jason Andryuk wrote:
>>> On 2025-03-07 04:01, Julien Grall wrote:
>>>> Hi,
>>>>
>>>> On 06/03/2025 22:03, Jason Andryuk wrote:
>>>>> Add capabilities property to dom0less to allow building a
>>>>> disaggregated system.
>>>>>
>>>>> Introduce bootfdt.h to contain these constants.
>>>>>
>>>>> When using the hardware or xenstore capabilities, adjust the grant and
>>>>> event channel limits similar to dom0.
>>>>  > > Also for the hardware domain, set directmap and iommu.  This 
>>>> brings its
>>>>> configuration in line with a dom0.
>>>>
>>>> Looking the device tree bindings, a user would be allowed to disable 
>>>> "passthrough" or even "directmap". This means, we would never be 
>>>> able to disable "directmap" for the hardware domain in the future 
>>>> with the existing property (this is to avoid break backwards 
>>>> compatibility).
>>>>
>>>> Instead, I think we should check what the user provided and confirm 
>>>> this is matching our expectation for an hardware domain.
>>>  >
>>>> That said, I am not entirely sure why we should force directmap for 
>>>> the HW domain. We are starting from a clean slate, so I think it 
>>>> would be better to have by default no directmap and imposing the 
>>>> presence of an IOMMU in the system.
>>>
>>> Ok, it seems like directmap is not necessary.  It was helpful early 
>>> on to get things booting, but I think it's no longer necessary after 
>>> factoring out construct_hwdom().
>>>
>>> What exactly do you mean by imposing with respect to the iommu? 
>>> Require one, or mirror the dom0 code and set it for the hwdom?
>>>
>>>      if ( iommu_enabled )
>>>          dom0_cfg.flags |= XEN_DOMCTL_CDF_iommu;
>>
>> I mean requires one. Without it, you would need to set directmap and I 
>> don't think this should be allowed (at least for now) for the HW domain.
>>
>>>
>>>> Lastly, can you provide an example of what the hardware domain DT 
>>>> node would looke like?
>>>
>>> I've attached a dump of /sys/firmware/fdt from hwdom.  (This is 
>>> direct mapped).
>>
>> Sorry if this was not clear, I am asking for the configuration you 
>> wrote in the host DT for create the hardware domain. I am interested 
>> to know which properties you set...
> 
> I've attached the u-boot fdt commands which generate the DT.  Hopefully 
> that works for you.
> 
>>>
>>>>> --- a/xen/arch/arm/dom0less-build.c
>>>>> +++ b/xen/arch/arm/dom0less-build.c
>>>>> @@ -12,6 +12,7 @@
>>>>>   #include <xen/sizes.h>
>>>>>   #include <xen/vmap.h>
>>>>> +#include <public/bootfdt.h>
>>>>>   #include <public/io/xs_wire.h>
>>>>>   #include <asm/arm64/sve.h>
>>>>> @@ -994,6 +995,34 @@ void __init create_domUs(void)
>>>>>           if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
>>>>>               panic("No more domain IDs available\n");
>>>>> +        if ( dt_property_read_u32(node, "capabilities", &val) )
>>>>> +        {
>>>>> +            if ( val & ~DOMAIN_CAPS_MASK )
>>>>> +                panic("Invalid capabilities (%"PRIx32")\n", val);
>>>>> +
>>>>> +            if ( val & DOMAIN_CAPS_CONTROL )
>>>>> +                flags |= CDF_privileged;
>>>>> +
>>>>> +            if ( val & DOMAIN_CAPS_HARDWARE )
>>>>> +            {
>>>>> +                if ( hardware_domain )
>>>>> +                    panic("Only 1 hardware domain can be 
>>>>> specified! (%pd)\n",
>>>>> +                           hardware_domain);
>>>>> +
>>>>> +                d_cfg.max_grant_frames = gnttab_dom0_frames();
>>>>> +                d_cfg.max_evtchn_port = -1;
>>>>
>>>> What about d_cfg.arch.nr_spis? Are we expecting the user to pass 
>>>> "nr_spis"?
>>>
>>> Further down, when nr_spis isn't specified in the DT, it defaults to:
>>>      d_cfg.arch.nr_spis = gic_number_lines() - 32;
>>
>> Thanks. One further question, what if the user pass "nr_spis" for the 
>> HW domain. Wouldn't this result to more issue further down the line?
> 
> I'm not familiar with ARM, so I'll to whatever is best.  I did put the 
> capabilities first, thinking it would set defaults, and then later 
> options could override them.

I am not sure it is related to Arm. It is more that the HW domain is 
going to re-use the memory layout of the host (this is including the 
mapping for the GIC) and also have all the irqs with pirq == virq.

I am a bit concerned that letting the users mistakenly tweaking the 
defaults could prevent attaching devices (for instance if the IRQ ID is 
above > nr_spis).

> 
>>>
>>> Dom0 does:
>>>      /*
>>>       * Xen vGIC supports a maximum of 992 interrupt lines.
>>>       * 32 are substracted to cover local IRQs.
>>>       */
>>>      dom0_cfg.arch.nr_spis = min(gic_number_lines(), (unsigned int) 
>>> 992) - 32;
>>>      if ( gic_number_lines() > 992 )
>>>          printk(XENLOG_WARNING "Maximum number of vGIC IRQs exceeded. 
>>> \n");
>>>
>>> So I think it's fine as-is?  I could add the min() and warning if you 
>>> think it's necessary.
>>
>> Regardless this discussion, I wonder why we didn't add the min(...) 
>> here like for dom0 because we are using the same vGIC emulation. So 
>> the max SPIs supported is the same...
>>
>> What I am trying to understand is whether it is ok to allow the user 
>> to select "nr_spis", "vpl011" & co if they are either not honored 
>> (like for vpl011) or could introduce further issue (like for nr_spis 
>> as the HW domain is supposed to have the same configuration as dom0).
>>
>> I also don't think it is a good idea to silently ignore what the user 
>> requested. So far, on Arm, we mainly decided to reject/panic early if 
>> the setup is not correct.
> 
> Again, I'll do whatever is best.

Bertrand, Michal, Stefano, any opinions?

Cheers,

-- 
Julien Grall


Re: [PATCH 05/23] xen/arm: Add capabilities to dom0less
Posted by Stefano Stabellini 2 days, 20 hours ago
On Thu, 6 Mar 2025, Jason Andryuk wrote:
> Add capabilities property to dom0less to allow building a
> disaggregated system.
> 
> Introduce bootfdt.h to contain these constants.
> 
> When using the hardware or xenstore capabilities, adjust the grant and
> event channel limits similar to dom0.
> 
> Also for the hardware domain, set directmap and iommu.  This brings its
> configuration in line with a dom0.
> 
> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
> ---
> There is overlap with hyperlaunch.  The numeric values are the same.
> Hyperlaunch doesn't expose the values in a public header as done here.
> Is this to be expected for dom0less?  It seems most of dom0less isn't in
> a header, but just in docs.
> 
> Hyperlaunch uses BUILD_CAPS_, but I chose DOMAIN_CAPS_ since there are
> domain-level capabilities.
> 
> Only a single xenstore and hardware domain make sense.  A check to limit
> to only a single hardware domain is in place - building two breaks.  But
> nothing prevents the dom0less configuration from only having multiple
> xenstore domains.  Each xenstore domain would have slightly more
> permissions, but only the last one would be used.
> ---
>  docs/misc/arm/device-tree/booting.txt | 11 ++++++++++
>  xen/arch/arm/dom0less-build.c         | 29 +++++++++++++++++++++++++++
>  xen/arch/arm/domain.c                 |  3 ++-
>  xen/include/public/bootfdt.h          | 27 +++++++++++++++++++++++++
>  4 files changed, 69 insertions(+), 1 deletion(-)
>  create mode 100644 xen/include/public/bootfdt.h
> 
> diff --git a/docs/misc/arm/device-tree/booting.txt b/docs/misc/arm/device-tree/booting.txt
> index ac781c9cc8..490c792ddf 100644
> --- a/docs/misc/arm/device-tree/booting.txt
> +++ b/docs/misc/arm/device-tree/booting.txt
> @@ -167,6 +167,17 @@ with the following properties:
>      Refer to docs/misc/cache_coloring.rst for syntax. This option is applicable
>      only to Arm64 guests.
>  
> +- capabilities
> +    Optional.  A bit field of domain capabilities for a disaggregated
> +    system.  A traditional dom0 has all all of these capabilities, and a
> +    domU has none of them.
> +
> +    0x1 DOMAIN_CAPS_CONTROL  - A privileged, control domain
> +    0x2 DOMAIN_CAPS_HARDWARE - The hardware domain - there can be only 1
> +    0x4 DOMAIN_CAPS_XENSTORE - The xenstore domain - there can be only 1
> +
> +    The default is no capabilities.
> +
>  - vpl011
>  
>      An empty property to enable/disable a virtual pl011 for the guest to
> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
> index 5a7871939b..068bf99294 100644
> --- a/xen/arch/arm/dom0less-build.c
> +++ b/xen/arch/arm/dom0less-build.c
> @@ -12,6 +12,7 @@
>  #include <xen/sizes.h>
>  #include <xen/vmap.h>
>  
> +#include <public/bootfdt.h>
>  #include <public/io/xs_wire.h>
>  
>  #include <asm/arm64/sve.h>
> @@ -994,6 +995,34 @@ void __init create_domUs(void)
>          if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
>              panic("No more domain IDs available\n");
>  
> +        if ( dt_property_read_u32(node, "capabilities", &val) )
> +        {
> +            if ( val & ~DOMAIN_CAPS_MASK )
> +                panic("Invalid capabilities (%"PRIx32")\n", val);
> +
> +            if ( val & DOMAIN_CAPS_CONTROL )
> +                flags |= CDF_privileged;
> +
> +            if ( val & DOMAIN_CAPS_HARDWARE )
> +            {
> +                if ( hardware_domain )
> +                    panic("Only 1 hardware domain can be specified! (%pd)\n",
> +                           hardware_domain);
> +
> +                d_cfg.max_grant_frames = gnttab_dom0_frames();
> +                d_cfg.max_evtchn_port = -1;

max_maptrack_frames = -1 ?


> +                flags |= CDF_hardware;
> +                flags |= CDF_directmap;
> +                iommu = true;
> +            }
> +
> +            if ( val & DOMAIN_CAPS_XENSTORE )
> +            {
> +                d_cfg.flags |= XEN_DOMCTL_CDF_xs_domain;

shouldn't we take the opportunity to also set XEN_DOMCTL_CDF_xs_domain
in xen/arch/arm/domain_build.c:create_dom0 ?


> +                d_cfg.max_evtchn_port = -1;

Why this one?


> +            }
> +        }
> +
>          if ( dt_find_property(node, "xen,static-mem", NULL) )
>          {
>              if ( llc_coloring_enabled )
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 3ba959f866..dc4b4e84c1 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -608,7 +608,8 @@ int arch_sanitise_domain_config(struct xen_domctl_createdomain *config)
>  {
>      unsigned int max_vcpus;
>      unsigned int flags_required = (XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap);
> -    unsigned int flags_optional = (XEN_DOMCTL_CDF_iommu | XEN_DOMCTL_CDF_vpmu);
> +    unsigned int flags_optional = (XEN_DOMCTL_CDF_iommu | XEN_DOMCTL_CDF_vpmu |
> +                                   XEN_DOMCTL_CDF_xs_domain );
>      unsigned int sve_vl_bits = sve_decode_vl(config->arch.sve_vl);
>  
>      if ( (config->flags & ~flags_optional) != flags_required )
> diff --git a/xen/include/public/bootfdt.h b/xen/include/public/bootfdt.h
> new file mode 100644
> index 0000000000..4e87aca8ac
> --- /dev/null
> +++ b/xen/include/public/bootfdt.h
> @@ -0,0 +1,27 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Xen Device Tree boot information
> + *
> + * Information for configuring Xen domains created at boot time.
> + */
> +
> +#ifndef __XEN_PUBLIC_BOOTFDT_H__
> +#define __XEN_PUBLIC_BOOTFDT_H__
> +
> +/* Domain Capabilities specified in the "capabilities" property.  Use of
> + * this property allows splitting up the monolithic dom0 into separate,
> + * less privileged components.  A regular domU has no capabilities
> + * (which is the default if nothing is specified).  A traditional dom0
> + * has all three capabilities.*/

The multiline comment coding style is:

/*
 * comment
 * comment
 */


> +/* Control/Privileged domain capable of affecting other domains. */
> +#define DOMAIN_CAPS_CONTROL  (1 << 0)
> +/* Hardware domain controlling physical hardware.  Typically providing
> + * backends to other domains.  */
> +#define DOMAIN_CAPS_HARDWARE (1 << 1)
> +/* Xenstore domain. */
> +#define DOMAIN_CAPS_XENSTORE (1 << 2)
> +#define DOMAIN_CAPS_MASK     (DOMAIN_CAPS_CONTROL | DOMAIN_CAPS_HARDWARE | \
> +                              DOMAIN_CAPS_XENSTORE)
> +
> +#endif /* __XEN_PUBLIC_BOOTFDT_H__ */
> -- 
> 2.48.1
>
Re: [PATCH 05/23] xen/arm: Add capabilities to dom0less
Posted by Jason Andryuk 2 days, 5 hours ago
On 2025-03-06 20:40, Stefano Stabellini wrote:
> On Thu, 6 Mar 2025, Jason Andryuk wrote:
>> Add capabilities property to dom0less to allow building a
>> disaggregated system.
>>
>> Introduce bootfdt.h to contain these constants.
>>
>> When using the hardware or xenstore capabilities, adjust the grant and
>> event channel limits similar to dom0.
>>
>> Also for the hardware domain, set directmap and iommu.  This brings its
>> configuration in line with a dom0.
>>
>> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
>> ---
>> There is overlap with hyperlaunch.  The numeric values are the same.
>> Hyperlaunch doesn't expose the values in a public header as done here.
>> Is this to be expected for dom0less?  It seems most of dom0less isn't in
>> a header, but just in docs.
>>
>> Hyperlaunch uses BUILD_CAPS_, but I chose DOMAIN_CAPS_ since there are
>> domain-level capabilities.
>>
>> Only a single xenstore and hardware domain make sense.  A check to limit
>> to only a single hardware domain is in place - building two breaks.  But
>> nothing prevents the dom0less configuration from only having multiple
>> xenstore domains.  Each xenstore domain would have slightly more
>> permissions, but only the last one would be used.

>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
>> index 5a7871939b..068bf99294 100644
>> --- a/xen/arch/arm/dom0less-build.c
>> +++ b/xen/arch/arm/dom0less-build.c
>> @@ -12,6 +12,7 @@
>>   #include <xen/sizes.h>
>>   #include <xen/vmap.h>
>>   
>> +#include <public/bootfdt.h>
>>   #include <public/io/xs_wire.h>
>>   
>>   #include <asm/arm64/sve.h>
>> @@ -994,6 +995,34 @@ void __init create_domUs(void)
>>           if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
>>               panic("No more domain IDs available\n");
>>   
>> +        if ( dt_property_read_u32(node, "capabilities", &val) )
>> +        {
>> +            if ( val & ~DOMAIN_CAPS_MASK )
>> +                panic("Invalid capabilities (%"PRIx32")\n", val);
>> +
>> +            if ( val & DOMAIN_CAPS_CONTROL )
>> +                flags |= CDF_privileged;
>> +
>> +            if ( val & DOMAIN_CAPS_HARDWARE )
>> +            {
>> +                if ( hardware_domain )
>> +                    panic("Only 1 hardware domain can be specified! (%pd)\n",
>> +                           hardware_domain);
>> +
>> +                d_cfg.max_grant_frames = gnttab_dom0_frames();
>> +                d_cfg.max_evtchn_port = -1;
> 
> max_maptrack_frames = -1 ?

Yes.

> 
>> +                flags |= CDF_hardware;
>> +                flags |= CDF_directmap;
>> +                iommu = true;
>> +            }
>> +
>> +            if ( val & DOMAIN_CAPS_XENSTORE )
>> +            {
>> +                d_cfg.flags |= XEN_DOMCTL_CDF_xs_domain;
> 
> shouldn't we take the opportunity to also set XEN_DOMCTL_CDF_xs_domain
> in xen/arch/arm/domain_build.c:create_dom0 ?

We could.  It isn't normally used except for a standalone xenstore 
domain, which is why I didn't add it.

> 
>> +                d_cfg.max_evtchn_port = -1;
> 
> Why this one?

It mirrors what is done in init-xenstore for a xenstore stubdom.  It's 
to remove the limit so the xenstore domain can create as many as 
necessary for however many domains.

> 
>> +            }
>> +        }
>> +
>>           if ( dt_find_property(node, "xen,static-mem", NULL) )
>>           {
>>               if ( llc_coloring_enabled )

>> diff --git a/xen/include/public/bootfdt.h b/xen/include/public/bootfdt.h
>> new file mode 100644
>> index 0000000000..4e87aca8ac
>> --- /dev/null
>> +++ b/xen/include/public/bootfdt.h
>> @@ -0,0 +1,27 @@
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> + * Xen Device Tree boot information
>> + *
>> + * Information for configuring Xen domains created at boot time.
>> + */
>> +
>> +#ifndef __XEN_PUBLIC_BOOTFDT_H__
>> +#define __XEN_PUBLIC_BOOTFDT_H__
>> +
>> +/* Domain Capabilities specified in the "capabilities" property.  Use of
>> + * this property allows splitting up the monolithic dom0 into separate,
>> + * less privileged components.  A regular domU has no capabilities
>> + * (which is the default if nothing is specified).  A traditional dom0
>> + * has all three capabilities.*/
> 
> The multiline comment coding style is:
> 
> /*
>   * comment
>   * comment
>   */

Ok.

Thanks,
Jason
Re: [PATCH 05/23] xen/arm: Add capabilities to dom0less
Posted by Jason Andryuk 2 days, 5 hours ago
On 2025-03-07 11:40, Jason Andryuk wrote:
> On 2025-03-06 20:40, Stefano Stabellini wrote:
>> On Thu, 6 Mar 2025, Jason Andryuk wrote:

>>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less- 
>>> build.c
>>> index 5a7871939b..068bf99294 100644
>>> --- a/xen/arch/arm/dom0less-build.c
>>> +++ b/xen/arch/arm/dom0less-build.c
>>> @@ -12,6 +12,7 @@
>>>   #include <xen/sizes.h>
>>>   #include <xen/vmap.h>
>>> +#include <public/bootfdt.h>
>>>   #include <public/io/xs_wire.h>
>>>   #include <asm/arm64/sve.h>
>>> @@ -994,6 +995,34 @@ void __init create_domUs(void)
>>>           if ( (max_init_domid + 1) >= DOMID_FIRST_RESERVED )
>>>               panic("No more domain IDs available\n");
>>> +        if ( dt_property_read_u32(node, "capabilities", &val) )
>>> +        {
>>> +            if ( val & ~DOMAIN_CAPS_MASK )
>>> +                panic("Invalid capabilities (%"PRIx32")\n", val);
>>> +
>>> +            if ( val & DOMAIN_CAPS_CONTROL )
>>> +                flags |= CDF_privileged;
>>> +
>>> +            if ( val & DOMAIN_CAPS_HARDWARE )
>>> +            {
>>> +                if ( hardware_domain )
>>> +                    panic("Only 1 hardware domain can be specified! 
>>> (%pd)\n",
>>> +                           hardware_domain);
>>> +
>>> +                d_cfg.max_grant_frames = gnttab_dom0_frames();
>>> +                d_cfg.max_evtchn_port = -1;
>>
>> max_maptrack_frames = -1 ?
> 
> Yes.

Actually, -1 is already used to initialize d_cfg.max_maptrack_frames.

Regards,
Jason