Implement ARM DEN 0057A PV time support for domains created via the
toolstack, utilizing the newly introduced XENMAPSPACE_pv_time.
Signed-off-by: Koichiro Den <den@valinux.co.jp>
---
tools/libs/light/libxl_arm.c | 185 ++++++++++++++++++++++++++++-------
xen/arch/arm/mm.c | 14 +++
xen/include/public/memory.h | 1 +
3 files changed, 167 insertions(+), 33 deletions(-)
diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
index 4a19a8d22bdf..33251520c07a 100644
--- a/tools/libs/light/libxl_arm.c
+++ b/tools/libs/light/libxl_arm.c
@@ -684,6 +684,40 @@ static int make_memory_nodes(libxl__gc *gc, void *fdt,
return 0;
}
+static int make_resv_memory_node(libxl__gc *gc, void *fdt,
+ const struct xc_dom_image *dom)
+{
+ int res;
+
+ if (strcmp(dom->guest_type, "xen-3.0-aarch64"))
+ /*
+ * The stolen time shared memory region for ARM DEN 0057A is currently
+ * the only user of /reserved-memory node when a domain is created via
+ * the toolstack, and it requires both the hypervisor and the domain to
+ * be AArch64.
+ */
+ return 0;
+
+ res = fdt_begin_node(fdt, "reserved-memory");
+ if (res) return res;
+
+ res = fdt_property_cell(fdt, "#address-cells", GUEST_ROOT_ADDRESS_CELLS);
+ if (res) return res;
+
+ res = fdt_property_cell(fdt, "#size-cells", GUEST_ROOT_SIZE_CELLS);
+ if (res) return res;
+
+ /* reg 0 is a placeholder for PV time region */
+ res = fdt_property_reg_placeholder(gc, fdt, GUEST_ROOT_ADDRESS_CELLS,
+ GUEST_ROOT_SIZE_CELLS, 1);
+ if (res) return res;
+
+ res = fdt_end_node(fdt);
+ if (res) return res;
+
+ return 0;
+}
+
static int make_gicv2_node(libxl__gc *gc, void *fdt,
uint64_t gicd_base, uint64_t gicd_size,
uint64_t gicc_base, uint64_t gicc_size)
@@ -1352,6 +1386,7 @@ next_resize:
FDT( make_psci_node(gc, fdt) );
FDT( make_memory_nodes(gc, fdt, dom) );
+ FDT( make_resv_memory_node(gc, fdt, dom) );
switch (info->arch_arm.gic_version) {
case LIBXL_GIC_VERSION_V2:
@@ -1519,6 +1554,9 @@ static void finalise_one_node(libxl__gc *gc, void *fdt, const char *uname,
#define EXT_REGION_MIN_SIZE xen_mk_ullong(0x0004000000) /* 64MB */
+/* As per ARM DEN 0057A, stolen time memory regions are 64-byte aligned */
+#define PV_REGIONS_PER_PAGE (XC_PAGE_SIZE / 64)
+
static int compare_iomem(const void *a, const void *b)
{
const libxl_iomem_range *x = a, *y = b;
@@ -1530,24 +1568,92 @@ static int compare_iomem(const void *a, const void *b)
return 0;
}
-static int finalize_hypervisor_node(libxl__gc *gc,
- libxl_domain_build_info *b_info,
- struct xc_dom_image *dom)
+static int get_pv_region(libxl_domain_build_info *b_info,
+ struct xc_dom_image *dom,
+ uint64_t *start, uint64_t end,
+ uint64_t *region_base, uint64_t *region_size)
+{
+ unsigned int npages = DIV_ROUNDUP(b_info->max_vcpus, PV_REGIONS_PER_PAGE);
+ unsigned int len = npages * XC_PAGE_SIZE;
+ uint32_t domid = dom->guest_domid;
+ xc_interface *xch = dom->xch;
+ unsigned long idx = 0;
+ uint64_t size;
+ int rc;
+
+ if (*start >= end)
+ return -1;
+ size = end - *start;
+ if (size < len)
+ return -1;
+
+ for (; npages; npages--, idx++) {
+ rc = xc_domain_add_to_physmap(xch, domid, XENMAPSPACE_pv_time, idx,
+ (*start >> XC_PAGE_SHIFT) + idx);
+ if (rc)
+ return rc;
+ }
+
+ region_base[0] = *start;
+ region_size[0] = len;
+ *start += len;
+ return 0;
+}
+
+static void get_ext_region(uint64_t start, uint64_t end, uint64_t *region_base,
+ uint64_t *region_size, unsigned int *nr_regions)
+{
+ uint64_t size;
+
+ start = ALIGN_UP_TO_2MB(start);
+ if (start >= end)
+ return;
+
+ size = end - start;
+ if (size < EXT_REGION_MIN_SIZE)
+ return;
+
+ region_base[*nr_regions] = start;
+ region_size[*nr_regions] = size;
+ (*nr_regions)++;
+}
+
+static int finalize_extra_regions(libxl__gc *gc,
+ libxl_domain_build_info *b_info,
+ struct xc_dom_image *dom)
{
void *fdt = dom->devicetree_blob;
- uint64_t region_base[MAX_NR_EXT_REGIONS], region_size[MAX_NR_EXT_REGIONS];
- uint32_t regs[(GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) *
+
+ /* For extended regions */
+ uint64_t ext_region_base[MAX_NR_EXT_REGIONS], ext_region_size[MAX_NR_EXT_REGIONS];
+ uint32_t ext_regs[(GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) *
(MAX_NR_EXT_REGIONS + 1)];
- be32 *cells = &regs[0];
+ be32 *ext_cells = &ext_regs[0];
+ int hyp_offset;
+
+ /* For pv regions */
+ uint64_t pv_region_base[1], pv_region_size[1];
+ uint32_t pv_regs[GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS];
+ be32 *pv_cells = &pv_regs[0];
+ int resv_offset;
+
const uint64_t bankbase[] = GUEST_RAM_BANK_BASES;
const uint64_t banksize[] = GUEST_RAM_BANK_SIZES;
unsigned int i, j, len, nr_regions = 0;
+ bool pv_region_pending = true;
libxl_dominfo info;
- int offset, rc;
+ int rc;
- offset = fdt_path_offset(fdt, "/hypervisor");
- if (offset < 0)
- return offset;
+ resv_offset = fdt_path_offset(fdt, "/reserved-memory");
+ if (!strcmp(dom->guest_type, "xen-3.0-aarch64")) {
+ if (resv_offset < 0)
+ return resv_offset;
+ } else
+ pv_region_pending = false;
+
+ hyp_offset = fdt_path_offset(fdt, "/hypervisor");
+ if (hyp_offset < 0)
+ return hyp_offset;
rc = libxl_domain_info(CTX, &info, dom->guest_domid);
if (rc)
@@ -1572,8 +1678,7 @@ static int finalize_hypervisor_node(libxl__gc *gc,
} unallocated;
uint64_t unallocated_size = 0;
- unallocated.start = bankbase[i] +
- ALIGN_UP_TO_2MB((uint64_t)dom->rambank_size[i] << XC_PAGE_SHIFT);
+ unallocated.start = bankbase[i] + ((uint64_t)dom->rambank_size[i] << XC_PAGE_SHIFT);
unallocated.end = ~0ULL >> (64 - info.gpaddr_bits);
unallocated.end = min(unallocated.end, bankbase[i] + banksize[i] - 1);
@@ -1581,7 +1686,7 @@ static int finalize_hypervisor_node(libxl__gc *gc,
if (unallocated.end >= unallocated.start)
unallocated_size = unallocated.end - unallocated.start + 1;
- if (unallocated_size < EXT_REGION_MIN_SIZE)
+ if (unallocated_size <= 0)
continue;
/* Exclude iomem */
@@ -1605,14 +1710,14 @@ static int finalize_hypervisor_node(libxl__gc *gc,
if (unallocated.start > unallocated.end)
break;
} else {
- uint64_t size = iomem.start - unallocated.start;
-
- if (size >= EXT_REGION_MIN_SIZE) {
- region_base[nr_regions] = unallocated.start;
- region_size[nr_regions] = size;
- nr_regions++;
+ if (pv_region_pending) {
+ rc = get_pv_region(b_info, dom, &unallocated.start, iomem.start,
+ pv_region_base, pv_region_size);
+ if (!rc)
+ pv_region_pending = false;
}
-
+ get_ext_region(unallocated.start, iomem.start, ext_region_base,
+ ext_region_size, &nr_regions);
unallocated.start = iomem.end + 1;
if (unallocated.start > unallocated.end)
@@ -1624,38 +1729,52 @@ static int finalize_hypervisor_node(libxl__gc *gc,
if (unallocated.end >= unallocated.start
&& nr_regions < MAX_NR_EXT_REGIONS)
{
- uint64_t size = unallocated.end - unallocated.start + 1;
-
- if (size >= EXT_REGION_MIN_SIZE) {
- region_base[nr_regions] = unallocated.start;
- region_size[nr_regions] = size;
- nr_regions++;
+ if (pv_region_pending) {
+ rc = get_pv_region(b_info, dom, &unallocated.start, unallocated.end,
+ pv_region_base, pv_region_size);
+ if (!rc)
+ pv_region_pending = false;
}
+ get_ext_region(unallocated.start, unallocated.end, ext_region_base,
+ ext_region_size, &nr_regions);
}
}
+ if (!strcmp(dom->guest_type, "xen-3.0-aarch64")) {
+ if (pv_region_pending) {
+ LOG(ERROR, "The PV time region cannot be allocated, not enough space");
+ return ERROR_FAIL;
+ }
+ set_range(&pv_cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
+ pv_region_base[0], pv_region_size[0]);
+ len = sizeof(pv_regs[0]) * (GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS);
+ rc = fdt_setprop(fdt, resv_offset, "reg", pv_regs, len);
+ if (rc)
+ return rc;
+ }
+
/*
* The region 0 for grant table space must be always present. If we managed
* to allocate the extended regions then insert them as regions 1...N.
*/
- set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
+ set_range(&ext_cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
GUEST_GNTTAB_BASE, GUEST_GNTTAB_SIZE);
for (i = 0; i < nr_regions; i++) {
LOG(DEBUG, "Extended region %u: %#"PRIx64"->%#"PRIx64"",
- i, region_base[i], region_base[i] + region_size[i] - 1);
+ i, ext_region_base[i], ext_region_base[i] + ext_region_size[i] - 1);
- set_range(&cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
- region_base[i], region_size[i]);
+ set_range(&ext_cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
+ ext_region_base[i], ext_region_size[i]);
}
if (!nr_regions)
LOG(WARN, "The extended regions cannot be allocated, not enough space");
- len = sizeof(regs[0]) * (GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) *
+ len = sizeof(ext_regs[0]) * (GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) *
(nr_regions + 1);
- return fdt_setprop(fdt, offset, "reg", regs, len);
+ return fdt_setprop(fdt, hyp_offset, "reg", ext_regs, len);
}
int libxl__arch_domain_finalise_hw_description(libxl__gc *gc,
@@ -1698,7 +1817,7 @@ int libxl__arch_domain_finalise_hw_description(libxl__gc *gc,
}
- res = finalize_hypervisor_node(gc, &d_config->b_info, dom);
+ res = finalize_extra_regions(gc, &d_config->b_info, dom);
if (res)
return res;
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 0613c1916936..4741472ea1a0 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -180,7 +180,21 @@ int xenmem_add_to_physmap_one(
case XENMAPSPACE_dev_mmio:
rc = map_dev_mmio_page(d, gfn, _mfn(idx));
return rc;
+ case XENMAPSPACE_pv_time:
+#ifdef CONFIG_ARM_64
+ ASSERT(IS_POWER_OF_TWO(sizeof(struct pv_time_region)));
+ if ( idx >= DIV_ROUND_UP(d->max_vcpus * sizeof(struct pv_time_region),
+ PAGE_SIZE) )
+ return -EINVAL;
+
+ if ( idx == 0 )
+ d->arch.pv_time_regions_gfn = gfn;
+ mfn = virt_to_mfn(d->arch.pv_time_regions[idx]);
+ t = p2m_ram_ro;
+
+ break;
+#endif
default:
return -ENOSYS;
}
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index bd9fc37b5297..4daa703e882e 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -217,6 +217,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
Stage-2 using the Normal Memory
Inner/Outer Write-Back Cacheable
memory attribute. */
+#define XENMAPSPACE_pv_time 6 /* PV time shared region (ARM64 only) */
/* ` } */
/*
--
2.48.1
On 7/5/25 10:27, Koichiro Den wrote:
> Implement ARM DEN 0057A PV time support for domains created via the
> toolstack, utilizing the newly introduced XENMAPSPACE_pv_time.
>
> Signed-off-by: Koichiro Den <den@valinux.co.jp>
> ---
> tools/libs/light/libxl_arm.c | 185 ++++++++++++++++++++++++++++-------
> xen/arch/arm/mm.c | 14 +++
> xen/include/public/memory.h | 1 +
> 3 files changed, 167 insertions(+), 33 deletions(-)
>
> diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
> index 4a19a8d22bdf..33251520c07a 100644
> --- a/tools/libs/light/libxl_arm.c
> +++ b/tools/libs/light/libxl_arm.c
> @@ -684,6 +684,40 @@ static int make_memory_nodes(libxl__gc *gc, void *fdt,
> return 0;
> }
>
> +static int make_resv_memory_node(libxl__gc *gc, void *fdt,
> + const struct xc_dom_image *dom)
> +{
> + int res;
> +
> + if (strcmp(dom->guest_type, "xen-3.0-aarch64"))
> + /*
> + * The stolen time shared memory region for ARM DEN 0057A is currently
> + * the only user of /reserved-memory node when a domain is created via
> + * the toolstack, and it requires both the hypervisor and the domain to
> + * be AArch64.
> + */
> + return 0;
> +
> + res = fdt_begin_node(fdt, "reserved-memory");
> + if (res) return res;
> +
> + res = fdt_property_cell(fdt, "#address-cells", GUEST_ROOT_ADDRESS_CELLS);
> + if (res) return res;
> +
> + res = fdt_property_cell(fdt, "#size-cells", GUEST_ROOT_SIZE_CELLS);
> + if (res) return res;
> +
> + /* reg 0 is a placeholder for PV time region */
> + res = fdt_property_reg_placeholder(gc, fdt, GUEST_ROOT_ADDRESS_CELLS,
> + GUEST_ROOT_SIZE_CELLS, 1);
> + if (res) return res;
> +
> + res = fdt_end_node(fdt);
> + if (res) return res;
> +
> + return 0;
> +}
> +
> static int make_gicv2_node(libxl__gc *gc, void *fdt,
> uint64_t gicd_base, uint64_t gicd_size,
> uint64_t gicc_base, uint64_t gicc_size)
> @@ -1352,6 +1386,7 @@ next_resize:
> FDT( make_psci_node(gc, fdt) );
>
> FDT( make_memory_nodes(gc, fdt, dom) );
> + FDT( make_resv_memory_node(gc, fdt, dom) );
>
> switch (info->arch_arm.gic_version) {
> case LIBXL_GIC_VERSION_V2:
> @@ -1519,6 +1554,9 @@ static void finalise_one_node(libxl__gc *gc, void *fdt, const char *uname,
>
> #define EXT_REGION_MIN_SIZE xen_mk_ullong(0x0004000000) /* 64MB */
>
> +/* As per ARM DEN 0057A, stolen time memory regions are 64-byte aligned */
> +#define PV_REGIONS_PER_PAGE (XC_PAGE_SIZE / 64)
> +
> static int compare_iomem(const void *a, const void *b)
> {
> const libxl_iomem_range *x = a, *y = b;
> @@ -1530,24 +1568,92 @@ static int compare_iomem(const void *a, const void *b)
> return 0;
> }
>
> -static int finalize_hypervisor_node(libxl__gc *gc,
> - libxl_domain_build_info *b_info,
> - struct xc_dom_image *dom)
> +static int get_pv_region(libxl_domain_build_info *b_info,
> + struct xc_dom_image *dom,
> + uint64_t *start, uint64_t end,
> + uint64_t *region_base, uint64_t *region_size)
> +{
> + unsigned int npages = DIV_ROUNDUP(b_info->max_vcpus, PV_REGIONS_PER_PAGE);
> + unsigned int len = npages * XC_PAGE_SIZE;
> + uint32_t domid = dom->guest_domid;
> + xc_interface *xch = dom->xch;
> + unsigned long idx = 0;
> + uint64_t size;
> + int rc;
> +
> + if (*start >= end)
> + return -1;
> + size = end - *start;
> + if (size < len)
> + return -1;
> +
> + for (; npages; npages--, idx++) {
> + rc = xc_domain_add_to_physmap(xch, domid, XENMAPSPACE_pv_time, idx,
> + (*start >> XC_PAGE_SHIFT) + idx);
> + if (rc)
> + return rc;
> + }
> +
> + region_base[0] = *start;
> + region_size[0] = len;
> + *start += len;
> + return 0;
> +}
> +
> +static void get_ext_region(uint64_t start, uint64_t end, uint64_t *region_base,
> + uint64_t *region_size, unsigned int *nr_regions)
> +{
> + uint64_t size;
> +
> + start = ALIGN_UP_TO_2MB(start);
> + if (start >= end)
> + return;
> +
> + size = end - start;
> + if (size < EXT_REGION_MIN_SIZE)
> + return;
> +
> + region_base[*nr_regions] = start;
> + region_size[*nr_regions] = size;
> + (*nr_regions)++;
> +}
> +
> +static int finalize_extra_regions(libxl__gc *gc,
> + libxl_domain_build_info *b_info,
> + struct xc_dom_image *dom)
> {
> void *fdt = dom->devicetree_blob;
> - uint64_t region_base[MAX_NR_EXT_REGIONS], region_size[MAX_NR_EXT_REGIONS];
> - uint32_t regs[(GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) *
> +
> + /* For extended regions */
> + uint64_t ext_region_base[MAX_NR_EXT_REGIONS], ext_region_size[MAX_NR_EXT_REGIONS];
> + uint32_t ext_regs[(GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) *
> (MAX_NR_EXT_REGIONS + 1)];
> - be32 *cells = &regs[0];
> + be32 *ext_cells = &ext_regs[0];
> + int hyp_offset;
> +
> + /* For pv regions */
> + uint64_t pv_region_base[1], pv_region_size[1];
> + uint32_t pv_regs[GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS];
> + be32 *pv_cells = &pv_regs[0];
> + int resv_offset;
> +
> const uint64_t bankbase[] = GUEST_RAM_BANK_BASES;
> const uint64_t banksize[] = GUEST_RAM_BANK_SIZES;
> unsigned int i, j, len, nr_regions = 0;
> + bool pv_region_pending = true;
> libxl_dominfo info;
> - int offset, rc;
> + int rc;
>
> - offset = fdt_path_offset(fdt, "/hypervisor");
> - if (offset < 0)
> - return offset;
> + resv_offset = fdt_path_offset(fdt, "/reserved-memory");
> + if (!strcmp(dom->guest_type, "xen-3.0-aarch64")) {
> + if (resv_offset < 0)
> + return resv_offset;
> + } else
> + pv_region_pending = false;
> +
> + hyp_offset = fdt_path_offset(fdt, "/hypervisor");
> + if (hyp_offset < 0)
> + return hyp_offset;
>
> rc = libxl_domain_info(CTX, &info, dom->guest_domid);
> if (rc)
> @@ -1572,8 +1678,7 @@ static int finalize_hypervisor_node(libxl__gc *gc,
> } unallocated;
> uint64_t unallocated_size = 0;
>
> - unallocated.start = bankbase[i] +
> - ALIGN_UP_TO_2MB((uint64_t)dom->rambank_size[i] << XC_PAGE_SHIFT);
> + unallocated.start = bankbase[i] + ((uint64_t)dom->rambank_size[i] << XC_PAGE_SHIFT);
>
> unallocated.end = ~0ULL >> (64 - info.gpaddr_bits);
> unallocated.end = min(unallocated.end, bankbase[i] + banksize[i] - 1);
> @@ -1581,7 +1686,7 @@ static int finalize_hypervisor_node(libxl__gc *gc,
> if (unallocated.end >= unallocated.start)
> unallocated_size = unallocated.end - unallocated.start + 1;
>
> - if (unallocated_size < EXT_REGION_MIN_SIZE)
> + if (unallocated_size <= 0)
> continue;
>
> /* Exclude iomem */
> @@ -1605,14 +1710,14 @@ static int finalize_hypervisor_node(libxl__gc *gc,
> if (unallocated.start > unallocated.end)
> break;
> } else {
> - uint64_t size = iomem.start - unallocated.start;
> -
> - if (size >= EXT_REGION_MIN_SIZE) {
> - region_base[nr_regions] = unallocated.start;
> - region_size[nr_regions] = size;
> - nr_regions++;
> + if (pv_region_pending) {
> + rc = get_pv_region(b_info, dom, &unallocated.start, iomem.start,
> + pv_region_base, pv_region_size);
> + if (!rc)
> + pv_region_pending = false;
> }
> -
> + get_ext_region(unallocated.start, iomem.start, ext_region_base,
> + ext_region_size, &nr_regions);
> unallocated.start = iomem.end + 1;
>
> if (unallocated.start > unallocated.end)
> @@ -1624,38 +1729,52 @@ static int finalize_hypervisor_node(libxl__gc *gc,
> if (unallocated.end >= unallocated.start
> && nr_regions < MAX_NR_EXT_REGIONS)
> {
> - uint64_t size = unallocated.end - unallocated.start + 1;
> -
> - if (size >= EXT_REGION_MIN_SIZE) {
> - region_base[nr_regions] = unallocated.start;
> - region_size[nr_regions] = size;
> - nr_regions++;
> + if (pv_region_pending) {
> + rc = get_pv_region(b_info, dom, &unallocated.start, unallocated.end,
> + pv_region_base, pv_region_size);
> + if (!rc)
> + pv_region_pending = false;
> }
> + get_ext_region(unallocated.start, unallocated.end, ext_region_base,
> + ext_region_size, &nr_regions);
> }
> }
>
> + if (!strcmp(dom->guest_type, "xen-3.0-aarch64")) {
> + if (pv_region_pending) {
> + LOG(ERROR, "The PV time region cannot be allocated, not enough space");
> + return ERROR_FAIL;
> + }
> + set_range(&pv_cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
> + pv_region_base[0], pv_region_size[0]);
> + len = sizeof(pv_regs[0]) * (GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS);
> + rc = fdt_setprop(fdt, resv_offset, "reg", pv_regs, len);
I can appreciate the effort that it took to get this working. However,
given the relatively small size of PV time region, is there a reason
that you chose to allocate it from unallocated guest RAM rather than
reserving a new region/space in xen/include/public/arch-arm.h? Doing so
may also simplify the dom0less case.
E.g.:
diff --git a/xen/include/public/arch-arm.h b/xen/include/public/arch-arm.h
index e2412a17474e..681e3d4778ba 100644
--- a/xen/include/public/arch-arm.h
+++ b/xen/include/public/arch-arm.h
@@ -466,6 +466,13 @@ typedef uint64_t xen_callback_t;
#define GUEST_VPCI_MEM_ADDR xen_mk_ullong(0x23000000)
#define GUEST_VPCI_MEM_SIZE xen_mk_ullong(0x10000000)
+/* Current supported guest VCPUs */
+#define GUEST_MAX_VCPUS 128
+
+/* PV time region */
+#define GUEST_PVTIME_BASE xen_mk_ullong(0x37000000)
+#define GUEST_PVTIME_SIZE xen_mk_ullong(GUEST_MAX_VCPUS * 64)
+
/*
* 16MB == 4096 pages reserved for guest to use as a region to map its
* grant table in.
@@ -501,9 +508,6 @@ typedef uint64_t xen_callback_t;
#define GUEST_RAM_BANK_BASES { GUEST_RAM0_BASE, GUEST_RAM1_BASE }
#define GUEST_RAM_BANK_SIZES { GUEST_RAM0_SIZE, GUEST_RAM1_SIZE }
-/* Current supported guest VCPUs */
-#define GUEST_MAX_VCPUS 128
-
/* Interrupts */
#define GUEST_TIMER_VIRT_PPI 27
On Tue, Jul 08, 2025 at 05:10:46PM -0400, Stewart Hildebrand wrote:
> On 7/5/25 10:27, Koichiro Den wrote:
> > Implement ARM DEN 0057A PV time support for domains created via the
> > toolstack, utilizing the newly introduced XENMAPSPACE_pv_time.
> >
> > Signed-off-by: Koichiro Den <den@valinux.co.jp>
> > ---
> > tools/libs/light/libxl_arm.c | 185 ++++++++++++++++++++++++++++-------
> > xen/arch/arm/mm.c | 14 +++
> > xen/include/public/memory.h | 1 +
> > 3 files changed, 167 insertions(+), 33 deletions(-)
> >
> > diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
> > index 4a19a8d22bdf..33251520c07a 100644
> > --- a/tools/libs/light/libxl_arm.c
> > +++ b/tools/libs/light/libxl_arm.c
> > @@ -684,6 +684,40 @@ static int make_memory_nodes(libxl__gc *gc, void *fdt,
> > return 0;
> > }
> >
> > +static int make_resv_memory_node(libxl__gc *gc, void *fdt,
> > + const struct xc_dom_image *dom)
> > +{
> > + int res;
> > +
> > + if (strcmp(dom->guest_type, "xen-3.0-aarch64"))
> > + /*
> > + * The stolen time shared memory region for ARM DEN 0057A is currently
> > + * the only user of /reserved-memory node when a domain is created via
> > + * the toolstack, and it requires both the hypervisor and the domain to
> > + * be AArch64.
> > + */
> > + return 0;
> > +
> > + res = fdt_begin_node(fdt, "reserved-memory");
> > + if (res) return res;
> > +
> > + res = fdt_property_cell(fdt, "#address-cells", GUEST_ROOT_ADDRESS_CELLS);
> > + if (res) return res;
> > +
> > + res = fdt_property_cell(fdt, "#size-cells", GUEST_ROOT_SIZE_CELLS);
> > + if (res) return res;
> > +
> > + /* reg 0 is a placeholder for PV time region */
> > + res = fdt_property_reg_placeholder(gc, fdt, GUEST_ROOT_ADDRESS_CELLS,
> > + GUEST_ROOT_SIZE_CELLS, 1);
> > + if (res) return res;
> > +
> > + res = fdt_end_node(fdt);
> > + if (res) return res;
> > +
> > + return 0;
> > +}
> > +
> > static int make_gicv2_node(libxl__gc *gc, void *fdt,
> > uint64_t gicd_base, uint64_t gicd_size,
> > uint64_t gicc_base, uint64_t gicc_size)
> > @@ -1352,6 +1386,7 @@ next_resize:
> > FDT( make_psci_node(gc, fdt) );
> >
> > FDT( make_memory_nodes(gc, fdt, dom) );
> > + FDT( make_resv_memory_node(gc, fdt, dom) );
> >
> > switch (info->arch_arm.gic_version) {
> > case LIBXL_GIC_VERSION_V2:
> > @@ -1519,6 +1554,9 @@ static void finalise_one_node(libxl__gc *gc, void *fdt, const char *uname,
> >
> > #define EXT_REGION_MIN_SIZE xen_mk_ullong(0x0004000000) /* 64MB */
> >
> > +/* As per ARM DEN 0057A, stolen time memory regions are 64-byte aligned */
> > +#define PV_REGIONS_PER_PAGE (XC_PAGE_SIZE / 64)
> > +
> > static int compare_iomem(const void *a, const void *b)
> > {
> > const libxl_iomem_range *x = a, *y = b;
> > @@ -1530,24 +1568,92 @@ static int compare_iomem(const void *a, const void *b)
> > return 0;
> > }
> >
> > -static int finalize_hypervisor_node(libxl__gc *gc,
> > - libxl_domain_build_info *b_info,
> > - struct xc_dom_image *dom)
> > +static int get_pv_region(libxl_domain_build_info *b_info,
> > + struct xc_dom_image *dom,
> > + uint64_t *start, uint64_t end,
> > + uint64_t *region_base, uint64_t *region_size)
> > +{
> > + unsigned int npages = DIV_ROUNDUP(b_info->max_vcpus, PV_REGIONS_PER_PAGE);
> > + unsigned int len = npages * XC_PAGE_SIZE;
> > + uint32_t domid = dom->guest_domid;
> > + xc_interface *xch = dom->xch;
> > + unsigned long idx = 0;
> > + uint64_t size;
> > + int rc;
> > +
> > + if (*start >= end)
> > + return -1;
> > + size = end - *start;
> > + if (size < len)
> > + return -1;
> > +
> > + for (; npages; npages--, idx++) {
> > + rc = xc_domain_add_to_physmap(xch, domid, XENMAPSPACE_pv_time, idx,
> > + (*start >> XC_PAGE_SHIFT) + idx);
> > + if (rc)
> > + return rc;
> > + }
> > +
> > + region_base[0] = *start;
> > + region_size[0] = len;
> > + *start += len;
> > + return 0;
> > +}
> > +
> > +static void get_ext_region(uint64_t start, uint64_t end, uint64_t *region_base,
> > + uint64_t *region_size, unsigned int *nr_regions)
> > +{
> > + uint64_t size;
> > +
> > + start = ALIGN_UP_TO_2MB(start);
> > + if (start >= end)
> > + return;
> > +
> > + size = end - start;
> > + if (size < EXT_REGION_MIN_SIZE)
> > + return;
> > +
> > + region_base[*nr_regions] = start;
> > + region_size[*nr_regions] = size;
> > + (*nr_regions)++;
> > +}
> > +
> > +static int finalize_extra_regions(libxl__gc *gc,
> > + libxl_domain_build_info *b_info,
> > + struct xc_dom_image *dom)
> > {
> > void *fdt = dom->devicetree_blob;
> > - uint64_t region_base[MAX_NR_EXT_REGIONS], region_size[MAX_NR_EXT_REGIONS];
> > - uint32_t regs[(GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) *
> > +
> > + /* For extended regions */
> > + uint64_t ext_region_base[MAX_NR_EXT_REGIONS], ext_region_size[MAX_NR_EXT_REGIONS];
> > + uint32_t ext_regs[(GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS) *
> > (MAX_NR_EXT_REGIONS + 1)];
> > - be32 *cells = &regs[0];
> > + be32 *ext_cells = &ext_regs[0];
> > + int hyp_offset;
> > +
> > + /* For pv regions */
> > + uint64_t pv_region_base[1], pv_region_size[1];
> > + uint32_t pv_regs[GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS];
> > + be32 *pv_cells = &pv_regs[0];
> > + int resv_offset;
> > +
> > const uint64_t bankbase[] = GUEST_RAM_BANK_BASES;
> > const uint64_t banksize[] = GUEST_RAM_BANK_SIZES;
> > unsigned int i, j, len, nr_regions = 0;
> > + bool pv_region_pending = true;
> > libxl_dominfo info;
> > - int offset, rc;
> > + int rc;
> >
> > - offset = fdt_path_offset(fdt, "/hypervisor");
> > - if (offset < 0)
> > - return offset;
> > + resv_offset = fdt_path_offset(fdt, "/reserved-memory");
> > + if (!strcmp(dom->guest_type, "xen-3.0-aarch64")) {
> > + if (resv_offset < 0)
> > + return resv_offset;
> > + } else
> > + pv_region_pending = false;
> > +
> > + hyp_offset = fdt_path_offset(fdt, "/hypervisor");
> > + if (hyp_offset < 0)
> > + return hyp_offset;
> >
> > rc = libxl_domain_info(CTX, &info, dom->guest_domid);
> > if (rc)
> > @@ -1572,8 +1678,7 @@ static int finalize_hypervisor_node(libxl__gc *gc,
> > } unallocated;
> > uint64_t unallocated_size = 0;
> >
> > - unallocated.start = bankbase[i] +
> > - ALIGN_UP_TO_2MB((uint64_t)dom->rambank_size[i] << XC_PAGE_SHIFT);
> > + unallocated.start = bankbase[i] + ((uint64_t)dom->rambank_size[i] << XC_PAGE_SHIFT);
> >
> > unallocated.end = ~0ULL >> (64 - info.gpaddr_bits);
> > unallocated.end = min(unallocated.end, bankbase[i] + banksize[i] - 1);
> > @@ -1581,7 +1686,7 @@ static int finalize_hypervisor_node(libxl__gc *gc,
> > if (unallocated.end >= unallocated.start)
> > unallocated_size = unallocated.end - unallocated.start + 1;
> >
> > - if (unallocated_size < EXT_REGION_MIN_SIZE)
> > + if (unallocated_size <= 0)
> > continue;
> >
> > /* Exclude iomem */
> > @@ -1605,14 +1710,14 @@ static int finalize_hypervisor_node(libxl__gc *gc,
> > if (unallocated.start > unallocated.end)
> > break;
> > } else {
> > - uint64_t size = iomem.start - unallocated.start;
> > -
> > - if (size >= EXT_REGION_MIN_SIZE) {
> > - region_base[nr_regions] = unallocated.start;
> > - region_size[nr_regions] = size;
> > - nr_regions++;
> > + if (pv_region_pending) {
> > + rc = get_pv_region(b_info, dom, &unallocated.start, iomem.start,
> > + pv_region_base, pv_region_size);
> > + if (!rc)
> > + pv_region_pending = false;
> > }
> > -
> > + get_ext_region(unallocated.start, iomem.start, ext_region_base,
> > + ext_region_size, &nr_regions);
> > unallocated.start = iomem.end + 1;
> >
> > if (unallocated.start > unallocated.end)
> > @@ -1624,38 +1729,52 @@ static int finalize_hypervisor_node(libxl__gc *gc,
> > if (unallocated.end >= unallocated.start
> > && nr_regions < MAX_NR_EXT_REGIONS)
> > {
> > - uint64_t size = unallocated.end - unallocated.start + 1;
> > -
> > - if (size >= EXT_REGION_MIN_SIZE) {
> > - region_base[nr_regions] = unallocated.start;
> > - region_size[nr_regions] = size;
> > - nr_regions++;
> > + if (pv_region_pending) {
> > + rc = get_pv_region(b_info, dom, &unallocated.start, unallocated.end,
> > + pv_region_base, pv_region_size);
> > + if (!rc)
> > + pv_region_pending = false;
> > }
> > + get_ext_region(unallocated.start, unallocated.end, ext_region_base,
> > + ext_region_size, &nr_regions);
> > }
> > }
> >
> > + if (!strcmp(dom->guest_type, "xen-3.0-aarch64")) {
> > + if (pv_region_pending) {
> > + LOG(ERROR, "The PV time region cannot be allocated, not enough space");
> > + return ERROR_FAIL;
> > + }
> > + set_range(&pv_cells, GUEST_ROOT_ADDRESS_CELLS, GUEST_ROOT_SIZE_CELLS,
> > + pv_region_base[0], pv_region_size[0]);
> > + len = sizeof(pv_regs[0]) * (GUEST_ROOT_ADDRESS_CELLS + GUEST_ROOT_SIZE_CELLS);
> > + rc = fdt_setprop(fdt, resv_offset, "reg", pv_regs, len);
>
> I can appreciate the effort that it took to get this working. However,
> given the relatively small size of PV time region, is there a reason
> that you chose to allocate it from unallocated guest RAM rather than
> reserving a new region/space in xen/include/public/arch-arm.h? Doing so
> may also simplify the dom0less case.
I thought that hard-coded fixed assignment should be avoided wherever
possible, but there is no strong reason other than that.
Thanks.
-Koichiro
>
> E.g.:
>
> diff --git a/xen/include/public/arch-arm.h b/xen/include/public/arch-arm.h
> index e2412a17474e..681e3d4778ba 100644
> --- a/xen/include/public/arch-arm.h
> +++ b/xen/include/public/arch-arm.h
> @@ -466,6 +466,13 @@ typedef uint64_t xen_callback_t;
> #define GUEST_VPCI_MEM_ADDR xen_mk_ullong(0x23000000)
> #define GUEST_VPCI_MEM_SIZE xen_mk_ullong(0x10000000)
>
> +/* Current supported guest VCPUs */
> +#define GUEST_MAX_VCPUS 128
> +
> +/* PV time region */
> +#define GUEST_PVTIME_BASE xen_mk_ullong(0x37000000)
> +#define GUEST_PVTIME_SIZE xen_mk_ullong(GUEST_MAX_VCPUS * 64)
> +
> /*
> * 16MB == 4096 pages reserved for guest to use as a region to map its
> * grant table in.
> @@ -501,9 +508,6 @@ typedef uint64_t xen_callback_t;
> #define GUEST_RAM_BANK_BASES { GUEST_RAM0_BASE, GUEST_RAM1_BASE }
> #define GUEST_RAM_BANK_SIZES { GUEST_RAM0_SIZE, GUEST_RAM1_SIZE }
>
> -/* Current supported guest VCPUs */
> -#define GUEST_MAX_VCPUS 128
> -
> /* Interrupts */
>
> #define GUEST_TIMER_VIRT_PPI 27
>
On 05.07.2025 16:27, Koichiro Den wrote:
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -180,7 +180,21 @@ int xenmem_add_to_physmap_one(
> case XENMAPSPACE_dev_mmio:
> rc = map_dev_mmio_page(d, gfn, _mfn(idx));
> return rc;
> + case XENMAPSPACE_pv_time:
> +#ifdef CONFIG_ARM_64
These two lines want to change places, I think.
> + ASSERT(IS_POWER_OF_TWO(sizeof(struct pv_time_region)));
BUILD_BUG_ON() please, so that an issue here can be spotted at build time
rather than only at runtime.
> + if ( idx >= DIV_ROUND_UP(d->max_vcpus * sizeof(struct pv_time_region),
> + PAGE_SIZE) )
> + return -EINVAL;
> +
> + if ( idx == 0 )
> + d->arch.pv_time_regions_gfn = gfn;
This looks fragile, as it'll break once d->max_vcpus can grow large enough to
permit a non-zero idx by way of the earlier if() falling through. Imo you'll
need at least one further BUILD_BUG_ON() here.
>
> + mfn = virt_to_mfn(d->arch.pv_time_regions[idx]);
> + t = p2m_ram_ro;
Is this the correct type to use here? That is, do you really mean guest write
attempts to be silently dropped, rather than being reported to the guest as a
fault? Then again I can't see such behavior being implemented on Arm, despite
the comment on the enumerator saying so (likely inherited from x86).
> + break;
> +#endif
> default:
> return -ENOSYS;
> }
As to style: Please, rather than absorbing the blank line that was there, make
sure non-fall-through case blocks are separated from adjacent ones by a blank
line.
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -217,6 +217,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
> Stage-2 using the Normal Memory
> Inner/Outer Write-Back Cacheable
> memory attribute. */
> +#define XENMAPSPACE_pv_time 6 /* PV time shared region (ARM64 only) */
The comment isn't specific enough. As per the struct declaration in patch 4,
this interface is solely about stolen time. There's a wider PV interface,
which at least x86 Linux also uses, and which has been adopted by KVM as
well iirc. Hence this new type wants to clarify what exactly it's used for
right now, while leaving open other uses on other architectures.
Jan
On Mon, Jul 07, 2025 at 10:01:47AM +0200, Jan Beulich wrote:
> On 05.07.2025 16:27, Koichiro Den wrote:
> > --- a/xen/arch/arm/mm.c
> > +++ b/xen/arch/arm/mm.c
> > @@ -180,7 +180,21 @@ int xenmem_add_to_physmap_one(
> > case XENMAPSPACE_dev_mmio:
> > rc = map_dev_mmio_page(d, gfn, _mfn(idx));
> > return rc;
> > + case XENMAPSPACE_pv_time:
> > +#ifdef CONFIG_ARM_64
>
> These two lines want to change places, I think.
Will fix it, thank you.
>
> > + ASSERT(IS_POWER_OF_TWO(sizeof(struct pv_time_region)));
>
> BUILD_BUG_ON() please, so that an issue here can be spotted at build time
> rather than only at runtime.
>
> > + if ( idx >= DIV_ROUND_UP(d->max_vcpus * sizeof(struct pv_time_region),
> > + PAGE_SIZE) )
> > + return -EINVAL;
> > +
> > + if ( idx == 0 )
> > + d->arch.pv_time_regions_gfn = gfn;
>
> This looks fragile, as it'll break once d->max_vcpus can grow large enough to
> permit a non-zero idx by way of the earlier if() falling through. Imo you'll
> need at least one further BUILD_BUG_ON() here.
get_pv_region() can legitimately call xc_domain_add_to_physmap(..,
XENMAPSPACE_pv_time, ..) with idx > 0, but only if the preceding call with
idx == 0 succeeded. So while this may look odd at first glance, I think
this is not broken. What do you think?
>
> >
> > + mfn = virt_to_mfn(d->arch.pv_time_regions[idx]);
> > + t = p2m_ram_ro;
>
> Is this the correct type to use here? That is, do you really mean guest write
> attempts to be silently dropped, rather than being reported to the guest as a
> fault? Then again I can't see such behavior being implemented on Arm, despite
> the comment on the enumerator saying so (likely inherited from x86).
No I didn't intend the "silently drop" behavior. IIUC, we may as well
correct the comment on the enum for Arm:
diff --git a/xen/arch/arm/include/asm/p2m.h b/xen/arch/arm/include/asm/p2m.h
index 2d53bf9b6177..927c588dbcb0 100644
--- a/xen/arch/arm/include/asm/p2m.h
+++ b/xen/arch/arm/include/asm/p2m.h
@@ -123,7 +123,7 @@ struct p2m_domain {
typedef enum {
p2m_invalid = 0, /* Nothing mapped here */
p2m_ram_rw, /* Normal read/write guest RAM */
- p2m_ram_ro, /* Read-only; writes are silently dropped */
+ p2m_ram_ro, /* Read-only */
>
> > + break;
> > +#endif
> > default:
> > return -ENOSYS;
> > }
>
> As to style: Please, rather than absorbing the blank line that was there, make
> sure non-fall-through case blocks are separated from adjacent ones by a blank
> line.
Will do so in the next take.
>
> > --- a/xen/include/public/memory.h
> > +++ b/xen/include/public/memory.h
> > @@ -217,6 +217,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
> > Stage-2 using the Normal Memory
> > Inner/Outer Write-Back Cacheable
> > memory attribute. */
> > +#define XENMAPSPACE_pv_time 6 /* PV time shared region (ARM64 only) */
>
> The comment isn't specific enough. As per the struct declaration in patch 4,
> this interface is solely about stolen time. There's a wider PV interface,
> which at least x86 Linux also uses, and which has been adopted by KVM as
> well iirc. Hence this new type wants to clarify what exactly it's used for
> right now, while leaving open other uses on other architectures.
That sounds reasonable, I'll update it in the next iteration.
Thanks for the review.
-Koichiro
>
> Jan
On 09.07.2025 10:04, Koichiro Den wrote:
> On Mon, Jul 07, 2025 at 10:01:47AM +0200, Jan Beulich wrote:
>> On 05.07.2025 16:27, Koichiro Den wrote:
>>> --- a/xen/arch/arm/mm.c
>>> +++ b/xen/arch/arm/mm.c
>>> @@ -180,7 +180,21 @@ int xenmem_add_to_physmap_one(
>>> case XENMAPSPACE_dev_mmio:
>>> rc = map_dev_mmio_page(d, gfn, _mfn(idx));
>>> return rc;
>>> + case XENMAPSPACE_pv_time:
>>> +#ifdef CONFIG_ARM_64
>>
>> These two lines want to change places, I think.
>
> Will fix it, thank you.
>
>>
>>> + ASSERT(IS_POWER_OF_TWO(sizeof(struct pv_time_region)));
>>
>> BUILD_BUG_ON() please, so that an issue here can be spotted at build time
>> rather than only at runtime.
>>
>>> + if ( idx >= DIV_ROUND_UP(d->max_vcpus * sizeof(struct pv_time_region),
>>> + PAGE_SIZE) )
>>> + return -EINVAL;
>>> +
>>> + if ( idx == 0 )
>>> + d->arch.pv_time_regions_gfn = gfn;
>>
>> This looks fragile, as it'll break once d->max_vcpus can grow large enough to
>> permit a non-zero idx by way of the earlier if() falling through. Imo you'll
>> need at least one further BUILD_BUG_ON() here.
>
> get_pv_region() can legitimately call xc_domain_add_to_physmap(..,
> XENMAPSPACE_pv_time, ..) with idx > 0, but only if the preceding call with
> idx == 0 succeeded.
Can it? What's the intended effect then, when only the idx == 0 case is fully
handled here?
> So while this may look odd at first glance, I think
> this is not broken. What do you think?
The GFN not being recorded anywhere means the call has no effect, while giving
the caller the impression that there was one (by way of returning success).
>>> + mfn = virt_to_mfn(d->arch.pv_time_regions[idx]);
>>> + t = p2m_ram_ro;
>>
>> Is this the correct type to use here? That is, do you really mean guest write
>> attempts to be silently dropped, rather than being reported to the guest as a
>> fault? Then again I can't see such behavior being implemented on Arm, despite
>> the comment on the enumerator saying so (likely inherited from x86).
>
> No I didn't intend the "silently drop" behavior. IIUC, we may as well
> correct the comment on the enum for Arm:
>
> diff --git a/xen/arch/arm/include/asm/p2m.h b/xen/arch/arm/include/asm/p2m.h
> index 2d53bf9b6177..927c588dbcb0 100644
> --- a/xen/arch/arm/include/asm/p2m.h
> +++ b/xen/arch/arm/include/asm/p2m.h
> @@ -123,7 +123,7 @@ struct p2m_domain {
> typedef enum {
> p2m_invalid = 0, /* Nothing mapped here */
> p2m_ram_rw, /* Normal read/write guest RAM */
> - p2m_ram_ro, /* Read-only; writes are silently dropped */
> + p2m_ram_ro, /* Read-only */
Don't know whether that's a good idea, as it'll make Arm diverge from the
same-name P2M type that x86 has. (Arguably x86's type may better be named
p2m_ram_write_ignore or some such.)
Jan
On Wed, Jul 09, 2025 at 11:16:02AM +0200, Jan Beulich wrote:
> On 09.07.2025 10:04, Koichiro Den wrote:
> > On Mon, Jul 07, 2025 at 10:01:47AM +0200, Jan Beulich wrote:
> >> On 05.07.2025 16:27, Koichiro Den wrote:
> >>> --- a/xen/arch/arm/mm.c
> >>> +++ b/xen/arch/arm/mm.c
> >>> @@ -180,7 +180,21 @@ int xenmem_add_to_physmap_one(
> >>> case XENMAPSPACE_dev_mmio:
> >>> rc = map_dev_mmio_page(d, gfn, _mfn(idx));
> >>> return rc;
> >>> + case XENMAPSPACE_pv_time:
> >>> +#ifdef CONFIG_ARM_64
> >>
> >> These two lines want to change places, I think.
> >
> > Will fix it, thank you.
> >
> >>
> >>> + ASSERT(IS_POWER_OF_TWO(sizeof(struct pv_time_region)));
> >>
> >> BUILD_BUG_ON() please, so that an issue here can be spotted at build time
> >> rather than only at runtime.
> >>
> >>> + if ( idx >= DIV_ROUND_UP(d->max_vcpus * sizeof(struct pv_time_region),
> >>> + PAGE_SIZE) )
> >>> + return -EINVAL;
> >>> +
> >>> + if ( idx == 0 )
> >>> + d->arch.pv_time_regions_gfn = gfn;
> >>
> >> This looks fragile, as it'll break once d->max_vcpus can grow large enough to
> >> permit a non-zero idx by way of the earlier if() falling through. Imo you'll
> >> need at least one further BUILD_BUG_ON() here.
> >
> > get_pv_region() can legitimately call xc_domain_add_to_physmap(..,
> > XENMAPSPACE_pv_time, ..) with idx > 0, but only if the preceding call with
> > idx == 0 succeeded.
>
> Can it? What's the intended effect then, when only the idx == 0 case is fully
> handled here?
GFNs are always made contiguous in this implementation (while MFNs might
not be), so it can register the whole range in one go upon recognizing the
starting GFN (when idx == 0). It's ugly indeed, as it implicitly requires
that the caller first invoke it with idx == 0 and abort if that call fails.
That said, after reading other feedback, I'm thinking of simplifying
the whole patch series (as I just said in [1]), and I believe this
unnecessarily complicated/confusing stuff will disappear.
[1] https://lore.kernel.org/xen-devel/20250705142703.2769819-1-den@valinux.co.jp/T/#medaa074cd863c05606bfdb6280cd4ccb88803bc7
>
> > So while this may look odd at first glance, I think
> > this is not broken. What do you think?
>
> The GFN not being recorded anywhere means the call has no effect, while giving
> the caller the impression that there was one (by way of returning success).
>
> >>> + mfn = virt_to_mfn(d->arch.pv_time_regions[idx]);
> >>> + t = p2m_ram_ro;
> >>
> >> Is this the correct type to use here? That is, do you really mean guest write
> >> attempts to be silently dropped, rather than being reported to the guest as a
> >> fault? Then again I can't see such behavior being implemented on Arm, despite
> >> the comment on the enumerator saying so (likely inherited from x86).
> >
> > No I didn't intend the "silently drop" behavior. IIUC, we may as well
> > correct the comment on the enum for Arm:
> >
> > diff --git a/xen/arch/arm/include/asm/p2m.h b/xen/arch/arm/include/asm/p2m.h
> > index 2d53bf9b6177..927c588dbcb0 100644
> > --- a/xen/arch/arm/include/asm/p2m.h
> > +++ b/xen/arch/arm/include/asm/p2m.h
> > @@ -123,7 +123,7 @@ struct p2m_domain {
> > typedef enum {
> > p2m_invalid = 0, /* Nothing mapped here */
> > p2m_ram_rw, /* Normal read/write guest RAM */
> > - p2m_ram_ro, /* Read-only; writes are silently dropped */
> > + p2m_ram_ro, /* Read-only */
>
> Don't know whether that's a good idea, as it'll diverge Arm from the same-
> name P2M type that x86 has. (Arguably x86'es type may better be named
> p2m_ram_write_ignore or some such.)
Thanks for sharing your thoughts. Incidentally, there seems to be the same
comment in ppc's p2m.h as well. I'm not sure at all, but I'm guessing that
the "writes are silently dropped" line would make sense only if PoD and/or
VM forking were to be supported by arm/ppc, and leaving it as it is for
arm/ppc might be acceptable at the moment.
>
> Jan
On 12.07.2025 10:31, Koichiro Den wrote:
> On Wed, Jul 09, 2025 at 11:16:02AM +0200, Jan Beulich wrote:
>> On 09.07.2025 10:04, Koichiro Den wrote:
>>> On Mon, Jul 07, 2025 at 10:01:47AM +0200, Jan Beulich wrote:
>>>> On 05.07.2025 16:27, Koichiro Den wrote:
>>>>> + mfn = virt_to_mfn(d->arch.pv_time_regions[idx]);
>>>>> + t = p2m_ram_ro;
>>>>
>>>> Is this the correct type to use here? That is, do you really mean guest write
>>>> attempts to be silently dropped, rather than being reported to the guest as a
>>>> fault? Then again I can't see such behavior being implemented on Arm, despite
>>>> the comment on the enumerator saying so (likely inherited from x86).
>>>
>>> No I didn't intend the "silently drop" behavior. IIUC, we may as well
>>> correct the comment on the enum for Arm:
>>>
>>> diff --git a/xen/arch/arm/include/asm/p2m.h b/xen/arch/arm/include/asm/p2m.h
>>> index 2d53bf9b6177..927c588dbcb0 100644
>>> --- a/xen/arch/arm/include/asm/p2m.h
>>> +++ b/xen/arch/arm/include/asm/p2m.h
>>> @@ -123,7 +123,7 @@ struct p2m_domain {
>>> typedef enum {
>>> p2m_invalid = 0, /* Nothing mapped here */
>>> p2m_ram_rw, /* Normal read/write guest RAM */
>>> - p2m_ram_ro, /* Read-only; writes are silently dropped */
>>> + p2m_ram_ro, /* Read-only */
>>
>> Don't know whether that's a good idea, as it'll diverge Arm from the same-
>> name P2M type that x86 has. (Arguably x86'es type may better be named
>> p2m_ram_write_ignore or some such.)
>
> Thanks for sharing your thought. Incidentally, there seems to be the same
> comment in ppc's p2m.h as well.
That's likely (sorry to be blunt) due to blindly copying from Arm.
> I'm not sure at all but I'm guessing that
> "writes are silently dropped" line would make sense only when PoD
You can't silently drop writes in PoD mode, and PoD entries also have their
own type. All you can drop there are writes which store 0.
> and/or VM
> forking is to be supported by arm/ppc, and leaving it as it is for arm/ppc
> might be acceptable at the moment.
Leaving _behavior_ as is may indeed be acceptable. But naming and commentary
would still better change for things to be consistent across architectures,
rather than explicitly confusing people.
Jan
© 2016 - 2025 Red Hat, Inc.