Cache coloring requires Dom0 not to be direct-mapped because of its non
contiguous mapping nature, so allocate_memory() is needed in this case.
8d2c3ab18cc1 ("arm/dom0less: put dom0less feature code in a separate module")
moved allocate_memory() in dom0less_build.c. In order to use it
in Dom0 construction bring it back to domain_build.c and declare it in
domain_build.h.
Take the opportunity to adapt the implementation of allocate_memory() so
that it uses the host layout when called on the hwdom, via
find_unallocated_memory().
Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech>
---
v11:
- GUEST_RAM_BANKS instead of hardcoding the number of banks in allocate_memory()
- hwdom_ext_regions -> hwdom_free_mem in allocate_memory()
- added a comment in allocate_memory() when skipping small banks
v10:
- fixed a compilation bug that happened when dom0less support was disabled
v9:
- no changes
v8:
- patch adapted to new changes to allocate_memory()
v7:
- allocate_memory() now uses the host layout when called on the hwdom
v6:
- new patch
---
xen/arch/arm/dom0less-build.c | 44 -----------
xen/arch/arm/domain_build.c | 97 ++++++++++++++++++++++++-
xen/arch/arm/include/asm/domain_build.h | 1 +
3 files changed, 94 insertions(+), 48 deletions(-)
diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c
index d93a85434e..67b1503647 100644
--- a/xen/arch/arm/dom0less-build.c
+++ b/xen/arch/arm/dom0less-build.c
@@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void)
return ( !dom0found && domUfound );
}
-static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo)
-{
- struct membanks *mem = kernel_info_get_mem(kinfo);
- unsigned int i;
- paddr_t bank_size;
-
- printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n",
- /* Don't want format this as PRIpaddr (16 digit hex) */
- (unsigned long)(kinfo->unassigned_mem >> 20), d);
-
- mem->nr_banks = 0;
- bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem);
- if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE),
- bank_size) )
- goto fail;
-
- bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem);
- if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE),
- bank_size) )
- goto fail;
-
- if ( kinfo->unassigned_mem )
- goto fail;
-
- for( i = 0; i < mem->nr_banks; i++ )
- {
- printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n",
- d,
- i,
- mem->bank[i].start,
- mem->bank[i].start + mem->bank[i].size,
- /* Don't want format this as PRIpaddr (16 digit hex) */
- (unsigned long)(mem->bank[i].size >> 20));
- }
-
- return;
-
-fail:
- panic("Failed to allocate requested domain memory."
- /* Don't want format this as PRIpaddr (16 digit hex) */
- " %ldKB unallocated. Fix the VMs configurations.\n",
- (unsigned long)kinfo->unassigned_mem >> 10);
-}
-
#ifdef CONFIG_VGICV2
static int __init make_gicv2_domU_node(struct kernel_info *kinfo)
{
diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index 2c30792de8..2b8cba9b2f 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -416,7 +416,6 @@ static void __init allocate_memory_11(struct domain *d,
}
}
-#ifdef CONFIG_DOM0LESS_BOOT
bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size,
alloc_domheap_mem_cb cb, void *extra)
{
@@ -508,7 +507,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn,
return true;
}
-#endif
/*
* When PCI passthrough is available we want to keep the
@@ -1003,6 +1001,94 @@ out:
return res;
}
+void __init allocate_memory(struct domain *d, struct kernel_info *kinfo)
+{
+ struct membanks *mem = kernel_info_get_mem(kinfo);
+ unsigned int i, nr_banks = GUEST_RAM_BANKS;
+ paddr_t bank_start, bank_size;
+ struct membanks *hwdom_free_mem = NULL;
+ const uint64_t bankbase[] = GUEST_RAM_BANK_BASES;
+ const uint64_t banksize[] = GUEST_RAM_BANK_SIZES;
+
+ printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n",
+ /* Don't want format this as PRIpaddr (16 digit hex) */
+ (unsigned long)(kinfo->unassigned_mem >> 20), d);
+
+ mem->nr_banks = 0;
+ /*
+ * Use host memory layout for hwdom. Only case for this is when LLC coloring
+ * is enabled.
+ */
+ if ( is_hardware_domain(d) )
+ {
+ ASSERT(llc_coloring_enabled);
+
+ hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank,
+ NR_MEM_BANKS);
+ if ( !hwdom_free_mem )
+ goto fail;
+
+ hwdom_free_mem->max_banks = NR_MEM_BANKS;
+
+ if ( find_unallocated_memory(kinfo, hwdom_free_mem) )
+ goto fail;
+
+ nr_banks = hwdom_free_mem->nr_banks;
+ }
+
+ for ( i = 0; kinfo->unassigned_mem > 0 && nr_banks > 0; i++, nr_banks-- )
+ {
+ if ( is_hardware_domain(d) )
+ {
+ bank_start = hwdom_free_mem->bank[i].start;
+ bank_size = hwdom_free_mem->bank[i].size;
+
+ /*
+ * Skip banks that are too small. The first bank must contain
+ * dom0 kernel + ramdisk + dtb and 128 MB is the same limit used
+ * in allocate_memory_11().
+ */
+ if ( bank_size < min_t(paddr_t, kinfo->unassigned_mem, MB(128)) )
+ continue;
+ }
+ else
+ {
+ if ( i >= GUEST_RAM_BANKS )
+ goto fail;
+
+ bank_start = bankbase[i];
+ bank_size = banksize[i];
+ }
+
+ bank_size = MIN(bank_size, kinfo->unassigned_mem);
+ if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(bank_start), bank_size) )
+ goto fail;
+ }
+
+ if ( kinfo->unassigned_mem )
+ goto fail;
+
+ for( i = 0; i < mem->nr_banks; i++ )
+ {
+ printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n",
+ d,
+ i,
+ mem->bank[i].start,
+ mem->bank[i].start + mem->bank[i].size,
+ /* Don't want format this as PRIpaddr (16 digit hex) */
+ (unsigned long)(mem->bank[i].size >> 20));
+ }
+
+ xfree(hwdom_free_mem);
+ return;
+
+fail:
+ panic("Failed to allocate requested domain memory."
+ /* Don't want format this as PRIpaddr (16 digit hex) */
+ " %ldKB unallocated. Fix the VMs configurations.\n",
+ (unsigned long)kinfo->unassigned_mem >> 10);
+}
+
static int __init handle_pci_range(const struct dt_device_node *dev,
uint64_t addr, uint64_t len, void *data)
{
@@ -1223,7 +1309,7 @@ int __init make_hypervisor_node(struct domain *d,
ext_regions->max_banks = NR_MEM_BANKS;
- if ( is_domain_direct_mapped(d) )
+ if ( domain_use_host_layout(d) )
{
if ( !is_iommu_enabled(d) )
res = find_unallocated_memory(kinfo, ext_regions);
@@ -2152,7 +2238,10 @@ static int __init construct_dom0(struct domain *d)
/* type must be set before allocate_memory */
d->arch.type = kinfo.type;
#endif
- allocate_memory_11(d, &kinfo);
+ if ( is_domain_direct_mapped(d) )
+ allocate_memory_11(d, &kinfo);
+ else
+ allocate_memory(d, &kinfo);
find_gnttab_region(d, &kinfo);
rc = process_shm_chosen(d, &kinfo);
diff --git a/xen/arch/arm/include/asm/domain_build.h b/xen/arch/arm/include/asm/domain_build.h
index e712afbc7f..5d77af2e8b 100644
--- a/xen/arch/arm/include/asm/domain_build.h
+++ b/xen/arch/arm/include/asm/domain_build.h
@@ -11,6 +11,7 @@ bool allocate_domheap_memory(struct domain *d, paddr_t tot_size,
alloc_domheap_mem_cb cb, void *extra);
bool allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn,
paddr_t tot_size);
+void allocate_memory(struct domain *d, struct kernel_info *kinfo);
int construct_domain(struct domain *d, struct kernel_info *kinfo);
int domain_fdt_begin_node(void *fdt, const char *name, uint64_t unit);
int make_chosen_node(const struct kernel_info *kinfo);
--
2.43.0
On 02/12/2024 17:59, Carlo Nonato wrote: > > > Cache coloring requires Dom0 not to be direct-mapped because of its non > contiguous mapping nature, so allocate_memory() is needed in this case. > 8d2c3ab18cc1 ("arm/dom0less: put dom0less feature code in a separate module") > moved allocate_memory() in dom0less_build.c. In order to use it > in Dom0 construction bring it back to domain_build.c and declare it in > domain_build.h. > > Take the opportunity to adapt the implementation of allocate_memory() so > that it uses the host layout when called on the hwdom, via > find_unallocated_memory(). > > Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech> > --- > v11: > - GUEST_RAM_BANKS instead of hardcoding the number of banks in allocate_memory() > - hwdom_ext_regions -> hwdom_free_mem in allocate_memory() > - added a comment in allocate_memory() when skipping small banks > v10: > - fixed a compilation bug that happened when dom0less support was disabled > v9: > - no changes > v8: > - patch adapted to new changes to allocate_memory() > v7: > - allocate_memory() now uses the host layout when called on the hwdom > v6: > - new patch > --- > xen/arch/arm/dom0less-build.c | 44 ----------- > xen/arch/arm/domain_build.c | 97 ++++++++++++++++++++++++- > xen/arch/arm/include/asm/domain_build.h | 1 + > 3 files changed, 94 insertions(+), 48 deletions(-) > > diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c > index d93a85434e..67b1503647 100644 > --- a/xen/arch/arm/dom0less-build.c > +++ b/xen/arch/arm/dom0less-build.c > @@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void) > return ( !dom0found && domUfound ); > } > > -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) > -{ > - struct membanks *mem = kernel_info_get_mem(kinfo); > - unsigned int i; > - paddr_t bank_size; > - > - printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", > - /* Don't want format this as PRIpaddr (16 digit hex) */ > - (unsigned long)(kinfo->unassigned_mem >> 20), d); > - > - mem->nr_banks = 0; > - bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem); > - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE), > - bank_size) ) > - goto fail; > - > - bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem); > - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE), > - bank_size) ) > - goto fail; > - > - if ( kinfo->unassigned_mem ) > - goto fail; > - > - for( i = 0; i < mem->nr_banks; i++ ) > - { > - printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", > - d, > - i, > - mem->bank[i].start, > - mem->bank[i].start + mem->bank[i].size, > - /* Don't want format this as PRIpaddr (16 digit hex) */ > - (unsigned long)(mem->bank[i].size >> 20)); > - } > - > - return; > - > -fail: > - panic("Failed to allocate requested domain memory." > - /* Don't want format this as PRIpaddr (16 digit hex) */ > - " %ldKB unallocated. Fix the VMs configurations.\n", > - (unsigned long)kinfo->unassigned_mem >> 10); > -} > - > #ifdef CONFIG_VGICV2 > static int __init make_gicv2_domU_node(struct kernel_info *kinfo) > { > diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > index 2c30792de8..2b8cba9b2f 100644 > --- a/xen/arch/arm/domain_build.c > +++ b/xen/arch/arm/domain_build.c > @@ -416,7 +416,6 @@ static void __init allocate_memory_11(struct domain *d, > } > } > > -#ifdef CONFIG_DOM0LESS_BOOT > bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size, > alloc_domheap_mem_cb cb, void *extra) > { > @@ -508,7 +507,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, > > return true; > } > -#endif > > /* > * When PCI passthrough is available we want to keep the > @@ -1003,6 +1001,94 @@ out: > return res; > } > > +void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) > +{ > + struct membanks *mem = kernel_info_get_mem(kinfo); > + unsigned int i, nr_banks = GUEST_RAM_BANKS; > + paddr_t bank_start, bank_size; Limit the scope > + struct membanks *hwdom_free_mem = NULL; > + const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; > + const uint64_t banksize[] = GUEST_RAM_BANK_SIZES; Limit the scope > + > + printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", > + /* Don't want format this as PRIpaddr (16 digit hex) */ > + (unsigned long)(kinfo->unassigned_mem >> 20), d); > + > + mem->nr_banks = 0; > + /* > + * Use host memory layout for hwdom. Only case for this is when LLC coloring > + * is enabled. > + */ > + if ( is_hardware_domain(d) ) > + { > + ASSERT(llc_coloring_enabled); This patch does not build because of declaration not being visible. You must include <xen/llc-coloring.h>. > + > + hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank, > + NR_MEM_BANKS); > + if ( !hwdom_free_mem ) > + goto fail; > + > + hwdom_free_mem->max_banks = NR_MEM_BANKS; > + > + if ( find_unallocated_memory(kinfo, hwdom_free_mem) ) My remarks for the use of find_unallocated_memory() 1:1 have not been addressed. You did not even change the comments inside the function. The problem is that the function is specifically designed for finding extended regions and assumes being called at certain point i.e. dom0 RAM allocated, gnttab region allocated, etc. My opinion is that we should attempt to make the function generic so that in your case you can choose which regions to exclude, define even your own function to grab free regions (at the moment add_ext_regions grabs banks >= 64M but you still discards banks >= 128M, so it's a bit wasteful. My very short attempt to make the function as generic as possible in the first iteration: https://paste.debian.net/1338334/ For coloring, you could define your own add_free_regions and only pass RSVD and GNTTAB banks to be excluded. As said before, I still wait for other Arm maintainers to provide their own opinion. ~Michal
Hi, Sorry for the late answer. On 05/12/2024 09:40, Michal Orzel wrote: > > > On 02/12/2024 17:59, Carlo Nonato wrote: >> >> >> Cache coloring requires Dom0 not to be direct-mapped because of its non >> contiguous mapping nature, so allocate_memory() is needed in this case. >> 8d2c3ab18cc1 ("arm/dom0less: put dom0less feature code in a separate module") >> moved allocate_memory() in dom0less_build.c. In order to use it >> in Dom0 construction bring it back to domain_build.c and declare it in >> domain_build.h. >> >> Take the opportunity to adapt the implementation of allocate_memory() so >> that it uses the host layout when called on the hwdom, via >> find_unallocated_memory(). >> >> Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech> >> --- >> v11: >> - GUEST_RAM_BANKS instead of hardcoding the number of banks in allocate_memory() >> - hwdom_ext_regions -> hwdom_free_mem in allocate_memory() >> - added a comment in allocate_memory() when skipping small banks >> v10: >> - fixed a compilation bug that happened when dom0less support was disabled >> v9: >> - no changes >> v8: >> - patch adapted to new changes to allocate_memory() >> v7: >> - allocate_memory() now uses the host layout when called on the hwdom >> v6: >> - new patch >> --- >> xen/arch/arm/dom0less-build.c | 44 ----------- >> xen/arch/arm/domain_build.c | 97 ++++++++++++++++++++++++- >> xen/arch/arm/include/asm/domain_build.h | 1 + >> 3 files changed, 94 insertions(+), 48 deletions(-) >> >> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c >> index d93a85434e..67b1503647 100644 >> --- a/xen/arch/arm/dom0less-build.c >> +++ b/xen/arch/arm/dom0less-build.c >> @@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void) >> return ( !dom0found && domUfound ); >> } >> >> -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) >> -{ >> - struct membanks *mem = kernel_info_get_mem(kinfo); >> - unsigned int i; >> - paddr_t bank_size; >> - >> - printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", >> - /* Don't want format this as PRIpaddr (16 digit hex) */ >> - (unsigned long)(kinfo->unassigned_mem >> 20), d); >> - >> - mem->nr_banks = 0; >> - bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem); >> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE), >> - bank_size) ) >> - goto fail; >> - >> - bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem); >> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE), >> - bank_size) ) >> - goto fail; >> - >> - if ( kinfo->unassigned_mem ) >> - goto fail; >> - >> - for( i = 0; i < mem->nr_banks; i++ ) >> - { >> - printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", >> - d, >> - i, >> - mem->bank[i].start, >> - mem->bank[i].start + mem->bank[i].size, >> - /* Don't want format this as PRIpaddr (16 digit hex) */ >> - (unsigned long)(mem->bank[i].size >> 20)); >> - } >> - >> - return; >> - >> -fail: >> - panic("Failed to allocate requested domain memory." >> - /* Don't want format this as PRIpaddr (16 digit hex) */ >> - " %ldKB unallocated. Fix the VMs configurations.\n", >> - (unsigned long)kinfo->unassigned_mem >> 10); >> -} >> - >> #ifdef CONFIG_VGICV2 >> static int __init make_gicv2_domU_node(struct kernel_info *kinfo) >> { >> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c >> index 2c30792de8..2b8cba9b2f 100644 >> --- a/xen/arch/arm/domain_build.c >> +++ b/xen/arch/arm/domain_build.c >> @@ -416,7 +416,6 @@ static void __init allocate_memory_11(struct domain *d, >> } >> } >> >> -#ifdef CONFIG_DOM0LESS_BOOT >> bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size, >> alloc_domheap_mem_cb cb, void *extra) >> { >> @@ -508,7 +507,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, >> >> return true; >> } >> -#endif >> >> /* >> * When PCI passthrough is available we want to keep the >> @@ -1003,6 +1001,94 @@ out: >> return res; >> } >> >> +void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) >> +{ >> + struct membanks *mem = kernel_info_get_mem(kinfo); >> + unsigned int i, nr_banks = GUEST_RAM_BANKS; >> + paddr_t bank_start, bank_size; > Limit the scope > >> + struct membanks *hwdom_free_mem = NULL; >> + const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; >> + const uint64_t banksize[] = GUEST_RAM_BANK_SIZES; > Limit the scope > >> + >> + printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", >> + /* Don't want format this as PRIpaddr (16 digit hex) */ >> + (unsigned long)(kinfo->unassigned_mem >> 20), d); >> + >> + mem->nr_banks = 0; >> + /* >> + * Use host memory layout for hwdom. Only case for this is when LLC coloring >> + * is enabled. >> + */ >> + if ( is_hardware_domain(d) ) >> + { >> + ASSERT(llc_coloring_enabled); > This patch does not build because of declaration not being visible. You must include <xen/llc-coloring.h>. Piggying back on this comment. AFAICT, the code below would work also in the non cache coloring case. So what's the assert is for? > >> + >> + hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank, >> + NR_MEM_BANKS); >> + if ( !hwdom_free_mem ) >> + goto fail; >> + >> + hwdom_free_mem->max_banks = NR_MEM_BANKS; >> + >> + if ( find_unallocated_memory(kinfo, hwdom_free_mem) ) > My remarks for the use of find_unallocated_memory() 1:1 have not been addressed. You did not even > change the comments inside the function. The problem is that the function is specifically designed > for finding extended regions and assumes being called at certain point i.e. dom0 RAM allocated, gnttab > region allocated, etc. So I agree that the function should be updated if we plan to use it for other purpose. My opinion is that we should attempt to make the function generic so that in your > case you can choose which regions to exclude, define even your own function to grab free regions (at the moment > add_ext_regions grabs banks >= 64M but you still discards banks >= 128M, so it's a bit wasteful. > > My very short attempt to make the function as generic as possible in the first iteration: > https://paste.debian.net/1338334/ This looks better, but I wonder why we need still need to exclude the static regions? Wouldn't it be sufficient to exclude just reserved regions? > > For coloring, you could define your own add_free_regions and only pass RSVD and GNTTAB banks to be excluded. > > As said before, I still wait for other Arm maintainers to provide their own opinion. > > ~Michal > -- Julien Grall
On 06/12/2024 19:37, Julien Grall wrote: > > > Hi, > > Sorry for the late answer. > > On 05/12/2024 09:40, Michal Orzel wrote: >> >> >> On 02/12/2024 17:59, Carlo Nonato wrote: >>> >>> >>> Cache coloring requires Dom0 not to be direct-mapped because of its non >>> contiguous mapping nature, so allocate_memory() is needed in this case. >>> 8d2c3ab18cc1 ("arm/dom0less: put dom0less feature code in a separate module") >>> moved allocate_memory() in dom0less_build.c. In order to use it >>> in Dom0 construction bring it back to domain_build.c and declare it in >>> domain_build.h. >>> >>> Take the opportunity to adapt the implementation of allocate_memory() so >>> that it uses the host layout when called on the hwdom, via >>> find_unallocated_memory(). >>> >>> Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech> >>> --- >>> v11: >>> - GUEST_RAM_BANKS instead of hardcoding the number of banks in allocate_memory() >>> - hwdom_ext_regions -> hwdom_free_mem in allocate_memory() >>> - added a comment in allocate_memory() when skipping small banks >>> v10: >>> - fixed a compilation bug that happened when dom0less support was disabled >>> v9: >>> - no changes >>> v8: >>> - patch adapted to new changes to allocate_memory() >>> v7: >>> - allocate_memory() now uses the host layout when called on the hwdom >>> v6: >>> - new patch >>> --- >>> xen/arch/arm/dom0less-build.c | 44 ----------- >>> xen/arch/arm/domain_build.c | 97 ++++++++++++++++++++++++- >>> xen/arch/arm/include/asm/domain_build.h | 1 + >>> 3 files changed, 94 insertions(+), 48 deletions(-) >>> >>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c >>> index d93a85434e..67b1503647 100644 >>> --- a/xen/arch/arm/dom0less-build.c >>> +++ b/xen/arch/arm/dom0less-build.c >>> @@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void) >>> return ( !dom0found && domUfound ); >>> } >>> >>> -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) >>> -{ >>> - struct membanks *mem = kernel_info_get_mem(kinfo); >>> - unsigned int i; >>> - paddr_t bank_size; >>> - >>> - printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", >>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>> - (unsigned long)(kinfo->unassigned_mem >> 20), d); >>> - >>> - mem->nr_banks = 0; >>> - bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem); >>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE), >>> - bank_size) ) >>> - goto fail; >>> - >>> - bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem); >>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE), >>> - bank_size) ) >>> - goto fail; >>> - >>> - if ( kinfo->unassigned_mem ) >>> - goto fail; >>> - >>> - for( i = 0; i < mem->nr_banks; i++ ) >>> - { >>> - printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", >>> - d, >>> - i, >>> - mem->bank[i].start, >>> - mem->bank[i].start + mem->bank[i].size, >>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>> - (unsigned long)(mem->bank[i].size >> 20)); >>> - } >>> - >>> - return; >>> - >>> -fail: >>> - panic("Failed to allocate requested domain memory." >>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>> - " %ldKB unallocated. Fix the VMs configurations.\n", >>> - (unsigned long)kinfo->unassigned_mem >> 10); >>> -} >>> - >>> #ifdef CONFIG_VGICV2 >>> static int __init make_gicv2_domU_node(struct kernel_info *kinfo) >>> { >>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c >>> index 2c30792de8..2b8cba9b2f 100644 >>> --- a/xen/arch/arm/domain_build.c >>> +++ b/xen/arch/arm/domain_build.c >>> @@ -416,7 +416,6 @@ static void __init allocate_memory_11(struct domain *d, >>> } >>> } >>> >>> -#ifdef CONFIG_DOM0LESS_BOOT >>> bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size, >>> alloc_domheap_mem_cb cb, void *extra) >>> { >>> @@ -508,7 +507,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, >>> >>> return true; >>> } >>> -#endif >>> >>> /* >>> * When PCI passthrough is available we want to keep the >>> @@ -1003,6 +1001,94 @@ out: >>> return res; >>> } >>> >>> +void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) >>> +{ >>> + struct membanks *mem = kernel_info_get_mem(kinfo); >>> + unsigned int i, nr_banks = GUEST_RAM_BANKS; >>> + paddr_t bank_start, bank_size; >> Limit the scope >> >>> + struct membanks *hwdom_free_mem = NULL; >>> + const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; >>> + const uint64_t banksize[] = GUEST_RAM_BANK_SIZES; >> Limit the scope >> >>> + >>> + printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", >>> + /* Don't want format this as PRIpaddr (16 digit hex) */ >>> + (unsigned long)(kinfo->unassigned_mem >> 20), d); >>> + >>> + mem->nr_banks = 0; >>> + /* >>> + * Use host memory layout for hwdom. Only case for this is when LLC coloring >>> + * is enabled. >>> + */ >>> + if ( is_hardware_domain(d) ) >>> + { >>> + ASSERT(llc_coloring_enabled); >> This patch does not build because of declaration not being visible. You must include <xen/llc-coloring.h>. > > Piggying back on this comment. AFAICT, the code below would work also in > the non cache coloring case. So what's the assert is for? > >> >>> + >>> + hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank, >>> + NR_MEM_BANKS); >>> + if ( !hwdom_free_mem ) >>> + goto fail; >>> + >>> + hwdom_free_mem->max_banks = NR_MEM_BANKS; >>> + >>> + if ( find_unallocated_memory(kinfo, hwdom_free_mem) ) >> My remarks for the use of find_unallocated_memory() 1:1 have not been addressed. You did not even >> change the comments inside the function. The problem is that the function is specifically designed >> for finding extended regions and assumes being called at certain point i.e. dom0 RAM allocated, gnttab >> region allocated, etc. > > So I agree that the function should be updated if we plan to use it for > other purpose. > > My opinion is that we should attempt to make the function generic so > that in your >> case you can choose which regions to exclude, define even your own function to grab free regions (at the moment >> add_ext_regions grabs banks >= 64M but you still discards banks >= 128M, so it's a bit wasteful. >> >> My very short attempt to make the function as generic as possible in the first iteration: >> https://paste.debian.net/1338334/ > > This looks better, but I wonder why we need still need to exclude the > static regions? Wouldn't it be sufficient to exclude just reserved regions? Static shared memory banks are not part of reserved memory (i.e. bootinfo.reserved_mem) if that's what you're asking. They are stored in bootinfo.shmem, hence we need to take them into account when searching for unused address space. If you and Carlo are ok with my proposed solution for making the function generic, I can send a patch as a prerequisite patch for Carlo series. ~Michal
Hi Michal, On 07/12/2024 15:04, Michal Orzel wrote: > > > On 06/12/2024 19:37, Julien Grall wrote: >> >> >> Hi, >> >> Sorry for the late answer. >> >> On 05/12/2024 09:40, Michal Orzel wrote: >>> >>> >>> On 02/12/2024 17:59, Carlo Nonato wrote: >>>> >>>> >>>> Cache coloring requires Dom0 not to be direct-mapped because of its non >>>> contiguous mapping nature, so allocate_memory() is needed in this case. >>>> 8d2c3ab18cc1 ("arm/dom0less: put dom0less feature code in a separate module") >>>> moved allocate_memory() in dom0less_build.c. In order to use it >>>> in Dom0 construction bring it back to domain_build.c and declare it in >>>> domain_build.h. >>>> >>>> Take the opportunity to adapt the implementation of allocate_memory() so >>>> that it uses the host layout when called on the hwdom, via >>>> find_unallocated_memory(). >>>> >>>> Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech> >>>> --- >>>> v11: >>>> - GUEST_RAM_BANKS instead of hardcoding the number of banks in allocate_memory() >>>> - hwdom_ext_regions -> hwdom_free_mem in allocate_memory() >>>> - added a comment in allocate_memory() when skipping small banks >>>> v10: >>>> - fixed a compilation bug that happened when dom0less support was disabled >>>> v9: >>>> - no changes >>>> v8: >>>> - patch adapted to new changes to allocate_memory() >>>> v7: >>>> - allocate_memory() now uses the host layout when called on the hwdom >>>> v6: >>>> - new patch >>>> --- >>>> xen/arch/arm/dom0less-build.c | 44 ----------- >>>> xen/arch/arm/domain_build.c | 97 ++++++++++++++++++++++++- >>>> xen/arch/arm/include/asm/domain_build.h | 1 + >>>> 3 files changed, 94 insertions(+), 48 deletions(-) >>>> >>>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c >>>> index d93a85434e..67b1503647 100644 >>>> --- a/xen/arch/arm/dom0less-build.c >>>> +++ b/xen/arch/arm/dom0less-build.c >>>> @@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void) >>>> return ( !dom0found && domUfound ); >>>> } >>>> >>>> -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) >>>> -{ >>>> - struct membanks *mem = kernel_info_get_mem(kinfo); >>>> - unsigned int i; >>>> - paddr_t bank_size; >>>> - >>>> - printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", >>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>>> - (unsigned long)(kinfo->unassigned_mem >> 20), d); >>>> - >>>> - mem->nr_banks = 0; >>>> - bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem); >>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE), >>>> - bank_size) ) >>>> - goto fail; >>>> - >>>> - bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem); >>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE), >>>> - bank_size) ) >>>> - goto fail; >>>> - >>>> - if ( kinfo->unassigned_mem ) >>>> - goto fail; >>>> - >>>> - for( i = 0; i < mem->nr_banks; i++ ) >>>> - { >>>> - printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", >>>> - d, >>>> - i, >>>> - mem->bank[i].start, >>>> - mem->bank[i].start + mem->bank[i].size, >>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>>> - (unsigned long)(mem->bank[i].size >> 20)); >>>> - } >>>> - >>>> - return; >>>> - >>>> -fail: >>>> - panic("Failed to allocate requested domain memory." >>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>>> - " %ldKB unallocated. Fix the VMs configurations.\n", >>>> - (unsigned long)kinfo->unassigned_mem >> 10); >>>> -} >>>> - >>>> #ifdef CONFIG_VGICV2 >>>> static int __init make_gicv2_domU_node(struct kernel_info *kinfo) >>>> { >>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c >>>> index 2c30792de8..2b8cba9b2f 100644 >>>> --- a/xen/arch/arm/domain_build.c >>>> +++ b/xen/arch/arm/domain_build.c >>>> @@ -416,7 +416,6 @@ static void __init allocate_memory_11(struct domain *d, >>>> } >>>> } >>>> >>>> -#ifdef CONFIG_DOM0LESS_BOOT >>>> bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size, >>>> alloc_domheap_mem_cb cb, void *extra) >>>> { >>>> @@ -508,7 +507,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, >>>> >>>> return true; >>>> } >>>> -#endif >>>> >>>> /* >>>> * When PCI passthrough is available we want to keep the >>>> @@ -1003,6 +1001,94 @@ out: >>>> return res; >>>> } >>>> >>>> +void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) >>>> +{ >>>> + struct membanks *mem = kernel_info_get_mem(kinfo); >>>> + unsigned int i, nr_banks = GUEST_RAM_BANKS; >>>> + paddr_t bank_start, bank_size; >>> Limit the scope >>> >>>> + struct membanks *hwdom_free_mem = NULL; >>>> + const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; >>>> + const uint64_t banksize[] = GUEST_RAM_BANK_SIZES; >>> Limit the scope >>> >>>> + >>>> + printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", >>>> + /* Don't want format this as PRIpaddr (16 digit hex) */ >>>> + (unsigned long)(kinfo->unassigned_mem >> 20), d); >>>> + >>>> + mem->nr_banks = 0; >>>> + /* >>>> + * Use host memory layout for hwdom. Only case for this is when LLC coloring >>>> + * is enabled. >>>> + */ >>>> + if ( is_hardware_domain(d) ) >>>> + { >>>> + ASSERT(llc_coloring_enabled); >>> This patch does not build because of declaration not being visible. You must include <xen/llc-coloring.h>. >> >> Piggying back on this comment. AFAICT, the code below would work also in >> the non cache coloring case. So what's the assert is for? >> >>> >>>> + >>>> + hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank, >>>> + NR_MEM_BANKS); >>>> + if ( !hwdom_free_mem ) >>>> + goto fail; >>>> + >>>> + hwdom_free_mem->max_banks = NR_MEM_BANKS; >>>> + >>>> + if ( find_unallocated_memory(kinfo, hwdom_free_mem) ) >>> My remarks for the use of find_unallocated_memory() 1:1 have not been addressed. You did not even >>> change the comments inside the function. The problem is that the function is specifically designed >>> for finding extended regions and assumes being called at certain point i.e. dom0 RAM allocated, gnttab >>> region allocated, etc. >> >> So I agree that the function should be updated if we plan to use it for >> other purpose. >> >> My opinion is that we should attempt to make the function generic so >> that in your >>> case you can choose which regions to exclude, define even your own function to grab free regions (at the moment >>> add_ext_regions grabs banks >= 64M but you still discards banks >= 128M, so it's a bit wasteful. >>> >>> My very short attempt to make the function as generic as possible in the first iteration: >>> https://paste.debian.net/1338334/ >> >> This looks better, but I wonder why we need still need to exclude the >> static regions? Wouldn't it be sufficient to exclude just reserved regions? > Static shared memory banks are not part of reserved memory (i.e. bootinfo.reserved_mem) if that's what you're asking. > They are stored in bootinfo.shmem, hence we need to take them into account when searching for unused address space. Oh I missed the fact you now pass "mem_banks" as a parameter. I thought they would still get excluded for cache coloring case. > > If you and Carlo are ok with my proposed solution for making the function generic, I can send a patch as a prerequisite > patch for Carlo series. I am fine with the approach. Cheers, -- Julien Grall
Hi, On Mon, Dec 9, 2024 at 8:17 PM Julien Grall <julien@xen.org> wrote: > > Hi Michal, > > On 07/12/2024 15:04, Michal Orzel wrote: > > > > > > On 06/12/2024 19:37, Julien Grall wrote: > >> > >> > >> Hi, > >> > >> Sorry for the late answer. > >> > >> On 05/12/2024 09:40, Michal Orzel wrote: > >>> > >>> > >>> On 02/12/2024 17:59, Carlo Nonato wrote: > >>>> > >>>> > >>>> Cache coloring requires Dom0 not to be direct-mapped because of its non > >>>> contiguous mapping nature, so allocate_memory() is needed in this case. > >>>> 8d2c3ab18cc1 ("arm/dom0less: put dom0less feature code in a separate module") > >>>> moved allocate_memory() in dom0less_build.c. In order to use it > >>>> in Dom0 construction bring it back to domain_build.c and declare it in > >>>> domain_build.h. > >>>> > >>>> Take the opportunity to adapt the implementation of allocate_memory() so > >>>> that it uses the host layout when called on the hwdom, via > >>>> find_unallocated_memory(). > >>>> > >>>> Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech> > >>>> --- > >>>> v11: > >>>> - GUEST_RAM_BANKS instead of hardcoding the number of banks in allocate_memory() > >>>> - hwdom_ext_regions -> hwdom_free_mem in allocate_memory() > >>>> - added a comment in allocate_memory() when skipping small banks > >>>> v10: > >>>> - fixed a compilation bug that happened when dom0less support was disabled > >>>> v9: > >>>> - no changes > >>>> v8: > >>>> - patch adapted to new changes to allocate_memory() > >>>> v7: > >>>> - allocate_memory() now uses the host layout when called on the hwdom > >>>> v6: > >>>> - new patch > >>>> --- > >>>> xen/arch/arm/dom0less-build.c | 44 ----------- > >>>> xen/arch/arm/domain_build.c | 97 ++++++++++++++++++++++++- > >>>> xen/arch/arm/include/asm/domain_build.h | 1 + > >>>> 3 files changed, 94 insertions(+), 48 deletions(-) > >>>> > >>>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c > >>>> index d93a85434e..67b1503647 100644 > >>>> --- a/xen/arch/arm/dom0less-build.c > >>>> +++ b/xen/arch/arm/dom0less-build.c > >>>> @@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void) > >>>> return ( !dom0found && domUfound ); > >>>> } > >>>> > >>>> -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) > >>>> -{ > >>>> - struct membanks *mem = kernel_info_get_mem(kinfo); > >>>> - unsigned int i; > >>>> - paddr_t bank_size; > >>>> - > >>>> - printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", > >>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ > >>>> - (unsigned long)(kinfo->unassigned_mem >> 20), d); > >>>> - > >>>> - mem->nr_banks = 0; > >>>> - bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem); > >>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE), > >>>> - bank_size) ) > >>>> - goto fail; > >>>> - > >>>> - bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem); > >>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE), > >>>> - bank_size) ) > >>>> - goto fail; > >>>> - > >>>> - if ( kinfo->unassigned_mem ) > >>>> - goto fail; > >>>> - > >>>> - for( i = 0; i < mem->nr_banks; i++ ) > >>>> - { > >>>> - printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", > >>>> - d, > >>>> - i, > >>>> - mem->bank[i].start, > >>>> - mem->bank[i].start + mem->bank[i].size, > >>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ > >>>> - (unsigned long)(mem->bank[i].size >> 20)); > >>>> - } > >>>> - > >>>> - return; > >>>> - > >>>> -fail: > >>>> - panic("Failed to allocate requested domain memory." > >>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ > >>>> - " %ldKB unallocated. Fix the VMs configurations.\n", > >>>> - (unsigned long)kinfo->unassigned_mem >> 10); > >>>> -} > >>>> - > >>>> #ifdef CONFIG_VGICV2 > >>>> static int __init make_gicv2_domU_node(struct kernel_info *kinfo) > >>>> { > >>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > >>>> index 2c30792de8..2b8cba9b2f 100644 > >>>> --- a/xen/arch/arm/domain_build.c > >>>> +++ b/xen/arch/arm/domain_build.c > >>>> @@ -416,7 +416,6 @@ static void __init allocate_memory_11(struct domain *d, > >>>> } > >>>> } > >>>> > >>>> -#ifdef CONFIG_DOM0LESS_BOOT > >>>> bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size, > >>>> alloc_domheap_mem_cb cb, void *extra) > >>>> { > >>>> @@ -508,7 +507,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, > >>>> > >>>> return true; > >>>> } > >>>> -#endif > >>>> > >>>> /* > >>>> * When PCI passthrough is available we want to keep the > >>>> @@ -1003,6 +1001,94 @@ out: > >>>> return res; > >>>> } > >>>> > >>>> +void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) > >>>> +{ > >>>> + struct membanks *mem = kernel_info_get_mem(kinfo); > >>>> + unsigned int i, nr_banks = GUEST_RAM_BANKS; > >>>> + paddr_t bank_start, bank_size; > >>> Limit the scope > >>> > >>>> + struct membanks *hwdom_free_mem = NULL; > >>>> + const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; > >>>> + const uint64_t banksize[] = GUEST_RAM_BANK_SIZES; > >>> Limit the scope > >>> > >>>> + > >>>> + printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", > >>>> + /* Don't want format this as PRIpaddr (16 digit hex) */ > >>>> + (unsigned long)(kinfo->unassigned_mem >> 20), d); > >>>> + > >>>> + mem->nr_banks = 0; > >>>> + /* > >>>> + * Use host memory layout for hwdom. Only case for this is when LLC coloring > >>>> + * is enabled. > >>>> + */ > >>>> + if ( is_hardware_domain(d) ) > >>>> + { > >>>> + ASSERT(llc_coloring_enabled); > >>> This patch does not build because of declaration not being visible. You must include <xen/llc-coloring.h>. > >> > >> Piggying back on this comment. AFAICT, the code below would work also in > >> the non cache coloring case. So what's the assert is for? > >> > >>> > >>>> + > >>>> + hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank, > >>>> + NR_MEM_BANKS); > >>>> + if ( !hwdom_free_mem ) > >>>> + goto fail; > >>>> + > >>>> + hwdom_free_mem->max_banks = NR_MEM_BANKS; > >>>> + > >>>> + if ( find_unallocated_memory(kinfo, hwdom_free_mem) ) > >>> My remarks for the use of find_unallocated_memory() 1:1 have not been addressed. You did not even > >>> change the comments inside the function. The problem is that the function is specifically designed > >>> for finding extended regions and assumes being called at certain point i.e. dom0 RAM allocated, gnttab > >>> region allocated, etc. > >> > >> So I agree that the function should be updated if we plan to use it for > >> other purpose. > >> > >> My opinion is that we should attempt to make the function generic so > >> that in your > >>> case you can choose which regions to exclude, define even your own function to grab free regions (at the moment > >>> add_ext_regions grabs banks >= 64M but you still discards banks >= 128M, so it's a bit wasteful. > >>> > >>> My very short attempt to make the function as generic as possible in the first iteration: > >>> https://paste.debian.net/1338334/ > >> > >> This looks better, but I wonder why we need still need to exclude the > >> static regions? Wouldn't it be sufficient to exclude just reserved regions? > > Static shared memory banks are not part of reserved memory (i.e. bootinfo.reserved_mem) if that's what you're asking. > > They are stored in bootinfo.shmem, hence we need to take them into account when searching for unused address space. > > Oh I missed the fact you now pass "mem_banks" as a parameter. I thought > they would still get excluded for cache coloring case. > > > > > If you and Carlo are ok with my proposed solution for making the function generic, I can send a patch as a prerequisite > > patch for Carlo series. > > I am fine with the approach. > > Cheers, > > -- > Julien Grall > > @@ -2152,7 +2238,10 @@ static int __init construct_dom0(struct domain *d) > /* type must be set before allocate_memory */ > d->arch.type = kinfo.type; > #endif > - allocate_memory_11(d, &kinfo); > + if ( is_domain_direct_mapped(d) ) > + allocate_memory_11(d, &kinfo); > + else > + allocate_memory(d, &kinfo); > find_gnttab_region(d, &kinfo); Since find_gnttab_region() is called after allocate_memory(), kinfo->gnttab_* fields aren't initialized and the call to find_unallocated_memory() with gnttab as the region to exclude, fails ending in a crash since memory for dom0 can't be allocated. Can the solution be to call find_gnttab_region() before the above if? Or should I just call it before allocate_memory() in one case, but still after allocate_memory_11() in the other? Thanks.
On 12/12/2024 18:48, Carlo Nonato wrote: > Hi, > > On Mon, Dec 9, 2024 at 8:17 PM Julien Grall <julien@xen.org> wrote: >> >> Hi Michal, >> >> On 07/12/2024 15:04, Michal Orzel wrote: >>> >>> >>> On 06/12/2024 19:37, Julien Grall wrote: >>>> >>>> >>>> Hi, >>>> >>>> Sorry for the late answer. >>>> >>>> On 05/12/2024 09:40, Michal Orzel wrote: >>>>> >>>>> >>>>> On 02/12/2024 17:59, Carlo Nonato wrote: >>>>>> >>>>>> >>>>>> Cache coloring requires Dom0 not to be direct-mapped because of its non >>>>>> contiguous mapping nature, so allocate_memory() is needed in this case. >>>>>> 8d2c3ab18cc1 ("arm/dom0less: put dom0less feature code in a separate module") >>>>>> moved allocate_memory() in dom0less_build.c. In order to use it >>>>>> in Dom0 construction bring it back to domain_build.c and declare it in >>>>>> domain_build.h. >>>>>> >>>>>> Take the opportunity to adapt the implementation of allocate_memory() so >>>>>> that it uses the host layout when called on the hwdom, via >>>>>> find_unallocated_memory(). >>>>>> >>>>>> Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech> >>>>>> --- >>>>>> v11: >>>>>> - GUEST_RAM_BANKS instead of hardcoding the number of banks in allocate_memory() >>>>>> - hwdom_ext_regions -> hwdom_free_mem in allocate_memory() >>>>>> - added a comment in allocate_memory() when skipping small banks >>>>>> v10: >>>>>> - fixed a compilation bug that happened when dom0less support was disabled >>>>>> v9: >>>>>> - no changes >>>>>> v8: >>>>>> - patch adapted to new changes to allocate_memory() >>>>>> v7: >>>>>> - allocate_memory() now uses the host layout when called on the hwdom >>>>>> v6: >>>>>> - new patch >>>>>> --- >>>>>> xen/arch/arm/dom0less-build.c | 44 ----------- >>>>>> xen/arch/arm/domain_build.c | 97 ++++++++++++++++++++++++- >>>>>> xen/arch/arm/include/asm/domain_build.h | 1 + >>>>>> 3 files changed, 94 insertions(+), 48 deletions(-) >>>>>> >>>>>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c >>>>>> index d93a85434e..67b1503647 100644 >>>>>> --- a/xen/arch/arm/dom0less-build.c >>>>>> +++ b/xen/arch/arm/dom0less-build.c >>>>>> @@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void) >>>>>> return ( !dom0found && domUfound ); >>>>>> } >>>>>> >>>>>> -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) >>>>>> -{ >>>>>> - struct membanks *mem = kernel_info_get_mem(kinfo); >>>>>> - unsigned int i; >>>>>> - paddr_t bank_size; >>>>>> - >>>>>> - printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", >>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>>>>> - (unsigned long)(kinfo->unassigned_mem >> 20), d); >>>>>> - >>>>>> - mem->nr_banks = 0; >>>>>> - bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem); >>>>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE), >>>>>> - bank_size) ) >>>>>> - goto fail; >>>>>> - >>>>>> - bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem); >>>>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE), >>>>>> - bank_size) ) >>>>>> - goto fail; >>>>>> - >>>>>> - if ( kinfo->unassigned_mem ) >>>>>> - goto fail; >>>>>> - >>>>>> - for( i = 0; i < mem->nr_banks; i++ ) >>>>>> - { >>>>>> - printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", >>>>>> - d, >>>>>> - i, >>>>>> - mem->bank[i].start, >>>>>> - mem->bank[i].start + mem->bank[i].size, >>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>>>>> - (unsigned long)(mem->bank[i].size >> 20)); >>>>>> - } >>>>>> - >>>>>> - return; >>>>>> - >>>>>> -fail: >>>>>> - panic("Failed to allocate requested domain memory." >>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>>>>> - " %ldKB unallocated. Fix the VMs configurations.\n", >>>>>> - (unsigned long)kinfo->unassigned_mem >> 10); >>>>>> -} >>>>>> - >>>>>> #ifdef CONFIG_VGICV2 >>>>>> static int __init make_gicv2_domU_node(struct kernel_info *kinfo) >>>>>> { >>>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c >>>>>> index 2c30792de8..2b8cba9b2f 100644 >>>>>> --- a/xen/arch/arm/domain_build.c >>>>>> +++ b/xen/arch/arm/domain_build.c >>>>>> @@ -416,7 +416,6 @@ static void __init allocate_memory_11(struct domain *d, >>>>>> } >>>>>> } >>>>>> >>>>>> -#ifdef CONFIG_DOM0LESS_BOOT >>>>>> bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size, >>>>>> alloc_domheap_mem_cb cb, void *extra) >>>>>> { >>>>>> @@ -508,7 +507,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, >>>>>> >>>>>> return true; >>>>>> } >>>>>> -#endif >>>>>> >>>>>> /* >>>>>> * When PCI passthrough is available we want to keep the >>>>>> @@ -1003,6 +1001,94 @@ out: >>>>>> return res; >>>>>> } >>>>>> >>>>>> +void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) >>>>>> +{ >>>>>> + struct membanks *mem = kernel_info_get_mem(kinfo); >>>>>> + unsigned int i, nr_banks = GUEST_RAM_BANKS; >>>>>> + paddr_t bank_start, bank_size; >>>>> Limit the scope >>>>> >>>>>> + struct membanks *hwdom_free_mem = NULL; >>>>>> + const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; >>>>>> + const uint64_t banksize[] = GUEST_RAM_BANK_SIZES; >>>>> Limit the scope >>>>> >>>>>> + >>>>>> + printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", >>>>>> + /* Don't want format this as PRIpaddr (16 digit hex) */ >>>>>> + (unsigned long)(kinfo->unassigned_mem >> 20), d); >>>>>> + >>>>>> + mem->nr_banks = 0; >>>>>> + /* >>>>>> + * Use host memory layout for hwdom. Only case for this is when LLC coloring >>>>>> + * is enabled. >>>>>> + */ >>>>>> + if ( is_hardware_domain(d) ) >>>>>> + { >>>>>> + ASSERT(llc_coloring_enabled); >>>>> This patch does not build because of declaration not being visible. You must include <xen/llc-coloring.h>. >>>> >>>> Piggying back on this comment. AFAICT, the code below would work also in >>>> the non cache coloring case. So what's the assert is for? >>>> >>>>> >>>>>> + >>>>>> + hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank, >>>>>> + NR_MEM_BANKS); >>>>>> + if ( !hwdom_free_mem ) >>>>>> + goto fail; >>>>>> + >>>>>> + hwdom_free_mem->max_banks = NR_MEM_BANKS; >>>>>> + >>>>>> + if ( find_unallocated_memory(kinfo, hwdom_free_mem) ) >>>>> My remarks for the use of find_unallocated_memory() 1:1 have not been addressed. You did not even >>>>> change the comments inside the function. The problem is that the function is specifically designed >>>>> for finding extended regions and assumes being called at certain point i.e. dom0 RAM allocated, gnttab >>>>> region allocated, etc. >>>> >>>> So I agree that the function should be updated if we plan to use it for >>>> other purpose. >>>> >>>> My opinion is that we should attempt to make the function generic so >>>> that in your >>>>> case you can choose which regions to exclude, define even your own function to grab free regions (at the moment >>>>> add_ext_regions grabs banks >= 64M but you still discards banks >= 128M, so it's a bit wasteful. >>>>> >>>>> My very short attempt to make the function as generic as possible in the first iteration: >>>>> https://paste.debian.net/1338334/ >>>> >>>> This looks better, but I wonder why we need still need to exclude the >>>> static regions? Wouldn't it be sufficient to exclude just reserved regions? >>> Static shared memory banks are not part of reserved memory (i.e. bootinfo.reserved_mem) if that's what you're asking. >>> They are stored in bootinfo.shmem, hence we need to take them into account when searching for unused address space. >> >> Oh I missed the fact you now pass "mem_banks" as a parameter. I thought >> they would still get excluded for cache coloring case. >> >>> >>> If you and Carlo are ok with my proposed solution for making the function generic, I can send a patch as a prerequisite >>> patch for Carlo series. >> >> I am fine with the approach. >> >> Cheers, >> >> -- >> Julien Grall >> > >> @@ -2152,7 +2238,10 @@ static int __init construct_dom0(struct domain *d) >> /* type must be set before allocate_memory */ >> d->arch.type = kinfo.type; >> #endif >> - allocate_memory_11(d, &kinfo); >> + if ( is_domain_direct_mapped(d) ) >> + allocate_memory_11(d, &kinfo); >> + else >> + allocate_memory(d, &kinfo); >> find_gnttab_region(d, &kinfo); > > Since find_gnttab_region() is called after allocate_memory(), kinfo->gnttab_* > fields aren't initialized and the call to find_unallocated_memory() with > gnttab as the region to exclude, fails ending in a crash since memory for > dom0 can't be allocated. > > Can the solution be to call find_gnttab_region() before the above if? The function is called find, but currently it only initializes kinfo->gnttab_start and kinfo->gnttab_size and we tested that moving it before allocate_memory* doesn't cause fallouts. If moving before allocate_memory*, would it be better to rename it e.g., init_gnttab_region()? Thanks, Andrea > Or should I just call it before allocate_memory() in one case, but still after > allocate_memory_11() in the other? > > Thanks.
Hi Carlo, Andrea, On 12/12/2024 19:22, Andrea Bastoni wrote: > > > On 12/12/2024 18:48, Carlo Nonato wrote: >> Hi, >> >> On Mon, Dec 9, 2024 at 8:17 PM Julien Grall <julien@xen.org> wrote: >>> >>> Hi Michal, >>> >>> On 07/12/2024 15:04, Michal Orzel wrote: >>>> >>>> >>>> On 06/12/2024 19:37, Julien Grall wrote: >>>>> >>>>> >>>>> Hi, >>>>> >>>>> Sorry for the late answer. >>>>> >>>>> On 05/12/2024 09:40, Michal Orzel wrote: >>>>>> >>>>>> >>>>>> On 02/12/2024 17:59, Carlo Nonato wrote: >>>>>>> >>>>>>> >>>>>>> Cache coloring requires Dom0 not to be direct-mapped because of its non >>>>>>> contiguous mapping nature, so allocate_memory() is needed in this case. >>>>>>> 8d2c3ab18cc1 ("arm/dom0less: put dom0less feature code in a separate module") >>>>>>> moved allocate_memory() in dom0less_build.c. In order to use it >>>>>>> in Dom0 construction bring it back to domain_build.c and declare it in >>>>>>> domain_build.h. >>>>>>> >>>>>>> Take the opportunity to adapt the implementation of allocate_memory() so >>>>>>> that it uses the host layout when called on the hwdom, via >>>>>>> find_unallocated_memory(). >>>>>>> >>>>>>> Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech> >>>>>>> --- >>>>>>> v11: >>>>>>> - GUEST_RAM_BANKS instead of hardcoding the number of banks in allocate_memory() >>>>>>> - hwdom_ext_regions -> hwdom_free_mem in allocate_memory() >>>>>>> - added a comment in allocate_memory() when skipping small banks >>>>>>> v10: >>>>>>> - fixed a compilation bug that happened when dom0less support was disabled >>>>>>> v9: >>>>>>> - no changes >>>>>>> v8: >>>>>>> - patch adapted to new changes to allocate_memory() >>>>>>> v7: >>>>>>> - allocate_memory() now uses the host layout when called on the hwdom >>>>>>> v6: >>>>>>> - new patch >>>>>>> --- >>>>>>> xen/arch/arm/dom0less-build.c | 44 ----------- >>>>>>> xen/arch/arm/domain_build.c | 97 ++++++++++++++++++++++++- >>>>>>> xen/arch/arm/include/asm/domain_build.h | 1 + >>>>>>> 3 files changed, 94 insertions(+), 48 deletions(-) >>>>>>> >>>>>>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c >>>>>>> index d93a85434e..67b1503647 100644 >>>>>>> --- a/xen/arch/arm/dom0less-build.c >>>>>>> +++ b/xen/arch/arm/dom0less-build.c >>>>>>> @@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void) >>>>>>> return ( !dom0found && domUfound ); >>>>>>> } >>>>>>> >>>>>>> -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) >>>>>>> -{ >>>>>>> - struct membanks *mem = kernel_info_get_mem(kinfo); >>>>>>> - unsigned int i; >>>>>>> - paddr_t bank_size; >>>>>>> - >>>>>>> - printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", >>>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>>>>>> - (unsigned long)(kinfo->unassigned_mem >> 20), d); >>>>>>> - >>>>>>> - mem->nr_banks = 0; >>>>>>> - bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem); >>>>>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE), >>>>>>> - bank_size) ) >>>>>>> - goto fail; >>>>>>> - >>>>>>> - bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem); >>>>>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE), >>>>>>> - bank_size) ) >>>>>>> - goto fail; >>>>>>> - >>>>>>> - if ( kinfo->unassigned_mem ) >>>>>>> - goto fail; >>>>>>> - >>>>>>> - for( i = 0; i < mem->nr_banks; i++ ) >>>>>>> - { >>>>>>> - printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", >>>>>>> - d, >>>>>>> - i, >>>>>>> - mem->bank[i].start, >>>>>>> - mem->bank[i].start + mem->bank[i].size, >>>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>>>>>> - (unsigned long)(mem->bank[i].size >> 20)); >>>>>>> - } >>>>>>> - >>>>>>> - return; >>>>>>> - >>>>>>> -fail: >>>>>>> - panic("Failed to allocate requested domain memory." >>>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>>>>>> - " %ldKB unallocated. Fix the VMs configurations.\n", >>>>>>> - (unsigned long)kinfo->unassigned_mem >> 10); >>>>>>> -} >>>>>>> - >>>>>>> #ifdef CONFIG_VGICV2 >>>>>>> static int __init make_gicv2_domU_node(struct kernel_info *kinfo) >>>>>>> { >>>>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c >>>>>>> index 2c30792de8..2b8cba9b2f 100644 >>>>>>> --- a/xen/arch/arm/domain_build.c >>>>>>> +++ b/xen/arch/arm/domain_build.c >>>>>>> @@ -416,7 +416,6 @@ static void __init allocate_memory_11(struct domain *d, >>>>>>> } >>>>>>> } >>>>>>> >>>>>>> -#ifdef CONFIG_DOM0LESS_BOOT >>>>>>> bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size, >>>>>>> alloc_domheap_mem_cb cb, void *extra) >>>>>>> { >>>>>>> @@ -508,7 +507,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, >>>>>>> >>>>>>> return true; >>>>>>> } >>>>>>> -#endif >>>>>>> >>>>>>> /* >>>>>>> * When PCI passthrough is available we want to keep the >>>>>>> @@ -1003,6 +1001,94 @@ out: >>>>>>> return res; >>>>>>> } >>>>>>> >>>>>>> +void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) >>>>>>> +{ >>>>>>> + struct membanks *mem = kernel_info_get_mem(kinfo); >>>>>>> + unsigned int i, nr_banks = GUEST_RAM_BANKS; >>>>>>> + paddr_t bank_start, bank_size; >>>>>> Limit the scope >>>>>> >>>>>>> + struct membanks *hwdom_free_mem = NULL; >>>>>>> + const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; >>>>>>> + const uint64_t banksize[] = GUEST_RAM_BANK_SIZES; >>>>>> Limit the scope >>>>>> >>>>>>> + >>>>>>> + printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", >>>>>>> + /* Don't want format this as PRIpaddr (16 digit hex) */ >>>>>>> + (unsigned long)(kinfo->unassigned_mem >> 20), d); >>>>>>> + >>>>>>> + mem->nr_banks = 0; >>>>>>> + /* >>>>>>> + * Use host memory layout for hwdom. Only case for this is when LLC coloring >>>>>>> + * is enabled. >>>>>>> + */ >>>>>>> + if ( is_hardware_domain(d) ) >>>>>>> + { >>>>>>> + ASSERT(llc_coloring_enabled); >>>>>> This patch does not build because of declaration not being visible. You must include <xen/llc-coloring.h>. >>>>> >>>>> Piggying back on this comment. AFAICT, the code below would work also in >>>>> the non cache coloring case. So what's the assert is for? >>>>> >>>>>> >>>>>>> + >>>>>>> + hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank, >>>>>>> + NR_MEM_BANKS); >>>>>>> + if ( !hwdom_free_mem ) >>>>>>> + goto fail; >>>>>>> + >>>>>>> + hwdom_free_mem->max_banks = NR_MEM_BANKS; >>>>>>> + >>>>>>> + if ( find_unallocated_memory(kinfo, hwdom_free_mem) ) >>>>>> My remarks for the use of find_unallocated_memory() 1:1 have not been addressed. You did not even >>>>>> change the comments inside the function. The problem is that the function is specifically designed >>>>>> for finding extended regions and assumes being called at certain point i.e. dom0 RAM allocated, gnttab >>>>>> region allocated, etc. >>>>> >>>>> So I agree that the function should be updated if we plan to use it for >>>>> other purpose. >>>>> >>>>> My opinion is that we should attempt to make the function generic so >>>>> that in your >>>>>> case you can choose which regions to exclude, define even your own function to grab free regions (at the moment >>>>>> add_ext_regions grabs banks >= 64M but you still discards banks >= 128M, so it's a bit wasteful. >>>>>> >>>>>> My very short attempt to make the function as generic as possible in the first iteration: >>>>>> https://paste.debian.net/1338334/ >>>>> >>>>> This looks better, but I wonder why we need still need to exclude the >>>>> static regions? Wouldn't it be sufficient to exclude just reserved regions? >>>> Static shared memory banks are not part of reserved memory (i.e. bootinfo.reserved_mem) if that's what you're asking. >>>> They are stored in bootinfo.shmem, hence we need to take them into account when searching for unused address space. >>> >>> Oh I missed the fact you now pass "mem_banks" as a parameter. I thought >>> they would still get excluded for cache coloring case. >>> >>>> >>>> If you and Carlo are ok with my proposed solution for making the function generic, I can send a patch as a prerequisite >>>> patch for Carlo series. >>> >>> I am fine with the approach. >>> >>> Cheers, >>> >>> -- >>> Julien Grall >>> >> >>> @@ -2152,7 +2238,10 @@ static int __init construct_dom0(struct domain *d) >>> /* type must be set before allocate_memory */ >>> d->arch.type = kinfo.type; >>> #endif >>> - allocate_memory_11(d, &kinfo); >>> + if ( is_domain_direct_mapped(d) ) >>> + allocate_memory_11(d, &kinfo); >>> + else >>> + allocate_memory(d, &kinfo); >>> find_gnttab_region(d, &kinfo); >> >> Since find_gnttab_region() is called after allocate_memory(), kinfo->gnttab_* >> fields aren't initialized and the call to find_unallocated_memory() with >> gnttab as the region to exclude, fails ending in a crash since memory for >> dom0 can't be allocated. >> >> Can the solution be to call find_gnttab_region() before the above if? > > The function is called find, but currently it only initializes kinfo->gnttab_start > and kinfo->gnttab_size and we tested that moving it before allocate_memory* doesn't > cause fallouts. > > If moving before allocate_memory*, would it be better to rename it e.g., init_gnttab_region()? > > Thanks, > Andrea > >> Or should I just call it before allocate_memory() in one case, but still after >> allocate_memory_11() in the other? >> >> Thanks. > AFAICT there is nothing stopping us from moving find_gnttab_region() before allocate_*. This function initializes gnttab region with PA of Xen. In normal case, because Xen is added as bootmodule, it will never be mapped in dom0 memory map and the placement does not matter. In LLC case, it will point to relocated address of Xen and it needs to be known before calling find_unallocated_memory. Don't rename it, leave as is, just move before allocate_*. @Carlo: My prerequisite patch has been merged, so you're good to respin a series (unless you wait for some feedback in which case do let me know). To prevent too many respins, you're going to call find_unallocated_memory for LLC passing resmem and gnttab to be excluded. If you're going to reuse add_ext_regions, you need to rename it and fix comments to make it more generic. As for the size, the decision is yours. One solution would be to modify add_ext_regions to take min bank size as parameter (64MB for extended regions, X for LLC dom0). In your code, you write that the first bank must contain dom0, dtb, ramdisk and you chose 128MB. However, looking at the code, you seem to discard banks < 128 for all the banks, not only for the first one. This is the part that I don't have a ready solution. Maybe you could define your own add_free_region function and sort the banks, so that you take the largest possible bank first for dom0. This could simplify things. You can also ask others for opinion. We are approaching Dec 20th deadline, and I want this series to be in as it's been on the list for too many years. I'm willing to accept a sub-optimal solution (so far will be used only for LLC, and LLC as experimental feature will be the only victim of not optimal algorithm) for now, and we can think of a better one after the release. But still, even the sub-optimal solution must make sense. ~Michal
Hi Michal, On Fri, Dec 13, 2024 at 10:46 AM Michal Orzel <michal.orzel@amd.com> wrote: > > Hi Carlo, Andrea, > > On 12/12/2024 19:22, Andrea Bastoni wrote: > > > > > > On 12/12/2024 18:48, Carlo Nonato wrote: > >> Hi, > >> > >> On Mon, Dec 9, 2024 at 8:17 PM Julien Grall <julien@xen.org> wrote: > >>> > >>> Hi Michal, > >>> > >>> On 07/12/2024 15:04, Michal Orzel wrote: > >>>> > >>>> > >>>> On 06/12/2024 19:37, Julien Grall wrote: > >>>>> > >>>>> > >>>>> Hi, > >>>>> > >>>>> Sorry for the late answer. > >>>>> > >>>>> On 05/12/2024 09:40, Michal Orzel wrote: > >>>>>> > >>>>>> > >>>>>> On 02/12/2024 17:59, Carlo Nonato wrote: > >>>>>>> > >>>>>>> > >>>>>>> Cache coloring requires Dom0 not to be direct-mapped because of its non > >>>>>>> contiguous mapping nature, so allocate_memory() is needed in this case. > >>>>>>> 8d2c3ab18cc1 ("arm/dom0less: put dom0less feature code in a separate module") > >>>>>>> moved allocate_memory() in dom0less_build.c. In order to use it > >>>>>>> in Dom0 construction bring it back to domain_build.c and declare it in > >>>>>>> domain_build.h. > >>>>>>> > >>>>>>> Take the opportunity to adapt the implementation of allocate_memory() so > >>>>>>> that it uses the host layout when called on the hwdom, via > >>>>>>> find_unallocated_memory(). > >>>>>>> > >>>>>>> Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech> > >>>>>>> --- > >>>>>>> v11: > >>>>>>> - GUEST_RAM_BANKS instead of hardcoding the number of banks in allocate_memory() > >>>>>>> - hwdom_ext_regions -> hwdom_free_mem in allocate_memory() > >>>>>>> - added a comment in allocate_memory() when skipping small banks > >>>>>>> v10: > >>>>>>> - fixed a compilation bug that happened when dom0less support was disabled > >>>>>>> v9: > >>>>>>> - no changes > >>>>>>> v8: > >>>>>>> - patch adapted to new changes to allocate_memory() > >>>>>>> v7: > >>>>>>> - allocate_memory() now uses the host layout when called on the hwdom > >>>>>>> v6: > >>>>>>> - new patch > >>>>>>> --- > >>>>>>> xen/arch/arm/dom0less-build.c | 44 ----------- > >>>>>>> xen/arch/arm/domain_build.c | 97 ++++++++++++++++++++++++- > >>>>>>> xen/arch/arm/include/asm/domain_build.h | 1 + > >>>>>>> 3 files changed, 94 insertions(+), 48 deletions(-) > >>>>>>> > >>>>>>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c > >>>>>>> index d93a85434e..67b1503647 100644 > >>>>>>> --- a/xen/arch/arm/dom0less-build.c > >>>>>>> +++ b/xen/arch/arm/dom0less-build.c > >>>>>>> @@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void) > >>>>>>> return ( !dom0found && domUfound ); > >>>>>>> } > >>>>>>> > >>>>>>> -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) > >>>>>>> -{ > >>>>>>> - struct membanks *mem = kernel_info_get_mem(kinfo); > >>>>>>> - unsigned int i; > >>>>>>> - paddr_t bank_size; > >>>>>>> - > >>>>>>> - printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", > >>>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ > >>>>>>> - (unsigned long)(kinfo->unassigned_mem >> 20), d); > >>>>>>> - > >>>>>>> - mem->nr_banks = 0; > >>>>>>> - bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem); > >>>>>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE), > >>>>>>> - bank_size) ) > >>>>>>> - goto fail; > >>>>>>> - > >>>>>>> - bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem); > >>>>>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE), > >>>>>>> - bank_size) ) > >>>>>>> - goto fail; > >>>>>>> - > >>>>>>> - if ( kinfo->unassigned_mem ) > >>>>>>> - goto fail; > >>>>>>> - > >>>>>>> - for( i = 0; i < mem->nr_banks; i++ ) > >>>>>>> - { > >>>>>>> - printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", > >>>>>>> - d, > >>>>>>> - i, > >>>>>>> - mem->bank[i].start, > >>>>>>> - mem->bank[i].start + mem->bank[i].size, > >>>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ > >>>>>>> - (unsigned long)(mem->bank[i].size >> 20)); > >>>>>>> - } > >>>>>>> - > >>>>>>> - return; > >>>>>>> - > >>>>>>> -fail: > >>>>>>> - panic("Failed to allocate requested domain memory." > >>>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ > >>>>>>> - " %ldKB unallocated. Fix the VMs configurations.\n", > >>>>>>> - (unsigned long)kinfo->unassigned_mem >> 10); > >>>>>>> -} > >>>>>>> - > >>>>>>> #ifdef CONFIG_VGICV2 > >>>>>>> static int __init make_gicv2_domU_node(struct kernel_info *kinfo) > >>>>>>> { > >>>>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > >>>>>>> index 2c30792de8..2b8cba9b2f 100644 > >>>>>>> --- a/xen/arch/arm/domain_build.c > >>>>>>> +++ b/xen/arch/arm/domain_build.c > >>>>>>> @@ -416,7 +416,6 @@ static void __init allocate_memory_11(struct domain *d, > >>>>>>> } > >>>>>>> } > >>>>>>> > >>>>>>> -#ifdef CONFIG_DOM0LESS_BOOT > >>>>>>> bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size, > >>>>>>> alloc_domheap_mem_cb cb, void *extra) > >>>>>>> { > >>>>>>> @@ -508,7 +507,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, > >>>>>>> > >>>>>>> return true; > >>>>>>> } > >>>>>>> -#endif > >>>>>>> > >>>>>>> /* > >>>>>>> * When PCI passthrough is available we want to keep the > >>>>>>> @@ -1003,6 +1001,94 @@ out: > >>>>>>> return res; > >>>>>>> } > >>>>>>> > >>>>>>> +void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) > >>>>>>> +{ > >>>>>>> + struct membanks *mem = kernel_info_get_mem(kinfo); > >>>>>>> + unsigned int i, nr_banks = GUEST_RAM_BANKS; > >>>>>>> + paddr_t bank_start, bank_size; > >>>>>> Limit the scope > >>>>>> > >>>>>>> + struct membanks *hwdom_free_mem = NULL; > >>>>>>> + const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; > >>>>>>> + const uint64_t banksize[] = GUEST_RAM_BANK_SIZES; > >>>>>> Limit the scope > >>>>>> > >>>>>>> + > >>>>>>> + printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", > >>>>>>> + /* Don't want format this as PRIpaddr (16 digit hex) */ > >>>>>>> + (unsigned long)(kinfo->unassigned_mem >> 20), d); > >>>>>>> + > >>>>>>> + mem->nr_banks = 0; > >>>>>>> + /* > >>>>>>> + * Use host memory layout for hwdom. Only case for this is when LLC coloring > >>>>>>> + * is enabled. > >>>>>>> + */ > >>>>>>> + if ( is_hardware_domain(d) ) > >>>>>>> + { > >>>>>>> + ASSERT(llc_coloring_enabled); > >>>>>> This patch does not build because of declaration not being visible. You must include <xen/llc-coloring.h>. > >>>>> > >>>>> Piggying back on this comment. AFAICT, the code below would work also in > >>>>> the non cache coloring case. So what's the assert is for? > >>>>> > >>>>>> > >>>>>>> + > >>>>>>> + hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank, > >>>>>>> + NR_MEM_BANKS); > >>>>>>> + if ( !hwdom_free_mem ) > >>>>>>> + goto fail; > >>>>>>> + > >>>>>>> + hwdom_free_mem->max_banks = NR_MEM_BANKS; > >>>>>>> + > >>>>>>> + if ( find_unallocated_memory(kinfo, hwdom_free_mem) ) > >>>>>> My remarks for the use of find_unallocated_memory() 1:1 have not been addressed. You did not even > >>>>>> change the comments inside the function. The problem is that the function is specifically designed > >>>>>> for finding extended regions and assumes being called at certain point i.e. dom0 RAM allocated, gnttab > >>>>>> region allocated, etc. > >>>>> > >>>>> So I agree that the function should be updated if we plan to use it for > >>>>> other purpose. > >>>>> > >>>>> My opinion is that we should attempt to make the function generic so > >>>>> that in your > >>>>>> case you can choose which regions to exclude, define even your own function to grab free regions (at the moment > >>>>>> add_ext_regions grabs banks >= 64M but you still discards banks >= 128M, so it's a bit wasteful. > >>>>>> > >>>>>> My very short attempt to make the function as generic as possible in the first iteration: > >>>>>> https://paste.debian.net/1338334/ > >>>>> > >>>>> This looks better, but I wonder why we need still need to exclude the > >>>>> static regions? Wouldn't it be sufficient to exclude just reserved regions? > >>>> Static shared memory banks are not part of reserved memory (i.e. bootinfo.reserved_mem) if that's what you're asking. > >>>> They are stored in bootinfo.shmem, hence we need to take them into account when searching for unused address space. > >>> > >>> Oh I missed the fact you now pass "mem_banks" as a parameter. I thought > >>> they would still get excluded for cache coloring case. > >>> > >>>> > >>>> If you and Carlo are ok with my proposed solution for making the function generic, I can send a patch as a prerequisite > >>>> patch for Carlo series. > >>> > >>> I am fine with the approach. > >>> > >>> Cheers, > >>> > >>> -- > >>> Julien Grall > >>> > >> > >>> @@ -2152,7 +2238,10 @@ static int __init construct_dom0(struct domain *d) > >>> /* type must be set before allocate_memory */ > >>> d->arch.type = kinfo.type; > >>> #endif > >>> - allocate_memory_11(d, &kinfo); > >>> + if ( is_domain_direct_mapped(d) ) > >>> + allocate_memory_11(d, &kinfo); > >>> + else > >>> + allocate_memory(d, &kinfo); > >>> find_gnttab_region(d, &kinfo); > >> > >> Since find_gnttab_region() is called after allocate_memory(), kinfo->gnttab_* > >> fields aren't initialized and the call to find_unallocated_memory() with > >> gnttab as the region to exclude, fails ending in a crash since memory for > >> dom0 can't be allocated. > >> > >> Can the solution be to call find_gnttab_region() before the above if? > > > > The function is called find, but currently it only initializes kinfo->gnttab_start > > and kinfo->gnttab_size and we tested that moving it before allocate_memory* doesn't > > cause fallouts. > > > > If moving before allocate_memory*, would it be better to rename it e.g., init_gnttab_region()? > > > > Thanks, > > Andrea > > > >> Or should I just call it before allocate_memory() in one case, but still after > >> allocate_memory_11() in the other? > >> > >> Thanks. > > > > AFAICT there is nothing stopping us from moving find_gnttab_region() before allocate_*. This function initializes > gnttab region with PA of Xen. In normal case, because Xen is added as bootmodule, it will never be mapped in dom0 memory map > and the placement does not matter. In LLC case, it will point to relocated address of Xen and it needs to be known before > calling find_unallocated_memory. Don't rename it, leave as is, just move before allocate_*. > > @Carlo: > My prerequisite patch has been merged, so you're good to respin a series (unless you wait for some feedback in which case do let me know). > To prevent too many respins, you're going to call find_unallocated_memory for LLC passing resmem and gnttab to be excluded. If you're going > to reuse add_ext_regions, you need to rename it and fix comments to make it more generic. As for the size, the decision is yours. One solution > would be to modify add_ext_regions to take min bank size as parameter (64MB for extended regions, X for LLC dom0). In your code, you write that > the first bank must contain dom0, dtb, ramdisk and you chose 128MB. However, looking at the code, you seem to discard banks < 128 for all the banks, > not only for the first one. This is the part that I don't have a ready solution. Maybe you could define your own add_free_region function and sort > the banks, so that you take the largest possible bank first for dom0. This could simplify things. For the moment I added a __add_ext_regions() helper that also takes a skip_size parameter. This is called by add_ext_regions() and by a new add_hwdom_free_regions() callback used in allocate_memory(). I still use 128MB for all the banks. Do you think this is acceptable, maybe with a FIXME comment cause we should skip only the first bank, or not? > You can also ask others for opinion. > > We are approaching Dec 20th deadline, and I want this series to be in as it's been on the list for too many years. I'm willing to accept a sub-optimal solution > (so far will be used only for LLC, and LLC as experimental feature will be the only victim of not optimal algorithm) for now, and we can think of a better one > after the release. But still, even the sub-optimal solution must make sense. > > ~Michal > Thanks.
On 13/12/2024 11:26, Carlo Nonato wrote: > > > Hi Michal, > > On Fri, Dec 13, 2024 at 10:46 AM Michal Orzel <michal.orzel@amd.com> wrote: >> >> Hi Carlo, Andrea, >> >> On 12/12/2024 19:22, Andrea Bastoni wrote: >>> >>> >>> On 12/12/2024 18:48, Carlo Nonato wrote: >>>> Hi, >>>> >>>> On Mon, Dec 9, 2024 at 8:17 PM Julien Grall <julien@xen.org> wrote: >>>>> >>>>> Hi Michal, >>>>> >>>>> On 07/12/2024 15:04, Michal Orzel wrote: >>>>>> >>>>>> >>>>>> On 06/12/2024 19:37, Julien Grall wrote: >>>>>>> >>>>>>> >>>>>>> Hi, >>>>>>> >>>>>>> Sorry for the late answer. >>>>>>> >>>>>>> On 05/12/2024 09:40, Michal Orzel wrote: >>>>>>>> >>>>>>>> >>>>>>>> On 02/12/2024 17:59, Carlo Nonato wrote: >>>>>>>>> >>>>>>>>> >>>>>>>>> Cache coloring requires Dom0 not to be direct-mapped because of its non >>>>>>>>> contiguous mapping nature, so allocate_memory() is needed in this case. >>>>>>>>> 8d2c3ab18cc1 ("arm/dom0less: put dom0less feature code in a separate module") >>>>>>>>> moved allocate_memory() in dom0less_build.c. In order to use it >>>>>>>>> in Dom0 construction bring it back to domain_build.c and declare it in >>>>>>>>> domain_build.h. >>>>>>>>> >>>>>>>>> Take the opportunity to adapt the implementation of allocate_memory() so >>>>>>>>> that it uses the host layout when called on the hwdom, via >>>>>>>>> find_unallocated_memory(). >>>>>>>>> >>>>>>>>> Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech> >>>>>>>>> --- >>>>>>>>> v11: >>>>>>>>> - GUEST_RAM_BANKS instead of hardcoding the number of banks in allocate_memory() >>>>>>>>> - hwdom_ext_regions -> hwdom_free_mem in allocate_memory() >>>>>>>>> - added a comment in allocate_memory() when skipping small banks >>>>>>>>> v10: >>>>>>>>> - fixed a compilation bug that happened when dom0less support was disabled >>>>>>>>> v9: >>>>>>>>> - no changes >>>>>>>>> v8: >>>>>>>>> - patch adapted to new changes to allocate_memory() >>>>>>>>> v7: >>>>>>>>> - allocate_memory() now uses the host layout when called on the hwdom >>>>>>>>> v6: >>>>>>>>> - new patch >>>>>>>>> --- >>>>>>>>> xen/arch/arm/dom0less-build.c | 44 ----------- >>>>>>>>> xen/arch/arm/domain_build.c | 97 ++++++++++++++++++++++++- >>>>>>>>> xen/arch/arm/include/asm/domain_build.h | 1 + >>>>>>>>> 3 files changed, 94 insertions(+), 48 deletions(-) >>>>>>>>> >>>>>>>>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c >>>>>>>>> index d93a85434e..67b1503647 100644 >>>>>>>>> --- a/xen/arch/arm/dom0less-build.c >>>>>>>>> +++ b/xen/arch/arm/dom0less-build.c >>>>>>>>> @@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void) >>>>>>>>> return ( !dom0found && domUfound ); >>>>>>>>> } >>>>>>>>> >>>>>>>>> -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) >>>>>>>>> -{ >>>>>>>>> - struct membanks *mem = kernel_info_get_mem(kinfo); >>>>>>>>> - unsigned int i; >>>>>>>>> - paddr_t bank_size; >>>>>>>>> - >>>>>>>>> - printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", >>>>>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>>>>>>>> - (unsigned long)(kinfo->unassigned_mem >> 20), d); >>>>>>>>> - >>>>>>>>> - mem->nr_banks = 0; >>>>>>>>> - bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem); >>>>>>>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE), >>>>>>>>> - bank_size) ) >>>>>>>>> - goto fail; >>>>>>>>> - >>>>>>>>> - bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem); >>>>>>>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE), >>>>>>>>> - bank_size) ) >>>>>>>>> - goto fail; >>>>>>>>> - >>>>>>>>> - if ( kinfo->unassigned_mem ) >>>>>>>>> - goto fail; >>>>>>>>> - >>>>>>>>> - for( i = 0; i < mem->nr_banks; i++ ) >>>>>>>>> - { >>>>>>>>> - printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", >>>>>>>>> - d, >>>>>>>>> - i, >>>>>>>>> - mem->bank[i].start, >>>>>>>>> - mem->bank[i].start + mem->bank[i].size, >>>>>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>>>>>>>> - (unsigned long)(mem->bank[i].size >> 20)); >>>>>>>>> - } >>>>>>>>> - >>>>>>>>> - return; >>>>>>>>> - >>>>>>>>> -fail: >>>>>>>>> - panic("Failed to allocate requested domain memory." >>>>>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ >>>>>>>>> - " %ldKB unallocated. Fix the VMs configurations.\n", >>>>>>>>> - (unsigned long)kinfo->unassigned_mem >> 10); >>>>>>>>> -} >>>>>>>>> - >>>>>>>>> #ifdef CONFIG_VGICV2 >>>>>>>>> static int __init make_gicv2_domU_node(struct kernel_info *kinfo) >>>>>>>>> { >>>>>>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c >>>>>>>>> index 2c30792de8..2b8cba9b2f 100644 >>>>>>>>> --- a/xen/arch/arm/domain_build.c >>>>>>>>> +++ b/xen/arch/arm/domain_build.c >>>>>>>>> @@ -416,7 +416,6 @@ static void __init allocate_memory_11(struct domain *d, >>>>>>>>> } >>>>>>>>> } >>>>>>>>> >>>>>>>>> -#ifdef CONFIG_DOM0LESS_BOOT >>>>>>>>> bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size, >>>>>>>>> alloc_domheap_mem_cb cb, void *extra) >>>>>>>>> { >>>>>>>>> @@ -508,7 +507,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, >>>>>>>>> >>>>>>>>> return true; >>>>>>>>> } >>>>>>>>> -#endif >>>>>>>>> >>>>>>>>> /* >>>>>>>>> * When PCI passthrough is available we want to keep the >>>>>>>>> @@ -1003,6 +1001,94 @@ out: >>>>>>>>> return res; >>>>>>>>> } >>>>>>>>> >>>>>>>>> +void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) >>>>>>>>> +{ >>>>>>>>> + struct membanks *mem = kernel_info_get_mem(kinfo); >>>>>>>>> + unsigned int i, nr_banks = GUEST_RAM_BANKS; >>>>>>>>> + paddr_t bank_start, bank_size; >>>>>>>> Limit the scope >>>>>>>> >>>>>>>>> + struct membanks *hwdom_free_mem = NULL; >>>>>>>>> + const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; >>>>>>>>> + const uint64_t banksize[] = GUEST_RAM_BANK_SIZES; >>>>>>>> Limit the scope >>>>>>>> >>>>>>>>> + >>>>>>>>> + printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", >>>>>>>>> + /* Don't want format this as PRIpaddr (16 digit hex) */ >>>>>>>>> + (unsigned long)(kinfo->unassigned_mem >> 20), d); >>>>>>>>> + >>>>>>>>> + mem->nr_banks = 0; >>>>>>>>> + /* >>>>>>>>> + * Use host memory layout for hwdom. Only case for this is when LLC coloring >>>>>>>>> + * is enabled. >>>>>>>>> + */ >>>>>>>>> + if ( is_hardware_domain(d) ) >>>>>>>>> + { >>>>>>>>> + ASSERT(llc_coloring_enabled); >>>>>>>> This patch does not build because of declaration not being visible. You must include <xen/llc-coloring.h>. >>>>>>> >>>>>>> Piggying back on this comment. AFAICT, the code below would work also in >>>>>>> the non cache coloring case. So what's the assert is for? >>>>>>> >>>>>>>> >>>>>>>>> + >>>>>>>>> + hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank, >>>>>>>>> + NR_MEM_BANKS); >>>>>>>>> + if ( !hwdom_free_mem ) >>>>>>>>> + goto fail; >>>>>>>>> + >>>>>>>>> + hwdom_free_mem->max_banks = NR_MEM_BANKS; >>>>>>>>> + >>>>>>>>> + if ( find_unallocated_memory(kinfo, hwdom_free_mem) ) >>>>>>>> My remarks for the use of find_unallocated_memory() 1:1 have not been addressed. You did not even >>>>>>>> change the comments inside the function. The problem is that the function is specifically designed >>>>>>>> for finding extended regions and assumes being called at certain point i.e. dom0 RAM allocated, gnttab >>>>>>>> region allocated, etc. >>>>>>> >>>>>>> So I agree that the function should be updated if we plan to use it for >>>>>>> other purpose. >>>>>>> >>>>>>> My opinion is that we should attempt to make the function generic so >>>>>>> that in your >>>>>>>> case you can choose which regions to exclude, define even your own function to grab free regions (at the moment >>>>>>>> add_ext_regions grabs banks >= 64M but you still discards banks >= 128M, so it's a bit wasteful. >>>>>>>> >>>>>>>> My very short attempt to make the function as generic as possible in the first iteration: >>>>>>>> https://paste.debian.net/1338334/ >>>>>>> >>>>>>> This looks better, but I wonder why we need still need to exclude the >>>>>>> static regions? Wouldn't it be sufficient to exclude just reserved regions? >>>>>> Static shared memory banks are not part of reserved memory (i.e. bootinfo.reserved_mem) if that's what you're asking. >>>>>> They are stored in bootinfo.shmem, hence we need to take them into account when searching for unused address space. >>>>> >>>>> Oh I missed the fact you now pass "mem_banks" as a parameter. I thought >>>>> they would still get excluded for cache coloring case. >>>>> >>>>>> >>>>>> If you and Carlo are ok with my proposed solution for making the function generic, I can send a patch as a prerequisite >>>>>> patch for Carlo series. >>>>> >>>>> I am fine with the approach. >>>>> >>>>> Cheers, >>>>> >>>>> -- >>>>> Julien Grall >>>>> >>>> >>>>> @@ -2152,7 +2238,10 @@ static int __init construct_dom0(struct domain *d) >>>>> /* type must be set before allocate_memory */ >>>>> d->arch.type = kinfo.type; >>>>> #endif >>>>> - allocate_memory_11(d, &kinfo); >>>>> + if ( is_domain_direct_mapped(d) ) >>>>> + allocate_memory_11(d, &kinfo); >>>>> + else >>>>> + allocate_memory(d, &kinfo); >>>>> find_gnttab_region(d, &kinfo); >>>> >>>> Since find_gnttab_region() is called after allocate_memory(), kinfo->gnttab_* >>>> fields aren't initialized and the call to find_unallocated_memory() with >>>> gnttab as the region to exclude, fails ending in a crash since memory for >>>> dom0 can't be allocated. >>>> >>>> Can the solution be to call find_gnttab_region() before the above if? >>> >>> The function is called find, but currently it only initializes kinfo->gnttab_start >>> and kinfo->gnttab_size and we tested that moving it before allocate_memory* doesn't >>> cause fallouts. >>> >>> If moving before allocate_memory*, would it be better to rename it e.g., init_gnttab_region()? >>> >>> Thanks, >>> Andrea >>> >>>> Or should I just call it before allocate_memory() in one case, but still after >>>> allocate_memory_11() in the other? >>>> >>>> Thanks. >>> >> >> AFAICT there is nothing stopping us from moving find_gnttab_region() before allocate_*. This function initializes >> gnttab region with PA of Xen. In normal case, because Xen is added as bootmodule, it will never be mapped in dom0 memory map >> and the placement does not matter. In LLC case, it will point to relocated address of Xen and it needs to be known before >> calling find_unallocated_memory. Don't rename it, leave as is, just move before allocate_*. >> >> @Carlo: >> My prerequisite patch has been merged, so you're good to respin a series (unless you wait for some feedback in which case do let me know). >> To prevent too many respins, you're going to call find_unallocated_memory for LLC passing resmem and gnttab to be excluded. If you're going >> to reuse add_ext_regions, you need to rename it and fix comments to make it more generic. As for the size, the decision is yours. One solution >> would be to modify add_ext_regions to take min bank size as parameter (64MB for extended regions, X for LLC dom0). In your code, you write that >> the first bank must contain dom0, dtb, ramdisk and you chose 128MB. However, looking at the code, you seem to discard banks < 128 for all the banks, >> not only for the first one. This is the part that I don't have a ready solution. Maybe you could define your own add_free_region function and sort >> the banks, so that you take the largest possible bank first for dom0. This could simplify things. > > For the moment I added a __add_ext_regions() helper that also takes a skip_size I'm not sure if MISRA and our guidelines are happy with prefixing with function with __. I don't understand the skip_size parameter. In which scenario do you want to use it? Not for extended regions and for LLC, even with your current solution, you also want to find banks bigger than some size. > parameter. This is called by add_ext_regions() and by a new > add_hwdom_free_regions() callback used in allocate_memory(). > I still use 128MB for all the banks. Do you think this is acceptable, maybe > with a FIXME comment cause we should skip only the first bank, or not? First of all, I'm not convinced with 128MB. This is definitely not a requirement for arm64. allocate_memory_11 uses it but the algorithm of finding banks is completely different. AFAICT, with my suggested solution i.e. sorting banks in a helper like add_ext_regions used only for LLC case, you no longer need to worry about size. You simply start with the biggest possible bank as the first bank. ~Michal
On Fri, Dec 13, 2024 at 11:56 AM Michal Orzel <michal.orzel@amd.com> wrote: > > > > On 13/12/2024 11:26, Carlo Nonato wrote: > > > > > > Hi Michal, > > > > On Fri, Dec 13, 2024 at 10:46 AM Michal Orzel <michal.orzel@amd.com> wrote: > >> > >> Hi Carlo, Andrea, > >> > >> On 12/12/2024 19:22, Andrea Bastoni wrote: > >>> > >>> > >>> On 12/12/2024 18:48, Carlo Nonato wrote: > >>>> Hi, > >>>> > >>>> On Mon, Dec 9, 2024 at 8:17 PM Julien Grall <julien@xen.org> wrote: > >>>>> > >>>>> Hi Michal, > >>>>> > >>>>> On 07/12/2024 15:04, Michal Orzel wrote: > >>>>>> > >>>>>> > >>>>>> On 06/12/2024 19:37, Julien Grall wrote: > >>>>>>> > >>>>>>> > >>>>>>> Hi, > >>>>>>> > >>>>>>> Sorry for the late answer. > >>>>>>> > >>>>>>> On 05/12/2024 09:40, Michal Orzel wrote: > >>>>>>>> > >>>>>>>> > >>>>>>>> On 02/12/2024 17:59, Carlo Nonato wrote: > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> Cache coloring requires Dom0 not to be direct-mapped because of its non > >>>>>>>>> contiguous mapping nature, so allocate_memory() is needed in this case. > >>>>>>>>> 8d2c3ab18cc1 ("arm/dom0less: put dom0less feature code in a separate module") > >>>>>>>>> moved allocate_memory() in dom0less_build.c. In order to use it > >>>>>>>>> in Dom0 construction bring it back to domain_build.c and declare it in > >>>>>>>>> domain_build.h. > >>>>>>>>> > >>>>>>>>> Take the opportunity to adapt the implementation of allocate_memory() so > >>>>>>>>> that it uses the host layout when called on the hwdom, via > >>>>>>>>> find_unallocated_memory(). > >>>>>>>>> > >>>>>>>>> Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech> > >>>>>>>>> --- > >>>>>>>>> v11: > >>>>>>>>> - GUEST_RAM_BANKS instead of hardcoding the number of banks in allocate_memory() > >>>>>>>>> - hwdom_ext_regions -> hwdom_free_mem in allocate_memory() > >>>>>>>>> - added a comment in allocate_memory() when skipping small banks > >>>>>>>>> v10: > >>>>>>>>> - fixed a compilation bug that happened when dom0less support was disabled > >>>>>>>>> v9: > >>>>>>>>> - no changes > >>>>>>>>> v8: > >>>>>>>>> - patch adapted to new changes to allocate_memory() > >>>>>>>>> v7: > >>>>>>>>> - allocate_memory() now uses the host layout when called on the hwdom > >>>>>>>>> v6: > >>>>>>>>> - new patch > >>>>>>>>> --- > >>>>>>>>> xen/arch/arm/dom0less-build.c | 44 ----------- > >>>>>>>>> xen/arch/arm/domain_build.c | 97 ++++++++++++++++++++++++- > >>>>>>>>> xen/arch/arm/include/asm/domain_build.h | 1 + > >>>>>>>>> 3 files changed, 94 insertions(+), 48 deletions(-) > >>>>>>>>> > >>>>>>>>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c > >>>>>>>>> index d93a85434e..67b1503647 100644 > >>>>>>>>> --- a/xen/arch/arm/dom0less-build.c > >>>>>>>>> +++ b/xen/arch/arm/dom0less-build.c > >>>>>>>>> @@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void) > >>>>>>>>> return ( !dom0found && domUfound ); > >>>>>>>>> } > >>>>>>>>> > >>>>>>>>> -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) > >>>>>>>>> -{ > >>>>>>>>> - struct membanks *mem = kernel_info_get_mem(kinfo); > >>>>>>>>> - unsigned int i; > >>>>>>>>> - paddr_t bank_size; > >>>>>>>>> - > >>>>>>>>> - printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", > >>>>>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ > >>>>>>>>> - (unsigned long)(kinfo->unassigned_mem >> 20), d); > >>>>>>>>> - > >>>>>>>>> - mem->nr_banks = 0; > >>>>>>>>> - bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem); > >>>>>>>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE), > >>>>>>>>> - bank_size) ) > >>>>>>>>> - goto fail; > >>>>>>>>> - > >>>>>>>>> - bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem); > >>>>>>>>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE), > >>>>>>>>> - bank_size) ) > >>>>>>>>> - goto fail; > >>>>>>>>> - > >>>>>>>>> - if ( kinfo->unassigned_mem ) > >>>>>>>>> - goto fail; > >>>>>>>>> - > >>>>>>>>> - for( i = 0; i < mem->nr_banks; i++ ) > >>>>>>>>> - { > >>>>>>>>> - printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", > >>>>>>>>> - d, > >>>>>>>>> - i, > >>>>>>>>> - mem->bank[i].start, > >>>>>>>>> - mem->bank[i].start + mem->bank[i].size, > >>>>>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ > >>>>>>>>> - (unsigned long)(mem->bank[i].size >> 20)); > >>>>>>>>> - } > >>>>>>>>> - > >>>>>>>>> - return; > >>>>>>>>> - > >>>>>>>>> -fail: > >>>>>>>>> - panic("Failed to allocate requested domain memory." > >>>>>>>>> - /* Don't want format this as PRIpaddr (16 digit hex) */ > >>>>>>>>> - " %ldKB unallocated. Fix the VMs configurations.\n", > >>>>>>>>> - (unsigned long)kinfo->unassigned_mem >> 10); > >>>>>>>>> -} > >>>>>>>>> - > >>>>>>>>> #ifdef CONFIG_VGICV2 > >>>>>>>>> static int __init make_gicv2_domU_node(struct kernel_info *kinfo) > >>>>>>>>> { > >>>>>>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > >>>>>>>>> index 2c30792de8..2b8cba9b2f 100644 > >>>>>>>>> --- a/xen/arch/arm/domain_build.c > >>>>>>>>> +++ b/xen/arch/arm/domain_build.c > >>>>>>>>> @@ -416,7 +416,6 @@ static void __init allocate_memory_11(struct domain *d, > >>>>>>>>> } > >>>>>>>>> } > >>>>>>>>> > >>>>>>>>> -#ifdef CONFIG_DOM0LESS_BOOT > >>>>>>>>> bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size, > >>>>>>>>> alloc_domheap_mem_cb cb, void *extra) > >>>>>>>>> { > >>>>>>>>> @@ -508,7 +507,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, > >>>>>>>>> > >>>>>>>>> return true; > >>>>>>>>> } > >>>>>>>>> -#endif > >>>>>>>>> > >>>>>>>>> /* > >>>>>>>>> * When PCI passthrough is available we want to keep the > >>>>>>>>> @@ -1003,6 +1001,94 @@ out: > >>>>>>>>> return res; > >>>>>>>>> } > >>>>>>>>> > >>>>>>>>> +void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) > >>>>>>>>> +{ > >>>>>>>>> + struct membanks *mem = kernel_info_get_mem(kinfo); > >>>>>>>>> + unsigned int i, nr_banks = GUEST_RAM_BANKS; > >>>>>>>>> + paddr_t bank_start, bank_size; > >>>>>>>> Limit the scope > >>>>>>>> > >>>>>>>>> + struct membanks *hwdom_free_mem = NULL; > >>>>>>>>> + const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; > >>>>>>>>> + const uint64_t banksize[] = GUEST_RAM_BANK_SIZES; > >>>>>>>> Limit the scope > >>>>>>>> > >>>>>>>>> + > >>>>>>>>> + printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", > >>>>>>>>> + /* Don't want format this as PRIpaddr (16 digit hex) */ > >>>>>>>>> + (unsigned long)(kinfo->unassigned_mem >> 20), d); > >>>>>>>>> + > >>>>>>>>> + mem->nr_banks = 0; > >>>>>>>>> + /* > >>>>>>>>> + * Use host memory layout for hwdom. Only case for this is when LLC coloring > >>>>>>>>> + * is enabled. > >>>>>>>>> + */ > >>>>>>>>> + if ( is_hardware_domain(d) ) > >>>>>>>>> + { > >>>>>>>>> + ASSERT(llc_coloring_enabled); > >>>>>>>> This patch does not build because of declaration not being visible. You must include <xen/llc-coloring.h>. > >>>>>>> > >>>>>>> Piggying back on this comment. AFAICT, the code below would work also in > >>>>>>> the non cache coloring case. So what's the assert is for? > >>>>>>> > >>>>>>>> > >>>>>>>>> + > >>>>>>>>> + hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank, > >>>>>>>>> + NR_MEM_BANKS); > >>>>>>>>> + if ( !hwdom_free_mem ) > >>>>>>>>> + goto fail; > >>>>>>>>> + > >>>>>>>>> + hwdom_free_mem->max_banks = NR_MEM_BANKS; > >>>>>>>>> + > >>>>>>>>> + if ( find_unallocated_memory(kinfo, hwdom_free_mem) ) > >>>>>>>> My remarks for the use of find_unallocated_memory() 1:1 have not been addressed. You did not even > >>>>>>>> change the comments inside the function. The problem is that the function is specifically designed > >>>>>>>> for finding extended regions and assumes being called at certain point i.e. dom0 RAM allocated, gnttab > >>>>>>>> region allocated, etc. > >>>>>>> > >>>>>>> So I agree that the function should be updated if we plan to use it for > >>>>>>> other purpose. > >>>>>>> > >>>>>>> My opinion is that we should attempt to make the function generic so > >>>>>>> that in your > >>>>>>>> case you can choose which regions to exclude, define even your own function to grab free regions (at the moment > >>>>>>>> add_ext_regions grabs banks >= 64M but you still discards banks >= 128M, so it's a bit wasteful. > >>>>>>>> > >>>>>>>> My very short attempt to make the function as generic as possible in the first iteration: > >>>>>>>> https://paste.debian.net/1338334/ > >>>>>>> > >>>>>>> This looks better, but I wonder why we need still need to exclude the > >>>>>>> static regions? Wouldn't it be sufficient to exclude just reserved regions? > >>>>>> Static shared memory banks are not part of reserved memory (i.e. bootinfo.reserved_mem) if that's what you're asking. > >>>>>> They are stored in bootinfo.shmem, hence we need to take them into account when searching for unused address space. > >>>>> > >>>>> Oh I missed the fact you now pass "mem_banks" as a parameter. I thought > >>>>> they would still get excluded for cache coloring case. > >>>>> > >>>>>> > >>>>>> If you and Carlo are ok with my proposed solution for making the function generic, I can send a patch as a prerequisite > >>>>>> patch for Carlo series. > >>>>> > >>>>> I am fine with the approach. > >>>>> > >>>>> Cheers, > >>>>> > >>>>> -- > >>>>> Julien Grall > >>>>> > >>>> > >>>>> @@ -2152,7 +2238,10 @@ static int __init construct_dom0(struct domain *d) > >>>>> /* type must be set before allocate_memory */ > >>>>> d->arch.type = kinfo.type; > >>>>> #endif > >>>>> - allocate_memory_11(d, &kinfo); > >>>>> + if ( is_domain_direct_mapped(d) ) > >>>>> + allocate_memory_11(d, &kinfo); > >>>>> + else > >>>>> + allocate_memory(d, &kinfo); > >>>>> find_gnttab_region(d, &kinfo); > >>>> > >>>> Since find_gnttab_region() is called after allocate_memory(), kinfo->gnttab_* > >>>> fields aren't initialized and the call to find_unallocated_memory() with > >>>> gnttab as the region to exclude, fails ending in a crash since memory for > >>>> dom0 can't be allocated. > >>>> > >>>> Can the solution be to call find_gnttab_region() before the above if? > >>> > >>> The function is called find, but currently it only initializes kinfo->gnttab_start > >>> and kinfo->gnttab_size and we tested that moving it before allocate_memory* doesn't > >>> cause fallouts. > >>> > >>> If moving before allocate_memory*, would it be better to rename it e.g., init_gnttab_region()? > >>> > >>> Thanks, > >>> Andrea > >>> > >>>> Or should I just call it before allocate_memory() in one case, but still after > >>>> allocate_memory_11() in the other? > >>>> > >>>> Thanks. > >>> > >> > >> AFAICT there is nothing stopping us from moving find_gnttab_region() before allocate_*. This function initializes > >> gnttab region with PA of Xen. In normal case, because Xen is added as bootmodule, it will never be mapped in dom0 memory map > >> and the placement does not matter. In LLC case, it will point to relocated address of Xen and it needs to be known before > >> calling find_unallocated_memory. Don't rename it, leave as is, just move before allocate_*. > >> > >> @Carlo: > >> My prerequisite patch has been merged, so you're good to respin a series (unless you wait for some feedback in which case do let me know). > >> To prevent too many respins, you're going to call find_unallocated_memory for LLC passing resmem and gnttab to be excluded. If you're going > >> to reuse add_ext_regions, you need to rename it and fix comments to make it more generic. As for the size, the decision is yours. One solution > >> would be to modify add_ext_regions to take min bank size as parameter (64MB for extended regions, X for LLC dom0). In your code, you write that > >> the first bank must contain dom0, dtb, ramdisk and you chose 128MB. However, looking at the code, you seem to discard banks < 128 for all the banks, > >> not only for the first one. This is the part that I don't have a ready solution. Maybe you could define your own add_free_region function and sort > >> the banks, so that you take the largest possible bank first for dom0. This could simplify things. > > > > For the moment I added a __add_ext_regions() helper that also takes a skip_size > I'm not sure if MISRA and our guidelines are happy with prefixing with function with __. > I don't understand the skip_size parameter. In which scenario do you want to use it? Not for > extended regions and for LLC, even with your current solution, you also want to find banks bigger than > some size. > > > parameter. This is called by add_ext_regions() and by a new > > add_hwdom_free_regions() callback used in allocate_memory(). > > I still use 128MB for all the banks. Do you think this is acceptable, maybe > > with a FIXME comment cause we should skip only the first bank, or not? > First of all, I'm not convinced with 128MB. This is definitely not a requirement for arm64. > allocate_memory_11 uses it but the algorithm of finding banks is completely different. > > AFAICT, with my suggested solution i.e. sorting banks in a helper like add_ext_regions used only > for LLC case, you no longer need to worry about size. You simply start with the biggest possible bank > as the first bank. > > ~Michal > Here's my current patch: diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c index d93a85434e..67b1503647 100644 --- a/xen/arch/arm/dom0less-build.c +++ b/xen/arch/arm/dom0less-build.c @@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void) return ( !dom0found && domUfound ); } -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) -{ - struct membanks *mem = kernel_info_get_mem(kinfo); - unsigned int i; - paddr_t bank_size; - - printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", - /* Don't want format this as PRIpaddr (16 digit hex) */ - (unsigned long)(kinfo->unassigned_mem >> 20), d); - - mem->nr_banks = 0; - bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem); - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE), - bank_size) ) - goto fail; - - bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem); - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE), - bank_size) ) - goto fail; - - if ( kinfo->unassigned_mem ) - goto fail; - - for( i = 0; i < mem->nr_banks; i++ ) - { - printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", - d, - i, - mem->bank[i].start, - mem->bank[i].start + mem->bank[i].size, - /* Don't want format this as PRIpaddr (16 digit hex) */ - (unsigned long)(mem->bank[i].size >> 20)); - } - - return; - -fail: - panic("Failed to allocate requested domain memory." - /* Don't want format this as PRIpaddr (16 digit hex) */ - " %ldKB unallocated. Fix the VMs configurations.\n", - (unsigned long)kinfo->unassigned_mem >> 10); -} - #ifdef CONFIG_VGICV2 static int __init make_gicv2_domU_node(struct kernel_info *kinfo) { diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c index adf26f2778..59ac45c4e0 100644 --- a/xen/arch/arm/domain_build.c +++ b/xen/arch/arm/domain_build.c @@ -2,6 +2,7 @@ #include <xen/init.h> #include <xen/compile.h> #include <xen/lib.h> +#include <xen/llc-coloring.h> #include <xen/mm.h> #include <xen/param.h> #include <xen/domain_page.h> @@ -416,7 +417,6 @@ static void __init allocate_memory_11(struct domain *d, } } -#ifdef CONFIG_DOM0LESS_BOOT bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size, alloc_domheap_mem_cb cb, void *extra) { @@ -508,7 +508,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, return true; } -#endif /* * When PCI passthrough is available we want to keep the @@ -859,8 +858,8 @@ int __init make_memory_node(const struct kernel_info *kinfo, int addrcells, return res; } -int __init add_ext_regions(unsigned long s_gfn, unsigned long e_gfn, - void *data) +static int __init __add_ext_regions(unsigned long s_gfn, unsigned long e_gfn, + void *data, paddr_t skip_size) { struct membanks *ext_regions = data; paddr_t start, size; @@ -885,12 +884,7 @@ int __init add_ext_regions(unsigned long s_gfn, unsigned long e_gfn, e += 1; size = (e - start) & ~(SZ_2M - 1); - /* - * Reasonable size. Not too little to pick up small ranges which are - * not quite useful but increase bookkeeping and not too large - * to skip a large proportion of unused address space. - */ - if ( size < MB(64) ) + if ( size < skip_size ) return 0; ext_regions->bank[ext_regions->nr_banks].start = start; @@ -900,6 +894,28 @@ int __init add_ext_regions(unsigned long s_gfn, unsigned long e_gfn, return 0; } +static int __init add_hwdom_free_regions(unsigned long s_gfn, + unsigned long e_gfn, void *data) +{ + /* + * Skip banks that are too small. The first bank must contain dom0 kernel + + * ramdisk + dtb and 128 MB is the same limit used in allocate_memory_11(). + */ + return __add_ext_regions(s_gfn, e_gfn, data, MB(128)); +} + + +int __init add_ext_regions(unsigned long s_gfn, unsigned long e_gfn, + void *data) +{ + /* + * Reasonable size. Not too little to pick up small ranges which are + * not quite useful but increase bookkeeping and not too large + * to skip a large proportion of unused address space. + */ + return __add_ext_regions(s_gfn, e_gfn, data, MB(64)); +} + /* * Find unused regions of Host address space which can be exposed to domain * using the host memory layout. In order to calculate regions we exclude every @@ -977,6 +993,109 @@ out: return res; } +void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) +{ + struct membanks *mem = kernel_info_get_mem(kinfo); + unsigned int i, nr_banks = GUEST_RAM_BANKS; + struct membanks *hwdom_free_mem = NULL; + + printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", + /* Don't want format this as PRIpaddr (16 digit hex) */ + (unsigned long)(kinfo->unassigned_mem >> 20), d); + + mem->nr_banks = 0; + /* + * Use host memory layout for hwdom. Only case for this is when LLC coloring + * is enabled. + */ + if ( is_hardware_domain(d) ) + { + struct membanks *gnttab = xzalloc_flex_struct(struct membanks, bank, 1); + /* + * Exclude the following regions: + * 1) Remove reserved memory + * 2) Grant table assigned to Dom0 + */ + const struct membanks *mem_banks[] = { + bootinfo_get_reserved_mem(), + gnttab, + }; + + ASSERT(llc_coloring_enabled); + + if ( !gnttab ) + goto fail; + + gnttab->nr_banks = 1; + gnttab->bank[0].start = kinfo->gnttab_start; + gnttab->bank[0].size = kinfo->gnttab_start + kinfo->gnttab_size; + + hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank, + NR_MEM_BANKS); + if ( !hwdom_free_mem ) + goto fail; + + hwdom_free_mem->max_banks = NR_MEM_BANKS; + + if ( find_unallocated_memory(kinfo, mem_banks, ARRAY_SIZE(mem_banks), + hwdom_free_mem, add_hwdom_free_regions) ) + goto fail; + + nr_banks = hwdom_free_mem->nr_banks; + xfree(gnttab); + } + + for ( i = 0; kinfo->unassigned_mem > 0 && nr_banks > 0; i++, nr_banks-- ) + { + paddr_t bank_start, bank_size; + + if ( is_hardware_domain(d) ) + { + bank_start = hwdom_free_mem->bank[i].start; + bank_size = hwdom_free_mem->bank[i].size; + ASSERT(bank_size >= MB(128)); + } + else + { + const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; + const uint64_t banksize[] = GUEST_RAM_BANK_SIZES; + + if ( i >= GUEST_RAM_BANKS ) + goto fail; + + bank_start = bankbase[i]; + bank_size = banksize[i]; + } + + bank_size = MIN(bank_size, kinfo->unassigned_mem); + if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(bank_start), bank_size) ) + goto fail; + } + + if ( kinfo->unassigned_mem ) + goto fail; + + for( i = 0; i < mem->nr_banks; i++ ) + { + printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", + d, + i, + mem->bank[i].start, + mem->bank[i].start + mem->bank[i].size, + /* Don't want format this as PRIpaddr (16 digit hex) */ + (unsigned long)(mem->bank[i].size >> 20)); + } + + xfree(hwdom_free_mem); + return; + +fail: + panic("Failed to allocate requested domain memory." + /* Don't want format this as PRIpaddr (16 digit hex) */ + " %ldKB unallocated. Fix the VMs configurations.\n", + (unsigned long)kinfo->unassigned_mem >> 10); +} + static int __init handle_pci_range(const struct dt_device_node *dev, uint64_t addr, uint64_t len, void *data) { @@ -1235,7 +1354,7 @@ int __init make_hypervisor_node(struct domain *d, ext_regions->max_banks = NR_MEM_BANKS; - if ( is_domain_direct_mapped(d) ) + if ( domain_use_host_layout(d) ) { if ( !is_iommu_enabled(d) ) res = find_host_extended_regions(kinfo, ext_regions); @@ -2164,8 +2283,11 @@ static int __init construct_dom0(struct domain *d) /* type must be set before allocate_memory */ d->arch.type = kinfo.type; #endif - allocate_memory_11(d, &kinfo); find_gnttab_region(d, &kinfo); + if ( is_domain_direct_mapped(d) ) + allocate_memory_11(d, &kinfo); + else + allocate_memory(d, &kinfo); rc = process_shm_chosen(d, &kinfo); if ( rc < 0 ) diff --git a/xen/arch/arm/include/asm/domain_build.h b/xen/arch/arm/include/asm/domain_build.h index e712afbc7f..b0d646e173 100644 --- a/xen/arch/arm/include/asm/domain_build.h +++ b/xen/arch/arm/include/asm/domain_build.h @@ -11,6 +11,7 @@ bool allocate_domheap_memory(struct domain *d, paddr_t tot_size, alloc_domheap_mem_cb cb, void *extra); bool allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, paddr_t tot_size); +void allocate_memory(struct domain *d, struct kernel_info *kinfo); int construct_domain(struct domain *d, struct kernel_info *kinfo); int domain_fdt_begin_node(void *fdt, const char *name, uint64_t unit); int make_chosen_node(const struct kernel_info *kinfo); @@ -54,6 +55,9 @@ static inline int prepare_acpi(struct domain *d, struct kernel_info *kinfo) int prepare_acpi(struct domain *d, struct kernel_info *kinfo); #endif +typedef int (*add_free_regions_fn)(unsigned long s_gfn, unsigned long e_gfn, + void *data); + int add_ext_regions(unsigned long s_gfn, unsigned long e_gfn, void *data); #endif skip_size can be renamed to threshold_size to make it more clear. Anyway I'm not following you on the suggested solution: when should I sort the banks, how can I do it in the callback of find_unallocated_memory() and what if the first biggest bank is lower than 128MB, I should not care for that? Thanks. - Carlo
Using paste.debian: https://paste.debian.net/1339647/ Thanks. - Carlo
On 13/12/2024 12:33, Carlo Nonato wrote: > > > Using paste.debian: > > https://paste.debian.net/1339647/ 1. Issue I mentioned with prefixing with double underscore 2. Generic helper should not be named ext_regions 3. s/skip_size/min_bank_size/ And still you need to convince others about 128MB limit because I'm not sure. Imagine, that our kernel+dtb+ramdisk is > 128MB and your first bank is 128MB. With your solution this would fail. Now, imagine that you sort your banks and start with the biggest one. You don't care about its size. It's the biggest one so if it does not fit, then that's not your problem. ~Michal
On Fri, Dec 13, 2024 at 12:47 PM Michal Orzel <michal.orzel@amd.com> wrote: > > > > On 13/12/2024 12:33, Carlo Nonato wrote: > > > > > > Using paste.debian: > > > > https://paste.debian.net/1339647/ > > 1. Issue I mentioned with prefixing with double underscore > 2. Generic helper should not be named ext_regions > 3. s/skip_size/min_bank_size/ > > And still you need to convince others about 128MB limit because I'm not sure. > > Imagine, that our kernel+dtb+ramdisk is > 128MB and your first bank is 128MB. With your > solution this would fail. Now, imagine that you sort your banks and start with the biggest > one. You don't care about its size. It's the biggest one so if it does not fit, then that's not > your problem. > > ~Michal > Something like that in add_hwdom_free_regions()? > /* Find the insert position (descending order). */ > for ( i = 0; i < free_regions->nr_banks ; i++) > if ( size > free_regions->bank[i].size ) > break; > > /* Move the other banks to make space. */ > for ( j = free_regions->nr_banks; j > i ; j-- ) > { > free_regions->bank[j].start = free_regions->bank[j - 1].start; > free_regions->bank[j].size = free_regions->bank[j - 1].size; > } > > free_regions->bank[i].start = start; > free_regions->bank[i].size = size; > free_regions->nr_banks++; With that (if I didn't make any mistake) I'm inserting banks in descending size order. Is it ok? Thanks. - Carlo
Hi Michal, Julien On Sat, Dec 7, 2024 at 4:05 PM Michal Orzel <michal.orzel@amd.com> wrote: > > On 06/12/2024 19:37, Julien Grall wrote: > > > > > > Hi, > > > > Sorry for the late answer. > > > > On 05/12/2024 09:40, Michal Orzel wrote: > >> > >> > >> On 02/12/2024 17:59, Carlo Nonato wrote: > >>> > >>> > >>> Cache coloring requires Dom0 not to be direct-mapped because of its non > >>> contiguous mapping nature, so allocate_memory() is needed in this case. > >>> 8d2c3ab18cc1 ("arm/dom0less: put dom0less feature code in a separate module") > >>> moved allocate_memory() in dom0less_build.c. In order to use it > >>> in Dom0 construction bring it back to domain_build.c and declare it in > >>> domain_build.h. > >>> > >>> Take the opportunity to adapt the implementation of allocate_memory() so > >>> that it uses the host layout when called on the hwdom, via > >>> find_unallocated_memory(). > >>> > >>> Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech> > >>> --- > >>> v11: > >>> - GUEST_RAM_BANKS instead of hardcoding the number of banks in allocate_memory() > >>> - hwdom_ext_regions -> hwdom_free_mem in allocate_memory() > >>> - added a comment in allocate_memory() when skipping small banks > >>> v10: > >>> - fixed a compilation bug that happened when dom0less support was disabled > >>> v9: > >>> - no changes > >>> v8: > >>> - patch adapted to new changes to allocate_memory() > >>> v7: > >>> - allocate_memory() now uses the host layout when called on the hwdom > >>> v6: > >>> - new patch > >>> --- > >>> xen/arch/arm/dom0less-build.c | 44 ----------- > >>> xen/arch/arm/domain_build.c | 97 ++++++++++++++++++++++++- > >>> xen/arch/arm/include/asm/domain_build.h | 1 + > >>> 3 files changed, 94 insertions(+), 48 deletions(-) > >>> > >>> diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c > >>> index d93a85434e..67b1503647 100644 > >>> --- a/xen/arch/arm/dom0less-build.c > >>> +++ b/xen/arch/arm/dom0less-build.c > >>> @@ -49,50 +49,6 @@ bool __init is_dom0less_mode(void) > >>> return ( !dom0found && domUfound ); > >>> } > >>> > >>> -static void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) > >>> -{ > >>> - struct membanks *mem = kernel_info_get_mem(kinfo); > >>> - unsigned int i; > >>> - paddr_t bank_size; > >>> - > >>> - printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", > >>> - /* Don't want format this as PRIpaddr (16 digit hex) */ > >>> - (unsigned long)(kinfo->unassigned_mem >> 20), d); > >>> - > >>> - mem->nr_banks = 0; > >>> - bank_size = MIN(GUEST_RAM0_SIZE, kinfo->unassigned_mem); > >>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM0_BASE), > >>> - bank_size) ) > >>> - goto fail; > >>> - > >>> - bank_size = MIN(GUEST_RAM1_SIZE, kinfo->unassigned_mem); > >>> - if ( !allocate_bank_memory(kinfo, gaddr_to_gfn(GUEST_RAM1_BASE), > >>> - bank_size) ) > >>> - goto fail; > >>> - > >>> - if ( kinfo->unassigned_mem ) > >>> - goto fail; > >>> - > >>> - for( i = 0; i < mem->nr_banks; i++ ) > >>> - { > >>> - printk(XENLOG_INFO "%pd BANK[%d] %#"PRIpaddr"-%#"PRIpaddr" (%ldMB)\n", > >>> - d, > >>> - i, > >>> - mem->bank[i].start, > >>> - mem->bank[i].start + mem->bank[i].size, > >>> - /* Don't want format this as PRIpaddr (16 digit hex) */ > >>> - (unsigned long)(mem->bank[i].size >> 20)); > >>> - } > >>> - > >>> - return; > >>> - > >>> -fail: > >>> - panic("Failed to allocate requested domain memory." > >>> - /* Don't want format this as PRIpaddr (16 digit hex) */ > >>> - " %ldKB unallocated. Fix the VMs configurations.\n", > >>> - (unsigned long)kinfo->unassigned_mem >> 10); > >>> -} > >>> - > >>> #ifdef CONFIG_VGICV2 > >>> static int __init make_gicv2_domU_node(struct kernel_info *kinfo) > >>> { > >>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > >>> index 2c30792de8..2b8cba9b2f 100644 > >>> --- a/xen/arch/arm/domain_build.c > >>> +++ b/xen/arch/arm/domain_build.c > >>> @@ -416,7 +416,6 @@ static void __init allocate_memory_11(struct domain *d, > >>> } > >>> } > >>> > >>> -#ifdef CONFIG_DOM0LESS_BOOT > >>> bool __init allocate_domheap_memory(struct domain *d, paddr_t tot_size, > >>> alloc_domheap_mem_cb cb, void *extra) > >>> { > >>> @@ -508,7 +507,6 @@ bool __init allocate_bank_memory(struct kernel_info *kinfo, gfn_t sgfn, > >>> > >>> return true; > >>> } > >>> -#endif > >>> > >>> /* > >>> * When PCI passthrough is available we want to keep the > >>> @@ -1003,6 +1001,94 @@ out: > >>> return res; > >>> } > >>> > >>> +void __init allocate_memory(struct domain *d, struct kernel_info *kinfo) > >>> +{ > >>> + struct membanks *mem = kernel_info_get_mem(kinfo); > >>> + unsigned int i, nr_banks = GUEST_RAM_BANKS; > >>> + paddr_t bank_start, bank_size; > >> Limit the scope > >> > >>> + struct membanks *hwdom_free_mem = NULL; > >>> + const uint64_t bankbase[] = GUEST_RAM_BANK_BASES; > >>> + const uint64_t banksize[] = GUEST_RAM_BANK_SIZES; > >> Limit the scope > >> > >>> + > >>> + printk(XENLOG_INFO "Allocating mappings totalling %ldMB for %pd:\n", > >>> + /* Don't want format this as PRIpaddr (16 digit hex) */ > >>> + (unsigned long)(kinfo->unassigned_mem >> 20), d); > >>> + > >>> + mem->nr_banks = 0; > >>> + /* > >>> + * Use host memory layout for hwdom. Only case for this is when LLC coloring > >>> + * is enabled. > >>> + */ > >>> + if ( is_hardware_domain(d) ) > >>> + { > >>> + ASSERT(llc_coloring_enabled); > >> This patch does not build because of declaration not being visible. You must include <xen/llc-coloring.h>. > > > > Piggying back on this comment. AFAICT, the code below would work also in > > the non cache coloring case. So what's the assert is for? > > > >> > >>> + > >>> + hwdom_free_mem = xzalloc_flex_struct(struct membanks, bank, > >>> + NR_MEM_BANKS); > >>> + if ( !hwdom_free_mem ) > >>> + goto fail; > >>> + > >>> + hwdom_free_mem->max_banks = NR_MEM_BANKS; > >>> + > >>> + if ( find_unallocated_memory(kinfo, hwdom_free_mem) ) > >> My remarks for the use of find_unallocated_memory() 1:1 have not been addressed. You did not even > >> change the comments inside the function. The problem is that the function is specifically designed > >> for finding extended regions and assumes being called at certain point i.e. dom0 RAM allocated, gnttab > >> region allocated, etc. Answering Michal. Sorry about it, since we were waiting for comments and I wanted to keep the revision alive (it happend too many times that we (minervasys) left the discussion hanging for too long) I sent the v11 even if it was incomplete. I should have at least added commens, you're right. > > So I agree that the function should be updated if we plan to use it for > > other purpose. > > > > My opinion is that we should attempt to make the function generic so > > that in your > >> case you can choose which regions to exclude, define even your own function to grab free regions (at the moment > >> add_ext_regions grabs banks >= 64M but you still discards banks >= 128M, so it's a bit wasteful. > >> > >> My very short attempt to make the function as generic as possible in the first iteration: > >> https://paste.debian.net/1338334/ > > > > This looks better, but I wonder why we need still need to exclude the > > static regions? Wouldn't it be sufficient to exclude just reserved regions? > Static shared memory banks are not part of reserved memory (i.e. bootinfo.reserved_mem) if that's what you're asking. > They are stored in bootinfo.shmem, hence we need to take them into account when searching for unused address space. > > If you and Carlo are ok with my proposed solution for making the function generic, I can send a patch as a prerequisite > patch for Carlo series. I'm ok with that. > ~Michal Thanks both. - Carlo
© 2016 - 2024 Red Hat, Inc.