[PATCH v2] x86+libxl: correct p2m (shadow) memory pool size calculation

Jan Beulich posted 1 patch 2 years ago
Failed in applying to current master (apply log)
There is a newer version of this series
[PATCH v2] x86+libxl: correct p2m (shadow) memory pool size calculation
Posted by Jan Beulich 2 years ago
The reference "to shadow the resident processes" is applicable to
domains (potentially) running in shadow mode only. Adjust the
calculations accordingly. This, however, requires further parameters.
Since the original function is deprecated anyway, and since it can't be
changed (being part of a stable ABI), introduce a new (internal-only)
function, with the deprecated one simply becoming a wrapper around it.

In dom0_paging_pages() also take the opportunity to stop open-coding
DIV_ROUND_UP().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Introduce libxl__get_required_paging_memory().

--- a/tools/libs/light/libxl_create.c
+++ b/tools/libs/light/libxl_create.c
@@ -1017,7 +1017,7 @@ static bool ok_to_default_memkb_in_creat
      * The result is that the behaviour with old callers is the same
      * as in 4.13: no additional memory is allocated for shadow and
      * iommu (unless the caller set shadow_memkb, eg from a call to
-     * libxl_get_required_shadow_memory).
+     * libxl__get_required_paging_memory).
      */
     return !CTX->libxl_domain_need_memory_0x041200_called ||
             CTX->libxl_domain_need_memory_called;
@@ -1027,6 +1027,24 @@ static bool ok_to_default_memkb_in_creat
      */
 }
 
+unsigned long libxl__get_required_paging_memory(unsigned long maxmem_kb,
+                                                unsigned int smp_cpus,
+                                                libxl_domain_type type,
+                                                bool hap)
+{
+    /*
+     * 256 pages (1MB) per vcpu,
+     * plus 1 page per MiB of RAM for the P2M map (for non-PV guests),
+     * plus 1 page per MiB of RAM to shadow the resident processes (for shadow
+     * mode guests).
+     * This is higher than the minimum that Xen would allocate if no value
+     * were given (but the Xen minimum is for safety, not performance).
+     */
+    return 4 * (256 * smp_cpus +
+                ((type != LIBXL_DOMAIN_TYPE_PV) + !hap) *
+                (maxmem_kb / 1024));
+}
+
 static unsigned long libxl__get_required_iommu_memory(unsigned long maxmem_kb)
 {
     unsigned long iommu_pages = 0, mem_pages = maxmem_kb / 4;
@@ -1194,10 +1212,16 @@ int libxl__domain_config_setdefault(libx
     }
 
     if (d_config->b_info.shadow_memkb == LIBXL_MEMKB_DEFAULT
-        && ok_to_default_memkb_in_create(gc))
+        && ok_to_default_memkb_in_create(gc)) {
+        bool hap = d_config->c_info.type != LIBXL_DOMAIN_TYPE_PV &&
+                   libxl_defbool_val(d_config->c_info.hap);
+
         d_config->b_info.shadow_memkb =
-            libxl_get_required_shadow_memory(d_config->b_info.max_memkb,
-                                             d_config->b_info.max_vcpus);
+            libxl__get_required_paging_memory(d_config->b_info.max_memkb,
+                                              d_config->b_info.max_vcpus,
+                                              d_config->c_info.type,
+                                              hap);
+    }
 
     /* No IOMMU reservation is needed if passthrough mode is not 'sync_pt' */
     if (d_config->b_info.iommu_memkb == LIBXL_MEMKB_DEFAULT
--- a/tools/libs/light/libxl_internal.h
+++ b/tools/libs/light/libxl_internal.h
@@ -1569,6 +1569,11 @@ _hidden int libxl__domain_need_memory_ca
                                       libxl_domain_build_info *b_info,
                                       uint64_t *need_memkb);
 
+_hidden unsigned long libxl__get_required_paging_memory(unsigned long maxmem_kb,
+                                                        unsigned int smp_cpus,
+                                                        libxl_domain_type type,
+                                                        bool hap);
+
 _hidden const char *libxl__device_nic_devname(libxl__gc *gc,
                                               uint32_t domid,
                                               uint32_t devid,
--- a/tools/libs/light/libxl_utils.c
+++ b/tools/libs/light/libxl_utils.c
@@ -38,13 +38,8 @@ char *libxl_basename(const char *name)
 
 unsigned long libxl_get_required_shadow_memory(unsigned long maxmem_kb, unsigned int smp_cpus)
 {
-    /* 256 pages (1MB) per vcpu,
-       plus 1 page per MiB of RAM for the P2M map,
-       plus 1 page per MiB of RAM to shadow the resident processes.
-       This is higher than the minimum that Xen would allocate if no value
-       were given (but the Xen minimum is for safety, not performance).
-     */
-    return 4 * (256 * smp_cpus + 2 * (maxmem_kb / 1024));
+    return libxl__get_required_paging_memory(maxmem_kb, smp_cpus,
+                                             LIBXL_DOMAIN_TYPE_INVALID, false);
 }
 
 char *libxl_domid_to_name(libxl_ctx *ctx, uint32_t domid)
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -317,9 +317,12 @@ unsigned long __init dom0_paging_pages(c
     /* Copied from: libxl_get_required_shadow_memory() */
     unsigned long memkb = nr_pages * (PAGE_SIZE / 1024);
 
-    memkb = 4 * (256 * d->max_vcpus + 2 * (memkb / 1024));
+    memkb = 4 * (256 * d->max_vcpus +
+                 (paging_mode_enabled(d) +
+                  (opt_dom0_shadow || opt_pv_l1tf_hwdom)) *
+                 (memkb / 1024));
 
-    return ((memkb + 1023) / 1024) << (20 - PAGE_SHIFT);
+    return DIV_ROUND_UP(memkb, 1024) << (20 - PAGE_SHIFT);
 }
Re: [PATCH v2] x86+libxl: correct p2m (shadow) memory pool size calculation
Posted by Roger Pau Monné 2 years ago
On Mon, Apr 25, 2022 at 10:49:34AM +0200, Jan Beulich wrote:
>  char *libxl_domid_to_name(libxl_ctx *ctx, uint32_t domid)
> --- a/xen/arch/x86/dom0_build.c
> +++ b/xen/arch/x86/dom0_build.c
> @@ -317,9 +317,12 @@ unsigned long __init dom0_paging_pages(c
>      /* Copied from: libxl_get_required_shadow_memory() */

Could you also update the comment, maybe better would be:

/* Keep in sync with libxl__get_required_paging_memory(). */

>      unsigned long memkb = nr_pages * (PAGE_SIZE / 1024);
>  
> -    memkb = 4 * (256 * d->max_vcpus + 2 * (memkb / 1024));
> +    memkb = 4 * (256 * d->max_vcpus +
> +                 (paging_mode_enabled(d) +
> +                  (opt_dom0_shadow || opt_pv_l1tf_hwdom)) *

opt_pv_l1tf_hwdom is only relevant for PV guests, so maybe it would be
best to use:

paging_mode_enabled(d) ? 1 + opt_dom0_shadow
                       : 0 + (opt_dom0_shadow || opt_pv_l1tf_hwdom)

Or something similar.  Maybe placing this inside the sum will make the
expression too complex, so we could use a separate is_shadow boolean
to signal whether the domain will use shadow pagetables?

Thanks, Roger.
Re: [PATCH v2] x86+libxl: correct p2m (shadow) memory pool size calculation
Posted by Jan Beulich 2 years ago
On 25.04.2022 14:59, Roger Pau Monné wrote:
> On Mon, Apr 25, 2022 at 10:49:34AM +0200, Jan Beulich wrote:
>>  char *libxl_domid_to_name(libxl_ctx *ctx, uint32_t domid)
>> --- a/xen/arch/x86/dom0_build.c
>> +++ b/xen/arch/x86/dom0_build.c
>> @@ -317,9 +317,12 @@ unsigned long __init dom0_paging_pages(c
>>      /* Copied from: libxl_get_required_shadow_memory() */
> 
> Could you also update the comment, maybe better would be:
> 
> /* Keep in sync with libxl__get_required_paging_memory(). */

Oh, of course.

>>      unsigned long memkb = nr_pages * (PAGE_SIZE / 1024);
>>  
>> -    memkb = 4 * (256 * d->max_vcpus + 2 * (memkb / 1024));
>> +    memkb = 4 * (256 * d->max_vcpus +
>> +                 (paging_mode_enabled(d) +
>> +                  (opt_dom0_shadow || opt_pv_l1tf_hwdom)) *
> 
> opt_pv_l1tf_hwdom is only relevant for PV guests, so maybe it would be
> best to use:
> 
> paging_mode_enabled(d) ? 1 + opt_dom0_shadow
>                        : 0 + (opt_dom0_shadow || opt_pv_l1tf_hwdom)
> 
> Or something similar.

Originally I was thinking that people simply shouldn't use the option
when Dom0 isn't PV. But meanwhile I've figured that late-hwdom may be
PV even if domain 0 is PVH. So yes.

>  Maybe placing this inside the sum will make the
> expression too complex, so we could use a separate is_shadow boolean
> to signal whether the domain will use shadow pagetables?

I think

    memkb = 4 * (256 * d->max_vcpus +
                 (is_pv_domain(d) ? opt_dom0_shadow || opt_pv_l1tf_hwdom
                                  : 1 + opt_dom0_shadow) *
                 (memkb / 1024));

is still okay-ish. Note that I've switched to is_pv_domain() to be
independent of the point in time when shadow mode would be enabled
for a PV Dom0.

Jan
Re: [PATCH v2] x86+libxl: correct p2m (shadow) memory pool size calculation
Posted by Roger Pau Monné 2 years ago
On Mon, Apr 25, 2022 at 03:19:46PM +0200, Jan Beulich wrote:
> On 25.04.2022 14:59, Roger Pau Monné wrote:
> > On Mon, Apr 25, 2022 at 10:49:34AM +0200, Jan Beulich wrote:
> >>  char *libxl_domid_to_name(libxl_ctx *ctx, uint32_t domid)
> >> --- a/xen/arch/x86/dom0_build.c
> >> +++ b/xen/arch/x86/dom0_build.c
> >> @@ -317,9 +317,12 @@ unsigned long __init dom0_paging_pages(c
> >>      /* Copied from: libxl_get_required_shadow_memory() */
> > 
> > Could you also update the comment, maybe better would be:
> > 
> > /* Keep in sync with libxl__get_required_paging_memory(). */
> 
> Oh, of course.
> 
> >>      unsigned long memkb = nr_pages * (PAGE_SIZE / 1024);
> >>  
> >> -    memkb = 4 * (256 * d->max_vcpus + 2 * (memkb / 1024));
> >> +    memkb = 4 * (256 * d->max_vcpus +
> >> +                 (paging_mode_enabled(d) +
> >> +                  (opt_dom0_shadow || opt_pv_l1tf_hwdom)) *
> > 
> > opt_pv_l1tf_hwdom is only relevant for PV guests, so maybe it would be
> > best to use:
> > 
> > paging_mode_enabled(d) ? 1 + opt_dom0_shadow
> >                        : 0 + (opt_dom0_shadow || opt_pv_l1tf_hwdom)
> > 
> > Or something similar.
> 
> Originally I was thinking that people simply shouldn't use the option
> when Dom0 isn't PV. But meanwhile I've figured that late-hwdom may be
> PV even if domain 0 is PVH. So yes.
> 
> >  Maybe placing this inside the sum will make the
> > expression too complex, so we could use a separate is_shadow boolean
> > to signal whether the domain will use shadow pagetables?
> 
> I think
> 
>     memkb = 4 * (256 * d->max_vcpus +
>                  (is_pv_domain(d) ? opt_dom0_shadow || opt_pv_l1tf_hwdom
>                                   : 1 + opt_dom0_shadow) *
>                  (memkb / 1024));
> 
> is still okay-ish. Note that I've switched to is_pv_domain() to be
> independent of the point in time when shadow mode would be enabled
> for a PV Dom0.

Thanks, LGTM.