[PATCH] x86: Add configuration options for max_altp2m and max_nestedp2m limits

Petr Beneš posted 1 patch 2 months, 3 weeks ago
From: Petr Beneš <w1benny@gmail.com>

This commit introduces the ability to configure the maximum number of altp2m
and nestedp2m tables through boot-time parameters.  Previously, the limits were
hardcoded to a maximum of 10 for both.  This change allows for greater
flexibility in environments that require more or fewer tables, enhancing Xen's
adaptability to different workloads and scenarios.

Adjustments include:
- Adding boot parameters `max_altp2m` and `max_nestedp2m` to allow setting
  these limits at boot time.
- Modifying various parts of the code to use these configurable limits instead
  of the previous hardcoded values.
- Ensuring that if the configured values exceed the maximum supported EPTP
  entries, they are adjusted down with a warning logged.
- Adjusting the initial allocation of pages in `hap_enable` from 256 to 2048
  pages when `old_pages == 0`.

  This change anticipates scenarios where `max_altp2m` and `max_nestedp2m`
  are set to their maximum supported values (i.e., 512), ensuring sufficient
  memory is allocated upfront to accommodate all altp2m and nestedp2m tables
  without initialization failure.

This enhancement is particularly relevant for users leveraging Xen's features
for virtual machine introspection or nested virtualization support.

Signed-off-by: Petr Beneš <w1benny@gmail.com>
---
 .../paging-mempool/test-paging-mempool.c      |  2 +-
 xen/arch/x86/hvm/hvm.c                        | 22 +++++++-
 xen/arch/x86/hvm/vmx/vmx.c                    |  2 +-
 xen/arch/x86/include/asm/domain.h             |  8 +--
 xen/arch/x86/include/asm/p2m.h                |  4 +-
 xen/arch/x86/mm/altp2m.c                      | 16 +++++-
 xen/arch/x86/mm/hap/hap.c                     | 14 ++---
 xen/arch/x86/mm/mem_access.c                  | 14 ++---
 xen/arch/x86/mm/mem_sharing.c                 |  6 +-
 xen/arch/x86/mm/nested.c                      | 16 +++++-
 xen/arch/x86/mm/p2m-ept.c                     |  6 +-
 xen/arch/x86/mm/p2m.c                         | 56 +++++++++----------
 12 files changed, 105 insertions(+), 61 deletions(-)

diff --git a/tools/tests/paging-mempool/test-paging-mempool.c b/tools/tests/paging-mempool/test-paging-mempool.c
index 1ebc13455a..b94d4a4fe1 100644
--- a/tools/tests/paging-mempool/test-paging-mempool.c
+++ b/tools/tests/paging-mempool/test-paging-mempool.c
@@ -35,7 +35,7 @@ static struct xen_domctl_createdomain create = {
 
 static uint64_t default_mempool_size_bytes =
 #if defined(__x86_64__) || defined(__i386__)
-    256 << 12; /* Only x86 HAP for now.  x86 Shadow needs more work. */
+    2048 << 12; /* Only x86 HAP for now.  x86 Shadow needs more work. */
 #elif defined (__arm__) || defined(__aarch64__)
     16 << 12;
 #endif
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 482eebbabf..cb5e190083 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -105,6 +105,12 @@ static const char __initconst warning_hvm_fep[] =
 static bool_t __initdata opt_altp2m_enabled = 0;
 boolean_param("altp2m", opt_altp2m_enabled);
 
+unsigned long __read_mostly max_altp2m = 10;
+integer_param("max_altp2m", max_altp2m);
+
+unsigned long __read_mostly max_nestedp2m = 10;
+integer_param("max_nestedp2m", max_nestedp2m);
+
 static int cf_check cpu_callback(
     struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
@@ -191,6 +197,20 @@ static int __init cf_check hvm_enable(void)
 
     if ( !opt_altp2m_enabled )
         hvm_funcs.altp2m_supported = 0;
+    else
+    {
+        if ( max_altp2m > MAX_EPTP )
+        {
+            printk(XENLOG_WARNING "HVM: max_altp2m must be <= %lu, adjusting\n", MAX_EPTP);
+            max_altp2m = MAX_EPTP;
+        }
+
+        if ( max_nestedp2m > MAX_EPTP )
+        {
+            printk(XENLOG_WARNING "HVM: max_nestedp2m must be <= %lu, adjusting\n", MAX_EPTP);
+            max_nestedp2m = MAX_EPTP;
+        }
+    }
 
     if ( opt_hvm_fep )
         warning_add(warning_hvm_fep);
@@ -5207,7 +5227,7 @@ void hvm_fast_singlestep(struct vcpu *v, uint16_t p2midx)
     if ( !hvm_is_singlestep_supported() )
         return;
 
-    if ( p2midx >= MAX_ALTP2M )
+    if ( p2midx >= max_altp2m )
         return;
 
     v->arch.hvm.single_step = true;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 1edc7f1e91..71a9269f1d 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -4857,7 +4857,7 @@ bool vmx_vmenter_helper(const struct cpu_user_regs *regs)
         {
             unsigned int i;
 
-            for ( i = 0; i < MAX_ALTP2M; ++i )
+            for ( i = 0; i < max_altp2m; ++i )
             {
                 if ( currd->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
                     continue;
diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index 619e667938..429f0d6668 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -259,9 +259,9 @@ struct paging_vcpu {
     struct shadow_vcpu shadow;
 };
 
-#define MAX_NESTEDP2M 10
+extern unsigned long max_altp2m;
+extern unsigned long max_nestedp2m;
 
-#define MAX_ALTP2M      10 /* arbitrary */
 #define INVALID_ALTP2M  0xffff
 #define MAX_EPTP        (PAGE_SIZE / sizeof(uint64_t))
 struct p2m_domain;
@@ -349,12 +349,12 @@ struct arch_domain
 
 #ifdef CONFIG_HVM
     /* nestedhvm: translate l2 guest physical to host physical */
-    struct p2m_domain *nested_p2m[MAX_NESTEDP2M];
+    struct p2m_domain **nested_p2m;
     mm_lock_t nested_p2m_lock;
 
     /* altp2m: allow multiple copies of host p2m */
     bool_t altp2m_active;
-    struct p2m_domain *altp2m_p2m[MAX_ALTP2M];
+    struct p2m_domain **altp2m_p2m;
     mm_lock_t altp2m_list_lock;
     uint64_t *altp2m_eptp;
     uint64_t *altp2m_visible_eptp;
diff --git a/xen/arch/x86/include/asm/p2m.h b/xen/arch/x86/include/asm/p2m.h
index 40545f5fa8..0810293f7f 100644
--- a/xen/arch/x86/include/asm/p2m.h
+++ b/xen/arch/x86/include/asm/p2m.h
@@ -862,7 +862,7 @@ static inline struct p2m_domain *p2m_get_altp2m(struct vcpu *v)
     if ( index == INVALID_ALTP2M )
         return NULL;
 
-    BUG_ON(index >= MAX_ALTP2M);
+    BUG_ON(index >= max_altp2m);
 
     return v->domain->arch.altp2m_p2m[index];
 }
@@ -872,7 +872,7 @@ static inline bool p2m_set_altp2m(struct vcpu *v, unsigned int idx)
 {
     struct p2m_domain *orig;
 
-    BUG_ON(idx >= MAX_ALTP2M);
+    BUG_ON(idx >= max_altp2m);
 
     if ( idx == vcpu_altp2m(v).p2midx )
         return false;
diff --git a/xen/arch/x86/mm/altp2m.c b/xen/arch/x86/mm/altp2m.c
index a04297b646..468ccb0b58 100644
--- a/xen/arch/x86/mm/altp2m.c
+++ b/xen/arch/x86/mm/altp2m.c
@@ -120,7 +120,13 @@ int p2m_init_altp2m(struct domain *d)
     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
 
     mm_lock_init(&d->arch.altp2m_list_lock);
-    for ( i = 0; i < MAX_ALTP2M; i++ )
+
+    if ( (d->arch.altp2m_p2m = xzalloc_array(struct p2m_domain *, max_altp2m)) == NULL )
+    {
+        return -ENOMEM;
+    }
+
+    for ( i = 0; i < max_altp2m; i++ )
     {
         d->arch.altp2m_p2m[i] = p2m = p2m_init_one(d);
         if ( p2m == NULL )
@@ -141,7 +147,10 @@ void p2m_teardown_altp2m(struct domain *d)
     unsigned int i;
     struct p2m_domain *p2m;
 
-    for ( i = 0; i < MAX_ALTP2M; i++ )
+    if ( !d->arch.altp2m_p2m )
+        return;
+
+    for ( i = 0; i < max_altp2m; i++ )
     {
         if ( !d->arch.altp2m_p2m[i] )
             continue;
@@ -149,6 +158,9 @@ void p2m_teardown_altp2m(struct domain *d)
         d->arch.altp2m_p2m[i] = NULL;
         p2m_free_one(p2m);
     }
+
+    xfree(d->arch.altp2m_p2m);
+    d->arch.altp2m_p2m = NULL;
 }
 
 /*
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 9f964c1d87..812a0f37d8 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -468,7 +468,7 @@ int hap_enable(struct domain *d, u32 mode)
     if ( old_pages == 0 )
     {
         paging_lock(d);
-        rv = hap_set_allocation(d, 256, NULL);
+        rv = hap_set_allocation(d, 2048, NULL);
         if ( rv != 0 )
         {
             hap_set_allocation(d, 0, NULL);
@@ -487,7 +487,7 @@ int hap_enable(struct domain *d, u32 mode)
     if ( rv != 0 )
         goto out;
 
-    for ( i = 0; i < MAX_NESTEDP2M; i++ )
+    for ( i = 0; i < max_nestedp2m; i++ )
     {
         rv = p2m_alloc_table(d->arch.nested_p2m[i]);
         if ( rv != 0 )
@@ -515,7 +515,7 @@ int hap_enable(struct domain *d, u32 mode)
             d->arch.altp2m_visible_eptp[i] = mfn_x(INVALID_MFN);
         }
 
-        for ( i = 0; i < MAX_ALTP2M; i++ )
+        for ( i = 0; i < max_altp2m; i++ )
         {
             rv = p2m_alloc_table(d->arch.altp2m_p2m[i]);
             if ( rv != 0 )
@@ -538,11 +538,11 @@ void hap_final_teardown(struct domain *d)
     unsigned int i;
 
     if ( hvm_altp2m_supported() )
-        for ( i = 0; i < MAX_ALTP2M; i++ )
+        for ( i = 0; i < max_altp2m; i++ )
             p2m_teardown(d->arch.altp2m_p2m[i], true, NULL);
 
     /* Destroy nestedp2m's first */
-    for (i = 0; i < MAX_NESTEDP2M; i++) {
+    for (i = 0; i < max_nestedp2m; i++) {
         p2m_teardown(d->arch.nested_p2m[i], true, NULL);
     }
 }
@@ -590,7 +590,7 @@ void hap_teardown(struct domain *d, bool *preempted)
         FREE_XENHEAP_PAGE(d->arch.altp2m_eptp);
         FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp);
 
-        for ( i = 0; i < MAX_ALTP2M; i++ )
+        for ( i = 0; i < max_altp2m; i++ )
         {
             p2m_teardown(d->arch.altp2m_p2m[i], false, preempted);
             if ( preempted && *preempted )
@@ -599,7 +599,7 @@ void hap_teardown(struct domain *d, bool *preempted)
     }
 
     /* Destroy nestedp2m's after altp2m. */
-    for ( i = 0; i < MAX_NESTEDP2M; i++ )
+    for ( i = 0; i < max_nestedp2m; i++ )
     {
         p2m_teardown(d->arch.nested_p2m[i], false, preempted);
         if ( preempted && *preempted )
diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c
index 3449e0ee85..85f38e0ad4 100644
--- a/xen/arch/x86/mm/mem_access.c
+++ b/xen/arch/x86/mm/mem_access.c
@@ -347,12 +347,12 @@ long p2m_set_mem_access(struct domain *d, gfn_t gfn, uint32_t nr,
     /* altp2m view 0 is treated as the hostp2m */
     if ( altp2m_idx )
     {
-        if ( altp2m_idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
+        if ( altp2m_idx >= min(max_altp2m, MAX_EPTP) ||
              d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==
              mfn_x(INVALID_MFN) )
             return -EINVAL;
 
-        ap2m = array_access_nospec(d->arch.altp2m_p2m, altp2m_idx);
+        ap2m = d->arch.altp2m_p2m[array_index_nospec(altp2m_idx, max_altp2m)];
     }
 
     if ( !xenmem_access_to_p2m_access(p2m, access, &a) )
@@ -403,12 +403,12 @@ long p2m_set_mem_access_multi(struct domain *d,
     /* altp2m view 0 is treated as the hostp2m */
     if ( altp2m_idx )
     {
-        if ( altp2m_idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
+        if ( altp2m_idx >= min(max_altp2m, MAX_EPTP) ||
              d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==
              mfn_x(INVALID_MFN) )
             return -EINVAL;
 
-        ap2m = array_access_nospec(d->arch.altp2m_p2m, altp2m_idx);
+        ap2m = d->arch.altp2m_p2m[array_index_nospec(altp2m_idx, max_altp2m)];
     }
 
     p2m_lock(p2m);
@@ -466,12 +466,12 @@ int p2m_get_mem_access(struct domain *d, gfn_t gfn, xenmem_access_t *access,
     }
     else if ( altp2m_idx ) /* altp2m view 0 is treated as the hostp2m */
     {
-        if ( altp2m_idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
+        if ( altp2m_idx >= min(max_altp2m, MAX_EPTP) ||
              d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==
              mfn_x(INVALID_MFN) )
             return -EINVAL;
 
-        p2m = array_access_nospec(d->arch.altp2m_p2m, altp2m_idx);
+        p2m = d->arch.altp2m_p2m[array_index_nospec(altp2m_idx, max_altp2m)];
     }
 
     return _p2m_get_mem_access(p2m, gfn, access);
@@ -486,7 +486,7 @@ void arch_p2m_set_access_required(struct domain *d, bool access_required)
     if ( altp2m_active(d) )
     {
         unsigned int i;
-        for ( i = 0; i < MAX_ALTP2M; i++ )
+        for ( i = 0; i < max_altp2m; i++ )
         {
             struct p2m_domain *p2m = d->arch.altp2m_p2m[i];
 
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 94b6b782ef..bb4ce3c8cc 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -918,7 +918,7 @@ static int nominate_page(struct domain *d, gfn_t gfn,
 
         altp2m_list_lock(d);
 
-        for ( i = 0; i < MAX_ALTP2M; i++ )
+        for ( i = 0; i < max_altp2m; i++ )
         {
             ap2m = d->arch.altp2m_p2m[i];
             if ( !ap2m )
@@ -1280,7 +1280,7 @@ int __mem_sharing_unshare_page(struct domain *d,
     {
         unsigned int i;
 
-        for ( i = 0; i < MAX_NESTEDP2M; i++ )
+        for ( i = 0; i < max_nestedp2m; i++ )
             p2m_lock(d->arch.nested_p2m[i]);
     }
 
@@ -1389,7 +1389,7 @@ int __mem_sharing_unshare_page(struct domain *d,
     {
         unsigned int i;
 
-        for ( i = 0; i < MAX_NESTEDP2M; i++ )
+        for ( i = 0; i < max_nestedp2m; i++ )
             p2m_unlock(d->arch.nested_p2m[i]);
     }
 
diff --git a/xen/arch/x86/mm/nested.c b/xen/arch/x86/mm/nested.c
index 03741ffae4..a765bb27d4 100644
--- a/xen/arch/x86/mm/nested.c
+++ b/xen/arch/x86/mm/nested.c
@@ -28,7 +28,13 @@ int p2m_init_nestedp2m(struct domain *d)
     struct p2m_domain *p2m;
 
     mm_lock_init(&d->arch.nested_p2m_lock);
-    for ( i = 0; i < MAX_NESTEDP2M; i++ )
+
+    if ( (d->arch.nested_p2m = xzalloc_array(struct p2m_domain *, max_nestedp2m)) == NULL )
+    {
+        return -ENOMEM;
+    }
+
+    for ( i = 0; i < max_nestedp2m; i++ )
     {
         d->arch.nested_p2m[i] = p2m = p2m_init_one(d);
         if ( p2m == NULL )
@@ -50,7 +56,10 @@ void p2m_teardown_nestedp2m(struct domain *d)
     unsigned int i;
     struct p2m_domain *p2m;
 
-    for ( i = 0; i < MAX_NESTEDP2M; i++ )
+    if ( !d->arch.nested_p2m )
+        return;
+
+    for ( i = 0; i < max_nestedp2m; i++ )
     {
         if ( !d->arch.nested_p2m[i] )
             continue;
@@ -59,4 +68,7 @@ void p2m_teardown_nestedp2m(struct domain *d)
         p2m_free_one(p2m);
         d->arch.nested_p2m[i] = NULL;
     }
+
+    xfree(d->arch.nested_p2m);
+    d->arch.nested_p2m = NULL;
 }
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 85c4e8e54f..6d9267552e 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -1292,7 +1292,7 @@ static void ept_set_ad_sync(struct domain *d, bool value)
     {
         unsigned int i;
 
-        for ( i = 0; i < MAX_ALTP2M; i++ )
+        for ( i = 0; i < max_altp2m; i++ )
         {
             struct p2m_domain *p2m;
 
@@ -1499,7 +1499,7 @@ void setup_ept_dump(void)
 
 void p2m_init_altp2m_ept(struct domain *d, unsigned int i)
 {
-    struct p2m_domain *p2m = array_access_nospec(d->arch.altp2m_p2m, i);
+    struct p2m_domain *p2m = d->arch.altp2m_p2m[array_index_nospec(i, max_altp2m)];
     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
     struct ept_data *ept;
 
@@ -1518,7 +1518,7 @@ unsigned int p2m_find_altp2m_by_eptp(struct domain *d, uint64_t eptp)
 
     altp2m_list_lock(d);
 
-    for ( i = 0; i < MAX_ALTP2M; i++ )
+    for ( i = 0; i < max_altp2m; i++ )
     {
         if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
             continue;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 4251144704..265584d243 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -104,7 +104,7 @@ void p2m_change_entry_type_global(struct domain *d,
     {
         unsigned int i;
 
-        for ( i = 0; i < MAX_ALTP2M; i++ )
+        for ( i = 0; i < max_altp2m; i++ )
             if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
             {
                 struct p2m_domain *altp2m = d->arch.altp2m_p2m[i];
@@ -137,7 +137,7 @@ void p2m_memory_type_changed(struct domain *d)
     {
         unsigned int i;
 
-        for ( i = 0; i < MAX_ALTP2M; i++ )
+        for ( i = 0; i < max_altp2m; i++ )
             if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
             {
                 struct p2m_domain *altp2m = d->arch.altp2m_p2m[i];
@@ -906,7 +906,7 @@ void p2m_change_type_range(struct domain *d,
     {
         unsigned int i;
 
-        for ( i = 0; i < MAX_ALTP2M; i++ )
+        for ( i = 0; i < max_altp2m; i++ )
             if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
             {
                 struct p2m_domain *altp2m = d->arch.altp2m_p2m[i];
@@ -977,7 +977,7 @@ int p2m_finish_type_change(struct domain *d,
     {
         unsigned int i;
 
-        for ( i = 0; i < MAX_ALTP2M; i++ )
+        for ( i = 0; i < max_altp2m; i++ )
             if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
             {
                 struct p2m_domain *altp2m = d->arch.altp2m_p2m[i];
@@ -1390,7 +1390,7 @@ p2m_flush_nestedp2m(struct domain *d)
 {
     unsigned int i;
 
-    for ( i = 0; i < MAX_NESTEDP2M; i++ )
+    for ( i = 0; i < max_nestedp2m; i++ )
     {
         struct p2m_domain *p2m = d->arch.nested_p2m[i];
 
@@ -1410,7 +1410,7 @@ void np2m_flush_base(struct vcpu *v, unsigned long np2m_base)
     np2m_base &= ~(0xfffULL);
 
     nestedp2m_lock(d);
-    for ( i = 0; i < MAX_NESTEDP2M; i++ )
+    for ( i = 0; i < max_nestedp2m; i++ )
     {
         p2m = d->arch.nested_p2m[i];
         p2m_lock(p2m);
@@ -1484,7 +1484,7 @@ p2m_get_nestedp2m_locked(struct vcpu *v)
     }
 
     /* Share a np2m if possible */
-    for ( i = 0; i < MAX_NESTEDP2M; i++ )
+    for ( i = 0; i < max_nestedp2m; i++ )
     {
         p2m = d->arch.nested_p2m[i];
         p2m_lock(p2m);
@@ -1770,7 +1770,7 @@ bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx)
     struct domain *d = v->domain;
     bool_t rc = 0;
 
-    if ( idx >= MAX_ALTP2M )
+    if ( idx >= max_altp2m )
         return rc;
 
     altp2m_list_lock(d);
@@ -1876,8 +1876,8 @@ static void p2m_reset_altp2m(struct domain *d, unsigned int idx,
 {
     struct p2m_domain *p2m;
 
-    ASSERT(idx < MAX_ALTP2M);
-    p2m = array_access_nospec(d->arch.altp2m_p2m, idx);
+    ASSERT(idx < max_altp2m);
+    p2m = d->arch.altp2m_p2m[array_index_nospec(idx, max_altp2m)];
 
     p2m_lock(p2m);
 
@@ -1902,7 +1902,7 @@ void p2m_flush_altp2m(struct domain *d)
 
     altp2m_list_lock(d);
 
-    for ( i = 0; i < MAX_ALTP2M; i++ )
+    for ( i = 0; i < max_altp2m; i++ )
     {
         p2m_reset_altp2m(d, i, ALTP2M_DEACTIVATE);
         d->arch.altp2m_eptp[i] = mfn_x(INVALID_MFN);
@@ -1918,9 +1918,9 @@ static int p2m_activate_altp2m(struct domain *d, unsigned int idx,
     struct p2m_domain *hostp2m, *p2m;
     int rc;
 
-    ASSERT(idx < MAX_ALTP2M);
+    ASSERT(idx < max_altp2m);
 
-    p2m = array_access_nospec(d->arch.altp2m_p2m, idx);
+    p2m = d->arch.altp2m_p2m[array_index_nospec(idx, max_altp2m)];
     hostp2m = p2m_get_hostp2m(d);
 
     p2m_lock(p2m);
@@ -1958,7 +1958,7 @@ int p2m_init_altp2m_by_id(struct domain *d, unsigned int idx)
     int rc = -EINVAL;
     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
 
-    if ( idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) )
+    if ( idx >= min(max_altp2m, MAX_EPTP) )
         return rc;
 
     altp2m_list_lock(d);
@@ -1985,7 +1985,7 @@ int p2m_init_next_altp2m(struct domain *d, uint16_t *idx,
 
     altp2m_list_lock(d);
 
-    for ( i = 0; i < MAX_ALTP2M; i++ )
+    for ( i = 0; i < max_altp2m; i++ )
     {
         if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
             continue;
@@ -2007,7 +2007,7 @@ int p2m_destroy_altp2m_by_id(struct domain *d, unsigned int idx)
     struct p2m_domain *p2m;
     int rc = -EBUSY;
 
-    if ( !idx || idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) )
+    if ( !idx || idx >= min(max_altp2m, MAX_EPTP) )
         return rc;
 
     rc = domain_pause_except_self(d);
@@ -2020,7 +2020,7 @@ int p2m_destroy_altp2m_by_id(struct domain *d, unsigned int idx)
     if ( d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] !=
          mfn_x(INVALID_MFN) )
     {
-        p2m = array_access_nospec(d->arch.altp2m_p2m, idx);
+        p2m = d->arch.altp2m_p2m[array_index_nospec(idx, max_altp2m)];
 
         if ( !_atomic_read(p2m->active_vcpus) )
         {
@@ -2045,7 +2045,7 @@ int p2m_switch_domain_altp2m_by_id(struct domain *d, unsigned int idx)
     struct vcpu *v;
     int rc = -EINVAL;
 
-    if ( idx >= MAX_ALTP2M )
+    if ( idx >= max_altp2m )
         return rc;
 
     rc = domain_pause_except_self(d);
@@ -2080,13 +2080,13 @@ int p2m_change_altp2m_gfn(struct domain *d, unsigned int idx,
     mfn_t mfn;
     int rc = -EINVAL;
 
-    if ( idx >=  min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
+    if ( idx >=  min(max_altp2m, MAX_EPTP) ||
          d->arch.altp2m_eptp[array_index_nospec(idx, MAX_EPTP)] ==
          mfn_x(INVALID_MFN) )
         return rc;
 
     hp2m = p2m_get_hostp2m(d);
-    ap2m = array_access_nospec(d->arch.altp2m_p2m, idx);
+    ap2m = d->arch.altp2m_p2m[array_index_nospec(idx, max_altp2m)];
 
     p2m_lock(hp2m);
     p2m_lock(ap2m);
@@ -2142,7 +2142,7 @@ int p2m_altp2m_propagate_change(struct domain *d, gfn_t gfn,
 
     altp2m_list_lock(d);
 
-    for ( i = 0; i < MAX_ALTP2M; i++ )
+    for ( i = 0; i < max_altp2m; i++ )
     {
         p2m_type_t t;
         p2m_access_t a;
@@ -2165,7 +2165,7 @@ int p2m_altp2m_propagate_change(struct domain *d, gfn_t gfn,
             else
             {
                 /* At least 2 altp2m's impacted, so reset everything */
-                for ( i = 0; i < MAX_ALTP2M; i++ )
+                for ( i = 0; i < max_altp2m; i++ )
                 {
                     if ( i == last_reset_idx ||
                          d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
@@ -2565,12 +2565,12 @@ int p2m_set_suppress_ve_multi(struct domain *d,
 
     if ( sve->view > 0 )
     {
-        if ( sve->view >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
+        if ( sve->view >= min(max_altp2m, MAX_EPTP) ||
              d->arch.altp2m_eptp[array_index_nospec(sve->view, MAX_EPTP)] ==
              mfn_x(INVALID_MFN) )
             return -EINVAL;
 
-        p2m = ap2m = array_access_nospec(d->arch.altp2m_p2m, sve->view);
+        p2m = ap2m = d->arch.altp2m_p2m[array_index_nospec(sve->view, max_altp2m)];
     }
 
     p2m_lock(host_p2m);
@@ -2633,12 +2633,12 @@ int p2m_get_suppress_ve(struct domain *d, gfn_t gfn, bool *suppress_ve,
 
     if ( altp2m_idx > 0 )
     {
-        if ( altp2m_idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
+        if ( altp2m_idx >= min(max_altp2m, MAX_EPTP) ||
              d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==
              mfn_x(INVALID_MFN) )
             return -EINVAL;
 
-        p2m = ap2m = array_access_nospec(d->arch.altp2m_p2m, altp2m_idx);
+        p2m = ap2m = d->arch.altp2m_p2m[array_index_nospec(altp2m_idx, max_altp2m)];
     }
     else
         p2m = host_p2m;
@@ -2669,9 +2669,9 @@ int p2m_set_altp2m_view_visibility(struct domain *d, unsigned int altp2m_idx,
 
     /*
      * Eptp index is correlated with altp2m index and should not exceed
-     * min(MAX_ALTP2M, MAX_EPTP).
+     * min(max_altp2m, MAX_EPTP).
      */
-    if ( altp2m_idx >= min(ARRAY_SIZE(d->arch.altp2m_p2m), MAX_EPTP) ||
+    if ( altp2m_idx >= min(max_altp2m, MAX_EPTP) ||
          d->arch.altp2m_eptp[array_index_nospec(altp2m_idx, MAX_EPTP)] ==
          mfn_x(INVALID_MFN) )
         rc = -EINVAL;
-- 
2.34.1


Re: [PATCH] x86: Add configuration options for max_altp2m and max_nestedp2m limits
Posted by Jan Beulich 2 months, 3 weeks ago
On 06.02.2024 11:08, Petr Beneš wrote:
> From: Petr Beneš <w1benny@gmail.com>
> 
> This commit introduces the ability to configure the maximum number of altp2m
> and nestedp2m tables through boot-time parameters.  Previously, the limits were
> hardcoded to a maximum of 10 for both.  This change allows for greater
> flexibility in environments that require more or fewer tables, enhancing Xen's
> adaptability to different workloads and scenarios.
> 
> Adjustments include:
> - Adding boot parameters `max_altp2m` and `max_nestedp2m` to allow setting
>   these limits at boot time.
> - Modifying various parts of the code to use these configurable limits instead
>   of the previous hardcoded values.
> - Ensuring that if the configured values exceed the maximum supported EPTP
>   entries, they are adjusted down with a warning logged.
> - Adjusting the initial allocation of pages in `hap_enable` from 256 to 2048
>   pages when `old_pages == 0`.

This change ought to come separately, with its own justification. Assuming
it can be justified at all, considering it also affects the non-nested,
non-alternative case.

Jan

Re: [PATCH] x86: Add configuration options for max_altp2m and max_nestedp2m limits
Posted by Andrew Cooper 2 months, 3 weeks ago
On 06/02/2024 10:08 am, Petr Beneš wrote:
> From: Petr Beneš <w1benny@gmail.com>
>
> This commit introduces the ability to configure the maximum number of altp2m
> and nestedp2m tables through boot-time parameters.  Previously, the limits were
> hardcoded to a maximum of 10 for both.  This change allows for greater
> flexibility in environments that require more or fewer tables, enhancing Xen's
> adaptability to different workloads and scenarios.
>
> Adjustments include:
> - Adding boot parameters `max_altp2m` and `max_nestedp2m` to allow setting
>   these limits at boot time.
> - Modifying various parts of the code to use these configurable limits instead
>   of the previous hardcoded values.
> - Ensuring that if the configured values exceed the maximum supported EPTP
>   entries, they are adjusted down with a warning logged.
> - Adjusting the initial allocation of pages in `hap_enable` from 256 to 2048
>   pages when `old_pages == 0`.
>
>   This change anticipates scenarios where `max_altp2m` and `max_nestedp2m`
>   are set to their maximum supported values (i.e., 512), ensuring sufficient
>   memory is allocated upfront to accommodate all altp2m and nestedp2m tables
>   without initialization failure.
>
> This enhancement is particularly relevant for users leveraging Xen's features
> for virtual machine introspection or nested virtualization support.
>
> Signed-off-by: Petr Beneš <w1benny@gmail.com>

Thank you for the patch.  These fields should definitely be configurable.

I realise you're copying existing examples, but they happen to be bad
examples that we're trying to remove.  It is erroneous to believe that
there's one system-wide configuration that applies to all domains, and
this has been the source of too many bugs.

Instead, they should be parameters to the domain create hypercall, so
they can be chosen on a per-VM basis.

e.g. for some introspection tasks you only want 1 view + 1 alt per vCPU,
for usually only a single vCPU.

On the other hand, for nested-virt, you really need a minimum of two per
vCPU, and probably 4, and there's no architectural upper bound.


See the series adding vmtrace= support, which is the last time I plumbed
a number like this into the domain create hypercall.

You will want to split the patch into a series.  For each option,
you'll want one patch plumbing the new parameter into the hypercall
(along with a suitable toolstack default, and sanity checking), and a
separate patch to rearrange Xen to use the new dynamic size.

~Andrew