domain_adjust_tot_pages() consumes remaining claims as pages
are allocated, now also from the claimed NUMA node.
Update it to skip consuming the outstanding claims when the page
was allocated from a different NUMA node.
In itself this would not be critically needed, as a page should
only be allocated from a different NUMA node when the target
node has no available memory. However, for multi-node claims we
need to reduce the outstanding claims only on the NUMA node the
page was allocated from.
For this, we need to pass the NUMA node of the allocated page,
so we can use it to perform this check (and, in the future,
update the claim only on the NUMA node the page was allocated
from).
Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
Signed-off-by: Bernhard Kaindl <bernhard.kaindl@cloud.com>
---
- Reorganized v3, v4 and v5 as per review to avoid non-functional
changes:
- Split from patch v2#3 and merged the related changes from v2#5
into a consolidated patch.
---
xen/arch/x86/mm.c | 3 ++-
xen/arch/x86/mm/mem_sharing.c | 4 ++--
xen/common/grant_table.c | 4 ++--
xen/common/memory.c | 3 ++-
xen/common/page_alloc.c | 21 ++++++++++++++++-----
xen/include/xen/mm.h | 2 +-
6 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index b929d15d00..b0f654e02e 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4442,7 +4442,8 @@ int steal_page(
page_list_del(page, &d->page_list);
/* Unlink from original owner. */
- if ( !(memflags & MEMF_no_refcount) && !domain_adjust_tot_pages(d, -1) )
+ if ( !(memflags & MEMF_no_refcount) &&
+ !domain_adjust_tot_pages(d, NUMA_NO_NODE, -1) )
drop_dom_ref = true;
nrspin_unlock(&d->page_alloc_lock);
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 4787b27964..15b8a3a9d9 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -720,7 +720,7 @@ static int page_make_sharable(struct domain *d,
if ( !validate_only )
{
page_set_owner(page, dom_cow);
- drop_dom_ref = !domain_adjust_tot_pages(d, -1);
+ drop_dom_ref = !domain_adjust_tot_pages(d, NUMA_NO_NODE, -1);
page_list_del(page, &d->page_list);
}
@@ -766,7 +766,7 @@ static int page_make_private(struct domain *d, struct page_info *page)
ASSERT(page_get_owner(page) == dom_cow);
page_set_owner(page, d);
- if ( domain_adjust_tot_pages(d, 1) == 1 )
+ if ( domain_adjust_tot_pages(d, page_to_nid(page), 1) == 1 )
get_knownalive_domain(d);
page_list_add_tail(page, &d->page_list);
nrspin_unlock(&d->page_alloc_lock);
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index cf131c43a1..8fea75dbb2 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2405,7 +2405,7 @@ gnttab_transfer(
}
/* Okay, add the page to 'e'. */
- if ( unlikely(domain_adjust_tot_pages(e, 1) == 1) )
+ if ( unlikely(domain_adjust_tot_pages(e, page_to_nid(page), 1) == 1) )
get_knownalive_domain(e);
/*
@@ -2431,7 +2431,7 @@ gnttab_transfer(
* page in the page total
*/
nrspin_lock(&e->page_alloc_lock);
- drop_dom_ref = !domain_adjust_tot_pages(e, -1);
+ drop_dom_ref = !domain_adjust_tot_pages(e, NUMA_NO_NODE, -1);
nrspin_unlock(&e->page_alloc_lock);
if ( okay /* i.e. e->is_dying due to the surrounding if() */ )
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 3371edec11..4c54ce5ede 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -775,7 +775,8 @@ static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
nrspin_lock(&d->page_alloc_lock);
drop_dom_ref = (dec_count &&
- !domain_adjust_tot_pages(d, -dec_count));
+ !domain_adjust_tot_pages(d, NUMA_NO_NODE,
+ -dec_count));
nrspin_unlock(&d->page_alloc_lock);
if ( drop_dom_ref )
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index bbb34994b7..ebf41a1b33 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -542,8 +542,11 @@ static unsigned long avail_heap_pages(
* Update the total number of pages and outstanding claims of a domain.
* - When pages were freed, we do not increase outstanding claims.
* - On a domain's claims update, global outstanding_claims are updated as well.
+ * - If the domain's claim is on a NUMA node, we only update outstanding claims
+ * of the domain and the node, when the allocation is from the same NUMA node.
*/
-unsigned long domain_adjust_tot_pages(struct domain *d, long pages)
+unsigned long domain_adjust_tot_pages(struct domain *d, nodeid_t node,
+ long pages)
{
unsigned long adjustment;
@@ -557,8 +560,12 @@ unsigned long domain_adjust_tot_pages(struct domain *d, long pages)
*
* If the domain has no outstanding claims (or we freed pages instead),
* we don't update outstanding claims and skip the claims adjustment.
+ *
+ * Else, a page was allocated: But if the domain has a node_claim and
+ * the page was allocated from a different node, don't update claims.
*/
- if ( !d->outstanding_pages || pages <= 0 )
+ if ( !d->outstanding_pages || pages <= 0 ||
+ (domain_has_node_claim(d) && d->claim_node != node) )
goto out;
spin_lock(&heap_lock);
@@ -2662,6 +2669,8 @@ int assign_pages(
if ( !(memflags & MEMF_no_refcount) )
{
+ nodeid_t node = page_to_nid(&pg[0]);
+
if ( unlikely(d->tot_pages + nr < nr) )
{
gprintk(XENLOG_INFO,
@@ -2672,8 +2681,9 @@ int assign_pages(
rc = -E2BIG;
goto out;
}
+ ASSERT(node == page_to_nid(&pg[nr - 1]));
- if ( unlikely(domain_adjust_tot_pages(d, nr) == nr) )
+ if ( unlikely(domain_adjust_tot_pages(d, node, nr) == nr) )
get_knownalive_domain(d);
}
@@ -2806,7 +2816,8 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
}
}
- drop_dom_ref = !domain_adjust_tot_pages(d, -(1 << order));
+ drop_dom_ref = !domain_adjust_tot_pages(d, NUMA_NO_NODE,
+ -(1 << order));
rspin_unlock(&d->page_alloc_lock);
@@ -3012,7 +3023,7 @@ void free_domstatic_page(struct page_info *page)
arch_free_heap_page(d, page);
- drop_dom_ref = !domain_adjust_tot_pages(d, -1);
+ drop_dom_ref = !domain_adjust_tot_pages(d, NUMA_NO_NODE, -1);
unprepare_staticmem_pages(page, 1, scrub_debug);
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 52c12c5783..5a5252fc69 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -131,7 +131,7 @@ mfn_t xen_map_to_mfn(unsigned long va);
int populate_pt_range(unsigned long virt, unsigned long nr_mfns);
/* Claim handling */
unsigned long __must_check domain_adjust_tot_pages(struct domain *d,
- long pages);
+ nodeid_t node, long pages);
int domain_claim_pages(struct domain *d, nodeid_t node, unsigned long pages);
void get_outstanding_claims(uint64_t *free_pages, uint64_t *outstanding_pages);
--
2.43.0
© 2016 - 2025 Red Hat, Inc.