[PATCH v3] xen/mm: improve freeing of partially scrubbed pages

Roger Pau Monne posted 1 patch 2 days, 10 hours ago
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/20260330150119.10546-1-roger.pau@citrix.com
xen/common/domain.c     |  4 +++-
xen/common/memory.c     |  8 +++++---
xen/common/page_alloc.c | 16 +++++++++++++---
xen/common/page_alloc.h | 14 ++++++++++++++
4 files changed, 35 insertions(+), 7 deletions(-)
create mode 100644 xen/common/page_alloc.h
[PATCH v3] xen/mm: improve freeing of partially scrubbed pages
Posted by Roger Pau Monne 2 days, 10 hours ago
When freeing possibly partially scrubbed pages in populate_physmap() and
domain_pending_scrub_free() the whole page is marked as dirty, but that's
not fully accurate.  Since the PGC_need_scrub bit is preserved for the
populate_physmap() allocation we can use those when freeing to detect which
pages need scrubbing instead of marking the whole page as dirty.

This requires exposing free_heap_pages() globally, and switching
populate_physmap() and domain_pending_scrub_free() to use it instead of
free_domheap_pages().

Suggested-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v2:
 - Use free_heap_pages() in domain destruction.
 - Introduce private header.
---
 xen/common/domain.c     |  4 +++-
 xen/common/memory.c     |  8 +++++---
 xen/common/page_alloc.c | 16 +++++++++++++---
 xen/common/page_alloc.h | 14 ++++++++++++++
 4 files changed, 35 insertions(+), 7 deletions(-)
 create mode 100644 xen/common/page_alloc.h

diff --git a/xen/common/domain.c b/xen/common/domain.c
index ab910fcf9306..567887ab3202 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -48,6 +48,8 @@
 #include <asm/guest.h>
 #endif
 
+#include "page_alloc.h"
+
 /* Linux config option: propageted to domain0 */
 /* xen_processor_pmbits: xen control Cx, Px, ... */
 unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;
@@ -636,7 +638,7 @@ static void domain_pending_scrub_free(struct domain *d)
     rspin_lock(&d->page_alloc_lock);
     if ( d->pending_scrub )
     {
-        FREE_DOMHEAP_PAGES(d->pending_scrub, d->pending_scrub_order);
+        FREE_HEAP_PAGES(d->pending_scrub, d->pending_scrub_order, false);
         d->pending_scrub_order = 0;
         d->pending_scrub_index = 0;
     }
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 1ad4b51c5b02..0e3a86e37381 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -40,6 +40,8 @@
 #include <asm/guest.h>
 #endif
 
+#include "page_alloc.h"
+
 struct memop_args {
     /* INPUT */
     struct domain *domain;     /* Domain to be affected. */
@@ -177,7 +179,7 @@ static void stash_allocation(struct domain *d, struct page_info *page,
      * interface is designed to be used for single-threaded domain creation.
      */
     if ( d->pending_scrub || d->is_dying )
-        free_domheap_pages(page, order);
+        free_heap_pages(page, order, false);
     else
     {
         d->pending_scrub_index = scrub_index;
@@ -210,7 +212,7 @@ static struct page_info *get_stashed_allocation(struct domain *d,
             *scrub_index = d->pending_scrub_index;
         }
         else
-            free_domheap_pages(d->pending_scrub, d->pending_scrub_order);
+            free_heap_pages(d->pending_scrub, d->pending_scrub_order, false);
 
         /*
          * The caller now owns the page or it has been freed, clear stashed
@@ -391,7 +393,7 @@ static void populate_physmap(struct memop_args *a)
 
                     if ( assign_page(page, a->extent_order, d, memflags) )
                     {
-                        free_domheap_pages(page, a->extent_order);
+                        free_heap_pages(page, a->extent_order, false);
                         goto out;
                     }
                 }
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index b1edef87124f..8fc9b5a27f1b 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1529,13 +1529,13 @@ static bool mark_page_free(struct page_info *pg, mfn_t mfn)
 static void free_color_heap_page(struct page_info *pg, bool need_scrub);
 
 /* Free 2^@order set of pages. */
-static void free_heap_pages(
-    struct page_info *pg, unsigned int order, bool need_scrub)
+void free_heap_pages(struct page_info *pg, unsigned int order, bool need_scrub)
 {
     unsigned long mask;
     mfn_t mfn = page_to_mfn(pg);
     unsigned int i, node = mfn_to_nid(mfn);
     unsigned int zone = page_to_zone(pg);
+    unsigned int first_dirty = INVALID_DIRTY_IDX, dirty_cnt = 0;
     bool pg_offlined = false;
 
     ASSERT(order <= MAX_ORDER);
@@ -1552,6 +1552,13 @@ static void free_heap_pages(
             pg[i].count_info |= PGC_need_scrub;
             poison_one_page(&pg[i]);
         }
+        else if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
+        {
+            /* The caller might have returned pages pending scrub. */
+            if ( first_dirty == INVALID_DIRTY_IDX )
+                first_dirty = i;
+            dirty_cnt++;
+        }
 
         if ( pg->count_info & PGC_colored )
         {
@@ -1571,7 +1578,10 @@ static void free_heap_pages(
         pg->u.free.first_dirty = 0;
     }
     else
-        pg->u.free.first_dirty = INVALID_DIRTY_IDX;
+    {
+        node_need_scrub[node] += dirty_cnt;
+        pg->u.free.first_dirty = first_dirty;
+    }
 
     /* Merge chunks as far as possible. */
     while ( order < MAX_ORDER )
diff --git a/xen/common/page_alloc.h b/xen/common/page_alloc.h
new file mode 100644
index 000000000000..4661767148d1
--- /dev/null
+++ b/xen/common/page_alloc.h
@@ -0,0 +1,14 @@
+#ifndef PAGE_ALLOC_H
+#define PAGE_ALLOC_H
+
+void free_heap_pages(struct page_info *pg, unsigned int order, bool need_scrub);
+
+/* Free an allocation, and zero the pointer to it. */
+#define FREE_HEAP_PAGES(p, o, s) do { \
+    void *_ptr_ = (p);                \
+    (p) = NULL;                       \
+    free_heap_pages(_ptr_, o, s);     \
+} while ( false )
+#define FREE_HEAP_PAGE(p, s) FREE_HEAP_PAGES(p, 0, s)
+
+#endif
-- 
2.51.0


Re: [PATCH v3] xen/mm: improve freeing of partially scrubbed pages
Posted by Jan Beulich 1 day, 15 hours ago
On 30.03.2026 17:01, Roger Pau Monne wrote:
> When freeing possibly partially scrubbed pages in populate_physmap() and
> domain_pending_scrub_free() the whole page is marked as dirty, but that's
> not fully accurate.  Since the PGC_need_scrub bit is preserved for the
> populate_physmap() allocation we can use those when freeing to detect which
> pages need scrubbing instead of marking the whole page as dirty.
> 
> This requires exposing free_heap_pages() globally, and switching
> populate_physmap() and domain_pending_scrub_free() to use it instead of
> free_domheap_pages().
> 
> Suggested-by: Jan Beulich <jbeulich@suse.com>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

This is okay as is, i.e.:
Reviewed-by: Jan Beulich <jbeulich@suse.com>

However, a few remarks below.

> ---
>  xen/common/domain.c     |  4 +++-
>  xen/common/memory.c     |  8 +++++---
>  xen/common/page_alloc.c | 16 +++++++++++++---
>  xen/common/page_alloc.h | 14 ++++++++++++++
>  4 files changed, 35 insertions(+), 7 deletions(-)
>  create mode 100644 xen/common/page_alloc.h

I'm on the edge of requesting page-alloc.h as the name here. I can see though
how the name you picked better fits page_alloc.c.

> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -1529,13 +1529,13 @@ static bool mark_page_free(struct page_info *pg, mfn_t mfn)
>  static void free_color_heap_page(struct page_info *pg, bool need_scrub);
>  
>  /* Free 2^@order set of pages. */
> -static void free_heap_pages(
> -    struct page_info *pg, unsigned int order, bool need_scrub)
> +void free_heap_pages(struct page_info *pg, unsigned int order, bool need_scrub)
>  {
>      unsigned long mask;
>      mfn_t mfn = page_to_mfn(pg);
>      unsigned int i, node = mfn_to_nid(mfn);
>      unsigned int zone = page_to_zone(pg);
> +    unsigned int first_dirty = INVALID_DIRTY_IDX, dirty_cnt = 0;
>      bool pg_offlined = false;
>  
>      ASSERT(order <= MAX_ORDER);
> @@ -1552,6 +1552,13 @@ static void free_heap_pages(
>              pg[i].count_info |= PGC_need_scrub;
>              poison_one_page(&pg[i]);
>          }
> +        else if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
> +        {
> +            /* The caller might have returned pages pending scrub. */
> +            if ( first_dirty == INVALID_DIRTY_IDX )
> +                first_dirty = i;
> +            dirty_cnt++;
> +        }

Would we perhaps want another "else" here, invoking check_one_page()?

> --- /dev/null
> +++ b/xen/common/page_alloc.h
> @@ -0,0 +1,14 @@
> +#ifndef PAGE_ALLOC_H
> +#define PAGE_ALLOC_H
> +
> +void free_heap_pages(struct page_info *pg, unsigned int order, bool need_scrub);
> +
> +/* Free an allocation, and zero the pointer to it. */
> +#define FREE_HEAP_PAGES(p, o, s) do { \
> +    void *_ptr_ = (p);                \
> +    (p) = NULL;                       \
> +    free_heap_pages(_ptr_, o, s);     \
> +} while ( false )
> +#define FREE_HEAP_PAGE(p, s) FREE_HEAP_PAGES(p, 0, s)

I'm not quite sure whether in this case we need the order-0 shorthand. I'm
inclined to think that either we want to go further:

#define FREE_HEAP_PAGE(p) FREE_HEAP_PAGES(p, 0, true)

Or that we want to omit the shorthand, until a clear need appears.

Jan