alloc_heap_pages() will unconditionally clear PGC_need_scrub, even when
MEMF_no_scrub is requested. This is somewhat expected, as otherwise some
callers would assert on seeing unexpected flags set in the count_info
field.

Introduce a new MEMF bit to signal to alloc_heap_pages() that non-scrubbed
pages should keep the PGC_need_scrub bit set. This fixes alloc_domheap_pages()
returning dirty pages without the PGC_need_scrub bit set, which
populate_physmap() relies on to defer scrubbing.

With the above change, alloc_domheap_pages() needs an adjustment to cope
with allocated pages possibly having the PGC_need_scrub bit set.

Fixes: 83a784a15b47 ("xen/mm: allow deferred scrub of physmap populate allocated pages")
Reported-by: Ayden Bottos <aydenbottos12@gmail.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
This issue was initially reported to the Xen Security Team. It did not
require an XSA only because the code hasn't been part of any release;
otherwise an XSA would have been issued.

The Security Team would like to thank Ayden for the prompt report.

In the scrubbing loop in alloc_heap_pages(), i should be unsigned long.
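
As an illustration of the resulting caller-side contract, a minimal
hypothetical sketch (assuming d and order are in scope; this is not code
from the patch): pages allocated with both flags come back with
PGC_need_scrub still set, and the caller becomes responsible for
scrubbing or tracking them before they are exposed.

    /* Hypothetical caller sketch; identifiers as used in the patch. */
    struct page_info *pg = alloc_domheap_pages(d, order,
                                               MEMF_no_scrub |
                                               MEMF_keep_scrub);

    for ( unsigned long i = 0; pg && i < (1UL << order); i++ )
        if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
            scrub_one_page(&pg[i], false); /* or defer, like populate_physmap() */
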
---
xen/common/memory.c | 3 ++-
xen/common/page_alloc.c | 31 ++++++++++++++++++++++---------
xen/include/xen/mm.h | 2 ++
3 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 918510f287a0..f0ff1311881c 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -345,7 +345,8 @@ static void populate_physmap(struct memop_args *a)
unsigned int scrub_start = 0;
unsigned int memflags =
a->memflags | (d->creation_finished ? 0
- : MEMF_no_scrub);
+ : (MEMF_no_scrub |
+ MEMF_keep_scrub));
nodeid_t node =
(a->memflags & MEMF_exact_node) ? MEMF_get_node(a->memflags)
: NUMA_NO_NODE;
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 588b5b99cbc7..1316dfbd15ee 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -989,6 +989,8 @@ static struct page_info *alloc_heap_pages(
ASSERT(zone_lo <= zone_hi);
ASSERT(zone_hi < NR_ZONES);
+ ASSERT(!(memflags & MEMF_keep_scrub) || (memflags & MEMF_no_scrub));
+
if ( unlikely(order > MAX_ORDER) )
return NULL;
@@ -1110,17 +1112,26 @@ static struct page_info *alloc_heap_pages(
{
bool cold = d && d != current->domain;
- for ( i = 0; i < (1U << order); i++ )
+ if ( !(memflags & MEMF_no_scrub) )
{
- if ( test_and_clear_bit(_PGC_need_scrub, &pg[i].count_info) )
+ for ( i = 0; i < (1U << order); i++ )
{
- if ( !(memflags & MEMF_no_scrub) )
+ if ( test_and_clear_bit(_PGC_need_scrub, &pg[i].count_info) )
+ {
scrub_one_page(&pg[i], cold);
-
- dirty_cnt++;
+ dirty_cnt++;
+ }
+ else
+ check_one_page(&pg[i]);
}
- else if ( !(memflags & MEMF_no_scrub) )
- check_one_page(&pg[i]);
+ }
+ else
+ {
+ for ( i = 0; i < (1U << order); i++ )
+ if ( (memflags & MEMF_keep_scrub)
+ ? test_bit(_PGC_need_scrub, &pg[i].count_info)
+ : test_and_clear_bit(_PGC_need_scrub, &pg[i].count_info) )
+ dirty_cnt++;
}
if ( dirty_cnt )
@@ -2696,8 +2707,10 @@ struct page_info *alloc_domheap_pages(
for ( i = 0; i < (1UL << order); i++ )
{
- ASSERT(!pg[i].count_info);
- pg[i].count_info = PGC_extra;
+ ASSERT(!(pg[i].count_info &
+ ~((memflags & MEMF_keep_scrub) ? PGC_need_scrub
+ : 0UL)));
+ pg[i].count_info |= PGC_extra;
}
}
if ( assign_page(pg, order, d, memflags) )
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index d80bfba6d393..0639fc0d21fb 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -208,6 +208,8 @@ struct npfec {
#define MEMF_no_refcount (1U<<_MEMF_no_refcount)
#define _MEMF_populate_on_demand 1
#define MEMF_populate_on_demand (1U<<_MEMF_populate_on_demand)
+#define _MEMF_keep_scrub 2
+#define MEMF_keep_scrub (1U<<_MEMF_keep_scrub)
#define _MEMF_no_dma 3
#define MEMF_no_dma (1U<<_MEMF_no_dma)
#define _MEMF_exact_node 4
--
2.51.0
On 25.03.2026 11:08, Roger Pau Monne wrote:
> alloc_heap_pages() will unconditionally clear PGC_need_scrub, even when
> MEMF_no_scrub is requested. This is somewhat expected, as otherwise some
> callers would assert on seeing unexpected flags set in the count_info
> field.
>
> Introduce a new MEMF bit to signal to alloc_heap_pages() that non-scrubbed
> pages should keep the PGC_need_scrub bit set. This fixes alloc_domheap_pages()
> returning dirty pages without the PGC_need_scrub bit set, which
> populate_physmap() relies on to defer scrubbing.
>
> With the above change, alloc_domheap_pages() needs an adjustment to cope
> with allocated pages possibly having the PGC_need_scrub bit set.
>
> Fixes: 83a784a15b47 ("xen/mm: allow deferred scrub of physmap populate allocated pages")
> Reported-by: Ayden Bottos <aydenbottos12@gmail.com>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
with one nit (minor request) at the bottom.
> ---
> This issue was initially reported to the Xen Security Team. It did not
> require an XSA only because the code hasn't been part of any release;
> otherwise an XSA would have been issued.
>
> The Security Team would like to thank Ayden for the prompt report.
>
> In the scrubbing loop in alloc_heap_pages(), i should be unsigned long.
This issue is wider than just that function. As long as MAX_ORDER <= BITS_PER_INT,
I think we could have all such loops consistently use unsigned int induction
variables. But of course switching to unsigned long would be okay as well, just
perhaps a little less efficient on (at least) x86. My main wish would be for all
of those variables to be consistent in type (and hence all involved literal
number suffixes also being consistently U or UL).
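
As a standalone sketch of the consistency being asked for (placeholder
code, compiled in isolation rather than taken from page_alloc.c):

    /* Sketch only: pairing the index type with the literal suffix. */
    static unsigned long walk(unsigned int order)
    {
        unsigned long pages = 0;

        /* unsigned int index with a U suffix: sufficient while
         * MAX_ORDER <= BITS_PER_INT, per the argument above. */
        for ( unsigned int i = 0; i < (1U << order); i++ )
            pages++;

        /* unsigned long index with a matching UL suffix: always safe,
         * if possibly a little wider arithmetic on x86. */
        for ( unsigned long i = 0; i < (1UL << order); i++ )
            pages++;

        return pages; /* 2 * 2^order */
    }
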
> --- a/xen/include/xen/mm.h
> +++ b/xen/include/xen/mm.h
> @@ -208,6 +208,8 @@ struct npfec {
> #define MEMF_no_refcount (1U<<_MEMF_no_refcount)
> #define _MEMF_populate_on_demand 1
> #define MEMF_populate_on_demand (1U<<_MEMF_populate_on_demand)
> +#define _MEMF_keep_scrub 2
> +#define MEMF_keep_scrub (1U<<_MEMF_keep_scrub)
> #define _MEMF_no_dma 3
> #define MEMF_no_dma (1U<<_MEMF_no_dma)
> #define _MEMF_exact_node 4
Irrespective of all the similar issues in surrounding code, may I ask that << be
surrounded by blanks in the new addition, to conform to ./CODING_STYLE?
As an aside, I wonder whether we really need the separate _MEMF_keep_scrub, but
the same likely applies to most other _MEMF_*.
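
Concretely, dropping the separate bit-position macro would collapse the
pair into a single definition along these lines (illustration only):

    #define MEMF_keep_scrub (1U << 2)
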
Jan
This looks good to me. I would also add a brief comment in mm.h to
make the contract clearer for future callers: MEMF_keep_scrub is an
internal allocator flag and only valid together with MEMF_no_scrub.
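
For instance, along these lines (suggested wording only, with blanks
around << per the style request above):

    /*
     * Internal allocator flag, only valid together with MEMF_no_scrub:
     * pages are returned with PGC_need_scrub still set, and the caller
     * becomes responsible for scrubbing them before they are exposed.
     */
    #define _MEMF_keep_scrub 2
    #define MEMF_keep_scrub (1U << _MEMF_keep_scrub)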