Especially when dealing with large amounts of memory, memset() may not
be very efficient; this can be bad enough that even for debug builds a
custom function is warranted. We additionally want to distinguish "hot"
and "cold" cases (with, as initial heuristic, "hot" being for any
allocations a domain does for itself, assuming that in all other cases
the page wouldn't be accessed [again] soon). The goal is for accesses
of "cold" pages to not disturb caches (albeit finding a good balance
between this and the higher latency looks to be difficult).
Keep the default fallback to clear_page_*() in common code; this may
want to be revisited down the road.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Julien Grall <jgrall@amazon.com>
---
v4: Re-base.
v3: Re-base.
v2: New.
---
The choice between hot and cold in scrub_one_page()'s callers is
certainly up for discussion / improvement.
--- a/xen/arch/arm/include/asm/page.h
+++ b/xen/arch/arm/include/asm/page.h
@@ -144,6 +144,12 @@ extern size_t dcache_line_bytes;
 
 #define copy_page(dp, sp) memcpy(dp, sp, PAGE_SIZE)
 
+#define clear_page_hot  clear_page
+#define clear_page_cold clear_page
+
+#define scrub_page_hot(page) memset(page, SCRUB_BYTE_PATTERN, PAGE_SIZE)
+#define scrub_page_cold      scrub_page_hot
+
 static inline size_t read_dcache_line_bytes(void)
 {
     register_t ctr;
--- a/xen/arch/ppc/include/asm/page.h
+++ b/xen/arch/ppc/include/asm/page.h
@@ -188,6 +188,12 @@ static inline void invalidate_icache(voi
 #define clear_page(page) memset(page, 0, PAGE_SIZE)
 #define copy_page(dp, sp) memcpy(dp, sp, PAGE_SIZE)
 
+#define clear_page_hot  clear_page
+#define clear_page_cold clear_page
+
+#define scrub_page_hot(page) memset(page, SCRUB_BYTE_PATTERN, PAGE_SIZE)
+#define scrub_page_cold      scrub_page_hot
+
 /* TODO: Flush the dcache for an entire page. */
 static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache)
 {
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -198,6 +198,12 @@ static inline void invalidate_icache(voi
 #define clear_page(page) memset((void *)(page), 0, PAGE_SIZE)
 #define copy_page(dp, sp) memcpy(dp, sp, PAGE_SIZE)
 
+#define clear_page_hot  clear_page
+#define clear_page_cold clear_page
+
+#define scrub_page_hot(page) memset(page, SCRUB_BYTE_PATTERN, PAGE_SIZE)
+#define scrub_page_cold      scrub_page_hot
+
 static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache)
 {
     const void *v = map_domain_page(_mfn(mfn));
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -58,6 +58,7 @@ obj-y += pci.o
 obj-y += physdev.o
 obj-$(CONFIG_COMPAT) += x86_64/physdev.o
 obj-$(CONFIG_X86_PSR) += psr.o
+obj-bin-$(CONFIG_DEBUG) += scrub_page.o
 obj-y += setup.o
 obj-y += shutdown.o
 obj-y += smp.o
--- a/xen/arch/x86/include/asm/page.h
+++ b/xen/arch/x86/include/asm/page.h
@@ -226,6 +226,11 @@ void copy_page_sse2(void *to, const void
 #define clear_page(_p)      clear_page_cold(_p)
 #define copy_page(_t, _f)   copy_page_sse2(_t, _f)
 
+#ifdef CONFIG_DEBUG
+void scrub_page_hot(void *);
+void scrub_page_cold(void *);
+#endif
+
 /* Convert between Xen-heap virtual addresses and machine addresses. */
 #define __pa(x)             (virt_to_maddr(x))
 #define __va(x)             (maddr_to_virt(x))
--- /dev/null
+++ b/xen/arch/x86/scrub_page.S
@@ -0,0 +1,39 @@
+        .file __FILE__
+
+#include <asm/asm_defns.h>
+#include <xen/page-size.h>
+#include <xen/scrub.h>
+
+FUNC(scrub_page_cold)
+        mov     $PAGE_SIZE/32, %ecx
+        mov     $SCRUB_PATTERN, %rax
+
+0:      movnti  %rax,   (%rdi)
+        movnti  %rax,  8(%rdi)
+        movnti  %rax, 16(%rdi)
+        movnti  %rax, 24(%rdi)
+        add     $32, %rdi
+        sub     $1, %ecx
+        jnz     0b
+
+        sfence
+        ret
+END(scrub_page_cold)
+
+        .macro scrub_page_stosb
+        mov     $PAGE_SIZE, %ecx
+        mov     $SCRUB_BYTE_PATTERN, %eax
+        rep stosb
+        ret
+        .endm
+
+        .macro scrub_page_stosq
+        mov     $PAGE_SIZE/8, %ecx
+        mov     $SCRUB_PATTERN, %rax
+        rep stosq
+        ret
+        .endm
+
+FUNC(scrub_page_hot)
+        ALTERNATIVE scrub_page_stosq, scrub_page_stosb, X86_FEATURE_ERMS
+END(scrub_page_hot)
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -135,6 +135,7 @@
 #include <xen/pfn.h>
 #include <xen/types.h>
 #include <xen/sched.h>
+#include <xen/scrub.h>
 #include <xen/sections.h>
 #include <xen/softirq.h>
 #include <xen/spinlock.h>
@@ -779,27 +780,31 @@ static void page_list_add_scrub(struct p
         page_list_add(pg, &heap(node, zone, order));
 }
 
-/* SCRUB_PATTERN needs to be a repeating series of bytes. */
-#ifndef NDEBUG
-#define SCRUB_PATTERN        0xc2c2c2c2c2c2c2c2ULL
-#else
-#define SCRUB_PATTERN        0ULL
+/*
+ * While in debug builds we want callers to avoid relying on allocations
+ * returning zeroed pages, for a production build, clear_page_*() is the
+ * fastest way to scrub.
+ */
+#ifndef CONFIG_DEBUG
+# undef  scrub_page_hot
+# define scrub_page_hot clear_page_hot
+# undef  scrub_page_cold
+# define scrub_page_cold clear_page_cold
 #endif
-#define SCRUB_BYTE_PATTERN   (SCRUB_PATTERN & 0xff)
 
-static void scrub_one_page(const struct page_info *pg)
+static void scrub_one_page(const struct page_info *pg, bool cold)
 {
+    void *ptr;
+
     if ( unlikely(pg->count_info & PGC_broken) )
         return;
 
-#ifndef NDEBUG
-    /* Avoid callers relying on allocations returning zeroed pages. */
-    unmap_domain_page(memset(__map_domain_page(pg),
-                             SCRUB_BYTE_PATTERN, PAGE_SIZE));
-#else
-    /* For a production build, clear_page() is the fastest way to scrub. */
-    clear_domain_page(_mfn(page_to_mfn(pg)));
-#endif
+    ptr = __map_domain_page(pg);
+    if ( cold )
+        scrub_page_cold(ptr);
+    else
+        scrub_page_hot(ptr);
+    unmap_domain_page(ptr);
 }
 
 static void poison_one_page(struct page_info *pg)
@@ -1079,12 +1084,14 @@ static struct page_info *alloc_heap_page
     if ( first_dirty != INVALID_DIRTY_IDX ||
          (scrub_debug && !(memflags & MEMF_no_scrub)) )
     {
+        bool cold = d && d != current->domain;
+
         for ( i = 0; i < (1U << order); i++ )
         {
             if ( test_and_clear_bit(_PGC_need_scrub, &pg[i].count_info) )
             {
                 if ( !(memflags & MEMF_no_scrub) )
-                    scrub_one_page(&pg[i]);
+                    scrub_one_page(&pg[i], cold);
 
                 dirty_cnt++;
             }
@@ -1349,7 +1356,7 @@ bool scrub_free_pages(void)
                 {
                     if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                     {
-                        scrub_one_page(&pg[i]);
+                        scrub_one_page(&pg[i], true);
                         /*
                          * We can modify count_info without holding heap
                          * lock since we effectively locked this buddy by
@@ -2074,7 +2081,7 @@ static struct page_info *alloc_color_hea
     if ( !(memflags & MEMF_no_scrub) )
     {
         if ( need_scrub )
-            scrub_one_page(pg);
+            scrub_one_page(pg, d != current->domain);
         else
             check_one_page(pg);
     }
@@ -2225,7 +2232,7 @@ static void __init cf_check smp_scrub_he
         if ( !mfn_valid(_mfn(mfn)) || !page_state_is(pg, free) )
             continue;
 
-        scrub_one_page(pg);
+        scrub_one_page(pg, true);
     }
 }
 
@@ -2930,7 +2937,7 @@ void unprepare_staticmem_pages(struct pa
         if ( need_scrub )
         {
             /* TODO: asynchronous scrubbing for pages of static memory. */
-            scrub_one_page(pg);
+            scrub_one_page(pg, true);
         }
 
         pg[i].count_info |= PGC_static;
--- /dev/null
+++ b/xen/include/xen/scrub.h
@@ -0,0 +1,24 @@
+#ifndef __XEN_SCRUB_H__
+#define __XEN_SCRUB_H__
+
+#include <xen/const.h>
+
+/* SCRUB_PATTERN needs to be a repeating series of bytes. */
+#ifdef CONFIG_DEBUG
+# define SCRUB_PATTERN       _AC(0xc2c2c2c2c2c2c2c2,ULL)
+#else
+# define SCRUB_PATTERN       _AC(0,ULL)
+#endif
+#define SCRUB_BYTE_PATTERN   (SCRUB_PATTERN & 0xff)
+
+#endif /* __XEN_SCRUB_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
On 6/5/25 12:28 PM, Jan Beulich wrote:
> Especially when dealing with large amounts of memory, memset() may not
> be very efficient; this can be bad enough that even for debug builds a
> custom function is warranted. We additionally want to distinguish "hot"
> and "cold" cases (with, as initial heuristic, "hot" being for any
> allocations a domain does for itself, assuming that in all other cases
> the page wouldn't be accessed [again] soon). The goal is for accesses
> of "cold" pages to not disturb caches (albeit finding a good balance
> between this and the higher latency looks to be difficult).
>
> Keep the default fallback to clear_page_*() in common code; this may
> want to be revisited down the road.
>
> Signed-off-by: Jan Beulich<jbeulich@suse.com>
> Acked-by: Julien Grall<jgrall@amazon.com>
> ---
> v4: Re-base.
> v3: Re-base.
> v2: New.
> ---
> The choice between hot and cold in scrub_one_page()'s callers is
> certainly up for discussion / improvement.
>
> --- a/xen/arch/arm/include/asm/page.h
> +++ b/xen/arch/arm/include/asm/page.h
> @@ -144,6 +144,12 @@ extern size_t dcache_line_bytes;
>   
>   #define copy_page(dp, sp) memcpy(dp, sp, PAGE_SIZE)
>   
> +#define clear_page_hot  clear_page
> +#define clear_page_cold clear_page
> +
> +#define scrub_page_hot(page) memset(page, SCRUB_BYTE_PATTERN, PAGE_SIZE)
> +#define scrub_page_cold      scrub_page_hot
> +
>   static inline size_t read_dcache_line_bytes(void)
>   {
>       register_t ctr;
> --- a/xen/arch/ppc/include/asm/page.h
> +++ b/xen/arch/ppc/include/asm/page.h
> @@ -188,6 +188,12 @@ static inline void invalidate_icache(voi
>   #define clear_page(page) memset(page, 0, PAGE_SIZE)
>   #define copy_page(dp, sp) memcpy(dp, sp, PAGE_SIZE)
>   
> +#define clear_page_hot  clear_page
> +#define clear_page_cold clear_page
> +
> +#define scrub_page_hot(page) memset(page, SCRUB_BYTE_PATTERN, PAGE_SIZE)
> +#define scrub_page_cold      scrub_page_hot
> +
>   /* TODO: Flush the dcache for an entire page. */
>   static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache)
>   {
> --- a/xen/arch/riscv/include/asm/page.h
> +++ b/xen/arch/riscv/include/asm/page.h
> @@ -198,6 +198,12 @@ static inline void invalidate_icache(voi
>   #define clear_page(page) memset((void *)(page), 0, PAGE_SIZE)
>   #define copy_page(dp, sp) memcpy(dp, sp, PAGE_SIZE)
>   
> +#define clear_page_hot  clear_page
> +#define clear_page_cold clear_page
> +
> +#define scrub_page_hot(page) memset(page, SCRUB_BYTE_PATTERN, PAGE_SIZE)
> +#define scrub_page_cold      scrub_page_hot
> +
LGTM: Reviewed-by: Oleksii Kurochko<oleksii.kurochko@gmail.com>
Thanks.
~ Oleksii
>   static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache)
>   {
>       const void *v = map_domain_page(_mfn(mfn));
> --- a/xen/arch/x86/Makefile
> +++ b/xen/arch/x86/Makefile
> @@ -58,6 +58,7 @@ obj-y += pci.o
>   obj-y += physdev.o
>   obj-$(CONFIG_COMPAT) += x86_64/physdev.o
>   obj-$(CONFIG_X86_PSR) += psr.o
> +obj-bin-$(CONFIG_DEBUG) += scrub_page.o
>   obj-y += setup.o
>   obj-y += shutdown.o
>   obj-y += smp.o
> --- a/xen/arch/x86/include/asm/page.h
> +++ b/xen/arch/x86/include/asm/page.h
> @@ -226,6 +226,11 @@ void copy_page_sse2(void *to, const void
>   #define clear_page(_p)      clear_page_cold(_p)
>   #define copy_page(_t, _f)   copy_page_sse2(_t, _f)
>   
> +#ifdef CONFIG_DEBUG
> +void scrub_page_hot(void *);
> +void scrub_page_cold(void *);
> +#endif
> +
>   /* Convert between Xen-heap virtual addresses and machine addresses. */
>   #define __pa(x)             (virt_to_maddr(x))
>   #define __va(x)             (maddr_to_virt(x))
> --- /dev/null
> +++ b/xen/arch/x86/scrub_page.S
> @@ -0,0 +1,39 @@
> +        .file __FILE__
> +
> +#include <asm/asm_defns.h>
> +#include <xen/page-size.h>
> +#include <xen/scrub.h>
> +
> +FUNC(scrub_page_cold)
> +        mov     $PAGE_SIZE/32, %ecx
> +        mov     $SCRUB_PATTERN, %rax
> +
> +0:      movnti  %rax,   (%rdi)
> +        movnti  %rax,  8(%rdi)
> +        movnti  %rax, 16(%rdi)
> +        movnti  %rax, 24(%rdi)
> +        add     $32, %rdi
> +        sub     $1, %ecx
> +        jnz     0b
> +
> +        sfence
> +        ret
> +END(scrub_page_cold)
> +
> +        .macro scrub_page_stosb
> +        mov     $PAGE_SIZE, %ecx
> +        mov     $SCRUB_BYTE_PATTERN, %eax
> +        rep stosb
> +        ret
> +        .endm
> +
> +        .macro scrub_page_stosq
> +        mov     $PAGE_SIZE/8, %ecx
> +        mov     $SCRUB_PATTERN, %rax
> +        rep stosq
> +        ret
> +        .endm
> +
> +FUNC(scrub_page_hot)
> +        ALTERNATIVE scrub_page_stosq, scrub_page_stosb, X86_FEATURE_ERMS
> +END(scrub_page_hot)
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -135,6 +135,7 @@
>   #include <xen/pfn.h>
>   #include <xen/types.h>
>   #include <xen/sched.h>
> +#include <xen/scrub.h>
>   #include <xen/sections.h>
>   #include <xen/softirq.h>
>   #include <xen/spinlock.h>
> @@ -779,27 +780,31 @@ static void page_list_add_scrub(struct p
>           page_list_add(pg, &heap(node, zone, order));
>   }
>   
> -/* SCRUB_PATTERN needs to be a repeating series of bytes. */
> -#ifndef NDEBUG
> -#define SCRUB_PATTERN        0xc2c2c2c2c2c2c2c2ULL
> -#else
> -#define SCRUB_PATTERN        0ULL
> +/*
> + * While in debug builds we want callers to avoid relying on allocations
> + * returning zeroed pages, for a production build, clear_page_*() is the
> + * fastest way to scrub.
> + */
> +#ifndef CONFIG_DEBUG
> +# undef  scrub_page_hot
> +# define scrub_page_hot clear_page_hot
> +# undef  scrub_page_cold
> +# define scrub_page_cold clear_page_cold
>   #endif
> -#define SCRUB_BYTE_PATTERN   (SCRUB_PATTERN & 0xff)
>   
> -static void scrub_one_page(const struct page_info *pg)
> +static void scrub_one_page(const struct page_info *pg, bool cold)
>   {
> +    void *ptr;
> +
>       if ( unlikely(pg->count_info & PGC_broken) )
>           return;
>   
> -#ifndef NDEBUG
> -    /* Avoid callers relying on allocations returning zeroed pages. */
> -    unmap_domain_page(memset(__map_domain_page(pg),
> -                             SCRUB_BYTE_PATTERN, PAGE_SIZE));
> -#else
> -    /* For a production build, clear_page() is the fastest way to scrub. */
> -    clear_domain_page(_mfn(page_to_mfn(pg)));
> -#endif
> +    ptr = __map_domain_page(pg);
> +    if ( cold )
> +        scrub_page_cold(ptr);
> +    else
> +        scrub_page_hot(ptr);
> +    unmap_domain_page(ptr);
>   }
>   
>   static void poison_one_page(struct page_info *pg)
> @@ -1079,12 +1084,14 @@ static struct page_info *alloc_heap_page
>       if ( first_dirty != INVALID_DIRTY_IDX ||
>            (scrub_debug && !(memflags & MEMF_no_scrub)) )
>       {
> +        bool cold = d && d != current->domain;
> +
>           for ( i = 0; i < (1U << order); i++ )
>           {
>               if ( test_and_clear_bit(_PGC_need_scrub, &pg[i].count_info) )
>               {
>                   if ( !(memflags & MEMF_no_scrub) )
> -                    scrub_one_page(&pg[i]);
> +                    scrub_one_page(&pg[i], cold);
>   
>                   dirty_cnt++;
>               }
> @@ -1349,7 +1356,7 @@ bool scrub_free_pages(void)
>                   {
>                       if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
>                       {
> -                        scrub_one_page(&pg[i]);
> +                        scrub_one_page(&pg[i], true);
>                           /*
>                            * We can modify count_info without holding heap
>                            * lock since we effectively locked this buddy by
> @@ -2074,7 +2081,7 @@ static struct page_info *alloc_color_hea
>       if ( !(memflags & MEMF_no_scrub) )
>       {
>           if ( need_scrub )
> -            scrub_one_page(pg);
> +            scrub_one_page(pg, d != current->domain);
>           else
>               check_one_page(pg);
>       }
> @@ -2225,7 +2232,7 @@ static void __init cf_check smp_scrub_he
>           if ( !mfn_valid(_mfn(mfn)) || !page_state_is(pg, free) )
>               continue;
>   
> -        scrub_one_page(pg);
> +        scrub_one_page(pg, true);
>       }
>   }
>   
> @@ -2930,7 +2937,7 @@ void unprepare_staticmem_pages(struct pa
>           if ( need_scrub )
>           {
>               /* TODO: asynchronous scrubbing for pages of static memory. */
> -            scrub_one_page(pg);
> +            scrub_one_page(pg, true);
>           }
>   
>           pg[i].count_info |= PGC_static;
> --- /dev/null
> +++ b/xen/include/xen/scrub.h
> @@ -0,0 +1,24 @@
> +#ifndef __XEN_SCRUB_H__
> +#define __XEN_SCRUB_H__
> +
> +#include <xen/const.h>
> +
> +/* SCRUB_PATTERN needs to be a repeating series of bytes. */
> +#ifdef CONFIG_DEBUG
> +# define SCRUB_PATTERN       _AC(0xc2c2c2c2c2c2c2c2,ULL)
> +#else
> +# define SCRUB_PATTERN       _AC(0,ULL)
> +#endif
> +#define SCRUB_BYTE_PATTERN   (SCRUB_PATTERN & 0xff)
> +
> +#endif /* __XEN_SCRUB_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
>
                
            © 2016 - 2025 Red Hat, Inc.