[PATCH v3 3/3] xen/mm: limit non-scrubbed allocations to a specific order

Roger Pau Monne posted 3 patches 3 days, 14 hours ago
[PATCH v3 3/3] xen/mm: limit non-scrubbed allocations to a specific order
Posted by Roger Pau Monne 3 days, 14 hours ago
The current logic allows for up to 1G pages to be scrubbed in place, which
can cause the watchdog to trigger in practice.  Reduce the limit for
in-place scrubbed allocations to a newly introduced define:
CONFIG_DIRTY_MAX_ORDER.  This currently defaults to CONFIG_PTDOM_MAX_ORDER
on all architectures.  Also introduce a command line option to set the
value.

Fixes: 74d2e11ccfd2 ("mm: Scrub pages in alloc_heap_pages() if needed")
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v2:
 - Move placement of the max-order-dirty option help.
 - Add note in memop-max-order about interactions.
 - Use CONFIG_PTDOM_MAX_ORDER as the default.

Changes since v1:
 - Split from previous patch.
 - Introduce a command line option to set the limit.
---
 docs/misc/xen-command-line.pandoc | 13 +++++++++++++
 xen/common/memory.c               |  3 ---
 xen/common/page_alloc.c           | 23 ++++++++++++++++++++++-
 xen/include/xen/mm.h              |  4 ++++
 4 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index 15f7a315a4b5..3577e491e379 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1837,6 +1837,16 @@ presented as the number of bits needed to encode it. This must be at least
 one pending bit to be allocated.
 Defaults to 20 bits (to cover at most 1048576 interrupts).
 
+### max-order-dirty
+> `= <integer>`
+
+Specify the maximum allocation order allowed when scrubbing allocated pages
+in-place.  The allocation is non-preemptive, and hence the value must be kept
+low enough to avoid hogging the CPU for too long.
+
+Defaults to `CONFIG_DIRTY_MAX_ORDER` or, if unset, to `CONFIG_PTDOM_MAX_ORDER`.
+Note those are internal per-architecture defines not available from Kconfig.
+
 ### mce (x86)
 > `= <boolean>`
 
@@ -1878,6 +1888,9 @@ requests issued by the various kinds of domains (in this order:
 ordinary DomU, control domain, hardware domain, and - when supported
 by the platform - DomU with pass-through device assigned).
 
+Note orders here can be further limited by the value in `max-order-dirty` for
+allocations requesting pages to be scrubbed in-place.
+
 ### mmcfg (x86)
 > `= <boolean>[,amd-fam10]`
 
diff --git a/xen/common/memory.c b/xen/common/memory.c
index db20da1bcaaa..cf63bd077d42 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -56,9 +56,6 @@ struct memop_args {
 #ifndef CONFIG_CTLDOM_MAX_ORDER
 #define CONFIG_CTLDOM_MAX_ORDER CONFIG_PAGEALLOC_MAX_ORDER
 #endif
-#ifndef CONFIG_PTDOM_MAX_ORDER
-#define CONFIG_PTDOM_MAX_ORDER CONFIG_HWDOM_MAX_ORDER
-#endif
 
 static unsigned int __read_mostly domu_max_order = CONFIG_DOMU_MAX_ORDER;
 static unsigned int __read_mostly ctldom_max_order = CONFIG_CTLDOM_MAX_ORDER;
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index c9e82fd7ab62..d2d5e4762d59 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -267,6 +267,13 @@ static PAGE_LIST_HEAD(page_offlined_list);
 /* Broken page list, protected by heap_lock. */
 static PAGE_LIST_HEAD(page_broken_list);
 
+/* Maximum order of dirty allocations that get scrubbed in place. */
+#ifndef CONFIG_DIRTY_MAX_ORDER
+# define CONFIG_DIRTY_MAX_ORDER CONFIG_PTDOM_MAX_ORDER
+#endif
+static unsigned int __ro_after_init dirty_max_order = CONFIG_DIRTY_MAX_ORDER;
+integer_param("max-order-dirty", dirty_max_order);
+
 /*************************
  * BOOT-TIME ALLOCATOR
  */
@@ -1008,7 +1015,13 @@ static struct page_info *alloc_heap_pages(
 
     pg = get_free_buddy(zone_lo, zone_hi, order, memflags, d);
     /* Try getting a dirty buddy if we couldn't get a clean one. */
-    if ( !pg && !(memflags & MEMF_no_scrub) )
+    if ( !pg && !(memflags & MEMF_no_scrub) &&
+         /*
+          * Allow any order unscrubbed allocations during boot time, we
+          * compensate by processing softirqs in the scrubbing loop below once
+          * irqs are enabled.
+          */
+         (order <= dirty_max_order || system_state < SYS_STATE_active) )
         pg = get_free_buddy(zone_lo, zone_hi, order,
                             memflags | MEMF_no_scrub, d);
     if ( !pg )
@@ -1117,6 +1130,14 @@ static struct page_info *alloc_heap_pages(
                     scrub_one_page(&pg[i], cold);
 
                 dirty_cnt++;
+
+                /*
+                 * Use SYS_STATE_smp_boot explicitly; ahead of that state
+                 * interrupts are disabled.
+                 */
+                if ( system_state == SYS_STATE_smp_boot &&
+                     !(dirty_cnt & 0xff) )
+                    process_pending_softirqs();
             }
             else
                 check_one_page(&pg[i]);
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index d80bfba6d393..cf3796d4286d 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -232,6 +232,10 @@ struct npfec {
 #else
 #define MAX_ORDER 20 /* 2^20 contiguous pages */
 #endif
+#ifndef CONFIG_PTDOM_MAX_ORDER
+# define CONFIG_PTDOM_MAX_ORDER CONFIG_HWDOM_MAX_ORDER
+#endif
+
 mfn_t acquire_reserved_page(struct domain *d, unsigned int memflags);
 
 /* Private domain structs for DOMID_XEN, DOMID_IO, etc. */
-- 
2.51.0