[PATCH] numa: add 'spm' option for special purpose memory

fanhuang posted 1 patch 4 days, 4 hours ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20250924103324.2074819-2-FangSheng.Huang@amd.com
Maintainers: Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Yanan Wang <wangyanan55@huawei.com>, Zhao Liu <zhao1.liu@intel.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, "Michael S. Tsirkin" <mst@redhat.com>, Peter Xu <peterx@redhat.com>, David Hildenbrand <david@redhat.com>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>
hw/core/numa.c               |  3 +++
hw/i386/e820_memory_layout.h |  1 +
hw/i386/pc.c                 | 34 ++++++++++++++++++++++++++++++++++
include/exec/cpu-common.h    |  1 +
include/system/memory.h      |  3 +++
include/system/numa.h        |  1 +
qapi/machine.json            |  6 ++++++
system/physmem.c             |  7 ++++++-
8 files changed, 55 insertions(+), 1 deletion(-)
[PATCH] numa: add 'spm' option for special purpose memory
Posted by fanhuang 4 days, 4 hours ago
This patch adds support for special purpose memory (SPM) through the
NUMA node configuration. When 'spm=on' is specified for a NUMA node,
QEMU will:

1. Set the RAM_SPM flag in the RAM block of the corresponding memory region
2. Set the E820 type to E820_SOFT_RESERVED for this memory region

This allows guest operating systems to recognize the memory as soft reserved
memory, which can be used for heterogeneous memory management or other
special purposes.

Usage:
  -numa node,nodeid=0,memdev=m1,spm=on
Signed-off-by: fanhuang <FangSheng.Huang@amd.com>
---
 hw/core/numa.c               |  3 +++
 hw/i386/e820_memory_layout.h |  1 +
 hw/i386/pc.c                 | 34 ++++++++++++++++++++++++++++++++++
 include/exec/cpu-common.h    |  1 +
 include/system/memory.h      |  3 +++
 include/system/numa.h        |  1 +
 qapi/machine.json            |  6 ++++++
 system/physmem.c             |  7 ++++++-
 8 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/hw/core/numa.c b/hw/core/numa.c
index 218576f745..e680130460 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -163,6 +163,9 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
         numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
     }
 
+    /* Store spm configuration for later processing */
+    numa_info[nodenr].is_spm = node->has_spm && node->spm;
+
     numa_info[nodenr].present = true;
     max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);
     ms->numa_state->num_nodes++;
diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h
index b50acfa201..8af6a9cfac 100644
--- a/hw/i386/e820_memory_layout.h
+++ b/hw/i386/e820_memory_layout.h
@@ -15,6 +15,7 @@
 #define E820_ACPI       3
 #define E820_NVS        4
 #define E820_UNUSABLE   5
+#define E820_SOFT_RESERVED  0xEFFFFFFF
 
 struct e820_entry {
     uint64_t address;
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index bc048a6d13..10ecd25728 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -26,6 +26,7 @@
 #include "qemu/units.h"
 #include "exec/target_page.h"
 #include "hw/i386/pc.h"
+#include "system/ramblock.h"
 #include "hw/char/serial-isa.h"
 #include "hw/char/parallel.h"
 #include "hw/hyperv/hv-balloon.h"
@@ -787,6 +788,38 @@ static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
     return pc_above_4g_end(pcms) - 1;
 }
 
+static int pc_update_spm_memory(RAMBlock *rb, void *opaque)
+{
+    X86MachineState *x86ms = opaque;
+    MachineState *ms = MACHINE(x86ms);
+    ram_addr_t offset;
+    ram_addr_t length;
+    bool is_spm = false;
+
+    /* Check if this RAM block belongs to a NUMA node with spm=on */
+    for (int i = 0; i < ms->numa_state->num_nodes; i++) {
+        NodeInfo *numa_info = &ms->numa_state->nodes[i];
+        if (numa_info->is_spm && numa_info->node_memdev) {
+            MemoryRegion *mr = &numa_info->node_memdev->mr;
+            if (mr->ram_block == rb) {
+                /* Mark this RAM block as SPM and set the flag */
+                rb->flags |= RAM_SPM;
+                is_spm = true;
+                break;
+            }
+        }
+    }
+
+    if (is_spm) {
+        offset = qemu_ram_get_offset(rb) +
+                 (0x100000000ULL - x86ms->below_4g_mem_size);
+        length = qemu_ram_get_used_length(rb);
+        e820_add_entry(offset, length, E820_SOFT_RESERVED);
+    }
+
+    return 0;
+}
+
 /*
  * AMD systems with an IOMMU have an additional hole close to the
  * 1Tb, which are special GPAs that cannot be DMA mapped. Depending
@@ -901,6 +934,7 @@ void pc_memory_init(PCMachineState *pcms,
     if (pcms->sgx_epc.size != 0) {
         e820_add_entry(pcms->sgx_epc.base, pcms->sgx_epc.size, E820_RESERVED);
     }
+    qemu_ram_foreach_block(pc_update_spm_memory, x86ms);
 
     if (!pcmc->has_reserved_memory &&
         (machine->ram_slots ||
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 9b658a3f48..9b437eaa10 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -89,6 +89,7 @@ ram_addr_t qemu_ram_get_fd_offset(RAMBlock *rb);
 ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
 ram_addr_t qemu_ram_get_max_length(RAMBlock *rb);
 bool qemu_ram_is_shared(RAMBlock *rb);
+bool qemu_ram_is_spm(RAMBlock *rb);
 bool qemu_ram_is_noreserve(RAMBlock *rb);
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
 void qemu_ram_set_uf_zeroable(RAMBlock *rb);
diff --git a/include/system/memory.h b/include/system/memory.h
index aa85fc27a1..520dda969e 100644
--- a/include/system/memory.h
+++ b/include/system/memory.h
@@ -275,6 +275,9 @@ typedef struct IOMMUTLBEvent {
  */
 #define RAM_PRIVATE (1 << 13)
 
+/* RAM is special purpose memory */
+#define RAM_SPM (1 << 14)
+
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                        IOMMUNotifierFlag flags,
                                        hwaddr start, hwaddr end,
diff --git a/include/system/numa.h b/include/system/numa.h
index 1044b0eb6e..438511a756 100644
--- a/include/system/numa.h
+++ b/include/system/numa.h
@@ -41,6 +41,7 @@ typedef struct NodeInfo {
     bool present;
     bool has_cpu;
     bool has_gi;
+    bool is_spm;
     uint8_t lb_info_provided;
     uint16_t initiator;
     uint8_t distance[MAX_NODES];
diff --git a/qapi/machine.json b/qapi/machine.json
index 038eab281c..1a513b38cf 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -500,6 +500,11 @@
 # @memdev: memory backend object.  If specified for one node, it must
 #     be specified for all nodes.
 #
+# @spm: if true, mark the memory region of this node as special
+#     purpose memory (SPM). This will set the RAM_SPM flag for the
+#     corresponding memory region and set the E820 type to
+#     E820_SOFT_RESERVED. (default: false, since 9.2)
+#
 # @initiator: defined in ACPI 6.3 Chapter 5.2.27.3 Table 5-145, points
 #     to the nodeid which has the memory controller responsible for
 #     this NUMA node.  This field provides additional information as
@@ -514,6 +519,7 @@
    '*cpus':   ['uint16'],
    '*mem':    'size',
    '*memdev': 'str',
+   '*spm':    'bool',
    '*initiator': 'uint16' }}
 
 ##
diff --git a/system/physmem.c b/system/physmem.c
index ae8ecd50ea..0090d9955d 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1611,6 +1611,11 @@ bool qemu_ram_is_noreserve(RAMBlock *rb)
     return rb->flags & RAM_NORESERVE;
 }
 
+bool qemu_ram_is_spm(RAMBlock *rb)
+{
+    return rb->flags & RAM_SPM;
+}
+
 /* Note: Only set at the start of postcopy */
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb)
 {
@@ -2032,7 +2037,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size,
     ram_flags &= ~RAM_PRIVATE;
 
     /* Just support these ram flags by now. */
-    assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
+    assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_SPM | RAM_NORESERVE |
                           RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
                           RAM_READONLY_FD | RAM_GUEST_MEMFD |
                           RAM_RESIZEABLE)) == 0);
-- 
2.34.1