[PATCH] hostmem-file: add the 'hmem' option
Posted by Zhigang Luo 5 months, 1 week ago
This boolean option 'hmem' allows users to mark a memory region created
by memory-backend-file as heterogeneous memory. If 'hmem=on', QEMU will
set the RAM_HMEM flag in the RAM block of the corresponding memory
region and set the e820 type to E820_SOFT_RESERVED for this region.
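
Example usage, marking a host dax device as heterogeneous memory (taken
from the usage shown later in this thread):

  -object memory-backend-file,size=30G,id=m1,mem-path=/dev/dax0.0,prealloc=on,align=1G,hmem=on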

Signed-off-by: Zhigang Luo <Zhigang.Luo@amd.com>
---
 backends/hostmem-file.c      | 23 +++++++++++++++++++++++
 hw/i386/e820_memory_layout.h |  1 +
 hw/i386/pc.c                 | 16 ++++++++++++++++
 include/exec/cpu-common.h    |  1 +
 include/exec/memory.h        |  3 +++
 qapi/qom.json                |  4 ++++
 system/physmem.c             |  7 ++++++-
 7 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 7e5072e33e..5ddfdbaf86 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -32,6 +32,7 @@ struct HostMemoryBackendFile {
     uint64_t offset;
     bool discard_data;
     bool is_pmem;
+    bool is_hmem;
     bool readonly;
     OnOffAuto rom;
 };
@@ -88,6 +89,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
     ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
     ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
+    ram_flags |= fb->is_hmem ? RAM_HMEM : 0;
     ram_flags |= RAM_NAMED_FILE;
     return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
                                             backend->size, fb->align, ram_flags,
@@ -256,6 +258,25 @@ static void file_memory_backend_set_rom(Object *obj, Visitor *v,
     visit_type_OnOffAuto(v, name, &fb->rom, errp);
 }
 
+static bool file_memory_backend_get_hmem(Object *o, Error **errp)
+{
+    return MEMORY_BACKEND_FILE(o)->is_hmem;
+}
+
+static void file_memory_backend_set_hmem(Object *o, bool value, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(o);
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+
+    if (host_memory_backend_mr_inited(backend)) {
+        error_setg(errp, "cannot change property 'hmem' of %s.",
+                   object_get_typename(o));
+        return;
+    }
+
+    fb->is_hmem = value;
+}
+
 static void file_backend_unparent(Object *obj)
 {
     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
@@ -295,6 +316,8 @@ file_backend_class_init(ObjectClass *oc, void *data)
     object_class_property_add_bool(oc, "pmem",
         file_memory_backend_get_pmem, file_memory_backend_set_pmem);
 #endif
+    object_class_property_add_bool(oc, "hmem",
+        file_memory_backend_get_hmem, file_memory_backend_set_hmem);
     object_class_property_add_bool(oc, "readonly",
         file_memory_backend_get_readonly,
         file_memory_backend_set_readonly);
diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h
index b50acfa201..8af6a9cfac 100644
--- a/hw/i386/e820_memory_layout.h
+++ b/hw/i386/e820_memory_layout.h
@@ -15,6 +15,7 @@
 #define E820_ACPI       3
 #define E820_NVS        4
 #define E820_UNUSABLE   5
+#define E820_SOFT_RESERVED  0xEFFFFFFF
 
 struct e820_entry {
     uint64_t address;
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 317aaca25a..41e9cc276c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -785,6 +785,21 @@ static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
     return pc_above_4g_end(pcms) - 1;
 }
 
+static int pc_update_hmem_memory(RAMBlock *rb, void *opaque)
+{
+    X86MachineState *x86ms = opaque;
+    ram_addr_t offset;
+    ram_addr_t length;
+
+    if (qemu_ram_is_hmem(rb)) {
+        offset = qemu_ram_get_offset(rb) + (0x100000000ULL - x86ms->below_4g_mem_size);
+        length = qemu_ram_get_used_length(rb);
+        e820_add_entry(offset, length, E820_SOFT_RESERVED);
+    }
+
+    return 0;
+}
+
 /*
  * AMD systems with an IOMMU have an additional hole close to the
  * 1Tb, which are special GPAs that cannot be DMA mapped. Depending
@@ -895,6 +910,7 @@ void pc_memory_init(PCMachineState *pcms,
         e820_add_entry(x86ms->above_4g_mem_start, x86ms->above_4g_mem_size,
                        E820_RAM);
     }
+    qemu_ram_foreach_block(pc_update_hmem_memory, x86ms);
 
     if (pcms->sgx_epc.size != 0) {
         e820_add_entry(pcms->sgx_epc.base, pcms->sgx_epc.size, E820_RESERVED);
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 638dc806a5..1b2dfb31e8 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -98,6 +98,7 @@ ram_addr_t qemu_ram_get_offset(RAMBlock *rb);
 ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
 ram_addr_t qemu_ram_get_max_length(RAMBlock *rb);
 bool qemu_ram_is_shared(RAMBlock *rb);
+bool qemu_ram_is_hmem(RAMBlock *rb);
 bool qemu_ram_is_noreserve(RAMBlock *rb);
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
 void qemu_ram_set_uf_zeroable(RAMBlock *rb);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 9458e2801d..18c593a00b 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -246,6 +246,9 @@ typedef struct IOMMUTLBEvent {
 /* RAM can be private that has kvm guest memfd backend */
 #define RAM_GUEST_MEMFD   (1 << 12)
 
+/* RAM is heterogeneous memory */
+#define RAM_HMEM (1 << 13)
+
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                        IOMMUNotifierFlag flags,
                                        hwaddr start, hwaddr end,
diff --git a/qapi/qom.json b/qapi/qom.json
index 28ce24cd8d..7b8632697f 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -669,6 +669,9 @@
 #     in host persistent memory that can be accessed using the SNIA
 #     NVM programming model (e.g. Intel NVDIMM).
 #
+# @hmem: specifies whether the backing file specified by @mem-path is
+#     in host heterogeneous memory.
+#
 # @readonly: if true, the backing file is opened read-only; if false,
 #     it is opened read-write.  (default: false)
 #
@@ -696,6 +699,7 @@
             '*discard-data': 'bool',
             'mem-path': 'str',
             '*pmem': { 'type': 'bool', 'if': 'CONFIG_LIBPMEM' },
+            '*hmem': 'bool',
             '*readonly': 'bool',
             '*rom': 'OnOffAuto' } }
 
diff --git a/system/physmem.c b/system/physmem.c
index dc1db3a384..f703398359 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1584,6 +1584,11 @@ bool qemu_ram_is_noreserve(RAMBlock *rb)
     return rb->flags & RAM_NORESERVE;
 }
 
+bool qemu_ram_is_hmem(RAMBlock *rb)
+{
+    return rb->flags & RAM_HMEM;
+}
+
 /* Note: Only set at the start of postcopy */
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb)
 {
@@ -1951,7 +1956,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
     int64_t file_size, file_align;
 
     /* Just support these ram flags by now. */
-    assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
+    assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_HMEM | RAM_NORESERVE |
                           RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
                           RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
 
-- 
2.25.1
Re: [PATCH] hostmem-file: add the 'hmem' option
Posted by David Hildenbrand 5 months, 1 week ago
On 04.12.24 18:11, Zhigang Luo wrote:
> This boolean option 'hmem' allows users to set a memory region from
> memory-backend-file as heterogeneous memory. If 'hmem=on', QEMU will
> set the flag RAM_HMEM in the RAM block of the corresponding memory
> region and set the e820 type to E820_SOFT_RESERVED for this region.
> 

Hi,

./scripts/get_maintainer.pl is your friend to figure out whom to CC on 
patches.

In general: not a fan. You seem to be abusing memory backend properties 
+ RAM flags to merely modify how memory is going to be exposed in the 
memory map on x86.

It's not even clear why heterogeneous memory should be exposed like 
that, and how reasonable it is to essentially expose all of guest RAM as 
E820_SOFT_RESERVED.


Note that the whole "pmem=on" case was very different, because it 
required mmap() modifications.

> Signed-off-by: Zhigang Luo <Zhigang.Luo@amd.com>
> ---
> 
> [...]
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 317aaca25a..41e9cc276c 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -785,6 +785,21 @@ static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
>       return pc_above_4g_end(pcms) - 1;
>   }
>   
> +static int pc_update_hmem_memory(RAMBlock *rb, void *opaque)
> +{
> +    X86MachineState *x86ms = opaque;
> +    ram_addr_t offset;
> +    ram_addr_t length;
> +
> +    if (qemu_ram_is_hmem(rb)) {
> +        offset = qemu_ram_get_offset(rb) + (0x100000000ULL - x86ms->below_4g_mem_size);
> +        length = qemu_ram_get_used_length(rb);
> +        e820_add_entry(offset, length, E820_SOFT_RESERVED);
> +    }

I am pretty sure this will break in NUMA setups, where we have multiple 
memory backends mapped in different locations.

The whole "(0x100000000ULL - x86ms->below_4g_mem_size)" looks hacky.

-- 
Cheers,

David / dhildenb
RE: [PATCH] hostmem-file: add the 'hmem' option
Posted by Luo, Zhigang 5 months, 1 week ago

Hi David,

Thanks for your comments.
Let me give you some background for this patch.
I am currently engaged in a project that requires passing EFI_MEMORY_SP (Special Purpose Memory) type memory from the host to a virtual machine within QEMU. This memory needs to be of type EFI_MEMORY_SP in the virtual machine as well.
This particular memory type is essential for the functionality of my project.
In Linux, SPM memory is claimed by the hmem-dax driver by default. With this patch I can use the following config to pass the SPM memory to the guest VM:
-object memory-backend-file,size=30G,id=m1,mem-path=/dev/dax0.0,prealloc=on,align=1G,hmem=on

I was thinking of changing the option name from "hmem" to "spm" to avoid confusion.

Do you have any suggestions for achieving this more reasonably?

Thanks,
Zhigang

-----Original Message-----
From: David Hildenbrand <david@redhat.com>
Sent: Friday, December 6, 2024 5:08 AM
To: Luo, Zhigang <Zhigang.Luo@amd.com>; qemu-devel@nongnu.org
Cc: kraxel@redhat.com; Igor Mammedov <imammedo@redhat.com>
Subject: Re: [PATCH] hostmem-file: add the 'hmem' option

On 04.12.24 18:11, Zhigang Luo wrote:
> This boolean option 'hmem' allows users to set a memory region from
> memory-backend-file as heterogeneous memory. If 'hmem=on', QEMU will
> set the flag RAM_HMEM in the RAM block of the corresponding memory
> region and set the e820 type to E820_SOFT_RESERVED for this region.
>

Hi,

./scripts/get_maintainer.pl is your friend to figure out whom to CC on patches.

In general: not a fan. You seem to be abusing memory backend properties
+ RAM flags to merely modify how memory is going to be exposed in the
memory map on x86.

It's not even clear why heterogeneous memory should be exposed like
that, and how reasonable it is to essentially expose all of guest RAM as
E820_SOFT_RESERVED.


Note that the whole "pmem=on" case was very different, because it
required mmap() modifications.

> Signed-off-by: Zhigang Luo <Zhigang.Luo@amd.com>
> ---
> 
> [...]
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 317aaca25a..41e9cc276c 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -785,6 +785,21 @@ static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
>       return pc_above_4g_end(pcms) - 1;
>   }
>
> +static int pc_update_hmem_memory(RAMBlock *rb, void *opaque)
> +{
> +    X86MachineState *x86ms = opaque;
> +    ram_addr_t offset;
> +    ram_addr_t length;
> +
> +    if (qemu_ram_is_hmem(rb)) {
> +        offset = qemu_ram_get_offset(rb) + (0x100000000ULL - x86ms->below_4g_mem_size);
> +        length = qemu_ram_get_used_length(rb);
> +        e820_add_entry(offset, length, E820_SOFT_RESERVED);
> +    }

I am pretty sure this will break in NUMA setups, where we have multiple
memory backends mapped in different locations.

The whole "(0x100000000ULL - x86ms->below_4g_mem_size)" looks hacky.

--
Cheers,

David / dhildenb

Re: [PATCH] hostmem-file: add the 'hmem' option
Posted by David Hildenbrand 5 months ago
On 06.12.24 18:58, Luo, Zhigang wrote:
> 
> Hi David,

Hi!

> 
> Thanks for your comments.
> Let me give you some background for this patch.
> I am currently engaged in a project that requires to pass the EFI_MEMORY_SP (Special Purpose Memory) type memory from host to a virtual machine within QEMU. This memory needs to be EFI_MEMORY_SP type in the virtual machine as well.
> This particular memory type is essential for the functionality of my project.

Which exact guest memory will be backed by this memory? All guest-memory?

And, what is the guest OS going to do with this memory?

Usually, this SP memory (dax, cxl, ...) is not used as boot memory. As 
on a bare metal system, one would expect that only CXL memory is marked 
as special and set aside for the cxl driver, so that the OS can boot on 
ordinary DIMMs and cxl can online it later.

So maybe you would want to expose this memory using CXL-mem device to 
the VM? Or a DIMM?

I assume the alternative is to tell the VM on the Linux kernel cmdline 
to set EFI_MEMORY_SP on this memory. I recall that there is a way to 
achieve that.
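
(Likely the efi_fake_mem= boot option, assuming the guest kernel has
CONFIG_EFI_FAKE_MEMMAP; attribute 0x40000 is EFI_MEMORY_SP, and the
range here is a made-up example:

  efi_fake_mem=16G@0x280000000:0x40000

It has the drawback that the user must know the guest-physical start
address and size of the range.)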

> In Linux, the SPM memory will be claimed by hmem-dax driver by default. With this patch I can use the following config to pass the SPM memory to guest VM.
> -object memory-backend-file,size=30G,id=m1,mem-path=/dev/dax0.0,prealloc=on,align=1G,hmem=on
> 
> I was thinking to change the option name from "hmem" to "spm" to avoid confusion.

Likely it should be specified elsewhere, that you want specific guest 
RAM ranges to be EFI_MEMORY_SP. For a DIMM, it could be a property, 
similarly maybe for CXL-mem devices (no expert on that).

For boot memory / machine memory it could be a machine property. But 
I'll first have to learn which ranges you actually want to expose that 
way, and what the VM will do with that information.

> 
> Do you have any suggestions to achieve this more reasonable?

The problem with qemu_ram_foreach_block() is that you would indicate 
also DIMMs, virtio-mem, ... and even RAMBlocks that are not even used 
for backing anything to the VM as EFI_MEMORY_SP, which is wrong.

> 
> Thanks,
> Zhigang
> 
> [...]

-- 
Cheers,

David / dhildenb
RE: [PATCH] hostmem-file: add the 'hmem' option
Posted by Luo, Zhigang 5 months ago

Hi David,

Please check my comments inline.

Thanks,
Zhigang

> -----Original Message-----
> From: David Hildenbrand <david@redhat.com>
> Sent: Monday, December 9, 2024 4:11 PM
> To: Luo, Zhigang <Zhigang.Luo@amd.com>; qemu-devel@nongnu.org
> Cc: kraxel@redhat.com; Igor Mammedov <imammedo@redhat.com>
> Subject: Re: [PATCH] hostmem-file: add the 'hmem' option
>
> On 06.12.24 18:58, Luo, Zhigang wrote:
> >
> > Hi David,
>
> Hi!
>
> >
> > Thanks for your comments.
> > Let me give you some background for this patch.
> > I am currently engaged in a project that requires to pass the EFI_MEMORY_SP
> (Special Purpose Memory) type memory from host to a virtual machine within
> QEMU. This memory needs to be EFI_MEMORY_SP type in the virtual machine as
> well.
> > This particular memory type is essential for the functionality of my project.
>
> Which exact guest memory will be backed by this memory? All guest-memory?
[Luo, Zhigang] Not all guest memory, only the memory reserved for a specific device.

>
> And, what is the guest OS going to do with this memory?
[Luo, Zhigang] The device driver in the guest will use this reserved memory.

>
> Usually, this SP memory (dax, cxl, ...) is not used as boot memory. Like on a bare
> metal system, one would expect that only CXL memory will be marked as special
> and put aside to the cxl driver, such that the OS can boot on ordinary DIMMs, such
> that cxl can online it etc.
>
> So maybe you would want to expose this memory using CXL-mem device to the
> VM? Or a DIMM?
>
> I assume the alternative is to tell the VM on the Linux kernel cmdline to set
> EFI_MEMORY_SP on this memory. I recall that there is a way to achieve that.
>
[Luo, Zhigang] I know about this option, but it requires the end user to know the memory location on the guest side (start address, size).


> > In Linux, the SPM memory will be claimed by hmem-dax driver by default. With
> this patch I can use the following config to pass the SPM memory to guest VM.
> > -object
> > memory-backend-file,size=30G,id=m1,mem-path=/dev/dax0.0,prealloc=on,al
> > ign=1G,hmem=on
> >
> > I was thinking to change the option name from "hmem" to "spm" to avoid
> confusion.
>
> Likely it should be specified elsewhere, that you want specific guest RAM ranges to
> be EFI_MEMORY_SP. For a DIMM, it could be a property, similarly maybe for CXL-
> mem devices (no expert on that).
>
> For boot memory / machine memory it could be a machine property. But I'll first have
> to learn which ranges you actually want to expose that way, and what the VM will do
> with that information.
[Luo, Zhigang] We want to expose the SPM memory reserved for a specific device, and we will pass both the SPM memory and the device to the guest. Then the device driver can use the SPM memory on the guest side.

>
> >
> > Do you have any suggestions to achieve this more reasonable?
>
> The problem with qemu_ram_foreach_block() is that you would indicate also DIMMs,
> virtio-mem, ... and even RAMBlocks that are not even used for backing anything to
> the VM as EFI_MEMORY_SP, which is wrong.
[Luo, Zhigang] qemu_ram_foreach_block() will list all memory blocks, but in pc_update_hmem_memory() only the memory blocks with the "hmem" flag will be updated to SPM memory.

>
> >
> > Thanks,
> > Zhigang
> >
> > -----Original Message-----
> > From: David Hildenbrand <david@redhat.com>
> > Sent: Friday, December 6, 2024 5:08 AM
> > To: Luo, Zhigang <Zhigang.Luo@amd.com>; qemu-devel@nongnu.org
> > Cc: kraxel@redhat.com; Igor Mammedov <imammedo@redhat.com>
> > Subject: Re: [PATCH] hostmem-file: add the 'hmem' option
> >
> > On 04.12.24 18:11, Zhigang Luo wrote:
> >> This boolean option 'hmem' allows users to set a memory region from
> >> memory-backend-file as heterogeneous memory. If 'hmem=on', QEMU will
> >> set the flag RAM_HMEM in the RAM block of the corresponding memory
> >> region and set the e820 type to E820_SOFT_RESERVED for this region.
> >>
> >
> > Hi,
> >
> > ./scripts/get_maintainer.pl is your friend to figure out whom to CC on patches.
> >
> > In general: not a fan. You seem to be abusing memory backend
> > properties
> > + RAM flags to merely modify how memory is going to be exposed in the
> > memory map on x86.
> >
> > It's not even clear why heterogeneous memory should be exposed like
> > that, and how reasonable it is to essentially expose all of guest RAM
> > as E820_SOFT_RESERVED.
> >
> >
> > Note that the whole "pmem=on" case was very different, because it
> > required mmap() modifications.
> >
> >> Signed-off-by: Zhigang Luo <Zhigang.Luo@amd.com>
> >> ---
> >>    backends/hostmem-file.c      | 23 +++++++++++++++++++++++
> >>    hw/i386/e820_memory_layout.h |  1 +
> >>    hw/i386/pc.c                 | 16 ++++++++++++++++
> >>    include/exec/cpu-common.h    |  1 +
> >>    include/exec/memory.h        |  3 +++
> >>    qapi/qom.json                |  4 ++++
> >>    system/physmem.c             |  7 ++++++-
> >>    7 files changed, 54 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index
> >> 7e5072e33e..5ddfdbaf86 100644
> >> --- a/backends/hostmem-file.c
> >> +++ b/backends/hostmem-file.c
> >> @@ -32,6 +32,7 @@ struct HostMemoryBackendFile {
> >>        uint64_t offset;
> >>        bool discard_data;
> >>        bool is_pmem;
> >> +    bool is_hmem;
> >>        bool readonly;
> >>        OnOffAuto rom;
> >>    };
> >> @@ -88,6 +89,7 @@ file_backend_memory_alloc(HostMemoryBackend
> *backend, Error **errp)
> >>        ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
> >>        ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
> >>        ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
> >> +    ram_flags |= fb->is_hmem ? RAM_HMEM : 0;
> >>        ram_flags |= RAM_NAMED_FILE;
> >>        return memory_region_init_ram_from_file(&backend->mr,
> OBJECT(backend), name,
> >>                                                backend->size,
> >> fb->align, ram_flags, @@ -256,6 +258,25 @@ static void
> file_memory_backend_set_rom(Object *obj, Visitor *v,
> >>        visit_type_OnOffAuto(v, name, &fb->rom, errp);
> >>    }
> >>
> >> +static bool file_memory_backend_get_hmem(Object *o, Error **errp) {
> >> +    return MEMORY_BACKEND_FILE(o)->is_hmem; }
> >> +
> >> +static void file_memory_backend_set_hmem(Object *o, bool value,
> >> +Error **errp) {
> >> +    HostMemoryBackend *backend = MEMORY_BACKEND(o);
> >> +    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
> >> +
> >> +    if (host_memory_backend_mr_inited(backend)) {
> >> +        error_setg(errp, "cannot change property 'hmem' of %s.",
> >> +                   object_get_typename(o));
> >> +        return;
> >> +    }
> >> +
> >> +    fb->is_hmem = value;
> >> +}
> >> +
> >>    static void file_backend_unparent(Object *obj)
> >>    {
> >>        HostMemoryBackend *backend = MEMORY_BACKEND(obj); @@ -295,6
> >> +316,8 @@ file_backend_class_init(ObjectClass *oc, void *data)
> >>        object_class_property_add_bool(oc, "pmem",
> >>            file_memory_backend_get_pmem, file_memory_backend_set_pmem);
> >>    #endif
> >> +    object_class_property_add_bool(oc, "hmem",
> >> +        file_memory_backend_get_hmem, file_memory_backend_set_hmem);
> >>        object_class_property_add_bool(oc, "readonly",
> >>            file_memory_backend_get_readonly,
> >>            file_memory_backend_set_readonly);
> >> diff --git a/hw/i386/e820_memory_layout.h
> >> b/hw/i386/e820_memory_layout.h index b50acfa201..8af6a9cfac 100644
> >> --- a/hw/i386/e820_memory_layout.h
> >> +++ b/hw/i386/e820_memory_layout.h
> >> @@ -15,6 +15,7 @@
> >>    #define E820_ACPI       3
> >>    #define E820_NVS        4
> >>    #define E820_UNUSABLE   5
> >> +#define E820_SOFT_RESERVED  0xEFFFFFFF
> >>
> >>    struct e820_entry {
> >>        uint64_t address;
> >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 317aaca25a..41e9cc276c
> >> 100644
> >> --- a/hw/i386/pc.c
> >> +++ b/hw/i386/pc.c
> >> @@ -785,6 +785,21 @@ static hwaddr pc_max_used_gpa(PCMachineState
> *pcms, uint64_t pci_hole64_size)
> >>        return pc_above_4g_end(pcms) - 1;
> >>    }
> >>
> >> +static int pc_update_hmem_memory(RAMBlock *rb, void *opaque) {
> >> +    X86MachineState *x86ms = opaque;
> >> +    ram_addr_t offset;
> >> +    ram_addr_t length;
> >> +
> >> +    if (qemu_ram_is_hmem(rb)) {
> >> +        offset = qemu_ram_get_offset(rb) + (0x100000000ULL - x86ms-
> >below_4g_mem_size);
> >> +        length = qemu_ram_get_used_length(rb);
> >> +        e820_add_entry(offset, length, E820_SOFT_RESERVED);
> >> +    }
> >
> > I am pretty sure this will break in NUMA setups, where we have
> > multiple memory backends mapped in different locations.
> >
> > The whole "(0x100000000ULL - x86ms->below_4g_mem_size)" looks hacky.
> >
> > --
> > Cheers,
> >
> > David / dhildenb
> >
>
>
> --
> Cheers,
>
> David / dhildenb

Re: [PATCH] hostmem-file: add the 'hmem' option
Posted by David Hildenbrand 5 months ago
On 10.12.24 20:32, Luo, Zhigang wrote:
> 
> Hi David,
> 

Hi,

>>>
>>> Thanks for your comments.
>>> Let me give you some background for this patch.
>>> I am currently engaged in a project that requires to pass the EFI_MEMORY_SP
>> (Special Purpose Memory) type memory from host to a virtual machine within
>> QEMU. This memory needs to be EFI_MEMORY_SP type in the virtual machine as
>> well.
>>> This particular memory type is essential for the functionality of my project.
>>
>> Which exact guest memory will be backed by this memory? All guest-memory?
> [Luo, Zhigang] not all guest-memory. Only the memory reserved for specific device.

Can you show me an example QEMU cmdline, and how you would pass that 
hostmem-file object to the device?

> 
>>
>> And, what is the guest OS going to do with this memory?
> [Luo, Zhigang] the device driver in guest will use this reserved memory.

Okay, so just like CXL memory.

> 
>>
>> Usually, this SP memory (dax, cxl, ...) is not used as boot memory. Like on a bare
>> metal system, one would expect that only CXL memory will be marked as special
>> and put aside to the cxl driver, such that the OS can boot on ordinary DIMMs, such
>> that cxl can online it etc.
>>
>> So maybe you would want to expose this memory using CXL-mem device to the
>> VM? Or a DIMM?
>>
>> I assume the alternative is to tell the VM on the Linux kernel cmdline to set
>> EFI_MEMORY_SP on this memory. I recall that there is a way to achieve that.
>>
> [Luo, Zhigang] I know this option. but it requires the end user to know where is the memory location in guest side(start address, size).

Right.

> 
> 
>>> In Linux, the SPM memory will be claimed by hmem-dax driver by default. With
>> this patch I can use the following config to pass the SPM memory to guest VM.
>>> -object
>>> memory-backend-file,size=30G,id=m1,mem-path=/dev/dax0.0,prealloc=on,al
>>> ign=1G,hmem=on
>>>
>>> I was thinking to change the option name from "hmem" to "spm" to avoid
>> confusion.
>>
>> Likely it should be specified elsewhere, that you want specific guest RAM ranges to
>> be EFI_MEMORY_SP. For a DIMM, it could be a property, similarly maybe for CXL-
>> mem devices (no expert on that).
>>
>> For boot memory / machine memory it could be a machine property. But I'll first have
>> to learn which ranges you actually want to expose that way, and what the VM will do
>> with that information.
> [Luo, Zhigang] we want to expose the SPM memory reserved for specific device. And we will pass the SPM memory and the device to guest. Then the device driver can use the SPM memory in guest side.

Then the device driver should likely have a way to configure that, not 
the memory backend.

After all, the device driver will map it somehow into guest physical 
address space (how?).

> 
>>
>>>
>>> Do you have any suggestions to achieve this more reasonable?
>>
>> The problem with qemu_ram_foreach_block() is that you would indicate also DIMMs,
>> virtio-mem, ... and even RAMBlocks that are not even used for backing anything to
>> the VM as EFI_MEMORY_SP, which is wrong.
> [Luo, Zhigang] qemu_ram_foreach_block() will list all memory block, but in pc_update_hmem_memory(), only the memory block with "hmem" flag will be updated to SPM memory.

Yes, but imagine a user passing such a memory backend to a 
DIMM/virtio-mem/boot memory etc. It will have very undesired side effects.
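
For example, nothing would stop a user from writing something like
(hypothetical cmdline):

  -object memory-backend-file,size=16G,id=m1,mem-path=/dev/dax0.0,hmem=on \
  -device pc-dimm,memdev=m1

and pc_update_hmem_memory() would still report that RAMBlock using the
boot-memory offset arithmetic, even though the DIMM is mapped at a
completely different guest physical address.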

-- 
Cheers,

David / dhildenb
RE: [PATCH] hostmem-file: add the 'hmem' option
Posted by Luo, Zhigang 5 months ago

> -----Original Message-----
> From: David Hildenbrand <david@redhat.com>
> Sent: Tuesday, December 10, 2024 2:55 PM
> To: Luo, Zhigang <Zhigang.Luo@amd.com>; qemu-devel@nongnu.org
> Cc: kraxel@redhat.com; Igor Mammedov <imammedo@redhat.com>
> Subject: Re: [PATCH] hostmem-file: add the 'hmem' option
>
> On 10.12.24 20:32, Luo, Zhigang wrote:
> >
> > Hi David,
> >
>
> Hi,
>
> >>>
> >>> Thanks for your comments.
> >>> Let me give you some background for this patch.
> >>> I am currently engaged in a project that requires to pass the
> >>> EFI_MEMORY_SP
> >> (Special Purpose Memory) type memory from host to a virtual machine
> >> within QEMU. This memory needs to be EFI_MEMORY_SP type in the
> >> virtual machine as well.
> >>> This particular memory type is essential for the functionality of my project.
> >>
> >> Which exact guest memory will be backed by this memory? All guest-memory?
> > [Luo, Zhigang] not all guest-memory. Only the memory reserved for specific
> device.
>
> Can you show me an example QEMU cmdline, and how you would pass that
> hostmem-file object to the device?
>
[Luo, Zhigang] The following is an example: m1 is the memory reserved for PCI device "0000:03:00.0". Both the memory and the PCI device are assigned to the same NUMA node.

-object memory-backend-ram,size=8G,id=m0 \
-object memory-backend-file,size=16G,id=m1,mem-path=/dev/dax0.0,prealloc=on,align=1G,hmem=on \
-numa node,nodeid=0,memdev=m0 -numa node,nodeid=1,memdev=m1 \
-device pxb-pcie,id=pcie.1,numa_node=1,bus_nr=2,bus=pcie.0 \
-device ioh3420,id=pcie_port1,bus=pcie.1,chassis=1 \
-device vfio-pci,host=0000:03:00.0,id=hostdev0,bus=pcie_port1

> >
> >>
> >> And, what is the guest OS going to do with this memory?
> > [Luo, Zhigang] the device driver in guest will use this reserved memory.
>
> Okay, so just like CXL memory.
>
> >
> >>
> >> Usually, this SP memory (dax, cxl, ...) is not used as boot memory.
> >> Like on a bare metal system, one would expect that only CXL memory
> >> will be marked as special and put aside to the cxl driver, such that
> >> the OS can boot on ordinary DIMMs, such that cxl can online it etc.
> >>
> >> So maybe you would want to expose this memory using CXL-mem device to
> >> the VM? Or a DIMM?
> >>
> >> I assume the alternative is to tell the VM on the Linux kernel
> >> cmdline to set EFI_MEMORY_SP on this memory. I recall that there is a way to
> achieve that.
> >>
> > [Luo, Zhigang] I know this option. but it requires the end user to know where is the
> memory location in guest side(start address, size).
>
> Right.
>
> >
> >
> >>> In Linux, the SPM memory will be claimed by hmem-dax driver by
> >>> default. With
> >> this patch I can use the following config to pass the SPM memory to guest VM.
> >>> -object
> >>> memory-backend-file,size=30G,id=m1,mem-path=/dev/dax0.0,prealloc=on,
> >>> al
> >>> ign=1G,hmem=on
> >>>
> >>> I was thinking to change the option name from "hmem" to "spm" to
> >>> avoid
> >> confusion.
> >>
> >> Likely it should be specified elsewhere, that you want specific guest
> >> RAM ranges to be EFI_MEMORY_SP. For a DIMM, it could be a property,
> >> similarly maybe for CXL- mem devices (no expert on that).
> >>
> >> For boot memory / machine memory it could be a machine property. But
> >> I'll first have to learn which ranges you actually want to expose
> >> that way, and what the VM will do with that information.
> > [Luo, Zhigang] we want to expose the SPM memory reserved for specific device.
> And we will pass the SPM memory and the device to guest. Then the device driver
> can use the SPM memory in guest side.
>
> Then the device driver should likely have a way to configure that, not the memory
> backend.
>
> After all, the device driver will map it somehow into guest physical address space
> (how?).
>
[Luo, Zhigang] From the guest's view it's still system memory, just marked as SPM, so QEMU will map the memory into the guest physical address space.
The device driver then simply claims the SPM memory on the guest side.
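
(A minimal sketch of that guest-side claim, assuming the soft-reserved
range is picked up by the guest's hmem/device-dax drivers and surfaces
as a dax node; the device path and mapping size are hypothetical:)

  #include <fcntl.h>
  #include <stddef.h>
  #include <sys/mman.h>
  #include <unistd.h>

  int main(void)
  {
      /* device-dax node the guest kernel created over the
       * soft-reserved (EFI_MEMORY_SP) range */
      int fd = open("/dev/dax0.0", O_RDWR);
      if (fd < 0)
          return 1;

      /* device-dax wants alignment-sized mappings, commonly 2M */
      size_t len = 2UL << 20;
      void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
      if (p == MAP_FAILED) {
          close(fd);
          return 1;
      }

      ((volatile unsigned char *)p)[0] = 1;   /* touch the memory */

      munmap(p, len);
      close(fd);
      return 0;
  }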

> >
> >>
> >>>
> >>> Do you have any suggestions to achieve this more reasonable?
> >>
> >> The problem with qemu_ram_foreach_block() is that you would indicate
> >> also DIMMs, virtio-mem, ... and even RAMBlocks that are not even used
> >> for backing anything to the VM as EFI_MEMORY_SP, which is wrong.
> > [Luo, Zhigang] qemu_ram_foreach_block() will list all memory block, but in
> pc_update_hmem_memory(), only the memory block with "hmem" flag will be
> updated to SPM memory.
>
> Yes, but imagine a user passing such a memory backend to a DIMM/virtio-mem/boot
> memory etc. It will have very undesired side effects.
>
[Luo, Zhigang] The user should know what he/she is doing when he/she sets the flag for the memory region.


> --
> Cheers,
>
> David / dhildenb

Re: [PATCH] hostmem-file: add the 'hmem' option
Posted by Igor Mammedov 4 months, 4 weeks ago
On Tue, 10 Dec 2024 21:51:40 +0000
"Luo, Zhigang" <Zhigang.Luo@amd.com> wrote:

> 
> > -----Original Message-----
> > From: David Hildenbrand <david@redhat.com>
> > Sent: Tuesday, December 10, 2024 2:55 PM
> > To: Luo, Zhigang <Zhigang.Luo@amd.com>; qemu-devel@nongnu.org
> > Cc: kraxel@redhat.com; Igor Mammedov <imammedo@redhat.com>
> > Subject: Re: [PATCH] hostmem-file: add the 'hmem' option
> >
> > On 10.12.24 20:32, Luo, Zhigang wrote:  
> > > [AMD Official Use Only - AMD Internal Distribution Only]
> > >
> > > Hi David,
> > >  
> >
> > Hi,
> >  
> > >>>
> > >>> Thanks for your comments.
> > >>> Let me give you some background for this patch.
> > >>> I am currently engaged in a project that requires to pass the
> > >>> EFI_MEMORY_SP  
> > >> (Special Purpose Memory) type memory from host to a virtual machine
> > >> within QEMU. This memory needs to be EFI_MEMORY_SP type in the
> > >> virtual machine as well.  
> > >>> This particular memory type is essential for the functionality of my project.  
> > >>
> > >> Which exact guest memory will be backed by this memory? All guest-memory?  
> > > [Luo, Zhigang] not all guest-memory. Only the memory reserved for specific  
> > device.
> >
> > Can you show me an example QEMU cmdline, and how you would pass that
> > hostmem-file object to the device?
> >  
> [Luo, Zhigang] the following is an example. m1 is the reserved memory for pci device "0000:03:00.0". both the memory and pci device are set to same numa node.
> 
> -object memory-backend-ram,size=8G,id=m0 \
> -object memory-backend-file,size=16G,id=m1,mem-path=/dev/dax0.0,prealloc=on,align=1G,hmem=on \
> -numa node,nodeid=0,memdev=m0 -numa node,nodeid=1,memdev=m1 \
> -device pxb-pcie,id=pcie.1,numa_node=1,bus_nr=2,bus=pcie.0 \
> -device ioh3420,id=pcie_port1,bus=pcie.1,chassis=1 \
> -device vfio-pci,host=0000:03:00.0,id=hostdev0,bus=pcie_port1

Is /dev/dax0.0 part of the host device 0000:03:00.0 that you pass through to the guest using vfio?



> [...]
RE: [PATCH] hostmem-file: add the 'hmem' option
Posted by Luo, Zhigang 4 months, 4 weeks ago

> -----Original Message-----
> From: Igor Mammedov <imammedo@redhat.com>
> Sent: Monday, December 16, 2024 9:40 AM
> To: Luo, Zhigang <Zhigang.Luo@amd.com>
> Cc: David Hildenbrand <david@redhat.com>; qemu-devel@nongnu.org;
> kraxel@redhat.com
> Subject: Re: [PATCH] hostmem-file: add the 'hmem' option
>
> On Tue, 10 Dec 2024 21:51:40 +0000
> "Luo, Zhigang" <Zhigang.Luo@amd.com> wrote:
>
> > [...]
> >
> > > Can you show me an example QEMU cmdline, and how you would pass that
> > > hostmem-file object to the device?
> > >
> > [Luo, Zhigang] The following is an example: m1 is the memory reserved for PCI
> > device "0000:03:00.0". Both the memory and the PCI device are assigned to the
> > same NUMA node.
> >
> > -object memory-backend-ram,size=8G,id=m0 \
> > -object memory-backend-file,size=16G,id=m1,mem-path=/dev/dax0.0,prealloc=on,align=1G,hmem=on \
> > -numa node,nodeid=0,memdev=m0 -numa node,nodeid=1,memdev=m1 \
> > -device pxb-pcie,id=pcie.1,numa_node=1,bus_nr=2,bus=pcie.0 \
> > -device ioh3420,id=pcie_port1,bus=pcie.1,chassis=1 \
> > -device vfio-pci,host=0000:03:00.0,id=hostdev0,bus=pcie_port1
>
> Is /dev/dax0.0 part of the host device 0000:03:00.0 that you pass through to the
> guest using vfio?
>
[Luo, Zhigang] From the ACPI view, they are in the same proximity domain, and host device 0000:03:00.0 has the closest distance to /dev/dax0.0.

> [...]
Re: [PATCH] hostmem-file: add the 'hmem' option
Posted by David Hildenbrand 5 months ago
On 10.12.24 22:51, Luo, Zhigang wrote:
> 
>> [...]
>>
>> Can you show me an example QEMU cmdline, and how you would pass that
>> hostmem-file object to the device?
>>
> [Luo, Zhigang] the following is an example. m1 is the reserved memory for pci device "0000:03:00.0". both the memory and pci device are set to same numa node.
> 
> -object memory-backend-ram,size=8G,id=m0 \
> -object memory-backend-file,size=16G,id=m1,mem-path=/dev/dax0.0,prealloc=on,align=1G,hmem=on \
> -numa node,nodeid=0,memdev=m0 -numa node,nodeid=1,memdev=m1 \

Okay, so you expose this memory as a second numa node, and want the
guest to identify the second numa node as SP so that it is not used
during boot.

Let me CC Jonathan; I am pretty sure he has an idea of what to do here.

> -device pxb-pcie,id=pcie.1,numa_node=1,bus_nr=2,bus=pcie.0 \
> -device ioh3420,id=pcie_port1,bus=pcie.1,chassis=1 \
> -device vfio-pci,host=0000:03:00.0,id=hostdev0,bus=pcie_port1
> 
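(For context: a range the guest sees as EFI_MEMORY_SP / "soft reserved"
is normally kept out of the regular page allocator and claimed by the
guest's hmem-dax driver, so it surfaces as a dax device that the device
driver can claim -- or that an admin can online manually with something
like "daxctl reconfigure-device --mode=system-ram dax0.0". That part is
standard Linux soft-reserved handling, not something this patch adds.)
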
>>>
>>>>
>>>> And, what is the guest OS going to do with this memory?
>>> [Luo, Zhigang] the device driver in the guest will use this reserved memory.
>>
>> Okay, so just like CXL memory.
>>
>>>
>>>>
>>>> Usually, this SP memory (dax, cxl, ...) is not used as boot memory.
>>>> Like on a bare metal system, one would expect that only CXL memory
>>>> will be marked as special and set aside for the cxl driver, such that
>>>> the OS can boot on ordinary DIMMs and cxl can online it etc.
>>>>
>>>> So maybe you would want to expose this memory to the VM using a
>>>> CXL-mem device? Or a DIMM?
>>>>
>>>> I assume the alternative is to tell the VM on the Linux kernel
>>>> cmdline to set EFI_MEMORY_SP on this memory. I recall that there is
>>>> a way to achieve that.
>>>>
>>> [Luo, Zhigang] I know this option, but it requires the end user to know
>>> where the memory is located on the guest side (start address, size).
>>
>> Right.
>>
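(For reference, the kernel-side knob being alluded to is presumably
efi_fake_mem=; on guest kernels that still carry that option, something
like "efi_fake_mem=16G@0x100000000:0x40000" tags the given range with
attribute 0x40000, i.e. EFI_MEMORY_SP -- and, as noted, it only works
if you already know the guest-physical start address and size.)
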
>>>
>>>
>>>>> In Linux, the SPM memory will be claimed by the hmem-dax driver by
>>>>> default. With this patch I can use the following config to pass the
>>>>> SPM memory to the guest VM:
>>>>>
>>>>> -object memory-backend-file,size=30G,id=m1,mem-path=/dev/dax0.0,prealloc=on,align=1G,hmem=on
>>>>>
>>>>> I was thinking to change the option name from "hmem" to "spm" to avoid confusion.
>>>>
>>>> Likely it should be specified elsewhere, that you want specific guest
>>>> RAM ranges to be EFI_MEMORY_SP. For a DIMM, it could be a property,
>>>> similarly maybe for CXL- mem devices (no expert on that).
>>>>
>>>> For boot memory / machine memory it could be a machine property. But
>>>> I'll first have to learn which ranges you actually want to expose
>>>> that way, and what the VM will do with that information.
>>> [Luo, Zhigang] we want to expose the SPM memory reserved for a specific device.
>>> We will pass the SPM memory and the device to the guest. Then the device
>>> driver can use the SPM memory on the guest side.
>>
>> Then the device driver should likely have a way to configure that, not the memory
>> backend.
>>
>> After all, the device driver will map it somehow into guest physical address space
>> (how?).
>>
> [Luo, Zhigang] from the guest's view, it's still system memory, but marked as SPM. So, QEMU will map the memory into guest physical address space.
> The device driver just claims to use the SPM memory on the guest side.
> 
>>>
>>>>
>>>>>
>>>>> Do you have any suggestions on how to achieve this more reasonably?
>>>>
>>>> The problem with qemu_ram_foreach_block() is that you would also
>>>> indicate DIMMs, virtio-mem, ... and even RAMBlocks that are not even
>>>> used for backing anything to the VM as EFI_MEMORY_SP, which is wrong.
>>> [Luo, Zhigang] qemu_ram_foreach_block() will list all memory blocks, but in
>>> pc_update_hmem_memory(), only the memory blocks with the "hmem" flag will be
>>> updated to SPM memory.
>>
>> Yes, but imagine a user passing such a memory backend to a DIMM/virtio-mem/boot
>> memory etc. It will have very undesired side effects.
>>
> [Luo, Zhigang] the user should know what he/she is doing when he/she sets the flag for the memory region.

No, we must not allow users to create insane configurations that don't
make any sense.

Sufficient to add:

-object memory-backend-file,size=16G,id=unused,mem-path=whatever,hmem=on

to the cmdline to cause a mess.
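
To make that concrete: the pc-side walk presumably looks roughly like
the sketch below (a sketch only, not the actual patch code;
qemu_ram_is_hmem() stands in for whatever flag accessor the patch adds).

#include "qemu/osdep.h"
#include "exec/cpu-common.h"
#include "e820_memory_layout.h"

static int pc_hmem_e820_cb(RAMBlock *rb, void *opaque)
{
    if (qemu_ram_is_hmem(rb)) {    /* hypothetical accessor for RAM_HMEM */
        /*
         * Nothing here proves that this RAMBlock actually backs guest
         * RAM: it may belong to a not-yet-plugged DIMM, to virtio-mem,
         * or to a backend that is not mapped anywhere at all -- yet it
         * would still be reported as soft reserved. (The ram_addr_t
         * offset is also not a guest physical address, which is a
         * separate problem with a global RAMBlock walk.)
         */
        e820_add_entry(qemu_ram_get_offset(rb),
                       qemu_ram_get_used_length(rb),
                       E820_SOFT_RESERVED);
    }
    return 0;
}

static void pc_update_hmem_memory(void)
{
    qemu_ram_foreach_block(pc_hmem_e820_cb, NULL);
}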


Maybe it should be a "numa" node configuration like

-numa node,nodeid=1,memdev=m1,sp=on
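
(Hypothetical syntax; to illustrate, the example from earlier in the
thread would then become

-object memory-backend-file,size=16G,id=m1,mem-path=/dev/dax0.0,prealloc=on,align=1G \
-numa node,nodeid=1,memdev=m1,sp=on \
-device pxb-pcie,id=pcie.1,numa_node=1,bus_nr=2,bus=pcie.0 \

so the SP attribute would be tied to the guest address range the numa
node covers, rather than to a backend object that may or may not end up
mapped into the guest.)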

But I recall that we discussed something related with Jonathan, so I'm 
hoping we can get his input.

-- 
Cheers,

David / dhildenb
RE: [PATCH] hostmem-file: add the 'hmem' option
Posted by Luo, Zhigang 5 months ago

Hi Jonathan,

Could you please provide your comments?

Thanks,
Zhigang

> -----Original Message-----
> From: David Hildenbrand <david@redhat.com>
> Sent: Tuesday, December 10, 2024 5:02 PM
> To: Luo, Zhigang <Zhigang.Luo@amd.com>; qemu-devel@nongnu.org
> Cc: kraxel@redhat.com; Igor Mammedov <imammedo@redhat.com>; Jonathan
> Cameron <Jonathan.Cameron@huawei.com>
> Subject: Re: [PATCH] hostmem-file: add the 'hmem' option
>
> [snip]