hw/riscv/Kconfig | 2 ++ hw/riscv/virt-acpi-build.c | 7 +++++ hw/riscv/virt.c | 64 +++++++++++++++++++++++++++++++++++++- hw/virtio/virtio-mem.c | 2 +- 4 files changed, 73 insertions(+), 2 deletions(-)
From: Björn Töpel <bjorn@rivosinc.com>
Virtio-based memory devices allow for dynamic resizing of virtual
machine memory, and require proper hotplugging (add/remove) support
to work.
Enable virtio-md-pci with the corresponding missing hotplugging
callbacks for the RISC-V "virt" machine.
Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
---
This is basic support for MHP that works with DT. There is some minor
ACPI SRAT plumbing in there as well. Ideally we'd like proper ACPI MHP
support as well. I have a branch [1], where I've applied this patch,
plus ACPI GED/PC-DIMM MHP support on top of Sunil's QEMU branch
(contains some ACPI DSDT additions) [2], for the curious/brave ones.
However, the ACPI MHP support is not testable on upstream Linux
yet (ACPI AIA support, and ACPI NUMA SRAT series are ongoing).
I'll follow up with proper ACPI GED/PC-DIMM MHP patches, once the
dependencies land (Linux kernel and QEMU).
I'll post the Linux MHP/virtio-mem v2 patches later this week!
Cheers,
Björn
[1] https://github.com/bjoto/qemu/commits/virtio-mem-pc-dimm-mhp-acpi/
[2] https://lore.kernel.org/linux-riscv/20240501121742.1215792-1-sunilvl@ventanamicro.com/
---
hw/riscv/Kconfig | 2 ++
hw/riscv/virt-acpi-build.c | 7 +++++
hw/riscv/virt.c | 64 +++++++++++++++++++++++++++++++++++++-
hw/virtio/virtio-mem.c | 2 +-
4 files changed, 73 insertions(+), 2 deletions(-)
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index a2030e3a6ff0..08f82dbb681a 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -56,6 +56,8 @@ config RISCV_VIRT
select PLATFORM_BUS
select ACPI
select ACPI_PCI
+ select VIRTIO_MEM_SUPPORTED
+ select VIRTIO_PMEM_SUPPORTED
config SHAKTI_C
bool
diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index 0925528160f8..6dc3baa9ec86 100644
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -610,6 +610,13 @@ build_srat(GArray *table_data, BIOSLinker *linker, RISCVVirtState *vms)
}
}
+ if (ms->device_memory) {
+ build_srat_memory(table_data, ms->device_memory->base,
+ memory_region_size(&ms->device_memory->mr),
+ ms->numa_state->num_nodes - 1,
+ MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
+ }
+
acpi_table_end(linker, &table);
}
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 4fdb66052587..16c2bdbfe6b6 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -53,6 +53,8 @@
#include "hw/pci-host/gpex.h"
#include "hw/display/ramfb.h"
#include "hw/acpi/aml-build.h"
+#include "hw/mem/memory-device.h"
+#include "hw/virtio/virtio-mem-pci.h"
#include "qapi/qapi-visit-common.h"
#include "hw/virtio/virtio-iommu.h"
@@ -1407,6 +1409,7 @@ static void virt_machine_init(MachineState *machine)
DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip;
int i, base_hartid, hart_count;
int socket_count = riscv_socket_count(machine);
+ hwaddr device_memory_base, device_memory_size;
/* Check socket count limit */
if (VIRT_SOCKETS_MAX < socket_count) {
@@ -1553,6 +1556,25 @@ static void virt_machine_init(MachineState *machine)
memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base,
mask_rom);
+ device_memory_base = ROUND_UP(s->memmap[VIRT_DRAM].base + machine->ram_size,
+ GiB);
+ device_memory_size = machine->maxram_size - machine->ram_size;
+
+ if (riscv_is_32bit(&s->soc[0])) {
+ hwaddr memtop = device_memory_base + ROUND_UP(device_memory_size, GiB);
+
+ if (memtop > UINT32_MAX) {
+ error_report("Memory exceeds 32-bit limit by %lu bytes",
+ memtop - UINT32_MAX);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ if (device_memory_size > 0) {
+ machine_memory_devices_init(machine, device_memory_base,
+ device_memory_size);
+ }
+
/*
* Init fw_cfg. Must be done before riscv_load_fdt, otherwise the
* device tree cannot be altered and we get FDT_ERR_NOSPACE.
@@ -1712,12 +1734,21 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
MachineClass *mc = MACHINE_GET_CLASS(machine);
if (device_is_dynamic_sysbus(mc, dev) ||
- object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
+ object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) ||
+ object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
return HOTPLUG_HANDLER(machine);
}
return NULL;
}
+static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
+ virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
+ }
+}
+
static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
@@ -1735,6 +1766,34 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
create_fdt_virtio_iommu(s, pci_get_bdf(PCI_DEVICE(dev)));
}
+
+ if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
+ virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
+ }
+}
+
+static void virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev,
+ Error **errp)
+{
+ if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
+ virtio_md_pci_unplug_request(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev),
+ errp);
+ } else {
+ error_setg(errp, "device unplug request for unsupported device type: %s",
+ object_get_typename(OBJECT(dev)));
+ }
+}
+
+static void virt_machine_device_unplug_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
+ virtio_md_pci_unplug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
+ } else {
+ error_setg(errp, "virt: device unplug for unsupported device"
+ " type: %s", object_get_typename(OBJECT(dev)));
+ }
}
static void virt_machine_class_init(ObjectClass *oc, void *data)
@@ -1757,7 +1816,10 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
+ hc->pre_plug = virt_machine_device_pre_plug_cb;
hc->plug = virt_machine_device_plug_cb;
+ hc->unplug_request = virt_machine_device_unplug_request_cb;
+ hc->unplug = virt_machine_device_unplug_cb;
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
#ifdef CONFIG_TPM
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index ffd119ebacb7..26c976a3b9b8 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -161,7 +161,7 @@ static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
* necessary (as the section size can change). But it's more likely that the
* section size will rather get smaller and not bigger over time.
*/
-#if defined(TARGET_X86_64) || defined(TARGET_I386)
+#if defined(TARGET_X86_64) || defined(TARGET_I386) || defined(TARGET_RISCV)
#define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
#elif defined(TARGET_ARM)
#define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB))
base-commit: 9360070196789cc8b9404b2efaf319384e64b107
--
2.40.1
Hi Björn, On 5/14/24 08:06, Björn Töpel wrote: > From: Björn Töpel <bjorn@rivosinc.com> > > Virtio-based memory devices allows for dynamic resizing of virtual > machine memory, and requires proper hotplugging (add/remove) support > to work. > > Enable virtio-md-pci with the corresponding missing hotplugging > callbacks for the RISC-V "virt" machine. > > Signed-off-by: Björn Töpel <bjorn@rivosinc.com> > --- > This is basic support for MHP that works with DT. There some minor > ACPI SRAT plumbing in there as well. Ideally we'd like proper ACPI MHP > support as well. I have a branch [1], where I've applied this patch, > plus ACPI GED/PC-DIMM MHP support on top of Sunil's QEMU branch > (contains some ACPI DSDT additions) [2], for the curious/brave ones. > However, the ACPI MHP support this is not testable on upstream Linux > yet (ACPI AIA support, and ACPI NUMA SRAT series are ongoing). > > I'll follow-up with proper ACPI GED/PC-DIMM MHP patches, once the > dependencies land (Linux kernel and QEMU). > > I'll post the Linux MHP/virtio-mem v2 patches later this week! 
> > > Cheers, > Björn > > [1] https://github.com/bjoto/qemu/commits/virtio-mem-pc-dimm-mhp-acpi/ > [2] https://lore.kernel.org/linux-riscv/20240501121742.1215792-1-sunilvl@ventanamicro.com/ > --- > hw/riscv/Kconfig | 2 ++ > hw/riscv/virt-acpi-build.c | 7 +++++ > hw/riscv/virt.c | 64 +++++++++++++++++++++++++++++++++++++- > hw/virtio/virtio-mem.c | 2 +- > 4 files changed, 73 insertions(+), 2 deletions(-) > > diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig > index a2030e3a6ff0..08f82dbb681a 100644 > --- a/hw/riscv/Kconfig > +++ b/hw/riscv/Kconfig > @@ -56,6 +56,8 @@ config RISCV_VIRT > select PLATFORM_BUS > select ACPI > select ACPI_PCI > + select VIRTIO_MEM_SUPPORTED > + select VIRTIO_PMEM_SUPPORTED > > config SHAKTI_C > bool > diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c > index 0925528160f8..6dc3baa9ec86 100644 > --- a/hw/riscv/virt-acpi-build.c > +++ b/hw/riscv/virt-acpi-build.c > @@ -610,6 +610,13 @@ build_srat(GArray *table_data, BIOSLinker *linker, RISCVVirtState *vms) > } > } > > + if (ms->device_memory) { > + build_srat_memory(table_data, ms->device_memory->base, > + memory_region_size(&ms->device_memory->mr), > + ms->numa_state->num_nodes - 1, > + MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); > + } > + > acpi_table_end(linker, &table); When the time comes I believe we'll want this chunk in a separated ACPI patch. 
> } > > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c > index 4fdb66052587..16c2bdbfe6b6 100644 > --- a/hw/riscv/virt.c > +++ b/hw/riscv/virt.c > @@ -53,6 +53,8 @@ > #include "hw/pci-host/gpex.h" > #include "hw/display/ramfb.h" > #include "hw/acpi/aml-build.h" > +#include "hw/mem/memory-device.h" > +#include "hw/virtio/virtio-mem-pci.h" > #include "qapi/qapi-visit-common.h" > #include "hw/virtio/virtio-iommu.h" > > @@ -1407,6 +1409,7 @@ static void virt_machine_init(MachineState *machine) > DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip; > int i, base_hartid, hart_count; > int socket_count = riscv_socket_count(machine); > + hwaddr device_memory_base, device_memory_size; > > /* Check socket count limit */ > if (VIRT_SOCKETS_MAX < socket_count) { > @@ -1553,6 +1556,25 @@ static void virt_machine_init(MachineState *machine) > memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base, > mask_rom); > > + device_memory_base = ROUND_UP(s->memmap[VIRT_DRAM].base + machine->ram_size, > + GiB); > + device_memory_size = machine->maxram_size - machine->ram_size; > + > + if (riscv_is_32bit(&s->soc[0])) { > + hwaddr memtop = device_memory_base + ROUND_UP(device_memory_size, GiB); > + > + if (memtop > UINT32_MAX) { > + error_report("Memory exceeds 32-bit limit by %lu bytes", > + memtop - UINT32_MAX); > + exit(EXIT_FAILURE); > + } > + } > + > + if (device_memory_size > 0) { > + machine_memory_devices_init(machine, device_memory_base, > + device_memory_size); > + } > + I think we need a design discussion before proceeding here. You're allocating all available memory as a memory device area, but in theory we might also support pc-dimm hotplugs (which would be the equivalent of adding physical RAM dimms to the board.) in the future too. If you're not familiar with this feature you can check it out the docs in [1]. 
As an example, the 'virt' ARM board (hw/arm/virt.c) reserves a space for this type of hotplug by checking how much 'ram_slots' we're allocating for it: device_memory_size = ms->maxram_size - ms->ram_size + ms->ram_slots * GiB; Other boards do the same with ms->ram_slots. We should consider doing it as well, now, even if we're not up to the point of supporting pc-dimm hotplug, to avoid having to change the memory layout later in the road and breaking existing setups. If we want to copy the ARM board, ram_slots is capped to ACPI_MAX_RAM_SLOTS (256). Each RAM slot is considered to be a 1GiB dimm, i.e. we would reserve 256GiB for them. Note: I do not have the visibility of discussions on the memory management space, and I might be missing details such as "we don't care about pc-dimm hotplug anymore, it's legacy, we're going to support only virtio-md-pci from now on". We had a situation like that with virtio-balloon and virtio-mem in the past, and I'm not sure if this might fall in the same category. I see that David Hildenbrand is also CCed in the patch so he'll let us know if I'm out of line with what I'm asking. [1] https://github.com/qemu/qemu/blob/master/docs/memory-hotplug.txt Thanks, Daniel > /* > * Init fw_cfg. Must be done before riscv_load_fdt, otherwise the > * device tree cannot be altered and we get FDT_ERR_NOSPACE. 
> @@ -1712,12 +1734,21 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, > MachineClass *mc = MACHINE_GET_CLASS(machine); > > if (device_is_dynamic_sysbus(mc, dev) || > - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { > + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || > + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { > return HOTPLUG_HANDLER(machine); > } > return NULL; > } > > +static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, > + DeviceState *dev, Error **errp) > +{ > + if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { > + virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); > + } > +} > + > static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, > DeviceState *dev, Error **errp) > { > @@ -1735,6 +1766,34 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, > if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { > create_fdt_virtio_iommu(s, pci_get_bdf(PCI_DEVICE(dev))); > } > + > + if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { > + virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); > + } > +} > + > +static void virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, > + DeviceState *dev, > + Error **errp) > +{ > + if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { > + virtio_md_pci_unplug_request(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), > + errp); > + } else { > + error_setg(errp, "device unplug request for unsupported device type: %s", > + object_get_typename(OBJECT(dev))); > + } > +} > + > +static void virt_machine_device_unplug_cb(HotplugHandler *hotplug_dev, > + DeviceState *dev, Error **errp) > +{ > + if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { > + virtio_md_pci_unplug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); > + } else { > + error_setg(errp, "virt: device unplug for unsupported device" > + " type: %s", 
object_get_typename(OBJECT(dev))); > + } > } > > static void virt_machine_class_init(ObjectClass *oc, void *data) > @@ -1757,7 +1816,10 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) > assert(!mc->get_hotplug_handler); > mc->get_hotplug_handler = virt_machine_get_hotplug_handler; > > + hc->pre_plug = virt_machine_device_pre_plug_cb; > hc->plug = virt_machine_device_plug_cb; > + hc->unplug_request = virt_machine_device_unplug_request_cb; > + hc->unplug = virt_machine_device_unplug_cb; > > machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); > #ifdef CONFIG_TPM > diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c > index ffd119ebacb7..26c976a3b9b8 100644 > --- a/hw/virtio/virtio-mem.c > +++ b/hw/virtio/virtio-mem.c > @@ -161,7 +161,7 @@ static bool virtio_mem_has_shared_zeropage(RAMBlock *rb) > * necessary (as the section size can change). But it's more likely that the > * section size will rather get smaller and not bigger over time. > */ > -#if defined(TARGET_X86_64) || defined(TARGET_I386) > +#if defined(TARGET_X86_64) || defined(TARGET_I386) || defined(TARGET_RISCV) > #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB)) > #elif defined(TARGET_ARM) > #define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB)) > > base-commit: 9360070196789cc8b9404b2efaf319384e64b107
Daniel, Thanks for taking a look! Daniel Henrique Barboza <dbarboza@ventanamicro.com> writes: > Hi Björj, > > On 5/14/24 08:06, Björn Töpel wrote: >> From: Björn Töpel <bjorn@rivosinc.com> >> >> Virtio-based memory devices allows for dynamic resizing of virtual >> machine memory, and requires proper hotplugging (add/remove) support >> to work. >> >> Enable virtio-md-pci with the corresponding missing hotplugging >> callbacks for the RISC-V "virt" machine. >> >> Signed-off-by: Björn Töpel <bjorn@rivosinc.com> >> --- >> This is basic support for MHP that works with DT. There some minor >> ACPI SRAT plumbing in there as well. Ideally we'd like proper ACPI MHP >> support as well. I have a branch [1], where I've applied this patch, >> plus ACPI GED/PC-DIMM MHP support on top of Sunil's QEMU branch >> (contains some ACPI DSDT additions) [2], for the curious/brave ones. >> However, the ACPI MHP support this is not testable on upstream Linux >> yet (ACPI AIA support, and ACPI NUMA SRAT series are ongoing). >> >> I'll follow-up with proper ACPI GED/PC-DIMM MHP patches, once the >> dependencies land (Linux kernel and QEMU). >> >> I'll post the Linux MHP/virtio-mem v2 patches later this week! 
>> >> >> Cheers, >> Björn >> >> [1] https://github.com/bjoto/qemu/commits/virtio-mem-pc-dimm-mhp-acpi/ >> [2] https://lore.kernel.org/linux-riscv/20240501121742.1215792-1-sunilvl@ventanamicro.com/ >> --- >> hw/riscv/Kconfig | 2 ++ >> hw/riscv/virt-acpi-build.c | 7 +++++ >> hw/riscv/virt.c | 64 +++++++++++++++++++++++++++++++++++++- >> hw/virtio/virtio-mem.c | 2 +- >> 4 files changed, 73 insertions(+), 2 deletions(-) >> >> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig >> index a2030e3a6ff0..08f82dbb681a 100644 >> --- a/hw/riscv/Kconfig >> +++ b/hw/riscv/Kconfig >> @@ -56,6 +56,8 @@ config RISCV_VIRT >> select PLATFORM_BUS >> select ACPI >> select ACPI_PCI >> + select VIRTIO_MEM_SUPPORTED >> + select VIRTIO_PMEM_SUPPORTED >> >> config SHAKTI_C >> bool >> diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c >> index 0925528160f8..6dc3baa9ec86 100644 >> --- a/hw/riscv/virt-acpi-build.c >> +++ b/hw/riscv/virt-acpi-build.c >> @@ -610,6 +610,13 @@ build_srat(GArray *table_data, BIOSLinker *linker, RISCVVirtState *vms) >> } >> } >> >> + if (ms->device_memory) { >> + build_srat_memory(table_data, ms->device_memory->base, >> + memory_region_size(&ms->device_memory->mr), >> + ms->numa_state->num_nodes - 1, >> + MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); >> + } >> + >> acpi_table_end(linker, &table); > > When the time comes I believe we'll want this chunk in a separated ACPI patch. Hmm, I first thought about adding this to the ACPI MHP series, but then realized that virtio-mem relies on SRAT for ACPI boots (again -- RISC-V Linux does not support that upstream yet...). Do you mean that you'd prefer this chunk in a separate patch? Björn
On 5/20/24 15:33, Björn Töpel wrote: > Daniel, > > Thanks for taking a look! > > Daniel Henrique Barboza <dbarboza@ventanamicro.com> writes: > >> Hi Björj, >> >> On 5/14/24 08:06, Björn Töpel wrote: >>> From: Björn Töpel <bjorn@rivosinc.com> >>> >>> Virtio-based memory devices allows for dynamic resizing of virtual >>> machine memory, and requires proper hotplugging (add/remove) support >>> to work. >>> >>> Enable virtio-md-pci with the corresponding missing hotplugging >>> callbacks for the RISC-V "virt" machine. >>> >>> Signed-off-by: Björn Töpel <bjorn@rivosinc.com> >>> --- >>> This is basic support for MHP that works with DT. There some minor >>> ACPI SRAT plumbing in there as well. Ideally we'd like proper ACPI MHP >>> support as well. I have a branch [1], where I've applied this patch, >>> plus ACPI GED/PC-DIMM MHP support on top of Sunil's QEMU branch >>> (contains some ACPI DSDT additions) [2], for the curious/brave ones. >>> However, the ACPI MHP support this is not testable on upstream Linux >>> yet (ACPI AIA support, and ACPI NUMA SRAT series are ongoing). >>> >>> I'll follow-up with proper ACPI GED/PC-DIMM MHP patches, once the >>> dependencies land (Linux kernel and QEMU). >>> >>> I'll post the Linux MHP/virtio-mem v2 patches later this week! 
>>> >>> >>> Cheers, >>> Björn >>> >>> [1] https://github.com/bjoto/qemu/commits/virtio-mem-pc-dimm-mhp-acpi/ >>> [2] https://lore.kernel.org/linux-riscv/20240501121742.1215792-1-sunilvl@ventanamicro.com/ >>> --- >>> hw/riscv/Kconfig | 2 ++ >>> hw/riscv/virt-acpi-build.c | 7 +++++ >>> hw/riscv/virt.c | 64 +++++++++++++++++++++++++++++++++++++- >>> hw/virtio/virtio-mem.c | 2 +- >>> 4 files changed, 73 insertions(+), 2 deletions(-) >>> >>> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig >>> index a2030e3a6ff0..08f82dbb681a 100644 >>> --- a/hw/riscv/Kconfig >>> +++ b/hw/riscv/Kconfig >>> @@ -56,6 +56,8 @@ config RISCV_VIRT >>> select PLATFORM_BUS >>> select ACPI >>> select ACPI_PCI >>> + select VIRTIO_MEM_SUPPORTED >>> + select VIRTIO_PMEM_SUPPORTED >>> >>> config SHAKTI_C >>> bool >>> diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c >>> index 0925528160f8..6dc3baa9ec86 100644 >>> --- a/hw/riscv/virt-acpi-build.c >>> +++ b/hw/riscv/virt-acpi-build.c >>> @@ -610,6 +610,13 @@ build_srat(GArray *table_data, BIOSLinker *linker, RISCVVirtState *vms) >>> } >>> } >>> >>> + if (ms->device_memory) { >>> + build_srat_memory(table_data, ms->device_memory->base, >>> + memory_region_size(&ms->device_memory->mr), >>> + ms->numa_state->num_nodes - 1, >>> + MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); >>> + } >>> + >>> acpi_table_end(linker, &table); >> >> When the time comes I believe we'll want this chunk in a separated ACPI patch. > > Hmm, I first thought about adding this to the ACPI MHP series, but then > realized that virtio-mem relies on SRAT for ACPI boots (again -- RISC-V > Linux does not support that upstream yet...). > > Do you mean that you'd prefer this chunk in a separate patch? TBH I wouldn't mind keeping this ACPI chunk here but I reckon that the ACPI subsystem review is usually done in separate, with a different set of people reviewing it and so on. We might as well keep it here for now. If more ACPI changes ended up being done (e.g. 
ACPI unit test changes) then doing a separated ACPI patch makes more sense. Thanks, Daniel > > > Björn
Hi, >> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c >> index 4fdb66052587..16c2bdbfe6b6 100644 >> --- a/hw/riscv/virt.c >> +++ b/hw/riscv/virt.c >> @@ -53,6 +53,8 @@ >> #include "hw/pci-host/gpex.h" >> #include "hw/display/ramfb.h" >> #include "hw/acpi/aml-build.h" >> +#include "hw/mem/memory-device.h" >> +#include "hw/virtio/virtio-mem-pci.h" >> #include "qapi/qapi-visit-common.h" >> #include "hw/virtio/virtio-iommu.h" >> >> @@ -1407,6 +1409,7 @@ static void virt_machine_init(MachineState *machine) >> DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip; >> int i, base_hartid, hart_count; >> int socket_count = riscv_socket_count(machine); >> + hwaddr device_memory_base, device_memory_size; >> >> /* Check socket count limit */ >> if (VIRT_SOCKETS_MAX < socket_count) { >> @@ -1553,6 +1556,25 @@ static void virt_machine_init(MachineState *machine) >> memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base, >> mask_rom); >> >> + device_memory_base = ROUND_UP(s->memmap[VIRT_DRAM].base + machine->ram_size, >> + GiB); >> + device_memory_size = machine->maxram_size - machine->ram_size; >> + >> + if (riscv_is_32bit(&s->soc[0])) { >> + hwaddr memtop = device_memory_base + ROUND_UP(device_memory_size, GiB); >> + >> + if (memtop > UINT32_MAX) { >> + error_report("Memory exceeds 32-bit limit by %lu bytes", >> + memtop - UINT32_MAX); >> + exit(EXIT_FAILURE); >> + } >> + } >> + >> + if (device_memory_size > 0) { >> + machine_memory_devices_init(machine, device_memory_base, >> + device_memory_size); >> + } >> + > > I think we need a design discussion before proceeding here. You're allocating all > available memory as a memory device area, but in theory we might also support > pc-dimm hotplugs (which would be the equivalent of adding physical RAM dimms to > the board.) in the future too. If you're not familiar with this feature you can > check it out the docs in [1]. Note that DIMMs are memory devices as well. 
You can plug into the memory device area both, ACPI-based memory devices (DIMM, NVDIMM) or virtio-based memory devices (virtio-mem, virtio-pmem). > > As an example, the 'virt' ARM board (hw/arm/virt.c) reserves a space for this > type of hotplug by checking how much 'ram_slots' we're allocating for it: > > device_memory_size = ms->maxram_size - ms->ram_size + ms->ram_slots * GiB; > Note that we increased the region size to be able to fit most requests even if alignment of memory devices is weird. See below. In sane setups, this is usually not required (adding a single additional GB for some flexiility might be good enough). > Other boards do the same with ms->ram_slots. We should consider doing it as well, > now, even if we're not up to the point of supporting pc-dimm hotplug, to avoid > having to change the memory layout later in the road and breaking existing > setups. > > If we want to copy the ARM board, ram_slots is capped to ACPI_MAX_RAM_SLOTS (256). > Each RAM slot is considered to be a 1GiB dimm, i.e. we would reserve 256GiB for > them. This only reserves some *additional* space to fixup weird alignment of memory devices. *not* the actual space for these devices. We don't consider each DIMM to be 1 GiB in size, but add an additional 1 GiB in case we have to align DIMMs in physical address space. I *think* this dates back to old x86 handling where we aligned the address of each DIMM to be at a 1 GiB boundary. So if you would have plugged two 128 MiB DIMMs, you'd have required more than 256 MiB of space in the area after aligning inside the memory device area. > > Note: I do not have the visibility of discussions on the memory management space, > and I might be missing details such as "we don't care about pc-dimm hotplug > anymore, it's legacy, we're going to support only virtio-md-pci from now on". We > had a situation like that with virtio-balloon and virtio-mem in the past, and I'm > not sure if this might fall in the same category. 
Not sure if I got your comment right, but virtio-mem was never supposed to be a virtio-balloon replacement (especially of the free-page-reporting and memory stats part). > > I see that David Hildenbrand is also CCed in the patch so he'll let us know if > I'm out of line with what I'm asking. Supporting PC-DIMMs might be required at some point when dealing with OSes that don't support virtio-mem and friends. -- Cheers, David / dhildenb
On 5/18/24 16:50, David Hildenbrand wrote: > > Hi, > > >>> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c >>> index 4fdb66052587..16c2bdbfe6b6 100644 >>> --- a/hw/riscv/virt.c >>> +++ b/hw/riscv/virt.c >>> @@ -53,6 +53,8 @@ >>> #include "hw/pci-host/gpex.h" >>> #include "hw/display/ramfb.h" >>> #include "hw/acpi/aml-build.h" >>> +#include "hw/mem/memory-device.h" >>> +#include "hw/virtio/virtio-mem-pci.h" >>> #include "qapi/qapi-visit-common.h" >>> #include "hw/virtio/virtio-iommu.h" >>> @@ -1407,6 +1409,7 @@ static void virt_machine_init(MachineState *machine) >>> DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip; >>> int i, base_hartid, hart_count; >>> int socket_count = riscv_socket_count(machine); >>> + hwaddr device_memory_base, device_memory_size; >>> /* Check socket count limit */ >>> if (VIRT_SOCKETS_MAX < socket_count) { >>> @@ -1553,6 +1556,25 @@ static void virt_machine_init(MachineState *machine) >>> memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base, >>> mask_rom); >>> + device_memory_base = ROUND_UP(s->memmap[VIRT_DRAM].base + machine->ram_size, >>> + GiB); >>> + device_memory_size = machine->maxram_size - machine->ram_size; >>> + >>> + if (riscv_is_32bit(&s->soc[0])) { >>> + hwaddr memtop = device_memory_base + ROUND_UP(device_memory_size, GiB); >>> + >>> + if (memtop > UINT32_MAX) { >>> + error_report("Memory exceeds 32-bit limit by %lu bytes", >>> + memtop - UINT32_MAX); >>> + exit(EXIT_FAILURE); >>> + } >>> + } >>> + >>> + if (device_memory_size > 0) { >>> + machine_memory_devices_init(machine, device_memory_base, >>> + device_memory_size); >>> + } >>> + >> >> I think we need a design discussion before proceeding here. You're allocating all >> available memory as a memory device area, but in theory we might also support >> pc-dimm hotplugs (which would be the equivalent of adding physical RAM dimms to >> the board.) in the future too. If you're not familiar with this feature you can >> check it out the docs in [1]. 
> > Note that DIMMs are memory devices as well. You can plug into the memory device area both, ACPI-based memory devices (DIMM, NVDIMM) or virtio-based memory devices (virtio-mem, virtio-pmem). > >> >> As an example, the 'virt' ARM board (hw/arm/virt.c) reserves a space for this >> type of hotplug by checking how much 'ram_slots' we're allocating for it: >> >> device_memory_size = ms->maxram_size - ms->ram_size + ms->ram_slots * GiB; >> > > Note that we increased the region size to be able to fit most requests even if alignment of memory devices is weird. See below. > > In sane setups, this is usually not required (adding a single additional GB for some flexiility might be good enough). > >> Other boards do the same with ms->ram_slots. We should consider doing it as well, >> now, even if we're not up to the point of supporting pc-dimm hotplug, to avoid >> having to change the memory layout later in the road and breaking existing >> setups. >> >> If we want to copy the ARM board, ram_slots is capped to ACPI_MAX_RAM_SLOTS (256). >> Each RAM slot is considered to be a 1GiB dimm, i.e. we would reserve 256GiB for >> them. > > This only reserves some *additional* space to fixup weird alignment of memory devices. *not* the actual space for these devices. > > We don't consider each DIMM to be 1 GiB in size, but add an additional 1 GiB in case we have to align DIMMs in physical address space. > > I *think* this dates back to old x86 handling where we aligned the address of each DIMM to be at a 1 GiB boundary. So if you would have plugged two 128 MiB DIMMs, you'd have required more than 256 MiB of space in the area after aligning inside the memory device area. > Thanks for the explanation. I missed the part where the ram_slots were being used just to solve potential alignment issues and pc-dimms could occupy the same space being allocated via machine_memory_devices_init(). This patch isn't far off then. 
If we take care to avoid plugging unaligned memory we might not even need this spare area. >> >> Note: I do not have the visibility of discussions on the memory management space, >> and I might be missing details such as "we don't care about pc-dimm hotplug >> anymore, it's legacy, we're going to support only virtio-md-pci from now on". We >> had a situation like that with virtio-balloon and virtio-mem in the past, and I'm >> not sure if this might fall in the same category. > > Not sure if I got your comment right, but virtio-mem was never supposed to be a virtio-balloon replacement (especially of the free-page-reporting and memory stats part). I was trying to refer to a situation we faced 3+ years ago in the powerpc machines, where we were trying to add virtio-mem support there given that virtio-mem is/was been seen (as far as I can remember anyways) as a more robust solution than virtio-balloon + DIMM hotplug for guest memory management from the host point of view. I'm probably misrepresenting the whole situation though, it has been awhile :) Thanks, Daniel > >> >> I see that David Hildenbrand is also CCed in the patch so he'll let us know if >> I'm out of line with what I'm asking. > > Supporting PC-DIMMs might be required at some point when dealing with OSes that don't support virtio-mem and friends. >
On 19.05.24 11:24, Daniel Henrique Barboza wrote: > > On 5/18/24 16:50, David Hildenbrand wrote: >> >> Hi, >> >> >>>> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c >>>> index 4fdb66052587..16c2bdbfe6b6 100644 >>>> --- a/hw/riscv/virt.c >>>> +++ b/hw/riscv/virt.c >>>> @@ -53,6 +53,8 @@ >>>> #include "hw/pci-host/gpex.h" >>>> #include "hw/display/ramfb.h" >>>> #include "hw/acpi/aml-build.h" >>>> +#include "hw/mem/memory-device.h" >>>> +#include "hw/virtio/virtio-mem-pci.h" >>>> #include "qapi/qapi-visit-common.h" >>>> #include "hw/virtio/virtio-iommu.h" >>>> @@ -1407,6 +1409,7 @@ static void virt_machine_init(MachineState *machine) >>>> DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip; >>>> int i, base_hartid, hart_count; >>>> int socket_count = riscv_socket_count(machine); >>>> + hwaddr device_memory_base, device_memory_size; >>>> /* Check socket count limit */ >>>> if (VIRT_SOCKETS_MAX < socket_count) { >>>> @@ -1553,6 +1556,25 @@ static void virt_machine_init(MachineState *machine) >>>> memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base, >>>> mask_rom); >>>> + device_memory_base = ROUND_UP(s->memmap[VIRT_DRAM].base + machine->ram_size, >>>> + GiB); >>>> + device_memory_size = machine->maxram_size - machine->ram_size; >>>> + >>>> + if (riscv_is_32bit(&s->soc[0])) { >>>> + hwaddr memtop = device_memory_base + ROUND_UP(device_memory_size, GiB); >>>> + >>>> + if (memtop > UINT32_MAX) { >>>> + error_report("Memory exceeds 32-bit limit by %lu bytes", >>>> + memtop - UINT32_MAX); >>>> + exit(EXIT_FAILURE); >>>> + } >>>> + } >>>> + >>>> + if (device_memory_size > 0) { >>>> + machine_memory_devices_init(machine, device_memory_base, >>>> + device_memory_size); >>>> + } >>>> + >>> >>> I think we need a design discussion before proceeding here. You're allocating all >>> available memory as a memory device area, but in theory we might also support >>> pc-dimm hotplugs (which would be the equivalent of adding physical RAM dimms to >>> the board.) 
in the future too. If you're not familiar with this feature you can >>> check it out the docs in [1]. >> >> Note that DIMMs are memory devices as well. You can plug into the memory device area both, ACPI-based memory devices (DIMM, NVDIMM) or virtio-based memory devices (virtio-mem, virtio-pmem). >> >>> >>> As an example, the 'virt' ARM board (hw/arm/virt.c) reserves a space for this >>> type of hotplug by checking how much 'ram_slots' we're allocating for it: >>> >>> device_memory_size = ms->maxram_size - ms->ram_size + ms->ram_slots * GiB; >>> >> >> Note that we increased the region size to be able to fit most requests even if alignment of memory devices is weird. See below. >> >> In sane setups, this is usually not required (adding a single additional GB for some flexiility might be good enough). >> >>> Other boards do the same with ms->ram_slots. We should consider doing it as well, >>> now, even if we're not up to the point of supporting pc-dimm hotplug, to avoid >>> having to change the memory layout later in the road and breaking existing >>> setups. >>> >>> If we want to copy the ARM board, ram_slots is capped to ACPI_MAX_RAM_SLOTS (256). >>> Each RAM slot is considered to be a 1GiB dimm, i.e. we would reserve 256GiB for >>> them. >> >> This only reserves some *additional* space to fixup weird alignment of memory devices. *not* the actual space for these devices. >> >> We don't consider each DIMM to be 1 GiB in size, but add an additional 1 GiB in case we have to align DIMMs in physical address space. >> >> I *think* this dates back to old x86 handling where we aligned the address of each DIMM to be at a 1 GiB boundary. So if you would have plugged two 128 MiB DIMMs, you'd have required more than 256 MiB of space in the area after aligning inside the memory device area. >> > > Thanks for the explanation. 
I missed the part where the ram_slots were being > used just to solve potential alignment issues and pc-dimms could occupy the same > space being allocated via machine_memory_devices_init(). > > This patch isn't far off then. If we take care to avoid plugging unaligned memory > we might not even need this spare area. Right. > >>> >>> Note: I do not have the visibility of discussions on the memory management space, >>> and I might be missing details such as "we don't care about pc-dimm hotplug >>> anymore, it's legacy, we're going to support only virtio-md-pci from now on". We >>> had a situation like that with virtio-balloon and virtio-mem in the past, and I'm >>> not sure if this might fall in the same category. >> >> Not sure if I got your comment right, but virtio-mem was never supposed to be a virtio-balloon replacement (especially of the free-page-reporting and memory stats part). > > I was trying to refer to a situation we faced 3+ years ago in the powerpc machines, > where we were trying to add virtio-mem support there given that virtio-mem is/was > been seen (as far as I can remember anyways) as a more robust solution than > virtio-balloon + DIMM hotplug for guest memory management from the host point of > view. I only heard once that ppc support was being worked on, but don't know of any details what the issue was. PPC+KVM is not really a priority anymore, so my primary focus for the future is adding s390x support (I had prototypes but some things are tricky). Supporting architectures that do their own weird paravirtualized stuff already or don't really support memory hotplug is a bit more tricky than the others. PPC with dlpar falls into that category :) -- Cheers, David / dhildenb
Daniel/David, Daniel Henrique Barboza <dbarboza@ventanamicro.com> writes: > On 5/18/24 16:50, David Hildenbrand wrote: >> >> Hi, >> >> >>>> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c >>>> index 4fdb66052587..16c2bdbfe6b6 100644 >>>> --- a/hw/riscv/virt.c >>>> +++ b/hw/riscv/virt.c >>>> @@ -53,6 +53,8 @@ >>>> #include "hw/pci-host/gpex.h" >>>> #include "hw/display/ramfb.h" >>>> #include "hw/acpi/aml-build.h" >>>> +#include "hw/mem/memory-device.h" >>>> +#include "hw/virtio/virtio-mem-pci.h" >>>> #include "qapi/qapi-visit-common.h" >>>> #include "hw/virtio/virtio-iommu.h" >>>> @@ -1407,6 +1409,7 @@ static void virt_machine_init(MachineState *machine) >>>> DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip; >>>> int i, base_hartid, hart_count; >>>> int socket_count = riscv_socket_count(machine); >>>> + hwaddr device_memory_base, device_memory_size; >>>> /* Check socket count limit */ >>>> if (VIRT_SOCKETS_MAX < socket_count) { >>>> @@ -1553,6 +1556,25 @@ static void virt_machine_init(MachineState *machine) >>>> memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base, >>>> mask_rom); >>>> + device_memory_base = ROUND_UP(s->memmap[VIRT_DRAM].base + machine->ram_size, >>>> + GiB); >>>> + device_memory_size = machine->maxram_size - machine->ram_size; >>>> + >>>> + if (riscv_is_32bit(&s->soc[0])) { >>>> + hwaddr memtop = device_memory_base + ROUND_UP(device_memory_size, GiB); >>>> + >>>> + if (memtop > UINT32_MAX) { >>>> + error_report("Memory exceeds 32-bit limit by %lu bytes", >>>> + memtop - UINT32_MAX); >>>> + exit(EXIT_FAILURE); >>>> + } >>>> + } >>>> + >>>> + if (device_memory_size > 0) { >>>> + machine_memory_devices_init(machine, device_memory_base, >>>> + device_memory_size); >>>> + } >>>> + >>> >>> I think we need a design discussion before proceeding here. 
You're allocating all >>> available memory as a memory device area, but in theory we might also support >>> pc-dimm hotplugs (which would be the equivalent of adding physical RAM dimms to >>> the board.) in the future too. If you're not familiar with this feature you can >>> check it out the docs in [1]. >> >> Note that DIMMs are memory devices as well. You can plug into the memory device area both, ACPI-based memory devices (DIMM, NVDIMM) or virtio-based memory devices (virtio-mem, virtio-pmem). >> >>> >>> As an example, the 'virt' ARM board (hw/arm/virt.c) reserves a space for this >>> type of hotplug by checking how much 'ram_slots' we're allocating for it: >>> >>> device_memory_size = ms->maxram_size - ms->ram_size + ms->ram_slots * GiB; >>> >> >> Note that we increased the region size to be able to fit most requests even if alignment of memory devices is weird. See below. >> >> In sane setups, this is usually not required (adding a single additional GB for some flexiility might be good enough). >> >>> Other boards do the same with ms->ram_slots. We should consider doing it as well, >>> now, even if we're not up to the point of supporting pc-dimm hotplug, to avoid >>> having to change the memory layout later in the road and breaking existing >>> setups. >>> >>> If we want to copy the ARM board, ram_slots is capped to ACPI_MAX_RAM_SLOTS (256). >>> Each RAM slot is considered to be a 1GiB dimm, i.e. we would reserve 256GiB for >>> them. >> >> This only reserves some *additional* space to fixup weird alignment of memory devices. *not* the actual space for these devices. >> >> We don't consider each DIMM to be 1 GiB in size, but add an additional 1 GiB in case we have to align DIMMs in physical address space. >> >> I *think* this dates back to old x86 handling where we aligned the address of each DIMM to be at a 1 GiB boundary. 
So if you would have plugged two 128 MiB DIMMs, you'd have required more than 256 MiB of space in the area after aligning inside the memory device area. >> > > Thanks for the explanation. I missed the part where the ram_slots were being > used just to solve potential alignment issues and pc-dimms could occupy the same > space being allocated via machine_memory_devices_init(). > > This patch isn't far off then. If we take care to avoid plugging unaligned memory > we might not even need this spare area. I'm a bit lost here, so please bear with me. We don't require the 1 GiB alignment on RV AFAIU. I'm having a hard time figuring out what's missing in my patch. [...] >>> I see that David Hildenbrand is also CCed in the patch so he'll let us know if >>> I'm out of line with what I'm asking. >> >> Supporting PC-DIMMs might be required at some point when dealing with OSes that don't support virtio-mem and friends. ...and also for testing the PC-DIMM ACPI patching path. ;-) Cheers, Björn
On 5/20/24 15:51, Björn Töpel wrote: > Daniel/David, > > Daniel Henrique Barboza <dbarboza@ventanamicro.com> writes: > >> On 5/18/24 16:50, David Hildenbrand wrote: >>> >>> Hi, >>> >>> >>>>> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c >>>>> index 4fdb66052587..16c2bdbfe6b6 100644 >>>>> --- a/hw/riscv/virt.c >>>>> +++ b/hw/riscv/virt.c >>>>> @@ -53,6 +53,8 @@ >>>>> #include "hw/pci-host/gpex.h" >>>>> #include "hw/display/ramfb.h" >>>>> #include "hw/acpi/aml-build.h" >>>>> +#include "hw/mem/memory-device.h" >>>>> +#include "hw/virtio/virtio-mem-pci.h" >>>>> #include "qapi/qapi-visit-common.h" >>>>> #include "hw/virtio/virtio-iommu.h" >>>>> @@ -1407,6 +1409,7 @@ static void virt_machine_init(MachineState *machine) >>>>> DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip; >>>>> int i, base_hartid, hart_count; >>>>> int socket_count = riscv_socket_count(machine); >>>>> + hwaddr device_memory_base, device_memory_size; >>>>> /* Check socket count limit */ >>>>> if (VIRT_SOCKETS_MAX < socket_count) { >>>>> @@ -1553,6 +1556,25 @@ static void virt_machine_init(MachineState *machine) >>>>> memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base, >>>>> mask_rom); >>>>> + device_memory_base = ROUND_UP(s->memmap[VIRT_DRAM].base + machine->ram_size, >>>>> + GiB); >>>>> + device_memory_size = machine->maxram_size - machine->ram_size; >>>>> + >>>>> + if (riscv_is_32bit(&s->soc[0])) { >>>>> + hwaddr memtop = device_memory_base + ROUND_UP(device_memory_size, GiB); >>>>> + >>>>> + if (memtop > UINT32_MAX) { >>>>> + error_report("Memory exceeds 32-bit limit by %lu bytes", >>>>> + memtop - UINT32_MAX); >>>>> + exit(EXIT_FAILURE); >>>>> + } >>>>> + } >>>>> + >>>>> + if (device_memory_size > 0) { >>>>> + machine_memory_devices_init(machine, device_memory_base, >>>>> + device_memory_size); >>>>> + } >>>>> + >>>> >>>> I think we need a design discussion before proceeding here. 
You're allocating all >>>> available memory as a memory device area, but in theory we might also support >>>> pc-dimm hotplugs (which would be the equivalent of adding physical RAM dimms to >>>> the board.) in the future too. If you're not familiar with this feature you can >>>> check it out the docs in [1]. >>> >>> Note that DIMMs are memory devices as well. You can plug into the memory device area both, ACPI-based memory devices (DIMM, NVDIMM) or virtio-based memory devices (virtio-mem, virtio-pmem). >>> >>>> >>>> As an example, the 'virt' ARM board (hw/arm/virt.c) reserves a space for this >>>> type of hotplug by checking how much 'ram_slots' we're allocating for it: >>>> >>>> device_memory_size = ms->maxram_size - ms->ram_size + ms->ram_slots * GiB; >>>> >>> >>> Note that we increased the region size to be able to fit most requests even if alignment of memory devices is weird. See below. >>> >>> In sane setups, this is usually not required (adding a single additional GB for some flexiility might be good enough). >>> >>>> Other boards do the same with ms->ram_slots. We should consider doing it as well, >>>> now, even if we're not up to the point of supporting pc-dimm hotplug, to avoid >>>> having to change the memory layout later in the road and breaking existing >>>> setups. >>>> >>>> If we want to copy the ARM board, ram_slots is capped to ACPI_MAX_RAM_SLOTS (256). >>>> Each RAM slot is considered to be a 1GiB dimm, i.e. we would reserve 256GiB for >>>> them. >>> >>> This only reserves some *additional* space to fixup weird alignment of memory devices. *not* the actual space for these devices. >>> >>> We don't consider each DIMM to be 1 GiB in size, but add an additional 1 GiB in case we have to align DIMMs in physical address space. >>> >>> I *think* this dates back to old x86 handling where we aligned the address of each DIMM to be at a 1 GiB boundary. 
So if you would have plugged two 128 MiB DIMMs, you'd have required more than 256 MiB of space in the area after aligning inside the memory device area. >>> >> >> Thanks for the explanation. I missed the part where the ram_slots were being >> used just to solve potential alignment issues and pc-dimms could occupy the same >> space being allocated via machine_memory_devices_init(). >> >> This patch isn't far off then. If we take care to avoid plugging unaligned memory >> we might not even need this spare area. > > I'm a bit lost here, so please bare with me. We don't require the 1 GiB > alignment on RV AFAIU. I'm having a hard time figuring out what missing > in my patch. Forget about the 1 GiB size. This is something that we won't need to deal with because we don't align in 1 GiB. Let's say for example that we want to support pc-dimm hotplug of 256 slots like the 'virt' ARM machine does. Let's also say that we will allow users to hotplug any DIMM size they want, taking care of any alignment issues by ourselves. In hw/riscv/boot.c I see that our alignment sizes are 4Mb for 32 bits and 2Mb for 64 bits. Forget 32 bits a bit and let's say that our alignment is 2Mb. So, in a worst case scenario, a user could hotplug 256 slots, all of them unaligned, and then we would need to align each one of them by adding 2Mb. So, to account for this alignment fixup, we would need 256 * 2Mb RAM reserved for it. What I said about "If we take care to avoid plugging unaligned memory we might not even need this spare area" is a possible design where we would force every hotplugged DIMM to always be memory aligned, avoiding the need for this spare RAM for alignment. This would put a bit of extra strain on users/management apps to always deliver aligned DIMMs. In hindsight this is not needed. It's fairly easy to reserve ACPI_MAX_RAM_SLOTS * (2Mb/4Mb) and let users hotplug whatever DIMM size they want. Hope this explains the situation a bit. 
If we agree on allocating this spare RAM for hotplugged mem alignment, we'll probably need a pre-patch to do a little handling of ms->ram_slots, limiting it to ACPI_MAX_RAM_SLOTS (ram_slots can be changed via command line). Then it's a matter of reserving ms->ram_slots * align_size when calculating device_memory_size. Thanks, Daniel > > [...] > >>>> I see that David Hildenbrand is also CCed in the patch so he'll let us know if >>>> I'm out of line with what I'm asking. >>> >>> Supporting PC-DIMMs might be required at some point when dealing with OSes that don't support virtio-mem and friends. > > ...and also for testing the PC-DIMM ACPI patching path. ;-) > > > Cheers, > Björn
Daniel Henrique Barboza <dbarboza@ventanamicro.com> writes: > On 5/20/24 15:51, Björn Töpel wrote: >> Daniel/David, >> >> Daniel Henrique Barboza <dbarboza@ventanamicro.com> writes: >> >>> On 5/18/24 16:50, David Hildenbrand wrote: >>>> >>>> Hi, >>>> >>>> >>>>>> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c >>>>>> index 4fdb66052587..16c2bdbfe6b6 100644 >>>>>> --- a/hw/riscv/virt.c >>>>>> +++ b/hw/riscv/virt.c >>>>>> @@ -53,6 +53,8 @@ >>>>>> #include "hw/pci-host/gpex.h" >>>>>> #include "hw/display/ramfb.h" >>>>>> #include "hw/acpi/aml-build.h" >>>>>> +#include "hw/mem/memory-device.h" >>>>>> +#include "hw/virtio/virtio-mem-pci.h" >>>>>> #include "qapi/qapi-visit-common.h" >>>>>> #include "hw/virtio/virtio-iommu.h" >>>>>> @@ -1407,6 +1409,7 @@ static void virt_machine_init(MachineState *machine) >>>>>> DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip; >>>>>> int i, base_hartid, hart_count; >>>>>> int socket_count = riscv_socket_count(machine); >>>>>> + hwaddr device_memory_base, device_memory_size; >>>>>> /* Check socket count limit */ >>>>>> if (VIRT_SOCKETS_MAX < socket_count) { >>>>>> @@ -1553,6 +1556,25 @@ static void virt_machine_init(MachineState *machine) >>>>>> memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base, >>>>>> mask_rom); >>>>>> + device_memory_base = ROUND_UP(s->memmap[VIRT_DRAM].base + machine->ram_size, >>>>>> + GiB); >>>>>> + device_memory_size = machine->maxram_size - machine->ram_size; >>>>>> + >>>>>> + if (riscv_is_32bit(&s->soc[0])) { >>>>>> + hwaddr memtop = device_memory_base + ROUND_UP(device_memory_size, GiB); >>>>>> + >>>>>> + if (memtop > UINT32_MAX) { >>>>>> + error_report("Memory exceeds 32-bit limit by %lu bytes", >>>>>> + memtop - UINT32_MAX); >>>>>> + exit(EXIT_FAILURE); >>>>>> + } >>>>>> + } >>>>>> + >>>>>> + if (device_memory_size > 0) { >>>>>> + machine_memory_devices_init(machine, device_memory_base, >>>>>> + device_memory_size); >>>>>> + } >>>>>> + >>>>> >>>>> I think we need a design discussion 
before proceeding here. You're allocating all >>>>> available memory as a memory device area, but in theory we might also support >>>>> pc-dimm hotplugs (which would be the equivalent of adding physical RAM dimms to >>>>> the board.) in the future too. If you're not familiar with this feature you can >>>>> check it out the docs in [1]. >>>> >>>> Note that DIMMs are memory devices as well. You can plug into the memory device area both, ACPI-based memory devices (DIMM, NVDIMM) or virtio-based memory devices (virtio-mem, virtio-pmem). >>>> >>>>> >>>>> As an example, the 'virt' ARM board (hw/arm/virt.c) reserves a space for this >>>>> type of hotplug by checking how much 'ram_slots' we're allocating for it: >>>>> >>>>> device_memory_size = ms->maxram_size - ms->ram_size + ms->ram_slots * GiB; >>>>> >>>> >>>> Note that we increased the region size to be able to fit most requests even if alignment of memory devices is weird. See below. >>>> >>>> In sane setups, this is usually not required (adding a single additional GB for some flexiility might be good enough). >>>> >>>>> Other boards do the same with ms->ram_slots. We should consider doing it as well, >>>>> now, even if we're not up to the point of supporting pc-dimm hotplug, to avoid >>>>> having to change the memory layout later in the road and breaking existing >>>>> setups. >>>>> >>>>> If we want to copy the ARM board, ram_slots is capped to ACPI_MAX_RAM_SLOTS (256). >>>>> Each RAM slot is considered to be a 1GiB dimm, i.e. we would reserve 256GiB for >>>>> them. >>>> >>>> This only reserves some *additional* space to fixup weird alignment of memory devices. *not* the actual space for these devices. >>>> >>>> We don't consider each DIMM to be 1 GiB in size, but add an additional 1 GiB in case we have to align DIMMs in physical address space. >>>> >>>> I *think* this dates back to old x86 handling where we aligned the address of each DIMM to be at a 1 GiB boundary. 
So if you would have plugged two 128 MiB DIMMs, you'd have required more than 256 MiB of space in the area after aligning inside the memory device area. >>>> >>> >>> Thanks for the explanation. I missed the part where the ram_slots were being >>> used just to solve potential alignment issues and pc-dimms could occupy the same >>> space being allocated via machine_memory_devices_init(). >>> >>> This patch isn't far off then. If we take care to avoid plugging unaligned memory >>> we might not even need this spare area. >> >> I'm a bit lost here, so please bare with me. We don't require the 1 GiB >> alignment on RV AFAIU. I'm having a hard time figuring out what missing >> in my patch. > > Forget about the 1 GiB size. This is something that we won't need to deal with > because we don't align in 1 Gib. > > Let's say for example that we want to support pc-dimm hotplug of 256 slots like the > 'virt' ARM machine does. Let's also say that we will allow users to hotplug any > DIMM size they want, taking care of any alignment issues by ourselves. > > In hw/riscv/boot.c I see that our alignment sizes are 4Mb for 32 bits and 2Mb for > 64 bits. Forget 32 bits a bit and let's say that our alignment is 2Mb. > > So, in a worst case scenario, an user could hotplug 256 slots, all of them unaligned, > and then we would need to align each one of them by adding 2Mb. So, to account for > this alignment fixup, we would need 256 * 2Mb RAM reserved for it. > > What I said about "If we take care to avoid plugging unaligned memory we might not even > need this spare area" is a possible design where we would force every hotplugged DIMM > to always be memory aligned, avoiding the need for this spare RAM for alignment. This > would put a bit of extra straing in users/management apps to always deliver aligned > DIMMs. > > In hindsight this is not needed. It's fairly easy to reserve ACPI_MAX_RAM_SLOTS * (2Mb/4Mb) > and let users hotplug whatever DIMM size they want. 
> > Hope this explains the situation a bit. If we agree on allocating this spare RAM for > hotplugged mem alignment, we'll probalby need a pre-patch to do a little handling of > ms->ram_slots, limiting it to ACPI_MAX_RAM_SLOTS (ram_slots can be changed via command > line). Then it's a matter of reserving ms->ram_slots * align_size when calculating > device_memory_size. Thanks for the elaborate explanation! I'll take a stab at it in v2. Björn
© 2016 - 2024 Red Hat, Inc.