docs/system/arm/virt.rst | 4 ++++ hw/arm/virt.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+)
The MMIO region size required to support virtualized environments with
large PCI BAR regions can exceed the hardcoded limit configured in QEMU.
For example, a VM with multiple NVIDIA Grace-Hopper GPUs passed through
requires more MMIO memory than the amount provided by VIRT_HIGH_PCIE_MMIO
(currently 512GB). Instead of updating VIRT_HIGH_PCIE_MMIO, introduce a
new parameter, highmem-mmio-size, that specifies the MMIO size required
to support the VM configuration.
Example usage with 1TB MMIO region size:
-machine virt,gic-version=3,highmem-mmio-size=1T
Signed-off-by: Matthew R. Ochs <mochs@nvidia.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
v4: - Added default size to highmem-mmio-size description
v3: - Updated highmem-mmio-size description
v2: - Add unit suffix to example in commit message
- Use existing "high memory region" terminology
- Resolve minor braces nit
docs/system/arm/virt.rst | 4 ++++
hw/arm/virt.c | 38 ++++++++++++++++++++++++++++++++++++++
2 files changed, 42 insertions(+)
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index e67e7f0f7c50..6ff1de1ecbba 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -138,6 +138,10 @@ highmem-mmio
Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO.
The default is ``on``.
+highmem-mmio-size
+ Set the high memory region size for PCI MMIO. Must be a power-of-2 and
+ greater than or equal to the default size (512G).
+
gic-version
Specify the version of the Generic Interrupt Controller (GIC) to provide.
Valid values are:
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 49eb0355ef0c..d8d62df43f04 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2773,6 +2773,36 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
vms->highmem_mmio = value;
}
+static void virt_get_highmem_mmio_size(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ uint64_t size = extended_memmap[VIRT_HIGH_PCIE_MMIO].size;
+
+ visit_type_size(v, name, &size, errp);
+}
+
+static void virt_set_highmem_mmio_size(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ uint64_t size;
+
+ if (!visit_type_size(v, name, &size, errp)) {
+ return;
+ }
+
+ if (!is_power_of_2(size)) {
+ error_setg(errp, "highmem_mmio_size is not a power-of-2");
+ return;
+ }
+
+ if (size < extended_memmap[VIRT_HIGH_PCIE_MMIO].size) {
+ error_setg(errp, "highmem_mmio_size is less than the default (%lu)",
+ extended_memmap[VIRT_HIGH_PCIE_MMIO].size);
+ return;
+ }
+
+ extended_memmap[VIRT_HIGH_PCIE_MMIO].size = size;
+}
static bool virt_get_its(Object *obj, Error **errp)
{
@@ -3446,6 +3476,14 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
"Set on/off to enable/disable high "
"memory region for PCI MMIO");
+ object_class_property_add(oc, "highmem-mmio-size", "size",
+ virt_get_highmem_mmio_size,
+ virt_set_highmem_mmio_size,
+ NULL, NULL);
+ object_class_property_set_description(oc, "highmem-mmio-size",
+ "Set the high memory region size "
+ "for PCI MMIO");
+
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
virt_set_gic_version);
object_class_property_set_description(oc, "gic-version",
--
2.46.0
On Wed, 12 Feb 2025 at 14:55, Matthew R. Ochs <mochs@nvidia.com> wrote: > > The MMIO region size required to support virtualized environments with > large PCI BAR regions can exceed the hardcoded limit configured in QEMU. > For example, a VM with multiple NVIDIA Grace-Hopper GPUs passed through > requires more MMIO memory than the amount provided by VIRT_HIGH_PCIE_MMIO > (currently 512GB). Instead of updating VIRT_HIGH_PCIE_MMIO, introduce a > new parameter, highmem-mmio-size, that specifies the MMIO size required > to support the VM configuration. > > Example usage with 1TB MMIO region size: > -machine virt,gic-version=3,highmem-mmio-size=1T > > Signed-off-by: Matthew R. Ochs <mochs@nvidia.com> > Reviewed-by: Gavin Shan <gshan@redhat.com> > Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> > --- > v4: - Added default size to highmem-mmio-size description > v3: - Updated highmem-mmio-size description > v2: - Add unit suffix to example in commit message > - Use existing "high memory region" terminology > - Resolve minor braces nit > > docs/system/arm/virt.rst | 4 ++++ > hw/arm/virt.c | 38 ++++++++++++++++++++++++++++++++++++++ > 2 files changed, 42 insertions(+) > > diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst > index e67e7f0f7c50..6ff1de1ecbba 100644 > --- a/docs/system/arm/virt.rst > +++ b/docs/system/arm/virt.rst > @@ -138,6 +138,10 @@ highmem-mmio > Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO. > The default is ``on``. > > +highmem-mmio-size > + Set the high memory region size for PCI MMIO. Must be a power-of-2 and Nit: no hyphens in "power of 2" here, or in the error message text. > + greater than or equal to the default size (512G). > + > gic-version > Specify the version of the Generic Interrupt Controller (GIC) to provide. > Valid values are: > diff --git a/hw/arm/virt.c b/hw/arm/virt.c > index 49eb0355ef0c..d8d62df43f04 100644 > --- a/hw/arm/virt.c > +++ b/hw/arm/virt.c > @@ -2773,6 +2773,36 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) > vms->highmem_mmio = value; > } > > +static void virt_get_highmem_mmio_size(Object *obj, Visitor *v, const char *name, > + void *opaque, Error **errp) > +{ > + uint64_t size = extended_memmap[VIRT_HIGH_PCIE_MMIO].size; > + > + visit_type_size(v, name, &size, errp); > +} > + > +static void virt_set_highmem_mmio_size(Object *obj, Visitor *v, const char *name, > + void *opaque, Error **errp) > +{ > + uint64_t size; > + > + if (!visit_type_size(v, name, &size, errp)) { > + return; > + } > + > + if (!is_power_of_2(size)) { > + error_setg(errp, "highmem_mmio_size is not a power-of-2"); In these error messages we want to use the property name, which has hyphens, not underscores. > + return; > + } > + > + if (size < extended_memmap[VIRT_HIGH_PCIE_MMIO].size) { > + error_setg(errp, "highmem_mmio_size is less than the default (%lu)", > + extended_memmap[VIRT_HIGH_PCIE_MMIO].size); The size member here is a hwaddr, so %lu is the wrong format string for it (it won't compile on 32-bit hosts). You want HWADDR_PRIu. Printing 512GB as a decimal integer isn't very user friendly in any case. > + return; > + } > + > + extended_memmap[VIRT_HIGH_PCIE_MMIO].size = size; Because we set this field to the new size, if we try to set the property twice, once to a large value and then again to a value smaller than the first one, we'll print an odd error message claiming the default value to be something other than the default value. (Luckily I think you can't do this via the command line; but you might be able to via QMP.) Suggestion: /* If changing this, update the docs for highmem-mmio-size */ #define DEFAULT_HIGH_PCIE_MMIO_SIZE_GB 512 #define DEFAULT_HIGH_PCIE_MMIO_SIZE (DEFAULT_HIGH_PCIE_MMIO_SIZE_GB * GiB) and use it in the definition of extended_memmap[] and in the "if (size < ...)" test for the "smaller than default" check. Have the error message say "highmem_mmio_size cannot be set to a lower value than the default (%d GiB)", DEFAULT_HIGH_PCIE_MMIO_SIZE_GB I agree also with Eric that we should update the comment on extended_memmap[] with e.g. * Note that the highmem_mmio_size property will update the * high PCIE MMIO size field in this array. thanks -- PMM
On Mon, 17 Feb 2025 at 15:35, Peter Maydell <peter.maydell@linaro.org> wrote: > /* If changing this, update the docs for highmem-mmio-size */ > #define DEFAULT_HIGH_PCIE_MMIO_SIZE_GB 512 > #define DEFAULT_HIGH_PCIE_MMIO_SIZE (DEFAULT_HIGH_PCIE_MMIO_SIZE_GB * GiB) > > and use it in the definition of extended_memmap[] and in > the "if (size < ...)" test for the "smaller than default" check. > > Have the error message say > "highmem_mmio_size cannot be set to a lower value than the default (%d GiB)", > DEFAULT_HIGH_PCIE_MMIO_SIZE_GB ...or better, use size_to_str() from qemu/cutils.h, which will pretty-print a number into a human-readable form with a GiB or whatever suffix. thanks -- PMM
On 2/12/25 3:54 PM, Matthew R. Ochs wrote: > The MMIO region size required to support virtualized environments with > large PCI BAR regions can exceed the hardcoded limit configured in QEMU. > For example, a VM with multiple NVIDIA Grace-Hopper GPUs passed through > requires more MMIO memory than the amount provided by VIRT_HIGH_PCIE_MMIO > (currently 512GB). Instead of updating VIRT_HIGH_PCIE_MMIO, introduce a > new parameter, highmem-mmio-size, that specifies the MMIO size required > to support the VM configuration. > > Example usage with 1TB MMIO region size: > -machine virt,gic-version=3,highmem-mmio-size=1T > > Signed-off-by: Matthew R. Ochs <mochs@nvidia.com> > Reviewed-by: Gavin Shan <gshan@redhat.com> > Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> > --- > v4: - Added default size to highmem-mmio-size description > v3: - Updated highmem-mmio-size description > v2: - Add unit suffix to example in commit message > - Use existing "high memory region" terminology > - Resolve minor braces nit > > docs/system/arm/virt.rst | 4 ++++ > hw/arm/virt.c | 38 ++++++++++++++++++++++++++++++++++++++ > 2 files changed, 42 insertions(+) > > diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst > index e67e7f0f7c50..6ff1de1ecbba 100644 > --- a/docs/system/arm/virt.rst > +++ b/docs/system/arm/virt.rst > @@ -138,6 +138,10 @@ highmem-mmio > Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO. > The default is ``on``. > > +highmem-mmio-size > + Set the high memory region size for PCI MMIO. Must be a power-of-2 and > + greater than or equal to the default size (512G). > + > gic-version > Specify the version of the Generic Interrupt Controller (GIC) to provide. > Valid values are: > diff --git a/hw/arm/virt.c b/hw/arm/virt.c > index 49eb0355ef0c..d8d62df43f04 100644 > --- a/hw/arm/virt.c > +++ b/hw/arm/virt.c > @@ -2773,6 +2773,36 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) > vms->highmem_mmio = value; > } > > +static void virt_get_highmem_mmio_size(Object *obj, Visitor *v, const char *name, > + void *opaque, Error **errp) > +{ > + uint64_t size = extended_memmap[VIRT_HIGH_PCIE_MMIO].size; > + > + visit_type_size(v, name, &size, errp); > +} > + > +static void virt_set_highmem_mmio_size(Object *obj, Visitor *v, const char *name, > + void *opaque, Error **errp) > +{ > + uint64_t size; > + > + if (!visit_type_size(v, name, &size, errp)) { > + return; > + } > + > + if (!is_power_of_2(size)) { > + error_setg(errp, "highmem_mmio_size is not a power-of-2"); > + return; > + } > + > + if (size < extended_memmap[VIRT_HIGH_PCIE_MMIO].size) { > + error_setg(errp, "highmem_mmio_size is less than the default (%lu)", > + extended_memmap[VIRT_HIGH_PCIE_MMIO].size); > + return; > + } > + > + extended_memmap[VIRT_HIGH_PCIE_MMIO].size = size; > +} > > static bool virt_get_its(Object *obj, Error **errp) > { > @@ -3446,6 +3476,14 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) > "Set on/off to enable/disable high " > "memory region for PCI MMIO"); > > + object_class_property_add(oc, "highmem-mmio-size", "size", > + virt_get_highmem_mmio_size, > + virt_set_highmem_mmio_size, > + NULL, NULL); > + object_class_property_set_description(oc, "highmem-mmio-size", > + "Set the high memory region size " > + "for PCI MMIO"); > + > object_class_property_add_str(oc, "gic-version", virt_get_gic_version, > virt_set_gic_version); > object_class_property_set_description(oc, "gic-version", Reviewed-by: Eric Auger <eric.auger@redhat.com> The only nitpick I have is that if you read static MemMapEntry extended_memmap[] = { /* Additional 64 MB redist region (can contain up to 512 redistributors) */ [VIRT_HIGH_GIC_REDIST2] = { 0x0, 64 * MiB }, [VIRT_HIGH_PCIE_ECAM] = { 0x0, 256 * MiB }, /* Second PCIe window */ [VIRT_HIGH_PCIE_MMIO] = { 0x0, 512 * GiB }, }; and the above comment, it is not obvious that the HIGH_PCI_MMIO can be extended by an option. A distracted reader may not get it. But I don't know if it is worth respinning. Eric
© 2016 - 2025 Red Hat, Inc.