From: Ankit Agrawal <ankita@nvidia.com>
The ACPI spec provides a scheme to associate "Generic Initiators" [1]
(e.g. heterogeneous processors and accelerators, GPUs, and I/O devices with
integrated compute or DMA engines) with Proximity Domains. This is
achieved using the Generic Initiator Affinity Structure in the SRAT. During
boot, the Linux kernel parses the ACPI SRAT to determine the PXM IDs and
creates a NUMA node for each unique PXM ID encountered. QEMU currently does
not implement these structures while building the SRAT.
Add GI structures while building the VM's ACPI SRAT. The association between
devices and nodes is stored using acpi-generic-initiator objects. Look up
all such objects and use them to build these structures.
The structure needs a PCI device handle [2] that consists of the device BDF.
The vfio-pci device corresponding to the acpi-generic-initiator object is
located to determine the BDF.
[1] ACPI Spec 6.3, Section 5.2.16.6
[2] ACPI Spec 6.3, Table 5.80
Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
---
hw/acpi/acpi-generic-initiator.c | 100 +++++++++++++++++++++++
hw/arm/virt-acpi-build.c | 3 +
include/hw/acpi/acpi-generic-initiator.h | 26 ++++++
3 files changed, 129 insertions(+)
diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c
index 5ea51cb81e..a9222438ec 100644
--- a/hw/acpi/acpi-generic-initiator.c
+++ b/hw/acpi/acpi-generic-initiator.c
@@ -16,6 +16,7 @@
#include "hw/pci/pci_device.h"
#include "sysemu/numa.h"
#include "hw/acpi/acpi-generic-initiator.h"
+#include "qemu/error-report.h"
OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericInitiator, acpi_generic_initiator,
ACPI_GENERIC_INITIATOR, OBJECT,
@@ -82,3 +83,102 @@ static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data)
acpi_generic_initiator_set_host_nodes,
NULL, NULL);
}
+
+/*
+ * Callback for object_child_foreach(): if @obj is a Generic Initiator,
+ * append it to the GSList that @opaque (a GSList **) points at, then
+ * descend into the children of @obj.  Returns 0 unconditionally.
+ */
+static int acpi_generic_initiator_list(Object *obj, void *opaque)
+{
+    GSList **head = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR) != NULL) {
+        AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+
+        *head = g_slist_append(*head, gi);
+    }
+
+    /* Recurse so that nested objects are visited as well. */
+    object_child_foreach(obj, acpi_generic_initiator_list, opaque);
+
+    return 0;
+}
+
+/*
+ * Collect every Generic Initiator object found below the object root.
+ *
+ * Returns the head of a GSList of the objects found, or NULL when there
+ * are none.  The list belongs to the caller, who must free it to avoid a
+ * memory leak.
+ */
+static GSList *acpi_generic_initiator_get_list(void)
+{
+    GSList *found = NULL;
+    Object *root = object_get_root();
+
+    object_child_foreach(root, acpi_generic_initiator_list, &found);
+
+    return found;
+}
+
+/*
+ * Serialize one Generic Initiator Affinity Structure (ACPI 6.3, Table 5-78)
+ * into @table_data.
+ *
+ * @node is written as the Proximity Domain and @handle supplies the PCI
+ * segment and BDF of the Device Handle; the remainder of the handle is
+ * zero-filled.  The structure is always emitted with the enabled flag set.
+ */
+static void
+build_srat_generic_pci_initiator_affinity(GArray *table_data, int node,
+                                          PCIDeviceHandle *handle)
+{
+    int reserved_bytes = 12;
+
+    build_append_int_noprefix(table_data, 5, 1); /* Type */
+    build_append_int_noprefix(table_data, 32, 1); /* Length */
+    build_append_int_noprefix(table_data, 0, 1); /* Reserved */
+    build_append_int_noprefix(table_data, 1, 1); /* Device Handle Type: PCI */
+    build_append_int_noprefix(table_data, node, 4); /* Proximity Domain */
+
+    /* Device Handle - PCI: segment, BDF, then 12 zeroed bytes */
+    build_append_int_noprefix(table_data, handle->segment, 2);
+    build_append_int_noprefix(table_data, handle->bdf, 2);
+    while (reserved_bytes-- > 0) {
+        build_append_int_noprefix(table_data, 0, 1);
+    }
+
+    build_append_int_noprefix(table_data, GEN_AFFINITY_ENABLED, 4); /* Flags */
+    build_append_int_noprefix(table_data, 0, 4); /* Reserved */
+}
+
+/*
+ * Append Generic Initiator Affinity Structures to the SRAT in @table_data:
+ * one structure per NUMA node listed on each acpi-generic-initiator object.
+ *
+ * A misconfigured object (no device, a device path that does not resolve
+ * to a PCI device, or an empty node list) is a user error, not a
+ * programming error.  It is therefore reported with error_report() and
+ * QEMU exits with status 1 instead of tripping an assertion.
+ */
+void build_srat_generic_pci_initiator(GArray *table_data)
+{
+    GSList *gi_list, *list = acpi_generic_initiator_get_list();
+
+    for (gi_list = list; gi_list; gi_list = gi_list->next) {
+        AcpiGenericInitiator *gi = gi_list->data;
+        PCIDeviceHandle dev_handle;
+        PCIDevice *pci_dev;
+        uint16List *l;
+        Object *o;
+
+        /* User failed to provide a device. */
+        if (!gi->device) {
+            error_report("Generic Initiator object has no device specified");
+            exit(1);
+        }
+
+        o = object_resolve_path_type(gi->device, TYPE_PCI_DEVICE, NULL);
+        if (!o) {
+            error_report("Specified device '%s' must be a PCI device",
+                         gi->device);
+            exit(1);
+        }
+        pci_dev = PCI_DEVICE(o);
+
+        /* User failed to associate the device with any NUMA node. */
+        if (!gi->nodelist) {
+            error_report("Generic Initiator device 0:%x:%x.%x has no associated"
+                         " NUMA node", pci_bus_num(pci_get_bus(pci_dev)),
+                         PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn));
+            error_printf("Specify its NUMA node(s) on the"
+                         " acpi-generic-initiator object.\n");
+            exit(1);
+        }
+
+        /*
+         * The handle depends only on the device, so build it once and
+         * reuse it for every node.  The PCI segment is hard-coded to 0.
+         */
+        dev_handle.segment = 0;
+        dev_handle.bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
+                                       pci_dev->devfn);
+
+        for (l = gi->nodelist; l; l = l->next) {
+            build_srat_generic_pci_initiator_affinity(table_data, l->value,
+                                                      &dev_handle);
+        }
+    }
+
+    g_slist_free(list);
+}
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 8bc35a483c..00d77327e0 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -58,6 +58,7 @@
#include "migration/vmstate.h"
#include "hw/acpi/ghes.h"
#include "hw/acpi/viot.h"
+#include "hw/acpi/acpi-generic-initiator.h"
#define ARM_SPI_BASE 32
@@ -558,6 +559,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
}
}
+ build_srat_generic_pci_initiator(table_data);
+
if (ms->nvdimms_state->is_enabled) {
nvdimm_build_srat(table_data);
}
diff --git a/include/hw/acpi/acpi-generic-initiator.h b/include/hw/acpi/acpi-generic-initiator.h
index db3ed02c80..6fdaf887cd 100644
--- a/include/hw/acpi/acpi-generic-initiator.h
+++ b/include/hw/acpi/acpi-generic-initiator.h
@@ -27,4 +27,30 @@ typedef struct AcpiGenericInitiatorClass {
ObjectClass parent_class;
} AcpiGenericInitiatorClass;
+/*
+ * ACPI 6.3:
+ * Table 5-81 Flags – Generic Initiator Affinity Structure
+ *
+ * Bit values for the Flags field of the Generic Initiator Affinity
+ * Structure.
+ */
+typedef enum {
+ /*
+ * Enabled (bit 0). If clear, the OSPM ignores the contents of the
+ * Generic Initiator/Port Affinity Structure. This allows system
+ * firmware to populate the SRAT with a static number of structures,
+ * but only enable them as necessary.
+ */
+ GEN_AFFINITY_ENABLED = (1 << 0),
+} GenericAffinityFlags;
+
+/*
+ * ACPI 6.3:
+ * Table 5-80 Device Handle - PCI
+ *
+ * Identifies the PCI device that a Generic Initiator Affinity Structure
+ * refers to.
+ */
+typedef struct PCIDeviceHandle {
+ uint16_t segment; /* PCI segment of the device */
+ uint16_t bdf; /* PCI bus/device/function number of the device */
+} PCIDeviceHandle;
+
+/*
+ * Append Generic Initiator Affinity Structures to the SRAT being built in
+ * @table_data: one structure for each NUMA node associated with each
+ * acpi-generic-initiator object.
+ */
+void build_srat_generic_pci_initiator(GArray *table_data);
+
#endif
--
2.34.1
<ankita@nvidia.com> writes: > From: Ankit Agrawal <ankita@nvidia.com> > > ACPI spec provides a scheme to associate "Generic Initiators" [1] > (e.g. heterogeneous processors and accelerators, GPUs, and I/O devices with > integrated compute or DMA engines GPUs) with Proximity Domains. This is > achieved using Generic Initiator Affinity Structure in SRAT. During bootup, > Linux kernel parse the ACPI SRAT to determine the PXM ids and create a NUMA > node for each unique PXM ID encountered. Qemu currently do not implement > these structures while building SRAT. > > Add GI structures while building VM ACPI SRAT. The association between > devices and nodes are stored using acpi-generic-initiator object. Lookup > presence of all such objects and use them to build these structures. > > The structure needs a PCI device handle [2] that consists of the device BDF. > The vfio-pci device corresponding to the acpi-generic-initiator object is > located to determine the BDF. > > [1] ACPI Spec 6.3, Section 5.2.16.6 > [2] ACPI Spec 6.3, Table 5.80 > > Signed-off-by: Ankit Agrawal <ankita@nvidia.com> > --- > hw/acpi/acpi-generic-initiator.c | 100 +++++++++++++++++++++++ > hw/arm/virt-acpi-build.c | 3 + > include/hw/acpi/acpi-generic-initiator.h | 26 ++++++ > 3 files changed, 129 insertions(+) > > diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c > index 5ea51cb81e..a9222438ec 100644 > --- a/hw/acpi/acpi-generic-initiator.c > +++ b/hw/acpi/acpi-generic-initiator.c > @@ -16,6 +16,7 @@ > #include "hw/pci/pci_device.h" > #include "sysemu/numa.h" > #include "hw/acpi/acpi-generic-initiator.h" > +#include "qemu/error-report.h" > > OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericInitiator, acpi_generic_initiator, > ACPI_GENERIC_INITIATOR, OBJECT, > @@ -82,3 +83,102 @@ static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data) > acpi_generic_initiator_set_host_nodes, > NULL, NULL); > } > + > +static int acpi_generic_initiator_list(Object *obj, void 
*opaque) > +{ > + GSList **list = opaque; > + > + if (object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR)) { > + *list = g_slist_append(*list, ACPI_GENERIC_INITIATOR(obj)); > + } > + > + object_child_foreach(obj, acpi_generic_initiator_list, opaque); > + return 0; > +} > + > +/* > + * Identify Generic Initiator objects and link them into the list which is > + * returned to the caller. > + * > + * Note: it is the caller's responsibility to free the list to avoid > + * memory leak. > + */ > +static GSList *acpi_generic_initiator_get_list(void) > +{ > + GSList *list = NULL; > + > + object_child_foreach(object_get_root(), acpi_generic_initiator_list, &list); Long line. > + return list; > +} > + > +/* > + * ACPI 6.3: > + * Table 5-78 Generic Initiator Affinity Structure > + */ > +static > +void build_srat_generic_pci_initiator_affinity(GArray *table_data, int node, > + PCIDeviceHandle *handle) Style nitpick: bad line break. We traditionally format like static void build_srat_generic_pci_initiator_affinity(GArray *table_data, int node, PCIDeviceHandle *handle) or, to avoid the long line static void build_srat_generic_pci_initiator_affinity(GArray *table_data, int node, PCIDeviceHandle *handle) but there's also precedence for static void build_srat_generic_pci_initiator_affinity(GArray *table_data, int node, PCIDeviceHandle *handle) > +{ > + uint8_t index; > + > + build_append_int_noprefix(table_data, 5, 1); /* Type */ > + build_append_int_noprefix(table_data, 32, 1); /* Length */ > + build_append_int_noprefix(table_data, 0, 1); /* Reserved */ > + build_append_int_noprefix(table_data, 1, 1); /* Device Handle Type: PCI */ > + build_append_int_noprefix(table_data, node, 4); /* Proximity Domain */ > + > + /* Device Handle - PCI */ > + build_append_int_noprefix(table_data, handle->segment, 2); > + build_append_int_noprefix(table_data, handle->bdf, 2); > + for (index = 0; index < 12; index++) { > + build_append_int_noprefix(table_data, 0, 1); > + } > + > + 
build_append_int_noprefix(table_data, GEN_AFFINITY_ENABLED, 4); /* Flags */ > + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ > +} > + > +void build_srat_generic_pci_initiator(GArray *table_data) > +{ > + GSList *gi_list, *list = acpi_generic_initiator_get_list(); > + AcpiGenericInitiator *gi; > + > + for (gi_list = list; gi_list; gi_list = gi_list->next) { > + Object *o; > + uint16List *l; > + PCIDevice *pci_dev; > + bool node_specified = false; > + > + gi = gi_list->data; > + > + /* User fails to provide a device. */ > + g_assert(gi->device); Assertions are for programming errors, not for diagnosing or reporting user errors. Instead if (!gi->device) { error_report(...); exit(1); } This assumes the function can only ever run duting initial startup. If that's not ensured, exit(1) is wrong, and you need to return failure instead, so the callers can do the right thing. > + > + o = object_resolve_path_type(gi->device, TYPE_PCI_DEVICE, NULL); > + if (!o) { > + error_printf("Specified device must be a PCI device.\n"); > + g_assert(o); Likewise. > + } > + pci_dev = PCI_DEVICE(o); > + > + for (l = gi->nodelist; l; l = l->next) { > + PCIDeviceHandle dev_handle; > + dev_handle.segment = 0; > + dev_handle.bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), > + pci_dev->devfn); > + build_srat_generic_pci_initiator_affinity(table_data, > + l->value, &dev_handle); > + node_specified = true; > + } > + > + if (!node_specified) { > + error_report("Generic Initiator device 0:%x:%x.%x has no associated" > + " NUMA node.", pci_bus_num(pci_get_bus(pci_dev)), > + PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn)); > + error_printf("Specify NUMA node with -nodelist option.\n"); > + g_assert(node_specified); Likewise. 
> + } > + } > + > + g_slist_free(list); > +} > diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c > index 8bc35a483c..00d77327e0 100644 > --- a/hw/arm/virt-acpi-build.c > +++ b/hw/arm/virt-acpi-build.c > @@ -58,6 +58,7 @@ > #include "migration/vmstate.h" > #include "hw/acpi/ghes.h" > #include "hw/acpi/viot.h" > +#include "hw/acpi/acpi-generic-initiator.h" > > #define ARM_SPI_BASE 32 > > @@ -558,6 +559,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) > } > } > > + build_srat_generic_pci_initiator(table_data); > + > if (ms->nvdimms_state->is_enabled) { > nvdimm_build_srat(table_data); > } > diff --git a/include/hw/acpi/acpi-generic-initiator.h b/include/hw/acpi/acpi-generic-initiator.h > index db3ed02c80..6fdaf887cd 100644 > --- a/include/hw/acpi/acpi-generic-initiator.h > +++ b/include/hw/acpi/acpi-generic-initiator.h > @@ -27,4 +27,30 @@ typedef struct AcpiGenericInitiatorClass { > ObjectClass parent_class; > } AcpiGenericInitiatorClass; > > +/* > + * ACPI 6.3: > + * Table 5-81 Flags – Generic Initiator Affinity Structure > + */ > +typedef enum { > + GEN_AFFINITY_ENABLED = (1 << 0), /* > + * If clear, the OSPM ignores the contents > + * of the Generic Initiator/Port Affinity > + * Structure. This allows system firmware > + * to populate the SRAT with a static > + * number of structures, but only enable > + * them as necessary. > + */ > +} GenericAffinityFlags; > + > +/* > + * ACPI 6.3: > + * Table 5-80 Device Handle - PCI > + */ > +typedef struct PCIDeviceHandle { > + uint16_t segment; > + uint16_t bdf; > +} PCIDeviceHandle; > + > +void build_srat_generic_pci_initiator(GArray *table_data); > + > #endif
On Sun, 19 Nov 2023 18:31:11 +0530 <ankita@nvidia.com> wrote: > From: Ankit Agrawal <ankita@nvidia.com> > > ACPI spec provides a scheme to associate "Generic Initiators" [1] > (e.g. heterogeneous processors and accelerators, GPUs, and I/O devices with > integrated compute or DMA engines GPUs) with Proximity Domains. This is > achieved using Generic Initiator Affinity Structure in SRAT. During bootup, > Linux kernel parse the ACPI SRAT to determine the PXM ids and create a NUMA > node for each unique PXM ID encountered. Qemu currently do not implement > these structures while building SRAT. > > Add GI structures while building VM ACPI SRAT. The association between > devices and nodes are stored using acpi-generic-initiator object. Lookup > presence of all such objects and use them to build these structures. > > The structure needs a PCI device handle [2] that consists of the device BDF. > The vfio-pci device corresponding to the acpi-generic-initiator object is > located to determine the BDF. 
> > [1] ACPI Spec 6.3, Section 5.2.16.6 > [2] ACPI Spec 6.3, Table 5.80 > > Signed-off-by: Ankit Agrawal <ankita@nvidia.com> > --- > hw/acpi/acpi-generic-initiator.c | 100 +++++++++++++++++++++++ > hw/arm/virt-acpi-build.c | 3 + > include/hw/acpi/acpi-generic-initiator.h | 26 ++++++ > 3 files changed, 129 insertions(+) > > diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c > index 5ea51cb81e..a9222438ec 100644 > --- a/hw/acpi/acpi-generic-initiator.c > +++ b/hw/acpi/acpi-generic-initiator.c > @@ -16,6 +16,7 @@ > #include "hw/pci/pci_device.h" > #include "sysemu/numa.h" > #include "hw/acpi/acpi-generic-initiator.h" > +#include "qemu/error-report.h" > > OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericInitiator, acpi_generic_initiator, > ACPI_GENERIC_INITIATOR, OBJECT, > @@ -82,3 +83,102 @@ static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data) > acpi_generic_initiator_set_host_nodes, > NULL, NULL); > } > + > +static int acpi_generic_initiator_list(Object *obj, void *opaque) > +{ > + GSList **list = opaque; > + > + if (object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR)) { > + *list = g_slist_append(*list, ACPI_GENERIC_INITIATOR(obj)); > + } > + > + object_child_foreach(obj, acpi_generic_initiator_list, opaque); > + return 0; > +} > + > +/* > + * Identify Generic Initiator objects and link them into the list which is > + * returned to the caller. > + * > + * Note: it is the caller's responsibility to free the list to avoid > + * memory leak. 
> + */ > +static GSList *acpi_generic_initiator_get_list(void) > +{ > + GSList *list = NULL; > + > + object_child_foreach(object_get_root(), acpi_generic_initiator_list, &list); > + return list; > +} > + > +/* > + * ACPI 6.3: > + * Table 5-78 Generic Initiator Affinity Structure > + */ > +static > +void build_srat_generic_pci_initiator_affinity(GArray *table_data, int node, > + PCIDeviceHandle *handle) > +{ > + uint8_t index; > + > + build_append_int_noprefix(table_data, 5, 1); /* Type */ > + build_append_int_noprefix(table_data, 32, 1); /* Length */ > + build_append_int_noprefix(table_data, 0, 1); /* Reserved */ > + build_append_int_noprefix(table_data, 1, 1); /* Device Handle Type: PCI */ > + build_append_int_noprefix(table_data, node, 4); /* Proximity Domain */ > + > + /* Device Handle - PCI */ > + build_append_int_noprefix(table_data, handle->segment, 2); > + build_append_int_noprefix(table_data, handle->bdf, 2); > + for (index = 0; index < 12; index++) { > + build_append_int_noprefix(table_data, 0, 1); > + } > + > + build_append_int_noprefix(table_data, GEN_AFFINITY_ENABLED, 4); /* Flags */ > + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ > +} > + > +void build_srat_generic_pci_initiator(GArray *table_data) > +{ > + GSList *gi_list, *list = acpi_generic_initiator_get_list(); > + AcpiGenericInitiator *gi; > + > + for (gi_list = list; gi_list; gi_list = gi_list->next) { > + Object *o; > + uint16List *l; > + PCIDevice *pci_dev; > + bool node_specified = false; > + > + gi = gi_list->data; > + > + /* User fails to provide a device. 
*/ > + g_assert(gi->device); > + > + o = object_resolve_path_type(gi->device, TYPE_PCI_DEVICE, NULL); > + if (!o) { > + error_printf("Specified device must be a PCI device.\n"); > + g_assert(o); > + } > + pci_dev = PCI_DEVICE(o); > + > + for (l = gi->nodelist; l; l = l->next) { > + PCIDeviceHandle dev_handle; > + dev_handle.segment = 0; > + dev_handle.bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), > + pci_dev->devfn); > + build_srat_generic_pci_initiator_affinity(table_data, > + l->value, &dev_handle); > + node_specified = true; > + } > + > + if (!node_specified) { > + error_report("Generic Initiator device 0:%x:%x.%x has no associated" > + " NUMA node.", pci_bus_num(pci_get_bus(pci_dev)), > + PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn)); > + error_printf("Specify NUMA node with -nodelist option.\n"); No such option, -nodelist? > + g_assert(node_specified); I won't claim expertise in QEMU error handling, but an assert is a pretty harsh way to handle failures. Thanks, Alex > + } > + } > + > + g_slist_free(list); > +} > diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c > index 8bc35a483c..00d77327e0 100644 > --- a/hw/arm/virt-acpi-build.c > +++ b/hw/arm/virt-acpi-build.c > @@ -58,6 +58,7 @@ > #include "migration/vmstate.h" > #include "hw/acpi/ghes.h" > #include "hw/acpi/viot.h" > +#include "hw/acpi/acpi-generic-initiator.h" > > #define ARM_SPI_BASE 32 > > @@ -558,6 +559,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) > } > } > > + build_srat_generic_pci_initiator(table_data); > + > if (ms->nvdimms_state->is_enabled) { > nvdimm_build_srat(table_data); > } > diff --git a/include/hw/acpi/acpi-generic-initiator.h b/include/hw/acpi/acpi-generic-initiator.h > index db3ed02c80..6fdaf887cd 100644 > --- a/include/hw/acpi/acpi-generic-initiator.h > +++ b/include/hw/acpi/acpi-generic-initiator.h > @@ -27,4 +27,30 @@ typedef struct AcpiGenericInitiatorClass { > ObjectClass parent_class; > } AcpiGenericInitiatorClass; > 
> +/* > + * ACPI 6.3: > + * Table 5-81 Flags – Generic Initiator Affinity Structure > + */ > +typedef enum { > + GEN_AFFINITY_ENABLED = (1 << 0), /* > + * If clear, the OSPM ignores the contents > + * of the Generic Initiator/Port Affinity > + * Structure. This allows system firmware > + * to populate the SRAT with a static > + * number of structures, but only enable > + * them as necessary. > + */ > +} GenericAffinityFlags; > + > +/* > + * ACPI 6.3: > + * Table 5-80 Device Handle - PCI > + */ > +typedef struct PCIDeviceHandle { > + uint16_t segment; > + uint16_t bdf; > +} PCIDeviceHandle; > + > +void build_srat_generic_pci_initiator(GArray *table_data); > + > #endif
© 2016 - 2024 Red Hat, Inc.