[PATCH v3 2/2] hw/acpi: Implement the SRAT GI affinity structure

ankita@nvidia.com posted 2 patches 1 year ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Igor Mammedov <imammedo@redhat.com>, Ani Sinha <anisinha@redhat.com>, Peter Maydell <peter.maydell@linaro.org>, Shannon Zhao <shannon.zhaosl@gmail.com>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>, Eduardo Habkost <eduardo@habkost.net>
[PATCH v3 2/2] hw/acpi: Implement the SRAT GI affinity structure
Posted by ankita@nvidia.com 1 year ago
From: Ankit Agrawal <ankita@nvidia.com>

ACPI spec provides a scheme to associate "Generic Initiators" [1]
(e.g. heterogeneous processors and accelerators, GPUs, and I/O devices with
integrated compute or DMA engines GPUs) with Proximity Domains. This is
achieved using Generic Initiator Affinity Structure in SRAT. During bootup,
Linux kernel parse the ACPI SRAT to determine the PXM ids and create a NUMA
node for each unique PXM ID encountered. Qemu currently do not implement
these structures while building SRAT.

Add GI structures while building VM ACPI SRAT. The association between
devices and nodes are stored using acpi-generic-initiator object. Lookup
presence of all such objects and use them to build these structures.

The structure needs a PCI device handle [2] that consists of the device BDF.
The vfio-pci device corresponding to the acpi-generic-initiator object is
located to determine the BDF.

[1] ACPI Spec 6.5, Section 5.2.16.6
[2] ACPI Spec 6.5, Table 5.66

Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
---
 hw/acpi/acpi-generic-initiator.c         | 79 ++++++++++++++++++++++++
 hw/arm/virt-acpi-build.c                 |  3 +
 include/hw/acpi/acpi-generic-initiator.h | 21 +++++++
 3 files changed, 103 insertions(+)

diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c
index 0699c878e2..6d0a8fd818 100644
--- a/hw/acpi/acpi-generic-initiator.c
+++ b/hw/acpi/acpi-generic-initiator.c
@@ -78,3 +78,82 @@ static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data)
     object_class_property_add_str(oc, ACPI_GENERIC_INITIATOR_NODELIST_PROP,
                                   NULL, acpi_generic_initiator_set_nodelist);
 }
+
+static int acpi_generic_initiator_list(Object *obj, void *opaque)
+{
+    GSList **list = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR)) {
+        *list = g_slist_append(*list, ACPI_GENERIC_INITIATOR(obj));
+    }
+
+    object_child_foreach(obj, acpi_generic_initiator_list, opaque);
+    return 0;
+}
+
+/*
+ * Identify Generic Initiator objects and link them into the list which is
+ * returned to the caller.
+ *
+ * Note: it is the caller's responsibility to free the list to avoid
+ * memory leak.
+ */
+static GSList *acpi_generic_initiator_get_list(void)
+{
+    GSList *list = NULL;
+
+    object_child_foreach(object_get_root(), acpi_generic_initiator_list, &list);
+    return list;
+}
+
+/*
+ * ACPI spec, Revision 6.5
+ * 5.2.16.6 Generic Initiator Affinity Structure
+ */
+static
+void build_srat_generic_pci_initiator_affinity(GArray *table_data, int node,
+                                               PCIDeviceHandle *handle)
+{
+    uint8_t index;
+
+    build_append_int_noprefix(table_data, 5, 1);     /* Type */
+    build_append_int_noprefix(table_data, 32, 1);    /* Length */
+    build_append_int_noprefix(table_data, 0, 1);     /* Reserved */
+    build_append_int_noprefix(table_data, 1, 1);     /* Device Handle Type */
+    build_append_int_noprefix(table_data, node, 4);  /* Proximity Domain */
+    build_append_int_noprefix(table_data, handle->segment, 2);
+    build_append_int_noprefix(table_data, handle->bdf, 2);
+
+    /* Reserved */
+    for (index = 0; index < 12; index++) {
+        build_append_int_noprefix(table_data, handle->res[index], 1);
+    }
+
+    build_append_int_noprefix(table_data, GEN_AFFINITY_ENABLED, 4); /* Flags */
+    build_append_int_noprefix(table_data, 0, 4);     /* Reserved */
+}
+
+void build_srat_generic_pci_initiator(GArray *table_data)
+{
+    GSList *gi_list, *list = acpi_generic_initiator_get_list();
+    for (gi_list = list; gi_list; gi_list = gi_list->next) {
+        AcpiGenericInitiator *gi = gi_list->data;
+        Object *o;
+        uint16List *l;
+
+        o = object_resolve_path_type(gi->device, TYPE_VFIO_PCI, NULL);
+        if (!o) {
+            continue;
+        }
+
+        for (l = gi->nodelist; l; l = l->next) {
+            PCIDeviceHandle dev_handle = {0};
+            PCIDevice *pci_dev = PCI_DEVICE(o);
+            dev_handle.bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
+                                                       pci_dev->devfn);
+            build_srat_generic_pci_initiator_affinity(table_data,
+                                                      l->value, &dev_handle);
+        }
+    }
+    g_slist_free(list);
+}
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 6b674231c2..bd53788cef 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -58,6 +58,7 @@
 #include "migration/vmstate.h"
 #include "hw/acpi/ghes.h"
 #include "hw/acpi/viot.h"
+#include "hw/acpi/acpi-generic-initiator.h"
 
 #define ARM_SPI_BASE 32
 
@@ -558,6 +559,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
         }
     }
 
+    build_srat_generic_pci_initiator(table_data);
+
     if (ms->nvdimms_state->is_enabled) {
         nvdimm_build_srat(table_data);
     }
diff --git a/include/hw/acpi/acpi-generic-initiator.h b/include/hw/acpi/acpi-generic-initiator.h
index bb127b2541..545f46ade5 100644
--- a/include/hw/acpi/acpi-generic-initiator.h
+++ b/include/hw/acpi/acpi-generic-initiator.h
@@ -26,4 +26,25 @@ typedef struct AcpiGenericInitiatorClass {
         ObjectClass parent_class;
 } AcpiGenericInitiatorClass;
 
+/*
+ * ACPI 6.5: Table 5-68 Flags - Generic Initiator
+ */
+typedef enum {
+    GEN_AFFINITY_NOFLAGS = 0,
+    GEN_AFFINITY_ENABLED = (1 << 0),
+    GEN_AFFINITY_ARCH_TRANS = (1 << 1),
+} GenericAffinityFlags;
+
+/*
+ * ACPI 6.5: Table 5-66 Device Handle - PCI
+ * Device Handle definition
+ */
+typedef struct PCIDeviceHandle {
+    uint16_t segment;
+    uint16_t bdf;
+    uint8_t res[12];
+} PCIDeviceHandle;
+
+void build_srat_generic_pci_initiator(GArray *table_data);
+
 #endif
-- 
2.17.1
Re: [PATCH v3 2/2] hw/acpi: Implement the SRAT GI affinity structure
Posted by Michael S. Tsirkin 1 year ago
On Wed, Nov 08, 2023 at 12:30:39AM +0530, ankita@nvidia.com wrote:
> +        for (l = gi->nodelist; l; l = l->next) {
> +            PCIDeviceHandle dev_handle = {0};
> +            PCIDevice *pci_dev = PCI_DEVICE(o);
> +            dev_handle.bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
> +                                                       pci_dev->devfn);
> +            build_srat_generic_pci_initiator_affinity(table_data,
> +                                                      l->value, &dev_handle);
> +        }
> +    }

if you never initialize segment then I don't see why have it.
It's just the bdf, just pass that as parameter no need for a struct.

-- 
MST
Re: [PATCH v3 2/2] hw/acpi: Implement the SRAT GI affinity structure
Posted by Ankit Agrawal 1 year ago
>> +        for (l = gi->nodelist; l; l = l->next) {
>> +            PCIDeviceHandle dev_handle = {0};
>> +            PCIDevice *pci_dev = PCI_DEVICE(o);
>> +            dev_handle.bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
>> +                                                       pci_dev->devfn);
>> +            build_srat_generic_pci_initiator_affinity(table_data,
>> +                                                      l->value, &dev_handle);
>> +        }
>> +    }
>
> if you never initialize segment then I don't see why have it.
> It's just the bdf, just pass that as parameter no need for a struct.
>
> I'd explicitly set the segment to zero just to make it more apparent
> that it would need to be addressed when QEMU adds multi-segment
> support.

Okay, so I'll keep the segment id, but set it to 0 explicitly.

>> + * ACPI spec, Revision 6.5
>
> we normally just say ACPI 6.5 even though a couple of places are more
> verbose.
>
> In ACPI we document *earliest* spec version that includes this, not just
> a random one you looked at. I checked 6.3 and it's there.
> Pls find earliest one.

Will make the change.

>> +typedef enum {
>> +    GEN_AFFINITY_NOFLAGS = 0,
>> +    GEN_AFFINITY_ENABLED = (1 << 0),
>> +    GEN_AFFINITY_ARCH_TRANS = (1 << 1),
>> +} GenericAffinityFlags;
>
> Don't add these one-time use flags. They are impossible to match to
> spec without reading and memorizing all of it. The way we do it in ACPI
> code is this:
>
> (1 << 0) /* [text matching ACPI spec verbatim ] */
>
> this also means you will not add a ton of dead code just because it is
> in the spec.

Ack.

>> +typedef struct PCIDeviceHandle {
>> +    uint16_t segment;
>> +    uint16_t bdf;
>> +    uint8_t res[12];
>
> what is this "res" and why do you need to pass it? It's always 0 isn't
> it?

It is 12 bytes reserved field in the "Device Handle - PCI" described in 
ACPI 6.5, Table 5.66. I'll remove it.

>> +
>> +        o = object_resolve_path_type(gi->device, TYPE_VFIO_PCI, NULL);
>
> As per previous comments, this should not be tied to vfio.  This should
> be able to describe an association between any PCI device and various
> proximity domains, even those beyond this current use case.

Sure, will change it to use TYPE_PCI_DEVICE.

> It also looks like this support just silently fails if the device
> string isn't the right type or isn't found.  That's not good.  Should
> the previous patch validate the device where the Error return is more
> readily available rather than only doing a strdup there?  Maybe then we
> should store the object there rather than a char buffer.

AFAIU in a normal flow currently, a qemu -object is (parsed and) created much
earlier that a -device. This complicates the situation as when the
acpi-generic-initiator object is being created, the device is not available for
error check. Maybe I should treat this object specially to create much later?

> Don't we also still need to enforce that the device is not hotpluggable
> since we're tying it to this fixed ACPI object?  That was implicit when
> previously testing for the non-hotpluggable vfio-pci device type, but
> should rely on something like device_get_hotpluggable() now.

I think this will be similarly problematic as above due to the sequence of
object creation.

> Also the ACPI Generic Initiator supports either a PCI or ACPI device
> handle, where we're only adding PCI support here.  What do we want ACPI
> device support to look like?  Is it sufficient that device= only
> accepts a PCI device now and fails on anything else and would later be
> updated to accept an ACPI device or should the object have different
> entry points, ex. pci_dev = vs acpi_dev= where it might later be
> introspected whether ACPI device support exists?

I am fine with either way. If we prefer different entry points, I can make the
change.
Re: [PATCH v3 2/2] hw/acpi: Implement the SRAT GI affinity structure
Posted by Michael S. Tsirkin 1 year ago
On Mon, Nov 13, 2023 at 11:14:00AM +0000, Ankit Agrawal wrote:
> > It also looks like this support just silently fails if the device
> > string isn't the right type or isn't found.  That's not good.  Should
> > the previous patch validate the device where the Error return is more
> > readily available rather than only doing a strdup there?  Maybe then we
> > should store the object there rather than a char buffer.
> 
> AFAIU in a normal flow currently, a qemu -object is (parsed and) created much
> earlier that a -device. This complicates the situation as when the
> acpi-generic-initiator object is being created, the device is not available for
> error check. Maybe I should treat this object specially to create much later?
> 
> > Don't we also still need to enforce that the device is not hotpluggable
> > since we're tying it to this fixed ACPI object?  That was implicit when
> > previously testing for the non-hotpluggable vfio-pci device type, but
> > should rely on something like device_get_hotpluggable() now.
> 
> I think this will be similarly problematic as above due to the sequence of
> object creation.
> 
> > Also the ACPI Generic Initiator supports either a PCI or ACPI device
> > handle, where we're only adding PCI support here.  What do we want ACPI
> > device support to look like?  Is it sufficient that device= only
> > accepts a PCI device now and fails on anything else and would later be
> > updated to accept an ACPI device or should the object have different
> > entry points, ex. pci_dev = vs acpi_dev= where it might later be
> > introspected whether ACPI device support exists?
> 
> I am fine with either way. If we prefer different entry points, I can make the
> change.


Not the expert on QOM. Hope one of QOM maintainers can answer.
Re: [PATCH v3 2/2] hw/acpi: Implement the SRAT GI affinity structure
Posted by Michael S. Tsirkin 1 year ago
On Wed, Nov 08, 2023 at 12:30:39AM +0530, ankita@nvidia.com wrote:
> From: Ankit Agrawal <ankita@nvidia.com>
> 
> ACPI spec provides a scheme to associate "Generic Initiators" [1]
> (e.g. heterogeneous processors and accelerators, GPUs, and I/O devices with
> integrated compute or DMA engines GPUs) with Proximity Domains. This is
> achieved using Generic Initiator Affinity Structure in SRAT. During bootup,
> Linux kernel parse the ACPI SRAT to determine the PXM ids and create a NUMA
> node for each unique PXM ID encountered. Qemu currently do not implement
> these structures while building SRAT.
> 
> Add GI structures while building VM ACPI SRAT. The association between
> devices and nodes are stored using acpi-generic-initiator object. Lookup
> presence of all such objects and use them to build these structures.
> 
> The structure needs a PCI device handle [2] that consists of the device BDF.
> The vfio-pci device corresponding to the acpi-generic-initiator object is
> located to determine the BDF.
> 
> [1] ACPI Spec 6.5, Section 5.2.16.6
> [2] ACPI Spec 6.5, Table 5.66
> 
> Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
> ---
>  hw/acpi/acpi-generic-initiator.c         | 79 ++++++++++++++++++++++++
>  hw/arm/virt-acpi-build.c                 |  3 +
>  include/hw/acpi/acpi-generic-initiator.h | 21 +++++++
>  3 files changed, 103 insertions(+)
> 
> diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c
> index 0699c878e2..6d0a8fd818 100644
> --- a/hw/acpi/acpi-generic-initiator.c
> +++ b/hw/acpi/acpi-generic-initiator.c
> @@ -78,3 +78,82 @@ static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data)
>      object_class_property_add_str(oc, ACPI_GENERIC_INITIATOR_NODELIST_PROP,
>                                    NULL, acpi_generic_initiator_set_nodelist);
>  }
> +
> +static int acpi_generic_initiator_list(Object *obj, void *opaque)
> +{
> +    GSList **list = opaque;
> +
> +    if (object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR)) {
> +        *list = g_slist_append(*list, ACPI_GENERIC_INITIATOR(obj));
> +    }
> +
> +    object_child_foreach(obj, acpi_generic_initiator_list, opaque);
> +    return 0;
> +}
> +
> +/*
> + * Identify Generic Initiator objects and link them into the list which is
> + * returned to the caller.
> + *
> + * Note: it is the caller's responsibility to free the list to avoid
> + * memory leak.
> + */
> +static GSList *acpi_generic_initiator_get_list(void)
> +{
> +    GSList *list = NULL;
> +
> +    object_child_foreach(object_get_root(), acpi_generic_initiator_list, &list);
> +    return list;
> +}
> +
> +/*
> + * ACPI spec, Revision 6.5

we normally just say ACPI 6.5 even though a couple of places are more
verbose.

> + * 5.2.16.6 Generic Initiator Affinity Structure
> + */
> +static
> +void build_srat_generic_pci_initiator_affinity(GArray *table_data, int node,
> +                                               PCIDeviceHandle *handle)
> +{
> +    uint8_t index;
> +
> +    build_append_int_noprefix(table_data, 5, 1);     /* Type */
> +    build_append_int_noprefix(table_data, 32, 1);    /* Length */
> +    build_append_int_noprefix(table_data, 0, 1);     /* Reserved */
> +    build_append_int_noprefix(table_data, 1, 1);     /* Device Handle Type */
> +    build_append_int_noprefix(table_data, node, 4);  /* Proximity Domain */
> +    build_append_int_noprefix(table_data, handle->segment, 2);
> +    build_append_int_noprefix(table_data, handle->bdf, 2);
> +
> +    /* Reserved */
> +    for (index = 0; index < 12; index++) {
> +        build_append_int_noprefix(table_data, handle->res[index], 1);
> +    }
> +
> +    build_append_int_noprefix(table_data, GEN_AFFINITY_ENABLED, 4); /* Flags */
> +    build_append_int_noprefix(table_data, 0, 4);     /* Reserved */
> +}
> +
> +void build_srat_generic_pci_initiator(GArray *table_data)
> +{
> +    GSList *gi_list, *list = acpi_generic_initiator_get_list();
> +    for (gi_list = list; gi_list; gi_list = gi_list->next) {
> +        AcpiGenericInitiator *gi = gi_list->data;
> +        Object *o;
> +        uint16List *l;
> +
> +        o = object_resolve_path_type(gi->device, TYPE_VFIO_PCI, NULL);
> +        if (!o) {
> +            continue;
> +        }
> +
> +        for (l = gi->nodelist; l; l = l->next) {
> +            PCIDeviceHandle dev_handle = {0};
> +            PCIDevice *pci_dev = PCI_DEVICE(o);
> +            dev_handle.bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
> +                                                       pci_dev->devfn);
> +            build_srat_generic_pci_initiator_affinity(table_data,
> +                                                      l->value, &dev_handle);
> +        }
> +    }
> +    g_slist_free(list);
> +}
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index 6b674231c2..bd53788cef 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw/arm/virt-acpi-build.c
> @@ -58,6 +58,7 @@
>  #include "migration/vmstate.h"
>  #include "hw/acpi/ghes.h"
>  #include "hw/acpi/viot.h"
> +#include "hw/acpi/acpi-generic-initiator.h"
>  
>  #define ARM_SPI_BASE 32
>  
> @@ -558,6 +559,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
>          }
>      }
>  
> +    build_srat_generic_pci_initiator(table_data);
> +
>      if (ms->nvdimms_state->is_enabled) {
>          nvdimm_build_srat(table_data);
>      }
> diff --git a/include/hw/acpi/acpi-generic-initiator.h b/include/hw/acpi/acpi-generic-initiator.h
> index bb127b2541..545f46ade5 100644
> --- a/include/hw/acpi/acpi-generic-initiator.h
> +++ b/include/hw/acpi/acpi-generic-initiator.h
> @@ -26,4 +26,25 @@ typedef struct AcpiGenericInitiatorClass {
>          ObjectClass parent_class;
>  } AcpiGenericInitiatorClass;
>  
> +/*
> + * ACPI 6.5: Table 5-68 Flags - Generic Initiator
> + */
> +typedef enum {
> +    GEN_AFFINITY_NOFLAGS = 0,
> +    GEN_AFFINITY_ENABLED = (1 << 0),
> +    GEN_AFFINITY_ARCH_TRANS = (1 << 1),
> +} GenericAffinityFlags;

Don't add these one-time use flags. They are impossible to match to
spec without reading and memorizing all of it. The way we do it in ACPI
code is this:

(1 << 0) /* [text matching ACPI spec verbatim ] */

this also means you will not add a ton of dead code just because it is
in the spec.

> +
> +/*
> + * ACPI 6.5: Table 5-66 Device Handle - PCI

In ACPI we document *earliest* spec version that includes this, not just
a random one you looked at. I checked 6.3 and it's there.
Pls find earliest one.

Same applies everywhere


> + * Device Handle definition

Again match spec text exactly. one line, and "definition" is not there.

> + */
> +typedef struct PCIDeviceHandle {
> +    uint16_t segment;
> +    uint16_t bdf;
> +    uint8_t res[12];

what is this "res" and why do you need to pass it? It's always 0 isn't
it?

> +} PCIDeviceHandle;
> +
> +void build_srat_generic_pci_initiator(GArray *table_data);
> +
>  #endif
> -- 
> 2.17.1
Re: [PATCH v3 2/2] hw/acpi: Implement the SRAT GI affinity structure
Posted by Alex Williamson 1 year ago
On Wed, 8 Nov 2023 00:30:39 +0530
<ankita@nvidia.com> wrote:

> From: Ankit Agrawal <ankita@nvidia.com>
> 
> ACPI spec provides a scheme to associate "Generic Initiators" [1]
> (e.g. heterogeneous processors and accelerators, GPUs, and I/O devices with
> integrated compute or DMA engines GPUs) with Proximity Domains. This is
> achieved using Generic Initiator Affinity Structure in SRAT. During bootup,
> Linux kernel parse the ACPI SRAT to determine the PXM ids and create a NUMA
> node for each unique PXM ID encountered. Qemu currently do not implement
> these structures while building SRAT.
> 
> Add GI structures while building VM ACPI SRAT. The association between
> devices and nodes are stored using acpi-generic-initiator object. Lookup
> presence of all such objects and use them to build these structures.
> 
> The structure needs a PCI device handle [2] that consists of the device BDF.
> The vfio-pci device corresponding to the acpi-generic-initiator object is
> located to determine the BDF.
> 
> [1] ACPI Spec 6.5, Section 5.2.16.6
> [2] ACPI Spec 6.5, Table 5.66
> 
> Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
> ---
>  hw/acpi/acpi-generic-initiator.c         | 79 ++++++++++++++++++++++++
>  hw/arm/virt-acpi-build.c                 |  3 +
>  include/hw/acpi/acpi-generic-initiator.h | 21 +++++++
>  3 files changed, 103 insertions(+)
> 
> diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c
> index 0699c878e2..6d0a8fd818 100644
> --- a/hw/acpi/acpi-generic-initiator.c
> +++ b/hw/acpi/acpi-generic-initiator.c
> @@ -78,3 +78,82 @@ static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data)
>      object_class_property_add_str(oc, ACPI_GENERIC_INITIATOR_NODELIST_PROP,
>                                    NULL, acpi_generic_initiator_set_nodelist);
>  }
> +
> +static int acpi_generic_initiator_list(Object *obj, void *opaque)
> +{
> +    GSList **list = opaque;
> +
> +    if (object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR)) {
> +        *list = g_slist_append(*list, ACPI_GENERIC_INITIATOR(obj));
> +    }
> +
> +    object_child_foreach(obj, acpi_generic_initiator_list, opaque);
> +    return 0;
> +}
> +
> +/*
> + * Identify Generic Initiator objects and link them into the list which is
> + * returned to the caller.
> + *
> + * Note: it is the caller's responsibility to free the list to avoid
> + * memory leak.
> + */
> +static GSList *acpi_generic_initiator_get_list(void)
> +{
> +    GSList *list = NULL;
> +
> +    object_child_foreach(object_get_root(), acpi_generic_initiator_list, &list);
> +    return list;
> +}
> +
> +/*
> + * ACPI spec, Revision 6.5
> + * 5.2.16.6 Generic Initiator Affinity Structure
> + */
> +static
> +void build_srat_generic_pci_initiator_affinity(GArray *table_data, int node,
> +                                               PCIDeviceHandle *handle)
> +{
> +    uint8_t index;
> +
> +    build_append_int_noprefix(table_data, 5, 1);     /* Type */
> +    build_append_int_noprefix(table_data, 32, 1);    /* Length */
> +    build_append_int_noprefix(table_data, 0, 1);     /* Reserved */
> +    build_append_int_noprefix(table_data, 1, 1);     /* Device Handle Type */

/* Device Handle Type: PCI */

> +    build_append_int_noprefix(table_data, node, 4);  /* Proximity Domain */
> +    build_append_int_noprefix(table_data, handle->segment, 2);
> +    build_append_int_noprefix(table_data, handle->bdf, 2);
> +
> +    /* Reserved */
> +    for (index = 0; index < 12; index++) {
> +        build_append_int_noprefix(table_data, handle->res[index], 1);
> +    }
> +
> +    build_append_int_noprefix(table_data, GEN_AFFINITY_ENABLED, 4); /* Flags */
> +    build_append_int_noprefix(table_data, 0, 4);     /* Reserved */
> +}
> +
> +void build_srat_generic_pci_initiator(GArray *table_data)
> +{
> +    GSList *gi_list, *list = acpi_generic_initiator_get_list();
> +    for (gi_list = list; gi_list; gi_list = gi_list->next) {
> +        AcpiGenericInitiator *gi = gi_list->data;
> +        Object *o;
> +        uint16List *l;
> +
> +        o = object_resolve_path_type(gi->device, TYPE_VFIO_PCI, NULL);

As per previous comments, this should not be tied to vfio.  This should
be able to describe an association between any PCI device and various
proximity domains, even those beyond this current use case.

It also looks like this support just silently fails if the device
string isn't the right type or isn't found.  That's not good.  Should
the previous patch validate the device where the Error return is more
readily available rather than only doing a strdup there?  Maybe then we
should store the object there rather than a char buffer.

Don't we also still need to enforce that the device is not hotpluggable
since we're tying it to this fixed ACPI object?  That was implicit when
previously testing for the non-hotpluggable vfio-pci device type, but
should rely on something like device_get_hotpluggable() now.

Also the ACPI Generic Initiator supports either a PCI or ACPI device
handle, where we're only adding PCI support here.  What do we want ACPI
device support to look like?  Is it sufficient that device= only
accepts a PCI device now and fails on anything else and would later be
updated to accept an ACPI device or should the object have different
entry points, ex. pci_dev = vs acpi_dev= where it might later be
introspected whether ACPI device support exists?

> +        if (!o) {
> +            continue;
> +        }
> +
> +        for (l = gi->nodelist; l; l = l->next) {
> +            PCIDeviceHandle dev_handle = {0};
> +            PCIDevice *pci_dev = PCI_DEVICE(o);

I'd explicitly set the segment to zero just to make it more apparent
that it would need to be addressed when QEMU adds multi-segment
support.  Thanks,

Alex

> +            dev_handle.bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
> +                                                       pci_dev->devfn);
> +            build_srat_generic_pci_initiator_affinity(table_data,
> +                                                      l->value, &dev_handle);
> +        }
> +    }
> +    g_slist_free(list);
> +}
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index 6b674231c2..bd53788cef 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw/arm/virt-acpi-build.c
> @@ -58,6 +58,7 @@
>  #include "migration/vmstate.h"
>  #include "hw/acpi/ghes.h"
>  #include "hw/acpi/viot.h"
> +#include "hw/acpi/acpi-generic-initiator.h"
>  
>  #define ARM_SPI_BASE 32
>  
> @@ -558,6 +559,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
>          }
>      }
>  
> +    build_srat_generic_pci_initiator(table_data);
> +
>      if (ms->nvdimms_state->is_enabled) {
>          nvdimm_build_srat(table_data);
>      }
> diff --git a/include/hw/acpi/acpi-generic-initiator.h b/include/hw/acpi/acpi-generic-initiator.h
> index bb127b2541..545f46ade5 100644
> --- a/include/hw/acpi/acpi-generic-initiator.h
> +++ b/include/hw/acpi/acpi-generic-initiator.h
> @@ -26,4 +26,25 @@ typedef struct AcpiGenericInitiatorClass {
>          ObjectClass parent_class;
>  } AcpiGenericInitiatorClass;
>  
> +/*
> + * ACPI 6.5: Table 5-68 Flags - Generic Initiator
> + */
> +typedef enum {
> +    GEN_AFFINITY_NOFLAGS = 0,
> +    GEN_AFFINITY_ENABLED = (1 << 0),
> +    GEN_AFFINITY_ARCH_TRANS = (1 << 1),
> +} GenericAffinityFlags;
> +
> +/*
> + * ACPI 6.5: Table 5-66 Device Handle - PCI
> + * Device Handle definition
> + */
> +typedef struct PCIDeviceHandle {
> +    uint16_t segment;
> +    uint16_t bdf;
> +    uint8_t res[12];
> +} PCIDeviceHandle;
> +
> +void build_srat_generic_pci_initiator(GArray *table_data);
> +
>  #endif