From: Nicolin Chen <nicolinc@nvidia.com>
Now that we can have multiple user-creatable smmuv3-nested
devices, each associated with a different PCI bus, update
the IORT ID mappings accordingly.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
 hw/arm/virt-acpi-build.c | 34 ++++++++++++++++++++++++----------
 include/hw/arm/virt.h    |  6 ++++++
 2 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index e10cad86dd..ec4cdfb2d7 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -276,8 +276,10 @@ static void
 build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
 {
     int i, nb_nodes, rc_mapping_count;
-    size_t node_size, smmu_offset = 0;
+    size_t node_size, *smmu_offset;
     AcpiIortIdMapping *idmap;
+    hwaddr base;
+    int irq, num_smmus = 0;
     uint32_t id = 0;
     GArray *smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
     GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
@@ -287,7 +289,21 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
     /* Table 2 The IORT */
     acpi_table_begin(&table, table_data);
 
-    if (vms->iommu == VIRT_IOMMU_SMMUV3) {
+    if (vms->smmu_nested_count) {
+        irq = vms->irqmap[VIRT_SMMU_NESTED] + ARM_SPI_BASE;
+        base = vms->memmap[VIRT_SMMU_NESTED].base;
+        num_smmus = vms->smmu_nested_count;
+    } else if (virt_has_smmuv3(vms)) {
+        irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE;
+        base = vms->memmap[VIRT_SMMU].base;
+        num_smmus = 1;
+    }
+
+    smmu_offset = g_new0(size_t, num_smmus);
+    nb_nodes = 2; /* RC, ITS */
+    nb_nodes += num_smmus; /* SMMU nodes */
+
+    if (virt_has_smmuv3(vms)) {
         AcpiIortIdMapping next_range = {0};
 
         object_child_foreach_recursive(object_get_root(),
@@ -317,10 +333,8 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
             g_array_append_val(its_idmaps, next_range);
         }
 
-        nb_nodes = 3; /* RC, ITS, SMMUv3 */
         rc_mapping_count = smmu_idmaps->len + its_idmaps->len;
     } else {
-        nb_nodes = 2; /* RC, ITS */
         rc_mapping_count = 1;
     }
     /* Number of IORT Nodes */
@@ -342,10 +356,9 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
     /* GIC ITS Identifier Array */
     build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4);
 
-    if (vms->iommu == VIRT_IOMMU_SMMUV3) {
-        int irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE;
+    for (i = 0; i < num_smmus; i++) {
+        smmu_offset[i] = table_data->len - table.table_offset;
 
-        smmu_offset = table_data->len - table.table_offset;
         /* Table 9 SMMUv3 Format */
         build_append_int_noprefix(table_data, 4 /* SMMUv3 */, 1); /* Type */
         node_size = SMMU_V3_ENTRY_SIZE + ID_MAPPING_ENTRY_SIZE;
@@ -356,7 +369,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
         /* Reference to ID Array */
         build_append_int_noprefix(table_data, SMMU_V3_ENTRY_SIZE, 4);
         /* Base address */
-        build_append_int_noprefix(table_data, vms->memmap[VIRT_SMMU].base, 8);
+        build_append_int_noprefix(table_data, base + (i * SMMU_IO_LEN), 8);
         /* Flags */
         build_append_int_noprefix(table_data, 1 /* COHACC Override */, 4);
         build_append_int_noprefix(table_data, 0, 4); /* Reserved */
@@ -367,6 +380,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
         build_append_int_noprefix(table_data, irq + 1, 4); /* PRI */
         build_append_int_noprefix(table_data, irq + 3, 4); /* GERR */
         build_append_int_noprefix(table_data, irq + 2, 4); /* Sync */
+        irq += NUM_SMMU_IRQS;
         build_append_int_noprefix(table_data, 0, 4); /* Proximity domain */
         /* DeviceID mapping index (ignored since interrupts are GSIV based) */
         build_append_int_noprefix(table_data, 0, 4);
@@ -405,7 +419,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
     build_append_int_noprefix(table_data, 0, 3); /* Reserved */
 
     /* Output Reference */
-    if (vms->iommu == VIRT_IOMMU_SMMUV3) {
+    if (virt_has_smmuv3(vms)) {
         AcpiIortIdMapping *range;
 
         /* translated RIDs connect to SMMUv3 node: RC -> SMMUv3 -> ITS */
@@ -413,7 +427,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
             range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i);
             /* output IORT node is the smmuv3 node */
             build_iort_id_mapping(table_data, range->input_base,
-                                  range->id_count, smmu_offset);
+                                  range->id_count, smmu_offset[i]);
         }
 
         /* bypassed RIDs connect to ITS group node directly: RC -> ITS */
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 50e47a4ef3..304ab134ae 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -219,4 +219,10 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms)
             vms->highmem_redists) ? 2 : 1;
 }
 
+static inline bool virt_has_smmuv3(const VirtMachineState *vms)
+{
+    return vms->iommu == VIRT_IOMMU_SMMUV3 ||
+           vms->iommu == VIRT_IOMMU_SMMUV3_NESTED;
+}
+
 #endif /* QEMU_ARM_VIRT_H */
--
2.34.1
Hi Shameer,

On 11/8/24 13:52, Shameer Kolothum wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
>
> Now that we can have multiple user-creatable smmuv3-nested
> devices, each associated with a different PCI bus, update
> the IORT ID mappings accordingly.
>
[...]
> @@ -342,10 +356,9 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
>      /* GIC ITS Identifier Array */
>      build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4);
>
> -    if (vms->iommu == VIRT_IOMMU_SMMUV3) {
> -        int irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE;
> +    for (i = 0; i < num_smmus; i++) {
> +        smmu_offset[i] = table_data->len - table.table_offset;

I would have expected changes in the smmu idmap as well. If a given
SMMU instance now protects a given bus hierarchy, shouldn't that be
reflected in a differentiated SMMU idmap for each of them (the RID
subset of the SMMU->pci-bus mapping pointing to a specific IORT SMMU
node)? How is it done currently?

Thanks
Eric
> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: Monday, November 18, 2024 10:02 AM
> Subject: Re: [RFC PATCH 4/5] hw/arm/virt-acpi-build: Build IORT with
> multiple SMMU nodes
[...]
> I would have expected changes in the smmu idmap as well. If a given
> SMMU instance now protects a given bus hierarchy, shouldn't that be
> reflected in a differentiated SMMU idmap for each of them (the RID
> subset of the SMMU->pci-bus mapping pointing to a specific IORT SMMU
> node)? How is it done currently?

I thought that the smmu_idmaps would be handled by this:

    object_child_foreach_recursive(object_get_root(),
                                   iort_host_bridges, smmu_idmaps);

But it is possible that there is a bug in this IORT generation, as I am
not able to hot add devices. It looks like the pciehp interrupt is not
generated/received for some reason. Nicolin[0] suspects that the
min/max bus range in iort_host_bridges() may not leave enough range for
hot add later.

Cold plugging devices to different SMMUv3/pcie-pxb combinations seems
to be alright.

I will debug that soon.

Thanks,
Shameer

[0] https://lore.kernel.org/qemu-devel/ZzPd1F%2FUA2MKMbwl@Asurada-Nvidia/
Hi Shameer,

On 11/18/24 12:44, Shameerali Kolothum Thodi wrote:
>> I would have expected changes in the smmu idmap as well. If a given
>> SMMU instance now protects a given bus hierarchy, shouldn't that be
>> reflected in a differentiated SMMU idmap for each of them?
> I thought that the smmu_idmaps would be handled by this:
>
>     object_child_foreach_recursive(object_get_root(),
>                                    iort_host_bridges, smmu_idmaps);

To me this traverses the QEMU object hierarchy to find all host bridges
and for each of them builds an idmap array (smmu_idmaps, mapping this
RC RID range to this SMMU). But those idmaps will then be assigned to
*all* SMMU instances, leading to a wrong IORT description, because all
SMMUs will appear to protect all devices. You should only retain the
idmaps which correspond to the pci_bus a given vSMMU is attached to.
Then each SMMU would protect a distinct PCIe subtree, which does not
seem to be the case today. At least that's my current understanding.

Eric
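For illustration, a minimal C sketch of the filtering described above,
modelled on the existing iort_host_bridges() walker in
hw/arm/virt-acpi-build.c. The IortSmmuIdMapCtx context, its smmu_bus
field and the iort_host_bridges_for_smmu() name are hypothetical (the
bus would presumably come from each smmuv3-nested device's pci-bus
property); pci_bus_range(), pci_bus_bypass_iommu() and
TYPE_PCI_HOST_BRIDGE are existing QEMU API:

#include "qemu/osdep.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_host.h"

/* Mirrors the struct defined locally in hw/arm/virt-acpi-build.c */
typedef struct AcpiIortIdMapping {
    uint32_t input_base;
    uint32_t id_count;
} AcpiIortIdMapping;

/* Hypothetical per-instance context: ID mappings for ONE vSMMU */
typedef struct IortSmmuIdMapCtx {
    GArray *idmaps;   /* AcpiIortIdMapping entries for this SMMU */
    PCIBus *smmu_bus; /* bus this vSMMU instance is attached to */
} IortSmmuIdMapCtx;

static int iort_host_bridges_for_smmu(Object *obj, void *opaque)
{
    IortSmmuIdMapCtx *ctx = opaque;

    if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) {
        PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus;

        /* Only retain ranges owned by the bus this vSMMU protects */
        if (bus && !pci_bus_bypass_iommu(bus) && bus == ctx->smmu_bus) {
            int min_bus, max_bus;
            AcpiIortIdMapping idmap;

            pci_bus_range(bus, &min_bus, &max_bus);
            idmap.input_base = min_bus << 8;
            idmap.id_count = (max_bus - min_bus + 1) << 8;
            g_array_append_val(ctx->idmaps, idmap);
        }
    }
    return 0;
}

build_iort() would then run object_child_foreach_recursive() once per
vSMMU instance with that instance's context, so each RC->SMMUv3 mapping
points at the IORT node of the SMMU that actually owns the range.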
Hi Eric,

> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: Monday, November 18, 2024 1:46 PM
> Subject: Re: [RFC PATCH 4/5] hw/arm/virt-acpi-build: Build IORT with
> multiple SMMU nodes
[...]
> But those idmaps will then be assigned to *all* SMMU instances,
> leading to a wrong IORT description, because all SMMUs will appear to
> protect all devices. You should only retain the idmaps which
> correspond to the pci_bus a given vSMMU is attached to. Then each SMMU
> would protect a distinct PCIe subtree, which does not seem to be the
> case today.

Ah, right. I will fix that in the next version.

I think the above won't affect the basic case where I have only one
pcie-pxb/SMMUv3, but even in that case hot add does not seem to work.

I tried hacking the min/max ranges as suspected by Nicolin, but that is
still not enough to get it working. Do you have any hint on why the hot
add (described in my earlier mail) is not working?

Thanks,
Shameer
Hi Shameer,

On 11/18/24 16:00, Shameerali Kolothum Thodi wrote:
> I think the above won't affect the basic case where I have only one
> pcie-pxb/SMMUv3, but even in that case hot add does not seem to work.
>
> I tried hacking the min/max ranges as suspected by Nicolin, but that
> is still not enough to get it working. Do you have any hint on why the
> hot add is not working?

Hum, I thought the duplicate idmap could be the cause. Otherwise I have
no clue. I would advise fixing that first.

Eric
> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: Monday, November 18, 2024 6:10 PM
> Subject: Re: [RFC PATCH 4/5] hw/arm/virt-acpi-build: Build IORT with
> multiple SMMU nodes
[...]
> Hum, I thought the duplicate idmap could be the cause. Otherwise I
> have no clue. I would advise fixing that first.

I think I have an idea why the hot add was not working.

When we have a PCIe topology such as:

 -device pxb-pcie,id=pcie.1,bus_nr=8,bus=pcie.0 \
 -device pcie-root-port,id=pcie.port1,bus=pcie.1,chassis=1 \
 -device pcie-root-port,id=pcie.port2,bus=pcie.1,chassis=2 \
 -device arm-smmuv3-nested,id=smmuv1,pci-bus=pcie.1 \
 ...

the current IORT generation includes the pcie-root-port dev IDs in the
SMMUv3 node idmaps as well.

Hence, when the guest kernel loads, the pcieport devices are also
placed behind the SMMUv3:

[    1.466670] pcieport 0000:64:00.0: Adding to iommu group 1
...
[    1.448205] pcieport 0000:64:01.0: Adding to iommu group 2

So when we do a hot add,

 device_add vfio-pci,host=0000:75:00.1,bus=pcie.port1,iommufd=iommufd0

the QEMU hotplug event handler tries to inject an IRQ into the guest
pcieport by retrieving the MSI address it is configured with:

 hotplug_event_notify()
   msix_prepare_message(): [address: 0xfffff040]
   msix_notify()

The ITS address retrieved here is actually the SMMUv3-translated IOVA,
not the guest PA, so the guest never sees/receives the interrupt.

I hacked the IORT code to exclude the pcie-root-port dev IDs from the
SMMUv3 node idmaps, and hot add then seems to work fine.

It looks like we need to find all the pcie-root-port dev IDs associated
with an SMMUv3/pxb-pcie and exclude them from the SMMUv3 node idmaps to
get hot add working. I am not sure, though, whether this will create
other issues with the IOMMU isolation criteria (ACS etc.), especially
if we want to access the device in guest user space (I hope not).

Thanks,
Shameer
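To make the proposed exclusion concrete, a hedged sketch of one half of
it: walking the vSMMU's root bus and recording each root port's RID, so
those IDs could later be carved out of the RC->SMMUv3 idmap and routed
RC->ITS directly. collect_root_port_rids() and the rids array are
invented for illustration; pci_for_each_device(), PCI_BUILD_BDF() and
TYPE_PCIE_ROOT_PORT are existing QEMU API. Whether this violates any
IOMMU isolation expectations is exactly the open question above:

#include "qemu/osdep.h"
#include "hw/pci/pci.h"
#include "hw/pci/pcie_port.h"

/*
 * Record the RID of every PCIe root port found on 'bus' so the IORT
 * builder can punch holes for them in the SMMUv3 node's ID mappings.
 */
static void collect_root_port_rids(PCIBus *bus, PCIDevice *dev,
                                   void *opaque)
{
    GArray *rids = opaque;

    if (object_dynamic_cast(OBJECT(dev), TYPE_PCIE_ROOT_PORT)) {
        uint32_t rid = PCI_BUILD_BDF(pci_bus_num(bus), dev->devfn);

        g_array_append_val(rids, rid);
    }
}

/*
 * Illustrative caller, walking the root bus the vSMMU is attached to:
 *
 *   GArray *rids = g_array_new(false, true, sizeof(uint32_t));
 *   pci_for_each_device(smmu_bus, pci_bus_num(smmu_bus),
 *                       collect_root_port_rids, rids);
 */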
Hi Shameer,

On 11/20/24 15:16, Shameerali Kolothum Thodi wrote:
> The current IORT generation includes the pcie-root-port dev IDs in the
> SMMUv3 node idmaps as well.
>
> Hence, when the guest kernel loads, the pcieport devices are also
> placed behind the SMMUv3:
>
> [    1.466670] pcieport 0000:64:00.0: Adding to iommu group 1
> ...
> [    1.448205] pcieport 0000:64:01.0: Adding to iommu group 2

But shouldn't it be the same without multi-instantiation? I would have
expected this to be normal. Have you tested hot-plug without this
series lately? Do you see the same problem?

Thanks
Eric
Hi Eric,

> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: Wednesday, November 20, 2024 4:11 PM
> Subject: Re: [RFC PATCH 4/5] hw/arm/virt-acpi-build: Build IORT with
> multiple SMMU nodes
>
> But shouldn't it be the same without multi-instantiation? I would have
> expected this to be normal. Have you tested hot-plug without this
> series lately? Do you see the same problem?

That is a good question. I will give it a try soon and update.

Thanks,
Shameer.
Hi Eric,

> -----Original Message-----
> From: Shameerali Kolothum Thodi
> Sent: Wednesday, November 20, 2024 4:26 PM
> Subject: RE: [RFC PATCH 4/5] hw/arm/virt-acpi-build: Build IORT with
> multiple SMMU nodes
[...]
>> But shouldn't it be the same without multi-instantiation? I would
>> have expected this to be normal. Have you tested hot-plug without
>> this series lately? Do you see the same problem?
>
> That is a good question. I will give it a try soon and update.

I tried hot add with the current SMMUv3 (iommu=smmuv3), and hot add
works when I add a virtio device to a pcie-root-port connected to a
pxb-pcie.

And now I think I (hopefully) know the reason why it is not working in
the smmuv3-nested case. I think the root cause is this commit (series:
"cover-letter: Add HW accelerated nesting support for arm SMMUv3"):

https://github.com/hisilicon/qemu/commit/9b21f28595cef7b1100ae130974605f357ef75d3

It changes the way the address space is returned for the devices:

static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn)
{
    SMMUState *s = opaque;
    SMMUPciBus *sbus = smmu_get_sbus(s, bus);
    SMMUDevice *sdev = smmu_get_sdev(s, sbus, bus, devfn);

    /* Return the system AS if the device uses stage-2 only */
    if (s->nested && !sdev->s1_hwpt) {
        return &sdev->as_sysmem;
    } else {
        return &sdev->as;
    }
}

If we have entries in the SMMUv3 idmap for a bus:devfn, then I think we
should return the IOMMU address space here. But the logic above returns
the sysmem address space for anything other than vfio/iommufd devices.

Hot add works when I hack the logic to return the IOMMU address space
for pcie-root-port devices.

Could you please take a look at the commit above and let me know if
this could indeed be the problem?

Thanks,
Shameer
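For reference, a sketch of the hack described above, written against
the smmu_find_add_as() quoted from that branch. smmu_get_sbus(),
smmu_get_sdev(), s->nested, sdev->s1_hwpt and sdev->as_sysmem all come
from that out-of-tree series; the pci_find_device() root-port test is
my assumption of what the experiment may have looked like, not the
actual change:

static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn)
{
    SMMUState *s = opaque;
    SMMUPciBus *sbus = smmu_get_sbus(s, bus);
    SMMUDevice *sdev = smmu_get_sdev(s, sbus, bus, devfn);
    PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
    bool root_port = pdev &&
        object_dynamic_cast(OBJECT(pdev), TYPE_PCIE_ROOT_PORT);

    /*
     * Keep root ports in the translated (IOMMU) address space so the
     * MSI doorbell IOVA the guest programmed gets translated by the
     * vSMMU, matching what the IORT idmap told the guest.
     */
    if (s->nested && !sdev->s1_hwpt && !root_port) {
        return &sdev->as_sysmem;
    }
    return &sdev->as;
}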