Based on SMMUv3 as a parent device, add a user-creatable
smmuv3-nested device. Subsequent patches will add support to
specify a PCI bus for this device.
Currently only supported for "virt", so hook up the sysbus mem & irq
for that as well.
No FDT support is added for now.
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/arm/smmuv3.c | 34 ++++++++++++++++++++++++++++++++++
hw/arm/virt.c | 31 +++++++++++++++++++++++++++++--
hw/core/sysbus-fdt.c | 1 +
include/hw/arm/smmuv3.h | 15 +++++++++++++++
include/hw/arm/virt.h | 6 ++++++
5 files changed, 85 insertions(+), 2 deletions(-)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 2101031a8f..0033eb8125 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -2201,6 +2201,19 @@ static void smmu_realize(DeviceState *d, Error **errp)
smmu_init_irq(s, dev);
}
+static void smmu_nested_realize(DeviceState *d, Error **errp)
+{
+ SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d);
+ SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_GET_CLASS(s_nested);
+ Error *local_err = NULL;
+
+ c->parent_realize(d, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+}
+
static const VMStateDescription vmstate_smmuv3_queue = {
.name = "smmuv3_queue",
.version_id = 1,
@@ -2299,6 +2312,18 @@ static void smmuv3_class_init(ObjectClass *klass, void *data)
device_class_set_props(dc, smmuv3_properties);
}
+static void smmuv3_nested_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_CLASS(klass);
+
+ dc->vmsd = &vmstate_smmuv3;
+ device_class_set_parent_realize(dc, smmu_nested_realize,
+ &c->parent_realize);
+ dc->user_creatable = true;
+ dc->hotpluggable = false;
+}
+
static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
IOMMUNotifierFlag old,
IOMMUNotifierFlag new,
@@ -2337,6 +2362,14 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
imrc->notify_flag_changed = smmuv3_notify_flag_changed;
}
+static const TypeInfo smmuv3_nested_type_info = {
+ .name = TYPE_ARM_SMMUV3_NESTED,
+ .parent = TYPE_ARM_SMMUV3,
+ .instance_size = sizeof(SMMUv3NestedState),
+ .class_size = sizeof(SMMUv3NestedClass),
+ .class_init = smmuv3_nested_class_init,
+};
+
static const TypeInfo smmuv3_type_info = {
.name = TYPE_ARM_SMMUV3,
.parent = TYPE_ARM_SMMU,
@@ -2355,6 +2388,7 @@ static const TypeInfo smmuv3_iommu_memory_region_info = {
static void smmuv3_register_types(void)
{
type_register(&smmuv3_type_info);
+ type_register(&smmuv3_nested_type_info);
type_register(&smmuv3_iommu_memory_region_info);
}
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 780bcff77c..38075f9ab2 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -181,6 +181,7 @@ static const MemMapEntry base_memmap[] = {
[VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
[VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 },
[VIRT_MMIO] = { 0x0a000000, 0x00000200 },
+ [VIRT_SMMU_NESTED] = { 0x0b000000, 0x01000000 },
/* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
[VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
[VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 },
@@ -226,6 +227,7 @@ static const int a15irqmap[] = {
[VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
[VIRT_SMMU] = 74, /* ...to 74 + NUM_SMMU_IRQS - 1 */
[VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
+ [VIRT_SMMU_NESTED] = 200,
};
static void create_randomness(MachineState *ms, const char *node)
@@ -2883,10 +2885,34 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
+ MachineClass *mc = MACHINE_GET_CLASS(vms);
- if (vms->platform_bus_dev) {
- MachineClass *mc = MACHINE_GET_CLASS(vms);
+ /* For smmuv3-nested devices we need to set the mem & irq */
+ if (device_is_dynamic_sysbus(mc, dev) &&
+ object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3_NESTED)) {
+ hwaddr base = vms->memmap[VIRT_SMMU_NESTED].base;
+ int irq = vms->irqmap[VIRT_SMMU_NESTED];
+
+ if (vms->smmu_nested_count >= MAX_SMMU_NESTED) {
+ error_setg(errp, "smmuv3-nested max count reached!");
+ return;
+ }
+
+ base += (vms->smmu_nested_count * SMMU_IO_LEN);
+ irq += (vms->smmu_nested_count * NUM_SMMU_IRQS);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
+ for (int i = 0; i < 4; i++) {
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), i,
+ qdev_get_gpio_in(vms->gic, irq + i));
+ }
+ if (vms->iommu != VIRT_IOMMU_SMMUV3_NESTED) {
+ vms->iommu = VIRT_IOMMU_SMMUV3_NESTED;
+ }
+ vms->smmu_nested_count++;
+ }
+
+ if (vms->platform_bus_dev) {
if (device_is_dynamic_sysbus(mc, dev)) {
platform_bus_link_device(PLATFORM_BUS_DEVICE(vms->platform_bus_dev),
SYS_BUS_DEVICE(dev));
@@ -3067,6 +3093,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM);
+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ARM_SMMUV3_NESTED);
#ifdef CONFIG_TPM
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
#endif
diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
index eebcd28f9a..0f0d0b3e58 100644
--- a/hw/core/sysbus-fdt.c
+++ b/hw/core/sysbus-fdt.c
@@ -489,6 +489,7 @@ static const BindingEntry bindings[] = {
#ifdef CONFIG_LINUX
TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node),
TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node),
+ TYPE_BINDING("arm-smmuv3-nested", no_fdt_node),
VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node),
#endif
#ifdef CONFIG_TPM
diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
index d183a62766..87e628be7a 100644
--- a/include/hw/arm/smmuv3.h
+++ b/include/hw/arm/smmuv3.h
@@ -84,6 +84,21 @@ struct SMMUv3Class {
#define TYPE_ARM_SMMUV3 "arm-smmuv3"
OBJECT_DECLARE_TYPE(SMMUv3State, SMMUv3Class, ARM_SMMUV3)
+#define TYPE_ARM_SMMUV3_NESTED "arm-smmuv3-nested"
+OBJECT_DECLARE_TYPE(SMMUv3NestedState, SMMUv3NestedClass, ARM_SMMUV3_NESTED)
+
+struct SMMUv3NestedState {
+ SMMUv3State smmuv3_state;
+};
+
+struct SMMUv3NestedClass {
+ /*< private >*/
+ SMMUv3Class smmuv3_class;
+ /*< public >*/
+
+ DeviceRealize parent_realize;
+};
+
#define STAGE1_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S1P)
#define STAGE2_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S2P)
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 46f48fe561..50e47a4ef3 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -50,6 +50,9 @@
/* MMIO region size for SMMUv3 */
#define SMMU_IO_LEN 0x20000
+/* Max supported nested SMMUv3 */
+#define MAX_SMMU_NESTED 128
+
enum {
VIRT_FLASH,
VIRT_MEM,
@@ -62,6 +65,7 @@ enum {
VIRT_GIC_ITS,
VIRT_GIC_REDIST,
VIRT_SMMU,
+ VIRT_SMMU_NESTED,
VIRT_UART0,
VIRT_MMIO,
VIRT_RTC,
@@ -92,6 +96,7 @@ enum {
typedef enum VirtIOMMUType {
VIRT_IOMMU_NONE,
VIRT_IOMMU_SMMUV3,
+ VIRT_IOMMU_SMMUV3_NESTED,
VIRT_IOMMU_VIRTIO,
} VirtIOMMUType;
@@ -155,6 +160,7 @@ struct VirtMachineState {
bool mte;
bool dtb_randomness;
bool second_ns_uart_present;
+ int smmu_nested_count;
OnOffAuto acpi;
VirtGICType gic_version;
VirtIOMMUType iommu;
--
2.34.1
On 11/8/24 13:52, Shameer Kolothum wrote:
> Based on SMMUv3 as a parent device, add a user-creatable
> smmuv3-nested device. Subsequent patches will add support to
> specify a PCI bus for this device.
>
> Currently only supported for "virt", so hook up the sybus mem & irq
> for that as well.
>
> No FDT support is added for now.
>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
> hw/arm/smmuv3.c | 34 ++++++++++++++++++++++++++++++++++
> hw/arm/virt.c | 31 +++++++++++++++++++++++++++++--
> hw/core/sysbus-fdt.c | 1 +
> include/hw/arm/smmuv3.h | 15 +++++++++++++++
> include/hw/arm/virt.h | 6 ++++++
> 5 files changed, 85 insertions(+), 2 deletions(-)
>
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index 2101031a8f..0033eb8125 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -2201,6 +2201,19 @@ static void smmu_realize(DeviceState *d, Error **errp)
> smmu_init_irq(s, dev);
> }
>
> +static void smmu_nested_realize(DeviceState *d, Error **errp)
> +{
> + SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d);
> + SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_GET_CLASS(s_nested);
> + Error *local_err = NULL;
> +
> + c->parent_realize(d, &local_err);
> + if (local_err) {
> + error_propagate(errp, local_err);
> + return;
> + }
> +}
> +
> static const VMStateDescription vmstate_smmuv3_queue = {
> .name = "smmuv3_queue",
> .version_id = 1,
> @@ -2299,6 +2312,18 @@ static void smmuv3_class_init(ObjectClass *klass, void *data)
> device_class_set_props(dc, smmuv3_properties);
> }
>
> +static void smmuv3_nested_class_init(ObjectClass *klass, void *data)
> +{
> + DeviceClass *dc = DEVICE_CLASS(klass);
> + SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_CLASS(klass);
> +
> + dc->vmsd = &vmstate_smmuv3;
> + device_class_set_parent_realize(dc, smmu_nested_realize,
> + &c->parent_realize);
> + dc->user_creatable = true;
this may be set at the very end of the series eventually.
Eric
> + dc->hotpluggable = false;
> +}
> +
> static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
> IOMMUNotifierFlag old,
> IOMMUNotifierFlag new,
> @@ -2337,6 +2362,14 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
> imrc->notify_flag_changed = smmuv3_notify_flag_changed;
> }
>
> +static const TypeInfo smmuv3_nested_type_info = {
> + .name = TYPE_ARM_SMMUV3_NESTED,
> + .parent = TYPE_ARM_SMMUV3,
> + .instance_size = sizeof(SMMUv3NestedState),
> + .class_size = sizeof(SMMUv3NestedClass),
> + .class_init = smmuv3_nested_class_init,
> +};
> +
> static const TypeInfo smmuv3_type_info = {
> .name = TYPE_ARM_SMMUV3,
> .parent = TYPE_ARM_SMMU,
> @@ -2355,6 +2388,7 @@ static const TypeInfo smmuv3_iommu_memory_region_info = {
> static void smmuv3_register_types(void)
> {
> type_register(&smmuv3_type_info);
> + type_register(&smmuv3_nested_type_info);
> type_register(&smmuv3_iommu_memory_region_info);
> }
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 780bcff77c..38075f9ab2 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -181,6 +181,7 @@ static const MemMapEntry base_memmap[] = {
> [VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
> [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 },
> [VIRT_MMIO] = { 0x0a000000, 0x00000200 },
> + [VIRT_SMMU_NESTED] = { 0x0b000000, 0x01000000 },
> /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
> [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
> [VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 },
> @@ -226,6 +227,7 @@ static const int a15irqmap[] = {
> [VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
> [VIRT_SMMU] = 74, /* ...to 74 + NUM_SMMU_IRQS - 1 */
> [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
> + [VIRT_SMMU_NESTED] = 200,
> };
>
> static void create_randomness(MachineState *ms, const char *node)
> @@ -2883,10 +2885,34 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
> DeviceState *dev, Error **errp)
> {
> VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
> + MachineClass *mc = MACHINE_GET_CLASS(vms);
>
> - if (vms->platform_bus_dev) {
> - MachineClass *mc = MACHINE_GET_CLASS(vms);
> + /* For smmuv3-nested devices we need to set the mem & irq */
> + if (device_is_dynamic_sysbus(mc, dev) &&
> + object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3_NESTED)) {
> + hwaddr base = vms->memmap[VIRT_SMMU_NESTED].base;
> + int irq = vms->irqmap[VIRT_SMMU_NESTED];
> +
> + if (vms->smmu_nested_count >= MAX_SMMU_NESTED) {
> + error_setg(errp, "smmuv3-nested max count reached!");
> + return;
> + }
> +
> + base += (vms->smmu_nested_count * SMMU_IO_LEN);
> + irq += (vms->smmu_nested_count * NUM_SMMU_IRQS);
>
> + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
> + for (int i = 0; i < 4; i++) {
> + sysbus_connect_irq(SYS_BUS_DEVICE(dev), i,
> + qdev_get_gpio_in(vms->gic, irq + i));
> + }
> + if (vms->iommu != VIRT_IOMMU_SMMUV3_NESTED) {
> + vms->iommu = VIRT_IOMMU_SMMUV3_NESTED;
> + }
> + vms->smmu_nested_count++;
> + }
> +
> + if (vms->platform_bus_dev) {
> if (device_is_dynamic_sysbus(mc, dev)) {
> platform_bus_link_device(PLATFORM_BUS_DEVICE(vms->platform_bus_dev),
> SYS_BUS_DEVICE(dev));
> @@ -3067,6 +3093,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
> machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
> machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
> machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM);
> + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ARM_SMMUV3_NESTED);
> #ifdef CONFIG_TPM
> machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
> #endif
> diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
> index eebcd28f9a..0f0d0b3e58 100644
> --- a/hw/core/sysbus-fdt.c
> +++ b/hw/core/sysbus-fdt.c
> @@ -489,6 +489,7 @@ static const BindingEntry bindings[] = {
> #ifdef CONFIG_LINUX
> TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node),
> TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node),
> + TYPE_BINDING("arm-smmuv3-nested", no_fdt_node),
> VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node),
> #endif
> #ifdef CONFIG_TPM
> diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
> index d183a62766..87e628be7a 100644
> --- a/include/hw/arm/smmuv3.h
> +++ b/include/hw/arm/smmuv3.h
> @@ -84,6 +84,21 @@ struct SMMUv3Class {
> #define TYPE_ARM_SMMUV3 "arm-smmuv3"
> OBJECT_DECLARE_TYPE(SMMUv3State, SMMUv3Class, ARM_SMMUV3)
>
> +#define TYPE_ARM_SMMUV3_NESTED "arm-smmuv3-nested"
> +OBJECT_DECLARE_TYPE(SMMUv3NestedState, SMMUv3NestedClass, ARM_SMMUV3_NESTED)
> +
> +struct SMMUv3NestedState {
> + SMMUv3State smmuv3_state;
> +};
> +
> +struct SMMUv3NestedClass {
> + /*< private >*/
> + SMMUv3Class smmuv3_class;
> + /*< public >*/
> +
> + DeviceRealize parent_realize;
> +};
> +
> #define STAGE1_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S1P)
> #define STAGE2_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S2P)
>
> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> index 46f48fe561..50e47a4ef3 100644
> --- a/include/hw/arm/virt.h
> +++ b/include/hw/arm/virt.h
> @@ -50,6 +50,9 @@
> /* MMIO region size for SMMUv3 */
> #define SMMU_IO_LEN 0x20000
>
> +/* Max supported nested SMMUv3 */
> +#define MAX_SMMU_NESTED 128
> +
> enum {
> VIRT_FLASH,
> VIRT_MEM,
> @@ -62,6 +65,7 @@ enum {
> VIRT_GIC_ITS,
> VIRT_GIC_REDIST,
> VIRT_SMMU,
> + VIRT_SMMU_NESTED,
> VIRT_UART0,
> VIRT_MMIO,
> VIRT_RTC,
> @@ -92,6 +96,7 @@ enum {
> typedef enum VirtIOMMUType {
> VIRT_IOMMU_NONE,
> VIRT_IOMMU_SMMUV3,
> + VIRT_IOMMU_SMMUV3_NESTED,
> VIRT_IOMMU_VIRTIO,
> } VirtIOMMUType;
>
> @@ -155,6 +160,7 @@ struct VirtMachineState {
> bool mte;
> bool dtb_randomness;
> bool second_ns_uart_present;
> + int smmu_nested_count;
> OnOffAuto acpi;
> VirtGICType gic_version;
> VirtIOMMUType iommu;
Hi Shameer,
On 11/8/24 13:52, Shameer Kolothum wrote:
> Based on SMMUv3 as a parent device, add a user-creatable
> smmuv3-nested device. Subsequent patches will add support to
> specify a PCI bus for this device.
>
> Currently only supported for "virt", so hook up the sybus mem & irq
> for that as well.
>
> No FDT support is added for now.
>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
> hw/arm/smmuv3.c | 34 ++++++++++++++++++++++++++++++++++
> hw/arm/virt.c | 31 +++++++++++++++++++++++++++++--
> hw/core/sysbus-fdt.c | 1 +
> include/hw/arm/smmuv3.h | 15 +++++++++++++++
> include/hw/arm/virt.h | 6 ++++++
> 5 files changed, 85 insertions(+), 2 deletions(-)
>
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index 2101031a8f..0033eb8125 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -2201,6 +2201,19 @@ static void smmu_realize(DeviceState *d, Error **errp)
> smmu_init_irq(s, dev);
> }
>
> +static void smmu_nested_realize(DeviceState *d, Error **errp)
> +{
> + SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d);
nit: s/s_nested/ns or just s?
> + SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_GET_CLASS(s_nested);
> + Error *local_err = NULL;
> +
> + c->parent_realize(d, &local_err);
I think it is safe to use errp directly here.
> + if (local_err) {
> + error_propagate(errp, local_err);
> + return;
> + }
> +}
> +
> static const VMStateDescription vmstate_smmuv3_queue = {
> .name = "smmuv3_queue",
> .version_id = 1,
> @@ -2299,6 +2312,18 @@ static void smmuv3_class_init(ObjectClass *klass, void *data)
> device_class_set_props(dc, smmuv3_properties);
> }
>
> +static void smmuv3_nested_class_init(ObjectClass *klass, void *data)
> +{
> + DeviceClass *dc = DEVICE_CLASS(klass);
> + SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_CLASS(klass);
> +
> + dc->vmsd = &vmstate_smmuv3;
> + device_class_set_parent_realize(dc, smmu_nested_realize,
> + &c->parent_realize);
> + dc->user_creatable = true;
> + dc->hotpluggable = false;
> +}
> +
> static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
> IOMMUNotifierFlag old,
> IOMMUNotifierFlag new,
> @@ -2337,6 +2362,14 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
> imrc->notify_flag_changed = smmuv3_notify_flag_changed;
> }
>
> +static const TypeInfo smmuv3_nested_type_info = {
> + .name = TYPE_ARM_SMMUV3_NESTED,
> + .parent = TYPE_ARM_SMMUV3,
> + .instance_size = sizeof(SMMUv3NestedState),
> + .class_size = sizeof(SMMUv3NestedClass),
> + .class_init = smmuv3_nested_class_init,
> +};
> +
> static const TypeInfo smmuv3_type_info = {
> .name = TYPE_ARM_SMMUV3,
> .parent = TYPE_ARM_SMMU,
> @@ -2355,6 +2388,7 @@ static const TypeInfo smmuv3_iommu_memory_region_info = {
> static void smmuv3_register_types(void)
> {
> type_register(&smmuv3_type_info);
> + type_register(&smmuv3_nested_type_info);
> type_register(&smmuv3_iommu_memory_region_info);
> }
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 780bcff77c..38075f9ab2 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -181,6 +181,7 @@ static const MemMapEntry base_memmap[] = {
> [VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
> [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 },
> [VIRT_MMIO] = { 0x0a000000, 0x00000200 },
> + [VIRT_SMMU_NESTED] = { 0x0b000000, 0x01000000 },
I agree with Mostafa that the _NESTED terminology may not be the best
choice.
The motivation behind that multi-instance attempt, as introduced in
https://lore.kernel.org/all/ZEcT%2F7erkhHDaNvD@Asurada-Nvidia/
was:
- SMMUs with different feature bits
- support of VCMDQ HW extension for SMMU CMDQ
- need for separate S1 invalidation paths
If I understand correctly this is mostly wanted for VCMDQ handling? If
this is correct, we may indicate that somehow in the terminology.
If I understand correctly VCMDQ terminology is NVidia specific while
ECMDQ is the baseline (?).
> /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
> [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
> [VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 },
> @@ -226,6 +227,7 @@ static const int a15irqmap[] = {
> [VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
> [VIRT_SMMU] = 74, /* ...to 74 + NUM_SMMU_IRQS - 1 */
> [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
> + [VIRT_SMMU_NESTED] = 200,
What is the max number of IRQs expected to be consumed? Worth a comment for the
next interrupt user.
> };
>
> static void create_randomness(MachineState *ms, const char *node)
> @@ -2883,10 +2885,34 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
> DeviceState *dev, Error **errp)
> {
> VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
> + MachineClass *mc = MACHINE_GET_CLASS(vms);
>
> - if (vms->platform_bus_dev) {
> - MachineClass *mc = MACHINE_GET_CLASS(vms);
> + /* For smmuv3-nested devices we need to set the mem & irq */
> + if (device_is_dynamic_sysbus(mc, dev) &&
> + object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3_NESTED)) {
why did you choose not to use the PLATFORM BUS infra, which does that kind
of binding automatically (it also provisions dedicated MMIOs and
IRQs)? At least you would need to justify that in the commit msg, I think
> + hwaddr base = vms->memmap[VIRT_SMMU_NESTED].base;
> + int irq = vms->irqmap[VIRT_SMMU_NESTED];
> +
> + if (vms->smmu_nested_count >= MAX_SMMU_NESTED) {
> + error_setg(errp, "smmuv3-nested max count reached!");
> + return;
> + }
> +
> + base += (vms->smmu_nested_count * SMMU_IO_LEN);
> + irq += (vms->smmu_nested_count * NUM_SMMU_IRQS);
>
> + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
> + for (int i = 0; i < 4; i++) {
> + sysbus_connect_irq(SYS_BUS_DEVICE(dev), i,
> + qdev_get_gpio_in(vms->gic, irq + i));
> + }
> + if (vms->iommu != VIRT_IOMMU_SMMUV3_NESTED) {
> + vms->iommu = VIRT_IOMMU_SMMUV3_NESTED;
> + }
> + vms->smmu_nested_count++;
this kind of check would definitely not be integrated in the platform bus,
but it could be introduced generically in the framework, or
special-cased after the platform_bus_link_device call
> + }
> +
> + if (vms->platform_bus_dev) {
> if (device_is_dynamic_sysbus(mc, dev)) {
> platform_bus_link_device(PLATFORM_BUS_DEVICE(vms->platform_bus_dev),
> SYS_BUS_DEVICE(dev));
> @@ -3067,6 +3093,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
> machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
> machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
> machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM);
> + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ARM_SMMUV3_NESTED);
> #ifdef CONFIG_TPM
> machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
> #endif
> diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
> index eebcd28f9a..0f0d0b3e58 100644
> --- a/hw/core/sysbus-fdt.c
> +++ b/hw/core/sysbus-fdt.c
> @@ -489,6 +489,7 @@ static const BindingEntry bindings[] = {
> #ifdef CONFIG_LINUX
> TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node),
> TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node),
> + TYPE_BINDING("arm-smmuv3-nested", no_fdt_node),
> VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node),
> #endif
> #ifdef CONFIG_TPM
> diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
> index d183a62766..87e628be7a 100644
> --- a/include/hw/arm/smmuv3.h
> +++ b/include/hw/arm/smmuv3.h
> @@ -84,6 +84,21 @@ struct SMMUv3Class {
> #define TYPE_ARM_SMMUV3 "arm-smmuv3"
> OBJECT_DECLARE_TYPE(SMMUv3State, SMMUv3Class, ARM_SMMUV3)
>
> +#define TYPE_ARM_SMMUV3_NESTED "arm-smmuv3-nested"
> +OBJECT_DECLARE_TYPE(SMMUv3NestedState, SMMUv3NestedClass, ARM_SMMUV3_NESTED)
> +
> +struct SMMUv3NestedState {
> + SMMUv3State smmuv3_state;
> +};
> +
> +struct SMMUv3NestedClass {
> + /*< private >*/
> + SMMUv3Class smmuv3_class;
> + /*< public >*/
> +
> + DeviceRealize parent_realize;
> +};
> +
> #define STAGE1_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S1P)
> #define STAGE2_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S2P)
>
> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> index 46f48fe561..50e47a4ef3 100644
> --- a/include/hw/arm/virt.h
> +++ b/include/hw/arm/virt.h
> @@ -50,6 +50,9 @@
> /* MMIO region size for SMMUv3 */
> #define SMMU_IO_LEN 0x20000
>
> +/* Max supported nested SMMUv3 */
> +#define MAX_SMMU_NESTED 128
Ouch, that many?!
> +
> enum {
> VIRT_FLASH,
> VIRT_MEM,
> @@ -62,6 +65,7 @@ enum {
> VIRT_GIC_ITS,
> VIRT_GIC_REDIST,
> VIRT_SMMU,
> + VIRT_SMMU_NESTED,
> VIRT_UART0,
> VIRT_MMIO,
> VIRT_RTC,
> @@ -92,6 +96,7 @@ enum {
> typedef enum VirtIOMMUType {
> VIRT_IOMMU_NONE,
> VIRT_IOMMU_SMMUV3,
> + VIRT_IOMMU_SMMUV3_NESTED,
> VIRT_IOMMU_VIRTIO,
> } VirtIOMMUType;
>
> @@ -155,6 +160,7 @@ struct VirtMachineState {
> bool mte;
> bool dtb_randomness;
> bool second_ns_uart_present;
> + int smmu_nested_count;
> OnOffAuto acpi;
> VirtGICType gic_version;
> VirtIOMMUType iommu;
Thanks
Eric
Hi Eric,
> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: Wednesday, November 13, 2024 5:12 PM
> To: Shameerali Kolothum Thodi
> <shameerali.kolothum.thodi@huawei.com>; qemu-arm@nongnu.org;
> qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; jgg@nvidia.com; nicolinc@nvidia.com;
> ddutile@redhat.com; Linuxarm <linuxarm@huawei.com>; Wangzhou (B)
> <wangzhou1@hisilicon.com>; jiangkunkun <jiangkunkun@huawei.com>;
> Jonathan Cameron <jonathan.cameron@huawei.com>;
> zhangfei.gao@linaro.org
> Subject: Re: [RFC PATCH 2/5] hw/arm/smmuv3: Add initial support for
> SMMUv3 Nested device
>
> Hi Shameer,
> On 11/8/24 13:52, Shameer Kolothum wrote:
> > Based on SMMUv3 as a parent device, add a user-creatable smmuv3-
> nested
> > device. Subsequent patches will add support to specify a PCI bus for
> > this device.
> >
> > Currently only supported for "virt", so hook up the sybus mem & irq
> > for that as well.
> >
> > No FDT support is added for now.
> >
> > Signed-off-by: Shameer Kolothum
> <shameerali.kolothum.thodi@huawei.com>
> > ---
> > hw/arm/smmuv3.c | 34 ++++++++++++++++++++++++++++++++++
> > hw/arm/virt.c | 31 +++++++++++++++++++++++++++++--
> > hw/core/sysbus-fdt.c | 1 +
> > include/hw/arm/smmuv3.h | 15 +++++++++++++++
> > include/hw/arm/virt.h | 6 ++++++
> > 5 files changed, 85 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index
> > 2101031a8f..0033eb8125 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -2201,6 +2201,19 @@ static void smmu_realize(DeviceState *d, Error
> **errp)
> > smmu_init_irq(s, dev);
> > }
> >
> > +static void smmu_nested_realize(DeviceState *d, Error **errp) {
> > + SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d);
> nit: s/s_nested/ns or just s?
> > + SMMUv3NestedClass *c =
> ARM_SMMUV3_NESTED_GET_CLASS(s_nested);
> > + Error *local_err = NULL;
> > +
> > + c->parent_realize(d, &local_err);
> I think it is safe to use errp directly here.
Ok.
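Something like this, I suppose (untested sketch, just dropping local_err and
passing errp straight through):

static void smmu_nested_realize(DeviceState *d, Error **errp)
{
    SMMUv3NestedState *s = ARM_SMMUV3_NESTED(d);
    SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_GET_CLASS(s);

    /* Let the parent (arm-smmuv3) realize report errors via errp directly */
    c->parent_realize(d, errp);
}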
> > + if (local_err) {
> > + error_propagate(errp, local_err);
> > + return;
> > + }
> > +}
> > +
> > static const VMStateDescription vmstate_smmuv3_queue = {
> > .name = "smmuv3_queue",
> > .version_id = 1,
> > @@ -2299,6 +2312,18 @@ static void smmuv3_class_init(ObjectClass
> *klass, void *data)
> > device_class_set_props(dc, smmuv3_properties); }
> >
> > +static void smmuv3_nested_class_init(ObjectClass *klass, void *data)
> > +{
> > + DeviceClass *dc = DEVICE_CLASS(klass);
> > + SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_CLASS(klass);
> > +
> > + dc->vmsd = &vmstate_smmuv3;
> > + device_class_set_parent_realize(dc, smmu_nested_realize,
> > + &c->parent_realize);
> > + dc->user_creatable = true;
> > + dc->hotpluggable = false;
> > +}
> > +
> > static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
> > IOMMUNotifierFlag old,
> > IOMMUNotifierFlag new, @@
> > -2337,6 +2362,14 @@ static void
> smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
> > imrc->notify_flag_changed = smmuv3_notify_flag_changed; }
> >
> > +static const TypeInfo smmuv3_nested_type_info = {
> > + .name = TYPE_ARM_SMMUV3_NESTED,
> > + .parent = TYPE_ARM_SMMUV3,
> > + .instance_size = sizeof(SMMUv3NestedState),
> > + .class_size = sizeof(SMMUv3NestedClass),
> > + .class_init = smmuv3_nested_class_init,
> > +};
> > +
> > static const TypeInfo smmuv3_type_info = {
> > .name = TYPE_ARM_SMMUV3,
> > .parent = TYPE_ARM_SMMU,
> > @@ -2355,6 +2388,7 @@ static const TypeInfo
> > smmuv3_iommu_memory_region_info = { static void
> > smmuv3_register_types(void) {
> > type_register(&smmuv3_type_info);
> > + type_register(&smmuv3_nested_type_info);
> > type_register(&smmuv3_iommu_memory_region_info);
> > }
> >
> > diff --git a/hw/arm/virt.c b/hw/arm/virt.c index
> > 780bcff77c..38075f9ab2 100644
> > --- a/hw/arm/virt.c
> > +++ b/hw/arm/virt.c
> > @@ -181,6 +181,7 @@ static const MemMapEntry base_memmap[] = {
> > [VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
> > [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 },
> > [VIRT_MMIO] = { 0x0a000000, 0x00000200 },
> > + [VIRT_SMMU_NESTED] = { 0x0b000000, 0x01000000 },
> I agree with Mostafa that the _NESTED terminology may not be the best
> choice.
Yes. Agree.
> The motivation behind that multi-instance attempt, as introduced in
> https://lore.kernel.org/all/ZEcT%2F7erkhHDaNvD@Asurada-Nvidia/
> was:
> - SMMUs with different feature bits
> - support of VCMDQ HW extension for SMMU CMDQ
> - need for separate S1 invalidation paths
>
> If I understand correctly this is mostly wanted for VCMDQ handling? if this
> is correct we may indicate that somehow in the terminology.
>
Not just for VCMDQ, but it benefits when we have multiple physical SMMUv3
instances as well.
> If I understand correctly VCMDQ terminology is NVidia specific while ECMDQ
> is the baseline (?).
Yes, VCMDQ is NVIDIA specific. And ECMDQ is baseline ARM SMMUv3, but I don't think we
can associate an ECMDQ with a virtual SMMUv3.
> > /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that
> size */
> > [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
> > [VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 },
> > @@ -226,6 +227,7 @@ static const int a15irqmap[] = {
> > [VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
> > [VIRT_SMMU] = 74, /* ...to 74 + NUM_SMMU_IRQS - 1 */
> > [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS
> > -1 */
> > + [VIRT_SMMU_NESTED] = 200,
> What is the max IRQs expected to be consumed. Wother to comment for
> next interrupt user.
It depends on how many we plan to support at maximum; each instance requires a
minimum of 4 IRQs. I will update with a comment here.
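Something along these lines, perhaps (the exact numbers depend on the final max
we settle on, so the formula below is only illustrative):

    [VIRT_SMMU_NESTED] = 200, /* ...to 200 + (MAX_SMMU_NESTED * NUM_SMMU_IRQS) - 1 */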
> > };
> >
> > static void create_randomness(MachineState *ms, const char *node) @@
> > -2883,10 +2885,34 @@ static void
> virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
> > DeviceState *dev, Error
> > **errp) {
> > VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
> > + MachineClass *mc = MACHINE_GET_CLASS(vms);
> >
> > - if (vms->platform_bus_dev) {
> > - MachineClass *mc = MACHINE_GET_CLASS(vms);
> > + /* For smmuv3-nested devices we need to set the mem & irq */
> > + if (device_is_dynamic_sysbus(mc, dev) &&
> > + object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3_NESTED)) {
> why did you choose not using the PLATFORM BUS infra which does that
> kind of binding automatically (also it provisions for dedicated MMIOs and
> IRQs). At least you would need to justify in the commit msg I think
Because I was not sure how to do this binding otherwise. I couldn't find
any precedent for a dynamic platform bus device binding
MMIOs/IRQs (maybe I didn't look hard enough). I mentioned it in the cover letter.
Could you please give me some pointers/examples for this? I will also
take another look.
> > + hwaddr base = vms->memmap[VIRT_SMMU_NESTED].base;
> > + int irq = vms->irqmap[VIRT_SMMU_NESTED];
> > +
> > + if (vms->smmu_nested_count >= MAX_SMMU_NESTED) {
> > + error_setg(errp, "smmuv3-nested max count reached!");
> > + return;
> > + }
> > +
> > + base += (vms->smmu_nested_count * SMMU_IO_LEN);
> > + irq += (vms->smmu_nested_count * NUM_SMMU_IRQS);
> >
> > + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
> > + for (int i = 0; i < 4; i++) {
> > + sysbus_connect_irq(SYS_BUS_DEVICE(dev), i,
> > + qdev_get_gpio_in(vms->gic, irq + i));
> > + }
> > + if (vms->iommu != VIRT_IOMMU_SMMUV3_NESTED) {
> > + vms->iommu = VIRT_IOMMU_SMMUV3_NESTED;
> > + }
> > + vms->smmu_nested_count++;
> this kind of check would definitively not integrated in the platform bus but
> this could be introduced generically in the framework though or special
> cased after the platform_bus_link_device
Ok. So I assume there is a better way to link the MMIOs/IRQs, as you mentioned
above, and we can add another helper to track this count as well.
> > + }
> > +
> > + if (vms->platform_bus_dev) {
> > if (device_is_dynamic_sysbus(mc, dev)) {
> > platform_bus_link_device(PLATFORM_BUS_DEVICE(vms-
> >platform_bus_dev),
> > SYS_BUS_DEVICE(dev)); @@ -3067,6
> > +3093,7 @@ static void virt_machine_class_init(ObjectClass *oc, void
> *data)
> > machine_class_allow_dynamic_sysbus_dev(mc,
> TYPE_VFIO_AMD_XGBE);
> > machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
> > machine_class_allow_dynamic_sysbus_dev(mc,
> TYPE_VFIO_PLATFORM);
> > + machine_class_allow_dynamic_sysbus_dev(mc,
> > + TYPE_ARM_SMMUV3_NESTED);
> > #ifdef CONFIG_TPM
> > machine_class_allow_dynamic_sysbus_dev(mc,
> TYPE_TPM_TIS_SYSBUS);
> > #endif diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c index
> > eebcd28f9a..0f0d0b3e58 100644
> > --- a/hw/core/sysbus-fdt.c
> > +++ b/hw/core/sysbus-fdt.c
> > @@ -489,6 +489,7 @@ static const BindingEntry bindings[] = { #ifdef
> > CONFIG_LINUX
> > TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC,
> add_calxeda_midway_xgmac_fdt_node),
> > TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node),
> > + TYPE_BINDING("arm-smmuv3-nested", no_fdt_node),
> > VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a",
> > add_amd_xgbe_fdt_node), #endif #ifdef CONFIG_TPM diff --git
> > a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h index
> > d183a62766..87e628be7a 100644
> > --- a/include/hw/arm/smmuv3.h
> > +++ b/include/hw/arm/smmuv3.h
> > @@ -84,6 +84,21 @@ struct SMMUv3Class {
> > #define TYPE_ARM_SMMUV3 "arm-smmuv3"
> > OBJECT_DECLARE_TYPE(SMMUv3State, SMMUv3Class, ARM_SMMUV3)
> >
> > +#define TYPE_ARM_SMMUV3_NESTED "arm-smmuv3-nested"
> > +OBJECT_DECLARE_TYPE(SMMUv3NestedState, SMMUv3NestedClass,
> > +ARM_SMMUV3_NESTED)
> > +
> > +struct SMMUv3NestedState {
> > + SMMUv3State smmuv3_state;
> > +};
> > +
> > +struct SMMUv3NestedClass {
> > + /*< private >*/
> > + SMMUv3Class smmuv3_class;
> > + /*< public >*/
> > +
> > + DeviceRealize parent_realize;
> > +};
> > +
> > #define STAGE1_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S1P)
> > #define STAGE2_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S2P)
> >
> > diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index
> > 46f48fe561..50e47a4ef3 100644
> > --- a/include/hw/arm/virt.h
> > +++ b/include/hw/arm/virt.h
> > @@ -50,6 +50,9 @@
> > /* MMIO region size for SMMUv3 */
> > #define SMMU_IO_LEN 0x20000
> >
> > +/* Max supported nested SMMUv3 */
> > +#define MAX_SMMU_NESTED 128
> Ouch, that many?!
😊. I just came up with the max we can support within the allocated MMIO space.
We do have systems at present which have 8 physical SMMUv3s.
Probably 16/32 would be a better number, I guess.
Thanks,
Shameer
On Thu, Nov 14, 2024 at 08:20:08AM +0000, Shameerali Kolothum Thodi wrote:
> > > diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index
> > > 46f48fe561..50e47a4ef3 100644
> > > --- a/include/hw/arm/virt.h
> > > +++ b/include/hw/arm/virt.h
> > > @@ -50,6 +50,9 @@
> > > /* MMIO region size for SMMUv3 */
> > > #define SMMU_IO_LEN 0x20000
> > >
> > > +/* Max supported nested SMMUv3 */
> > > +#define MAX_SMMU_NESTED 128
> > Ouch, that many?!
> 😊. I just came up with the max we can support the allocated MMIO space.
> We do have systems at present which has 8 physical SMMUv3s at the moment.
> Probably 16/32 would be a better number I guess.
FWIW, we have systems having 20 physical SMMUs at this moment :)
Thanks
Nicolin
Hi Shameer,
On 11/14/24 09:20, Shameerali Kolothum Thodi wrote:
> Hi Eric,
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: Wednesday, November 13, 2024 5:12 PM
>> To: Shameerali Kolothum Thodi
>> <shameerali.kolothum.thodi@huawei.com>; qemu-arm@nongnu.org;
>> qemu-devel@nongnu.org
>> Cc: peter.maydell@linaro.org; jgg@nvidia.com; nicolinc@nvidia.com;
>> ddutile@redhat.com; Linuxarm <linuxarm@huawei.com>; Wangzhou (B)
>> <wangzhou1@hisilicon.com>; jiangkunkun <jiangkunkun@huawei.com>;
>> Jonathan Cameron <jonathan.cameron@huawei.com>;
>> zhangfei.gao@linaro.org
>> Subject: Re: [RFC PATCH 2/5] hw/arm/smmuv3: Add initial support for
>> SMMUv3 Nested device
>>
>> Hi Shameer,
>> On 11/8/24 13:52, Shameer Kolothum wrote:
>>> Based on SMMUv3 as a parent device, add a user-creatable smmuv3-
>> nested
>>> device. Subsequent patches will add support to specify a PCI bus for
>>> this device.
>>>
>>> Currently only supported for "virt", so hook up the sybus mem & irq
>>> for that as well.
>>>
>>> No FDT support is added for now.
>>>
>>> Signed-off-by: Shameer Kolothum
>> <shameerali.kolothum.thodi@huawei.com>
>>> ---
>>> hw/arm/smmuv3.c | 34 ++++++++++++++++++++++++++++++++++
>>> hw/arm/virt.c | 31 +++++++++++++++++++++++++++++--
>>> hw/core/sysbus-fdt.c | 1 +
>>> include/hw/arm/smmuv3.h | 15 +++++++++++++++
>>> include/hw/arm/virt.h | 6 ++++++
>>> 5 files changed, 85 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index
>>> 2101031a8f..0033eb8125 100644
>>> --- a/hw/arm/smmuv3.c
>>> +++ b/hw/arm/smmuv3.c
>>> @@ -2201,6 +2201,19 @@ static void smmu_realize(DeviceState *d, Error
>> **errp)
>>> smmu_init_irq(s, dev);
>>> }
>>>
>>> +static void smmu_nested_realize(DeviceState *d, Error **errp) {
>>> + SMMUv3NestedState *s_nested = ARM_SMMUV3_NESTED(d);
>> nit: s/s_nested/ns or just s?
>>> + SMMUv3NestedClass *c =
>> ARM_SMMUV3_NESTED_GET_CLASS(s_nested);
>>> + Error *local_err = NULL;
>>> +
>>> + c->parent_realize(d, &local_err);
>> I think it is safe to use errp directly here.
> Ok.
>
>>> + if (local_err) {
>>> + error_propagate(errp, local_err);
>>> + return;
>>> + }
>>> +}
>>> +
>>> static const VMStateDescription vmstate_smmuv3_queue = {
>>> .name = "smmuv3_queue",
>>> .version_id = 1,
>>> @@ -2299,6 +2312,18 @@ static void smmuv3_class_init(ObjectClass
>> *klass, void *data)
>>> device_class_set_props(dc, smmuv3_properties); }
>>>
>>> +static void smmuv3_nested_class_init(ObjectClass *klass, void *data)
>>> +{
>>> + DeviceClass *dc = DEVICE_CLASS(klass);
>>> + SMMUv3NestedClass *c = ARM_SMMUV3_NESTED_CLASS(klass);
>>> +
>>> + dc->vmsd = &vmstate_smmuv3;
>>> + device_class_set_parent_realize(dc, smmu_nested_realize,
>>> + &c->parent_realize);
>>> + dc->user_creatable = true;
>>> + dc->hotpluggable = false;
>>> +}
>>> +
>>> static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
>>> IOMMUNotifierFlag old,
>>> IOMMUNotifierFlag new, @@
>>> -2337,6 +2362,14 @@ static void
>> smmuv3_iommu_memory_region_class_init(ObjectClass *klass,
>>> imrc->notify_flag_changed = smmuv3_notify_flag_changed; }
>>>
>>> +static const TypeInfo smmuv3_nested_type_info = {
>>> + .name = TYPE_ARM_SMMUV3_NESTED,
>>> + .parent = TYPE_ARM_SMMUV3,
>>> + .instance_size = sizeof(SMMUv3NestedState),
>>> + .class_size = sizeof(SMMUv3NestedClass),
>>> + .class_init = smmuv3_nested_class_init,
>>> +};
>>> +
>>> static const TypeInfo smmuv3_type_info = {
>>> .name = TYPE_ARM_SMMUV3,
>>> .parent = TYPE_ARM_SMMU,
>>> @@ -2355,6 +2388,7 @@ static const TypeInfo
>>> smmuv3_iommu_memory_region_info = { static void
>>> smmuv3_register_types(void) {
>>> type_register(&smmuv3_type_info);
>>> + type_register(&smmuv3_nested_type_info);
>>> type_register(&smmuv3_iommu_memory_region_info);
>>> }
>>>
>>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c index
>>> 780bcff77c..38075f9ab2 100644
>>> --- a/hw/arm/virt.c
>>> +++ b/hw/arm/virt.c
>>> @@ -181,6 +181,7 @@ static const MemMapEntry base_memmap[] = {
>>> [VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
>>> [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 },
>>> [VIRT_MMIO] = { 0x0a000000, 0x00000200 },
>>> + [VIRT_SMMU_NESTED] = { 0x0b000000, 0x01000000 },
>> I agree with Mostafa that the _NESTED terminology may not be the best
>> choice.
> Yes. Agree.
Nicolin's suggestion to use a reference to HW acceleration looks
sensible to me.
>
>> The motivation behind that multi-instance attempt, as introduced in
>> https://lore.kernel.org/all/ZEcT%2F7erkhHDaNvD@Asurada-Nvidia/
>> was:
>> - SMMUs with different feature bits
>> - support of VCMDQ HW extension for SMMU CMDQ
>> - need for separate S1 invalidation paths
>>
>> If I understand correctly this is mostly wanted for VCMDQ handling? if this
>> is correct we may indicate that somehow in the terminology.
>>
> Not just for VCMDQ, but it benefits when we have multiple physical SMMUv3
> instances as well.
>
>> If I understand correctly VCMDQ terminology is NVidia specific while ECMDQ
>> is the baseline (?).
> Yes, VCMDQ is NVIDIA specific. And ECMDQ is ARM SMMUv3, but don’t think we
> can associate ECMDQ with a virtual SMMUv3.
ok
>
>>> /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that
>> size */
>>> [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
>>> [VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 },
>>> @@ -226,6 +227,7 @@ static const int a15irqmap[] = {
>>> [VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
>>> [VIRT_SMMU] = 74, /* ...to 74 + NUM_SMMU_IRQS - 1 */
>>> [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS
>>> -1 */
>>> + [VIRT_SMMU_NESTED] = 200,
>> What is the max IRQs expected to be consumed. Wother to comment for
>> next interrupt user.
> Depends on how many we plan to support max and each requires minimum 4. I will
> update with a comment here.
>
>>> };
>>>
>>> static void create_randomness(MachineState *ms, const char *node) @@
>>> -2883,10 +2885,34 @@ static void
>> virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
>>> DeviceState *dev, Error
>>> **errp) {
>>> VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
>>> + MachineClass *mc = MACHINE_GET_CLASS(vms);
>>>
>>> - if (vms->platform_bus_dev) {
>>> - MachineClass *mc = MACHINE_GET_CLASS(vms);
>>> + /* For smmuv3-nested devices we need to set the mem & irq */
>>> + if (device_is_dynamic_sysbus(mc, dev) &&
>>> + object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3_NESTED)) {
>> why did you choose not using the PLATFORM BUS infra which does that
>> kind of binding automatically (also it provisions for dedicated MMIOs and
>> IRQs). At least you would need to justify in the commit msg I think
> Because I was not sure how to do this binding otherwise. I couldn't find
> any such precedence for a dynamic platform bus dev binding
> MMIOs/IRQs(May be I didn't look hard). I mentioned it in cover letter.
>
> Could you please give me some pointers/example for this? I will also
> take another look.
vfio-platform uses such automatic binding (however you must check that the
platform bus MMIO and IRQ space is large enough for your needs).
The binding is transparently handled by
if (vms->platform_bus_dev) {
if (device_is_dynamic_sysbus(mc, dev)) {
platform_bus_link_device(PLATFORM_BUS_DEVICE(vms->platform_bus_dev),
SYS_BUS_DEVICE(dev));
}
}
>
>>> + hwaddr base = vms->memmap[VIRT_SMMU_NESTED].base;
>>> + int irq = vms->irqmap[VIRT_SMMU_NESTED];
>>> +
>>> + if (vms->smmu_nested_count >= MAX_SMMU_NESTED) {
>>> + error_setg(errp, "smmuv3-nested max count reached!");
>>> + return;
>>> + }
>>> +
>>> + base += (vms->smmu_nested_count * SMMU_IO_LEN);
>>> + irq += (vms->smmu_nested_count * NUM_SMMU_IRQS);
>>>
>>> + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
>>> + for (int i = 0; i < 4; i++) {
>>> + sysbus_connect_irq(SYS_BUS_DEVICE(dev), i,
>>> + qdev_get_gpio_in(vms->gic, irq + i));
>>> + }
>>> + if (vms->iommu != VIRT_IOMMU_SMMUV3_NESTED) {
>>> + vms->iommu = VIRT_IOMMU_SMMUV3_NESTED;
>>> + }
>>> + vms->smmu_nested_count++;
>> this kind of check would definitively not integrated in the platform bus but
>> this could be introduced generically in the framework though or special
>> cased after the platform_bus_link_device
> Ok. So I assume there is a better way to link the MMIOs/IRQs as you mentioned
> above and we can add another helper to track this count as well.
>
>>> + }
>>> +
>>> + if (vms->platform_bus_dev) {
>>> if (device_is_dynamic_sysbus(mc, dev)) {
>>> platform_bus_link_device(PLATFORM_BUS_DEVICE(vms-
>>> platform_bus_dev),
>>> SYS_BUS_DEVICE(dev)); @@ -3067,6
>>> +3093,7 @@ static void virt_machine_class_init(ObjectClass *oc, void
>> *data)
>>> machine_class_allow_dynamic_sysbus_dev(mc,
>> TYPE_VFIO_AMD_XGBE);
>>> machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
>>> machine_class_allow_dynamic_sysbus_dev(mc,
>> TYPE_VFIO_PLATFORM);
>>> + machine_class_allow_dynamic_sysbus_dev(mc,
>>> + TYPE_ARM_SMMUV3_NESTED);
>>> #ifdef CONFIG_TPM
>>> machine_class_allow_dynamic_sysbus_dev(mc,
>> TYPE_TPM_TIS_SYSBUS);
>>> #endif diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c index
>>> eebcd28f9a..0f0d0b3e58 100644
>>> --- a/hw/core/sysbus-fdt.c
>>> +++ b/hw/core/sysbus-fdt.c
>>> @@ -489,6 +489,7 @@ static const BindingEntry bindings[] = { #ifdef
>>> CONFIG_LINUX
>>> TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC,
>> add_calxeda_midway_xgmac_fdt_node),
>>> TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node),
>>> + TYPE_BINDING("arm-smmuv3-nested", no_fdt_node),
>>> VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a",
>>> add_amd_xgbe_fdt_node), #endif #ifdef CONFIG_TPM diff --git
>>> a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h index
>>> d183a62766..87e628be7a 100644
>>> --- a/include/hw/arm/smmuv3.h
>>> +++ b/include/hw/arm/smmuv3.h
>>> @@ -84,6 +84,21 @@ struct SMMUv3Class {
>>> #define TYPE_ARM_SMMUV3 "arm-smmuv3"
>>> OBJECT_DECLARE_TYPE(SMMUv3State, SMMUv3Class, ARM_SMMUV3)
>>>
>>> +#define TYPE_ARM_SMMUV3_NESTED "arm-smmuv3-nested"
>>> +OBJECT_DECLARE_TYPE(SMMUv3NestedState, SMMUv3NestedClass,
>>> +ARM_SMMUV3_NESTED)
>>> +
>>> +struct SMMUv3NestedState {
>>> + SMMUv3State smmuv3_state;
>>> +};
>>> +
>>> +struct SMMUv3NestedClass {
>>> + /*< private >*/
>>> + SMMUv3Class smmuv3_class;
>>> + /*< public >*/
>>> +
>>> + DeviceRealize parent_realize;
>>> +};
>>> +
>>> #define STAGE1_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S1P)
>>> #define STAGE2_SUPPORTED(s) FIELD_EX32(s->idr[0], IDR0, S2P)
>>>
>>> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index
>>> 46f48fe561..50e47a4ef3 100644
>>> --- a/include/hw/arm/virt.h
>>> +++ b/include/hw/arm/virt.h
>>> @@ -50,6 +50,9 @@
>>> /* MMIO region size for SMMUv3 */
>>> #define SMMU_IO_LEN 0x20000
>>>
>>> +/* Max supported nested SMMUv3 */
>>>
>>> +#define MAX_SMMU_NESTED 128
>> Ouch, that many?!
> 😊. I just came up with the max we can support the allocated MMIO space.
> We do have systems at present which has 8 physical SMMUv3s at the moment.
> Probably 16/32 would be a better number I guess.
OK thanks
Eric
>
> Thanks,
> Shameer
Hi Eric,
> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: Thursday, November 14, 2024 8:42 AM
> To: Shameerali Kolothum Thodi
> <shameerali.kolothum.thodi@huawei.com>; qemu-arm@nongnu.org;
> qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; jgg@nvidia.com; nicolinc@nvidia.com;
> ddutile@redhat.com; Linuxarm <linuxarm@huawei.com>; Wangzhou (B)
> <wangzhou1@hisilicon.com>; jiangkunkun <jiangkunkun@huawei.com>;
> Jonathan Cameron <jonathan.cameron@huawei.com>;
> zhangfei.gao@linaro.org
> Subject: Re: [RFC PATCH 2/5] hw/arm/smmuv3: Add initial support for
> SMMUv3 Nested device
> >> why did you choose not using the PLATFORM BUS infra which does that
> >> kind of binding automatically (also it provisions for dedicated MMIOs
> and
> >> IRQs). At least you would need to justify in the commit msg I think
> > Because I was not sure how to do this binding otherwise. I couldn't find
> > any such precedence for a dynamic platform bus dev binding
> > MMIOs/IRQs(May be I didn't look hard). I mentioned it in cover letter.
> >
> > Could you please give me some pointers/example for this? I will also
> > take another look.
> vfio platform users such automatic binding (however you must check that
> vfio platform bus mmio and irq space is large enough for your needs).
>
> the binding is transparently handled by
> if (vms->platform_bus_dev) {
> if (device_is_dynamic_sysbus(mc, dev)) {
>
> platform_bus_link_device(PLATFORM_BUS_DEVICE(vms-
> >platform_bus_dev),
> SYS_BUS_DEVICE(dev));
> }
> }
Ah, I see. I missed that it does that transparently. And we can use
platform_bus_get_mmio_addr() to retrieve the address for ACPI/DT, similar to what
the TPM device is doing.
So we don't need specific entries for this device in memmap/irqmap.
I will give it a try.
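For reference, a rough sketch of the lookup I have in mind, modelled on the TPM
handling in hw/arm/virt-acpi-build.c (the helper name below is made up):

static hwaddr smmuv3_accel_mmio_base(VirtMachineState *vms, SysBusDevice *sbdev)
{
    PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev);
    hwaddr offset = platform_bus_get_mmio_addr(pbus, sbdev, 0);

    /* -1 means the device has no region mapped on the platform bus */
    assert(offset != -1);
    /* The returned value is an offset within the platform bus window */
    return vms->memmap[VIRT_PLATFORM_BUS].base + offset;
}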
Thanks,
Shameer.
Hi Eric,
On Wed, Nov 13, 2024 at 06:12:15PM +0100, Eric Auger wrote:
> On 11/8/24 13:52, Shameer Kolothum wrote:
> > @@ -181,6 +181,7 @@ static const MemMapEntry base_memmap[] = {
> > [VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
> > [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 },
> > [VIRT_MMIO] = { 0x0a000000, 0x00000200 },
> > + [VIRT_SMMU_NESTED] = { 0x0b000000, 0x01000000 },
> I agree with Mostafa that the _NESTED terminology may not be the best
> choice.
> The motivation behind that multi-instance attempt, as introduced in
> https://lore.kernel.org/all/ZEcT%2F7erkhHDaNvD@Asurada-Nvidia/
> was:
> - SMMUs with different feature bits
> - support of VCMDQ HW extension for SMMU CMDQ
> - need for separate S1 invalidation paths
>
> If I understand correctly this is mostly wanted for VCMDQ handling? if
> this is correct we may indicate that somehow in the terminology.
>
> If I understand correctly VCMDQ terminology is NVidia specific while
> ECMDQ is the baseline (?).
VCMDQ makes a multi-vSMMU-instance design a hard requirement, yet
the point (3) about separate invalidation paths also matters. Jason
suggested that, in the base case, the VMM create multiple vSMMU instances, as
the kernel doc mentions here:
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/userspace-api/iommufd.rst#n84
W.r.t naming, maybe something related to "hardware-accelerated"?
Thanks
Nicolin
On 11/13/24 1:05 PM, Nicolin Chen wrote:
> Hi Eric,
>
> On Wed, Nov 13, 2024 at 06:12:15PM +0100, Eric Auger wrote:
>> On 11/8/24 13:52, Shameer Kolothum wrote:
>>> @@ -181,6 +181,7 @@ static const MemMapEntry base_memmap[] = {
>>> [VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
>>> [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 },
>>> [VIRT_MMIO] = { 0x0a000000, 0x00000200 },
>>> + [VIRT_SMMU_NESTED] = { 0x0b000000, 0x01000000 },
>
>> I agree with Mostafa that the _NESTED terminology may not be the best
>> choice.
>> The motivation behind that multi-instance attempt, as introduced in
>> https://lore.kernel.org/all/ZEcT%2F7erkhHDaNvD@Asurada-Nvidia/
>> was:
>> - SMMUs with different feature bits
>> - support of VCMDQ HW extension for SMMU CMDQ
>> - need for separate S1 invalidation paths
>>
>> If I understand correctly this is mostly wanted for VCMDQ handling? if
>> this is correct we may indicate that somehow in the terminology.
>>
>> If I understand correctly VCMDQ terminology is NVidia specific while
>> ECMDQ is the baseline (?).
>
> VCMDQ makes a multi-vSMMU-instance design a hard requirement, yet
> the point (3) for separate invalidation paths also matters. Jason
> suggested VMM in base case to create multi vSMMU instances as the
> kernel doc mentioned here:
> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/Documentation/userspace-api/iommufd.rst#n84
>
> W.r.t naming, maybe something related to "hardware-accelerated"?
>
Given that 'accel' has been used for hw-acceleration elsewhere, that seems like a reasonable 'mode'.
But it needs a parameter to state what is being accelerated.
i.e., the more global 'accel=kvm' has 'kvm'.
For SMMUv3 with NVIDIA-specific vCMDQ, it needs a parameter to state that specifically,
since I'm concluding from reading the SMMUv3 version G.a spec that ECMDQ was added
to be able to assign an ECMDQ to a VM and let the VM do CMDQ-driven invalidations via
a mechanism similar to assigned PCI-device MMIO space in a VM.
So, how should the QEMU invocation select what parts to 'accel' in the vSMMUv3 given
to the VM? ... and given the history of hw-based virt-acceleration, I can only guess
more SMMUv3 accel tweaks will be found/desired/implemented.
So, given there is an NVIDIA-specific mechanism that is ECMDQ-like but different, the accel parameter
chosen has to consider name-space collision, i.e., accel=nv-vcmdq vs. accel=ecmdq,
unless sw can be made to smartly probe and determine the underlying diffs and provide
equivalent functionality, in which case a simpler 'accel=vcmdq' could be used.
Finally, wrt libvirt, how does it know/tell what can and should be used?
For ECMDQ, something under sysfs for an SMMUv3 could expose its presence/capability/availability
(tag for use/alloc'd for a VM), or an ioctl/cdev i/f to the SMMUv3.
But how does one know today that there's NVIDIA-vCMDQ support on its SMMUv3? -- is it
exposed in sysfs, ioctl, cdev?
... and all needs to be per-instance ....
... libvirt (or any other VMM orchestrator) will need to determine compatibility for
live migration. e.g., can one live migrate an accel=nv-vcmdq-based VM to a host with
accel=ecmdq support? only nv-vcmdq? what if there are version diffs of nv-vcmdq over time?
-- apologies, but I don't know the minute details of nv-vcmdq to determine if that's unlikely or not.
Once the qemu-smmuv3-api is defined, with the recognition of what libvirt (or any other VMM) needs to probe/check/use for hw-accelerated features,
I think it'll be more straight-fwd to implement, and (clearly) understand from a qemu command line. :)
Thanks,
- Don
> Thanks
> Nicolin
>
> -----Original Message-----
> From: Donald Dutile <ddutile@redhat.com>
> Sent: Tuesday, November 26, 2024 6:29 PM
> To: Nicolin Chen <nicolinc@nvidia.com>; Eric Auger
> <eric.auger@redhat.com>
> Cc: Shameerali Kolothum Thodi
> <shameerali.kolothum.thodi@huawei.com>; qemu-arm@nongnu.org;
> qemu-devel@nongnu.org; peter.maydell@linaro.org; jgg@nvidia.com;
> Linuxarm <linuxarm@huawei.com>; Wangzhou (B)
> <wangzhou1@hisilicon.com>; jiangkunkun <jiangkunkun@huawei.com>;
> Jonathan Cameron <jonathan.cameron@huawei.com>;
> zhangfei.gao@linaro.org
> Subject: Re: [RFC PATCH 2/5] hw/arm/smmuv3: Add initial support for
> SMMUv3 Nested device
>
>
>
> On 11/13/24 1:05 PM, Nicolin Chen wrote:
> > Hi Eric,
> >
> > On Wed, Nov 13, 2024 at 06:12:15PM +0100, Eric Auger wrote:
> >> On 11/8/24 13:52, Shameer Kolothum wrote:
> >>> @@ -181,6 +181,7 @@ static const MemMapEntry base_memmap[] = {
> >>> [VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
> >>> [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 },
> >>> [VIRT_MMIO] = { 0x0a000000, 0x00000200 },
> >>> + [VIRT_SMMU_NESTED] = { 0x0b000000, 0x01000000 },
> >
> >> I agree with Mostafa that the _NESTED terminology may not be the best
> >> choice.
> >> The motivation behind that multi-instance attempt, as introduced in
> >> https://lore.kernel.org/all/ZEcT%2F7erkhHDaNvD@Asurada-Nvidia/
> >> was:
> >> - SMMUs with different feature bits
> >> - support of VCMDQ HW extension for SMMU CMDQ
> >> - need for separate S1 invalidation paths
> >>
> >> If I understand correctly this is mostly wanted for VCMDQ handling? if
> >> this is correct we may indicate that somehow in the terminology.
> >>
> >> If I understand correctly VCMDQ terminology is NVidia specific while
> >> ECMDQ is the baseline (?).
> >
> > VCMDQ makes a multi-vSMMU-instance design a hard requirement, yet
> > the point (3) for separate invalidation paths also matters. Jason
> > suggested VMM in base case to create multi vSMMU instances as the
> > kernel doc mentioned here:
> > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-
> next.git/tree/Documentation/userspace-api/iommufd.rst#n84
> >
> > W.r.t naming, maybe something related to "hardware-accelerated"?
> >
> Given that 'accel' has been used for hw-acceleration elsewhere, that seems
> like a reasonable 'mode'.
> But, it needs a paramater to state was is being accelerated.
> i.e., the more global 'accel=kvm' has 'kvm'.
I was thinking of calling this hw-accelerated nested SMMUv3 emulation
'smmuv3-accel'. This avoids confusion with the already existing
'iommu=smmuv3', which also has nested emulation support.
i.e.,
-device arm-smmuv3-accel,id=smmuv1,bus=pcie.1 \
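With multiple instances the invocation could then look roughly like this (purely
illustrative; the pxb-pcie/iommufd wiring below is my assumption and not part of
this patch):

-object iommufd,id=iommufd0 \
-device pxb-pcie,id=pcie.1,bus_nr=2,bus=pcie.0 \
-device arm-smmuv3-accel,id=smmuv1,bus=pcie.1 \
-device pcie-root-port,id=pcie.port1,chassis=1,bus=pcie.1 \
-device vfio-pci,host=0000:7d:02.1,bus=pcie.port1,iommufd=iommufd0 \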
>
> For SMMUv3, NVIDIA-specific vCMDQ, it needs a parameter to state that
> specifically,
> since I'm concluding from reading the SMMUv3 version G.a spec, that
> ECMDQ was added
> to be able to assign an ECMDQ to a VM,
I am not sure the intention of ECMDQ, as per that specification, is to assign
it to a VM. I think the main idea behind it is to have one Command Queue
per host CPU to eliminate lock contention while submitting commands
to the SMMU.
AFAIK it is not safe to assign one of the ECMDQs to a guest yet. I think there is no
way you can associate a VMID with an ECMDQ. So there is no plan to
support ARM ECMDQ now.
NVIDIA VCMDQ is a completely vendor-specific one. Perhaps ARM may come
up with an assignable CMDQ in the future, though.
> and let the VM do CMDQ driven
> invalidations via
> a similar mechanism as assigned PCI-device mmio space in a VM.
> So, how should the QEMU invocation select what parts to 'accel' in the
> vSMMUv3 given
> to the VM? ... and given the history of hw-based, virt-acceleration, I can
> only guess
> more SMMUv3 accel tweaks will be found/desired/implemented.
>
> So, given there is an NVIDIA-specific/like ECMDQ, but different, the accel
> parameter
> chosen has to consider 'name-space collision', i.e., accel=nv-vcmdq and
> accel=ecmdq,
> unless sw can be made to smartly probe and determine the underlying
> diffs, and have
> equivalent functionality, in which case, a simpler 'accel=vcmdq' could be
> used.
>
Yep. Probably we could abstract that from the user and handle it within
QEMU when the kernel reports the capability based on the physical SMMUv3.
> Finally, wrt libvirt, how does it know/tell what can and should be used?
> For ECMDQ, something under sysfs for an SMMUv3 could expose its
> presence/capability/availability
> (tag for use/alloc'd for a VM), or an ioctl/cdev i/f to the SMMUv3.
> But how does one know today that there's NVIDIA-vCMDQ support on its
> SMMUv3? -- is it
> exposed in sysfs, ioctl, cdev?
I think the capability will be reported through an IOCTL. Nicolin?
> ... and all needs to be per-instance ....
> ... libvirt (or any other VMM orchestrator) will need to determine
> compatibility for
> live migration. e.g., can one live migrate an accel=nv-vcmdq-based VM to
> a host with
> accel=ecmdq support? only nv-vcmdq? what if there are version diffs of
> nv-vcmdq over time?
> -- apologies, but I don't know the minute details of nv-vcmdq to
> determine if that's unlikely or not.
Yes, this requires more thought. But our first aim is to get the basic
smmuv3-accel support.
Thanks,
Shameer
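
For orientation, a fuller invocation along the lines proposed above might look
roughly like the sketch below. The arm-smmuv3-accel device name and its bus=
attachment follow the proposal in this thread rather than any upstream
interface, and the host BDF, the ids and the iommufd wiring are illustrative
assumptions only:

qemu-system-aarch64 -machine virt,gic-version=3 -cpu host -enable-kvm \
    -object iommufd,id=iommufd0 \
    -device pcie-root-port,id=pcie.1,bus=pcie.0,chassis=1 \
    -device arm-smmuv3-accel,id=smmuv1,bus=pcie.1 \
    -device vfio-pci,host=0000:7d:02.1,bus=pcie.1,iommufd=iommufd0 \
    ...
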
On 11/27/24 5:21 AM, Shameerali Kolothum Thodi wrote:
>
>
>> -----Original Message-----
>> From: Donald Dutile <ddutile@redhat.com>
>> Sent: Tuesday, November 26, 2024 6:29 PM
>> To: Nicolin Chen <nicolinc@nvidia.com>; Eric Auger
>> <eric.auger@redhat.com>
>> Cc: Shameerali Kolothum Thodi
>> <shameerali.kolothum.thodi@huawei.com>; qemu-arm@nongnu.org;
>> qemu-devel@nongnu.org; peter.maydell@linaro.org; jgg@nvidia.com;
>> Linuxarm <linuxarm@huawei.com>; Wangzhou (B)
>> <wangzhou1@hisilicon.com>; jiangkunkun <jiangkunkun@huawei.com>;
>> Jonathan Cameron <jonathan.cameron@huawei.com>;
>> zhangfei.gao@linaro.org
>> Subject: Re: [RFC PATCH 2/5] hw/arm/smmuv3: Add initial support for
>> SMMUv3 Nested device
>>
>>
>>
>> On 11/13/24 1:05 PM, Nicolin Chen wrote:
>>> Hi Eric,
>>>
>>> On Wed, Nov 13, 2024 at 06:12:15PM +0100, Eric Auger wrote:
>>>> On 11/8/24 13:52, Shameer Kolothum wrote:
>>>>> @@ -181,6 +181,7 @@ static const MemMapEntry base_memmap[] = {
>>>>> [VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
>>>>> [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 },
>>>>> [VIRT_MMIO] = { 0x0a000000, 0x00000200 },
>>>>> + [VIRT_SMMU_NESTED] = { 0x0b000000, 0x01000000 },
>>>
>>>> I agree with Mostafa that the _NESTED terminology may not be the best
>>>> choice.
>>>> The motivation behind that multi-instance attempt, as introduced in
>>>> https://lore.kernel.org/all/ZEcT%2F7erkhHDaNvD@Asurada-Nvidia/
>>>> was:
>>>> - SMMUs with different feature bits
>>>> - support of VCMDQ HW extension for SMMU CMDQ
>>>> - need for separate S1 invalidation paths
>>>>
>>>> If I understand correctly this is mostly wanted for VCMDQ handling? if
>>>> this is correct we may indicate that somehow in the terminology.
>>>>
>>>> If I understand correctly VCMDQ terminology is NVidia specific while
>>>> ECMDQ is the baseline (?).
>>>
>>> VCMDQ makes a multi-vSMMU-instance design a hard requirement, yet
>>> the point (3) for separate invalidation paths also matters. Jason
>>> suggested VMM in base case to create multi vSMMU instances as the
>>> kernel doc mentioned here:
>>> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-
>> next.git/tree/Documentation/userspace-api/iommufd.rst#n84
>>>
>>> W.r.t naming, maybe something related to "hardware-accelerated"?
>>>
>> Given that 'accel' has been used for hw-acceleration elsewhere, that seems
>> like a reasonable 'mode'.
>> But, it needs a paramater to state was is being accelerated.
>> i.e., the more global 'accel=kvm' has 'kvm'.
>
> I was thinking more like calling this hw accelerated nested SMMUv3 emulation
> as 'smmuv3-accel'. This avoids confusion with the already existing
> 'iommu=smmuv3' that also has a nested emulation support.
>
> ie,
> -device arm-smmuv3-accel,id=smmuv1,bus=pcie.1 \
>
I -think- you are saying below, that we have to think a bit more about this
device tagging. I'm thinking more like
- device arm-smmuv3,accel=<vcmdq>,id=smmu1,bus=pcie.1 \
>>
>> For SMMUv3, NVIDIA-specific vCMDQ, it needs a parameter to state that
>> specifically,
>> since I'm concluding from reading the SMMUv3 version G.a spec, that
>> ECMDQ was added
>> to be able to assign an ECMDQ to a VM,
>
> Not sure the intention of ECMDQ as per that specification is to assign
> it to a VM. I think the main idea behind it is to have one Command Queue
> per host CPU to eliminate lock contention while submitting commands
> to SMMU.
>
> AFAIK it is not safe to assign one of the ECMDQ to guest yet. I think there is no
> way you can associate a VMID with ECMDQ. So there is no plan to
> support ARM ECMDQ now.
>
> NVIDIA VCMDQ is a completely vendor specific one. Perhaps ARM may come
> up with an assignable CMDQ in future though.
>
> and let the VM do CMDQ driven
>> invalidations via
>> a similar mechanism as assigned PCI-device mmio space in a VM.
>> So, how should the QEMU invocation select what parts to 'accel' in the
>> vSMMUv3 given
>> to the VM? ... and given the history of hw-based, virt-acceleration, I can
>> only guess
>> more SMMUv3 accel tweaks will be found/desired/implemented.
>>
>> So, given there is an NVIDIA-specific/like ECMDQ, but different, the accel
>> parameter
>> chosen has to consider 'name-space collision', i.e., accel=nv-vcmdq and
>> accel=ecmdq,
>> unless sw can be made to smartly probe and determine the underlying
>> diffs, and have
>> equivalent functionality, in which case, a simpler 'accel=vcmdq' could be
>> used.
>>
>
> Yep. Probably we could abstract that from the user and handle it within
> Qemu when the kernel reports the capability based on physical SMMUv3.
>
>> Finally, wrt libvirt, how does it know/tell what can and should be used?
>> For ECMDQ, something under sysfs for an SMMUv3 could expose its
>> presence/capability/availability
>> (tag for use/alloc'd for a VM), or an ioctl/cdev i/f to the SMMUv3.
>> But how does one know today that there's NVIDIA-vCMDQ support on its
>> SMMUv3? -- is it
>> exposed in sysfs, ioctl, cdev?
>
> I think the capability will be reported through a IOCTL. Nicolin ?
>
>> ... and all needs to be per-instance ....
>> ... libvirt (or any other VMM orchestrator) will need to determine
>> compatibility for
>> live migration. e.g., can one live migrate an accel=nv-vcmdq-based VM to
>> a host with
>> accel=ecmdq support? only nv-vcmdq? what if there are version diffs of
>> nv-vcmdq over time?
>> -- apologies, but I don't know the minute details of nv-vcmdq to
>> determine if that's unlikely or not.
>
> Yes. This require more thought. But our first aim is get the basic smmuv3-accel
> support.
>
> Thanks,
> Shameer
>
> -----Original Message-----
> From: Donald Dutile <ddutile@redhat.com>
> Sent: Thursday, November 28, 2024 4:29 AM
> To: Shameerali Kolothum Thodi
> <shameerali.kolothum.thodi@huawei.com>; Nicolin Chen
> <nicolinc@nvidia.com>; Eric Auger <eric.auger@redhat.com>
> Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org;
> peter.maydell@linaro.org; jgg@nvidia.com; Linuxarm
> <linuxarm@huawei.com>; Wangzhou (B) <wangzhou1@hisilicon.com>;
> jiangkunkun <jiangkunkun@huawei.com>; Jonathan Cameron
> <jonathan.cameron@huawei.com>; zhangfei.gao@linaro.org
> Subject: Re: [RFC PATCH 2/5] hw/arm/smmuv3: Add initial support for
> SMMUv3 Nested device
>
>
> >>> W.r.t naming, maybe something related to "hardware-accelerated"?
> >>>
> >> Given that 'accel' has been used for hw-acceleration elsewhere, that
> seems
> >> like a reasonable 'mode'.
> >> But, it needs a paramater to state was is being accelerated.
> >> i.e., the more global 'accel=kvm' has 'kvm'.
> >
> > I was thinking more like calling this hw accelerated nested SMMUv3
> emulation
> > as 'smmuv3-accel'. This avoids confusion with the already existing
> > 'iommu=smmuv3' that also has a nested emulation support.
> >
> > ie,
> > -device arm-smmuv3-accel,id=smmuv1,bus=pcie.1 \
> >
> I -think- you are saying below, that we have to think a bit more about this
> device tagging. I'm thinking more like
> - device arm-smmuv3,accel=<vcmdq>,id=smmu1,bus=pcie.1 \

Ok. But I think the initial suggestion to call this something else other than
arm-smmuv3 came from the fact that it makes use of physical SMMUv3 nested
stage support. This is required for vfio-pci assignment. So I used "accel" in
that context. That is what I mean by the basic functionality of this SMMUv3
device.

If we need any additional accelerated feature support, that can be provided
as "properties" on top of this. Like,

-device arm-smmuv3-accel,id=smmu1,bus=pcie.1,vcmdq=on \

Or maybe, as per Nicolin's suggestion (without an explicit "vcmdq"), probe for
vCMDQ support transparently and fall back to basic support if it is not
available. I prefer the first one, which gives an option to turn it off if
required. But I don't have any strong opinion either way.

Thanks,
Shameer.
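
To illustrate the "properties on top of this" idea, here is a minimal sketch
of how such a vcmdq on/off knob could be wired up as a qdev property.
DEFINE_PROP_BOOL() and device_class_set_props() are existing QEMU APIs; the
SMMUv3AccelState type, the property array name and the device behaviour are
assumptions made only for illustration, not the actual proposed code:

#include "qemu/osdep.h"
#include "hw/qdev-properties.h"
#include "hw/arm/smmuv3.h"

typedef struct SMMUv3AccelState {
    SMMUv3State smmuv3;   /* hypothetical accel subclass of the SMMUv3 device */
    bool vcmdq;           /* set by -device arm-smmuv3-accel,...,vcmdq=on */
} SMMUv3AccelState;

static Property smmuv3_accel_properties[] = {
    DEFINE_PROP_BOOL("vcmdq", SMMUv3AccelState, vcmdq, false),
    DEFINE_PROP_END_OF_LIST(),
};

static void smmuv3_accel_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    device_class_set_props(dc, smmuv3_accel_properties);
    /*
     * realize() would then fail cleanly when vcmdq=on is requested but the
     * host SMMUv3 does not report the corresponding capability.
     */
}
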
On Wed, Nov 27, 2024 at 11:29:06PM -0500, Donald Dutile wrote:
> On 11/27/24 5:21 AM, Shameerali Kolothum Thodi wrote:
> > > > W.r.t naming, maybe something related to "hardware-accelerated"?
> > > >
> > > Given that 'accel' has been used for hw-acceleration elsewhere, that seems
> > > like a reasonable 'mode'.
> > > But, it needs a paramater to state was is being accelerated.
> > > i.e., the more global 'accel=kvm' has 'kvm'.
> >
> > I was thinking more like calling this hw accelerated nested SMMUv3 emulation
> > as 'smmuv3-accel'. This avoids confusion with the already existing
> > 'iommu=smmuv3' that also has a nested emulation support.
> >
> > ie,
> > -device arm-smmuv3-accel,id=smmuv1,bus=pcie.1 \
>
> ..
> I -think- you are saying below, that we have to think a bit more about this
> device tagging. I'm thinking more like
> - device arm-smmuv3,accel=<vcmdq>,id=smmu1,bus=pcie.1 \

I wonder if we really need a "vcmdq" enabling/disabling option?

Jason's suggested approach for a vIOMMU allocation is to retry-
on-failure, so my draft patches allocate a TEGRA241_CMDQV type
of vIOMMU first, and then fall back to a regular SMMUV3 type if
it fails. So, a host that doesn't have a VCMDQ capability could
still work with the fallback/default pathway.

Otherwise, we'd need to expose some sysfs node as you mentioned
in the other reply, for libvirt to set or hide a "vcmdq" option.

Thanks
Nicolin
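
For readers unfamiliar with the retry-on-failure idea, a rough userspace-style
sketch is shown below. IOMMU_VIOMMU_ALLOC, struct iommu_viommu_alloc and
IOMMU_VIOMMU_TYPE_ARM_SMMUV3 are iommufd uAPI that was only just landing at
the time of this thread; IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV existed only in the
draft kernel patches mentioned above, so treat that constant as an assumption:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

static int viommu_alloc(int iommufd, uint32_t dev_id, uint32_t hwpt_id,
                        uint32_t type, uint32_t *out_viommu_id)
{
    struct iommu_viommu_alloc alloc = {
        .size = sizeof(alloc),
        .type = type,
        .dev_id = dev_id,
        .hwpt_id = hwpt_id,
    };

    if (ioctl(iommufd, IOMMU_VIOMMU_ALLOC, &alloc)) {
        return -1;
    }
    *out_viommu_id = alloc.out_viommu_id;
    return 0;
}

static int viommu_alloc_with_fallback(int iommufd, uint32_t dev_id,
                                      uint32_t hwpt_id, uint32_t *viommu_id)
{
    /* Try the VCMDQ-capable, vendor-specific type first... */
    if (!viommu_alloc(iommufd, dev_id, hwpt_id,
                      IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV, viommu_id)) {
        return 0;
    }
    /* ...and fall back to the plain SMMUv3 type if the host lacks VCMDQ. */
    return viommu_alloc(iommufd, dev_id, hwpt_id,
                        IOMMU_VIOMMU_TYPE_ARM_SMMUV3, viommu_id);
}
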
On Wed, Nov 27, 2024 at 08:44:47PM -0800, Nicolin Chen wrote:
> On Wed, Nov 27, 2024 at 11:29:06PM -0500, Donald Dutile wrote:
> > On 11/27/24 5:21 AM, Shameerali Kolothum Thodi wrote:
> > > > > W.r.t naming, maybe something related to "hardware-accelerated"?
> > > > >
> > > > Given that 'accel' has been used for hw-acceleration elsewhere, that seems
> > > > like a reasonable 'mode'.
> > > > But, it needs a paramater to state was is being accelerated.
> > > > i.e., the more global 'accel=kvm' has 'kvm'.
> > >
> > > I was thinking more like calling this hw accelerated nested SMMUv3 emulation
> > > as 'smmuv3-accel'. This avoids confusion with the already existing
> > > 'iommu=smmuv3' that also has a nested emulation support.
> > >
> > > ie,
> > > -device arm-smmuv3-accel,id=smmuv1,bus=pcie.1 \
> > >
> > ..
> > I -think- you are saying below, that we have to think a bit more about this
> > device tagging. I'm thinking more like
> > - device arm-smmuv3,accel=<vcmdq>,id=smmu1,bus=pcie.1 \
>
> I wonder if we really need a "vcmdq" enabling/disabling option?
>
> Jason's suggested approach for a vIOMMU allocation is to retry-
> on-failure, so my draft patches allocate a TEGRA241_CMDQV type
> of vIOMMU first, and then fall back to a regular SMMUV3 type if
> it fails. So, a host that doesn't have a VCMDQ capability could
> still work with the fallback/default pathway.

It needs to be configurable so the VM can be configured in a
consistent way across nodes

autodetection of host features is nice for experimenting but scale
deployments should precisely specify every detail about the VM and not
rely on host detection. Otherwise the VM instance type will be ill
defined..

Jason
On 11/28/24 7:54 AM, Jason Gunthorpe wrote:
> On Wed, Nov 27, 2024 at 08:44:47PM -0800, Nicolin Chen wrote:
>> On Wed, Nov 27, 2024 at 11:29:06PM -0500, Donald Dutile wrote:
>>> On 11/27/24 5:21 AM, Shameerali Kolothum Thodi wrote:
>>>>>> W.r.t naming, maybe something related to "hardware-accelerated"?
>>>>>>
>>>>> Given that 'accel' has been used for hw-acceleration elsewhere, that seems
>>>>> like a reasonable 'mode'.
>>>>> But, it needs a paramater to state was is being accelerated.
>>>>> i.e., the more global 'accel=kvm' has 'kvm'.
>>>>
>>>> I was thinking more like calling this hw accelerated nested SMMUv3 emulation
>>>> as 'smmuv3-accel'. This avoids confusion with the already existing
>>>> 'iommu=smmuv3' that also has a nested emulation support.
>>>>
>>>> ie,
>>>> -device arm-smmuv3-accel,id=smmuv1,bus=pcie.1 \
>>>>
>> ..
>>> I -think- you are saying below, that we have to think a bit more about this
>>> device tagging. I'm thinking more like
>>> - device arm-smmuv3,accel=<vcmdq>,id=smmu1,bus=pcie.1 \
>>
>> I wonder if we really need a "vcmdq" enabling/disabling option?
>>
>> Jason's suggested approach for a vIOMMU allocation is to retry-
>> on-failure, so my draft patches allocate a TEGRA241_CMDQV type
>> of vIOMMU first, and then fall back to a regular SMMUV3 type if
>> it fails. So, a host that doesn't have a VCMDQ capability could
>> still work with the fallback/default pathway.
>
> It needs to be configurable so the VM can be configured in a
> consistent way across nodes
>
Yes. To expound further, one wants to be able to define an 'acceptable' VM
criteria, so libvirt (or OpenStack?) can find and generate the list of
'acceptable nodes', priori typically, that can be a match for the acceptance
criteria.
Conversely, if one specifies a set of systems that one wants to be able to
migrate across, then libvirt needs to find and select/set the
features|attributes that enable the VM to migrate in a compatible way.

> autodetection of host features is nice for experimenting but scale
> deployments should precisely specify every detail about the VM and not
> rely on host detection. Otherwise the VM instance type will be ill
> defined..
>
> Jason
>
On Thu, Nov 28, 2024 at 08:54:26AM -0400, Jason Gunthorpe wrote:
> On Wed, Nov 27, 2024 at 08:44:47PM -0800, Nicolin Chen wrote:
> > On Wed, Nov 27, 2024 at 11:29:06PM -0500, Donald Dutile wrote:
> > > On 11/27/24 5:21 AM, Shameerali Kolothum Thodi wrote:
> > > > > > W.r.t naming, maybe something related to "hardware-accelerated"?
> > > > > >
> > > > > Given that 'accel' has been used for hw-acceleration elsewhere, that seems
> > > > > like a reasonable 'mode'.
> > > > > But, it needs a paramater to state was is being accelerated.
> > > > > i.e., the more global 'accel=kvm' has 'kvm'.
> > > >
> > > > I was thinking more like calling this hw accelerated nested SMMUv3 emulation
> > > > as 'smmuv3-accel'. This avoids confusion with the already existing
> > > > 'iommu=smmuv3' that also has a nested emulation support.
> > > >
> > > > ie,
> > > > -device arm-smmuv3-accel,id=smmuv1,bus=pcie.1 \
> > > >
> > > ..
> > > I -think- you are saying below, that we have to think a bit more about this
> > > device tagging. I'm thinking more like
> > > - device arm-smmuv3,accel=<vcmdq>,id=smmu1,bus=pcie.1 \
> >
> > I wonder if we really need a "vcmdq" enabling/disabling option?
> >
> > Jason's suggested approach for a vIOMMU allocation is to retry-
> > on-failure, so my draft patches allocate a TEGRA241_CMDQV type
> > of vIOMMU first, and then fall back to a regular SMMUV3 type if
> > it fails. So, a host that doesn't have a VCMDQ capability could
> > still work with the fallback/default pathway.
>
> It needs to be configurable so the VM can be configured in a
> consistent way across nodes
>
> autodetection of host features is nice for experimenting but scale
> deployments should precisely specify every detail about the VM and not
> rely on host detection. Otherwise the VM instance type will be ill
> defined..

In that case, we'd need to expose a vcmdq capability somewhere. We do
for vIOMMU via hw_info. Should we keep the consistency?

Otherwise, some sysfs nodes (easier for libvirt) could do the job too:
num_available_vintfs, max_vcmdqs_per_vintf, etc.

Thanks
Nicolin
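
As a concrete picture of the hw_info route mentioned above, the sketch below
queries IOMMU_GET_HW_INFO and tests a VCMDQ flag. The ioctl, struct
iommu_hw_info and struct iommu_hw_info_arm_smmuv3 are existing iommufd uAPI;
the HAS_VCMDQ bit itself is purely a placeholder, since no such capability bit
had been defined when this discussion took place:

#include <stdbool.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

#define HW_INFO_ARM_SMMUV3_HAS_VCMDQ (1 << 0)   /* hypothetical flag bit */

static bool smmu_has_vcmdq(int iommufd, uint32_t dev_id)
{
    struct iommu_hw_info_arm_smmuv3 smmu = {};
    struct iommu_hw_info cmd = {
        .size = sizeof(cmd),
        .dev_id = dev_id,
        .data_len = sizeof(smmu),
        .data_uptr = (uintptr_t)&smmu,
    };

    /* Ask the kernel for the physical SMMUv3's reported hardware info. */
    if (ioctl(iommufd, IOMMU_GET_HW_INFO, &cmd) ||
        cmd.out_data_type != IOMMU_HW_INFO_TYPE_ARM_SMMUV3) {
        return false;
    }
    return smmu.flags & HW_INFO_ARM_SMMUV3_HAS_VCMDQ;
}
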
On Wed, Nov 27, 2024 at 10:21:24AM +0000, Shameerali Kolothum Thodi wrote:
> > For SMMUv3, NVIDIA-specific vCMDQ, it needs a parameter to state that
> > specifically,
> > since I'm concluding from reading the SMMUv3 version G.a spec, that
> > ECMDQ was added
> > to be able to assign an ECMDQ to a VM,
>
> Not sure the intention of ECMDQ as per that specification is to assign
> it to a VM. I think the main idea behind it is to have one Command Queue
> per host CPU to eliminate lock contention while submitting commands
> to SMMU.

Right

> AFAIK it is not safe to assign one of the ECMDQ to guest yet. I think there is no
> way you can associate a VMID with ECMDQ. So there is no plan to
> support ARM ECMDQ now.

Yep

> NVIDIA VCMDQ is a completely vendor specific one. Perhaps ARM may come
> up with an assignable CMDQ in future though.

Yes, it is easy to imagine an ECMDQ extension that provides the same HW
features that VCMDQ has in future. I hope ARM will develop one.

> > ... and all needs to be per-instance ....
> > ... libvirt (or any other VMM orchestrator) will need to determine
> > compatibility for
> > live migration. e.g., can one live migrate an accel=nv-vcmdq-based VM to
> > a host with
> > accel=ecmdq support? only nv-vcmdq? what if there are version diffs of
> > nv-vcmdq over time?
> > -- apologies, but I don't know the minute details of nv-vcmdq to
> > determine if that's unlikely or not.
>
> Yes. This require more thought. But our first aim is get the basic smmuv3-accel
> support.

Yeah, there is no live migration support yet in the SMMU qmeu driver,
AFAIK?

When it gets done the supported options will have to be considered

Jason
On 11/27/24 11:00 AM, Jason Gunthorpe wrote:
> On Wed, Nov 27, 2024 at 10:21:24AM +0000, Shameerali Kolothum Thodi wrote:
>>> For SMMUv3, NVIDIA-specific vCMDQ, it needs a parameter to state that
>>> specifically,
>>> since I'm concluding from reading the SMMUv3 version G.a spec, that
>>> ECMDQ was added
>>> to be able to assign an ECMDQ to a VM,
>>
>> Not sure the intention of ECMDQ as per that specification is to assign
>> it to a VM. I think the main idea behind it is to have one Command Queue
>> per host CPU to eliminate lock contention while submitting commands
>> to SMMU.
>
> Right
>
>> AFAIK it is not safe to assign one of the ECMDQ to guest yet. I think there is no
>> way you can associate a VMID with ECMDQ. So there is no plan to
>> support ARM ECMDQ now.
>
> Yep
>
'Yet' ...
The association would be done via the VMM -- no different then what associates
an assigned device to a VM today -- no hw-level (VM-)ID needed; a matter of
exposing it to the VM, or not; or mapping the (virtual) CMDQ to the
mapped/associated ECMDQ.
They are purposedly mapped 64K apart from each other, enabling page-level
protection, which I doubt is a per-CPU req for lock contention avoidance
(large-cache-block spaced would be sufficient, even 4k; it's 64k spaced btwn
ECMDQ regs .. the largest ARM page size.
Summary: let's not assume this can't happen, and the chosen cmdline prevents it.

>> NVIDIA VCMDQ is a completely vendor specific one. Perhaps ARM may come
>> up with an assignable CMDQ in future though.
> Yes, it is easy to imagine an ECMDQ extension that provides the same HW
> features that VCMDQ has in future. I hope ARM will develop one.
>
Right, so how to know what op is being "accel"'d wrt smmuv3.

>>> ... and all needs to be per-instance ....
>>> ... libvirt (or any other VMM orchestrator) will need to determine
>>> compatibility for
>>> live migration. e.g., can one live migrate an accel=nv-vcmdq-based VM to
>>> a host with
>>> accel=ecmdq support? only nv-vcmdq? what if there are version diffs of
>>> nv-vcmdq over time?
>>> -- apologies, but I don't know the minute details of nv-vcmdq to
>>> determine if that's unlikely or not.
>>
>> Yes. This require more thought. But our first aim is get the basic smmuv3-accel
>> support.
> Yeah, there is no live migration support yet in the SMMU qmeu driver,
> AFAIK?
>
> When it gets done the supported options will have to be considered
>
> Jason
>
On Wed, Nov 27, 2024 at 06:03:23PM -0500, Donald Dutile wrote:
> The association would be done via the VMM -- no different then what associates
> an assigned device to a VM today -- no hw-level (VM-)ID needed; a matter of exposing
> it to the VM, or not; or mapping the (virtual) CMDQ to the mapped/associated ECMDQ.
> They are purposedly mapped 64K apart from each other, enabling page-level protection,
> which I doubt is a per-CPU req for lock contention avoidance (large-cache-block
> spaced would be sufficient, even 4k; it's 64k spaced btwn ECMDQ regs .. the largest
> ARM page size.

There are commands a VM could stuff on the ECMDQ that would harm the
hypervisor. Without VMID isolation you can't use ECMDQ like this today.

Jason
On 11/27/24 17:00, Jason Gunthorpe wrote:
> On Wed, Nov 27, 2024 at 10:21:24AM +0000, Shameerali Kolothum Thodi wrote:
>>> For SMMUv3, NVIDIA-specific vCMDQ, it needs a parameter to state that
>>> specifically,
>>> since I'm concluding from reading the SMMUv3 version G.a spec, that
>>> ECMDQ was added
>>> to be able to assign an ECMDQ to a VM,
>> Not sure the intention of ECMDQ as per that specification is to assign
>> it to a VM. I think the main idea behind it is to have one Command Queue
>> per host CPU to eliminate lock contention while submitting commands
>> to SMMU.
> Right
>
>> AFAIK it is not safe to assign one of the ECMDQ to guest yet. I think there is no
>> way you can associate a VMID with ECMDQ. So there is no plan to
>> support ARM ECMDQ now.
> Yep
>
>> NVIDIA VCMDQ is a completely vendor specific one. Perhaps ARM may come
>> up with an assignable CMDQ in future though.
> Yes, it is easy to imagine an ECMDQ extension that provides the same HW
> features that VCMDQ has in future. I hope ARM will develop one.
>
>>> ... and all needs to be per-instance ....
>>> ... libvirt (or any other VMM orchestrator) will need to determine
>>> compatibility for
>>> live migration. e.g., can one live migrate an accel=nv-vcmdq-based VM to
>>> a host with
>>> accel=ecmdq support? only nv-vcmdq? what if there are version diffs of
>>> nv-vcmdq over time?
>>> -- apologies, but I don't know the minute details of nv-vcmdq to
>>> determine if that's unlikely or not.
>> Yes. This require more thought. But our first aim is get the basic smmuv3-accel
>> support.
> Yeah, there is no live migration support yet in the SMMU qmeu driver,
> AFAIK?
the non accelerated SMMU QEMU device does support migration.

Eric
>
> When it gets done the supported options will have to be considered
>
> Jason
>
Hi, Eric

On Thu, 28 Nov 2024 at 00:06, Eric Auger <eric.auger@redhat.com> wrote:

> > Yeah, there is no live migration support yet in the SMMU qmeu driver,
> > AFAIK?
> the non accelerated SMMU QEMU device does support migration.

Could you clarify more about this?
The migration is not supported if using viommu (SMMU QEMU device), isn't it?

Thanks
On 11/28/24 04:25, Zhangfei Gao wrote:
> Hi, Eric
>
> On Thu, 28 Nov 2024 at 00:06, Eric Auger <eric.auger@redhat.com> wrote:
>
>>> Yeah, there is no live migration support yet in the SMMU qmeu driver,
>>> AFAIK?
>> the non accelerated SMMU QEMU device does support migration.
> Could you clarify more about this?
> The migration is not supported if using viommu (SMMU QEMU device), isn't it?
No this is not correct. Current QEMU SMMU device *does* support
migration (see VMStateDescription) as well as qemu virtio-iommu device.
so for instance if you run a guest with smmuv3 and protected virtio-pci
devices this is supposed to be migratable. If it does not work this is
bug and this should be fixed ;-)

Thanks

Eric
>
> Thanks
>
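
For context, "see VMStateDescription" refers to QEMU's migration framework:
a device becomes migratable by describing its state roughly as in the minimal
sketch below. The struct and field names here are illustrative placeholders,
not the actual vmstate_smmuv3 layout:

#include "qemu/osdep.h"
#include "migration/vmstate.h"

typedef struct ExampleIOMMUState {
    uint32_t ctrl;
    uint64_t strtab_base;
} ExampleIOMMUState;

static const VMStateDescription vmstate_example_iommu = {
    .name = "example-iommu",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(ctrl, ExampleIOMMUState),
        VMSTATE_UINT64(strtab_base, ExampleIOMMUState),
        VMSTATE_END_OF_LIST(),
    },
};

/*
 * A device hooks this into its DeviceClass (dc->vmsd = &vmstate_example_iommu;)
 * and the migration core then saves/loads the listed fields automatically.
 */
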
> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: Thursday, November 28, 2024 8:07 AM
> To: Zhangfei Gao <zhangfei.gao@linaro.org>
> Cc: Jason Gunthorpe <jgg@nvidia.com>; Shameerali Kolothum Thodi
> <shameerali.kolothum.thodi@huawei.com>; Donald Dutile
> <ddutile@redhat.com>; Nicolin Chen <nicolinc@nvidia.com>; qemu-
> arm@nongnu.org; qemu-devel@nongnu.org; peter.maydell@linaro.org;
> Linuxarm <linuxarm@huawei.com>; Wangzhou (B)
> <wangzhou1@hisilicon.com>; jiangkunkun <jiangkunkun@huawei.com>;
> Jonathan Cameron <jonathan.cameron@huawei.com>
> Subject: Re: [RFC PATCH 2/5] hw/arm/smmuv3: Add initial support for
> SMMUv3 Nested device
>
>
>
> On 11/28/24 04:25, Zhangfei Gao wrote:
> > Hi, Eric
> >
> > On Thu, 28 Nov 2024 at 00:06, Eric Auger <eric.auger@redhat.com> wrote:
> >
> >>> Yeah, there is no live migration support yet in the SMMU qmeu driver,
> >>> AFAIK?
> >> the non accelerated SMMU QEMU device does support migration.
> > Could you clarify more about this?
> > The migration is not supported if using viommu (SMMU QEMU device),
> isn't it?
> No this is not correct. Current QEMU SMMU device *does* support
> migration (see VMStateDescription) as well as qemu virtio-iommu device.
> so for instance if you run a guest with smmuv3 and protected virtio-pci
> devices this is supposed to be migratable. If it does not work this is
> bug and this should be fixed ;-)

I think if I am right Zhangfei was testing with vfio-pci device assigned on his vSVA
branch. But migration with vfio device is currently explicitly blocked if vIOMMU is
present.

I think Joao is working on it here[1].

But we may require additional support when we have vSVA to handle any
in-flight page fault handling gracefully.

Thanks,
Shameer
1. https://lore.kernel.org/all/20230622214845.3980-1-joao.m.martins@oracle.com/
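
"Explicitly blocked" here refers to QEMU's migration-blocker mechanism. The
sketch below shows roughly how a device can register such a blocker; it is not
the actual hw/vfio code, and note that the migrate_add_blocker() signature has
shifted across QEMU releases (this follows the current two-pointer form):

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "migration/blocker.h"

static Error *giommu_migration_blocker;

static int block_migration_with_viommu(Error **errp)
{
    if (giommu_migration_blocker) {
        return 0;   /* blocker already registered */
    }
    error_setg(&giommu_migration_blocker,
               "Migration is currently not supported with a vIOMMU and "
               "an assigned VFIO device");
    /* Fails (and clears the blocker) if a migration is already in progress. */
    return migrate_add_blocker(&giommu_migration_blocker, errp);
}
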
On Thu, Nov 28, 2024 at 08:28:15AM +0000, Shameerali Kolothum Thodi wrote:
> I think if I am right Zhangfei was testing with vfio-pci device assigned on his vSVA
> branch. But migration with vfio device is currently explicitly blocked if vIOMMU is
> present.
>
> I think Joao is working on it here[1].

Right, this is what I was thinking of. What use is smmu migration
support if VFIO side blocks any useful configuration?

Jason
Hi Shameer,

On 11/28/24 09:28, Shameerali Kolothum Thodi wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: Thursday, November 28, 2024 8:07 AM
>> To: Zhangfei Gao <zhangfei.gao@linaro.org>
>> Cc: Jason Gunthorpe <jgg@nvidia.com>; Shameerali Kolothum Thodi
>> <shameerali.kolothum.thodi@huawei.com>; Donald Dutile
>> <ddutile@redhat.com>; Nicolin Chen <nicolinc@nvidia.com>; qemu-
>> arm@nongnu.org; qemu-devel@nongnu.org; peter.maydell@linaro.org;
>> Linuxarm <linuxarm@huawei.com>; Wangzhou (B)
>> <wangzhou1@hisilicon.com>; jiangkunkun <jiangkunkun@huawei.com>;
>> Jonathan Cameron <jonathan.cameron@huawei.com>
>> Subject: Re: [RFC PATCH 2/5] hw/arm/smmuv3: Add initial support for
>> SMMUv3 Nested device
>>
>>
>>
>> On 11/28/24 04:25, Zhangfei Gao wrote:
>>> Hi, Eric
>>>
>>> On Thu, 28 Nov 2024 at 00:06, Eric Auger <eric.auger@redhat.com> wrote:
>>>
>>>>> Yeah, there is no live migration support yet in the SMMU qmeu driver,
>>>>> AFAIK?
>>>> the non accelerated SMMU QEMU device does support migration.
>>> Could you clarify more about this?
>>> The migration is not supported if using viommu (SMMU QEMU device),
>> isn't it?
>> No this is not correct. Current QEMU SMMU device *does* support
>> migration (see VMStateDescription) as well as qemu virtio-iommu device.
>> so for instance if you run a guest with smmuv3 and protected virtio-pci
>> devices this is supposed to be migratable. If it does not work this is
>> bug and this should be fixed ;-)
> I think if I am right Zhangfei was testing with vfio-pci device assigned on his vSVA
> branch. But migration with vfio device is currently explicitly blocked if vIOMMU is
> present.
definitively I was talking about migration vSMMU/VFIO which is not upstream.
>
> I think Joao is working on it here[1].
>
> But we may require additional support when we have vSVA to handle any
> in-flight page fault handling gracefully.
>
> Thanks,
> Shameer
> 1. https://lore.kernel.org/all/20230622214845.3980-1-joao.m.martins@oracle.com/

Thanks

Eric
>
>
>