From: Nicolin Chen <nicolinc@nvidia.com>
Implement a set_iommu_device callback:
-Find an existing S2 hwpt to test attach() or allocate a new one
(Devices behind the same physical SMMU should share an S2 HWPT.)
-Attach the device to the S2 hwp
-Allocate a viommu with the returned s2 hwpt.
-Allocate bypass and abort hwpt and attach bypass hwpt.
-and add it to its device list
Also add an unset_iommu_device doing the opposite cleanup routine.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
hw/arm/meson.build | 2 +-
hw/arm/smmuv3-accel.c | 183 ++++++++++++++++++++++++++++++++++
hw/arm/trace-events | 4 +
include/hw/arm/smmuv3-accel.h | 23 +++++
include/system/iommufd.h | 6 ++
5 files changed, 217 insertions(+), 1 deletion(-)
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index e8593363b0..dd41a86619 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -55,7 +55,7 @@ arm_ss.add(when: 'CONFIG_MUSCA', if_true: files('musca.c'))
arm_ss.add(when: 'CONFIG_ARMSSE', if_true: files('armsse.c'))
arm_ss.add(when: 'CONFIG_FSL_IMX7', if_true: files('fsl-imx7.c', 'mcimx7d-sabre.c'))
arm_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmuv3.c'))
-arm_ss.add(when: 'CONFIG_ARM_SMMUV3_ACCEL', if_true: files('smmuv3-accel.c'))
+arm_ss.add(when: ['CONFIG_ARM_SMMUV3_ACCEL', 'CONFIG_IOMMUFD'], if_true: files('smmuv3-accel.c'))
arm_ss.add(when: 'CONFIG_FSL_IMX6UL', if_true: files('fsl-imx6ul.c', 'mcimx6ul-evk.c'))
arm_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_soc.c'))
arm_ss.add(when: 'CONFIG_XEN', if_true: files(
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index 6610ebe4be..1c696649d5 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -7,6 +7,8 @@
*/
#include "qemu/osdep.h"
+#include "trace.h"
+#include "qemu/error-report.h"
#include "hw/arm/smmuv3-accel.h"
#include "hw/pci/pci_bridge.h"
@@ -30,6 +32,185 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *s, SMMUPciBus *sbus,
return accel_dev;
}
+static bool
+smmuv3_accel_dev_attach_viommu(SMMUv3AccelDevice *accel_dev,
+ HostIOMMUDeviceIOMMUFD *idev, Error **errp)
+{
+ struct iommu_hwpt_arm_smmuv3 bypass_data = {
+ .ste = { 0x9ULL, 0x0ULL },
+ };
+ struct iommu_hwpt_arm_smmuv3 abort_data = {
+ .ste = { 0x1ULL, 0x0ULL },
+ };
+ SMMUDevice *sdev = &accel_dev->sdev;
+ SMMUState *s = sdev->smmu;
+ SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
+ SMMUS2Hwpt *s2_hwpt;
+ SMMUViommu *viommu;
+ uint32_t s2_hwpt_id;
+ uint32_t viommu_id;
+
+ if (s_accel->viommu) {
+ accel_dev->viommu = s_accel->viommu;
+ return host_iommu_device_iommufd_attach_hwpt(
+ idev, s_accel->viommu->s2_hwpt->hwpt_id, errp);
+ }
+
+ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, idev->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT,
+ IOMMU_HWPT_DATA_NONE, 0, NULL,
+ &s2_hwpt_id, errp)) {
+ return false;
+ }
+
+ /* Attach to S2 for MSI cookie */
+ if (!host_iommu_device_iommufd_attach_hwpt(idev, s2_hwpt_id, errp)) {
+ goto free_s2_hwpt;
+ }
+
+ if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
+ IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
+ s2_hwpt_id, &viommu_id, errp)) {
+ goto detach_s2_hwpt;
+ }
+
+ viommu = g_new0(SMMUViommu, 1);
+ viommu->core.viommu_id = viommu_id;
+ viommu->core.s2_hwpt_id = s2_hwpt_id;
+ viommu->core.iommufd = idev->iommufd;
+
+ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
+ viommu->core.viommu_id, 0,
+ IOMMU_HWPT_DATA_ARM_SMMUV3,
+ sizeof(abort_data), &abort_data,
+ &viommu->abort_hwpt_id, errp)) {
+ error_report("failed to allocate an abort pagetable");
+ goto free_viommu;
+ }
+
+ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
+ viommu->core.viommu_id, 0,
+ IOMMU_HWPT_DATA_ARM_SMMUV3,
+ sizeof(bypass_data), &bypass_data,
+ &viommu->bypass_hwpt_id, errp)) {
+ error_report("failed to allocate a bypass pagetable");
+ goto free_abort_hwpt;
+ }
+
+ /*
+ * Attach the bypass STE which means S1 bypass and S2 translate.
+ * This is to make sure that the vIOMMU object is now associated
+ * with the device and has this STE installed in the host SMMUV3.
+ */
+ if (!host_iommu_device_iommufd_attach_hwpt(
+ idev, viommu->bypass_hwpt_id, errp)) {
+ error_report("failed to attach the bypass pagetable");
+ goto free_bypass_hwpt;
+ }
+
+ s2_hwpt = g_new0(SMMUS2Hwpt, 1);
+ s2_hwpt->iommufd = idev->iommufd;
+ s2_hwpt->hwpt_id = s2_hwpt_id;
+ s2_hwpt->ioas_id = idev->ioas_id;
+
+ viommu->iommufd = idev->iommufd;
+ viommu->s2_hwpt = s2_hwpt;
+
+ s_accel->viommu = viommu;
+ accel_dev->viommu = viommu;
+ return true;
+
+free_bypass_hwpt:
+ iommufd_backend_free_id(idev->iommufd, viommu->bypass_hwpt_id);
+free_abort_hwpt:
+ iommufd_backend_free_id(idev->iommufd, viommu->abort_hwpt_id);
+free_viommu:
+ iommufd_backend_free_id(idev->iommufd, viommu->core.viommu_id);
+ g_free(viommu);
+detach_s2_hwpt:
+ host_iommu_device_iommufd_attach_hwpt(idev, accel_dev->idev->ioas_id, errp);
+free_s2_hwpt:
+ iommufd_backend_free_id(idev->iommufd, s2_hwpt_id);
+ return false;
+}
+
+static bool smmuv3_accel_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+ HostIOMMUDevice *hiod, Error **errp)
+{
+ HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
+ SMMUState *s = opaque;
+ SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
+ SMMUPciBus *sbus = smmu_get_sbus(s, bus);
+ SMMUv3AccelDevice *accel_dev = smmuv3_accel_get_dev(s, sbus, bus, devfn);
+ SMMUDevice *sdev = &accel_dev->sdev;
+
+ if (!idev) {
+ return true;
+ }
+
+ if (accel_dev->idev) {
+ if (accel_dev->idev != idev) {
+ error_report("Device 0x%x already ha an associated idev",
+ smmu_get_sid(sdev));
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ if (!smmuv3_accel_dev_attach_viommu(accel_dev, idev, errp)) {
+ error_report("Unable to attach viommu");
+ return false;
+ }
+
+ accel_dev->idev = idev;
+ QLIST_INSERT_HEAD(&s_accel->viommu->device_list, accel_dev, next);
+ trace_smmuv3_accel_set_iommu_device(devfn, smmu_get_sid(sdev));
+ return true;
+}
+
+static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
+ int devfn)
+{
+ SMMUDevice *sdev;
+ SMMUv3AccelDevice *accel_dev;
+ SMMUViommu *viommu;
+ SMMUState *s = opaque;
+ SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
+ SMMUPciBus *sbus = g_hash_table_lookup(s->smmu_pcibus_by_busptr, bus);
+
+ if (!sbus) {
+ return;
+ }
+
+ sdev = sbus->pbdev[devfn];
+ if (!sdev) {
+ return;
+ }
+
+ accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
+ if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
+ accel_dev->idev->ioas_id,
+ NULL)) {
+ error_report("Unable to attach dev to the default HW pagetable");
+ }
+
+
+ accel_dev->idev = NULL;
+ QLIST_REMOVE(accel_dev, next);
+ trace_smmuv3_accel_unset_iommu_device(devfn, smmu_get_sid(sdev));
+
+ viommu = s_accel->viommu;
+ if (QLIST_EMPTY(&viommu->device_list)) {
+ iommufd_backend_free_id(viommu->iommufd, viommu->bypass_hwpt_id);
+ iommufd_backend_free_id(viommu->iommufd, viommu->abort_hwpt_id);
+ iommufd_backend_free_id(viommu->iommufd, viommu->core.viommu_id);
+ iommufd_backend_free_id(viommu->iommufd, viommu->s2_hwpt->hwpt_id);
+ g_free(viommu->s2_hwpt);
+ g_free(viommu);
+ s_accel->viommu = NULL;
+ }
+}
static AddressSpace *smmuv3_accel_find_add_as(PCIBus *bus, void *opaque,
int devfn)
{
@@ -77,6 +258,8 @@ static void smmu_accel_realize(DeviceState *d, Error **errp)
return;
}
bs->get_address_space = smmuv3_accel_find_add_as;
+ bs->set_iommu_device = smmuv3_accel_set_iommu_device;
+ bs->unset_iommu_device = smmuv3_accel_unset_iommu_device;
}
static void smmuv3_accel_class_init(ObjectClass *klass, void *data)
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 7790db780e..17960794bf 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -58,6 +58,10 @@ smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s
smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d"
smmu_reset_exit(void) ""
+#smmuv3-accel.c
+smmuv3_accel_set_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x)"
+smmuv3_accel_unset_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x"
+
# strongarm.c
strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
strongarm_ssp_read_underrun(void) "SSP rx underrun"
diff --git a/include/hw/arm/smmuv3-accel.h b/include/hw/arm/smmuv3-accel.h
index 86c0523063..aca6838dca 100644
--- a/include/hw/arm/smmuv3-accel.h
+++ b/include/hw/arm/smmuv3-accel.h
@@ -12,16 +12,39 @@
#include "hw/arm/smmu-common.h"
#include "hw/arm/smmuv3.h"
#include "qom/object.h"
+#include "system/iommufd.h"
+
+#include <linux/iommufd.h>
#define TYPE_ARM_SMMUV3_ACCEL "arm-smmuv3-accel"
OBJECT_DECLARE_TYPE(SMMUv3AccelState, SMMUv3AccelClass, ARM_SMMUV3_ACCEL)
+typedef struct SMMUS2Hwpt {
+ IOMMUFDBackend *iommufd;
+ uint32_t hwpt_id;
+ uint32_t ioas_id;
+} SMMUS2Hwpt;
+
+typedef struct SMMUViommu {
+ IOMMUFDBackend *iommufd;
+ IOMMUFDViommu core;
+ SMMUS2Hwpt *s2_hwpt;
+ uint32_t bypass_hwpt_id;
+ uint32_t abort_hwpt_id;
+ QLIST_HEAD(, SMMUv3AccelDevice) device_list;
+ QLIST_ENTRY(SMMUViommu) next;
+} SMMUViommu;
+
typedef struct SMMUv3AccelDevice {
SMMUDevice sdev;
+ HostIOMMUDeviceIOMMUFD *idev;
+ SMMUViommu *viommu;
+ QLIST_ENTRY(SMMUv3AccelDevice) next;
} SMMUv3AccelDevice;
struct SMMUv3AccelState {
SMMUv3State smmuv3_state;
+ SMMUViommu *viommu;
};
struct SMMUv3AccelClass {
diff --git a/include/system/iommufd.h b/include/system/iommufd.h
index 53920bae5b..9c106ea078 100644
--- a/include/system/iommufd.h
+++ b/include/system/iommufd.h
@@ -37,6 +37,12 @@ struct IOMMUFDBackend {
/*< public >*/
};
+typedef struct IOMMUFDViommu {
+ IOMMUFDBackend *iommufd;
+ uint32_t s2_hwpt_id;
+ uint32_t viommu_id;
+} IOMMUFDViommu;
+
bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
void iommufd_backend_disconnect(IOMMUFDBackend *be);
--
2.34.1
Hi Shameer,
On 3/11/25 3:10 PM, Shameer Kolothum wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
>
> Implement a set_iommu_device callback:
> -Find an existing S2 hwpt to test attach() or allocate a new one
> (Devices behind the same physical SMMU should share an S2 HWPT.)
> -Attach the device to the S2 hwp
> -Allocate a viommu with the returned s2 hwpt.
> -Allocate bypass and abort hwpt and attach bypass hwpt.
> -and add it to its device list
>
> Also add an unset_iommu_device doing the opposite cleanup routine.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
> hw/arm/meson.build | 2 +-
> hw/arm/smmuv3-accel.c | 183 ++++++++++++++++++++++++++++++++++
> hw/arm/trace-events | 4 +
> include/hw/arm/smmuv3-accel.h | 23 +++++
> include/system/iommufd.h | 6 ++
> 5 files changed, 217 insertions(+), 1 deletion(-)
>
> diff --git a/hw/arm/meson.build b/hw/arm/meson.build
> index e8593363b0..dd41a86619 100644
> --- a/hw/arm/meson.build
> +++ b/hw/arm/meson.build
> @@ -55,7 +55,7 @@ arm_ss.add(when: 'CONFIG_MUSCA', if_true: files('musca.c'))
> arm_ss.add(when: 'CONFIG_ARMSSE', if_true: files('armsse.c'))
> arm_ss.add(when: 'CONFIG_FSL_IMX7', if_true: files('fsl-imx7.c', 'mcimx7d-sabre.c'))
> arm_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmuv3.c'))
> -arm_ss.add(when: 'CONFIG_ARM_SMMUV3_ACCEL', if_true: files('smmuv3-accel.c'))
> +arm_ss.add(when: ['CONFIG_ARM_SMMUV3_ACCEL', 'CONFIG_IOMMUFD'], if_true: files('smmuv3-accel.c'))
I guess we could set from patch 3 onwards?
> arm_ss.add(when: 'CONFIG_FSL_IMX6UL', if_true: files('fsl-imx6ul.c', 'mcimx6ul-evk.c'))
> arm_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_soc.c'))
> arm_ss.add(when: 'CONFIG_XEN', if_true: files(
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index 6610ebe4be..1c696649d5 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -7,6 +7,8 @@
> */
>
> #include "qemu/osdep.h"
> +#include "trace.h"
> +#include "qemu/error-report.h"
>
> #include "hw/arm/smmuv3-accel.h"
> #include "hw/pci/pci_bridge.h"
> @@ -30,6 +32,185 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *s, SMMUPciBus *sbus,
> return accel_dev;
> }
>
> +static bool
> +smmuv3_accel_dev_attach_viommu(SMMUv3AccelDevice *accel_dev,
> + HostIOMMUDeviceIOMMUFD *idev, Error **errp)
> +{
> + struct iommu_hwpt_arm_smmuv3 bypass_data = {
> + .ste = { 0x9ULL, 0x0ULL },
I would suggest to use defines.
> + };
> + struct iommu_hwpt_arm_smmuv3 abort_data = {
> + .ste = { 0x1ULL, 0x0ULL },
same
> + };
> + SMMUDevice *sdev = &accel_dev->sdev;
> + SMMUState *s = sdev->smmu;
> + SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> + SMMUS2Hwpt *s2_hwpt;
> + SMMUViommu *viommu;
> + uint32_t s2_hwpt_id;
> + uint32_t viommu_id;
> +
> + if (s_accel->viommu) {
> + accel_dev->viommu = s_accel->viommu;
> + return host_iommu_device_iommufd_attach_hwpt(
> + idev, s_accel->viommu->s2_hwpt->hwpt_id, errp);
> + }
> +
> + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, idev->ioas_id,
> + IOMMU_HWPT_ALLOC_NEST_PARENT,
> + IOMMU_HWPT_DATA_NONE, 0, NULL,
> + &s2_hwpt_id, errp)) {
> + return false;
> + }
> +
> + /* Attach to S2 for MSI cookie */
> + if (!host_iommu_device_iommufd_attach_hwpt(idev, s2_hwpt_id, errp)) {
> + goto free_s2_hwpt;
> + }
> +
> + if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
> + IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
> + s2_hwpt_id, &viommu_id, errp)) {
> + goto detach_s2_hwpt;
> + }
> +
> + viommu = g_new0(SMMUViommu, 1);
> + viommu->core.viommu_id = viommu_id;
> + viommu->core.s2_hwpt_id = s2_hwpt_id;
> + viommu->core.iommufd = idev->iommufd;
> +
> + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> + viommu->core.viommu_id, 0,
> + IOMMU_HWPT_DATA_ARM_SMMUV3,
> + sizeof(abort_data), &abort_data,
> + &viommu->abort_hwpt_id, errp)) {
> + error_report("failed to allocate an abort pagetable");
is that error_report needed as we have the error handle already?
> + goto free_viommu;
> + }
> +
> + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> + viommu->core.viommu_id, 0,
> + IOMMU_HWPT_DATA_ARM_SMMUV3,
> + sizeof(bypass_data), &bypass_data,
> + &viommu->bypass_hwpt_id, errp)) {
> + error_report("failed to allocate a bypass pagetable");
same
> + goto free_abort_hwpt;
> + }
> +
> + /*
> + * Attach the bypass STE which means S1 bypass and S2 translate.
> + * This is to make sure that the vIOMMU object is now associated
> + * with the device and has this STE installed in the host SMMUV3.
> + */
> + if (!host_iommu_device_iommufd_attach_hwpt(
> + idev, viommu->bypass_hwpt_id, errp)) {
> + error_report("failed to attach the bypass pagetable");
same
if you prefer you can add error "hint"
> + goto free_bypass_hwpt;
> + }
> +
> + s2_hwpt = g_new0(SMMUS2Hwpt, 1);
> + s2_hwpt->iommufd = idev->iommufd;
> + s2_hwpt->hwpt_id = s2_hwpt_id;
> + s2_hwpt->ioas_id = idev->ioas_id;
> +
> + viommu->iommufd = idev->iommufd;
> + viommu->s2_hwpt = s2_hwpt;
> +
> + s_accel->viommu = viommu;
> + accel_dev->viommu = viommu;
> + return true;
> +
> +free_bypass_hwpt:
> + iommufd_backend_free_id(idev->iommufd, viommu->bypass_hwpt_id);
> +free_abort_hwpt:
> + iommufd_backend_free_id(idev->iommufd, viommu->abort_hwpt_id);
> +free_viommu:
> + iommufd_backend_free_id(idev->iommufd, viommu->core.viommu_id);
> + g_free(viommu);
> +detach_s2_hwpt:
> + host_iommu_device_iommufd_attach_hwpt(idev, accel_dev->idev->ioas_id, errp);
This should be detach.
Eric
> +free_s2_hwpt:
> + iommufd_backend_free_id(idev->iommufd, s2_hwpt_id);
> + return false;
> +}
> +
> +static bool smmuv3_accel_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
> + HostIOMMUDevice *hiod, Error **errp)
> +{
> + HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
> + SMMUState *s = opaque;
> + SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> + SMMUPciBus *sbus = smmu_get_sbus(s, bus);
> + SMMUv3AccelDevice *accel_dev = smmuv3_accel_get_dev(s, sbus, bus, devfn);
> + SMMUDevice *sdev = &accel_dev->sdev;
> +
> + if (!idev) {
> + return true;
> + }
> +
> + if (accel_dev->idev) {
> + if (accel_dev->idev != idev) {
> + error_report("Device 0x%x already ha an associated idev",
> + smmu_get_sid(sdev));
> + return false;
> + } else {
> + return true;
> + }
> + }
> +
> + if (!smmuv3_accel_dev_attach_viommu(accel_dev, idev, errp)) {
> + error_report("Unable to attach viommu");
> + return false;
> + }
> +
> + accel_dev->idev = idev;
> + QLIST_INSERT_HEAD(&s_accel->viommu->device_list, accel_dev, next);
> + trace_smmuv3_accel_set_iommu_device(devfn, smmu_get_sid(sdev));
> + return true;
> +}
> +
> +static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
> + int devfn)
> +{
> + SMMUDevice *sdev;
> + SMMUv3AccelDevice *accel_dev;
> + SMMUViommu *viommu;
> + SMMUState *s = opaque;
> + SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> + SMMUPciBus *sbus = g_hash_table_lookup(s->smmu_pcibus_by_busptr, bus);
> +
> + if (!sbus) {
> + return;
> + }
> +
> + sdev = sbus->pbdev[devfn];
> + if (!sdev) {
> + return;
> + }
> +
> + accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> + if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> + accel_dev->idev->ioas_id,
> + NULL)) {
> + error_report("Unable to attach dev to the default HW pagetable");
> + }
> +
> +
> + accel_dev->idev = NULL;
> + QLIST_REMOVE(accel_dev, next);
> + trace_smmuv3_accel_unset_iommu_device(devfn, smmu_get_sid(sdev));
> +
> + viommu = s_accel->viommu;
> + if (QLIST_EMPTY(&viommu->device_list)) {
> + iommufd_backend_free_id(viommu->iommufd, viommu->bypass_hwpt_id);
> + iommufd_backend_free_id(viommu->iommufd, viommu->abort_hwpt_id);
> + iommufd_backend_free_id(viommu->iommufd, viommu->core.viommu_id);
> + iommufd_backend_free_id(viommu->iommufd, viommu->s2_hwpt->hwpt_id);
> + g_free(viommu->s2_hwpt);
> + g_free(viommu);
> + s_accel->viommu = NULL;
> + }
> +}
> static AddressSpace *smmuv3_accel_find_add_as(PCIBus *bus, void *opaque,
> int devfn)
> {
> @@ -77,6 +258,8 @@ static void smmu_accel_realize(DeviceState *d, Error **errp)
> return;
> }
> bs->get_address_space = smmuv3_accel_find_add_as;
> + bs->set_iommu_device = smmuv3_accel_set_iommu_device;
> + bs->unset_iommu_device = smmuv3_accel_unset_iommu_device;
> }
>
> static void smmuv3_accel_class_init(ObjectClass *klass, void *data)
> diff --git a/hw/arm/trace-events b/hw/arm/trace-events
> index 7790db780e..17960794bf 100644
> --- a/hw/arm/trace-events
> +++ b/hw/arm/trace-events
> @@ -58,6 +58,10 @@ smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s
> smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d"
> smmu_reset_exit(void) ""
>
> +#smmuv3-accel.c
> +smmuv3_accel_set_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x)"
> +smmuv3_accel_unset_iommu_device(int devfn, uint32_t sid) "devfn=0x%x (sid=0x%x"
> +
> # strongarm.c
> strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
> strongarm_ssp_read_underrun(void) "SSP rx underrun"
> diff --git a/include/hw/arm/smmuv3-accel.h b/include/hw/arm/smmuv3-accel.h
> index 86c0523063..aca6838dca 100644
> --- a/include/hw/arm/smmuv3-accel.h
> +++ b/include/hw/arm/smmuv3-accel.h
> @@ -12,16 +12,39 @@
> #include "hw/arm/smmu-common.h"
> #include "hw/arm/smmuv3.h"
> #include "qom/object.h"
> +#include "system/iommufd.h"
> +
> +#include <linux/iommufd.h>
>
> #define TYPE_ARM_SMMUV3_ACCEL "arm-smmuv3-accel"
> OBJECT_DECLARE_TYPE(SMMUv3AccelState, SMMUv3AccelClass, ARM_SMMUV3_ACCEL)
>
> +typedef struct SMMUS2Hwpt {
> + IOMMUFDBackend *iommufd;
> + uint32_t hwpt_id;
> + uint32_t ioas_id;
> +} SMMUS2Hwpt;
> +
> +typedef struct SMMUViommu {
> + IOMMUFDBackend *iommufd;
> + IOMMUFDViommu core;
> + SMMUS2Hwpt *s2_hwpt;
> + uint32_t bypass_hwpt_id;
> + uint32_t abort_hwpt_id;
> + QLIST_HEAD(, SMMUv3AccelDevice) device_list;
> + QLIST_ENTRY(SMMUViommu) next;
> +} SMMUViommu;
> +
> typedef struct SMMUv3AccelDevice {
> SMMUDevice sdev;
> + HostIOMMUDeviceIOMMUFD *idev;
> + SMMUViommu *viommu;
> + QLIST_ENTRY(SMMUv3AccelDevice) next;
> } SMMUv3AccelDevice;
>
> struct SMMUv3AccelState {
> SMMUv3State smmuv3_state;
> + SMMUViommu *viommu;
> };
>
> struct SMMUv3AccelClass {
> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
> index 53920bae5b..9c106ea078 100644
> --- a/include/system/iommufd.h
> +++ b/include/system/iommufd.h
> @@ -37,6 +37,12 @@ struct IOMMUFDBackend {
> /*< public >*/
> };
>
> +typedef struct IOMMUFDViommu {
> + IOMMUFDBackend *iommufd;
> + uint32_t s2_hwpt_id;
> + uint32_t viommu_id;
> +} IOMMUFDViommu;
> +
> bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
> void iommufd_backend_disconnect(IOMMUFDBackend *be);
>
On Tue, Mar 11, 2025 at 02:10:34PM +0000, Shameer Kolothum wrote:
> @@ -30,6 +32,185 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *s, SMMUPciBus *sbus,
> return accel_dev;
> }
>
> +static bool
> +smmuv3_accel_dev_attach_viommu(SMMUv3AccelDevice *accel_dev,
> + HostIOMMUDeviceIOMMUFD *idev, Error **errp)
With vEVENTQ v9, vDEVICE (vSID) is required to attach a device
to a proxy NESTED hwpt (applicable to bypass/abort HWPTs too).
So, host_iommu_device_iommufd_attach_hwpt() would fail in this
function because vSID isn't ready at this stage. So all those
calls should be moved out of the function, then this should be
likely "smmuv3_accel_dev_alloc_viommu"?
That being said, I don't know when QEMU actually prepare a BDF
number for a vfio-pci device. The only place that I see it is
ready is at guest-level SMMU installing the Stream Table, i.e.
in smmuv3_accel_install_nested_ste().
> +{
> + struct iommu_hwpt_arm_smmuv3 bypass_data = {
> + .ste = { 0x9ULL, 0x0ULL },
> + };
> + struct iommu_hwpt_arm_smmuv3 abort_data = {
> + .ste = { 0x1ULL, 0x0ULL },
> + };
> + SMMUDevice *sdev = &accel_dev->sdev;
> + SMMUState *s = sdev->smmu;
> + SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> + SMMUS2Hwpt *s2_hwpt;
> + SMMUViommu *viommu;
> + uint32_t s2_hwpt_id;
> + uint32_t viommu_id;
> +
> + if (s_accel->viommu) {
> + accel_dev->viommu = s_accel->viommu;
> + return host_iommu_device_iommufd_attach_hwpt(
> + idev, s_accel->viommu->s2_hwpt->hwpt_id, errp);
Yea, here is my bad. We shouldn't attach a device to s2_hwpt,
since eventually s2_hwpt would be a shared hwpt across SMMUs.
> + /* Attach to S2 for MSI cookie */
> + if (!host_iommu_device_iommufd_attach_hwpt(idev, s2_hwpt_id, errp)) {
> + goto free_s2_hwpt;
> + }
With the merged sw_msi series, we don't need this anymore.
> + /*
> + * Attach the bypass STE which means S1 bypass and S2 translate.
> + * This is to make sure that the vIOMMU object is now associated
> + * with the device and has this STE installed in the host SMMUV3.
> + */
> + if (!host_iommu_device_iommufd_attach_hwpt(
> + idev, viommu->bypass_hwpt_id, errp)) {
> + error_report("failed to attach the bypass pagetable");
> + goto free_bypass_hwpt;
> + }
Ditto. We have to postpone this until vdevice is allocated.
> +static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
> + int devfn)
> +{
> + SMMUDevice *sdev;
> + SMMUv3AccelDevice *accel_dev;
> + SMMUViommu *viommu;
> + SMMUState *s = opaque;
> + SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> + SMMUPciBus *sbus = g_hash_table_lookup(s->smmu_pcibus_by_busptr, bus);
> +
> + if (!sbus) {
> + return;
> + }
> +
> + sdev = sbus->pbdev[devfn];
> + if (!sdev) {
> + return;
> + }
> +
> + accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> + if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> + accel_dev->idev->ioas_id,
> + NULL)) {
> + error_report("Unable to attach dev to the default HW pagetable");
> + }
> +
> +
Could drop the extra line.
Thanks
Nicolin
Hi Nicolin,
> -----Original Message-----
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Tuesday, March 11, 2025 9:08 PM
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org;
> eric.auger@redhat.com; peter.maydell@linaro.org; jgg@nvidia.com;
> ddutile@redhat.com; berrange@redhat.com; nathanc@nvidia.com;
> mochs@nvidia.com; smostafa@google.com; Linuxarm
> <linuxarm@huawei.com>; Wangzhou (B) <wangzhou1@hisilicon.com>;
> jiangkunkun <jiangkunkun@huawei.com>; Jonathan Cameron
> <jonathan.cameron@huawei.com>; zhangfei.gao@linaro.org
> Subject: Re: [RFC PATCH v2 09/20] hw/arm/smmuv3-accel: Add
> set/unset_iommu_device callback
>
> On Tue, Mar 11, 2025 at 02:10:34PM +0000, Shameer Kolothum wrote:
> > @@ -30,6 +32,185 @@ static SMMUv3AccelDevice
> *smmuv3_accel_get_dev(SMMUState *s, SMMUPciBus *sbus,
> > return accel_dev;
> > }
> >
> > +static bool
> > +smmuv3_accel_dev_attach_viommu(SMMUv3AccelDevice *accel_dev,
> > + HostIOMMUDeviceIOMMUFD *idev, Error **errp)
>
> With vEVENTQ v9, vDEVICE (vSID) is required to attach a device
> to a proxy NESTED hwpt (applicable to bypass/abort HWPTs too).
> So, host_iommu_device_iommufd_attach_hwpt() would fail in this
> function because vSID isn't ready at this stage. So all those
> calls should be moved out of the function, then this should be
> likely "smmuv3_accel_dev_alloc_viommu"?
>
> That being said, I don't know when QEMU actually prepare a BDF
> number for a vfio-pci device. The only place that I see it is
> ready is at guest-level SMMU installing the Stream Table, i.e.
> in smmuv3_accel_install_nested_ste().
>
> > +{
> > + struct iommu_hwpt_arm_smmuv3 bypass_data = {
> > + .ste = { 0x9ULL, 0x0ULL },
> > + };
> > + struct iommu_hwpt_arm_smmuv3 abort_data = {
> > + .ste = { 0x1ULL, 0x0ULL },
> > + };
> > + SMMUDevice *sdev = &accel_dev->sdev;
> > + SMMUState *s = sdev->smmu;
> > + SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> > + SMMUS2Hwpt *s2_hwpt;
> > + SMMUViommu *viommu;
> > + uint32_t s2_hwpt_id;
> > + uint32_t viommu_id;
> > +
> > + if (s_accel->viommu) {
> > + accel_dev->viommu = s_accel->viommu;
> > + return host_iommu_device_iommufd_attach_hwpt(
> > + idev, s_accel->viommu->s2_hwpt->hwpt_id, errp);
>
> Yea, here is my bad. We shouldn't attach a device to s2_hwpt,
> since eventually s2_hwpt would be a shared hwpt across SMMUs.
>
> > + /* Attach to S2 for MSI cookie */
> > + if (!host_iommu_device_iommufd_attach_hwpt(idev, s2_hwpt_id,
> errp)) {
> > + goto free_s2_hwpt;
> > + }
>
> With the merged sw_msi series, we don't need this anymore.
>
> > + /*
> > + * Attach the bypass STE which means S1 bypass and S2 translate.
> > + * This is to make sure that the vIOMMU object is now associated
> > + * with the device and has this STE installed in the host SMMUV3.
> > + */
> > + if (!host_iommu_device_iommufd_attach_hwpt(
> > + idev, viommu->bypass_hwpt_id, errp)) {
> > + error_report("failed to attach the bypass pagetable");
> > + goto free_bypass_hwpt;
> > + }
>
> Ditto. We have to postpone this until vdevice is allocated.
Ok. I will take a look based on the vEVENTQ v9 series.
I guess this Qemu branch of yours is a more representative of the changes described
above?
https://github.com/nicolinc/qemu/tree/wip/for_iommufd_veventq-v9
Right?
Thanks,
Shameer
> > +static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void
> *opaque,
> > + int devfn)
> > +{
> > + SMMUDevice *sdev;
> > + SMMUv3AccelDevice *accel_dev;
> > + SMMUViommu *viommu;
> > + SMMUState *s = opaque;
> > + SMMUv3AccelState *s_accel = ARM_SMMUV3_ACCEL(s);
> > + SMMUPciBus *sbus = g_hash_table_lookup(s-
> >smmu_pcibus_by_busptr, bus);
> > +
> > + if (!sbus) {
> > + return;
> > + }
> > +
> > + sdev = sbus->pbdev[devfn];
> > + if (!sdev) {
> > + return;
> > + }
> > +
> > + accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> > + if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> > + accel_dev->idev->ioas_id,
> > + NULL)) {
> > + error_report("Unable to attach dev to the default HW pagetable");
> > + }
> > +
> > +
>
> Could drop the extra line.
>
> Thanks
> Nicolin
On Mon, Mar 17, 2025 at 08:38:23AM +0000, Shameerali Kolothum Thodi wrote:
> Hi Nicolin,
>
> > -----Original Message-----
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Tuesday, March 11, 2025 9:08 PM
> > To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> > Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org;
> > eric.auger@redhat.com; peter.maydell@linaro.org; jgg@nvidia.com;
> > ddutile@redhat.com; berrange@redhat.com; nathanc@nvidia.com;
> > mochs@nvidia.com; smostafa@google.com; Linuxarm
> > <linuxarm@huawei.com>; Wangzhou (B) <wangzhou1@hisilicon.com>;
> > jiangkunkun <jiangkunkun@huawei.com>; Jonathan Cameron
> > <jonathan.cameron@huawei.com>; zhangfei.gao@linaro.org
> > Subject: Re: [RFC PATCH v2 09/20] hw/arm/smmuv3-accel: Add
> > set/unset_iommu_device callback
> >
> > On Tue, Mar 11, 2025 at 02:10:34PM +0000, Shameer Kolothum wrote:
> > > @@ -30,6 +32,185 @@ static SMMUv3AccelDevice
> > *smmuv3_accel_get_dev(SMMUState *s, SMMUPciBus *sbus,
> > > return accel_dev;
> > > }
> > >
> > > +static bool
> > > +smmuv3_accel_dev_attach_viommu(SMMUv3AccelDevice *accel_dev,
> > > + HostIOMMUDeviceIOMMUFD *idev, Error **errp)
> >
> > With vEVENTQ v9, vDEVICE (vSID) is required to attach a device
> > to a proxy NESTED hwpt (applicable to bypass/abort HWPTs too).
> > So, host_iommu_device_iommufd_attach_hwpt() would fail in this
> > function because vSID isn't ready at this stage. So all those
> > calls should be moved out of the function, then this should be
> > likely "smmuv3_accel_dev_alloc_viommu"?
> >
> > That being said, I don't know when QEMU actually prepare a BDF
> > number for a vfio-pci device. The only place that I see it is
> > ready is at guest-level SMMU installing the Stream Table, i.e.
> > in smmuv3_accel_install_nested_ste().
> > > + /*
> > > + * Attach the bypass STE which means S1 bypass and S2 translate.
> > > + * This is to make sure that the vIOMMU object is now associated
> > > + * with the device and has this STE installed in the host SMMUV3.
> > > + */
> > > + if (!host_iommu_device_iommufd_attach_hwpt(
> > > + idev, viommu->bypass_hwpt_id, errp)) {
> > > + error_report("failed to attach the bypass pagetable");
> > > + goto free_bypass_hwpt;
> > > + }
> >
> > Ditto. We have to postpone this until vdevice is allocated.
>
> Ok. I will take a look based on the vEVENTQ v9 series.
> I guess this Qemu branch of yours is a more representative of the changes described
> above?
> https://github.com/nicolinc/qemu/commits/wip/for_iommufd_veventq-v9/
Yes.
Mainly this change:
https://github.com/nicolinc/qemu/commit/d8f496eaf528f1c397f2374a999b8b23fd55c75b
Thanks
Nicolin
© 2016 - 2026 Red Hat, Inc.