From: Nicolin Chen <nicolinc@nvidia.com>
When the guest enables the Event Queue and a vIOMMU is present, allocate a
vEVENTQ object so that host-side events related to the vIOMMU can be
received and propagated back to the guest.
For cold-plugged devices using SMMUv3 acceleration, the vIOMMU is created
before the guest boots. In this case, the vEVENTQ is allocated when the
guest writes to SMMU_CR0 and sets EVENTQEN = 1.
If no cold-plugged device exists at boot (i.e. no vIOMMU initially), the
vEVENTQ is allocated when a vIOMMU is created, i.e. during the first
device hot-plug.
Also, rename the local error variable and refactor smmu_writel() to use
a single error accumulator with error_propagate().
Event read and propagation will be added in a later patch.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
hw/arm/smmuv3-accel.c | 61 +++++++++++++++++++++++++++++++++++++++++--
hw/arm/smmuv3-accel.h | 6 +++++
hw/arm/smmuv3.c | 17 +++++++-----
3 files changed, 75 insertions(+), 9 deletions(-)
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index c19c526fca..d92fcb1a89 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -390,6 +390,19 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice *sdev,
sizeof(Cmd), &entry_num, cmd, errp);
}
+static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
+{
+ IOMMUFDVeventq *veventq = accel->veventq;
+
+ if (!veventq) {
+ return;
+ }
+ close(veventq->veventq_fd);
+ iommufd_backend_free_id(accel->viommu->iommufd, veventq->veventq_id);
+ g_free(veventq);
+ accel->veventq = NULL;
+}
+
static void smmuv3_accel_free_viommu(SMMUv3AccelState *accel)
{
IOMMUFDViommu *viommu = accel->viommu;
@@ -397,6 +410,7 @@ static void smmuv3_accel_free_viommu(SMMUv3AccelState *accel)
if (!viommu) {
return;
}
+ smmuv3_accel_free_veventq(accel);
iommufd_backend_free_id(viommu->iommufd, accel->bypass_hwpt_id);
iommufd_backend_free_id(viommu->iommufd, accel->abort_hwpt_id);
iommufd_backend_free_id(viommu->iommufd, accel->viommu->viommu_id);
@@ -404,6 +418,41 @@ static void smmuv3_accel_free_viommu(SMMUv3AccelState *accel)
accel->viommu = NULL;
}
+bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
+{
+ SMMUv3AccelState *accel = s->s_accel;
+ IOMMUFDVeventq *veventq;
+ uint32_t veventq_id;
+ uint32_t veventq_fd;
+
+ if (!accel || !accel->viommu) {
+ return true;
+ }
+
+ if (accel->veventq) {
+ return true;
+ }
+
+ if (!smmuv3_eventq_enabled(s)) {
+ return true;
+ }
+
+ if (!iommufd_backend_alloc_veventq(accel->viommu->iommufd,
+ accel->viommu->viommu_id,
+ IOMMU_VEVENTQ_TYPE_ARM_SMMUV3,
+ 1 << s->eventq.log2size, &veventq_id,
+ &veventq_fd, errp)) {
+ return false;
+ }
+
+ veventq = g_new(IOMMUFDVeventq, 1);
+ veventq->veventq_id = veventq_id;
+ veventq->veventq_fd = veventq_fd;
+ veventq->viommu = accel->viommu;
+ accel->veventq = veventq;
+ return true;
+}
+
static bool
smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
Error **errp)
@@ -429,6 +478,7 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
viommu->viommu_id = viommu_id;
viommu->s2_hwpt_id = s2_hwpt_id;
viommu->iommufd = idev->iommufd;
+ accel->viommu = viommu;
/*
* Pre-allocate HWPTs for S1 bypass and abort cases. These will be attached
@@ -448,14 +498,20 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
goto free_abort_hwpt;
}
+ /* Allocate a vEVENTQ if guest has enabled event queue */
+ if (!smmuv3_accel_alloc_veventq(s, errp)) {
+ goto free_bypass_hwpt;
+ }
+
/* Attach a HWPT based on SMMUv3 GBPA.ABORT value */
hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
- goto free_bypass_hwpt;
+ goto free_veventq;
}
- accel->viommu = viommu;
return true;
+free_veventq:
+ smmuv3_accel_free_veventq(accel);
free_bypass_hwpt:
iommufd_backend_free_id(idev->iommufd, accel->bypass_hwpt_id);
free_abort_hwpt:
@@ -463,6 +519,7 @@ free_abort_hwpt:
free_viommu:
iommufd_backend_free_id(idev->iommufd, viommu->viommu_id);
g_free(viommu);
+ accel->viommu = NULL;
return false;
}
diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
index a8a64802ec..dba6c71de5 100644
--- a/hw/arm/smmuv3-accel.h
+++ b/hw/arm/smmuv3-accel.h
@@ -22,6 +22,7 @@
*/
typedef struct SMMUv3AccelState {
IOMMUFDViommu *viommu;
+ IOMMUFDVeventq *veventq;
uint32_t bypass_hwpt_id;
uint32_t abort_hwpt_id;
QLIST_HEAD(, SMMUv3AccelDevice) device_list;
@@ -50,6 +51,7 @@ bool smmuv3_accel_attach_gbpa_hwpt(SMMUv3State *s, Error **errp);
bool smmuv3_accel_issue_inv_cmd(SMMUv3State *s, void *cmd, SMMUDevice *sdev,
Error **errp);
void smmuv3_accel_idr_override(SMMUv3State *s);
+bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp);
void smmuv3_accel_reset(SMMUv3State *s);
#else
static inline void smmuv3_accel_init(SMMUv3State *s)
@@ -80,6 +82,10 @@ smmuv3_accel_issue_inv_cmd(SMMUv3State *s, void *cmd, SMMUDevice *sdev,
static inline void smmuv3_accel_idr_override(SMMUv3State *s)
{
}
+static inline bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
+{
+ return true;
+}
static inline void smmuv3_accel_reset(SMMUv3State *s)
{
}
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index c08d58c579..f804d3af25 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1597,14 +1597,17 @@ static MemTxResult smmu_writell(SMMUv3State *s, hwaddr offset,
static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset,
uint64_t data, MemTxAttrs attrs)
{
- Error *local_err = NULL;
+ Error *err = NULL, *local_err = NULL;
switch (offset) {
case A_CR0:
s->cr[0] = data;
s->cr0ack = data & ~SMMU_CR0_RESERVED;
/* in case the command queue has been enabled */
- smmuv3_cmdq_consume(s, &local_err);
+ smmuv3_cmdq_consume(s, &err);
+ /* Allocate vEVENTQ if EVENTQ is enabled and a vIOMMU is available */
+ smmuv3_accel_alloc_veventq(s, &local_err);
+ error_propagate(&err, local_err);
break;
case A_CR1:
s->cr[1] = data;
@@ -1621,7 +1624,7 @@ static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset,
* By acknowledging the CMDQ_ERR, SW may notify cmds can
* be processed again
*/
- smmuv3_cmdq_consume(s, &local_err);
+ smmuv3_cmdq_consume(s, &err);
break;
case A_GERROR_IRQ_CFG0: /* 64b */
s->gerror_irq_cfg0 = deposit64(s->gerror_irq_cfg0, 0, 32, data);
@@ -1643,7 +1646,7 @@ static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset,
if (data & R_GBPA_UPDATE_MASK) {
/* Ignore update bit as write is synchronous. */
s->gbpa = data & ~R_GBPA_UPDATE_MASK;
- smmuv3_accel_attach_gbpa_hwpt(s, &local_err);
+ smmuv3_accel_attach_gbpa_hwpt(s, &err);
}
break;
case A_STRTAB_BASE: /* 64b */
@@ -1671,7 +1674,7 @@ static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset,
break;
case A_CMDQ_PROD:
s->cmdq.prod = data;
- smmuv3_cmdq_consume(s, &local_err);
+ smmuv3_cmdq_consume(s, &err);
break;
case A_CMDQ_CONS:
s->cmdq.cons = data;
@@ -1711,8 +1714,8 @@ static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset,
break;
}
- if (local_err) {
- error_report_err(local_err);
+ if (err) {
+ error_report_err(err);
}
return MEMTX_OK;
}
--
2.43.0
On Fri, 13 Feb 2026 10:39:40 +0000
Shameer Kolothum <skolothumtho@nvidia.com> wrote:

> From: Nicolin Chen <nicolinc@nvidia.com>
>
> When the guest enables the Event Queue and a vIOMMU is present, allocate a
> vEVENTQ object so that host-side events related to the vIOMMU can be
> received and propagated back to the guest.
>
> For cold-plugged devices using SMMUv3 acceleration, the vIOMMU is created
> before the guest boots. In this case, the vEVENTQ is allocated when the
> guest writes to SMMU_CR0 and sets EVENTQEN = 1.
>
> If no cold-plugged device exists at boot (i.e. no vIOMMU initially), the
> vEVENTQ is allocated when a vIOMMU is created, i.e. during the first
> device hot-plug.
>
> Also, rename the local error variable and refactor smmu_writel() to use
> a single error accumulator with error_propagate().

Why not split the rename out as a separate patch? I don't hugely mind;
it just feels like some noise in here could have been broken out before
the real change and made it a tiny bit easier to review.

I wouldn't bother unless you are respinning again for other reasons though!

J

>
> Event read and propagation will be added in a later patch.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> Reviewed-by: Eric Auger <eric.auger@redhat.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
On Fri, Feb 13, 2026 at 10:39:40AM +0000, Shameer Kolothum wrote:
> static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset,
> uint64_t data, MemTxAttrs attrs)
> {
> - Error *local_err = NULL;
> + Error *err = NULL, *local_err = NULL;
>
> switch (offset) {
> case A_CR0:
> s->cr[0] = data;
> s->cr0ack = data & ~SMMU_CR0_RESERVED;
> /* in case the command queue has been enabled */
> - smmuv3_cmdq_consume(s, &local_err);
> + smmuv3_cmdq_consume(s, &err);
> + /* Allocate vEVENTQ if EVENTQ is enabled and a vIOMMU is available */
> + smmuv3_accel_alloc_veventq(s, &local_err);
> + error_propagate(&err, local_err);
It would be cleaner to have:
Error *local_err2 = NULL;
Error *local_err = NULL;
smmuv3_cmdq_consume(s, &local_err);
+ /* Allocate vEVENTQ if EVENTQ is enabled and a vIOMMU is available */
+ smmuv3_accel_alloc_veventq(s, &local_err2);
+ error_propagate(&local_err, local_err2);
Otherwise,
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
> -----Original Message-----
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: 13 February 2026 22:19
> To: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
> Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org;
> eric.auger@redhat.com; peter.maydell@linaro.org; Nathan Chen
> <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jiandi An
> <jan@nvidia.com>; Jason Gunthorpe <jgg@nvidia.com>;
> jonathan.cameron@huawei.com; zhangfei.gao@linaro.org;
> zhenzhong.duan@intel.com; Krishnakant Jaju <kjaju@nvidia.com>
> Subject: Re: [PATCH v6 3/5] hw/arm/smmuv3-accel: Allocate vEVENTQ for
> accelerated SMMUv3 devices
>
> On Fri, Feb 13, 2026 at 10:39:40AM +0000, Shameer Kolothum wrote:
> > static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset,
> > uint64_t data, MemTxAttrs attrs) {
> > - Error *local_err = NULL;
> > + Error *err = NULL, *local_err = NULL;
> >
> > switch (offset) {
> > case A_CR0:
> > s->cr[0] = data;
> > s->cr0ack = data & ~SMMU_CR0_RESERVED;
> > /* in case the command queue has been enabled */
> > - smmuv3_cmdq_consume(s, &local_err);
> > + smmuv3_cmdq_consume(s, &err);
> > + /* Allocate vEVENTQ if EVENTQ is enabled and a vIOMMU is available
> */
> > + smmuv3_accel_alloc_veventq(s, &local_err);
> > + error_propagate(&err, local_err);
>
> It would be cleaner to have:
>
> Error *local_err2 = NULL;
> Error *local_err = NULL;
Yeah. I had a bit of back and forth on this. My thinking was to use err as
an accumulating error pointer in case additional error paths are added
later, with local_err_x used for intermediate calls.
Either way is fine I guess for now.
Thanks,
Shameer
On Mon, 16 Feb 2026 at 08:58, Shameer Kolothum Thodi
<skolothumtho@nvidia.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: 13 February 2026 22:19
> > To: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
> > Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org;
> > eric.auger@redhat.com; peter.maydell@linaro.org; Nathan Chen
> > <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jiandi An
> > <jan@nvidia.com>; Jason Gunthorpe <jgg@nvidia.com>;
> > jonathan.cameron@huawei.com; zhangfei.gao@linaro.org;
> > zhenzhong.duan@intel.com; Krishnakant Jaju <kjaju@nvidia.com>
> > Subject: Re: [PATCH v6 3/5] hw/arm/smmuv3-accel: Allocate vEVENTQ for
> > accelerated SMMUv3 devices
> >
> > On Fri, Feb 13, 2026 at 10:39:40AM +0000, Shameer Kolothum wrote:
> > > static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset,
> > > uint64_t data, MemTxAttrs attrs) {
> > > - Error *local_err = NULL;
> > > + Error *err = NULL, *local_err = NULL;
> > >
> > > switch (offset) {
> > > case A_CR0:
> > > s->cr[0] = data;
> > > s->cr0ack = data & ~SMMU_CR0_RESERVED;
> > > /* in case the command queue has been enabled */
> > > - smmuv3_cmdq_consume(s, &local_err);
> > > + smmuv3_cmdq_consume(s, &err);
> > > + /* Allocate vEVENTQ if EVENTQ is enabled and a vIOMMU is available
> > */
> > > + smmuv3_accel_alloc_veventq(s, &local_err);
> > > + error_propagate(&err, local_err);
> >
> > It would be cleaner to have:
> >
> > Error *local_err2 = NULL;
> > Error *local_err = NULL;
>
> Yeah. I had a bit of back and forth on this. My thinking was to use err as
> an accumulating error pointer in case additional error paths are added
> later, with local_err_x used for intermediate calls.
>
> Either way is fine I guess for now.
Generally speaking we should try to stick to one of the
defined standard patterns for Error handling in the
big comment in include/qapi/error.h.
Yours is the "receive and accumulate multiple errors" pattern, I think?
That works, but do you really want to throw away the second error
message without reporting it if both calls fail?
Plus, from the commit message:
> Also, rename the local error variable and refactor smmu_writel() to use
> a single error accumulator with error_propagate().
If you find yourself writing "Also, ..." in a commit message,
it's often a sign that you should consider splitting the
commit in two.
thanks
-- PMM
> -----Original Message-----
> From: Peter Maydell <peter.maydell@linaro.org>
> Sent: 16 February 2026 16:57
> To: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
> Cc: Nicolin Chen <nicolinc@nvidia.com>; qemu-arm@nongnu.org; qemu-
> devel@nongnu.org; eric.auger@redhat.com; Nathan Chen
> <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jiandi An
> <jan@nvidia.com>; Jason Gunthorpe <jgg@nvidia.com>;
> jonathan.cameron@huawei.com; zhangfei.gao@linaro.org;
> zhenzhong.duan@intel.com; Krishnakant Jaju <kjaju@nvidia.com>
> Subject: Re: [PATCH v6 3/5] hw/arm/smmuv3-accel: Allocate vEVENTQ for
> accelerated SMMUv3 devices
>
> External email: Use caution opening links or attachments
>
>
> On Mon, 16 Feb 2026 at 08:58, Shameer Kolothum Thodi
> <skolothumtho@nvidia.com> wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: 13 February 2026 22:19
> > > To: Shameer Kolothum Thodi <skolothumtho@nvidia.com>
> > > Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org;
> > > eric.auger@redhat.com; peter.maydell@linaro.org; Nathan Chen
> > > <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jiandi An
> > > <jan@nvidia.com>; Jason Gunthorpe <jgg@nvidia.com>;
> > > jonathan.cameron@huawei.com; zhangfei.gao@linaro.org;
> > > zhenzhong.duan@intel.com; Krishnakant Jaju <kjaju@nvidia.com>
> > > Subject: Re: [PATCH v6 3/5] hw/arm/smmuv3-accel: Allocate vEVENTQ for
> > > accelerated SMMUv3 devices
> > >
> > > On Fri, Feb 13, 2026 at 10:39:40AM +0000, Shameer Kolothum wrote:
> > > > static MemTxResult smmu_writel(SMMUv3State *s, hwaddr offset,
> > > > uint64_t data, MemTxAttrs attrs) {
> > > > - Error *local_err = NULL;
> > > > + Error *err = NULL, *local_err = NULL;
> > > >
> > > > switch (offset) {
> > > > case A_CR0:
> > > > s->cr[0] = data;
> > > > s->cr0ack = data & ~SMMU_CR0_RESERVED;
> > > > /* in case the command queue has been enabled */
> > > > - smmuv3_cmdq_consume(s, &local_err);
> > > > + smmuv3_cmdq_consume(s, &err);
> > > > + /* Allocate vEVENTQ if EVENTQ is enabled and a vIOMMU is
> available
> > > */
> > > > + smmuv3_accel_alloc_veventq(s, &local_err);
> > > > + error_propagate(&err, local_err);
> > >
> > > It would be cleaner to have:
> > >
> > > Error *local_err2 = NULL;
> > > Error *local_err = NULL;
> >
> > Yeah. I had a bit of back and forth on this. My thinking was to use err as
> > an accumulating error pointer in case additional error paths are added
> > later, with local_err_x used for intermediate calls.
> >
> > Either way is fine I guess for now.
>
> Generally speaking we should try to stick to one of the
> defined standard patterns for Error handling in the
> big comment in include/qapi/error.h.
>
> Yours is the "receive and accumulate multiple errors" pattern, I think?
Yes, this currently uses that pattern.
> That works, but do you really want to throw away the second error
> message without reporting it if both calls fail?
This was considered earlier, but switched to the error_propagate()
pattern to follow the accumulation style. Since the two calls are
independent, I agree it is better to report both explicitly. I will
update this in the next revision if there are no further concerns.
> Plus, from the commit message:
>
> > Also, rename the local error variable and refactor smmu_writel() to use
> > a single error accumulator with error_propagate().
>
> If you find yourself writing "Also, ..." in a commit message,
> it's often a sign that you should consider splitting the
> commit in two.
Sure. Jonathan also mentioned that. (I was lazy)
Thanks,
Shameer
© 2016 - 2026 Red Hat, Inc.