[PATCH v5 5/5] hw/arm/smmuv3-accel: Read and propagate host vIOMMU events

Shameer Kolothum posted 5 patches 16 hours ago
Maintainers: Yi Liu <yi.l.liu@intel.com>, Eric Auger <eric.auger@redhat.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>, Peter Maydell <peter.maydell@linaro.org>
[PATCH v5 5/5] hw/arm/smmuv3-accel: Read and propagate host vIOMMU events
Posted by Shameer Kolothum 16 hours ago
Install an event handler on the vEVENTQ fd to read and propagate host
generated vIOMMU events to the guest.

The handler runs in QEMU's main loop, using a non-blocking fd registered
via qemu_set_fd_handler().

Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
 hw/arm/smmuv3-accel.c | 62 +++++++++++++++++++++++++++++++++++++++++++
 hw/arm/smmuv3-accel.h |  2 ++
 2 files changed, 64 insertions(+)

diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index d92fcb1a89..0d5dcef941 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -390,6 +390,48 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice *sdev,
                    sizeof(Cmd), &entry_num, cmd, errp);
 }
 
+static void smmuv3_accel_event_read(void *opaque)
+{
+    SMMUv3State *s = opaque;
+    SMMUv3AccelState *accel = s->s_accel;
+    struct {
+        struct iommufd_vevent_header hdr;
+        struct iommu_vevent_arm_smmuv3 vevent;
+    } buf;
+    uint32_t last_seq = accel->last_event_seq;
+    ssize_t bytes;
+
+    bytes = read(accel->veventq->veventq_fd, &buf, sizeof(buf));
+    if (bytes <= 0) {
+        if (errno == EAGAIN || errno == EINTR) {
+            return;
+        }
+        error_report_once("vEVENTQ: read failed (%m)");
+        return;
+    }
+
+    if (bytes == sizeof(buf.hdr) &&
+        (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)) {
+        error_report_once("vEVENTQ has lost events");
+        accel->event_start = false;
+        return;
+    }
+    if (bytes < sizeof(buf)) {
+        error_report_once("vEVENTQ: incomplete read (%zd/%zd bytes)",
+                          bytes, sizeof(buf));
+        return;
+    }
+
+    /* Check sequence in hdr for lost events if any */
+    if (accel->event_start && (buf.hdr.sequence - last_seq != 1)) {
+        error_report_once("vEVENTQ: detected lost %u event(s)",
+                          buf.hdr.sequence - last_seq - 1);
+    }
+    accel->last_event_seq = buf.hdr.sequence;
+    accel->event_start = true;
+    smmuv3_propagate_event(s, (Evt *)&buf.vevent);
+}
+
 static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
 {
     IOMMUFDVeventq *veventq = accel->veventq;
@@ -397,6 +439,7 @@ static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
     if (!veventq) {
         return;
     }
+    qemu_set_fd_handler(veventq->veventq_fd, NULL, NULL, NULL);
     close(veventq->veventq_fd);
     iommufd_backend_free_id(accel->viommu->iommufd, veventq->veventq_id);
     g_free(veventq);
@@ -424,6 +467,7 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
     IOMMUFDVeventq *veventq;
     uint32_t veventq_id;
     uint32_t veventq_fd;
+    int flags;
 
     if (!accel || !accel->viommu) {
         return true;
@@ -445,12 +489,30 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
         return false;
     }
 
+    flags = fcntl(veventq_fd, F_GETFL);
+    if (flags < 0) {
+        error_setg_errno(errp, errno, "Failed to get flags for vEVENTQ fd");
+        goto free_veventq;
+    }
+    if (fcntl(veventq_fd, F_SETFL, flags | O_NONBLOCK) < 0) {
+        error_setg_errno(errp, errno, "Failed to set O_NONBLOCK on vEVENTQ fd");
+        goto free_veventq;
+    }
+
     veventq = g_new(IOMMUFDVeventq, 1);
     veventq->veventq_id = veventq_id;
     veventq->veventq_fd = veventq_fd;
     veventq->viommu = accel->viommu;
     accel->veventq = veventq;
+
+    /* Set up event handler for veventq fd */
+    qemu_set_fd_handler(veventq_fd, smmuv3_accel_event_read, NULL, s);
     return true;
+
+free_veventq:
+    close(veventq_fd);
+    iommufd_backend_free_id(accel->viommu->iommufd, veventq_id);
+    return false;
 }
 
 static bool
diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
index dba6c71de5..c9c10e55c3 100644
--- a/hw/arm/smmuv3-accel.h
+++ b/hw/arm/smmuv3-accel.h
@@ -23,6 +23,8 @@
 typedef struct SMMUv3AccelState {
     IOMMUFDViommu *viommu;
     IOMMUFDVeventq *veventq;
+    uint32_t last_event_seq;
+    bool event_start;
     uint32_t bypass_hwpt_id;
     uint32_t abort_hwpt_id;
     QLIST_HEAD(, SMMUv3AccelDevice) device_list;
-- 
2.43.0
Re: [PATCH v5 5/5] hw/arm/smmuv3-accel: Read and propagate host vIOMMU events
Posted by Eric Auger 10 hours ago
Hi Shameer,

On 2/11/26 9:34 AM, Shameer Kolothum wrote:
> Install an event handler on the vEVENTQ fd to read and propagate host
> generated vIOMMU events to the guest.
>
> The handler runs in QEMU's main loop, using a non-blocking fd registered
> via qemu_set_fd_handler().
>
> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> ---
>  hw/arm/smmuv3-accel.c | 62 +++++++++++++++++++++++++++++++++++++++++++
>  hw/arm/smmuv3-accel.h |  2 ++
>  2 files changed, 64 insertions(+)
>
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index d92fcb1a89..0d5dcef941 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -390,6 +390,48 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice *sdev,
>                     sizeof(Cmd), &entry_num, cmd, errp);
>  }
>  
> +static void smmuv3_accel_event_read(void *opaque)
> +{
> +    SMMUv3State *s = opaque;
> +    SMMUv3AccelState *accel = s->s_accel;
> +    struct {
> +        struct iommufd_vevent_header hdr;
> +        struct iommu_vevent_arm_smmuv3 vevent;
> +    } buf;
> +    uint32_t last_seq = accel->last_event_seq;
> +    ssize_t bytes;
> +
> +    bytes = read(accel->veventq->veventq_fd, &buf, sizeof(buf));
> +    if (bytes <= 0) {
> +        if (errno == EAGAIN || errno == EINTR) {
> +            return;
> +        }
> +        error_report_once("vEVENTQ: read failed (%m)");
> +        return;
> +    }
> +
> +    if (bytes == sizeof(buf.hdr) &&
> +        (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)) {
> +        error_report_once("vEVENTQ has lost events");
in case in the future we end up with several vEVENTQs, it may be
relevant to log the queue id/type

After reading the linux uapi again, this seems to record a queue
overflow. I would replace the

"vEVENTQ has lost events" trace with "queue <type> <id> has overflowed". This would allow differentiation from the case below.

> +        accel->event_start = false;
> +        return;
> +    }
> +    if (bytes < sizeof(buf)) {
> +        error_report_once("vEVENTQ: incomplete read (%zd/%zd bytes)",
> +                          bytes, sizeof(buf));
> +        return;
> +    }
> +
> +    /* Check sequence in hdr for lost events if any */
> +    if (accel->event_start && (buf.hdr.sequence - last_seq != 1)) {
wonder if we need to handle any hypothetical wrap-around situation?
> +        error_report_once("vEVENTQ: detected lost %u event(s)",
> +                          buf.hdr.sequence - last_seq - 1);
> +    }
> +    accel->last_event_seq = buf.hdr.sequence;
> +    accel->event_start = true;
> +    smmuv3_propagate_event(s, (Evt *)&buf.vevent);
> +}
> +
>  static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
>  {
>      IOMMUFDVeventq *veventq = accel->veventq;
> @@ -397,6 +439,7 @@ static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
>      if (!veventq) {
>          return;
>      }
> +    qemu_set_fd_handler(veventq->veventq_fd, NULL, NULL, NULL);
>      close(veventq->veventq_fd);
>      iommufd_backend_free_id(accel->viommu->iommufd, veventq->veventq_id);
>      g_free(veventq);
> @@ -424,6 +467,7 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
>      IOMMUFDVeventq *veventq;
>      uint32_t veventq_id;
>      uint32_t veventq_fd;
> +    int flags;
>  
>      if (!accel || !accel->viommu) {
>          return true;
> @@ -445,12 +489,30 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
>          return false;
>      }
>  
> +    flags = fcntl(veventq_fd, F_GETFL);
> +    if (flags < 0) {
> +        error_setg_errno(errp, errno, "Failed to get flags for vEVENTQ fd");
> +        goto free_veventq;
> +    }
> +    if (fcntl(veventq_fd, F_SETFL, flags | O_NONBLOCK) < 0) {
> +        error_setg_errno(errp, errno, "Failed to set O_NONBLOCK on vEVENTQ fd");
> +        goto free_veventq;
> +    }
> +
>      veventq = g_new(IOMMUFDVeventq, 1);
>      veventq->veventq_id = veventq_id;
>      veventq->veventq_fd = veventq_fd;
>      veventq->viommu = accel->viommu;
>      accel->veventq = veventq;
> +
> +    /* Set up event handler for veventq fd */
> +    qemu_set_fd_handler(veventq_fd, smmuv3_accel_event_read, NULL, s);
>      return true;
> +
> +free_veventq:
> +    close(veventq_fd);
> +    iommufd_backend_free_id(accel->viommu->iommufd, veventq_id);
> +    return false;
>  }
>  
>  static bool
> diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> index dba6c71de5..c9c10e55c3 100644
> --- a/hw/arm/smmuv3-accel.h
> +++ b/hw/arm/smmuv3-accel.h
> @@ -23,6 +23,8 @@
>  typedef struct SMMUv3AccelState {
>      IOMMUFDViommu *viommu;
>      IOMMUFDVeventq *veventq;
> +    uint32_t last_event_seq;
> +    bool event_start;
>      uint32_t bypass_hwpt_id;
>      uint32_t abort_hwpt_id;
>      QLIST_HEAD(, SMMUv3AccelDevice) device_list;
Thanks

Eric


RE: [PATCH v5 5/5] hw/arm/smmuv3-accel: Read and propagate host vIOMMU events
Posted by Shameer Kolothum Thodi 10 hours ago
Hi Eric,

> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: 11 February 2026 14:26
> To: Shameer Kolothum Thodi <skolothumtho@nvidia.com>; qemu-
> arm@nongnu.org; qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; Nicolin Chen <nicolinc@nvidia.com>; Nathan
> Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jiandi An
> <jan@nvidia.com>; Jason Gunthorpe <jgg@nvidia.com>;
> jonathan.cameron@huawei.com; zhangfei.gao@linaro.org;
> zhenzhong.duan@intel.com; Krishnakant Jaju <kjaju@nvidia.com>
> Subject: Re: [PATCH v5 5/5] hw/arm/smmuv3-accel: Read and propagate host
> vIOMMU events
> 
> External email: Use caution opening links or attachments
> 
> 
> Hi Shameer,
> 
> On 2/11/26 9:34 AM, Shameer Kolothum wrote:
> > Install an event handler on the vEVENTQ fd to read and propagate host
> > generated vIOMMU events to the guest.
> >
> > The handler runs in QEMU's main loop, using a non-blocking fd registered
> > via qemu_set_fd_handler().
> >
> > Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> > Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> > ---
> >  hw/arm/smmuv3-accel.c | 62
> +++++++++++++++++++++++++++++++++++++++++++
> >  hw/arm/smmuv3-accel.h |  2 ++
> >  2 files changed, 64 insertions(+)
> >
> > diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> > index d92fcb1a89..0d5dcef941 100644
> > --- a/hw/arm/smmuv3-accel.c
> > +++ b/hw/arm/smmuv3-accel.c
> > @@ -390,6 +390,48 @@ bool
> smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice
> *sdev,
> >                     sizeof(Cmd), &entry_num, cmd, errp);
> >  }
> >
> > +static void smmuv3_accel_event_read(void *opaque)
> > +{
> > +    SMMUv3State *s = opaque;
> > +    SMMUv3AccelState *accel = s->s_accel;
> > +    struct {
> > +        struct iommufd_vevent_header hdr;
> > +        struct iommu_vevent_arm_smmuv3 vevent;
> > +    } buf;
> > +    uint32_t last_seq = accel->last_event_seq;
> > +    ssize_t bytes;
> > +
> > +    bytes = read(accel->veventq->veventq_fd, &buf, sizeof(buf));
> > +    if (bytes <= 0) {
> > +        if (errno == EAGAIN || errno == EINTR) {
> > +            return;
> > +        }
> > +        error_report_once("vEVENTQ: read failed (%m)");
> > +        return;
> > +    }
> > +
> > +    if (bytes == sizeof(buf.hdr) &&
> > +        (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)) {
> > +        error_report_once("vEVENTQ has lost events");
> in case in the future we end up with several vEVENTQs, it may be
> relevant to log the queue id/type
> 
> After reading the linux uapi again, this seems to record a queue
> overflow. I would replace the
> 
> "vEVENTQ has lost events" trace by "queue <type> <id> has overflowed". This
> would allow differentiation with the case below.

Ok. Makes sense. <id> is also useful with multiple SMMUv3 instances.

> 
> > +        accel->event_start = false;
> > +        return;
> > +    }
> > +    if (bytes < sizeof(buf)) {
> > +        error_report_once("vEVENTQ: incomplete read (%zd/%zd bytes)",
> > +                          bytes, sizeof(buf));
> > +        return;
> > +    }
> > +
> > +    /* Check sequence in hdr for lost events if any */
> > +    if (accel->event_start && (buf.hdr.sequence - last_seq != 1)) {
> wonder if we need to handle any hypothetical wrap-around situation?

This should not be required. The sequence is __u32 and the delta
is computed using unsigned arithmetic, so wrap-around is handled
naturally.

Thanks,
Shameer
Re: [PATCH v5 5/5] hw/arm/smmuv3-accel: Read and propagate host vIOMMU events
Posted by Eric Auger 7 hours ago
Hi Shameer,

On 2/11/26 3:58 PM, Shameer Kolothum Thodi wrote:
> Hi Eric,
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: 11 February 2026 14:26
>> To: Shameer Kolothum Thodi <skolothumtho@nvidia.com>; qemu-
>> arm@nongnu.org; qemu-devel@nongnu.org
>> Cc: peter.maydell@linaro.org; Nicolin Chen <nicolinc@nvidia.com>; Nathan
>> Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jiandi An
>> <jan@nvidia.com>; Jason Gunthorpe <jgg@nvidia.com>;
>> jonathan.cameron@huawei.com; zhangfei.gao@linaro.org;
>> zhenzhong.duan@intel.com; Krishnakant Jaju <kjaju@nvidia.com>
>> Subject: Re: [PATCH v5 5/5] hw/arm/smmuv3-accel: Read and propagate host
>> vIOMMU events
>>
>> External email: Use caution opening links or attachments
>>
>>
>> Hi Shameer,
>>
>> On 2/11/26 9:34 AM, Shameer Kolothum wrote:
>>> Install an event handler on the vEVENTQ fd to read and propagate host
>>> generated vIOMMU events to the guest.
>>>
>>> The handler runs in QEMU's main loop, using a non-blocking fd registered
>>> via qemu_set_fd_handler().
>>>
>>> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
>>> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
>>> ---
>>>  hw/arm/smmuv3-accel.c | 62
>> +++++++++++++++++++++++++++++++++++++++++++
>>>  hw/arm/smmuv3-accel.h |  2 ++
>>>  2 files changed, 64 insertions(+)
>>>
>>> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
>>> index d92fcb1a89..0d5dcef941 100644
>>> --- a/hw/arm/smmuv3-accel.c
>>> +++ b/hw/arm/smmuv3-accel.c
>>> @@ -390,6 +390,48 @@ bool
>> smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice
>> *sdev,
>>>                     sizeof(Cmd), &entry_num, cmd, errp);
>>>  }
>>>
>>> +static void smmuv3_accel_event_read(void *opaque)
>>> +{
>>> +    SMMUv3State *s = opaque;
>>> +    SMMUv3AccelState *accel = s->s_accel;
>>> +    struct {
>>> +        struct iommufd_vevent_header hdr;
>>> +        struct iommu_vevent_arm_smmuv3 vevent;
>>> +    } buf;
>>> +    uint32_t last_seq = accel->last_event_seq;
>>> +    ssize_t bytes;
>>> +
>>> +    bytes = read(accel->veventq->veventq_fd, &buf, sizeof(buf));
>>> +    if (bytes <= 0) {
>>> +        if (errno == EAGAIN || errno == EINTR) {
>>> +            return;
>>> +        }
>>> +        error_report_once("vEVENTQ: read failed (%m)");
>>> +        return;
>>> +    }
>>> +
>>> +    if (bytes == sizeof(buf.hdr) &&
>>> +        (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)) {
>>> +        error_report_once("vEVENTQ has lost events");
>> in case in the future we end up with several vEVENTQs, it may be
>> relevant to log the queue id/type
>>
>> After reading the linux uapi again, this seems to record a queue
>> overflow. I would replace the
>>
>> "vEVENTQ has lost events" trace by "queue <type> <id> has overflowed". This
>> would allow differentiation with the case below.
> Ok. Makes sense. <id> is also useful with multiple SMMUv3 instances.
>
>>> +        accel->event_start = false;
>>> +        return;
>>> +    }
>>> +    if (bytes < sizeof(buf)) {
>>> +        error_report_once("vEVENTQ: incomplete read (%zd/%zd bytes)",
>>> +                          bytes, sizeof(buf));
>>> +        return;
>>> +    }
>>> +
>>> +    /* Check sequence in hdr for lost events if any */
>>> +    if (accel->event_start && (buf.hdr.sequence - last_seq != 1)) {
>> wonder if we need to handle any hypothetical wrap-around situation?
> This should not be required. The sequence is __u32 and the delta
>  is computed using unsigned arithmetic, so wrap-around is handled
> naturally.
Ah that's right.

With the 1st comment above taken into account, feel free to add my 

Reviewed-by: Eric Auger <eric.auger@redhat.com>

Thanks

Eric
>
> Thanks,
> Shameer