Install an event handler on the vEVENTQ fd to read and propagate host
generated vIOMMU events to the guest.
The handler runs in QEMU's main loop, using a non-blocking fd registered
via qemu_set_fd_handler().
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
hw/arm/smmuv3-accel.c | 62 +++++++++++++++++++++++++++++++++++++++++++
hw/arm/smmuv3-accel.h | 2 ++
2 files changed, 64 insertions(+)
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index d92fcb1a89..0d5dcef941 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -390,6 +390,48 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice *sdev,
sizeof(Cmd), &entry_num, cmd, errp);
}
+static void smmuv3_accel_event_read(void *opaque)
+{
+ SMMUv3State *s = opaque;
+ SMMUv3AccelState *accel = s->s_accel;
+ struct {
+ struct iommufd_vevent_header hdr;
+ struct iommu_vevent_arm_smmuv3 vevent;
+ } buf;
+ uint32_t last_seq = accel->last_event_seq;
+ ssize_t bytes;
+
+ bytes = read(accel->veventq->veventq_fd, &buf, sizeof(buf));
+ if (bytes <= 0) {
+ if (errno == EAGAIN || errno == EINTR) {
+ return;
+ }
+ error_report_once("vEVENTQ: read failed (%m)");
+ return;
+ }
+
+ if (bytes == sizeof(buf.hdr) &&
+ (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)) {
+ error_report_once("vEVENTQ has lost events");
+ accel->event_start = false;
+ return;
+ }
+ if (bytes < sizeof(buf)) {
+ error_report_once("vEVENTQ: incomplete read (%zd/%zu bytes)",
+ bytes, sizeof(buf));
+ return;
+ }
+
+ /* Check sequence in hdr for lost events if any */
+ if (accel->event_start && (buf.hdr.sequence - last_seq != 1)) {
+ error_report_once("vEVENTQ: detected lost %u event(s)",
+ buf.hdr.sequence - last_seq - 1);
+ }
+ accel->last_event_seq = buf.hdr.sequence;
+ accel->event_start = true;
+ smmuv3_propagate_event(s, (Evt *)&buf.vevent);
+}
+
static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
{
IOMMUFDVeventq *veventq = accel->veventq;
@@ -397,6 +439,7 @@ static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
if (!veventq) {
return;
}
+ qemu_set_fd_handler(veventq->veventq_fd, NULL, NULL, NULL);
close(veventq->veventq_fd);
iommufd_backend_free_id(accel->viommu->iommufd, veventq->veventq_id);
g_free(veventq);
@@ -424,6 +467,7 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
IOMMUFDVeventq *veventq;
uint32_t veventq_id;
uint32_t veventq_fd;
+ int flags;
if (!accel || !accel->viommu) {
return true;
@@ -445,12 +489,30 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
return false;
}
+ flags = fcntl(veventq_fd, F_GETFL);
+ if (flags < 0) {
+ error_setg_errno(errp, errno, "Failed to get flags for vEVENTQ fd");
+ goto free_veventq;
+ }
+ if (fcntl(veventq_fd, F_SETFL, flags | O_NONBLOCK) < 0) {
+ error_setg_errno(errp, errno, "Failed to set O_NONBLOCK on vEVENTQ fd");
+ goto free_veventq;
+ }
+
veventq = g_new(IOMMUFDVeventq, 1);
veventq->veventq_id = veventq_id;
veventq->veventq_fd = veventq_fd;
veventq->viommu = accel->viommu;
accel->veventq = veventq;
+
+ /* Set up event handler for veventq fd */
+ qemu_set_fd_handler(veventq_fd, smmuv3_accel_event_read, NULL, s);
return true;
+
+free_veventq:
+ close(veventq_fd);
+ iommufd_backend_free_id(accel->viommu->iommufd, veventq_id);
+ return false;
}
static bool
diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
index dba6c71de5..c9c10e55c3 100644
--- a/hw/arm/smmuv3-accel.h
+++ b/hw/arm/smmuv3-accel.h
@@ -23,6 +23,8 @@
typedef struct SMMUv3AccelState {
IOMMUFDViommu *viommu;
IOMMUFDVeventq *veventq;
+ uint32_t last_event_seq;
+ bool event_start;
uint32_t bypass_hwpt_id;
uint32_t abort_hwpt_id;
QLIST_HEAD(, SMMUv3AccelDevice) device_list;
--
2.43.0
Hi Shameer,
On 2/11/26 9:34 AM, Shameer Kolothum wrote:
> Install an event handler on the vEVENTQ fd to read and propagate host
> generated vIOMMU events to the guest.
>
> The handler runs in QEMU's main loop, using a non-blocking fd registered
> via qemu_set_fd_handler().
>
> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> ---
> hw/arm/smmuv3-accel.c | 62 +++++++++++++++++++++++++++++++++++++++++++
> hw/arm/smmuv3-accel.h | 2 ++
> 2 files changed, 64 insertions(+)
>
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index d92fcb1a89..0d5dcef941 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -390,6 +390,48 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice *sdev,
> sizeof(Cmd), &entry_num, cmd, errp);
> }
>
> +static void smmuv3_accel_event_read(void *opaque)
> +{
> + SMMUv3State *s = opaque;
> + SMMUv3AccelState *accel = s->s_accel;
> + struct {
> + struct iommufd_vevent_header hdr;
> + struct iommu_vevent_arm_smmuv3 vevent;
> + } buf;
> + uint32_t last_seq = accel->last_event_seq;
> + ssize_t bytes;
> +
> + bytes = read(accel->veventq->veventq_fd, &buf, sizeof(buf));
> + if (bytes <= 0) {
> + if (errno == EAGAIN || errno == EINTR) {
> + return;
> + }
> + error_report_once("vEVENTQ: read failed (%m)");
> + return;
> + }
> +
> + if (bytes == sizeof(buf.hdr) &&
> + (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)) {
> + error_report_once("vEVENTQ has lost events");
in case in the future we end up with several vEVENTQs, it may be
relevant to log the queue id/type
After reading the Linux uapi again, this seems to record a queue
overflow. I would replace the
"vEVENTQ has lost events" trace with "queue <type> <id> has overflowed". This would allow differentiation from the case below.
> + accel->event_start = false;
> + return;
> + }
> + if (bytes < sizeof(buf)) {
> + error_report_once("vEVENTQ: incomplete read (%zd/%zd bytes)",
> + bytes, sizeof(buf));
> + return;
> + }
> +
> + /* Check sequence in hdr for lost events if any */
> + if (accel->event_start && (buf.hdr.sequence - last_seq != 1)) {
wonder if we need to handle any hypothetical sequence-number wrap-around situation?
> + error_report_once("vEVENTQ: detected lost %u event(s)",
> + buf.hdr.sequence - last_seq - 1);
> + }
> + accel->last_event_seq = buf.hdr.sequence;
> + accel->event_start = true;
> + smmuv3_propagate_event(s, (Evt *)&buf.vevent);
> +}
> +
> static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
> {
> IOMMUFDVeventq *veventq = accel->veventq;
> @@ -397,6 +439,7 @@ static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
> if (!veventq) {
> return;
> }
> + qemu_set_fd_handler(veventq->veventq_fd, NULL, NULL, NULL);
> close(veventq->veventq_fd);
> iommufd_backend_free_id(accel->viommu->iommufd, veventq->veventq_id);
> g_free(veventq);
> @@ -424,6 +467,7 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
> IOMMUFDVeventq *veventq;
> uint32_t veventq_id;
> uint32_t veventq_fd;
> + int flags;
>
> if (!accel || !accel->viommu) {
> return true;
> @@ -445,12 +489,30 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
> return false;
> }
>
> + flags = fcntl(veventq_fd, F_GETFL);
> + if (flags < 0) {
> + error_setg_errno(errp, errno, "Failed to get flags for vEVENTQ fd");
> + goto free_veventq;
> + }
> + if (fcntl(veventq_fd, F_SETFL, flags | O_NONBLOCK) < 0) {
> + error_setg_errno(errp, errno, "Failed to set O_NONBLOCK on vEVENTQ fd");
> + goto free_veventq;
> + }
> +
> veventq = g_new(IOMMUFDVeventq, 1);
> veventq->veventq_id = veventq_id;
> veventq->veventq_fd = veventq_fd;
> veventq->viommu = accel->viommu;
> accel->veventq = veventq;
> +
> + /* Set up event handler for veventq fd */
> + qemu_set_fd_handler(veventq_fd, smmuv3_accel_event_read, NULL, s);
> return true;
> +
> +free_veventq:
> + close(veventq_fd);
> + iommufd_backend_free_id(accel->viommu->iommufd, veventq_id);
> + return false;
> }
>
> static bool
> diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> index dba6c71de5..c9c10e55c3 100644
> --- a/hw/arm/smmuv3-accel.h
> +++ b/hw/arm/smmuv3-accel.h
> @@ -23,6 +23,8 @@
> typedef struct SMMUv3AccelState {
> IOMMUFDViommu *viommu;
> IOMMUFDVeventq *veventq;
> + uint32_t last_event_seq;
> + bool event_start;
> uint32_t bypass_hwpt_id;
> uint32_t abort_hwpt_id;
> QLIST_HEAD(, SMMUv3AccelDevice) device_list;
Thanks
Eric
Hi Eric,
> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: 11 February 2026 14:26
> To: Shameer Kolothum Thodi <skolothumtho@nvidia.com>; qemu-
> arm@nongnu.org; qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; Nicolin Chen <nicolinc@nvidia.com>; Nathan
> Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jiandi An
> <jan@nvidia.com>; Jason Gunthorpe <jgg@nvidia.com>;
> jonathan.cameron@huawei.com; zhangfei.gao@linaro.org;
> zhenzhong.duan@intel.com; Krishnakant Jaju <kjaju@nvidia.com>
> Subject: Re: [PATCH v5 5/5] hw/arm/smmuv3-accel: Read and propagate host
> vIOMMU events
>
> External email: Use caution opening links or attachments
>
>
> Hi Shameer,
>
> On 2/11/26 9:34 AM, Shameer Kolothum wrote:
> > Install an event handler on the vEVENTQ fd to read and propagate host
> > generated vIOMMU events to the guest.
> >
> > The handler runs in QEMU's main loop, using a non-blocking fd registered
> > via qemu_set_fd_handler().
> >
> > Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> > Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> > ---
> > hw/arm/smmuv3-accel.c | 62
> +++++++++++++++++++++++++++++++++++++++++++
> > hw/arm/smmuv3-accel.h | 2 ++
> > 2 files changed, 64 insertions(+)
> >
> > diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> > index d92fcb1a89..0d5dcef941 100644
> > --- a/hw/arm/smmuv3-accel.c
> > +++ b/hw/arm/smmuv3-accel.c
> > @@ -390,6 +390,48 @@ bool
> smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice
> *sdev,
> > sizeof(Cmd), &entry_num, cmd, errp);
> > }
> >
> > +static void smmuv3_accel_event_read(void *opaque)
> > +{
> > + SMMUv3State *s = opaque;
> > + SMMUv3AccelState *accel = s->s_accel;
> > + struct {
> > + struct iommufd_vevent_header hdr;
> > + struct iommu_vevent_arm_smmuv3 vevent;
> > + } buf;
> > + uint32_t last_seq = accel->last_event_seq;
> > + ssize_t bytes;
> > +
> > + bytes = read(accel->veventq->veventq_fd, &buf, sizeof(buf));
> > + if (bytes <= 0) {
> > + if (errno == EAGAIN || errno == EINTR) {
> > + return;
> > + }
> > + error_report_once("vEVENTQ: read failed (%m)");
> > + return;
> > + }
> > +
> > + if (bytes == sizeof(buf.hdr) &&
> > + (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)) {
> > + error_report_once("vEVENTQ has lost events");
> in case in the future we end up with several vEVENTQs, it may be
> relevant to log the queue id/type
>
> After reading the linux uapi again, this seems to record a queue
> overflow. I would replace the
>
> "vEVENTQ has lost events" trace by "queue <type> <id> has overflowed". This
> would allow differentiation with the case below.
Ok. Makes sense. <id> is also useful with multiple SMMUv3 instances.
>
> > + accel->event_start = false;
> > + return;
> > + }
> > + if (bytes < sizeof(buf)) {
> > + error_report_once("vEVENTQ: incomplete read (%zd/%zd bytes)",
> > + bytes, sizeof(buf));
> > + return;
> > + }
> > +
> > + /* Check sequence in hdr for lost events if any */
> > + if (accel->event_start && (buf.hdr.sequence - last_seq != 1)) {
> wonder if we need to handle any hypothetical wrap-up situation?
This should not be required. The sequence is __u32 and the delta
is computed using unsigned arithmetic, so wrap-around is handled
naturally.
Thanks,
Shameer
Hi Shameer,
On 2/11/26 3:58 PM, Shameer Kolothum Thodi wrote:
> Hi Eric,
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: 11 February 2026 14:26
>> To: Shameer Kolothum Thodi <skolothumtho@nvidia.com>; qemu-
>> arm@nongnu.org; qemu-devel@nongnu.org
>> Cc: peter.maydell@linaro.org; Nicolin Chen <nicolinc@nvidia.com>; Nathan
>> Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jiandi An
>> <jan@nvidia.com>; Jason Gunthorpe <jgg@nvidia.com>;
>> jonathan.cameron@huawei.com; zhangfei.gao@linaro.org;
>> zhenzhong.duan@intel.com; Krishnakant Jaju <kjaju@nvidia.com>
>> Subject: Re: [PATCH v5 5/5] hw/arm/smmuv3-accel: Read and propagate host
>> vIOMMU events
>>
>> External email: Use caution opening links or attachments
>>
>>
>> Hi Shameer,
>>
>> On 2/11/26 9:34 AM, Shameer Kolothum wrote:
>>> Install an event handler on the vEVENTQ fd to read and propagate host
>>> generated vIOMMU events to the guest.
>>>
>>> The handler runs in QEMU's main loop, using a non-blocking fd registered
>>> via qemu_set_fd_handler().
>>>
>>> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
>>> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
>>> ---
>>> hw/arm/smmuv3-accel.c | 62
>> +++++++++++++++++++++++++++++++++++++++++++
>>> hw/arm/smmuv3-accel.h | 2 ++
>>> 2 files changed, 64 insertions(+)
>>>
>>> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
>>> index d92fcb1a89..0d5dcef941 100644
>>> --- a/hw/arm/smmuv3-accel.c
>>> +++ b/hw/arm/smmuv3-accel.c
>>> @@ -390,6 +390,48 @@ bool
>> smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice
>> *sdev,
>>> sizeof(Cmd), &entry_num, cmd, errp);
>>> }
>>>
>>> +static void smmuv3_accel_event_read(void *opaque)
>>> +{
>>> + SMMUv3State *s = opaque;
>>> + SMMUv3AccelState *accel = s->s_accel;
>>> + struct {
>>> + struct iommufd_vevent_header hdr;
>>> + struct iommu_vevent_arm_smmuv3 vevent;
>>> + } buf;
>>> + uint32_t last_seq = accel->last_event_seq;
>>> + ssize_t bytes;
>>> +
>>> + bytes = read(accel->veventq->veventq_fd, &buf, sizeof(buf));
>>> + if (bytes <= 0) {
>>> + if (errno == EAGAIN || errno == EINTR) {
>>> + return;
>>> + }
>>> + error_report_once("vEVENTQ: read failed (%m)");
>>> + return;
>>> + }
>>> +
>>> + if (bytes == sizeof(buf.hdr) &&
>>> + (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)) {
>>> + error_report_once("vEVENTQ has lost events");
>> in case in the future we end up with several vEVENTQs, it may be
>> relevant to log the queue id/type
>>
>> After reading the linux uapi again, this seems to record a queue
>> overflow. I would replace the
>>
>> "vEVENTQ has lost events" trace by "queue <type> <id> has overflowed". This
>> would allow differentiation with the case below.
> Ok. Make sense. <id> is also useful with multiple SMMUv3 instances.
>
>>> + accel->event_start = false;
>>> + return;
>>> + }
>>> + if (bytes < sizeof(buf)) {
>>> + error_report_once("vEVENTQ: incomplete read (%zd/%zd bytes)",
>>> + bytes, sizeof(buf));
>>> + return;
>>> + }
>>> +
>>> + /* Check sequence in hdr for lost events if any */
>>> + if (accel->event_start && (buf.hdr.sequence - last_seq != 1)) {
>> wonder if we need to handle any hypothetical wrap-up situation?
> This should not be required. The sequence is __u32 and the delta
> is computed using unsigned arithmetic, so wrap-around is handled
> naturally.
Ah that's right.
With the 1st comment above taken into account, feel free to add my
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Thanks
Eric
>
> Thanks,
> Shameer
© 2016 - 2026 Red Hat, Inc.