Install an event handler on the vEVENTQ fd to read and propagate
host-generated vIOMMU events to the guest.
The handler runs in QEMU’s main loop, using a non-blocking fd registered
via qemu_set_fd_handler().
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
hw/arm/smmuv3-accel.c | 60 +++++++++++++++++++++++++++++++++++++++++++
hw/arm/smmuv3-accel.h | 2 ++
2 files changed, 62 insertions(+)
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index e8028d4be5..ab57eae575 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -390,6 +390,60 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice *sdev,
sizeof(Cmd), &entry_num, cmd, errp);
}
+static void smmuv3_accel_event_read(void *opaque)
+{
+ SMMUv3State *s = opaque;
+ SMMUv3AccelState *accel = s->s_accel;
+ struct {
+ struct iommufd_vevent_header hdr;
+ struct iommu_vevent_arm_smmuv3 vevent;
+ } buf;
+ ssize_t readsz = sizeof(buf);
+ uint32_t last_seq = accel->last_event_seq;
+ ssize_t bytes;
+
+ bytes = read(accel->veventq->veventq_fd, &buf, readsz);
+ if (bytes <= 0) {
+ if (errno == EAGAIN || errno == EINTR) {
+ return;
+ }
+ error_report("vEVENTQ: read failed (%m)");
+ return;
+ }
+
+ if (bytes < readsz) {
+ error_report("vEVENTQ: incomplete read (%zd/%zd bytes)", bytes, readsz);
+ return;
+ }
+
+ if (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS) {
+ error_report("vEVENTQ has lost events");
+ accel->event_start = false;
+ accel->last_event_seq = 0;
+ return;
+ }
+
+ /* Check sequence in hdr for lost events if any */
+ if (accel->event_start) {
+ uint32_t expected = (last_seq == INT_MAX) ? 0 : last_seq + 1;
+
+ if (buf.hdr.sequence != expected) {
+ uint32_t delta;
+
+ if (buf.hdr.sequence >= last_seq) {
+ delta = buf.hdr.sequence - last_seq;
+ } else {
+ /* Handle wraparound from INT_MAX */
+ delta = (INT_MAX - last_seq) + buf.hdr.sequence + 1;
+ }
+ error_report_once("vEVENTQ: detected lost %u event(s)", delta - 1);
+ }
+ }
+ accel->last_event_seq = buf.hdr.sequence;
+ accel->event_start = true;
+ smmuv3_propagate_event(s, (Evt *)&buf.vevent);
+}
+
static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
{
IOMMUFDVeventq *veventq = accel->veventq;
@@ -397,6 +451,8 @@ static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
if (!veventq) {
return;
}
+ qemu_set_fd_handler(veventq->veventq_fd, NULL, NULL, NULL);
+ close(veventq->veventq_fd);
iommufd_backend_free_id(accel->viommu->iommufd, veventq->veventq_id);
g_free(veventq);
accel->veventq = NULL;
@@ -439,6 +495,10 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
veventq->veventq_fd = veventq_fd;
veventq->viommu = accel->viommu;
accel->veventq = veventq;
+
+ /* Set up event handler for veventq fd */
+ fcntl(veventq_fd, F_SETFL, O_NONBLOCK);
+ qemu_set_fd_handler(veventq_fd, smmuv3_accel_event_read, NULL, s);
return true;
}
diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
index 92048bb674..ba0f40a565 100644
--- a/hw/arm/smmuv3-accel.h
+++ b/hw/arm/smmuv3-accel.h
@@ -23,6 +23,8 @@
typedef struct SMMUv3AccelState {
IOMMUFDViommu *viommu;
IOMMUFDVeventq *veventq;
+ uint32_t last_event_seq;
+ bool event_start;
uint32_t bypass_hwpt_id;
uint32_t abort_hwpt_id;
QLIST_HEAD(, SMMUv3AccelDevice) device_list;
--
2.43.0
Hi Shameer,
On 1/27/26 11:33 AM, Shameer Kolothum wrote:
> Install an event handler on the vEVENTQ fd to read and propagate host
> generated vIOMMU events to the guest.
>
> The handler runs in QEMU’s main loop, using a non-blocking fd registered
> via qemu_set_fd_handler().
>
> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> ---
> hw/arm/smmuv3-accel.c | 60 +++++++++++++++++++++++++++++++++++++++++++
> hw/arm/smmuv3-accel.h | 2 ++
> 2 files changed, 62 insertions(+)
>
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index e8028d4be5..ab57eae575 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -390,6 +390,60 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice *sdev,
> sizeof(Cmd), &entry_num, cmd, errp);
> }
>
> +static void smmuv3_accel_event_read(void *opaque)
> +{
> + SMMUv3State *s = opaque;
> + SMMUv3AccelState *accel = s->s_accel;
> + struct {
> + struct iommufd_vevent_header hdr;
> + struct iommu_vevent_arm_smmuv3 vevent;
> + } buf;
> + ssize_t readsz = sizeof(buf);
> + uint32_t last_seq = accel->last_event_seq;
> + ssize_t bytes;
> +
> + bytes = read(accel->veventq->veventq_fd, &buf, readsz);
In case we receive a header with LOST_EVENTS at the tail of the queue, no data
follows it. So I am not sure you will be able to handle that case.
> + if (bytes <= 0) {
> + if (errno == EAGAIN || errno == EINTR) {
> + return;
> + }
> + error_report("vEVENTQ: read failed (%m)");
error_report_once() here?
> + return;
> + }
> +
> + if (bytes < readsz) {
> + error_report("vEVENTQ: incomplete read (%zd/%zd bytes)", bytes, readsz);
> + return;
> + }
> +
> + if (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS) {
> + error_report("vEVENTQ has lost events");
> + accel->event_start = false;
> + accel->last_event_seq = 0;
Actually, I read include/uapi/linux/iommufd.h again, and nothing says that
the sequence is reset to 0 after a LOST_EVENTS.
> + return;
> + }
> +
> + /* Check sequence in hdr for lost events if any */
> + if (accel->event_start) {
> + uint32_t expected = (last_seq == INT_MAX) ? 0 : last_seq + 1;
> +
> + if (buf.hdr.sequence != expected) {
But can this happen? Normally you should have a LOST_EVENTS reported by
the kernel, no?
> + uint32_t delta;
> +
> + if (buf.hdr.sequence >= last_seq) {
> + delta = buf.hdr.sequence - last_seq;
> + } else {
> + /* Handle wraparound from INT_MAX */
> + delta = (INT_MAX - last_seq) + buf.hdr.sequence + 1;
> + }
> + error_report_once("vEVENTQ: detected lost %u event(s)", delta - 1);
> + }
> + }
> + accel->last_event_seq = buf.hdr.sequence;
> + accel->event_start = true;
> + smmuv3_propagate_event(s, (Evt *)&buf.vevent);
> +}
> +
> static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
> {
> IOMMUFDVeventq *veventq = accel->veventq;
> @@ -397,6 +451,8 @@ static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
> if (!veventq) {
> return;
> }
> + qemu_set_fd_handler(veventq->veventq_fd, NULL, NULL, NULL);
> + close(veventq->veventq_fd);
> iommufd_backend_free_id(accel->viommu->iommufd, veventq->veventq_id);
> g_free(veventq);
> accel->veventq = NULL;
> @@ -439,6 +495,10 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
> veventq->veventq_fd = veventq_fd;
> veventq->viommu = accel->viommu;
> accel->veventq = veventq;
> +
> + /* Set up event handler for veventq fd */
> + fcntl(veventq_fd, F_SETFL, O_NONBLOCK);
Looks like this can fail.
Thanks
Eric
> + qemu_set_fd_handler(veventq_fd, smmuv3_accel_event_read, NULL, s);
> return true;
> }
>
> diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> index 92048bb674..ba0f40a565 100644
> --- a/hw/arm/smmuv3-accel.h
> +++ b/hw/arm/smmuv3-accel.h
> @@ -23,6 +23,8 @@
> typedef struct SMMUv3AccelState {
> IOMMUFDViommu *viommu;
> IOMMUFDVeventq *veventq;
> + uint32_t last_event_seq;
> + bool event_start;
> uint32_t bypass_hwpt_id;
> uint32_t abort_hwpt_id;
> QLIST_HEAD(, SMMUv3AccelDevice) device_list;
> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: 02 February 2026 12:59
> To: Shameer Kolothum Thodi <skolothumtho@nvidia.com>; qemu-
> arm@nongnu.org; qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; Nicolin Chen <nicolinc@nvidia.com>; Nathan
> Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jason
> Gunthorpe <jgg@nvidia.com>; jonathan.cameron@huawei.com;
> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; Krishnakant Jaju
> <kjaju@nvidia.com>
> Subject: Re: [PATCH v3 4/4] hw/arm/smmuv3-accel: Read and propagate host
> vIOMMU events
>
> External email: Use caution opening links or attachments
>
>
> Hi Shameer,
>
> On 1/27/26 11:33 AM, Shameer Kolothum wrote:
> > Install an event handler on the vEVENTQ fd to read and propagate host
> > generated vIOMMU events to the guest.
> >
> > The handler runs in QEMU’s main loop, using a non-blocking fd registered
> > via qemu_set_fd_handler().
> >
> > Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> > Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> > ---
> > hw/arm/smmuv3-accel.c | 60
> +++++++++++++++++++++++++++++++++++++++++++
> > hw/arm/smmuv3-accel.h | 2 ++
> > 2 files changed, 62 insertions(+)
> >
> > diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> > index e8028d4be5..ab57eae575 100644
> > --- a/hw/arm/smmuv3-accel.c
> > +++ b/hw/arm/smmuv3-accel.c
> > @@ -390,6 +390,60 @@ bool
> smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice
> *sdev,
> > sizeof(Cmd), &entry_num, cmd, errp);
> > }
> >
> > +static void smmuv3_accel_event_read(void *opaque)
> > +{
> > + SMMUv3State *s = opaque;
> > + SMMUv3AccelState *accel = s->s_accel;
> > + struct {
> > + struct iommufd_vevent_header hdr;
> > + struct iommu_vevent_arm_smmuv3 vevent;
> > + } buf;
> > + ssize_t readsz = sizeof(buf);
> > + uint32_t last_seq = accel->last_event_seq;
> > + ssize_t bytes;
> > +
> > + bytes = read(accel->veventq->veventq_fd, &buf, readsz);
> in case we receive a header with LOST_EVENTS at the end of tail, no data
> is expected. So I am not sure you will be able to handle that case.
Good catch. We need to check bytes < sizeof(hdr) first to cover
that.
> > + if (bytes <= 0) {
> > + if (errno == EAGAIN || errno == EINTR) {
> > + return;
> > + }
> > + error_report("vEVENTQ: read failed (%m)");
> _once?
> > + return;
> > + }
> > +
> > + if (bytes < readsz) {
> > + error_report("vEVENTQ: incomplete read (%zd/%zd bytes)", bytes,
> readsz);
> > + return;
> > + }
> > +
> > + if (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS) {
> > + error_report("vEVENTQ has lost events");
> > + accel->event_start = false;
> > + accel->last_event_seq = 0;
> actually I read again the include/uapi/linux/iommufd.h and nothing tells
> sequence == 0 after a LOST_EVENTS.
Yes. Sequence looks to be continuous.
> > + return;
> > + }
> > +
> > + /* Check sequence in hdr for lost events if any */
> > + if (accel->event_start) {
> > + uint32_t expected = (last_seq == INT_MAX) ? 0 : last_seq + 1;
> > +
> > + if (buf.hdr.sequence != expected) {
> But can this happen? Normally you should have a LOST_EVENTS reported by
> the kernel, no?
From the header:
"
And this sequence index is expected to be monotonic to the sequence index of
the previous vEVENT. If two adjacent sequence indexes has a delta larger than
1, it means that delta - 1 number of vEVENTs has lost, e.g. two lost vEVENTs:
"
It doesn't say that FLAG_LOST_EVENTS will be set for the above case.
AFAICS, the only case where the flag is set is the one below:
"
If a vEVENT lost at the tail of the vEVENTQ and there is no following vEVENT
providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS
header would be added to the tail, and no data would follow this header:
"
*
> > + uint32_t delta;
> > +
> > + if (buf.hdr.sequence >= last_seq) {
> > + delta = buf.hdr.sequence - last_seq;
> > + } else {
> > + /* Handle wraparound from INT_MAX */
> > + delta = (INT_MAX - last_seq) + buf.hdr.sequence + 1;
> > + }
> > + error_report_once("vEVENTQ: detected lost %u event(s)", delta - 1);
> > + }
> > + }
> > + accel->last_event_seq = buf.hdr.sequence;
> > + accel->event_start = true;
> > + smmuv3_propagate_event(s, (Evt *)&buf.vevent);
> > +}
> > +
> > static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
> > {
> > IOMMUFDVeventq *veventq = accel->veventq;
> > @@ -397,6 +451,8 @@ static void
> smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
> > if (!veventq) {
> > return;
> > }
> > + qemu_set_fd_handler(veventq->veventq_fd, NULL, NULL, NULL);
> > + close(veventq->veventq_fd);
> > iommufd_backend_free_id(accel->viommu->iommufd, veventq-
> >veventq_id);
> > g_free(veventq);
> > accel->veventq = NULL;
> > @@ -439,6 +495,10 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State
> *s, Error **errp)
> > veventq->veventq_fd = veventq_fd;
> > veventq->viommu = accel->viommu;
> > accel->veventq = veventq;
> > +
> > + /* Set up event handler for veventq fd */
> > + fcntl(veventq_fd, F_SETFL, O_NONBLOCK);
> Looks this can fail.
Ok. Will add a check.
Thanks,
Shameer
On 2/2/26 3:12 PM, Shameer Kolothum Thodi wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: 02 February 2026 12:59
>> To: Shameer Kolothum Thodi <skolothumtho@nvidia.com>; qemu-
>> arm@nongnu.org; qemu-devel@nongnu.org
>> Cc: peter.maydell@linaro.org; Nicolin Chen <nicolinc@nvidia.com>; Nathan
>> Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jason
>> Gunthorpe <jgg@nvidia.com>; jonathan.cameron@huawei.com;
>> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; Krishnakant Jaju
>> <kjaju@nvidia.com>
>> Subject: Re: [PATCH v3 4/4] hw/arm/smmuv3-accel: Read and propagate host
>> vIOMMU events
>>
>> External email: Use caution opening links or attachments
>>
>>
>> Hi Shameer,
>>
>> On 1/27/26 11:33 AM, Shameer Kolothum wrote:
>>> Install an event handler on the vEVENTQ fd to read and propagate host
>>> generated vIOMMU events to the guest.
>>>
>>> The handler runs in QEMU’s main loop, using a non-blocking fd registered
>>> via qemu_set_fd_handler().
>>>
>>> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
>>> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
>>> ---
>>> hw/arm/smmuv3-accel.c | 60
>> +++++++++++++++++++++++++++++++++++++++++++
>>> hw/arm/smmuv3-accel.h | 2 ++
>>> 2 files changed, 62 insertions(+)
>>>
>>> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
>>> index e8028d4be5..ab57eae575 100644
>>> --- a/hw/arm/smmuv3-accel.c
>>> +++ b/hw/arm/smmuv3-accel.c
>>> @@ -390,6 +390,60 @@ bool
>> smmuv3_accel_issue_inv_cmd(SMMUv3State *bs, void *cmd, SMMUDevice
>> *sdev,
>>> sizeof(Cmd), &entry_num, cmd, errp);
>>> }
>>>
>>> +static void smmuv3_accel_event_read(void *opaque)
>>> +{
>>> + SMMUv3State *s = opaque;
>>> + SMMUv3AccelState *accel = s->s_accel;
>>> + struct {
>>> + struct iommufd_vevent_header hdr;
>>> + struct iommu_vevent_arm_smmuv3 vevent;
>>> + } buf;
>>> + ssize_t readsz = sizeof(buf);
>>> + uint32_t last_seq = accel->last_event_seq;
>>> + ssize_t bytes;
>>> +
>>> + bytes = read(accel->veventq->veventq_fd, &buf, readsz);
>> in case we receive a header with LOST_EVENTS at the end of tail, no data
>> is expected. So I am not sure you will be able to handle that case.
> Good catch. We need to check bytes < sizeof(hdr) first to cover
> that.
>
>>> + if (bytes <= 0) {
>>> + if (errno == EAGAIN || errno == EINTR) {
>>> + return;
>>> + }
>>> + error_report("vEVENTQ: read failed (%m)");
>> _once?
>>> + return;
>>> + }
>>> +
>>> + if (bytes < readsz) {
>>> + error_report("vEVENTQ: incomplete read (%zd/%zd bytes)", bytes,
>> readsz);
>>> + return;
>>> + }
>>> +
>>> + if (buf.hdr.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS) {
>>> + error_report("vEVENTQ has lost events");
>>> + accel->event_start = false;
>>> + accel->last_event_seq = 0;
>> actually I read again the include/uapi/linux/iommufd.h and nothing tells
>> sequence == 0 after a LOST_EVENTS.
> Yes. Sequence looks to be continuous.
>
>>> + return;
>>> + }
>>> +
>>> + /* Check sequence in hdr for lost events if any */
>>> + if (accel->event_start) {
>>> + uint32_t expected = (last_seq == INT_MAX) ? 0 : last_seq + 1;
>>> +
>>> + if (buf.hdr.sequence != expected) {
>> But can this happen? Normally you should have a LOST_EVENTS reported by
>> the kernel, no?
> From the header:
> "
> And this sequence index is expected to be monotonic to the sequence index of
> the previous vEVENT. If two adjacent sequence indexes has a delta larger than
> 1, it means that delta - 1 number of vEVENTs has lost, e.g. two lost vEVENTs:
> "
>
> It doesn't say that FLAG_LOST_EVENTS will be set for the above case.
agreed. I missed that.
Thanks
Eric
>
> AFAICS, the only case where the flag set is for below one:
> "
> If a vEVENT lost at the tail of the vEVENTQ and there is no following vEVENT
> providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS
> header would be added to the tail, and no data would follow this header:
> "
>
> *
>>> + uint32_t delta;
>>> +
>>> + if (buf.hdr.sequence >= last_seq) {
>>> + delta = buf.hdr.sequence - last_seq;
>>> + } else {
>>> + /* Handle wraparound from INT_MAX */
>>> + delta = (INT_MAX - last_seq) + buf.hdr.sequence + 1;
>>> + }
>>> + error_report_once("vEVENTQ: detected lost %u event(s)", delta - 1);
>>> + }
>>> + }
>>> + accel->last_event_seq = buf.hdr.sequence;
>>> + accel->event_start = true;
>>> + smmuv3_propagate_event(s, (Evt *)&buf.vevent);
>>> +}
>>> +
>>> static void smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
>>> {
>>> IOMMUFDVeventq *veventq = accel->veventq;
>>> @@ -397,6 +451,8 @@ static void
>> smmuv3_accel_free_veventq(SMMUv3AccelState *accel)
>>> if (!veventq) {
>>> return;
>>> }
>>> + qemu_set_fd_handler(veventq->veventq_fd, NULL, NULL, NULL);
>>> + close(veventq->veventq_fd);
>>> iommufd_backend_free_id(accel->viommu->iommufd, veventq-
>>> veventq_id);
>>> g_free(veventq);
>>> accel->veventq = NULL;
>>> @@ -439,6 +495,10 @@ bool smmuv3_accel_alloc_veventq(SMMUv3State
>> *s, Error **errp)
>>> veventq->veventq_fd = veventq_fd;
>>> veventq->viommu = accel->viommu;
>>> accel->veventq = veventq;
>>> +
>>> + /* Set up event handler for veventq fd */
>>> + fcntl(veventq_fd, F_SETFL, O_NONBLOCK);
>> Looks this can fail.
> Ok. Will add a check.
>
> Thanks,
> Shameer
© 2016 - 2026 Red Hat, Inc.