This helper passes cache invalidation request from guest to invalidate
stage-1 page table cache in host hardware.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
include/system/iommufd.h | 4 ++++
backends/iommufd.c | 33 +++++++++++++++++++++++++++++++++
backends/trace-events | 1 +
3 files changed, 38 insertions(+)
diff --git a/include/system/iommufd.h b/include/system/iommufd.h
index cbab75bfbf..5399519626 100644
--- a/include/system/iommufd.h
+++ b/include/system/iommufd.h
@@ -61,6 +61,10 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
uint64_t iova, ram_addr_t size,
uint64_t page_size, uint64_t *data,
Error **errp);
+bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
+ uint32_t data_type, uint32_t entry_len,
+ uint32_t *entry_num, void *data_ptr,
+ Error **errp);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
#endif
diff --git a/backends/iommufd.c b/backends/iommufd.c
index b73f75cd0b..c8788a6438 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -311,6 +311,39 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
return true;
}
+bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
+ uint32_t data_type, uint32_t entry_len,
+ uint32_t *entry_num, void *data_ptr,
+ Error **errp)
+{
+ int ret, fd = be->fd;
+ uint32_t total_entries = *entry_num;
+ struct iommu_hwpt_invalidate cache = {
+ .size = sizeof(cache),
+ .hwpt_id = id,
+ .data_type = data_type,
+ .entry_len = entry_len,
+ .entry_num = total_entries,
+ .data_uptr = (uintptr_t)data_ptr,
+ };
+
+ ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache);
+ trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len,
+ total_entries, cache.entry_num,
+ (uintptr_t)data_ptr,
+ ret ? errno : 0);
+ if (ret) {
+ *entry_num = cache.entry_num;
+ error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:"
+ " totally %d entries, processed %d entries",
+ total_entries, cache.entry_num);
+ } else {
+ g_assert(total_entries == cache.entry_num);
+ }
+
+ return !ret;
+}
+
static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
{
HostIOMMUDeviceCaps *caps = &hiod->caps;
diff --git a/backends/trace-events b/backends/trace-events
index 40811a3162..7278214ea5 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -18,3 +18,4 @@ iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_
iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)"
iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
+iommufd_backend_invalidate_cache(int iommufd, uint32_t id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)"
--
2.34.1
Hello Zhenzhong,
On 5/28/25 08:04, Zhenzhong Duan wrote:
> This helper passes cache invalidation request from guest to invalidate
> stage-1 page table cache in host hardware.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> include/system/iommufd.h | 4 ++++
> backends/iommufd.c | 33 +++++++++++++++++++++++++++++++++
> backends/trace-events | 1 +
> 3 files changed, 38 insertions(+)
>
> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
> index cbab75bfbf..5399519626 100644
> --- a/include/system/iommufd.h
> +++ b/include/system/iommufd.h
> @@ -61,6 +61,10 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
> uint64_t iova, ram_addr_t size,
> uint64_t page_size, uint64_t *data,
> Error **errp);
> +bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
> + uint32_t data_type, uint32_t entry_len,
> + uint32_t *entry_num, void *data_ptr,
> + Error **errp);
>
> #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
> #endif
> diff --git a/backends/iommufd.c b/backends/iommufd.c
> index b73f75cd0b..c8788a6438 100644
> --- a/backends/iommufd.c
> +++ b/backends/iommufd.c
> @@ -311,6 +311,39 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
> return true;
> }
>
> +bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
> + uint32_t data_type, uint32_t entry_len,
> + uint32_t *entry_num, void *data_ptr,
> + Error **errp)
> +{
> + int ret, fd = be->fd;
> + uint32_t total_entries = *entry_num;
> + struct iommu_hwpt_invalidate cache = {
> + .size = sizeof(cache),
> + .hwpt_id = id,
> + .data_type = data_type,
> + .entry_len = entry_len,
> + .entry_num = total_entries,
> + .data_uptr = (uintptr_t)data_ptr,
Minor, other helpers use a 'data' variable name.
> + };
> +
> + ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache);
> + trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len,
> + total_entries, cache.entry_num,
> + (uintptr_t)data_ptr,
> + ret ? errno : 0);
> + if (ret) {
> + *entry_num = cache.entry_num;
> + error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:"
> + " totally %d entries, processed %d entries",
> + total_entries, cache.entry_num);
> + } else {
> + g_assert(total_entries == cache.entry_num);
Killing the VMM because a kernel device ioctl failed is brute force.
Can't we update the 'Error *' parameter instead to report that the
invalidation is partial or something went wrong ?
What kind of errors are we trying to catch ?
Looking at the kernel iommufd_hwpt_invalidate() routine and
intel_nested_cache_invalidate_user(), it doesn't seem possible to
return a different number of cache entries. Are you anticipating
other implementations (sMMU) ?
Thanks,
C.
Hi Cédric,
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Subject: Re: [PATCH v1 1/6] backends/iommufd: Add a helper to invalidate user-
>managed HWPT
>
>Hello Zhenzhong,
>
>On 5/28/25 08:04, Zhenzhong Duan wrote:
>> This helper passes cache invalidation request from guest to invalidate
>> stage-1 page table cache in host hardware.
>>
>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> include/system/iommufd.h | 4 ++++
>> backends/iommufd.c | 33 +++++++++++++++++++++++++++++++++
>> backends/trace-events | 1 +
>> 3 files changed, 38 insertions(+)
>>
>> diff --git a/include/system/iommufd.h b/include/system/iommufd.h
>> index cbab75bfbf..5399519626 100644
>> --- a/include/system/iommufd.h
>> +++ b/include/system/iommufd.h
>> @@ -61,6 +61,10 @@ bool
>iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
>> uint64_t iova, ram_addr_t size,
>> uint64_t page_size, uint64_t *data,
>> Error **errp);
>> +bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
>> + uint32_t data_type, uint32_t entry_len,
>> + uint32_t *entry_num, void *data_ptr,
>> + Error **errp);
>>
>> #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD
>TYPE_HOST_IOMMU_DEVICE "-iommufd"
>> #endif
>> diff --git a/backends/iommufd.c b/backends/iommufd.c
>> index b73f75cd0b..c8788a6438 100644
>> --- a/backends/iommufd.c
>> +++ b/backends/iommufd.c
>> @@ -311,6 +311,39 @@ bool
>iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
>> return true;
>> }
>>
>> +bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
>> + uint32_t data_type, uint32_t entry_len,
>> + uint32_t *entry_num, void *data_ptr,
>> + Error **errp)
>> +{
>> + int ret, fd = be->fd;
>> + uint32_t total_entries = *entry_num;
>> + struct iommu_hwpt_invalidate cache = {
>> + .size = sizeof(cache),
>> + .hwpt_id = id,
>> + .data_type = data_type,
>> + .entry_len = entry_len,
>> + .entry_num = total_entries,
>> + .data_uptr = (uintptr_t)data_ptr,
>
>Minor, other helpers use a 'data' variable name.
Will do.
>
>> + };
>> +
>> + ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache);
>> + trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len,
>> + total_entries, cache.entry_num,
>> + (uintptr_t)data_ptr,
>> + ret ? errno : 0);
>> + if (ret) {
>> + *entry_num = cache.entry_num;
>> + error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:"
>> + " totally %d entries, processed %d entries",
>> + total_entries, cache.entry_num);
>> + } else {
>> + g_assert(total_entries == cache.entry_num);
>
>Killing the VMM because a kernel device ioctl failed is brute force.
>Can't we update the 'Error *' parameter instead to report that the
>invalidation is partial or something went wrong ?
Will do, like below:
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -339,7 +339,10 @@ bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
" totally %d entries, processed %d entries",
total_entries, cache.entry_num);
} else {
- g_assert(total_entries == cache.entry_num);
+ error_setg_errno(errp, -EFAULT, "IOMMU_HWPT_INVALIDATE succeed with unprocessed entries:"
+ " totally %d entries, processed %d entries",
+ total_entries, cache.entry_num);
+ ret = -EFAULT;
}
return !ret;
>
>What kind of errors are we trying to catch ?
I'm taking it as a kernel bug when ret = 0 and total_entries != cache.entry_num
>
>Looking at the kernel iommufd_hwpt_invalidate() routine and
>intel_nested_cache_invalidate_user(), it doesn't seem possible to
>return a different number of cache entries. Are you anticipating
>other implementations (sMMU) ?
Yes, same for sMMU's arm_vsmmu_cache_invalidate() and selftest's
mock_viommu_cache_invalidate() and mock_domain_cache_invalidate_user().
I'm not sure if this should apply to all types of IOMMUs, uAPI doc doesn't talk about it.
@Liu, Yi L, @nicolinc@nvidia.com, @Jason Gunthorpe, should I treat ret = 0 and total_entries != cache.entry_num as a kernel bug or not?
Thanks
Zhenzhong
On Thu, May 29, 2025 at 06:46:20AM +0000, Duan, Zhenzhong wrote: > >Looking at the kernel iommufd_hwpt_invalidate() routine and > >intel_nested_cache_invalidate_user(), it doesn't seem possible to > >return a different number of cache entries. Are you anticipating > >other implementations (sMMU) ? > > Yes, same for sMMU's arm_vsmmu_cache_invalidate() and selftest's > mock_viommu_cache_invalidate() and mock_domain_cache_invalidate_user(). > > I'm not sure if this should apply to all types of IOMMUs, uAPI doc doesn't talk about it. It should. The uAPI defines that at entry_num: * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) ... * @entry_num: Input the number of cache invalidation requests in the array. * Output the number of requests successfully handled by kernel. This applies to either ret != 0 case too. > @Liu, Yi L, @nicolinc@nvidia.com, @Jason Gunthorpe, should I treat ret = 0 > and total_entries != cache.entry_num as a kernel bug or not? Selftest has that coverage, so it would be a kernel bug that will unlikely occur. That being said, it doesn't hurt to do that IMHO. Thanks Nicolin
© 2016 - 2026 Red Hat, Inc.