[PATCH v7 19/23] Workaround for ERRATA_772415_SPR17

Zhenzhong Duan posted 23 patches 3 days, 2 hours ago
[PATCH v7 19/23] Workaround for ERRATA_772415_SPR17
Posted by Zhenzhong Duan 3 days, 2 hours ago
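On a system affected by ERRATA_772415, IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17
is reported by IOMMU_DEVICE_GET_HW_INFO. Due to this erratum, even a read-only
range mapped in the second-stage page table can still be written.

To work around it, when the vIOMMU wants nesting and the host IOMMU reports
this quirk, set bcontainer->bypass_ro.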

Reference from 4th Gen Intel Xeon Processor Scalable Family Specification
Update, Errata Details, SPR17.
https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/eagle-stream/sapphire-rapids-specification-update/

Also copied the SPR17 details from above link:
"Problem: When remapping hardware is configured by system software in
scalable mode as Nested (PGTT=011b) and with PWSNP field Set in the
PASID-table-entry, it may Set Accessed bit and Dirty bit (and Extended
Access bit if enabled) in first-stage page-table entries even when
second-stage mappings indicate that corresponding first-stage page-table
is Read-Only.

Implication: Due to this erratum, pages mapped as Read-only in second-stage
page-tables may be modified by remapping hardware Access/Dirty bit updates.

Workaround: None identified. System software enabling nested translations
for a VM should ensure that there are no read-only pages in the
corresponding second-stage mappings."

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/vfio/iommufd.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index f9d0926274..f9da0e79cc 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -15,6 +15,7 @@
 #include <linux/vfio.h>
 #include <linux/iommufd.h>
 
+#include "hw/iommu.h"
 #include "hw/vfio/vfio-device.h"
 #include "qemu/error-report.h"
 #include "trace.h"
@@ -351,6 +352,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
     VFIOContainer *bcontainer = VFIO_IOMMU(container);
     uint32_t type, flags = 0;
     uint64_t hw_caps;
+    VendorCaps caps;
     VFIOIOASHwpt *hwpt;
     uint32_t hwpt_id;
     int ret;
@@ -396,7 +398,8 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
      * instead.
      */
     if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
-                                         &type, NULL, 0, &hw_caps, errp)) {
+                                         &type, &caps, sizeof(caps), &hw_caps,
+                                         errp)) {
         return false;
     }
 
@@ -411,6 +414,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
      */
     if (vfio_device_get_viommu_flags_want_nesting(vbasedev)) {
         flags |= IOMMU_HWPT_ALLOC_NEST_PARENT;
+
+        if (host_iommu_extract_quirks(type, &caps) &
+            HOST_IOMMU_QUIRK_NESTING_PARENT_BYPASS_RO) {
+            bcontainer->bypass_ro = true;
+        }
     }
 
     if (cpr_is_incoming()) {
-- 
2.47.1
Re: [PATCH v7 19/23] Workaround for ERRATA_772415_SPR17
Posted by Cédric Le Goater 2 days, 17 hours ago
On 10/24/25 10:43, Zhenzhong Duan wrote:
> On a system affected by ERRATA_772415, IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17
> is reported by IOMMU_DEVICE_GET_HW_INFO. Due to this erratum, even a read-only
> range mapped in the second-stage page table can still be written.
> 
> Reference from 4th Gen Intel Xeon Processor Scalable Family Specification
> Update, Errata Details, SPR17.
> https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/eagle-stream/sapphire-rapids-specification-update/
> 
> Also copied the SPR17 details from above link:
> "Problem: When remapping hardware is configured by system software in
> scalable mode as Nested (PGTT=011b) and with PWSNP field Set in the
> PASID-table-entry, it may Set Accessed bit and Dirty bit (and Extended
> Access bit if enabled) in first-stage page-table entries even when
> second-stage mappings indicate that corresponding first-stage page-table
> is Read-Only.
> 
> Implication: Due to this erratum, pages mapped as Read-only in second-stage
> page-tables may be modified by remapping hardware Access/Dirty bit updates.
> 
> Workaround: None identified. System software enabling nested translations
> for a VM should ensure that there are no read-only pages in the
> corresponding second-stage mappings."
> 
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/vfio/iommufd.c | 10 +++++++++-
>   1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index f9d0926274..f9da0e79cc 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -15,6 +15,7 @@
>   #include <linux/vfio.h>
>   #include <linux/iommufd.h>
>   
> +#include "hw/iommu.h"

Changes look ok apart from this include.


Thanks,

C.



>   #include "hw/vfio/vfio-device.h"
>   #include "qemu/error-report.h"
>   #include "trace.h"
> @@ -351,6 +352,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>       VFIOContainer *bcontainer = VFIO_IOMMU(container);
>       uint32_t type, flags = 0;
>       uint64_t hw_caps;
> +    VendorCaps caps;
>       VFIOIOASHwpt *hwpt;
>       uint32_t hwpt_id;
>       int ret;
> @@ -396,7 +398,8 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>        * instead.
>        */
>       if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
> -                                         &type, NULL, 0, &hw_caps, errp)) {
> +                                         &type, &caps, sizeof(caps), &hw_caps,
> +                                         errp)) {
>           return false;
>       }
>   
> @@ -411,6 +414,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>        */
>       if (vfio_device_get_viommu_flags_want_nesting(vbasedev)) {
>           flags |= IOMMU_HWPT_ALLOC_NEST_PARENT;
> +
> +        if (host_iommu_extract_quirks(type, &caps) &
> +            HOST_IOMMU_QUIRK_NESTING_PARENT_BYPASS_RO) {
> +            bcontainer->bypass_ro = true;
> +        }
>       }
>   
>       if (cpr_is_incoming()) {
Re: [PATCH v7 19/23] Workaround for ERRATA_772415_SPR17
Posted by Cédric Le Goater 2 days, 17 hours ago
On 10/24/25 10:43, Zhenzhong Duan wrote:
> On a system affected by ERRATA_772415, IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17
> is reported by IOMMU_DEVICE_GET_HW_INFO. Due to this erratum, even a read-only
> range mapped in the second-stage page table can still be written.
> 
> Reference from 4th Gen Intel Xeon Processor Scalable Family Specification
> Update, Errata Details, SPR17.
> https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/eagle-stream/sapphire-rapids-specification-update/
> 
> Also copied the SPR17 details from above link:
> "Problem: When remapping hardware is configured by system software in
> scalable mode as Nested (PGTT=011b) and with PWSNP field Set in the
> PASID-table-entry, it may Set Accessed bit and Dirty bit (and Extended
> Access bit if enabled) in first-stage page-table entries even when
> second-stage mappings indicate that corresponding first-stage page-table
> is Read-Only.
> 
> Implication: Due to this erratum, pages mapped as Read-only in second-stage
> page-tables may be modified by remapping hardware Access/Dirty bit updates.
> 
> Workaround: None identified. System software enabling nested translations
> for a VM should ensure that there are no read-only pages in the
> corresponding second-stage mappings."
> 
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/vfio/iommufd.c | 10 +++++++++-
>   1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index f9d0926274..f9da0e79cc 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -15,6 +15,7 @@
>   #include <linux/vfio.h>
>   #include <linux/iommufd.h>
>   
> +#include "hw/iommu.h"

Changes look ok apart from this include.


Thanks,

C.



>   #include "hw/vfio/vfio-device.h"
>   #include "qemu/error-report.h"
>   #include "trace.h"
> @@ -351,6 +352,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>       VFIOContainer *bcontainer = VFIO_IOMMU(container);
>       uint32_t type, flags = 0;
>       uint64_t hw_caps;
> +    VendorCaps caps;
>       VFIOIOASHwpt *hwpt;
>       uint32_t hwpt_id;
>       int ret;
> @@ -396,7 +398,8 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>        * instead.
>        */
>       if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
> -                                         &type, NULL, 0, &hw_caps, errp)) {
> +                                         &type, &caps, sizeof(caps), &hw_caps,
> +                                         errp)) {
>           return false;
>       }
>   
> @@ -411,6 +414,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>        */
>       if (vfio_device_get_viommu_flags_want_nesting(vbasedev)) {
>           flags |= IOMMU_HWPT_ALLOC_NEST_PARENT;
> +
> +        if (host_iommu_extract_quirks(type, &caps) &
> +            HOST_IOMMU_QUIRK_NESTING_PARENT_BYPASS_RO) {
> +            bcontainer->bypass_ro = true;
> +        }
>       }
>   
>       if (cpr_is_incoming()) {