drivers/vfio/pci/nvgrace-gpu/main.c | 4 ++-- drivers/vfio/pci/vfio_pci_rdwr.c | 19 +++++++++++++++---- include/linux/vfio_pci_core.h | 2 +- 3 files changed, 18 insertions(+), 7 deletions(-)
Commit 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio
pci") enables qword access to the PCI bar resources. However certain
devices (e.g. Intel X710) are observed with problem upon qword accesses
to the rom bar, e.g. triggering PCI aer errors.
Instead of trying to identify all broken devices, universally disable
qword access to the rom bar i.e. going back to the old way which worked
reliably for years.
Reported-by: Farrah Chen <farrah.chen@intel.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220740
Fixes: 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio pci")
Cc: stable@vger.kernel.org
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
---
drivers/vfio/pci/nvgrace-gpu/main.c | 4 ++--
drivers/vfio/pci/vfio_pci_rdwr.c | 19 +++++++++++++++----
include/linux/vfio_pci_core.h | 2 +-
3 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index e346392b72f6..9b39184f76b7 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -491,7 +491,7 @@ nvgrace_gpu_map_and_read(struct nvgrace_gpu_pci_core_device *nvdev,
ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false,
nvdev->resmem.ioaddr,
buf, offset, mem_count,
- 0, 0, false);
+ 0, 0, false, true);
}
return ret;
@@ -609,7 +609,7 @@ nvgrace_gpu_map_and_write(struct nvgrace_gpu_pci_core_device *nvdev,
ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false,
nvdev->resmem.ioaddr,
(char __user *)buf, pos, mem_count,
- 0, 0, true);
+ 0, 0, true, true);
}
return ret;
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 6192788c8ba3..95dc7e04cb08 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -135,7 +135,7 @@ VFIO_IORDWR(64)
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
void __iomem *io, char __user *buf,
loff_t off, size_t count, size_t x_start,
- size_t x_end, bool iswrite)
+ size_t x_end, bool iswrite, bool allow_qword)
{
ssize_t done = 0;
int ret;
@@ -150,7 +150,7 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
else
fillable = 0;
- if (fillable >= 8 && !(off % 8)) {
+ if (allow_qword && fillable >= 8 && !(off % 8)) {
ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
io, buf, off, &filled);
if (ret)
@@ -234,6 +234,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
void __iomem *io;
struct resource *res = &vdev->pdev->resource[bar];
ssize_t done;
+ bool allow_qword = true;
if (pci_resource_start(pdev, bar))
end = pci_resource_len(pdev, bar);
@@ -262,6 +263,16 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
if (!io)
return -ENOMEM;
x_end = end;
+
+ /*
+ * Certain devices (e.g. Intel X710) don't support qword
+ * access to the ROM bar. Otherwise PCI AER errors might be
+ * triggered.
+ *
+ * Disable qword access to the ROM bar universally, which
+ * worked reliably for years before qword access is enabled.
+ */
+ allow_qword = false;
} else {
int ret = vfio_pci_core_setup_barmap(vdev, bar);
if (ret) {
@@ -278,7 +289,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
}
done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
- count, x_start, x_end, iswrite);
+ count, x_start, x_end, iswrite, allow_qword);
if (done >= 0)
*ppos += done;
@@ -352,7 +363,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
* to the memory enable bit in the command register.
*/
done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
- 0, 0, iswrite);
+ 0, 0, iswrite, true);
vga_put(vdev->pdev, rsrc);
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index f541044e42a2..3a75b76eaed3 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -133,7 +133,7 @@ pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
void __iomem *io, char __user *buf,
loff_t off, size_t count, size_t x_start,
- size_t x_end, bool iswrite);
+ size_t x_end, bool iswrite, bool allow_qword);
bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt,
loff_t reg_start, size_t reg_cnt,
loff_t *buf_offset,
--
2.43.0
On Fri, 12 Dec 2025 02:09:41 +0000
Kevin Tian <kevin.tian@intel.com> wrote:
> Commit 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio
> pci") enables qword access to the PCI bar resources. However certain
> devices (e.g. Intel X710) are observed with problem upon qword accesses
> to the rom bar, e.g. triggering PCI aer errors.
>
> Instead of trying to identify all broken devices, universally disable
> qword access to the rom bar i.e. going back to the old way which worked
> reliably for years.
Thanks for finding the root cause of this, Kevin. I think it would add
useful context in the commit log here to describe that the ROM is
somewhat unique in this respect because it's cached by QEMU which
simply does a pread() of the remaining size until it gets the full
contents. The other BARs would only perform operations at the same
access width as their guest drivers.
> Reported-by: Farrah Chen <farrah.chen@intel.com>
> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220740
> Fixes: 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio pci")
> Cc: stable@vger.kernel.org
> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
> ---
> drivers/vfio/pci/nvgrace-gpu/main.c | 4 ++--
> drivers/vfio/pci/vfio_pci_rdwr.c | 19 +++++++++++++++----
> include/linux/vfio_pci_core.h | 2 +-
> 3 files changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
> index e346392b72f6..9b39184f76b7 100644
> --- a/drivers/vfio/pci/nvgrace-gpu/main.c
> +++ b/drivers/vfio/pci/nvgrace-gpu/main.c
> @@ -491,7 +491,7 @@ nvgrace_gpu_map_and_read(struct nvgrace_gpu_pci_core_device *nvdev,
> ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false,
> nvdev->resmem.ioaddr,
> buf, offset, mem_count,
> - 0, 0, false);
> + 0, 0, false, true);
> }
>
> return ret;
> @@ -609,7 +609,7 @@ nvgrace_gpu_map_and_write(struct nvgrace_gpu_pci_core_device *nvdev,
> ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false,
> nvdev->resmem.ioaddr,
> (char __user *)buf, pos, mem_count,
> - 0, 0, true);
> + 0, 0, true, true);
> }
>
> return ret;
> diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
> index 6192788c8ba3..95dc7e04cb08 100644
> --- a/drivers/vfio/pci/vfio_pci_rdwr.c
> +++ b/drivers/vfio/pci/vfio_pci_rdwr.c
> @@ -135,7 +135,7 @@ VFIO_IORDWR(64)
> ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
> void __iomem *io, char __user *buf,
> loff_t off, size_t count, size_t x_start,
> - size_t x_end, bool iswrite)
> + size_t x_end, bool iswrite, bool allow_qword)
I've been trying to think about how we avoid yet another bool arg here.
What do you think about creating an enum:
enum vfio_pci_io_width {
VFIO_PCI_IO_WIDTH_1 = 1,
VFIO_PCI_IO_WIDTH_2 = 2,
VFIO_PCI_IO_WIDTH_4 = 4,
VFIO_PCI_IO_WIDTH_8 = 8,
};
The arg here would then be enum vfio_pci_io_width max_width, and for
each test we'd add a condition && max_width >= 8 (4, 2), where we can
assume byte access as the minimum regardless of the arg. It's a little
more than we need, but it follows a simple pattern and I think makes
the call sites a bit more intuitive.
> {
> ssize_t done = 0;
> int ret;
> @@ -150,7 +150,7 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
> else
> fillable = 0;
>
> - if (fillable >= 8 && !(off % 8)) {
> + if (allow_qword && fillable >= 8 && !(off % 8)) {
> ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
> io, buf, off, &filled);
> if (ret)
> @@ -234,6 +234,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
> void __iomem *io;
> struct resource *res = &vdev->pdev->resource[bar];
> ssize_t done;
> + bool allow_qword = true;
>
> if (pci_resource_start(pdev, bar))
> end = pci_resource_len(pdev, bar);
> @@ -262,6 +263,16 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
> if (!io)
> return -ENOMEM;
> x_end = end;
> +
> + /*
> + * Certain devices (e.g. Intel X710) don't support qword
> + * access to the ROM bar. Otherwise PCI AER errors might be
> + * triggered.
> + *
> + * Disable qword access to the ROM bar universally, which
> + * worked reliably for years before qword access is enabled.
> + */
> + allow_qword = false;
> } else {
> int ret = vfio_pci_core_setup_barmap(vdev, bar);
> if (ret) {
> @@ -278,7 +289,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
> }
>
> done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
> - count, x_start, x_end, iswrite);
> + count, x_start, x_end, iswrite, allow_qword);
>
> if (done >= 0)
> *ppos += done;
> @@ -352,7 +363,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
> * to the memory enable bit in the command register.
> */
> done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
> - 0, 0, iswrite);
> + 0, 0, iswrite, true);
I have no basis other than paranoia and "VGA is old and a 64-bit access
to it seem wrong", but I'm tempted to restrict this to dword access as
well. I don't want to take your fix off track from it's specific goal
though. Thanks,
Alex
>
> vga_put(vdev->pdev, rsrc);
>
> diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
> index f541044e42a2..3a75b76eaed3 100644
> --- a/include/linux/vfio_pci_core.h
> +++ b/include/linux/vfio_pci_core.h
> @@ -133,7 +133,7 @@ pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
> ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
> void __iomem *io, char __user *buf,
> loff_t off, size_t count, size_t x_start,
> - size_t x_end, bool iswrite);
> + size_t x_end, bool iswrite, bool allow_qword);
> bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt,
> loff_t reg_start, size_t reg_cnt,
> loff_t *buf_offset,
> From: Alex Williamson <alex@shazbot.org>
> Sent: Thursday, December 18, 2025 6:08 AM
>
> On Fri, 12 Dec 2025 02:09:41 +0000
> Kevin Tian <kevin.tian@intel.com> wrote:
>
> > Commit 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio
> > pci") enables qword access to the PCI bar resources. However certain
> > devices (e.g. Intel X710) are observed with problem upon qword accesses
> > to the rom bar, e.g. triggering PCI aer errors.
> >
> > Instead of trying to identify all broken devices, universally disable
> > qword access to the rom bar i.e. going back to the old way which worked
> > reliably for years.
>
> Thanks for finding the root cause of this, Kevin. I think it would add
> useful context in the commit log here to describe that the ROM is
> somewhat unique in this respect because it's cached by QEMU which
> simply does a pread() of the remaining size until it gets the full
> contents. The other BARs would only perform operations at the same
> access width as their guest drivers.
will do
> > ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool
> test_mem,
> > void __iomem *io, char __user *buf,
> > loff_t off, size_t count, size_t x_start,
> > - size_t x_end, bool iswrite)
> > + size_t x_end, bool iswrite, bool allow_qword)
>
> I've been trying to think about how we avoid yet another bool arg here.
> What do you think about creating an enum:
>
> enum vfio_pci_io_width {
> VFIO_PCI_IO_WIDTH_1 = 1,
> VFIO_PCI_IO_WIDTH_2 = 2,
> VFIO_PCI_IO_WIDTH_4 = 4,
> VFIO_PCI_IO_WIDTH_8 = 8,
> };
>
> The arg here would then be enum vfio_pci_io_width max_width, and for
> each test we'd add a condition && max_width >= 8 (4, 2), where we can
> assume byte access as the minimum regardless of the arg. It's a little
> more than we need, but it follows a simple pattern and I think makes
> the call sites a bit more intuitive.
make sense
> > @@ -352,7 +363,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device
> *vdev, char __user *buf,
> > * to the memory enable bit in the command register.
> > */
> > done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
> > - 0, 0, iswrite);
> > + 0, 0, iswrite, true);
>
> I have no basis other than paranoia and "VGA is old and a 64-bit access
> to it seem wrong", but I'm tempted to restrict this to dword access as
> well. I don't want to take your fix off track from it's specific goal
> though. Thanks,
>
I'll add a new patch for it.
© 2016 - 2026 Red Hat, Inc.