Some PCIe devices trigger PCI bus errors when accesses are made to
unassigned regions within their PCI configuration space. On certain
platforms, this can lead to host system hangs or reboots.
The current vfio-pci driver allows guests to access unassigned regions
in the PCI configuration space. Therefore, when such a device is passed
through to a guest, the guest can induce a host system hang or reboot
through crafted configuration space accesses, posing a threat to
system availability.
This patch introduces auditing support for config space accesses to
unassigned regions. When enabled, this logs such accesses for all
passthrough devices.
This feature is controlled via a new Kconfig option:
CONFIG_VFIO_PCI_UNASSIGNED_ACCESS_AUDIT
A new audit event type, AUDIT_VFIO, has been introduced to support
this, allowing administrators to monitor and investigate suspicious
behavior by guests.
Co-developed by: William Wang <xwill@bu.edu>
Signed-off-by: William Wang <xwill@bu.edu>
Signed-off-by: Chathura Rajapaksha <chath@bu.edu>
---
drivers/vfio/pci/Kconfig | 12 ++++++++
drivers/vfio/pci/vfio_pci_config.c | 46 ++++++++++++++++++++++++++++--
include/uapi/linux/audit.h | 1 +
3 files changed, 57 insertions(+), 2 deletions(-)
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index c3bcb6911c53..7f9f16262b90 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -42,6 +42,18 @@ config VFIO_PCI_IGD
and LPC bridge config space.
To enable Intel IGD assignment through vfio-pci, say Y.
+
+config VFIO_PCI_UNASSIGNED_ACCESS_AUDIT
+ bool "Audit accesses to unassigned PCI configuration regions"
+ depends on AUDIT && VFIO_PCI_CORE
+ help
+ Some PCIe devices are known to cause bus errors when accessing
+ unassigned PCI configuration space, potentially leading to host
+ system hangs on certain platforms. When enabled, this option
+ audits accesses to unassigned PCI configuration regions.
+
+ If you don't know what to do here, say N.
+
endif
config VFIO_PCI_ZDEV_KVM
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index cb4d11aa5598..ddd10904d60f 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -25,6 +25,7 @@
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/slab.h>
+#include <linux/audit.h>
#include "vfio_pci_priv.h"
@@ -1980,6 +1981,37 @@ static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_core_device *vdev,
return i;
}
+enum vfio_audit {
+ VFIO_AUDIT_READ,
+ VFIO_AUDIT_WRITE,
+ VFIO_AUDIT_MAX,
+};
+
+static const char * const vfio_audit_str[VFIO_AUDIT_MAX] = {
+ [VFIO_AUDIT_READ] = "READ",
+ [VFIO_AUDIT_WRITE] = "WRITE",
+};
+
+static void vfio_audit_access(const struct pci_dev *pdev,
+ size_t count, loff_t *ppos, bool blocked, unsigned int op)
+{
+ struct audit_buffer *ab;
+
+ if (WARN_ON_ONCE(op >= VFIO_AUDIT_MAX))
+ return;
+ if (audit_enabled == AUDIT_OFF)
+ return;
+ ab = audit_log_start(audit_context(), GFP_ATOMIC, AUDIT_VFIO);
+ if (unlikely(!ab))
+ return;
+ audit_log_format(ab,
+ "device=%04x:%02x:%02x.%d access=%s offset=0x%llx size=%ld blocked=%u\n",
+ pci_domain_nr(pdev->bus), pdev->bus->number,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+ vfio_audit_str[op], *ppos, count, blocked);
+ audit_log_end(ab);
+}
+
static ssize_t vfio_config_do_rw(struct vfio_pci_core_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
{
@@ -1989,6 +2021,7 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_core_device *vdev, char __user
int cap_start = 0, offset;
u8 cap_id;
ssize_t ret;
+ bool blocked;
if (*ppos < 0 || *ppos >= pdev->cfg_size ||
*ppos + count > pdev->cfg_size)
@@ -2011,13 +2044,22 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_core_device *vdev, char __user
cap_id = vdev->pci_config_map[*ppos];
if (cap_id == PCI_CAP_ID_INVALID) {
- if (((iswrite && block_pci_unassigned_write) ||
+ blocked = (((iswrite && block_pci_unassigned_write) ||
(!iswrite && block_pci_unassigned_read)) &&
- !pci_uaccess_lookup(pdev))
+ !pci_uaccess_lookup(pdev));
+ if (blocked)
perm = &block_unassigned_perms;
else
perm = &unassigned_perms;
cap_start = *ppos;
+ if (IS_ENABLED(CONFIG_VFIO_PCI_UNASSIGNED_ACCESS_AUDIT)) {
+ if (iswrite)
+ vfio_audit_access(pdev, count, ppos, blocked,
+ VFIO_AUDIT_WRITE);
+ else
+ vfio_audit_access(pdev, count, ppos, blocked,
+ VFIO_AUDIT_READ);
+ }
} else if (cap_id == PCI_CAP_ID_INVALID_VIRT) {
perm = &virt_perms;
cap_start = *ppos;
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 9a4ecc9f6dc5..c0aace7384f3 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -122,6 +122,7 @@
#define AUDIT_OPENAT2 1337 /* Record showing openat2 how args */
#define AUDIT_DM_CTRL 1338 /* Device Mapper target control */
#define AUDIT_DM_EVENT 1339 /* Device Mapper events */
+#define AUDIT_VFIO 1340 /* VFIO events */
#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */
#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */
--
2.34.1
On Apr 26, 2025 Chathura Rajapaksha <chathura.abeyrathne.lk@gmail.com> wrote:
>
> Some PCIe devices trigger PCI bus errors when accesses are made to
> unassigned regions within their PCI configuration space. On certain
> platforms, this can lead to host system hangs or reboots.
>
> The current vfio-pci driver allows guests to access unassigned regions
> in the PCI configuration space. Therefore, when such a device is passed
> through to a guest, the guest can induce a host system hang or reboot
> through crafted configuration space accesses, posing a threat to
> system availability.
>
> This patch introduces auditing support for config space accesses to
> unassigned regions. When enabled, this logs such accesses for all
> passthrough devices.
> This feature is controlled via a new Kconfig option:
>
> CONFIG_VFIO_PCI_UNASSIGNED_ACCESS_AUDIT
>
> A new audit event type, AUDIT_VFIO, has been introduced to support
> this, allowing administrators to monitor and investigate suspicious
> behavior by guests.
I try to encourage people to put a sample audit record in the commit
description as it helps others, even those not overly familiar with the
Linux kernel, know what to expect in the audit log and provide feedback.
> Co-developed by: William Wang <xwill@bu.edu>
>
> Signed-off-by: William Wang <xwill@bu.edu>
> Signed-off-by: Chathura Rajapaksha <chath@bu.edu>
> ---
> drivers/vfio/pci/Kconfig | 12 ++++++++
> drivers/vfio/pci/vfio_pci_config.c | 46 ++++++++++++++++++++++++++++--
> include/uapi/linux/audit.h | 1 +
> 3 files changed, 57 insertions(+), 2 deletions(-)
...
> diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
> index cb4d11aa5598..ddd10904d60f 100644
> --- a/drivers/vfio/pci/vfio_pci_config.c
> +++ b/drivers/vfio/pci/vfio_pci_config.c
> @@ -25,6 +25,7 @@
> #include <linux/uaccess.h>
> #include <linux/vfio.h>
> #include <linux/slab.h>
> +#include <linux/audit.h>
>
> #include "vfio_pci_priv.h"
>
> @@ -1980,6 +1981,37 @@ static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_core_device *vdev,
> return i;
> }
>
> +enum vfio_audit {
> + VFIO_AUDIT_READ,
> + VFIO_AUDIT_WRITE,
> + VFIO_AUDIT_MAX,
> +};
> +
> +static const char * const vfio_audit_str[VFIO_AUDIT_MAX] = {
> + [VFIO_AUDIT_READ] = "READ",
> + [VFIO_AUDIT_WRITE] = "WRITE",
> +};
We generally don't capitalize things like this in audit, "read" and
"write" would be preferred.
> +static void vfio_audit_access(const struct pci_dev *pdev,
> + size_t count, loff_t *ppos, bool blocked, unsigned int op)
> +{
> + struct audit_buffer *ab;
> +
> + if (WARN_ON_ONCE(op >= VFIO_AUDIT_MAX))
> + return;
> + if (audit_enabled == AUDIT_OFF)
> + return;
> + ab = audit_log_start(audit_context(), GFP_ATOMIC, AUDIT_VFIO);
> + if (unlikely(!ab))
> + return;
> + audit_log_format(ab,
> + "device=%04x:%02x:%02x.%d access=%s offset=0x%llx size=%ld blocked=%u\n",
> + pci_domain_nr(pdev->bus), pdev->bus->number,
> + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
> + vfio_audit_str[op], *ppos, count, blocked);
> + audit_log_end(ab);
> +}
In the commit description you talk about a general PCIe device issue
in the first paragraph before going into the specifics of the VFIO
driver. That's all well and good, but it makes me wonder if this
audit code above is better done as a generic PCI function that other
PCI drivers could use if they had similar concerns? Please correct
me if I'm wrong, but other than symbol naming I don't see anyting
above which is specific to VFIO. Thoughts?
Beyond that, I might also change the "access=" field to "op=" as we
already use the "op=" field name for similar things in audit, it would
be good to leverage that familiarity here. Similarly using "res=",
specifically "res=0" for failure/blocked or "res=1" allowed, would
better fit with audit conventions.
> diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> index 9a4ecc9f6dc5..c0aace7384f3 100644
> --- a/include/uapi/linux/audit.h
> +++ b/include/uapi/linux/audit.h
> @@ -122,6 +122,7 @@
> #define AUDIT_OPENAT2 1337 /* Record showing openat2 how args */
> #define AUDIT_DM_CTRL 1338 /* Device Mapper target control */
> #define AUDIT_DM_EVENT 1339 /* Device Mapper events */
> +#define AUDIT_VFIO 1340 /* VFIO events */
If the audit code above becomes more generalized as discussed, I would
expect this to change to AUDIT_VFIO to AUDIT_PCI, or something similar.
--
paul-moore.com
Hi Bjorn and Paul,
Thank you for your comments, and sorry for the late reply.
On Mon, Apr 28, 2025 at 11:05 AM Bjorn Helgaas <helgaas@kernel.org> wrote:
> Add blank line between paragraphs.
> Use imperative mood ("Introduce" instead of "This patch introduces
> ..." and "Add ..." instead of "A new type has been introduced").
> Simplify this patch by adding "blocked" in the first patch. Then you
> won't have to touch the permission checking that is unrelated to the
> audit logging. Consider adding a helper to do the checking and return
> "blocked" so it doesn't clutter vfio_config_do_rw().
I will address the above comments in the next revision.
On Fri, May 16, 2025 at 4:41 PM Paul Moore <paul@paul-moore.com> wrote:
> I try to encourage people to put a sample audit record in the commit
> description as it helps others, even those not overly familiar with the
> Linux kernel, know what to expect in the audit log and provide feedback.
> > +static const char * const vfio_audit_str[VFIO_AUDIT_MAX] = {
> > + [VFIO_AUDIT_READ] = "READ",
> > + [VFIO_AUDIT_WRITE] = "WRITE",
> > +};
>
> We generally don't capitalize things like this in audit, "read" and
> "write" would be preferred.
I will address the above comments in the next revision.
The following is the expected audit message when a write is performed
to an unassigned PCI config region:
device=0000:01:00.1 access=WRITE offset=0x298 size=1 blocked=0
> In the commit description you talk about a general PCIe device issue
> in the first paragraph before going into the specifics of the VFIO
> driver. That's all well and good, but it makes me wonder if this
> audit code above is better done as a generic PCI function that other
> PCI drivers could use if they had similar concerns? Please correct
> me if I'm wrong, but other than symbol naming I don't see anyting
> above which is specific to VFIO. Thoughts?
While the issue is independent of VFIO, the security and availability
concerns arise when guests are able to write to unassigned PCI config
regions on devices passed through using VFIO. That's why we thought it
would be better to audit these accesses in the VFIO driver. Given this
context, do you think it would be more appropriate to audit these
accesses through a generic PCI function instead?
> Beyond that, I might also change the "access=" field to "op=" as we
> already use the "op=" field name for similar things in audit, it would
> be good to leverage that familiarity here. Similarly using "res=",
> specifically "res=0" for failure/blocked or "res=1" allowed, would
> better fit with audit conventions.
Thanks for the suggestions, I will address these in the next revision.
Regards,
Chathura
On Tue, May 20, 2025 at 12:34 PM Chathura Rajapaksha <chathura.abeyrathne.lk@gmail.com> wrote: > On Fri, May 16, 2025 at 4:41 PM Paul Moore <paul@paul-moore.com> wrote: > > > In the commit description you talk about a general PCIe device issue > > in the first paragraph before going into the specifics of the VFIO > > driver. That's all well and good, but it makes me wonder if this > > audit code above is better done as a generic PCI function that other > > PCI drivers could use if they had similar concerns? Please correct > > me if I'm wrong, but other than symbol naming I don't see anyting > > above which is specific to VFIO. Thoughts? > > While the issue is independent of VFIO, the security and availability > concerns arise when guests are able to write to unassigned PCI config > regions on devices passed through using VFIO. That's why we thought it > would be better to audit these accesses in the VFIO driver. Given this > context, do you think it would be more appropriate to audit these > accesses through a generic PCI function instead? I would suggest a generic PCI function, e.g. pci_audit_access(...), that lives in the general PCI code and would be suitable for callers other than VFIO, that you can call from within vfio_config_do_rw() when Bad Things happen. Does that make sense? -- paul-moore.com
On Sat, Apr 26, 2025 at 09:22:49PM +0000, Chathura Rajapaksha wrote:
> Some PCIe devices trigger PCI bus errors when accesses are made to
> unassigned regions within their PCI configuration space. On certain
> platforms, this can lead to host system hangs or reboots.
>
> The current vfio-pci driver allows guests to access unassigned regions
> in the PCI configuration space. Therefore, when such a device is passed
> through to a guest, the guest can induce a host system hang or reboot
> through crafted configuration space accesses, posing a threat to
> system availability.
>
> This patch introduces auditing support for config space accesses to
> unassigned regions. When enabled, this logs such accesses for all
> passthrough devices.
> This feature is controlled via a new Kconfig option:
Add blank line between paragraphs.
> CONFIG_VFIO_PCI_UNASSIGNED_ACCESS_AUDIT
>
> A new audit event type, AUDIT_VFIO, has been introduced to support
> this, allowing administrators to monitor and investigate suspicious
> behavior by guests.
Use imperative mood ("Introduce" instead of "This patch introduces
..." and "Add ..." instead of "A new type has been introduced").
> Co-developed by: William Wang <xwill@bu.edu>
> Signed-off-by: William Wang <xwill@bu.edu>
> Signed-off-by: Chathura Rajapaksha <chath@bu.edu>
> ---
> drivers/vfio/pci/Kconfig | 12 ++++++++
> drivers/vfio/pci/vfio_pci_config.c | 46 ++++++++++++++++++++++++++++--
> include/uapi/linux/audit.h | 1 +
> 3 files changed, 57 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
> index c3bcb6911c53..7f9f16262b90 100644
> --- a/drivers/vfio/pci/Kconfig
> +++ b/drivers/vfio/pci/Kconfig
> @@ -42,6 +42,18 @@ config VFIO_PCI_IGD
> and LPC bridge config space.
>
> To enable Intel IGD assignment through vfio-pci, say Y.
> +
> +config VFIO_PCI_UNASSIGNED_ACCESS_AUDIT
> + bool "Audit accesses to unassigned PCI configuration regions"
> + depends on AUDIT && VFIO_PCI_CORE
> + help
> + Some PCIe devices are known to cause bus errors when accessing
> + unassigned PCI configuration space, potentially leading to host
> + system hangs on certain platforms. When enabled, this option
> + audits accesses to unassigned PCI configuration regions.
> +
> + If you don't know what to do here, say N.
> +
> endif
>
> config VFIO_PCI_ZDEV_KVM
> diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
> index cb4d11aa5598..ddd10904d60f 100644
> --- a/drivers/vfio/pci/vfio_pci_config.c
> +++ b/drivers/vfio/pci/vfio_pci_config.c
> @@ -25,6 +25,7 @@
> #include <linux/uaccess.h>
> #include <linux/vfio.h>
> #include <linux/slab.h>
> +#include <linux/audit.h>
>
> #include "vfio_pci_priv.h"
>
> @@ -1980,6 +1981,37 @@ static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_core_device *vdev,
> return i;
> }
>
> +enum vfio_audit {
> + VFIO_AUDIT_READ,
> + VFIO_AUDIT_WRITE,
> + VFIO_AUDIT_MAX,
> +};
> +
> +static const char * const vfio_audit_str[VFIO_AUDIT_MAX] = {
> + [VFIO_AUDIT_READ] = "READ",
> + [VFIO_AUDIT_WRITE] = "WRITE",
> +};
> +
> +static void vfio_audit_access(const struct pci_dev *pdev,
> + size_t count, loff_t *ppos, bool blocked, unsigned int op)
> +{
> + struct audit_buffer *ab;
> +
> + if (WARN_ON_ONCE(op >= VFIO_AUDIT_MAX))
> + return;
> + if (audit_enabled == AUDIT_OFF)
> + return;
> + ab = audit_log_start(audit_context(), GFP_ATOMIC, AUDIT_VFIO);
> + if (unlikely(!ab))
> + return;
> + audit_log_format(ab,
> + "device=%04x:%02x:%02x.%d access=%s offset=0x%llx size=%ld blocked=%u\n",
> + pci_domain_nr(pdev->bus), pdev->bus->number,
> + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
> + vfio_audit_str[op], *ppos, count, blocked);
> + audit_log_end(ab);
> +}
> +
> static ssize_t vfio_config_do_rw(struct vfio_pci_core_device *vdev, char __user *buf,
> size_t count, loff_t *ppos, bool iswrite)
> {
> @@ -1989,6 +2021,7 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_core_device *vdev, char __user
> int cap_start = 0, offset;
> u8 cap_id;
> ssize_t ret;
> + bool blocked;
>
> if (*ppos < 0 || *ppos >= pdev->cfg_size ||
> *ppos + count > pdev->cfg_size)
> @@ -2011,13 +2044,22 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_core_device *vdev, char __user
> cap_id = vdev->pci_config_map[*ppos];
>
> if (cap_id == PCI_CAP_ID_INVALID) {
> - if (((iswrite && block_pci_unassigned_write) ||
> + blocked = (((iswrite && block_pci_unassigned_write) ||
> (!iswrite && block_pci_unassigned_read)) &&
> - !pci_uaccess_lookup(pdev))
> + !pci_uaccess_lookup(pdev));
> + if (blocked)
> perm = &block_unassigned_perms;
> else
> perm = &unassigned_perms;
> cap_start = *ppos;
> + if (IS_ENABLED(CONFIG_VFIO_PCI_UNASSIGNED_ACCESS_AUDIT)) {
> + if (iswrite)
> + vfio_audit_access(pdev, count, ppos, blocked,
> + VFIO_AUDIT_WRITE);
> + else
> + vfio_audit_access(pdev, count, ppos, blocked,
> + VFIO_AUDIT_READ);
> + }
Simplify this patch by adding "blocked" in the first patch. Then you
won't have to touch the permission checking that is unrelated to the
audit logging. Consider adding a helper to do the checking and return
"blocked" so it doesn't clutter vfio_config_do_rw().
> } else if (cap_id == PCI_CAP_ID_INVALID_VIRT) {
> perm = &virt_perms;
> cap_start = *ppos;
> diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> index 9a4ecc9f6dc5..c0aace7384f3 100644
> --- a/include/uapi/linux/audit.h
> +++ b/include/uapi/linux/audit.h
> @@ -122,6 +122,7 @@
> #define AUDIT_OPENAT2 1337 /* Record showing openat2 how args */
> #define AUDIT_DM_CTRL 1338 /* Device Mapper target control */
> #define AUDIT_DM_EVENT 1339 /* Device Mapper events */
> +#define AUDIT_VFIO 1340 /* VFIO events */
>
> #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */
> #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */
> --
> 2.34.1
>
© 2016 - 2026 Red Hat, Inc.